Reduce memory footprint during analysis

2025-10-26 19:57:24 +01:00 · 2022-09-25 19:01:59 +02:00 · 2022-09-25 19:01:59 +02:00 · 2eec3285f9
commit 2eec3285f9
parent f59a71b16c
4 changed files with 33 additions and 21 deletions
--- a/scripts/ranking.R
+++ b/scripts/ranking.R
@@ -6,6 +6,7 @@ library(here)

 i_am("scripts/input.R")

+genes <- fread(here("scripts", "input", "genes.csv"))
 data <- fread(here("scripts", "output", "results.csv"))

 data[, score := 0.5 * above_95 +
@@ -22,17 +23,24 @@ data[is.na(score), score := 0.0]

 setorder(data, -score)

+# Reintroduce gene IDs and HGNC symbols.
+
+setnames(data, "gene", "id")
+
+data <- merge(
+  data,
+  genes,
+  by = "id",
+  all.x = TRUE,
+  sort = FALSE
+)
+
+setnames(data, "hgnc_symbol", "hgnc_name")
+data[, id := NULL]
+
 # Remove duplicates. This will keep the best row for each duplicated gene.
 data <- unique(data, by = "gene")

-data[, `:=`(
-  hgnc_name = gprofiler2::gconvert(
-    gene,
-    target = "HGNC",
-    mthreshold = 1,
-    filter_na = FALSE
-  )$target,
-  rank = .I
-)]
+data[, rank := .I]

 fwrite(data, file = here("scripts", "output", "genes.csv"))