mirror of
https://github.com/johrpan/ubigen.git
synced 2025-10-26 19:57:24 +01:00
Reduce memory footprint during analysis
This commit is contained in:
parent
f59a71b16c
commit
2eec3285f9
4 changed files with 33 additions and 21 deletions
|
|
@@ -32,5 +32,6 @@ Imports:
|
||||||
Suggests:
|
Suggests:
|
||||||
biomaRt,
|
biomaRt,
|
||||||
edgeR,
|
edgeR,
|
||||||
|
here,
|
||||||
purrr,
|
purrr,
|
||||||
stringr
|
stringr
|
||||||
|
|
|
||||||
|
|
@@ -7,7 +7,7 @@ library(here)
|
||||||
|
|
||||||
i_am("scripts/input.R")
|
i_am("scripts/input.R")
|
||||||
|
|
||||||
data <- fread(here("scripts", "input", "data_long.csv.gz"))
|
data <- fread(here("scripts", "input", "data_long.csv"))
|
||||||
|
|
||||||
data[, `:=`(
|
data[, `:=`(
|
||||||
expression_median = median(expression),
|
expression_median = median(expression),
|
||||||
|
|
|
||||||
|
|
@@ -70,21 +70,24 @@ getpm <- DGEList(counts = read_counts) |>
|
||||||
data_wide_samples <- data.table(getpm, keep.rownames = "gene")
|
data_wide_samples <- data.table(getpm, keep.rownames = "gene")
|
||||||
data_wide_samples[, hgnc_symbol := hgnc_symbols]
|
data_wide_samples[, hgnc_symbol := hgnc_symbols]
|
||||||
|
|
||||||
|
# Create lookup tables for genes and samples.
|
||||||
|
|
||||||
|
genes <- data_wide_samples[, .(id = .I, gene, hgnc_symbol)]
|
||||||
|
fwrite(genes, file = here("scripts", "input", "genes.csv"))
|
||||||
|
|
||||||
|
sample_names <- colnames(data_wide_samples[, !c("gene", "hgnc_symbol")])
|
||||||
|
samples <- data.table(id = seq_along(sample_names), sample = sample_names)
|
||||||
|
fwrite(samples, file = here("scripts", "input", "samples.csv"))
|
||||||
|
|
||||||
|
data_wide_samples[, `:=`(gene = .I, hgnc_symbol = NULL)]
|
||||||
|
colnames(data_wide_samples) <- c("gene", seq_along(sample_names))
|
||||||
|
|
||||||
data_long <- melt(
|
data_long <- melt(
|
||||||
data_wide_samples,
|
data_wide_samples,
|
||||||
id.vars = c("gene", "hgnc_symbol"),
|
id.vars = "gene",
|
||||||
variable.name = "sample",
|
variable.name = "sample",
|
||||||
value.name = "expression",
|
value.name = "expression",
|
||||||
variable.factor = FALSE
|
variable.factor = FALSE
|
||||||
)
|
)
|
||||||
|
|
||||||
fwrite(
|
fwrite(data_long, file = here("scripts", "input", "data_long.csv"))
|
||||||
data_wide_samples,
|
|
||||||
file = here(
|
|
||||||
"scripts",
|
|
||||||
"input",
|
|
||||||
"data_wide_samples.csv.gz"
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
fwrite(data_long, file = here("scripts", "input", "data_long.csv.gz"))
|
|
||||||
|
|
|
||||||
|
|
@@ -6,6 +6,7 @@ library(here)
|
||||||
|
|
||||||
i_am("scripts/input.R")
|
i_am("scripts/input.R")
|
||||||
|
|
||||||
|
genes <- fread(here("scripts", "input", "genes.csv"))
|
||||||
data <- fread(here("scripts", "output", "results.csv"))
|
data <- fread(here("scripts", "output", "results.csv"))
|
||||||
|
|
||||||
data[, score := 0.5 * above_95 +
|
data[, score := 0.5 * above_95 +
|
||||||
|
|
@@ -22,17 +23,24 @@ data[is.na(score), score := 0.0]
|
||||||
|
|
||||||
setorder(data, -score)
|
setorder(data, -score)
|
||||||
|
|
||||||
|
# Reintroduce gene IDs and HGNC symbols.
|
||||||
|
|
||||||
|
setnames(data, "gene", "id")
|
||||||
|
|
||||||
|
data <- merge(
|
||||||
|
data,
|
||||||
|
genes,
|
||||||
|
by = "id",
|
||||||
|
all.x = TRUE,
|
||||||
|
sort = FALSE
|
||||||
|
)
|
||||||
|
|
||||||
|
setnames(data, "hgnc_symbol", "hgnc_name")
|
||||||
|
data[, id := NULL]
|
||||||
|
|
||||||
# Remove duplicates. This will keep the best row for each duplicated gene.
|
# Remove duplicates. This will keep the best row for each duplicated gene.
|
||||||
data <- unique(data, by = "gene")
|
data <- unique(data, by = "gene")
|
||||||
|
|
||||||
data[, `:=`(
|
data[, rank := .I]
|
||||||
hgnc_name = gprofiler2::gconvert(
|
|
||||||
gene,
|
|
||||||
target = "HGNC",
|
|
||||||
mthreshold = 1,
|
|
||||||
filter_na = FALSE
|
|
||||||
)$target,
|
|
||||||
rank = .I
|
|
||||||
)]
|
|
||||||
|
|
||||||
fwrite(data, file = here("scripts", "output", "genes.csv"))
|
fwrite(data, file = here("scripts", "output", "genes.csv"))
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue