Handle caching

2025-10-26 18:57:25 +01:00 · 2021-10-21 17:25:44 +02:00 · 2021-10-21 17:25:44 +02:00 · df6e23d219
commit df6e23d219
parent b8365e0efb
7 changed files with 247 additions and 191 deletions
--- a/R/correlation.R
+++ b/R/correlation.R
@ -5,69 +5,75 @@ correlation <- function(preset, progress = NULL) {
    gene_ids <- preset$gene_ids
    reference_gene_ids <- preset$reference_gene_ids

-    # Prefilter distances by species.
-    distances <- geposan::distances[species %chin% species_ids]
+    cached("correlation", c(species_ids, gene_ids, reference_gene_ids), {
+        # Prefilter distances by species.
+        distances <- geposan::distances[species %chin% species_ids]

-    # Tranform data to get species as rows and genes as columns. We construct
-    # columns per species, because it requires fewer iterations, and transpose
-    # the table afterwards.
+        # Transform data to get species as rows and genes as columns. We
+        # construct columns per species, because it requires fewer iterations,
+        # and transpose the table afterwards.

-    data <- data.table(gene = gene_ids)
+        data <- data.table(gene = gene_ids)

-    # Make a column containing distance data for each species.
-    for (species_id in species_ids) {
-        species_distances <- distances[species == species_id, .(gene, distance)]
-        data <- merge(data, species_distances, all.x = TRUE)
-        setnames(data, "distance", species_id)
-    }
+        # Make a column containing distance data for each species.
+        for (species_id in species_ids) {
+            species_distances <- distances[
+                species == species_id,
+                .(gene, distance)
+            ]

-    # Transpose to the desired format.
-    data <- transpose(data, make.names = "gene")
-
-    if (!is.null(progress)) progress(0.33)
-
-    # Take the reference data.
-    reference_data <- data[, ..reference_gene_ids]
-
-    # Perform the correlation between all possible pairs.
-    results <- stats::cor(
-        data[, ..gene_ids],
-        reference_data,
-        use = "pairwise.complete.obs",
-        method = "spearman"
-    )
-
-    results <- data.table(results, keep.rownames = TRUE)
-    setnames(results, "rn", "gene")
-
-    # Remove correlations between the reference genes themselves.
-    for (reference_gene_id in reference_gene_ids) {
-        column <- quote(reference_gene_id)
-        results[gene == reference_gene_id, eval(column) := NA]
-    }
-
-    if (!is.null(progress)) progress(0.66)
-
-    # Compute the final score as the mean of known correlation scores. Negative
-    # correlations will correctly lessen the score, which will be clamped to
-    # zero as its lower bound. Genes with no possible correlations at all will
-    # be assumed to have a score of 0.0.
-
-    compute_score <- function(scores) {
-        score <- mean(scores, na.rm = TRUE)
-
-        if (is.na(score) | score < 0.0) {
-            score <- 0.0
+            data <- merge(data, species_distances, all.x = TRUE)
+            setnames(data, "distance", species_id)
        }

-        score
-    }
+        # Transpose to the desired format.
+        data <- transpose(data, make.names = "gene")

-    results[,
-        score := compute_score(as.matrix(.SD)),
-        .SDcols = reference_gene_ids,
-        by = gene
-    ]
+        if (!is.null(progress)) progress(0.33)

-    results[, .(gene, score)]
+        # Take the reference data.
+        reference_data <- data[, ..reference_gene_ids]
+
+        # Perform the correlation between all possible pairs.
+        results <- stats::cor(
+            data[, ..gene_ids],
+            reference_data,
+            use = "pairwise.complete.obs",
+            method = "spearman"
+        )
+
+        results <- data.table(results, keep.rownames = TRUE)
+        setnames(results, "rn", "gene")
+
+        # Remove correlations between the reference genes themselves.
+        for (reference_gene_id in reference_gene_ids) {
+            column <- quote(reference_gene_id)
+            results[gene == reference_gene_id, eval(column) := NA]
+        }
+
+        if (!is.null(progress)) progress(0.66)
+
+        # Compute the final score as the mean of known correlation scores.
+        # Negative correlations will correctly lessen the score, which will be
+        # clamped to zero as its lower bound. Genes with no possible
+        # correlations at all will be assumed to have a score of 0.0.
+
+        compute_score <- function(scores) {
+            score <- mean(scores, na.rm = TRUE)
+
+            if (is.na(score) | score < 0.0) {
+                score <- 0.0
+            }
+
+            score
+        }
+
+        results[,
+            score := compute_score(as.matrix(.SD)),
+            .SDcols = reference_gene_ids,
+            by = gene
+        ]
+
+        results[, .(gene, score)]
+    })
 }