Update gene data based on the suggested ranking

This also exports the ranking function itself for external use.
2025-10-26 19:57:24 +01:00 · 2022-06-15 10:24:10 +02:00 · 2022-06-15 10:24:10 +02:00 · 8a96a6eca9
commit 8a96a6eca9
parent e290aba9ab
6 changed files with 91 additions and 25 deletions
--- a/R/ranking.R
+++ b/R/ranking.R
@ -0,0 +1,48 @@
+#' Rank genes based on how ubiquitous they are.
+#'
+#' This function will compute a weighted average across multiple metrics that
+#' define how ubiquitous a gene is based on its expression across samples.
+#'
+#' @param cross_sample_metric Metric to use for calculating the number of
+#'   samples a gene is expressed in. One of `above_95`, `above_median` or
+#'   `above_zero`.
+#' @param cross_sample_weight Weighting of the cross sample metric within the
+#'   final score.
+#' @param mean_expression_weight Weighting of the gene's mean expression within
+#'   the final score.
+#' @param sd_expression_weight Weighting of the standard deviation of the
+#'   gene's expression within the final score.
+#'
+#' @return A `data.table` with gene data as well as the scores, ranks and
+#'   percentiles for each gene.
+#'
+#' @export
+rank_genes <- function(cross_sample_metric = "above_95",
+                       cross_sample_weight = 0.5,
+                       mean_expression_weight = 0.25,
+                       sd_expression_weight = -0.25) {
+  total_weight <- cross_sample_weight +
+    mean_expression_weight +
+    sd_expression_weight
+
+  data <- copy(ubigen::genes)
+
+  data[, score :=
+    (cross_sample_weight * get(cross_sample_metric) +
+      mean_expression_weight * mean_expression_normalized +
+      sd_expression_weight * sd_expression_normalized) /
+      total_weight]
+
+  # Normalize scores to be between 0.0 and 1.0.
+  data[, score := (score - min(score, na.rm = TRUE)) /
+    (max(score, na.rm = TRUE) - min(score, na.rm = TRUE))]
+
+  # These are genes that are not expressed at all.
+  data[is.na(score), score := 0.0]
+
+  setorder(data, -score)
+  data[, rank := .I]
+  data[, percentile := 1 - rank / max(rank)]
+
+  data
+}
--- a/R/server.R
+++ b/R/server.R
@ -2,30 +2,12 @@
 #' @noRd
 server <- function(input, output, session) {
  ranked_data <- reactive({
-    total_weight <- abs(input$cross_sample_weight) +
-      abs(input$mean_expression) +
-      abs(input$sd_expression)
-
-    data <- data.table::copy(ubigen::genes)
-
-    data[, score :=
-      (input$cross_sample_weight * get(input$cross_sample_metric) +
-        input$mean_expression * mean_expression_normalized +
-        input$sd_expression * sd_expression_normalized) /
-        total_weight]
-
-    # Normalize scores to be between 0.0 and 1.0.
-    data[, score := (score - min(score, na.rm = TRUE)) /
-      (max(score, na.rm = TRUE) - min(score, na.rm = TRUE))]
-
-    # These are genes that are not expressed at all.
-    data[is.na(score), score := 0.0]
-
-    data.table::setorder(data, -score)
-    data[, rank := .I]
-    data[, percentile := 1 - rank / max(rank)]
-
-    data
+    rank_genes(
+      cross_sample_metric = input$cross_sample_metric,
+      cross_sample_weight = input$cross_sample_weight,
+      mean_expression_weight = input$mean_expression,
+      sd_expression_weight = input$sd_expression
+    )
  })

  custom_genes <- gene_selector_server("custom_genes") |> debounce(500)