Update gene data based on the suggested ranking

This also exports the ranking function itself for external use.
2025-10-26 19:57:24 +01:00 · 2022-06-15 10:24:10 +02:00 · 2022-06-15 10:24:10 +02:00 · 8a96a6eca9
commit 8a96a6eca9
parent e290aba9ab
6 changed files with 91 additions and 25 deletions
--- a/1
+++ b/1
@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 export(rank_genes)
 export(run_app)
 import(data.table)
 import(shiny)
--- a/R/ranking.R
+++ b/R/ranking.R
@ -0,0 +1,48 @@
 #' Rank genes based on how ubiquitous they are.
 #'
 #' This function will compute a weighted average across multiple metrics that
 #' define how ubiquitous a gene is based on its expression across samples.
 #' The weighted sum is divided by the sum of the absolute weights, so a
 #' negative weight penalizes its metric without inverting the ranking. The
 #' resulting scores are rescaled to the range from 0.0 to 1.0.
 #'
 #' @param cross_sample_metric Metric to use for calculating the number of
 #'   samples a gene is expressed in. One of `above_95`, `above_median` or
 #'   `above_zero`.
 #' @param cross_sample_weight Weighting of the cross sample metric within the
 #'   final score.
 #' @param mean_expression_weight Weighting of the gene's mean expression within
 #'   the final score.
 #' @param sd_expression_weight Weighting of the standard deviation of the
 #'   gene's expression within the final score.
 #'
 #' @return A `data.table` with gene data as well as the scores, ranks and
 #'   percentiles for each gene.
 #'
 #' @export
 rank_genes <- function(cross_sample_metric = "above_95",
                       cross_sample_weight = 0.5,
                       mean_expression_weight = 0.25,
                       sd_expression_weight = -0.25) {
  # Use the absolute values of the weights for normalization. A signed sum
  # can become negative (which would flip the sign of every score and thereby
  # reverse the ranking) or cancel out to zero although individual weights
  # are non-zero.
  total_weight <- abs(cross_sample_weight) +
    abs(mean_expression_weight) +
    abs(sd_expression_weight)
  # Work on a copy, because `:=` and `setorder` modify the table in place.
  data <- copy(ubigen::genes)
  # `get()` resolves the metric's column from its name given as a string.
  data[, score :=
    (cross_sample_weight * get(cross_sample_metric) +
      mean_expression_weight * mean_expression_normalized +
      sd_expression_weight * sd_expression_normalized) /
      total_weight]
  # Normalize scores to be between 0.0 and 1.0.
  data[, score := (score - min(score, na.rm = TRUE)) /
    (max(score, na.rm = TRUE) - min(score, na.rm = TRUE))]
  # These are genes that are not expressed at all.
  data[is.na(score), score := 0.0]
  # Best score first; the rank is the row position after sorting.
  setorder(data, -score)
  data[, rank := .I]
  data[, percentile := 1 - rank / max(rank)]
  data
 }
--- a/R/server.R
+++ b/R/server.R
@ -2,30 +2,12 @@
 #' @noRd
 server <- function(input, output, session) {
  ranked_data <- reactive({
-    total_weight <- abs(input$cross_sample_weight) +
+    rank_genes(
-      abs(input$mean_expression) +
+      cross_sample_metric = input$cross_sample_metric,
-      abs(input$sd_expression)
+      cross_sample_weight = input$cross_sample_weight,
-
+      mean_expression_weight = input$mean_expression,
-    data <- data.table::copy(ubigen::genes)
+      sd_expression_weight = input$sd_expression
-
+    )
    data[, score :=
      (input$cross_sample_weight * get(input$cross_sample_metric) +
        input$mean_expression * mean_expression_normalized +
        input$sd_expression * sd_expression_normalized) /
        total_weight]
    # Normalize scores to be between 0.0 and 1.0.
    data[, score := (score - min(score, na.rm = TRUE)) /
      (max(score, na.rm = TRUE) - min(score, na.rm = TRUE))]
    # These are genes that are not expressed at all.
    data[is.na(score), score := 0.0]
    data.table::setorder(data, -score)
    data[, rank := .I]
    data[, percentile := 1 - rank / max(rank)]
    data
  })
  custom_genes <- gene_selector_server("custom_genes") |> debounce(500)
--- a/data/genes.rda
+++ b/data/genes.rda
--- a/man/genes.Rd
+++ b/man/genes.Rd
@ -5,7 +5,7 @@
 \alias{genes}
 \title{A \code{data.table} containing data on genes and their expression behavior.}
 \format{
-An object of class \code{data.table} (inherits from \code{data.frame}) with 56156 rows and 13 columns.
+An object of class \code{data.table} (inherits from \code{data.frame}) with 56156 rows and 14 columns.
 }
 \usage{
 genes
--- a/man/rank_genes.Rd
+++ b/man/rank_genes.Rd
@ -0,0 +1,35 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/ranking.R
 \name{rank_genes}
 \alias{rank_genes}
 \title{Rank genes based on how ubiquitous they are.}
 \usage{
 rank_genes(
  cross_sample_metric = "above_95",
  cross_sample_weight = 0.5,
  mean_expression_weight = 0.25,
  sd_expression_weight = -0.25
 )
 }
 \arguments{
 \item{cross_sample_metric}{Metric to use for calculating the number of
 samples a gene is expressed in. One of \code{above_95}, \code{above_median} or
 \code{above_zero}.}
 \item{cross_sample_weight}{Weighting of the cross sample metric within the
 final score.}
 \item{mean_expression_weight}{Weighting of the gene's mean expression within
 the final score.}
 \item{sd_expression_weight}{Weighting of the standard deviation of the
 gene's expression within the final score.}
 }
 \value{
 A \code{data.table} with gene data as well as the scores, ranks and
 percentiles for each gene.
 }
 \description{
 This function will compute a weighted average across multiple metrics that
 define how ubiquitous a gene is based on its expression across samples.
 }