Update gene data based on the suggested ranking

This also exports the ranking function itself for external use.
This commit is contained in:
Elias Projahn 2022-06-15 10:24:10 +02:00
parent e290aba9ab
commit 8a96a6eca9
6 changed files with 91 additions and 25 deletions

48
R/ranking.R Normal file
View file

@ -0,0 +1,48 @@
#' Rank genes based on how ubiquitous they are.
#'
#' This function will compute a weighted average across multiple metrics that
#' define how ubiquitous a gene is based on its expression across samples.
#'
#' @param cross_sample_metric Metric to use for calculating the number of
#' samples a gene is expressed in. One of `above_95`, `above_median` or
#' `above_zero`.
#' @param cross_sample_weight Weighting of the cross sample metric within the
#' final score.
#' @param mean_expression_weight Weighting of the gene's mean expression within
#' the final score.
#' @param sd_expression_weight Weighting of the standard deviation of the
#' gene's expression within the final score.
#'
#' @return A `data.table` with gene data as well as the scores, ranks and
#' percentiles for each gene.
#'
#' @export
rank_genes <- function(cross_sample_metric = "above_95",
cross_sample_weight = 0.5,
mean_expression_weight = 0.25,
sd_expression_weight = -0.25) {
total_weight <- cross_sample_weight +
mean_expression_weight +
sd_expression_weight
data <- copy(ubigen::genes)
data[, score :=
(cross_sample_weight * get(cross_sample_metric) +
mean_expression_weight * mean_expression_normalized +
sd_expression_weight * sd_expression_normalized) /
total_weight]
# Normalize scores to be between 0.0 and 1.0.
data[, score := (score - min(score, na.rm = TRUE)) /
(max(score, na.rm = TRUE) - min(score, na.rm = TRUE))]
# These are genes that are not expressed at all.
data[is.na(score), score := 0.0]
setorder(data, -score)
data[, rank := .I]
data[, percentile := 1 - rank / max(rank)]
data
}

View file

@ -2,30 +2,12 @@
#' @noRd
server <- function(input, output, session) {
ranked_data <- reactive({
total_weight <- abs(input$cross_sample_weight) +
abs(input$mean_expression) +
abs(input$sd_expression)
data <- data.table::copy(ubigen::genes)
data[, score :=
(input$cross_sample_weight * get(input$cross_sample_metric) +
input$mean_expression * mean_expression_normalized +
input$sd_expression * sd_expression_normalized) /
total_weight]
# Normalize scores to be between 0.0 and 1.0.
data[, score := (score - min(score, na.rm = TRUE)) /
(max(score, na.rm = TRUE) - min(score, na.rm = TRUE))]
# These are genes that are not expressed at all.
data[is.na(score), score := 0.0]
data.table::setorder(data, -score)
data[, rank := .I]
data[, percentile := 1 - rank / max(rank)]
data
rank_genes(
cross_sample_metric = input$cross_sample_metric,
cross_sample_weight = input$cross_sample_weight,
mean_expression_weight = input$mean_expression,
sd_expression_weight = input$sd_expression
)
})
custom_genes <- gene_selector_server("custom_genes") |> debounce(500)