#' Rank genes based on how ubiquitous they are.
#'
#' This function will compute a weighted average across multiple metrics that
#' define how ubiquitous a gene is based on its expression across samples.
#'
#' The weighted sum is divided by the sum of the *absolute* weights, so that
#' negative weights (penalties) neither flip the sign of the score nor cancel
#' out positive weights in the divisor. The result is then rescaled to the
#' range from 0.0 to 1.0.
#'
#' @param cross_sample_metric Metric to use for calculating the number of
#'   samples a gene is expressed in. One of `above_95`, `above_median` or
#'   `above_zero`.
#' @param cross_sample_weight Weighting of the cross sample metric within the
#'   final score.
#' @param mean_expression_weight Weighting of the gene's mean expression within
#'   the final score.
#' @param sd_expression_weight Weighting of the standard deviation of the
#'   gene's expression within the final score.
#'
#' @return A `data.table` with gene data as well as the scores, ranks and
#'   percentiles for each gene. Rows are ordered by descending score.
#'
#' @export
rank_genes <- function(cross_sample_metric = "above_95",
                       cross_sample_weight = 0.5,
                       mean_expression_weight = 0.25,
                       sd_expression_weight = -0.25) {
  # Use absolute values: a plain sum can be zero (e.g. 0.5, 0, -0.5), which
  # would turn every score into Inf/NaN, or negative, which would invert the
  # ranking.
  total_weight <- abs(cross_sample_weight) +
    abs(mean_expression_weight) +
    abs(sd_expression_weight)

  # Work on a copy so the `:=` assignments below do not modify the packaged
  # dataset by reference.
  data <- copy(ubigen::genes)

  data[, score :=
    (cross_sample_weight * get(cross_sample_metric) +
      mean_expression_weight * mean_expression_normalized +
      sd_expression_weight * sd_expression_normalized) /
      total_weight]

  # Normalize scores to be between 0.0 and 1.0.
  data[, score := (score - min(score, na.rm = TRUE)) /
    (max(score, na.rm = TRUE) - min(score, na.rm = TRUE))]

  # These are genes that are not expressed at all. This also catches the NaN
  # values produced when all weights are zero or all scores are identical.
  data[is.na(score), score := 0.0]

  setorder(data, -score)
  data[, rank := .I]
  data[, percentile := 1 - rank / max(rank)]

  data
}
} \usage{ genes diff --git a/man/rank_genes.Rd b/man/rank_genes.Rd new file mode 100644 index 0000000..f1065d6 --- /dev/null +++ b/man/rank_genes.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ranking.R +\name{rank_genes} +\alias{rank_genes} +\title{Rank genes based on how ubiquitous they are.} +\usage{ +rank_genes( + cross_sample_metric = "above_95", + cross_sample_weight = 0.5, + mean_expression_weight = 0.25, + sd_expression_weight = -0.25 +) +} +\arguments{ +\item{cross_sample_metric}{Metric to use for calculating the number of +samples a gene is expressed in. One of \code{above_95}, \code{above_median} or +\code{above_zero}.} + +\item{cross_sample_weight}{Weighting of the cross sample metric within the +final score.} + +\item{mean_expression_weight}{Weighting of the gene's mean expression within +the final score.} + +\item{sd_expression_weight}{Weighting of the standard deviation of the +gene's expression within the final score.} +} +\value{ +A \code{data.table} with gene data as well as the scores, ranks and +percentiles for each gene. +} +\description{ +This function will compute a weighted average across multiple metrics that +define how ubiquitous a gene is based on its expression across samples. +}