mirror of
https://github.com/johrpan/ubigen.git
synced 2025-10-26 11:47:24 +01:00
Use package's ranking function for included data
This commit is contained in:
parent
698ea5086a
commit
a6f0a64c2c
3 changed files with 18 additions and 16 deletions
10
R/ranking.R
10
R/ranking.R
|
|
@ -3,22 +3,26 @@
|
|||
#' This function will compute a weighted average across multiple metrics that
|
||||
#' define how ubiquitous a gene is based on its expression across samples.
|
||||
#'
|
||||
#' @param data The input data to use. This should either be the result of a
|
||||
#' previous call to this function or the return value of [analyze()].
|
||||
#'
|
||||
#' @return A `data.table` with gene data as well as the scores, ranks and
|
||||
#' percentiles for each gene.
|
||||
#'
|
||||
#' @export
|
||||
rank_genes <- function(cross_sample_metric = "above_95",
|
||||
rank_genes <- function(data = ubigen::genes,
|
||||
cross_sample_metric = "above_95",
|
||||
cross_sample_weight = 0.5,
|
||||
level_metric = "median_expression_normalized",
|
||||
level_weight = 0.25,
|
||||
variation_metric = "qcv_expression_normalized",
|
||||
variation_weight = -0.25) {
|
||||
data <- copy(data)
|
||||
|
||||
total_weight <- abs(cross_sample_weight) +
|
||||
abs(level_weight) +
|
||||
abs(variation_weight)
|
||||
|
||||
data <- copy(ubigen::genes)
|
||||
|
||||
data[, score :=
|
||||
(cross_sample_weight * get(cross_sample_metric) +
|
||||
level_weight * get(level_metric) +
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
\title{Rank genes based on how ubiquitous they are.}
|
||||
\usage{
|
||||
rank_genes(
|
||||
data = ubigen::genes,
|
||||
cross_sample_metric = "above_95",
|
||||
cross_sample_weight = 0.5,
|
||||
level_metric = "median_expression_normalized",
|
||||
|
|
@ -13,6 +14,10 @@ rank_genes(
|
|||
variation_weight = -0.25
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{data}{The input data to use. This should either be the result of a
|
||||
previous call to this function or the return value of \code{\link[=analyze]{analyze()}}.}
|
||||
}
|
||||
\value{
|
||||
A \code{data.table} with gene data as well as the scores, ranks and
|
||||
percentiles for each gene.
|
||||
|
|
|
|||
|
|
@ -6,22 +6,14 @@ library(here)
|
|||
|
||||
i_am("scripts/input.R")
|
||||
|
||||
# To save memory, the data includes fake IDs for genes. The actual Ensembl IDs
|
||||
# are part of the separate genes table.
|
||||
|
||||
genes <- fread(here("scripts", "input", "genes.csv"))
|
||||
data <- fread(here("scripts", "output", "results.csv"))
|
||||
|
||||
data[, score := 0.5 * above_95 +
|
||||
0.25 * median_expression_normalized +
|
||||
-0.25 * qcv_expression_normalized]
|
||||
|
||||
# Normalize scores to be between 0.0 and 1.0.
|
||||
data[, score := (score - min(score, na.rm = TRUE)) /
|
||||
(max(score, na.rm = TRUE) - min(score, na.rm = TRUE))]
|
||||
|
||||
# Genes get an NA score if they are not expressed at all or, when the standard
|
||||
# deviation is part of the score, if they are expressed just once.
|
||||
data[is.na(score), score := 0.0]
|
||||
|
||||
setorder(data, -score)
|
||||
# Rank the data using default parameters.
|
||||
data <- ubigen::rank_genes(data = data)
|
||||
|
||||
# Reintroduce gene IDs and HGNC symbols.
|
||||
|
||||
|
|
@ -41,6 +33,7 @@ data[, id := NULL]
|
|||
# Remove duplicates. This will keep the best row for each duplicated gene.
|
||||
data <- unique(data, by = "gene")
|
||||
|
||||
# Reassign ranks, because duplicates may have been removed.
|
||||
data[, rank := .I]
|
||||
|
||||
fwrite(data, file = here("scripts", "output", "genes.csv"))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue