geposan/R/comparison.R

92 lines
2.7 KiB
R

#' Compare a set of genes with the ranking.
#'
#' Selects the rows of `ranking` whose `gene` is contained in
#' `comparison_gene_ids` and summarizes their scores, ranks and percentiles.
#' The scores of these genes are additionally tested against the scores of
#' all remaining genes in the ranking using a one-sided Wilcoxon rank sum
#' test ([stats::wilcox.test()] with `alternative = "greater"`).
#'
#' @param ranking A ranking created using [ranking()].
#' @param comparison_gene_ids IDs of the genes of interest.
#'
#' @returns A comparison object with the following items:
#'   \describe{
#'     \item{`quantiles`}{A `data.table` containing quantile values for the
#'       score, rank and percentile of the comparison genes. The quantiles of
#'       the rank are computed with reversed probabilities, i.e. the row
#'       labelled `"0%"` holds the largest rank and the row labelled `"100%"`
#'       the smallest one.
#'     }
#'     \item{`mean_score`}{The mean score of the genes.}
#'     \item{`mean_rank`}{The mean rank of the genes.}
#'     \item{`mean_percentile`}{The mean percentile of the genes.}
#'     \item{`p_value`}{p-value for the null hypothesis that the comparison
#'       genes do _not_ rank better than other genes. In other words: A low
#'       p-value means that the comparison genes rank particularly high.
#'       This is `NA` if the test can not be performed because either none
#'       of the comparison genes or no other genes are part of the ranking.
#'     }
#'   }
#'
#'   If none of the comparison genes are part of the ranking, the quantiles
#'   are `NA` and the means are `NaN`.
#'
#' @export
compare <- function(ranking, comparison_gene_ids) {
  if (!inherits(ranking, "geposan_ranking")) {
    stop("Invalid ranking. Use geposan::ranking().")
  }

  comparison_ranking <- ranking[gene %chin% comparison_gene_ids]
  comparison_scores <- comparison_ranking[, score]
  other_scores <- ranking[!gene %chin% comparison_gene_ids, score]

  quantiles <- data.table(
    quantile = c("0%", "25%", "50%", "75%", "100%"),
    score = stats::quantile(comparison_scores),
    # The probabilities are reversed for the rank, so that the largest rank
    # ends up in the same row as the smallest score and percentile.
    rank = stats::quantile(
      comparison_ranking[, rank],
      probs = seq(1, 0, -0.25)
    ),
    percentile = stats::quantile(comparison_ranking[, percentile])
  )

  # wilcox.test() stops with an error if one of the samples is empty. This
  # happens if none of the comparison genes are included in the ranking or if
  # the ranking contains no genes apart from them. All other summary values
  # are still well-defined (NA/NaN) in that case, so report a missing p-value
  # instead of failing altogether.
  p_value <- if (length(comparison_scores) > 0 && length(other_scores) > 0) {
    stats::wilcox.test(
      x = comparison_scores,
      y = other_scores,
      alternative = "greater"
    )$p.value
  } else {
    NA_real_
  }

  structure(
    list(
      quantiles = quantiles,
      mean_score = mean(comparison_scores),
      mean_rank = comparison_ranking[, mean(rank)],
      mean_percentile = comparison_ranking[, mean(percentile)],
      p_value = p_value
    ),
    class = "geposan_comparison"
  )
}
#' Print a comparison object.
#'
#' S3 method that writes a human-readable summary of a comparison to the
#' console: a table with the quantiles of score, rank and percentile followed
#' by the mean values and the p-value.
#'
#' @param x The comparison to print.
#' @param ... Other parameters. They are not used by this method.
#'
#' @returns `x`, invisibly.
#'
#' @seealso [compare()]
#'
#' @export
print.geposan_comparison <- function(x, ...) {
  cat("geposan comparison:\n\n")

  # Prepare a display version of the quantiles: scores are rounded to three
  # digits and percentiles are shown as percentages with one decimal place.
  quantile_table <- x$quantiles[, list(
    Quantile = quantile,
    Score = round(score, 3),
    Rank = rank,
    Percentile = paste0(
      format(round(percentile * 100, 1), nsmall = 1),
      "%"
    )
  )]

  print(quantile_table, row.names = FALSE)

  # One line per summary value; the percentile is stored as a fraction and
  # therefore scaled to percent for display.
  summary_template <- paste0(
    "\n Mean score: %.3f",
    "\n Mean rank: %.1f",
    "\n Mean percentile: %.1f%%",
    "\n p-value for better scores: %.4f\n"
  )

  summary_text <- sprintf(
    summary_template,
    x$mean_score,
    x$mean_rank,
    x$mean_percentile * 100,
    x$p_value
  )

  cat(summary_text)

  invisible(x)
}