comparison: Add more useful information

This commit is contained in:
Elias Projahn 2022-01-26 10:56:35 +01:00
parent c633b17db6
commit 016a9ada9d
2 changed files with 44 additions and 30 deletions

View file

@ -5,12 +5,12 @@
#' #'
#' @returns A comparison object with the following items: #' @returns A comparison object with the following items:
#' \describe{ #' \describe{
#' \item{`mean`}{The mean score of the genes.} #' \item{`quantiles`}{A `data.table` containing quantile values for the
#' \item{`min`}{The lowest score of the genes.} #' score, rank and percentile of the comparison genes.
#' \item{`max`}{The highest score of the genes.} #' }
#' \item{`mean_score`}{The mean score of the genes.}
#' \item{`mean_rank`}{The mean rank of the genes.} #' \item{`mean_rank`}{The mean rank of the genes.}
#' \item{`first_rank`}{The first rank of the genes.} #' \item{`mean_percentile`}{The mean percentile of the genes.}
#' \item{`last_rank`}{The last rank of the genes.}
#' \item{`p_value`}{p-value for the null hypothesis that the comparison #' \item{`p_value`}{p-value for the null hypothesis that the comparison
#' genes do _not_ rank better than other genes. In other words: A low #' genes do _not_ rank better than other genes. In other words: A low
#' p-value means that the comparison genes rank particularly high.} #' p-value means that the comparison genes rank particularly high.}
@ -24,6 +24,16 @@ compare <- function(ranking, comparison_gene_ids) {
comparison_ranking <- ranking[gene %chin% comparison_gene_ids] comparison_ranking <- ranking[gene %chin% comparison_gene_ids]
# Five-number summary (min, quartiles, max) of the comparison genes' score,
# rank and percentile, one row per quantile label.
quantiles <- data.table(
quantile = c("0%", "25%", "50%", "75%", "100%"),
# No `probs` given, so stats::quantile() uses its default quartile
# probabilities, which line up with the labels above.
score = stats::quantile(comparison_ranking[, score]),
# The rank quantiles are requested with descending probabilities
# (1, 0.75, ..., 0), so the row labelled "0%" holds the largest rank value
# and the row labelled "100%" the smallest.
# NOTE(review): presumably because a smaller rank number is better, which
# keeps each row's rank aligned with its score -- confirm against the
# definition of `rank` in the ranking table.
rank = stats::quantile(
comparison_ranking[, rank],
probs = seq(1, 0, -0.25)
),
percentile = stats::quantile(comparison_ranking[, percentile])
)
p_value <- stats::wilcox.test( p_value <- stats::wilcox.test(
x = comparison_ranking[, score], x = comparison_ranking[, score],
y = ranking[!gene %chin% comparison_gene_ids, score], y = ranking[!gene %chin% comparison_gene_ids, score],
@ -32,12 +42,10 @@ compare <- function(ranking, comparison_gene_ids) {
structure( structure(
list( list(
mean = comparison_ranking[, mean(score)], quantiles = quantiles,
min = comparison_ranking[, min(score)], mean_score = comparison_ranking[, mean(score)],
max = comparison_ranking[, max(score)],
mean_rank = comparison_ranking[, mean(rank)], mean_rank = comparison_ranking[, mean(rank)],
first_rank = comparison_ranking[, min(rank)], mean_percentile = comparison_ranking[, mean(percentile)],
last_rank = comparison_ranking[, max(rank)],
p_value = p_value p_value = p_value
), ),
class = "geposan_comparison" class = "geposan_comparison"
@ -53,24 +61,30 @@ compare <- function(ranking, comparison_gene_ids) {
#' #'
#' @export #' @export
print.geposan_comparison <- function(x, ...) { print.geposan_comparison <- function(x, ...) {
cat("geposan comparison:") cat("geposan comparison:\n\n")
# Build a display-only copy of the quantile table with human-readable
# column names; `x$quantiles` itself is left unmodified.
quantiles_formatted <- x$quantiles[, .(
"Quantile" = quantile,
# Scores are shown with three decimal places.
"Score" = round(score, 3),
"Rank" = rank,
# The percentile value is multiplied by 100, rounded to one decimal,
# always printed with that decimal (nsmall = 1) and suffixed with "%".
"Percentile" = paste0(
format(round(percentile * 100, 1), nsmall = 1),
"%"
)
)]

# Print the table without the row-number column.
print(quantiles_formatted, row.names = FALSE)
cat(sprintf( cat(sprintf(
paste( paste0(
"\n\n Mean score: %.3f", "\n Mean score: %.3f",
"\n Min score: %.3f", "\n Mean rank: %.1f",
"\n Max score: %.3f", "\n Mean percentile: %.1f%%",
"\n\n Mean rank: %.1f", "\n p-value for better scores: %.4f\n"
"\n First rank: %i",
"\n Last rank: %i",
"\n\n p-value for better ranking: %.4f\n",
sep = ""
), ),
x$mean, x$mean_score,
x$min,
x$max,
x$mean_rank, x$mean_rank,
x$first_rank, x$mean_percentile * 100,
x$last_rank,
x$p_value x$p_value
)) ))

View file

@ -14,12 +14,12 @@ compare(ranking, comparison_gene_ids)
\value{ \value{
A comparison object with the following items: A comparison object with the following items:
\describe{ \describe{
\item{\code{mean}}{The mean score of the genes.} \item{\code{quantiles}}{A \code{data.table} containing quantile values for the
\item{\code{min}}{The lowest score of the genes.} score, rank and percentile of the comparison genes.
\item{\code{max}}{The highest score of the genes.} }
\item{\code{mean_score}}{The mean score of the genes.}
\item{\code{mean_rank}}{The mean rank of the genes.} \item{\code{mean_rank}}{The mean rank of the genes.}
\item{\code{first_rank}}{The first rank of the genes.} \item{\code{mean_percentile}}{The mean percentile of the genes.}
\item{\code{last_rank}}{The last rank of the genes.}
\item{\code{p_value}}{p-value for the null hypothesis that the comparison \item{\code{p_value}}{p-value for the null hypothesis that the comparison
genes do \emph{not} rank better than other genes. In other words: A low genes do \emph{not} rank better than other genes. In other words: A low
p-value means that the comparison genes rank particularly high.} p-value means that the comparison genes rank particularly high.}