compare: Include confidence intervals

2025-10-26 10:47:25 +01:00 · 2022-06-03 17:41:45 +02:00 · 2022-06-03 17:41:45 +02:00 · 3df4ec5d89
commit 3df4ec5d89
parent 3217c9bd29
3 changed files with 27 additions and 21 deletions
--- a/R/comparison.R
+++ b/R/comparison.R
@ -11,9 +11,7 @@
 #'     \item{`mean_score`}{The mean score of the genes.}
 #'     \item{`mean_rank`}{The mean rank of the genes.}
 #'     \item{`mean_percentile`}{The mean percentile of the genes.}
-#'     \item{`p_value`}{p-value for the null hypothesis that the comparison
+#'     \item{`test_result`}{Results of applying a Wilcoxon rank sum test.}
-#'       genes do _not_ rank better than other genes. In other words: A low
-#'       p-value means that the comparison genes rank particularly high.}
 #'   }
 #'
 #' @export
@ -34,11 +32,11 @@ compare <- function(ranking, comparison_gene_ids) {
    percentile = stats::quantile(comparison_ranking[, percentile])
  )
-  p_value <- stats::wilcox.test(
+  test <- stats::wilcox.test(
    x = comparison_ranking[, score],
    y = ranking[!gene %chin% comparison_gene_ids, score],
-    alternative = "greater"
+    conf.int = TRUE
-  )$p.value
+  )
  structure(
    list(
@ -46,7 +44,7 @@ compare <- function(ranking, comparison_gene_ids) {
      mean_score = comparison_ranking[, mean(score)],
      mean_rank = comparison_ranking[, mean(rank)],
      mean_percentile = comparison_ranking[, mean(percentile)],
-      p_value = p_value
+      test_result = test
    ),
    class = "geposan_comparison"
  )
@ -75,17 +73,16 @@ print.geposan_comparison <- function(x, ...) {
  print(quantiles_formatted, row.names = FALSE)
-  cat(sprintf(
+  cat(glue::glue(
-    paste0(
+    "\n",
-      "\n  Mean score: %.3f",
+    "\n Mean score: {num(x$mean_score, 3)}",
-      "\n  Mean rank: %.1f",
+    "\n Mean rank: {num(x$mean_rank, 1)}",
-      "\n  Mean percentile: %.1f%%",
+    "\n Mean percentile: {num(x$mean_percentile * 100, 2)}",
-      "\n  p-value for better scores: %.4f\n"
+    "\n",
-    ),
+    "\n Estimated difference in medians: ",
-    x$mean_score,
+    "{num(x$test$conf.int[1], 2)} to {num(x$test$conf.int[2], 2)}",
-    x$mean_rank,
+    "\n Confidence level: 95%",
-    x$mean_percentile * 100,
+    "\n p-value: {num(x$test$p.value, 4)}"
-    x$p_value
  ))
  invisible(x)
--- a/R/utils.R
+++ b/R/utils.R
@ -29,6 +29,17 @@ cached <- function(name, objects, expr) {
  data
 }
 #' Round a number and render it as text.
 #'
 #' The value is rounded first and then passed to [format()] with `nsmall`
 #' set to `digits`.
 #'
 #' @param number The numeric value to round and format.
 #' @param digits Decimal places to round to; also used as `nsmall`.
 #'
 #' @return A character value.
 #' @noRd
 num <- function(number, digits) {
   rounded <- round(number, digits = digits)
   format(rounded, nsmall = digits)
 }
 # This is needed to make data.table's symbols available within the package.
 #' @import data.table
 NULL
--- a/man/compare.Rd
+++ b/man/compare.Rd
@ -20,9 +20,7 @@ score, rank and percentile of the comparison genes.
 \item{\code{mean_score}}{The mean score of the genes.}
 \item{\code{mean_rank}}{The mean rank of the genes.}
 \item{\code{mean_percentile}}{The mean percentile of the genes.}
-\item{\code{p_value}}{p-value for the null hypothesis that the comparison
+\item{\code{test_result}}{Results of applying a Wilcoxon rank sum test.}
-genes do \emph{not} rank better than other genes. In other words: A low
-p-value means that the comparison genes rank particularly high.}
 }
 }
 \description{