validate: Update and extend

This commit is contained in:
Elias Projahn 2022-01-26 11:38:39 +01:00
parent 016a9ada9d
commit 3cedc4fea4
2 changed files with 106 additions and 80 deletions

View file

@ -1,80 +1,94 @@
#' Perform cross-validation for the analysis. #' Perform cross-validation for the ranking.
#' #'
#' This function reoptimizes the analysis leaving out one of the original #' This function reoptimizes the ranking leaving out one of the original
#' reference genes at a time. #' reference genes at a time.
#' #'
#' @param analysis The analysis to validate. #' @param ranking The ranking to validate.
#' @param reference_gene_ids The reference gene IDs whose ranking should be
#' validated.
#' @param method_ids IDs of the methods that were used.
#' @param progress An optional progress function that should accept a single #' @param progress An optional progress function that should accept a single
#' value between 0.0 and 1.0 for progress information. #' value between 0.0 and 1.0 for progress information.
#' #'
#' @returns An object containing the mean absolute error and the mean percent #' @returns A validation object with the following items:
#' rank for the original analysis as well as the validation. #' \describe{
#' \item{`validation`}{A `data.table` containing percentiles of the
#' comparison genes from the original ranking as well as their validation.
#' }
#' \item{`mean_score`}{The mean score of the genes.}
#' \item{`mean_percentile_original`}{The mean percentile of the genes in
#' the original ranking.
#' }
#' \item{`mean_percentile_validation`}{The mean percentile of the genes
#' when optimizing without themselves.
#' }
#' \item{`mean_error`}{The mean absolute error.}
#' }
#' #'
#' @export #' @export
validate <- function(analysis, progress = NULL) { validate <- function(ranking, reference_gene_ids, method_ids, progress = NULL) {
if (inherits(analysis, "geposan_analysis")) { if (!inherits(ranking, "geposan_ranking")) {
stop("Analysis is invalid. Use geposan::analyze().") stop("Ranking is invalid. Use geposan::ranking().")
} }
cached("validation", analysis$preset, { if (is.null(progress)) {
reference_gene_ids <- analysis$preset$reference_gene_ids progress_bar <- progress::progress_bar$new()
n_references <- length(reference_gene_ids) progress_bar$update(0.0)
methods <- analysis$preset$methods
ranking_reference <- analysis$ranking
n_ranks <- nrow(ranking_reference)
mean_error_reference <- mean( progress <- function(progress_value) {
1.0 - ranking_reference[gene %chin% reference_gene_ids, score] if (!progress_bar$finished) {
) progress_bar$update(progress_value)
if (progress_value >= 1.0) {
mean_rank_reference <- mean( progress_bar$terminate()
1.0 - ranking_reference[gene %chin% reference_gene_ids, rank] / }
n_ranks
)
mean_error_validation <- 0.0
mean_rank_validation <- 0.0
progress_state <- 0.0
progress_step <- 1.0 / n_references
for (validation_gene_id in reference_gene_ids) {
included_gene_ids <- reference_gene_ids[
reference_gene_ids != validation_gene_id
]
weights <- optimal_weights(
analysis,
methods,
included_gene_ids
)
ranking_validation <- ranking(analysis, weights)
mean_error_validation <- mean_error_validation +
(1.0 - ranking_validation[gene == validation_gene_id, score]) /
n_references
mean_rank_validation <- mean_rank_validation +
(1.0 - ranking_validation[gene == validation_gene_id, rank] /
n_ranks) / n_references
if (!is.null(progress)) {
progress_state <- progress_state + progress_step
progress(progress_state)
} }
} }
}
structure( progress_state <- 0.0
list( progress_step <- 1.0 / length(reference_gene_ids)
mean_error_reference = mean_error_reference,
mean_error_validation = mean_error_validation, results <- ranking[gene %chin% reference_gene_ids, .(gene, percentile)]
mean_rank_reference = mean_rank_reference,
mean_rank_validation = mean_rank_validation for (gene_id in reference_gene_ids) {
), included_gene_ids <- reference_gene_ids[
class = "geposan_validation" reference_gene_ids != gene_id
]
weights <- optimal_weights(
ranking,
method_ids,
included_gene_ids
) )
})
ranking_validation <- ranking(ranking, weights)
results[
gene == gene_id,
percentile_validation := ranking_validation[
gene == gene_id,
percentile
]
]
if (!is.null(progress)) {
progress_state <- progress_state + progress_step
progress(progress_state)
}
}
results[, error := percentile - percentile_validation]
setorder(results, error)
structure(
list(
validation = results,
mean_percentile_original = results[, mean(percentile)],
mean_percentile_validation = results[, mean(percentile_validation)],
mean_error = results[, mean(error)]
),
class = "geposan_validation"
)
} }
#' S3 method to print a validation object. #' S3 method to print a validation object.
@ -86,22 +100,17 @@ validate <- function(analysis, progress = NULL) {
#' #'
#' @export #' @export
print.geposan_validation <- function(x, ...) { print.geposan_validation <- function(x, ...) {
cat("geposan validation:\n")
cat(sprintf( cat(sprintf(
paste0( paste0(
"\n Absolute scores:", "geposan validation:",
"\n Mean error reference: %.3f", "\n Mean percentile original: %.1f%%",
"\n Mean error validation: %.3f", "\n Mean percentile validation: %.1f%%",
"\n", "\n Mean error: %.1f percent points",
"\n Ranks:",
"\n Mean rank reference: %.1f%%",
"\n Mean rank validation: %.1f%%",
"\n" "\n"
), ),
x$mean_error_reference, x$mean_percentile_original * 100,
x$mean_error_validation, x$mean_percentile_validation * 100,
x$mean_rank_reference * 100, x$mean_error * 100
x$mean_rank_validation * 100
)) ))
invisible(x) invisible(x)

View file

@ -2,21 +2,38 @@
% Please edit documentation in R/validate.R % Please edit documentation in R/validate.R
\name{validate} \name{validate}
\alias{validate} \alias{validate}
\title{Perform cross-validation for the analysis.} \title{Perform cross-validation for the ranking.}
\usage{ \usage{
validate(analysis, progress = NULL) validate(ranking, reference_gene_ids, method_ids, progress = NULL)
} }
\arguments{ \arguments{
\item{analysis}{The analysis to validate.} \item{ranking}{The ranking to validate.}
\item{reference_gene_ids}{The reference gene IDs whose ranking should be
validated.}
\item{method_ids}{IDs of the methods that were used.}
\item{progress}{An optional progress function that should accept a single \item{progress}{An optional progress function that should accept a single
value between 0.0 and 1.0 for progress information.} value between 0.0 and 1.0 for progress information.}
} }
\value{ \value{
An object containing the mean absolute error and the mean percent A validation object with the following items:
rank for the original analysis as well as the validation. \describe{
\item{\code{validation}}{A \code{data.table} containing percentiles of the
comparison genes from the original ranking as well as their validation.
}
\item{\code{mean_score}}{The mean score of the genes.}
\item{\code{mean_percentile_original}}{The mean percentile of the genes in
the original ranking.
}
\item{\code{mean_percentile_validation}}{The mean percentile of the genes
when optimizing without themselves.
}
\item{\code{mean_error}}{The mean absolute error.}
}
} }
\description{ \description{
This function reoptimizes the analysis leaving out one of the original This function reoptimizes the ranking leaving out one of the original
reference genes at a time. reference genes at a time.
} }