From 5a58f457a43508b262ed2a2d6110b479042ab6a4 Mon Sep 17 00:00:00 2001 From: Elias Projahn Date: Fri, 19 Nov 2021 15:07:15 +0100 Subject: [PATCH] analyze: Add optimization --- R/analyze.R | 22 ++++++++++++++++++---- R/preset.R | 21 ++++++++++++++++++--- R/ranking.R | 34 +++++++++++++--------------------- man/analyze.Rd | 5 ++--- man/preset.Rd | 13 ++++++++++++- 5 files changed, 63 insertions(+), 32 deletions(-) diff --git a/R/analyze.R b/R/analyze.R index ddc8249..54e4f46 100644 --- a/R/analyze.R +++ b/R/analyze.R @@ -9,9 +9,8 @@ #' items: #' \describe{ #' \item{`preset`}{The preset that was used.} -#' \item{`results`}{A [data.table] with one row for each gene identified by -#' it's ID (`gene` column). The additional columns contain the resulting -#' scores per method and are named after the method IDs.} +#' \item{`weights`}{The optimal weights for ranking the reference genes.} +#' \item{`ranking`}{The optimal ranking created using the weights.} #' } #' #' @export @@ -75,10 +74,25 @@ analyze <- function(preset, progress = NULL) { total_progress <- total_progress + 1 / method_count } + results <- structure( + results, + class = c("geposan_results", class(results)) + ) + + weights <- optimal_weights( + results, + preset$methods, + preset$reference_gene_ids, + target = preset$optimization_target + ) + + ranking <- ranking(results, weights) + structure( list( preset = preset, - results = results + weights = weights, + ranking = ranking ), class = "geposan_analysis" ) diff --git a/R/preset.R b/R/preset.R index 50ee098..9d3bb84 100644 --- a/R/preset.R +++ b/R/preset.R @@ -21,10 +21,18 @@ #' position data. #' - `proximity` Mean proximity to telomeres. #' +#' Available optimization targets are: +#' +#' - `mean` Mean rank of the reference genes. +#' - `max` First rank of the reference genes. +#' - `min` Last rank of the reference genes. +#' #' @param methods Methods to apply. #' @param species_ids IDs of species to include. #' @param gene_ids IDs of genes to screen. #' @param reference_gene_ids IDs of reference genes to compare to. +#' @param optimization_target Parameter of the reference genes that the ranking +#' should be optimized for. #' #' @return The preset to use with [analyze()]. #' @@ -40,7 +48,8 @@ preset <- function(methods = c( ), species_ids = NULL, gene_ids = NULL, - reference_gene_ids = NULL) { + reference_gene_ids = NULL, + optimization_target = "mean_rank") { # Count included species per gene. genes_n_species <- geposan::distances[ species %chin% species_ids, @@ -61,7 +70,8 @@ preset <- function(methods = c( methods = sort(methods), species_ids = sort(species_ids), gene_ids = sort(gene_ids_filtered), - reference_gene_ids = sort(reference_gene_ids) + reference_gene_ids = sort(reference_gene_ids), + optimization_target = optimization_target ), class = "geposan_preset" ) @@ -87,9 +97,14 @@ print.geposan_preset <- function(x, ...) { )) cat(sprintf( - "\n Comparison data: %i reference genes\n", + "\n Comparison data: %i reference genes", length(x$reference_gene_ids) )) + cat(sprintf( + "\n Optimization target: %s\n", + x$optimization_target + )) + invisible(x) } diff --git a/R/ranking.R b/R/ranking.R index 3930ca3..67d52d4 100644 --- a/R/ranking.R +++ b/R/ranking.R @@ -13,11 +13,14 @@ #' #' @export ranking <- function(analysis, weights) { - if (!"geposan_analysis" %chin% class(analysis)) { + if ("geposan_analysis" %chin% class(analysis)) { + ranking <- copy(analysis$ranking) + } else if ("geposan_results" %chin% class(analysis)) { + ranking <- copy(analysis) + } else { stop("Invalid analyis. Use geposan::analyze().") } - ranking <- copy(analysis$results) ranking[, score := 0.0] for (method in names(weights)) { @@ -36,7 +39,7 @@ ranking <- function(analysis, weights) { structure( ranking, - class = c("geposan_ranking", "geposan_analysis", class(ranking)) + class = c("geposan_ranking", "geposan_results", class(ranking)) ) } @@ -57,24 +60,13 @@ ranking <- function(analysis, weights) { #' @export optimal_weights <- function(analysis, methods, reference_gene_ids, target = "mean") { - if (!"geposan_analysis" %chin% class(analysis)) { + if (!any(c("geposan_analysis", "geposan_results") %chin% class(analysis))) { stop("Invalid analyis. Use geposan::analyze().") } - # Create the named list from the factors vector. - weights <- function(factors) { - result <- NULL - - mapply(function(method, factor) { - result[[method]] <<- factor - }, methods, factors) - - result - } - # Compute the target rank of the reference genes when applying the weights. target_rank <- function(factors) { - data <- ranking(analysis, weights(factors)) + data <- ranking(analysis, as.list(factors)) result <- data[gene %chin% reference_gene_ids, if (target == "min") { min(rank) @@ -91,10 +83,10 @@ optimal_weights <- function(analysis, methods, reference_gene_ids, } } - factors <- stats::optim( - rep(0.0, length(methods)), - target_rank - )$par + initial_factors <- rep(1.0, length(methods)) + names(initial_factors) <- methods - weights(factors / max(abs(factors))) + optimal_factors <- stats::optim(initial_factors, target_rank)$par + + as.list(optimal_factors / max(abs(optimal_factors))) } diff --git a/man/analyze.Rd b/man/analyze.Rd index b528f9f..699e063 100644 --- a/man/analyze.Rd +++ b/man/analyze.Rd @@ -18,9 +18,8 @@ An object containing the results of the analysis with the following items: \describe{ \item{\code{preset}}{The preset that was used.} -\item{\code{results}}{A \link{data.table} with one row for each gene identified by -it's ID (\code{gene} column). The additional columns contain the resulting -scores per method and are named after the method IDs.} +\item{\code{weights}}{The optimal weights for ranking the reference genes.} +\item{\code{ranking}}{The optimal ranking created using the weights.} } } \description{ diff --git a/man/preset.Rd b/man/preset.Rd index e6414c5..1334b50 100644 --- a/man/preset.Rd +++ b/man/preset.Rd @@ -9,7 +9,8 @@ preset( "correlation_positions", "neural", "neural_positions", "proximity"), species_ids = NULL, gene_ids = NULL, - reference_gene_ids = NULL + reference_gene_ids = NULL, + optimization_target = "mean_rank" ) } \arguments{ @@ -20,6 +21,9 @@ preset( \item{gene_ids}{IDs of genes to screen.} \item{reference_gene_ids}{IDs of reference genes to compare to.} + +\item{optimization_target}{Parameter of the reference genes that the ranking +should be optimized for.} } \value{ The preset to use with \code{\link[=analyze]{analyze()}}. @@ -47,4 +51,11 @@ telomere across species. position data. \item \code{proximity} Mean proximity to telomeres. } + +Available optimization targets are: +\itemize{ +\item \code{mean} Mean rank of the reference genes. +\item \code{max} First rank of the reference genes. +\item \code{min} Last rank of the reference genes. +} }