mirror of
				https://github.com/johrpan/geposan.git
				synced 2025-10-26 10:47:25 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			105 lines
		
	
	
	
		
			3.5 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
			
		
		
	
	
			105 lines
		
	
	
	
		
			3.5 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
| #' Rank the results by computing a score.
 | |
| #'
 | |
| #' This function takes the result of [analyze()] and creates a score by
 | |
| #' computing a weighted mean across the different methods' results.
 | |
| #'
 | |
| #' @param analysis Analysis object resulting from [analyze()].
 | |
| #' @param weights Named list pairing method names with weighting factors. Only
 | |
| #'   methods that are contained within this list will be included.
 | |
| #'
 | |
| #' @returns A ranking object. The object extends the analysis result with
 | |
| #'   additional columns containing the `score`, the `rank` and the `percentile`
 | |
| #'   for each gene. It will be ordered by rank.
 | |
| #'
 | |
| #' @export
 | |
| ranking <- function(analysis, weights) {
 | |
|     ranking <- if (inherits(analysis, "geposan_analysis")) {
 | |
|         copy(analysis$scores)
 | |
|     } else if (inherits(analysis, "geposan_ranking")) {
 | |
|         copy(analysis)
 | |
|     } else {
 | |
|         stop("Invalid analyis. Use geposan::analyze().")
 | |
|     }
 | |
| 
 | |
|     ranking[, score := 0.0]
 | |
| 
 | |
|     for (method in names(weights)) {
 | |
|         weighted <- weights[[method]] * ranking[, ..method]
 | |
|         ranking[, score := score + weighted]
 | |
|     }
 | |
| 
 | |
|     # Normalize scores to be between 0.0 and 1.0.
 | |
|     min_score <- ranking[, min(score)]
 | |
|     max_score <- ranking[, max(score)]
 | |
|     score_range <- max_score - min_score
 | |
|     ranking[, score := (score - min_score) / score_range]
 | |
| 
 | |
|     setorder(ranking, -score)
 | |
|     ranking[, rank := .I]
 | |
|     ranking[, percentile := 1 - rank / nrow(ranking)]
 | |
| 
 | |
|     structure(
 | |
|         ranking,
 | |
|         class = c("geposan_ranking", class(ranking))
 | |
|     )
 | |
| }
 | |
| 
 | |
| #' Find the best weights to rank the results.
 | |
| #'
 | |
| #' This function finds the optimal parameters to [ranking()] that result in the
 | |
| #' reference genes ranking particulary high.
 | |
| #'
 | |
| #' @param analysis Results from [analyze()] or [ranking()].
 | |
| #' @param methods Methods to include in the score.
 | |
| #' @param reference_gene_ids IDs of the reference genes.
 | |
| #' @param target The optimization target. It may be one of "mean", "median",
 | |
| #'   "min" or "max" and results in the respective rank being optimized.
 | |
| #'
 | |
| #' @returns Named list pairing method names with their optimal weights. This
 | |
| #'   can be used as an argument to [ranking()].
 | |
| #'
 | |
| #' @export
 | |
| optimal_weights <- function(analysis, methods, reference_gene_ids,
 | |
|                             target = "mean") {
 | |
|     if (!inherits(analysis, c("geposan_analysis", "geposan_ranking"))) {
 | |
|         stop("Invalid analyis. Use geposan::analyze().")
 | |
|     }
 | |
| 
 | |
|     cached(
 | |
|         "optimization",
 | |
|         c(analysis$preset, methods, reference_gene_ids, target),
 | |
|         { # nolint
 | |
|             # Compute the target rank of the reference genes when applying the
 | |
|             # weights.
 | |
|             target_rank <- function(factors) {
 | |
|                 data <- ranking(analysis, as.list(factors))
 | |
| 
 | |
|                 result <- data[
 | |
|                     gene %chin% reference_gene_ids,
 | |
|                     if (target == "min") {
 | |
|                         min(rank)
 | |
|                     } else if (target == "max") {
 | |
|                         max(rank)
 | |
|                     } else if (target == "mean") {
 | |
|                         mean(rank)
 | |
|                     } else {
 | |
|                         stats::median(rank)
 | |
|                     }
 | |
|                 ]
 | |
| 
 | |
|                 if (result > 0) {
 | |
|                     result
 | |
|                 } else {
 | |
|                     Inf
 | |
|                 }
 | |
|             }
 | |
| 
 | |
|             initial_factors <- rep(1.0, length(methods))
 | |
|             names(initial_factors) <- methods
 | |
| 
 | |
|             optimal_factors <- stats::optim(initial_factors, target_rank)$par
 | |
| 
 | |
|             as.list(optimal_factors / max(abs(optimal_factors)))
 | |
|         }
 | |
|     )
 | |
| }
 |