mirror of
				https://github.com/johrpan/geposan.git
				synced 2025-10-26 02:37:25 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			90 lines
		
	
	
	
		
			2.8 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
			
		
		
	
	
			90 lines
		
	
	
	
		
			2.8 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
| #' Rank the results by computing a score.
 | |
| #'
 | |
| #' This function takes the result of [analyze()] and creates a score by
 | |
| #' computing a weighted mean across the different methods' results.
 | |
| #'
 | |
| #' @param analysis Analysis object resulting from [analyze()].
 | |
| #' @param weights Named list pairing method names with weighting factors. Only
 | |
| #'   methods that are contained within this list will be included.
 | |
| #'
 | |
| #' @returns A ranking object. The object extends the analysis with additional
 | |
| #'   columns containing the `score` and the `rank` of each gene. It will be
 | |
| #'   ordered by rank.
 | |
| #'
 | |
| #' @export
 | |
| ranking <- function(analysis, weights) {
 | |
|     if (!"geposan_analysis" %chin% class(analysis)) {
 | |
|         stop("Invalid analyis. Use geposan::analyze().")
 | |
|     }
 | |
| 
 | |
|     ranking <- copy(analysis)
 | |
|     ranking[, score := 0.0]
 | |
| 
 | |
|     for (method in names(weights)) {
 | |
|         weighted <- weights[[method]] * ranking[, ..method]
 | |
|         ranking[, score := score + weighted]
 | |
|     }
 | |
| 
 | |
|     # Normalize scores to be between 0.0 and 1.0.
 | |
|     ranking[, score := score / sum(unlist(weights))]
 | |
| 
 | |
|     setorder(ranking, -score)
 | |
|     ranking[, rank := .I]
 | |
| 
 | |
|     structure(
 | |
|         ranking,
 | |
|         class = c("geposan_ranking", "geposan_analysis", class(ranking))
 | |
|     )
 | |
| }
 | |
| 
 | |
| #' Find the best weights to rank the results.
 | |
| #'
 | |
| #' This function finds the optimal parameters to [ranking()] that result in the
 | |
| #' reference genes ranking particulary high.
 | |
| #'
 | |
| #' @param analysis Results from [analyze()] or [ranking()].
 | |
| #' @param methods Methods to include in the score.
 | |
| #' @param reference_gene_ids IDs of the reference genes.
 | |
| #' @param target The optimization target. It may be one of "mean", "min" or
 | |
| #'   "max" and results in the respective rank being optimized.
 | |
| #'
 | |
| #' @returns Named list pairing method names with their optimal weights. This
 | |
| #'   can be used as an argument to [ranking()].
 | |
| #'
 | |
| #' @export
 | |
| optimal_weights <- function(analysis, methods, reference_gene_ids,
 | |
|                             target = "mean") {
 | |
|     if (!"geposan_analysis" %chin% class(analysis)) {
 | |
|         stop("Invalid analyis. Use geposan::analyze().")
 | |
|     }
 | |
| 
 | |
|     # Create the named list from the factors vector.
 | |
|     weights <- function(factors) {
 | |
|         result <- NULL
 | |
| 
 | |
|         mapply(function(method, factor) {
 | |
|             result[[method]] <<- factor
 | |
|         }, methods, factors)
 | |
| 
 | |
|         result
 | |
|     }
 | |
| 
 | |
|     # Compute the target rank of the reference genes when applying the weights.
 | |
|     target_rank <- function(factors) {
 | |
|         data <- ranking(analysis, weights(factors))
 | |
| 
 | |
|         data[gene %chin% reference_gene_ids, if (target == "min") {
 | |
|             min(rank)
 | |
|         } else if (target == "max") {
 | |
|             max(rank)
 | |
|         } else {
 | |
|             mean(rank)
 | |
|         }]
 | |
|     }
 | |
| 
 | |
|     factors <- stats::optim(rep(1.0, length(methods)), target_rank)$par
 | |
|     factors[factors < 0.0] <- 0.0
 | |
|     total_weight <- sum(factors)
 | |
| 
 | |
|     weights(factors / total_weight)
 | |
| }
 |