mirror of
https://github.com/johrpan/geposanui.git
synced 2025-10-26 19:27:24 +01:00
34 lines
1.2 KiB
R
34 lines
1.2 KiB
R
|
|
#' Find the best weights to rank the data.
#'
#' This function ranks the provided data table based on a weighted score
#' computed from the specified `columns`. It tries to find the optimal weights
#' that result in a ranking, where the mean rank of the given reference genes
#' is as high as possible (that is, numerically as small as possible).
#'
#' Rows are ranked by descending score. Rows with equal scores keep their
#' input order and rows with a missing score are ranked first.
#'
#' @param data Input data including the columns, as well as a `gene` column
#'   that is matched against `reference_gene_ids`.
#' @param columns Columns containing the separate scores between 0.0 and 1.0.
#' @param reference_gene_ids IDs of the reference genes within the input data.
#'
#' @returns Vector of optimal column weights adding up to 1.0.
optimize_weights <- function(data, columns, reference_gene_ids) {
  if (is.null(data[["gene"]])) {
    stop("`data` has no `gene` column.", call. = FALSE)
  }

  # Everything below that does not depend on the weights is computed exactly
  # once instead of on every evaluation of the objective function.
  column_values <- lapply(columns, function(column) data[[column]])

  unknown <- vapply(column_values, is.null, logical(1))
  if (any(unknown)) {
    stop(
      "Score columns missing from `data`: ",
      paste(columns[unknown], collapse = ", "),
      call. = FALSE
    )
  }

  is_reference <- data[["gene"]] %in% reference_gene_ids
  if (!any(is_reference)) {
    # Without this check the objective would be NaN and `optim` would fail
    # with an unrelated looking message.
    stop("None of the reference genes are present in the data.", call. = FALSE)
  }

  # A single weight always normalizes to 1.0, so there is nothing to optimize
  # (and Nelder-Mead warns about one-dimensional problems).
  if (length(columns) == 1L) {
    return(1.0)
  }

  row_count <- nrow(data)

  # Compute the mean rank of the reference genes when applying the weights.
  mean_rank <- function(weights) {
    # Accumulate the weighted columns one after another, starting from zero.
    score <- numeric(row_count)
    for (i in seq_along(column_values)) {
      score <- score + weights[i] * column_values[[i]]
    }

    # Descending order; ties stay in input order, missing scores come first
    # (intended to match the previous data.table `setorder()` based ranking).
    ranking <- order(-score, na.last = FALSE)

    # Positions of the reference genes within the ranking are their ranks.
    mean(which(is_reference[ranking]))
  }

  # NOTE(review): the optimization is unconstrained, so individual weights may
  # become negative and the sum could in principle be zero.
  weights <- stats::optim(rep(1.0, length(columns)), mean_rank)$par
  weights / sum(weights)
}
|