geposanui/optimize.R

#' Find the best weights to rank the data.
#'
#' This function ranks the provided data based on a weighted score computed
#' from the specified `columns`. It searches for the weights that place the
#' given reference genes as close to the top of the ranking as possible, i.e.
#' it minimizes their mean rank, where rank 1 belongs to the highest score.
#'
#' @param data Input data including the columns as well as a `gene` column
#'   that is matched against `reference_gene_ids`.
#' @param columns Columns containing the separate scores between 0.0 and 1.0.
#' @param reference_gene_ids IDs of the reference genes within the input data.
#'
#' @returns Vector of optimal column weights adding up to 1.0.
optimize_weights <- function(data, columns, reference_gene_ids) {
    # Everything that does not depend on the weights is extracted once, so
    # that the objective function does not have to copy and reorder the whole
    # table on each of the many evaluations performed by the optimizer.
    score_columns <- lapply(columns, function(column) data[[column]])
    is_reference <- data[["gene"]] %in% reference_gene_ids
    row_count <- nrow(data)

    # Compute the mean rank of the reference genes when applying the weights.
    mean_rank <- function(weights) {
        score <- numeric(row_count)

        for (i in seq_along(score_columns)) {
            score <- score + weights[i] * score_columns[[i]]
        }

        # Stable descending order: rows with equal scores keep their original
        # relative order and missing scores are placed first.
        ranking <- order(-score, na.last = FALSE)

        # The positions of the reference genes within the ranking are their
        # ranks.
        mean(which(is_reference[ranking]))
    }

    # NOTE(review): the optimizer is called without bounds, so individual
    # weights are not guaranteed to be non-negative before normalization.
    weights <- optim(rep(1.0, length(columns)), mean_rank)$par
    weights / sum(weights)
}
Add optimizer and buttons to disable methods 2021-10-16 17:02:39 +02:00			`#' Find the best weights to rank the data.`
			`#'`
			`#' This function ranks the provided data table based on a weighted score`
			#' computed from the specified `columns`. It tries to find the optimal weights
			`#' that result in a ranking, where the mean rank of the given reference genes`
			`#' is as high as possible.`
			`#'`
			`#' @param data Input data including the columns.`
			`#' @param columns Columns containing the separate scores between 0.0 and 1.0.`
			`#' @param reference_gene_ids IDs of the reference genes within the input data.`
			`#'`
			`#' @returns Vector of optimal column weights adding up to 1.0.`
			`optimize_weights <- function(data, columns, reference_gene_ids) {`
			`#' Compute the mean rank of the reference genes when applying the weights.`
			`mean_rank <- function(weights) {`
			`data <- copy(data)`
			`data[, score := 0.0]`

			`for (i in seq_along(columns)) {`
			`column <- columns[i]`
			`weighted <- weights[i] * data[, ..column]`
			`data[, score := score + weighted]`
			`}`

			`setorder(data, -score)`
			`data[, rank := .I]`

			`data[gene %chin% reference_gene_ids, mean(rank)]`
			`}`

			`weights <- optim(rep(1.0, length(columns)), mean_rank)$par`
			`total_weight <- sum(weights)`
			`weights / total_weight`
			`}`