geposan/R/ranking.R

#' Rank the results by computing a score.
#'
#' This function takes the result of [analyze()] and creates a score by
#' computing a weighted mean across the different methods' results.
#'
#' @param analysis Analysis object resulting from [analyze()]. A ranking
#'   object is accepted as well; its `score` and `rank` columns are
#'   recomputed.
#' @param weights Named list pairing method names with weighting factors. Only
#'   methods that are contained within this list will be included. Every name
#'   has to be a column of `analysis`.
#'
#' @returns A ranking object. The object extends the analysis with additional
#'   columns containing the `score` and the `rank` of each gene. It will be
#'   ordered by rank. The input object is not modified.
#'
#' @export
ranking <- function(analysis, weights) {
    if (!inherits(analysis, "geposan_analysis")) {
        stop("Invalid analysis. Use geposan::analyze().")
    }

    # Fail early with a readable message instead of a column lookup error.
    unknown_methods <- setdiff(names(weights), names(analysis))

    if (length(unknown_methods) > 0) {
        stop(
            "Methods not found in analysis: ",
            paste(unknown_methods, collapse = ", ")
        )
    }

    # Work on a copy, because `:=` and `setorder` modify by reference.
    ranking <- copy(analysis)
    ranking[, score := 0.0]

    for (method in names(weights)) {
        # Extract the column as a plain vector to get a plain numeric score.
        weighted <- weights[[method]] * ranking[[method]]
        ranking[, score := score + weighted]
    }

    # Divide by the total weight to turn the weighted sum into a weighted mean.
    ranking[, score := score / sum(unlist(weights))]

    # The highest score gets rank 1.
    setorder(ranking, -score)
    ranking[, rank := .I]

    # `class(ranking)` already contains "geposan_analysis" (and
    # "geposan_ranking" if a ranking was passed in), so drop the duplicates.
    structure(
        ranking,
        class = unique(
            c("geposan_ranking", "geposan_analysis", class(ranking))
        )
    )
}

#' S3 method for plotting a ranking.
#'
#' Draws every gene as a marker at its rank (x axis) and score (y axis) using
#' plotly. Genes that are part of one of the `gene_sets` are added as a second
#' trace with larger markers that are colored by the label of their gene set.
#'
#' @param ranking Ranking object resulting from [ranking()].
#' @param gene_sets A list of gene sets (containing vectors of gene IDs) that
#'   will be highlighted in the plot.
#' @param labels Labels for the gene sets. If this is `NULL`, the names of
#'   `gene_sets` will be used. Gene sets without a name will be labeled
#'   "Gene set 1", "Gene set 2" and so on.
#'
#' @returns The plotly plot object.
#'
#' @seealso ranking()
#'
#' @export
plot.geposan_ranking <- function(ranking, gene_sets = NULL, labels = NULL) {
    if (!requireNamespace("plotly", quietly = TRUE)) {
        stop("Please install \"plotly\" to use this function.")
    }

    plot <- plotly::plot_ly() |>
        plotly::add_trace(
            data = ranking,
            x = ~rank,
            y = ~score,
            color = "All genes",
            type = "scatter",
            mode = "markers",
            hoverinfo = "skip"
        ) |>
        plotly::layout(
            xaxis = list(title = "Rank"),
            yaxis = list(title = "Score")
        )

    if (length(gene_sets) > 0) {
        # Without labels, `label := labels[i]` would assign NULL and the
        # `label` column needed for coloring would never be created.
        if (is.null(labels)) {
            labels <- paste("Gene set", seq_along(gene_sets))
            set_names <- names(gene_sets)

            if (!is.null(set_names)) {
                is_named <- !is.na(set_names) & nzchar(set_names)
                labels[is_named] <- set_names[is_named]
            }
        }

        # Take out the genes to be highlighted.
        gene_set_data <- ranking[gene %chin% unlist(gene_sets)]

        # Add labels for each gene set. A gene that is part of several gene
        # sets keeps the label of the last one.
        for (i in seq_along(gene_sets)) {
            gene_set_data[gene %chin% gene_sets[[i]], label := labels[i]]
        }

        # Include gene information which will be used for labeling.
        # NOTE(review): `genes` is not defined in this function; presumably it
        # is the package's gene table providing `id` and `name` - confirm.
        gene_set_data <- merge(gene_set_data, genes, by.x = "gene", by.y = "id")

        plot <- plot |> plotly::add_trace(
            data = gene_set_data,
            x = ~rank,
            y = ~score,
            color = ~label,
            text = ~name,
            type = "scatter",
            mode = "markers",
            marker = list(size = 20)
        )
    }

    plot
}

#' Find the best weights to rank the results.
#'
#' This function finds the optimal parameters to [ranking()] that result in the
#' reference genes ranking particularly high. The search is done using
#' [stats::optim()], starting with a weight of 1.0 for every method.
#'
#' @param analysis Results from [analyze()] or [ranking()].
#' @param methods Methods to include in the score.
#' @param reference_gene_ids IDs of the reference genes.
#' @param target The optimization target. It may be one of "mean", "min" or
#'   "max" and results in the respective rank being optimized. Any other value
#'   is treated like "mean".
#'
#' @returns Named list pairing method names with their optimal weights. This
#'   can be used as an argument to [ranking()]. Negative factors found by the
#'   optimizer are replaced by 0.0 and the weights are scaled to sum up to 1.0.
#'
#' @export
optimal_weights <- function(analysis, methods, reference_gene_ids,
                            target = "mean") {
    if (!inherits(analysis, "geposan_analysis")) {
        stop("Invalid analysis. Use geposan::analyze().")
    }

    # Without any reference gene there is no rank to optimize and the
    # optimizer would only fail with an unrelated error message.
    if (!any(analysis[["gene"]] %chin% reference_gene_ids)) {
        stop("None of the reference genes is part of the analysis.")
    }

    # Select the summary function once instead of on every evaluation of the
    # objective function.
    summarize_ranks <- if (target == "min") {
        min
    } else if (target == "max") {
        max
    } else {
        mean
    }

    # Create the named list from the factors vector.
    weights <- function(factors) {
        stats::setNames(as.list(factors), methods)
    }

    # Compute the target rank of the reference genes when applying the weights.
    target_rank <- function(factors) {
        data <- ranking(analysis, weights(factors))
        summarize_ranks(data[gene %chin% reference_gene_ids, rank])
    }

    factors <- stats::optim(rep(1.0, length(methods)), target_rank)$par

    # The optimizer is unconstrained, so negative factors are clamped to 0.0.
    factors[factors < 0.0] <- 0.0
    total_weight <- sum(factors)

    # Dividing by a total of zero would silently return NaN weights.
    if (total_weight <= 0.0) {
        stop("The optimization did not result in any positive weight.")
    }

    weights(factors / total_weight)
}
Initial commit 2021-10-19 13:39:55 +02:00			`#' Rank the results by computing a score.`
			`#'`
ranking: Use S3 classes and rename optimize method 2021-11-05 14:47:33 +01:00			`#' This function takes the result of [analyze()] and creates a score by`
Initial commit 2021-10-19 13:39:55 +02:00			`#' computing a weighted mean across the different methods' results.`
			`#'`
ranking: Use S3 classes and rename optimize method 2021-11-05 14:47:33 +01:00			`#' @param analysis Analysis object resulting from [analyze()].`
			`#' @param weights Named list pairing method names with weighting factors. Only`
			`#' methods that are contained within this list will be included.`
Initial commit 2021-10-19 13:39:55 +02:00			`#'`
ranking: Use S3 classes and rename optimize method 2021-11-05 14:47:33 +01:00			`#' @returns A ranking object. The object extends the analysis with additional`
			#' columns containing the `score` and the `rank` of each gene. It will be
			`#' ordered by rank.`
Initial commit 2021-10-19 13:39:55 +02:00			`#'`
			`#' @export`
ranking: Use S3 classes and rename optimize method 2021-11-05 14:47:33 +01:00			`ranking <- function(analysis, weights) {`
			`if (!"geposan_analysis" %chin% class(analysis)) {`
			`stop("Invalid analyis. Use geposan::analyze().")`
			`}`

			`ranking <- copy(analysis)`
			`ranking[, score := 0.0]`
Initial commit 2021-10-19 13:39:55 +02:00
			`for (method in names(weights)) {`
ranking: Use S3 classes and rename optimize method 2021-11-05 14:47:33 +01:00			`weighted <- weights[[method]] * ranking[, ..method]`
			`ranking[, score := score + weighted]`
Initial commit 2021-10-19 13:39:55 +02:00			`}`

			`# Normalize scores to be between 0.0 and 1.0.`
ranking: Use S3 classes and rename optimize method 2021-11-05 14:47:33 +01:00			`ranking[, score := score / sum(unlist(weights))]`

			`setorder(ranking, -score)`
			`ranking[, rank := .I]`
Initial commit 2021-10-19 13:39:55 +02:00
ranking: Use S3 classes and rename optimize method 2021-11-05 14:47:33 +01:00			`structure(`
			`ranking,`
			`class = c("geposan_ranking", "geposan_analysis", class(ranking))`
			`)`
Initial commit 2021-10-19 13:39:55 +02:00			`}`

ranking: Add plot method 2021-11-05 23:05:40 +01:00			`#' S3 method for plotting a ranking.`
			`#'`
			`#' @param gene_sets A list of gene sets (containing vectors of gene IDs) that`
			`#' will be highlighted in the plot.`
			`#' @param labels Labels for the gene sets.`
			`#'`
			`#' @seealso ranking()`
			`#'`
			`#' @export`
			`plot.geposan_ranking <- function(ranking, gene_sets = NULL, labels = NULL) {`
			`if (!requireNamespace("plotly", quietly = TRUE)) {`
			`stop("Please install \"plotly\" to use this function.")`
			`}`

			`plot <- plotly::plot_ly() \|>`
			`plotly::add_trace(`
			`data = ranking,`
			`x = ~rank,`
			`y = ~score,`
			`color = "All genes",`
			`type = "scatter",`
			`mode = "markers",`
			`hoverinfo = "skip"`
			`) \|>`
			`plotly::layout(`
			`xaxis = list(title = "Rank"),`
			`yaxis = list(title = "Score")`
			`)`

			`if (length(gene_sets) > 0) {`
			`# Take out the genes to be highlighted.`
			`gene_set_data <- ranking[gene %chin% unlist(gene_sets)]`

			`# Add labels for each gene set.`
			`for (i in seq_along(gene_sets)) {`
			`gene_set_data[gene %chin% gene_sets[[i]], label := labels[i]]`
			`}`

			`# Include gene information which will be used for laebling`
			`gene_set_data <- merge(gene_set_data, genes, by.x = "gene", by.y = "id")`

			`plot <- plot \|> plotly::add_trace(`
			`data = gene_set_data,`
			`x = ~rank,`
			`y = ~score,`
			`color = ~label,`
			`text = ~name,`
			`type = "scatter",`
			`mode = "markers",`
			`marker = list(size = 20)`
			`)`
			`}`

			`plot`
			`}`

Initial commit 2021-10-19 13:39:55 +02:00			`#' Find the best weights to rank the results.`
			`#'`
			`#' This function finds the optimal parameters to [ranking()] that result in the`
			`#' reference genes ranking particulary high.`
			`#'`
ranking: Use S3 classes and rename optimize method 2021-11-05 14:47:33 +01:00			`#' @param analysis Results from [analyze()] or [ranking()].`
Initial commit 2021-10-19 13:39:55 +02:00			`#' @param methods Methods to include in the score.`
			`#' @param reference_gene_ids IDs of the reference genes.`
Add more optimization targets 2021-10-21 11:42:44 +02:00			`#' @param target The optimization target. It may be one of "mean", "min" or`
			`#' "max" and results in the respective rank being optimized.`
Initial commit 2021-10-19 13:39:55 +02:00			`#'`
ranking: Use S3 classes and rename optimize method 2021-11-05 14:47:33 +01:00			`#' @returns Named list pairing method names with their optimal weights. This`
			`#' can be used as an argument to [ranking()].`
Initial commit 2021-10-19 13:39:55 +02:00			`#'`
			`#' @export`
ranking: Use S3 classes and rename optimize method 2021-11-05 14:47:33 +01:00			`optimal_weights <- function(analysis, methods, reference_gene_ids,`
			`target = "mean") {`
			`if (!"geposan_analysis" %chin% class(analysis)) {`
			`stop("Invalid analyis. Use geposan::analyze().")`
			`}`

Initial commit 2021-10-19 13:39:55 +02:00			`# Create the named list from the factors vector.`
			`weights <- function(factors) {`
			`result <- NULL`

			`mapply(function(method, factor) {`
			`result[[method]] <<- factor`
			`}, methods, factors)`

			`result`
			`}`

Add more optimization targets 2021-10-21 11:42:44 +02:00			`# Compute the target rank of the reference genes when applying the weights.`
			`target_rank <- function(factors) {`
ranking: Use S3 classes and rename optimize method 2021-11-05 14:47:33 +01:00			`data <- ranking(analysis, weights(factors))`
Add more optimization targets 2021-10-21 11:42:44 +02:00
			`data[gene %chin% reference_gene_ids, if (target == "min") {`
			`min(rank)`
			`} else if (target == "max") {`
			`max(rank)`
			`} else {`
			`mean(rank)`
			`}]`
Initial commit 2021-10-19 13:39:55 +02:00			`}`

Add more optimization targets 2021-10-21 11:42:44 +02:00			`factors <- stats::optim(rep(1.0, length(methods)), target_rank)$par`
ranking: Clamp weights to 0.0 2021-10-21 16:21:55 +02:00			`factors[factors < 0.0] <- 0.0`
Initial commit 2021-10-19 13:39:55 +02:00			`total_weight <- sum(factors)`

			`weights(factors / total_weight)`
			`}`