geposan/R/validate.R

#' Perform cross-validation for the ranking.
#'
#' This function reoptimizes the ranking leaving out one of the original
#' reference genes at a time.
#'
#' @param ranking The ranking to validate.
#' @param reference_gene_ids The reference gene IDs whose ranking should be
#'   validated.
#' @param method_ids IDs of the methods that were used.
#' @param progress An optional progress function that should accept a single
#'   value between 0.0 and 1.0 for progress information.
#'
#' @returns A validation object with the following items:
#'   \describe{
#'     \item{`validation`}{A `data.table` containing percentiles of the
#'       comparison genes from the original ranking as well as their validation.
#'     }
#'     \item{`mean_score`}{The mean score of the genes.}
#'     \item{`mean_percentile_original`}{The mean percentile of the genes in
#'       the original ranking.
#'     }
#'     \item{`mean_percentile_validation`}{The mean percentile of the genes
#'       when optimizing without themselves.
#'     }
#'     \item{`mean_error`}{The mean absolute error.}
#'   }
#'
#' @export
validate <- function(ranking, reference_gene_ids, method_ids, progress = NULL) {
    if (!inherits(ranking, "geposan_ranking")) {
        stop("Ranking is invalid. Use geposan::ranking().")
    }

    if (is.null(progress)) {
        progress_bar <- progress::progress_bar$new()
        progress_bar$update(0.0)

        progress <- function(progress_value) {
            if (!progress_bar$finished) {
                progress_bar$update(progress_value)
                if (progress_value >= 1.0) {
                    progress_bar$terminate()
                }
            }
        }
    }

    progress_state <- 0.0
    progress_step <- 1.0 / length(reference_gene_ids)

    results <- ranking[gene %chin% reference_gene_ids, .(gene, percentile)]

    for (gene_id in reference_gene_ids) {
        included_gene_ids <- reference_gene_ids[
            reference_gene_ids != gene_id
        ]

        weights <- optimal_weights(
            ranking,
            method_ids,
            included_gene_ids
        )

        ranking_validation <- ranking(ranking, weights)

        results[
            gene == gene_id,
            percentile_validation := ranking_validation[
                gene == gene_id,
                percentile
            ]
        ]

        if (!is.null(progress)) {
            progress_state <- progress_state + progress_step
            progress(progress_state)
        }
    }

    results[, error := percentile - percentile_validation]
    setorder(results, error)

    structure(
        list(
            validation = results,
            mean_percentile_original = results[, mean(percentile)],
            mean_percentile_validation = results[, mean(percentile_validation)],
            mean_error = results[, mean(error)]
        ),
        class = "geposan_validation"
    )
}

#' S3 method to print a validation object.
#'
#' @param x The validation to print.
#' @param ... Other parameters.
#'
#' @seealso [validate()]
#'
#' @export
print.geposan_validation <- function(x, ...) {
    cat(sprintf(
        paste0(
            "geposan validation:",
            "\n  Mean percentile original: %.1f%%",
            "\n  Mean percentile validation: %.1f%%",
            "\n  Mean error: %.1f percent points",
            "\n"
        ),
        x$mean_percentile_original * 100,
        x$mean_percentile_validation * 100,
        x$mean_error * 100
    ))

    invisible(x)
}