correlation: Make summary function customizable

This commit also changes the default behavior to use
`stats::median()`.
This commit is contained in:
Elias Projahn 2022-02-24 14:34:18 +01:00
parent 3cedc4fea4
commit 7fe07aceec
2 changed files with 19 additions and 5 deletions

View file

@ -1,9 +1,13 @@
#' Score genes based on their correlation with the reference genes.
#'
#' @param summarize A function for combining the different correlation
#' coefficients into one metric. By default, [stats::median()] is used. Other
#' suggested options include [max()] and [mean()].
#'
#' @return An object of class `geposan_method`.
#'
#' @export
correlation <- function() {
correlation <- function(summarize = stats::median) {
method(
id = "correlation",
name = "Correlation",
@ -15,7 +19,7 @@ correlation <- function() {
cached(
"correlation",
c(species_ids, gene_ids, reference_gene_ids),
c(species_ids, gene_ids, reference_gene_ids, summarize),
{ # nolint
# Prefilter distances by species.
distances <- geposan::distances[species %chin% species_ids]
@ -65,9 +69,14 @@ correlation <- function() {
progress(0.66)
# Find the highest correlation.
# Combine the correlation coefficients.
results[,
max_correlation := max(.SD, na.rm = TRUE),
max_correlation := as.double(summarize(na.omit(
# Convert the data.table subset into a
# vector to get the correct na.omit
# behavior.
as.matrix(.SD)[1, ]
))),
.SDcols = reference_gene_ids,
by = gene
]

View file

@ -4,7 +4,12 @@
\alias{correlation}
\title{Score genes based on their correlation with the reference genes.}
\usage{
correlation()
correlation(summarize = stats::median)
}
\arguments{
\item{summarize}{A function for combining the different correlation
coefficients into one metric. By default, \code{\link[stats:median]{stats::median()}} is used. Other
suggested options include \code{\link[=max]{max()}} and \code{\link[=mean]{mean()}}.}
}
\value{
An object of class \code{geposan_method}.