# geposan/R/method_distance.R

#' Score the distance of genes to the telomeres across species.
#'
#' For each gene, the distances observed in the selected species are combined
#' into a single value using `summarize`. The score is that combined distance
#' divided by the largest combined distance across all genes, so that 1.0
#' corresponds to the maximal combined distance and 0.0 corresponds to a
#' distance of 0.
#'
#' @param id Unique ID for the method and its results.
#' @param name Human readable name for the method.
#' @param description Method description.
#' @param summarize A function for combining the different distances of a gene
#'   into one metric. By default, `densest` is used. Other suggested options
#'   include [stats::median()], [min()] and [mean()].
#'
#' @return An object of class `geposan_method`.
#'
#' @export
distance <- function(id = "distance",
                     name = "Distance",
                     description = "Distance to telomeres",
                     summarize = densest) {
  method(
    id = id,
    name = name,
    description = description,
    # NOTE(review): this text says "Median" although the default `summarize`
    # is `densest`; left unchanged because it is user-facing.
    help = "Median distance to the telomeres across species.",
    function(preset, progress) {
      species_ids <- preset$species_ids
      gene_ids <- preset$gene_ids

      # The cache key is built from the method ID and the selected species
      # and genes only.
      cached(id, c(species_ids, gene_ids), {
        # Prefilter distances by species and gene.
        data <- geposan::distances[
          species %chin% species_ids & gene %chin% gene_ids
        ]

        # Combine the per-species distances of each gene into one value.
        # Within `j`, `distance` refers to the column of the distance table.
        data <- data[,
          .(combined_distance = as.double(summarize(distance))),
          by = "gene"
        ]

        # Normalize scores: the gene with the largest combined distance
        # receives a score of 1.0.
        data[, score := combined_distance / max(combined_distance)]

        progress(1.0)

        # NOTE(review): the method name is hard-coded here instead of using
        # `id`; confirm against `result()` whether that is intended.
        result(
          method = "distance",
          scores = data[, .(gene, score)],
          details = list(data = data)
        )
      })
    }
  )
}