2021-10-19 13:39:55 +02:00
|
|
|
# Score the mean distance of genes to the telomeres across species.
#
# A score will be given to each gene such that 0.0 corresponds to the maximal
# mean distance across all genes and 1.0 corresponds to a distance of 0.
#
# Arguments:
#   preset: Object providing `species_ids` and `gene_ids`. Only rows of
#     `geposan::distances` matching both are taken into account.
#   use_positions: If `TRUE`, the `position` column is averaged per gene
#     instead of the `distance` column.
#   progress: Optional function. If given, it is called with 1.0 from within
#     the cached computation after the scores have been computed.
#
# Returns a data.table with the columns `gene` and `score`. The computation
# is wrapped in `cached()` and keyed by the species IDs, the gene IDs and
# `use_positions`.
proximity <- function(preset, use_positions = FALSE, progress = NULL) {
  species_ids <- preset$species_ids
  gene_ids <- preset$gene_ids

  cached("proximity", c(species_ids, gene_ids, use_positions), {
    # Prefilter distances by species and gene.
    data <- geposan::distances[
      species %chin% preset$species_ids & gene %chin% preset$gene_ids
    ]

    # Average the requested column per gene across all selected species.
    data <- if (use_positions) {
      data[, .(mean_distance = mean(position)), by = "gene"]
    } else {
      data[, .(mean_distance = mean(distance)), by = "gene"]
    }

    max_distance <- data[, max(mean_distance)]

    if (isTRUE(max_distance == 0)) {
      # The maximal mean distance is 0, so dividing would compute 0 / 0 and
      # yield NaN for every gene. A distance of 0 is defined to correspond
      # to a score of 1.0 (see above), so assign that directly.
      data[, score := 1.0]
    } else {
      # Compute the score as described above.
      data[, score := 1 - mean_distance / max_distance]
    }

    if (!is.null(progress)) {
      # We do everything in one go, so it's not possible to report
      # detailed progress information. As the method is relatively quick,
      # this should not be a problem.
      progress(1.0)
    }

    data[, .(gene, score)]
  })
}
|