geposan/R/proximity.R

# Score the mean distance of genes to the telomeres across species.
#
# A score will be given to each gene such that 0.0 corresponds to the maximal
# mean distance across all genes and 1.0 corresponds to a distance of 0.
#
# Arguments:
#   preset: An object providing `species_ids` and `gene_ids`, which are used
#     both to filter the distance data and to build the cache key.
#   use_positions: If `TRUE`, average the `position` column instead of the
#     `distance` column.
#   progress: Optional function that will be called once with `1.0` after the
#     scores have been computed.
#
# The result of the computation is a table with the columns `gene` and
# `score`. NOTE(review): This is handed to `cached`, which presumably returns
# it (possibly from a cache). Confirm against the definition of `cached`.
proximity <- function(preset, use_positions = FALSE, progress = NULL) {
    species_ids <- preset$species_ids
    gene_ids <- preset$gene_ids

    cached("proximity", c(species_ids, gene_ids, use_positions), {
        # Prefilter distances by species and gene.
        data <- geposan::distances[
            species %chin% preset$species_ids & gene %chin% preset$gene_ids
        ]

        # Compute the mean value per gene across all included species.

        data <- if (use_positions) {
            data[, .(mean_distance = mean(position)), by = "gene"]
        } else {
            data[, .(mean_distance = mean(distance)), by = "gene"]
        }

        # Determine the value to normalize with. Calling `max` on an empty
        # vector would emit a warning and return -Inf, so that case is
        # skipped. The placeholder is irrelevant then, as there are no rows
        # to score.
        max_distance <- if (nrow(data) > 0) {
            data[, max(mean_distance)]
        } else {
            1
        }

        # If the maximal mean distance is 0, dividing by it would yield NaN
        # (0 / 0) for every gene. Dividing by 1 instead results in the
        # documented score of 1.0 for a distance of 0. This assumes that the
        # values are non-negative. A missing maximum is left untouched, so
        # that missing values keep propagating to the scores.
        divisor <- if (isTRUE(max_distance == 0)) 1 else max_distance

        data[, score := 1 - mean_distance / divisor]

        if (!is.null(progress)) {
            # We do everything in one go, so it's not possible to report
            # detailed progress information. As the method is relatively quick,
            # this should not be a problem.
            progress(1.0)
        }

        data[, .(gene, score)]
    })
}
Initial commit 2021-10-19 13:39:55 +02:00			`# Score the mean distance of genes to the telomeres across species.`
			`#`
			`# A score will be given to each gene such that 0.0 corresponds to the maximal`
			`# mean distance across all genes and 1.0 corresponds to a distance of 0.`
Implement all methods using positions additionally 2021-11-05 19:49:54 +01:00			`proximity <- function(preset, use_positions = FALSE, progress = NULL) {`
Handle caching 2021-10-21 17:25:44 +02:00			`species_ids <- preset$species_ids`
			`gene_ids <- preset$gene_ids`
Initial commit 2021-10-19 13:39:55 +02:00
Implement all methods using positions additionally 2021-11-05 19:49:54 +01:00			`cached("proximity", c(species_ids, gene_ids, use_positions), {`
Handle caching 2021-10-21 17:25:44 +02:00			`# Prefilter distances by species and gene.`
Implement all methods using positions additionally 2021-11-05 19:49:54 +01:00			`data <- geposan::distances[`
Handle caching 2021-10-21 17:25:44 +02:00			`species %chin% preset$species_ids & gene %chin% preset$gene_ids`
			`]`
Initial commit 2021-10-19 13:39:55 +02:00
Handle caching 2021-10-21 17:25:44 +02:00			`# Compute the score as described above.`
Initial commit 2021-10-19 13:39:55 +02:00
Implement all methods using positions additionally 2021-11-05 19:49:54 +01:00			`data <- if (use_positions) {`
			`data[, .(mean_distance = mean(position)), by = "gene"]`
			`} else {`
			`data[, .(mean_distance = mean(distance)), by = "gene"]`
			`}`

			`max_distance <- data[, max(mean_distance)]`
			`data[, score := 1 - mean_distance / max_distance]`
Add framework for progress information 2021-10-19 15:03:10 +02:00
Handle caching 2021-10-21 17:25:44 +02:00			`if (!is.null(progress)) {`
			`# We do everything in one go, so it's not possible to report`
			`# detailed progress information. As the method is relatively quick,`
			`# this should not be a problem.`
			`progress(1.0)`
			`}`

Implement all methods using positions additionally 2021-11-05 19:49:54 +01:00			`data[, .(gene, score)]`
Handle caching 2021-10-21 17:25:44 +02:00			`})`
Initial commit 2021-10-19 13:39:55 +02:00			`}`