Restructure classes and their responsibilities

This commit is contained in:
Elias Projahn 2021-12-16 13:01:44 +01:00
parent 01ec301d6d
commit e2b93babe5
27 changed files with 974 additions and 634 deletions

View file

@@ -1,81 +1,89 @@
# Score genes based on their proximity to the reference genes.
#
# This method finds the distance value with the maximum density for each gene
# (i.e. the mode of its estimated distribution). Genes are scored by comparing
# those distance values with the values of the reference genes.
adjacency <- function(preset, progress = NULL) {
species_ids <- preset$species_ids
gene_ids <- preset$gene_ids
reference_gene_ids <- preset$reference_gene_ids
#' Score genes based on their proximity to the reference genes.
#'
#' This method finds the distance value with the maximum density for each gene
#' (i.e. the mode of its estimated distribution). Genes are scored by comparing
#' those distance values with the values of the reference genes.
#'
#' @return An object of class `geposan_method`.
#'
#' @export
adjacency <- function() {
method(
id = "adjacency",
name = "Adjacency",
description = "Adjacency to reference genes",
function(preset, progress) {
species_ids <- preset$species_ids
gene_ids <- preset$gene_ids
reference_gene_ids <- preset$reference_gene_ids
cached("adjacency", c(species_ids, gene_ids, reference_gene_ids), {
# Get the virtual distance value with the highest density.
compute_densest_distance <- function(distances) {
if (length(distances) <= 2) {
mean(distances)
} else {
d <- stats::density(distances)
d$x[which.max(d$y)]
}
cached("adjacency", c(species_ids, gene_ids, reference_gene_ids), {
# Estimate the distance value at which the given distances are most densely
# concentrated (the mode of a kernel density estimate).
#
# With at most two values, no density is estimated and the plain mean of the
# values is returned instead.
compute_densest_distance <- function(distances) {
    if (length(distances) <= 2) {
        return(mean(distances))
    }

    # Pick the grid position where the estimated density peaks.
    estimate <- stats::density(distances)
    peak <- which.max(estimate$y)
    estimate$x[peak]
}
# Filter distances by species and gene and find the distance
# with the highest density of values for each gene.
data <- geposan::distances[
species %chin% species_ids & gene %chin% gene_ids,
.(densest_distance = compute_densest_distance(distance)),
by = gene
]
# Compute the absolute difference between the given densest distance
# value(s) and the mean of the densest distances of the comparison genes.
#
# `densest_distance` is the value (or vector of values) to compare and
# `comparison_ids` holds the IDs of the genes whose mean densest distance
# serves as the baseline. The result has the same length as
# `densest_distance`.
compute_difference <- function(densest_distance,
comparison_ids) {
# Get the mean of the densest distances of the comparison genes.
# NOTE(review): inside `data[...]`, `densest_distance` should resolve to
# the table column rather than to the function argument, because
# data.table looks up names in j among the columns first - confirm.
# NOTE(review): if no row matches `comparison_ids`, the mean is taken
# over zero rows (presumably NaN) and propagates into the result -
# confirm callers never pass an empty set.
mean_densest_distance <- data[
gene %chin% comparison_ids,
mean(densest_distance)
]
abs(densest_distance - mean_densest_distance)
}
# Compute the differences to the reference genes.
data[
!gene %chin% reference_gene_ids,
difference := compute_difference(
densest_distance,
reference_gene_ids
)
]
progress(0.5)
# Exclude the reference gene itself when computing its
# difference.
data[
gene %chin% reference_gene_ids,
difference := compute_difference(
densest_distance,
reference_gene_ids[reference_gene_ids != gene]
),
by = gene
]
# Compute the final score by normalizing the difference.
data[, score := 1 - difference / max(difference)]
progress(1.0)
result(
method = "adjacency",
scores = data[, .(gene, score)],
details = list(data = data)
)
})
}
# Filter distances by species and gene and find the distance with the
# highest density of values for each gene.
data <- geposan::distances[
species %chin% species_ids & gene %chin% gene_ids,
.(densest_distance = compute_densest_distance(distance)),
by = gene
]
# Compute the absolute difference between the given densest distance value(s)
# and the mean of the densest distances of the comparison genes.
#
# `densest_distance` is the value (or vector of values) to compare and
# `comparison_ids` holds the IDs of the genes whose mean densest distance
# serves as the baseline. The result has the same length as
# `densest_distance`.
compute_difference <- function(densest_distance, comparison_ids) {
# Get the mean of the densest distances of the comparison genes.
# NOTE(review): inside `data[...]`, `densest_distance` should resolve to the
# table column rather than to the function argument, because data.table
# looks up names in j among the columns first - confirm.
# NOTE(review): if no row matches `comparison_ids`, the mean is taken over
# zero rows (presumably NaN) and propagates into the result - confirm
# callers never pass an empty set.
mean_densest_distance <- data[
gene %chin% comparison_ids,
mean(densest_distance)
]
abs(densest_distance - mean_densest_distance)
}
# Compute the differences to the reference genes.
data[
!gene %chin% reference_gene_ids,
difference := compute_difference(
densest_distance,
reference_gene_ids
)
]
if (!is.null(progress)) {
progress(0.5)
}
# Exclude the reference gene itself when computing its difference.
data[
gene %chin% reference_gene_ids,
difference := compute_difference(
densest_distance,
reference_gene_ids[reference_gene_ids != gene]
),
by = gene
]
# Compute the final score by normalizing the difference.
data[, score := 1 - difference / max(difference)]
if (!is.null(progress)) {
progress(1.0)
}
structure(
list(
results = data[, .(gene, score)],
details = data
),
class = "geposan_method_results"
)
})
)
}