mirror of
https://github.com/johrpan/geposan.git
synced 2025-10-26 18:57:25 +01:00
Add new method adjacency
This commit is contained in:
parent
b1eff374b3
commit
88b1a9ab6a
3 changed files with 85 additions and 6 deletions
81
R/adjacency.R
Normal file
81
R/adjacency.R
Normal file
|
|
@ -0,0 +1,81 @@
|
||||||
|
# Score genes based on their proximity to the reference genes.
|
||||||
|
#
|
||||||
|
# This method finds the distance value with the maximum density for each gene
|
||||||
|
# (i.e. the mode of its estimated distribution). Genes are scored by comparing
|
||||||
|
# those distance values with the values of the reference genes.
|
||||||
|
adjacency <- function(preset, progress = NULL) {
|
||||||
|
species_ids <- preset$species_ids
|
||||||
|
gene_ids <- preset$gene_ids
|
||||||
|
reference_gene_ids <- preset$reference_gene_ids
|
||||||
|
|
||||||
|
cached("adjacency", c(species_ids, gene_ids, reference_gene_ids), {
|
||||||
|
# Get the virtual distance value with the highest density.
|
||||||
|
compute_densest_distance <- function(distances) {
|
||||||
|
if (length(distances <= 2)) {
|
||||||
|
mean(distances)
|
||||||
|
} else {
|
||||||
|
d <- stats::density(distances)
|
||||||
|
d$x[which.max(d$y)]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Filter distances by species and gene and find the distance with the
|
||||||
|
# highest density of values for each gene.
|
||||||
|
data <- geposan::distances[
|
||||||
|
species %chin% species_ids & gene %chin% gene_ids,
|
||||||
|
.(densest_distance = compute_densest_distance(distance)),
|
||||||
|
by = gene
|
||||||
|
]
|
||||||
|
|
||||||
|
# Compute the absolute value of the difference between the provided
|
||||||
|
# densest distance value in comparison to the mean of the densest
|
||||||
|
# distances of the comparison genes.
|
||||||
|
compute_difference <- function(densest_distance, comparison_ids) {
|
||||||
|
# Get the mean of the densest distances of the reference genes.
|
||||||
|
mean_densest_distance <- data[
|
||||||
|
gene %chin% comparison_ids,
|
||||||
|
mean(densest_distance)
|
||||||
|
]
|
||||||
|
|
||||||
|
abs(densest_distance - mean_densest_distance)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Compute the differences to the reference genes.
|
||||||
|
data[
|
||||||
|
!gene %chin% reference_gene_ids,
|
||||||
|
difference := compute_difference(
|
||||||
|
densest_distance,
|
||||||
|
reference_gene_ids
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
if (!is.null(progress)) {
|
||||||
|
progress(0.5)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Exclude the reference gene itself when computing its difference.
|
||||||
|
data[
|
||||||
|
gene %chin% reference_gene_ids,
|
||||||
|
difference := compute_difference(
|
||||||
|
densest_distance,
|
||||||
|
reference_gene_ids[reference_gene_ids != gene]
|
||||||
|
),
|
||||||
|
by = gene
|
||||||
|
]
|
||||||
|
|
||||||
|
# Compute the final score by normalizing the difference.
|
||||||
|
data[, score := 1 - difference / max(difference)]
|
||||||
|
|
||||||
|
if (!is.null(progress)) {
|
||||||
|
progress(1.0)
|
||||||
|
}
|
||||||
|
|
||||||
|
structure(
|
||||||
|
list(
|
||||||
|
results = data[, .(gene, score)],
|
||||||
|
details = data
|
||||||
|
),
|
||||||
|
class = "geposan_method_results"
|
||||||
|
)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
@ -34,6 +34,7 @@ analyze <- function(preset, progress = NULL) {
|
||||||
"clusteriness" = clusteriness,
|
"clusteriness" = clusteriness,
|
||||||
"correlation" = correlation,
|
"correlation" = correlation,
|
||||||
"neural" = neural,
|
"neural" = neural,
|
||||||
|
"adjacency" = adjacency,
|
||||||
"proximity" = proximity
|
"proximity" = proximity
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11,14 +11,10 @@
|
||||||
#'
|
#'
|
||||||
#' - `clusteriness` How much the gene distances to the nearest telomere
|
#' - `clusteriness` How much the gene distances to the nearest telomere
|
||||||
#' cluster across species.
|
#' cluster across species.
|
||||||
#' - `clusteriness_positions` The same as `clusteriness` but using absolute
|
|
||||||
#' gene positions instead of distances.
|
|
||||||
#' - `correlation` The mean correlation of gene distances to the nearest
|
#' - `correlation` The mean correlation of gene distances to the nearest
|
||||||
#' telomere across species.
|
#' telomere across species.
|
||||||
#' - `correlation_positions` Correlation using position data.
|
#' - `neural` Assessment by neural network trained on the reference genes.
|
||||||
#' - `neural` Assessment by neural network trained using distances.
|
#' - `adjacency` Proximity to reference genes.
|
||||||
#' - `neural_positions` Assessment by neural network trained using absolute
|
|
||||||
#' position data.
|
|
||||||
#' - `proximity` Mean proximity to telomeres.
|
#' - `proximity` Mean proximity to telomeres.
|
||||||
#'
|
#'
|
||||||
#' Available optimization targets are:
|
#' Available optimization targets are:
|
||||||
|
|
@ -42,6 +38,7 @@ preset <- function(methods = c(
|
||||||
"clusteriness",
|
"clusteriness",
|
||||||
"correlation",
|
"correlation",
|
||||||
"neural",
|
"neural",
|
||||||
|
"adjacency",
|
||||||
"proximity"
|
"proximity"
|
||||||
),
|
),
|
||||||
species_ids = NULL,
|
species_ids = NULL,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue