geposan/R/adjacency.R

#' Score genes based on their proximity to the reference genes.
#'
#' This method finds the distance value with the maximum density for each gene
#' (i.e. the mode of its estimated distribution). Genes are scored by comparing
#' those distance values with the values of the reference genes.
#'
#' @return An object of class `geposan_method`.
#'
#' @export
adjacency <- function() {
    method(
        id = "adjacency",
        name = "Adjacency",
        description = "Adjacency to reference genes",
        function(preset, progress) {
            species_ids <- preset$species_ids
            gene_ids <- preset$gene_ids
            reference_gene_ids <- preset$reference_gene_ids

            cached("adjacency", c(species_ids, gene_ids, reference_gene_ids), {
                # Get the virtual distance value with the highest density.
                compute_densest_distance <- function(distances) {
                    if (length(distances) <= 2) {
                        mean(distances)
                    } else {
                        d <- stats::density(distances)
                        d$x[which.max(d$y)]
                    }
                }

                # Filter distances by species and gene and find the distance
                # with the highest density of values for each gene.
                data <- geposan::distances[
                    species %chin% species_ids & gene %chin% gene_ids,
                    .(densest_distance = compute_densest_distance(distance)),
                    by = gene
                ]

                # Compute the absolute value of the difference between the
                # provided densest distance value in comparison to the mean of
                # the densest distances of the comparison genes.
                compute_difference <- function(densest_distance,
                                               comparison_ids) {
                    # Get the mean of the densest distances of the reference
                    # genes.
                    mean_densest_distance <- data[
                        gene %chin% comparison_ids,
                        mean(densest_distance)
                    ]

                    abs(densest_distance - mean_densest_distance)
                }

                # Compute the differences to the reference genes.
                data[
                    !gene %chin% reference_gene_ids,
                    difference := compute_difference(
                        densest_distance,
                        reference_gene_ids
                    )
                ]

                progress(0.5)

                # Exclude the reference gene itself when computing its
                # difference.
                data[
                    gene %chin% reference_gene_ids,
                    difference := compute_difference(
                        densest_distance,
                        reference_gene_ids[reference_gene_ids != gene]
                    ),
                    by = gene
                ]

                # Compute the final score by normalizing the difference.
                data[, score := 1 - difference / max(difference)]

                progress(1.0)

                result(
                    method = "adjacency",
                    scores = data[, .(gene, score)],
                    details = list(data = data)
                )
            })
        }
    )
}