adjacency: Use minimum difference in distances

2025-10-26 10:47:25 +01:00 · 2022-01-09 20:26:42 +01:00 · 2022-01-09 20:26:42 +01:00 · 32776469bf
commit 32776469bf
parent 2ceda0691b
2 changed files with 23 additions and 9 deletions
--- a/R/adjacency.R
+++ b/R/adjacency.R
@ -26,11 +26,14 @@ densest <- function(data) {
 #'
 #' @param estimate A function that will be used to summarize the distance
 #'   values for each gene. See [densest()] for the default implementation.
+#' @param combination A function that will be used to combine the differences
+#'   between a gene's estimated distance and that of each reference gene. By
+#'   default [min()] is used, so the smallest of these differences is scored.
 #'
 #' @return An object of class `geposan_method`.
 #'
 #' @export
-adjacency <- function(estimate = densest) {
+adjacency <- function(estimate = densest, combination = min) {
    method(
        id = "adjacency",
        name = "Adjacency",
@ -42,26 +45,32 @@ adjacency <- function(estimate = densest) {

            cached(
                "adjacency",
-                c(species_ids, gene_ids, reference_gene_ids, estimate),
+                c(
+                    species_ids,
+                    gene_ids,
+                    reference_gene_ids,
+                    estimate,
+                    combination
+                ),
                { # nolint
                    # Filter distances by species and gene and summarize each
                    # gene's distance values using the estimation function.
                    data <- geposan::distances[
                        species %chin% species_ids & gene %chin% gene_ids,
-                        .(distance = estimate(distance)),
+                        .(distance = as.numeric(estimate(distance))),
                        by = gene
                    ]

                    # Compute the absolute value of the difference between the
                    # estimated distances of each gene to the reference genes.
-                    compute_difference <- function(distance,
+                    compute_difference <- function(distance_value,
                                                   comparison_ids) {
-                        reference_distance <- data[
+                        differences <- data[
                            gene %chin% comparison_ids,
-                            mean(distance)
+                            .(difference = abs(distance_value - distance))
                        ]

-                        abs(distance - reference_distance)
+                        combination(differences$difference)
                    }

                    # Compute the differences to the reference genes.
@ -70,7 +79,8 @@ adjacency <- function(estimate = densest) {
                        difference := compute_difference(
                            distance,
                            reference_gene_ids
-                        )
+                        ),
+                        by = gene
                    ]

                    progress(0.5)
--- a/man/adjacency.Rd
+++ b/man/adjacency.Rd
@ -4,11 +4,15 @@
 \alias{adjacency}
 \title{Score genes based on their proximity to the reference genes.}
 \usage{
-adjacency(estimate = densest)
+adjacency(estimate = densest, combination = min)
 }
 \arguments{
 \item{estimate}{A function that will be used to summarize the distance
 values for each gene. See \code{\link[=densest]{densest()}} for the default implementation.}
+
+\item{combination}{A function that will be used to combine the differences
+between a gene's estimated distance and that of each reference gene. By
+default \code{\link[=min]{min()}} is used, so the smallest of these differences is scored.}
 }
 \value{
 An object of class \code{geposan_method}.