adjacency: Use minimum difference in distances

This commit is contained in:
Elias Projahn 2022-01-09 20:26:42 +01:00
parent 2ceda0691b
commit 32776469bf
2 changed files with 23 additions and 9 deletions

View file

@ -26,11 +26,14 @@ densest <- function(data) {
#' #'
#' @param estimate A function that will be used to summarize the distance #' @param estimate A function that will be used to summarize the distance
#' values for each gene. See [densest()] for the default implementation. #' values for each gene. See [densest()] for the default implementation.
#' @param combination A function that will be used to combine the absolute
#' differences between a gene's estimated distance and the estimated distances
#' of the reference genes. By default [min()] is used. That means the
#' difference to the closest reference gene will be scored.
#' #'
#' @return An object of class `geposan_method`. #' @return An object of class `geposan_method`.
#' #'
#' @export #' @export
adjacency <- function(estimate = densest) { adjacency <- function(estimate = densest, combination = min) {
method( method(
id = "adjacency", id = "adjacency",
name = "Adjacency", name = "Adjacency",
@ -42,26 +45,32 @@ adjacency <- function(estimate = densest) {
cached( cached(
"adjacency", "adjacency",
c(species_ids, gene_ids, reference_gene_ids, estimate), c(
species_ids,
gene_ids,
reference_gene_ids,
estimate,
combination
),
{ # nolint { # nolint
# Filter distances by species and gene and summarize each # Filter distances by species and gene and summarize each
# gene's distance values using the estimation function. # gene's distance values using the estimation function.
data <- geposan::distances[ data <- geposan::distances[
species %chin% species_ids & gene %chin% gene_ids, species %chin% species_ids & gene %chin% gene_ids,
.(distance = estimate(distance)), .(distance = as.numeric(estimate(distance))),
by = gene by = gene
] ]
# Compute the absolute value of the difference between the # Compute the absolute value of the difference between the
# estimated distances of each gene to the reference genes. # estimated distances of each gene to the reference genes.
compute_difference <- function(distance, compute_difference <- function(distance_value,
comparison_ids) { comparison_ids) {
reference_distance <- data[ differences <- data[
gene %chin% comparison_ids, gene %chin% comparison_ids,
mean(distance) .(difference = abs(distance_value - distance))
] ]
abs(distance - reference_distance) combination(differences$difference)
} }
# Compute the differences to the reference genes. # Compute the differences to the reference genes.
@ -70,7 +79,8 @@ adjacency <- function(estimate = densest) {
difference := compute_difference( difference := compute_difference(
distance, distance,
reference_gene_ids reference_gene_ids
) ),
by = gene
] ]
progress(0.5) progress(0.5)

View file

@ -4,11 +4,15 @@
\alias{adjacency} \alias{adjacency}
\title{Score genes based on their proximity to the reference genes.} \title{Score genes based on their proximity to the reference genes.}
\usage{ \usage{
adjacency(estimate = densest) adjacency(estimate = densest, combination = min)
} }
\arguments{ \arguments{
\item{estimate}{A function that will be used to summarize the distance \item{estimate}{A function that will be used to summarize the distance
values for each gene. See \code{\link[=densest]{densest()}} for the default implementation.} values for each gene. See \code{\link[=densest]{densest()}} for the default implementation.}
\item{combination}{A function that will be used to combine the absolute
differences between a gene's estimated distance and the estimated distances
of the reference genes. By default \code{\link[=min]{min()}} is used. That
means the difference to the closest reference gene will be scored.}
} }
\value{ \value{
An object of class \code{geposan_method}. An object of class \code{geposan_method}.