adjacency: Combine reference genes first

This commit is contained in:
Elias Projahn 2022-08-18 19:14:37 +02:00
parent 8cde59c1c7
commit 2b859b55dd
2 changed files with 9 additions and 19 deletions

View file

@ -10,8 +10,6 @@
#' @param distance_estimate A function that will be used to summarize the #' @param distance_estimate A function that will be used to summarize the
#' distance values for each gene. See [densest()] for the default #' distance values for each gene. See [densest()] for the default
#' implementation. #' implementation.
#' @param summarize A function that will be used to combine the different
#' distances to the reference genes. By default [stats::median()] is used.
#' #'
#' @return An object of class `geposan_method`. #' @return An object of class `geposan_method`.
#' #'
@ -19,8 +17,7 @@
adjacency <- function(id = "adjacency", adjacency <- function(id = "adjacency",
name = "Adjacency", name = "Adjacency",
description = "Adjacency to reference genes", description = "Adjacency to reference genes",
distance_estimate = densest, distance_estimate = densest) {
summarize = stats::median) {
method( method(
id = id, id = id,
name = name, name = name,
@ -36,8 +33,7 @@ adjacency <- function(id = "adjacency",
species_ids, species_ids,
gene_ids, gene_ids,
reference_gene_ids, reference_gene_ids,
distance_estimate, distance_estimate
summarize
), ),
{ # nolint { # nolint
# Filter distances by species and gene and summarize each # Filter distances by species and gene and summarize each
@ -50,14 +46,14 @@ adjacency <- function(id = "adjacency",
# Compute the absolute value of the difference between the # Compute the absolute value of the difference between the
# estimated distances of each gene to the reference genes. # estimated distances of each gene to the reference genes.
compute_difference <- function(distance_value, compute_difference <- function(distance_values,
comparison_ids) { comparison_ids) {
differences <- data[ comparison_distance <- data[
gene %chin% comparison_ids, gene %chin% comparison_ids,
.(difference = abs(distance_value - distance)) distance_estimate(distance)
] ]
summarize(differences$difference) abs(distance_values - comparison_distance)
} }
# Compute the differences to the reference genes. # Compute the differences to the reference genes.
@ -66,8 +62,7 @@ adjacency <- function(id = "adjacency",
difference := compute_difference( difference := compute_difference(
distance, distance,
reference_gene_ids reference_gene_ids
), )
by = gene
] ]
progress(0.5) progress(0.5)
@ -79,8 +74,7 @@ adjacency <- function(id = "adjacency",
difference := compute_difference( difference := compute_difference(
distance, distance,
reference_gene_ids[reference_gene_ids != gene] reference_gene_ids[reference_gene_ids != gene]
), )
by = gene
] ]
# Compute the final score by normalizing the difference. # Compute the final score by normalizing the difference.

View file

@ -8,8 +8,7 @@ adjacency(
id = "adjacency", id = "adjacency",
name = "Adjacency", name = "Adjacency",
description = "Adjacency to reference genes", description = "Adjacency to reference genes",
distance_estimate = densest, distance_estimate = densest
summarize = stats::median
) )
} }
\arguments{ \arguments{
@ -22,9 +21,6 @@ adjacency(
\item{distance_estimate}{A function that will be used to summarize the \item{distance_estimate}{A function that will be used to summarize the
distance values for each gene. See \code{\link[=densest]{densest()}} for the default distance values for each gene. See \code{\link[=densest]{densest()}} for the default
implementation.} implementation.}
\item{summarize}{A function that will be used to combine the different
distances to the reference genes. By default \code{\link[stats:median]{stats::median()}} is used.}
} }
\value{ \value{
An object of class \code{geposan_method}. An object of class \code{geposan_method}.