Remove species adjacency method

This commit is contained in:
Elias Projahn 2022-06-28 13:06:59 +02:00
parent ab545a415c
commit 0ae6836d86
6 changed files with 0 additions and 200 deletions

View file

@ -27,6 +27,5 @@ export(preset)
export(proximity)
export(ranking)
export(result)
export(species_adjacency)
export(validate)
import(data.table)

View file

@ -37,7 +37,6 @@ all_methods <- function() {
correlation(),
neural(),
adjacency(),
species_adjacency(),
proximity()
)
}

View file

@ -39,8 +39,6 @@ densest <- function(data) {
#'
#' @return An object of class `geposan_method`.
#'
#' @seealso [species_adjacency()]
#'
#' @export
adjacency <- function(id = "adjacency",
name = "Adjacency",

View file

@ -1,156 +0,0 @@
#' Score genes based on their adjacency to the reference genes within species.
#'
#' For each gene and species, the method will first combine the gene's distances
#' to the reference genes within that species. Afterwards, the results are
#' summarized across species and determine the gene's score.
#'
#' The score is one minus the summarized distance divided by the largest
#' summarized distance of all genes. Missing summarized distances are ignored
#' when determining that largest distance, so a gene without any usable
#' distance only affects its own score.
#'
#' @param id Unique ID for the method and its results.
#' @param name Human readable name for the method.
#' @param description Method description.
#' @param distance_estimate Function for combining the distance differences
#'   within one species.
#' @param summarize Function for summarizing the distance values across species.
#'
#' @return An object of class `geposan_method`.
#'
#' @seealso [adjacency()]
#'
#' @export
species_adjacency <- function(id = "species_adjacency",
                              name = "Species adj.",
                              description = "Species adjacency",
                              distance_estimate = stats::median,
                              summarize = stats::median) {
  method(
    id = id,
    name = name,
    description = description,
    function(preset, progress) {
      species_ids <- preset$species_ids
      gene_ids <- preset$gene_ids
      reference_gene_ids <- preset$reference_gene_ids

      # NOTE(review): the cache name and the method ID passed to `result()`
      # below are fixed strings and do not follow a custom `id` — confirm
      # that this is intended.
      cached(
        "species_adjacency",
        c(
          species_ids,
          gene_ids,
          reference_gene_ids,
          distance_estimate,
          summarize
        ),
        { # nolint
          # Prefilter distances.
          data <- geposan::distances[
            species %chin% species_ids & gene %chin% gene_ids
          ]

          # The per-species loop accounts for 90% of the reported progress.
          progress_state <- 0.0
          progress_step <- 0.9 / length(species_ids)

          # Iterate through all species and find the distance estimates
          # within that species.
          for (species_id in species_ids) {
            # For all genes, compute the distance to one reference gene at
            # a time in one go. The result is stored in a column that is
            # named after the reference gene.
            for (reference_gene_id in reference_gene_ids) {
              comparison_distance <- data[
                species == species_id & gene == reference_gene_id,
                distance
              ]

              if (length(comparison_distance) != 1) {
                # If we don't have a comparison distance, we can't
                # compute a difference. This happens, if the species
                # doesn't have the reference gene.
                data[
                  species == species_id & gene %chin% gene_ids,
                  (reference_gene_id) := NA_integer_
                ]
              } else {
                data[
                  species == species_id & gene %chin% gene_ids,
                  (reference_gene_id) := abs(distance - comparison_distance)
                ]
              }
            }

            # Combine the distances to the different reference genes into
            # one value using the provided function.
            data[
              species == species_id & gene %chin% gene_ids,
              combined_distance := as.numeric(
                distance_estimate(stats::na.omit(
                  # Convert the data.table subset into a vector to get
                  # the correct na.omit behavior.
                  as.matrix(.SD)[1, ]
                ))
              ),
              .SDcols = reference_gene_ids,
              by = gene
            ]

            progress_state <- progress_state + progress_step
            progress(progress_state)
          }

          progress(0.9)

          # Blank out each reference gene's distance to itself, so that it
          # does not contribute to that gene's own estimate.
          for (reference_gene_id in reference_gene_ids) {
            data[gene == reference_gene_id, (reference_gene_id) := NA]
          }

          # Recompute the combined distance for the reference genes.
          data[
            gene %chin% reference_gene_ids,
            combined_distance := as.numeric(
              distance_estimate(stats::na.omit(
                as.matrix(.SD)[1, ]
              ))
            ),
            .SDcols = reference_gene_ids,
            by = list(species, gene)
          ]

          # Combine the distances into one value.
          results <- data[,
            .(
              summarized_distances = as.numeric(
                summarize(stats::na.omit(combined_distance))
              )
            ),
            by = gene
          ]

          # Compute the final score by normalizing the difference. Missing
          # values have to be excluded from the maximum: otherwise a single
          # gene without usable distances would turn every score into NA.
          results[
            ,
            score := 1 - summarized_distances /
              max(summarized_distances, na.rm = TRUE)
          ]

          progress(1.0)

          result(
            method = "species_adjacency",
            scores = results[, .(gene, score)],
            details = list(
              data = data,
              results = results
            )
          )
        }
      )
    }
  )
}

View file

@ -34,6 +34,3 @@ In this case, the distance data that is available for one gene is first
combined. The resulting value is compared to the reference genes and
determines the gene's score in relation to other genes.
}
\seealso{
\code{\link[=species_adjacency]{species_adjacency()}}
}

View file

@ -1,37 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/method_species_adjacency.R
\name{species_adjacency}
\alias{species_adjacency}
\title{Score genes based on their adjacency to the reference genes within species.}
\usage{
species_adjacency(
id = "species_adjacency",
name = "Species adj.",
description = "Species adjacency",
distance_estimate = stats::median,
summarize = stats::median
)
}
\arguments{
\item{id}{Unique ID for the method and its results.}
\item{name}{Human readable name for the method.}
\item{description}{Method description.}
\item{distance_estimate}{Function for combining the distance differences
within one species.}
\item{summarize}{Function for summarizing the distance values across species.}
}
\value{
An object of class \code{geposan_method}.
}
\description{
For each gene and species, the method will first combine the gene's distances
to the reference genes within that species. Afterwards, the results are
summarized across species and determine the gene's score.
}
\seealso{
\code{\link[=adjacency]{adjacency()}}
}