From 0ae6836d865d6553f772c5b71b9f61e8afe5fa66 Mon Sep 17 00:00:00 2001 From: Elias Projahn Date: Tue, 28 Jun 2022 13:06:59 +0200 Subject: [PATCH] Remove species adjacency method --- NAMESPACE | 1 - R/method.R | 1 - R/method_adjacency.R | 2 - R/method_species_adjacency.R | 156 ----------------------------------- man/adjacency.Rd | 3 - man/species_adjacency.Rd | 37 --------- 6 files changed, 200 deletions(-) delete mode 100644 R/method_species_adjacency.R delete mode 100644 man/species_adjacency.Rd diff --git a/NAMESPACE b/NAMESPACE index 11d7bb8..adb35de 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,6 +27,5 @@ export(preset) export(proximity) export(ranking) export(result) -export(species_adjacency) export(validate) import(data.table) diff --git a/R/method.R b/R/method.R index 2d909b1..b7dd34c 100644 --- a/R/method.R +++ b/R/method.R @@ -37,7 +37,6 @@ all_methods <- function() { correlation(), neural(), adjacency(), - species_adjacency(), proximity() ) } diff --git a/R/method_adjacency.R b/R/method_adjacency.R index de0c68c..b841d61 100644 --- a/R/method_adjacency.R +++ b/R/method_adjacency.R @@ -39,8 +39,6 @@ densest <- function(data) { #' #' @return An object of class `geposan_method`. #' -#' @seealso [species_adjacency()] -#' #' @export adjacency <- function(id = "adjacency", name = "Adjacency", diff --git a/R/method_species_adjacency.R b/R/method_species_adjacency.R deleted file mode 100644 index 0b57ba6..0000000 --- a/R/method_species_adjacency.R +++ /dev/null @@ -1,156 +0,0 @@ -#' Score genes based on their adjacency to the reference genes within species. -#' -#' For each gene and species, the method will first combine the gene's distances -#' to the reference genes within that species. Afterwards, the results are -#' summarized across species and determine the gene's score. -#' -#' @param id Unique ID for the method and its results. -#' @param name Human readable name for the method. -#' @param description Method description. -#' @param distance_estimate Function for combining the distance differences -#' within one species. -#' @param summarize Function for summarizing the distance values across species. -#' -#' @return An object of class `geposan_method`. -#' -#' @seealso [adjacency()] -#' -#' @export -species_adjacency <- function(id = "species_adjacency", - name = "Species adj.", - description = "Species adjacency", - distance_estimate = stats::median, - summarize = stats::median) { - method( - id = id, - name = name, - description = description, - function(preset, progress) { - species_ids <- preset$species_ids - gene_ids <- preset$gene_ids - reference_gene_ids <- preset$reference_gene_ids - - cached( - "species_adjacency", - c( - species_ids, - gene_ids, - reference_gene_ids, - distance_estimate, - summarize - ), - { # nolint - # Prefilter distances. - data <- geposan::distances[ - species %chin% species_ids & gene %chin% gene_ids - ] - - progress_state <- 0.0 - progress_step <- 0.9 / length(species_ids) - - # Iterate through all species and find the distance - # estimates within that species. - for (species_id in species_ids) { - # For all genes, compute the distance to one reference - # gene at a time in one go. - for (reference_gene_id in reference_gene_ids) { - comparison_distance <- data[ - species == species_id & - gene == reference_gene_id, - distance - ] - - column <- quote(reference_gene_id) - - if (length(comparison_distance) != 1) { - # If we don't have a comparison distance, we - # can't compute a difference. This happens, if - # the species doesn't have the reference gene. - data[ - species == species_id & - gene %chin% gene_ids, - eval(column) := NA_integer_ - ] - } else { - data[ - species == species_id & - gene %chin% gene_ids, - eval(column) := - abs(distance - comparison_distance) - ] - } - } - - # Combine the distances to the different reference genes - # into one value using the provided function. - data[ - species == species_id & - gene %chin% gene_ids, - combined_distance := as.numeric( - distance_estimate(stats::na.omit( - # Convert the data.table subset into a - # vector to get the correct na.omit - # behavior. - as.matrix(.SD)[1, ] - )) - ), - .SDcols = reference_gene_ids, - by = gene - ] - - progress_state <- progress_state + progress_step - progress(progress_state) - } - - progress(0.9) - - # Remove the distances between the reference genes. - for (reference_gene_id in reference_gene_ids) { - column <- quote(reference_gene_id) - data[gene == reference_gene_id, eval(column) := NA] - } - - # Recompute the combined distance for the reference genes. - data[ - gene %chin% reference_gene_ids, - combined_distance := as.numeric( - distance_estimate(stats::na.omit( - as.matrix(.SD)[1, ] - )) - ), - .SDcols = reference_gene_ids, - by = list(species, gene) - ] - - # Combine the distances into one value. - results <- data[, - .( - summarized_distances = as.numeric( - summarize(stats::na.omit(combined_distance)) - ) - ), - by = gene - ] - - # Compute the final score by normalizing the difference. - results[ - , - score := 1 - summarized_distances / - max(summarized_distances) - ] - - progress(1.0) - - result( - method = "species_adjacency", - scores = results[, .(gene, score)], - details = list( - data = data, - results = results - ) - ) - } - ) - } - ) -} diff --git a/man/adjacency.Rd b/man/adjacency.Rd index a5de928..4ff948d 100644 --- a/man/adjacency.Rd +++ b/man/adjacency.Rd @@ -34,6 +34,3 @@ In this case, the distance data that is available for one gene is first combined. The resulting value is compared to the reference genes and determines the gene's score in relation to other genes. } -\seealso{ -\code{\link[=species_adjacency]{species_adjacency()}} -} diff --git a/man/species_adjacency.Rd b/man/species_adjacency.Rd deleted file mode 100644 index d2b64bd..0000000 --- a/man/species_adjacency.Rd +++ /dev/null @@ -1,37 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/method_species_adjacency.R -\name{species_adjacency} -\alias{species_adjacency} -\title{Score genes based on their adjacency to the reference genes within species.} -\usage{ -species_adjacency( - id = "species_adjacency", - name = "Species adj.", - description = "Species adjacency", - distance_estimate = stats::median, - summarize = stats::median -) -} -\arguments{ -\item{id}{Unique ID for the method and its results.} - -\item{name}{Human readable name for the method.} - -\item{description}{Method description.} - -\item{distance_estimate}{Function for combining the distance differences -within one species.} - -\item{summarize}{Function for summarizing the distance values across species.} -} -\value{ -An object of class \code{geposan_method}. -} -\description{ -For each gene and species, the method will first combine the gene's distances -to the reference genes within that species. Afterwards, the results are -summarized across species and determine the gene's score. -} -\seealso{ -\code{\link[=adjacency]{adjacency()}} -}