Remove species adjacency method

2025-10-26 10:47:25 +01:00 · 2022-06-28 13:06:59 +02:00 · 2022-06-28 13:06:59 +02:00 · 0ae6836d86
commit 0ae6836d86
parent ab545a415c
6 changed files with 0 additions and 200 deletions
--- a/1
+++ b/1
@ -27,6 +27,5 @@ export(preset)
 export(proximity)
 export(ranking)
 export(result)
 export(species_adjacency)
 export(validate)
 import(data.table)
--- a/R/method.R
+++ b/R/method.R
@ -37,7 +37,6 @@ all_methods <- function() {
    correlation(),
    neural(),
    adjacency(),
    species_adjacency(),
    proximity()
  )
 }
--- a/R/method_adjacency.R
+++ b/R/method_adjacency.R
@ -39,8 +39,6 @@ densest <- function(data) {
 #'
 #' @return An object of class `geposan_method`.
 #'
 #' @seealso [species_adjacency()]
 #'
 #' @export
 adjacency <- function(id = "adjacency",
                      name = "Adjacency",
--- a/R/method_species_adjacency.R
+++ b/R/method_species_adjacency.R
@ -1,156 +0,0 @@
 #' Score genes based on their adjacency to the reference genes within species.
 #'
 #' For each gene and species, the method will first combine the gene's distances
 #' to the reference genes within that species. Afterwards, the results are
 #' summarized across species and determine the gene's score.
 #'
 #' @param id Unique ID for the method and its results.
 #' @param name Human readable name for the method.
 #' @param description Method description.
 #' @param distance_estimate Function for combining the distance differences
 #'   within one species.
 #' @param summarize Function for summarizing the distance values across species.
 #'
 #' @return An object of class `geposan_method`.
 #'
 #' @seealso [adjacency()]
 #'
 #' @export
 species_adjacency <- function(id = "species_adjacency",
                               name = "Species adj.",
                               description = "Species adjacency",
                               distance_estimate = stats::median,
                               summarize = stats::median) {
  method(
    id = id,
    name = name,
    description = description,
    function(preset, progress) {
      species_ids <- preset$species_ids
      gene_ids <- preset$gene_ids
      reference_gene_ids <- preset$reference_gene_ids
      # Everything that influences the outcome (the preset's IDs and the two
      # user supplied functions) is handed to `cached()` alongside the
      # expression that computes the result.
      cached(
        "species_adjacency",
        c(
          species_ids,
          gene_ids,
          reference_gene_ids,
          distance_estimate,
          summarize
        ),
        { # nolint
          # Prefilter distances.
          data <- geposan::distances[
            species %chin% species_ids & gene %chin% gene_ids
          ]
          # The per-species loop accounts for the first 90 % of the reported
          # progress.
          progress_state <- 0.0
          progress_step <- 0.9 / length(species_ids)
          # Iterate through all species and find the distance
          # estimates within that species.
          for (species_id in species_ids) {
            # For all genes, compute the distance to one reference
            # gene at a time in one go.
            for (reference_gene_id in reference_gene_ids) {
              comparison_distance <- data[
                species == species_id &
                  gene == reference_gene_id,
                distance
              ]
              # `eval(column)` yields the current reference gene ID, so
              # each reference gene gets a column named after itself.
              column <- quote(reference_gene_id)
              if (length(comparison_distance) != 1) {
                # If we don't have a comparison distance, we
                # can't compute a difference. This happens, if
                # the species doesn't have the reference gene.
                data[
                  species == species_id &
                    gene %chin% gene_ids,
                  eval(column) := NA_integer_
                ]
              } else {
                data[
                  species == species_id &
                    gene %chin% gene_ids,
                  eval(column) :=
                    abs(distance - comparison_distance)
                ]
              }
            }
            # Combine the distances to the different reference genes
            # into one value using the provided function.
            data[
              species == species_id &
                gene %chin% gene_ids,
              combined_distance := as.numeric(
                distance_estimate(stats::na.omit(
                  # Convert the data.table subset into a
                  # vector to get the correct na.omit
                  # behavior.
                  as.matrix(.SD)[1, ]
                ))
              ),
              .SDcols = reference_gene_ids,
              by = gene
            ]
            progress_state <- progress_state + progress_step
            progress(progress_state)
          }
          progress(0.9)
          # Remove the distances between the reference genes.
          # Only each reference gene's distance to itself is blanked here.
          for (reference_gene_id in reference_gene_ids) {
            column <- quote(reference_gene_id)
            data[gene == reference_gene_id, eval(column) := NA]
          }
          # Recompute the combined distance for the reference genes.
          data[
            gene %chin% reference_gene_ids,
            combined_distance := as.numeric(
              distance_estimate(stats::na.omit(
                as.matrix(.SD)[1, ]
              ))
            ),
            .SDcols = reference_gene_ids,
            by = list(species, gene)
          ]
          # Combine the distances into one value.
          results <- data[,
            .(
              summarized_distances = as.numeric(
                summarize(stats::na.omit(combined_distance))
              )
            ),
            by = gene
          ]
          # Compute the final score by normalizing the difference.
          # A gene without any usable distance has a missing summary, e.g.
          # the reference gene itself if it is the only one, because its
          # own column was blanked above. Such values are ignored when
          # looking up the maximum; otherwise a single missing summary
          # would turn the score of every gene into NA. The affected
          # genes themselves keep a missing score.
          results[
            ,
            score := 1 - summarized_distances /
              max(summarized_distances, na.rm = TRUE)
          ]
          progress(1.0)
          result(
            method = "species_adjacency",
            scores = results[, .(gene, score)],
            details = list(
              data = data,
              results = results
            )
          )
        }
      )
    }
  )
 }
--- a/man/adjacency.Rd
+++ b/man/adjacency.Rd
@ -34,6 +34,3 @@ In this case, the distance data that is available for one gene is first
 combined. The resulting value is compared to the reference genes and
 determines the gene's score in relation to other genes.
 }
 \seealso{
 \code{\link[=species_adjacency]{species_adjacency()}}
 }
--- a/man/species_adjacency.Rd
+++ b/man/species_adjacency.Rd
@ -1,37 +0,0 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/method_species_adjacency.R
 \name{species_adjacency}
 \alias{species_adjacency}
 \title{Score genes based on their adjacency to the reference genes within species.}
 \usage{
 species_adjacency(
  id = "species_adjacency",
  name = "Species adj.",
  description = "Species adjacency",
  distance_estimate = stats::median,
  summarize = stats::median
 )
 }
 \arguments{
 \item{id}{Unique ID for the method and its results.}
 \item{name}{Human readable name for the method.}
 \item{description}{Method description.}
 \item{distance_estimate}{Function for combining the distance differences
 within one species.}
 \item{summarize}{Function for summarizing the distance values across species.}
 }
 \value{
 An object of class \code{geposan_method}.
 }
 \description{
 For each gene and species, the method will first combine the gene's distances
 to the reference genes within that species. Afterwards, the results are
 summarized across species and determine the gene's score.
 }
 \seealso{
 \code{\link[=adjacency]{adjacency()}}
 }