geposan/R/method_adjacency.R

#' Score genes based on their proximity to the reference genes.
#'
#' For every gene, the distance values that are available across the selected
#' species are first combined into a single estimate using
#' `distance_estimate`. The same function is then applied to the per-gene
#' estimates of the reference genes to obtain one reference value. The
#' absolute difference between a gene's estimate and that reference value is
#' divided by the largest difference among all genes and subtracted from one.
#' A gene matching the reference value therefore receives a score of 1 and
#' the gene deviating the most receives a score of 0.
#'
#' Reference genes are compared against the *other* reference genes only; a
#' reference gene never contributes to its own reference value.
#'
#' @param id Unique ID for the method and its results. It is also used as the
#'   cache identifier and as the method ID of the returned result.
#' @param name Human readable name for the method.
#' @param description Method description.
#' @param distance_estimate A function that will be used to summarize the
#'   distance values for each gene. It is called with a vector of distances
#'   and is expected to return a single value. See [densest()] for the
#'   default implementation.
#'
#' @return An object of class `geposan_method`.
#'
#' @export
adjacency <- function(id = "adjacency",
                      name = "Adjacency",
                      description = "Adjacency to reference genes",
                      distance_estimate = densest) {
  method(
    id = id,
    name = name,
    description = description,
    help = paste0(
      "Adjacency to the reference genes across species. This method penalizes ",
      "genes that do not occur in the region typical for the reference genes, ",
      "without artificially defining a fixed boundary."
    ),
    function(preset, progress) {
      species_ids <- preset$species_ids
      gene_ids <- preset$gene_ids
      reference_gene_ids <- preset$reference_gene_ids

      # Everything that influences the outcome is part of the cache key.
      cached(
        id,
        c(
          species_ids,
          gene_ids,
          reference_gene_ids,
          distance_estimate
        ),
        { # nolint
          # Filter distances by species and gene and summarize each
          # gene's distance values using the estimation function. The
          # resulting table has one row per gene.
          data <- geposan::distances[
            species %chin% species_ids & gene %chin% gene_ids,
            .(distance = as.numeric(distance_estimate(distance))),
            by = gene
          ]

          # Compute the absolute value of the difference between the given
          # distance estimates and the combined estimate of the genes listed
          # in `comparison_ids`.
          compute_difference <- function(distance_values,
                                         comparison_ids) {
            comparison_distance <- data[
              gene %chin% comparison_ids,
              distance_estimate(distance)
            ]

            abs(distance_values - comparison_distance)
          }

          # Compute the differences to the reference genes. The reference
          # value is the same for all non-reference genes, so this can be
          # done for the whole column at once.
          data[
            !gene %chin% reference_gene_ids,
            difference := compute_difference(
              distance,
              reference_gene_ids
            )
          ]

          progress(0.5)

          # Exclude the reference gene itself when computing its
          # difference. This has to be grouped by gene: only then is `gene`
          # a single ID inside `j`. Without the grouping,
          # `reference_gene_ids != gene` would be an element-wise comparison
          # against the whole column and select an arbitrary comparison set.
          data[
            gene %chin% reference_gene_ids,
            difference := compute_difference(
              distance,
              reference_gene_ids[reference_gene_ids != gene]
            ),
            by = gene
          ]

          # Compute the final score by normalizing the difference.
          data[, score := 1 - difference / max(difference)]

          progress(1.0)

          # Report the result under the configured method ID (which defaults
          # to "adjacency") so that it matches the ID of the method itself.
          result(
            method = id,
            scores = data[, .(gene, score)],
            details = list(data = data)
          )
        }
      )
    }
  )
}
Restructure classes and their responsibilities 2021-12-16 13:01:44 +01:00			`#' Score genes based on their proximity to the reference genes.`
			`#'`
Add species adjacency method 2022-01-17 20:11:07 +01:00			`#' In this case, the distance data that is available for one gene is first`
			`#' combined. The resulting value is compared to the reference genes and`
			`#' determines the gene's score in relation to other genes.`
			`#'`
Allow customizing method metadata 2022-06-22 11:20:39 +02:00			`#' @param id Unique ID for the method and its results.`
			`#' @param name Human readable name for the method.`
			`#' @param description Method description.`
Add species adjacency method 2022-01-17 20:11:07 +01:00			`#' @param distance_estimate A function that will be used to summarize the`
			`#' distance values for each gene. See [densest()] for the default`
			`#' implementation.`
Restructure classes and their responsibilities 2021-12-16 13:01:44 +01:00			`#'`
			#' @return An object of class `geposan_method`.
			`#'`
			`#' @export`
Allow customizing method metadata 2022-06-22 11:20:39 +02:00			`adjacency <- function(id = "adjacency",`
			`name = "Adjacency",`
			`description = "Adjacency to reference genes",`
adjacency: Combine reference genes first 2022-08-18 19:14:37 +02:00			`distance_estimate = densest) {`
Reindent code to use just two spaces 2022-05-26 12:42:19 +02:00			`method(`
Allow customizing method metadata 2022-06-22 11:20:39 +02:00			`id = id,`
			`name = name,`
			`description = description,`
method: Add context help 2024-01-31 12:14:55 +01:00			`help = paste0(`
			`"Adjacency to the reference genes across species. This method penalizes ",`
			`"genes that do not occur in the region typical for the reference genes, ",`
			`"without artificially defining a fixed boundary."`
			`),`
Reindent code to use just two spaces 2022-05-26 12:42:19 +02:00			`function(preset, progress) {`
			`species_ids <- preset$species_ids`
			`gene_ids <- preset$gene_ids`
			`reference_gene_ids <- preset$reference_gene_ids`
Add new method adjacency 2021-11-25 20:55:11 +01:00
Reindent code to use just two spaces 2022-05-26 12:42:19 +02:00			`cached(`
Namespace all method IDs 2022-08-12 12:41:56 +02:00			`id,`
Reindent code to use just two spaces 2022-05-26 12:42:19 +02:00			`c(`
			`species_ids,`
			`gene_ids,`
			`reference_gene_ids,`
adjacency: Combine reference genes first 2022-08-18 19:14:37 +02:00			`distance_estimate`
Reindent code to use just two spaces 2022-05-26 12:42:19 +02:00			`),`
			`{ # nolint`
			`# Filter distances by species and gene and summarize each`
			`# gene's distance values using the estimation function.`
			`data <- geposan::distances[`
			`species %chin% species_ids & gene %chin% gene_ids,`
			`.(distance = as.numeric(distance_estimate(distance))),`
			`by = gene`
			`]`
Add new method adjacency 2021-11-25 20:55:11 +01:00
Reindent code to use just two spaces 2022-05-26 12:42:19 +02:00			`# Compute the absolute value of the difference between the`
			`# estimated distances of each gene to the reference genes.`
adjacency: Combine reference genes first 2022-08-18 19:14:37 +02:00			`compute_difference <- function(distance_values,`
Reindent code to use just two spaces 2022-05-26 12:42:19 +02:00			`comparison_ids) {`
adjacency: Combine reference genes first 2022-08-18 19:14:37 +02:00			`comparison_distance <- data[`
Reindent code to use just two spaces 2022-05-26 12:42:19 +02:00			`gene %chin% comparison_ids,`
adjacency: Combine reference genes first 2022-08-18 19:14:37 +02:00			`distance_estimate(distance)`
Reindent code to use just two spaces 2022-05-26 12:42:19 +02:00			`]`
Add new method adjacency 2021-11-25 20:55:11 +01:00
adjacency: Combine reference genes first 2022-08-18 19:14:37 +02:00			`abs(distance_values - comparison_distance)`
Reindent code to use just two spaces 2022-05-26 12:42:19 +02:00			`}`
Add new method adjacency 2021-11-25 20:55:11 +01:00
Reindent code to use just two spaces 2022-05-26 12:42:19 +02:00			`# Compute the differences to the reference genes.`
			`data[`
			`!gene %chin% reference_gene_ids,`
			`difference := compute_difference(`
			`distance,`
			`reference_gene_ids`
adjacency: Combine reference genes first 2022-08-18 19:14:37 +02:00			`)`
Reindent code to use just two spaces 2022-05-26 12:42:19 +02:00			`]`
Add new method adjacency 2021-11-25 20:55:11 +01:00
Reindent code to use just two spaces 2022-05-26 12:42:19 +02:00			`progress(0.5)`
Add new method adjacency 2021-11-25 20:55:11 +01:00
Reindent code to use just two spaces 2022-05-26 12:42:19 +02:00			`# Exclude the reference gene itself when computing its`
			`# difference.`
			`data[`
			`gene %chin% reference_gene_ids,`
			`difference := compute_difference(`
			`distance,`
			`reference_gene_ids[reference_gene_ids != gene]`
adjacency: Combine reference genes first 2022-08-18 19:14:37 +02:00			`)`
Reindent code to use just two spaces 2022-05-26 12:42:19 +02:00			`]`
adjacency: Make distance estimation customizable 2022-01-09 20:21:27 +01:00
Reindent code to use just two spaces 2022-05-26 12:42:19 +02:00			`# Compute the final score by normalizing the difference.`
			`data[, score := 1 - difference / max(difference)]`
Add new method adjacency 2021-11-25 20:55:11 +01:00
Reindent code to use just two spaces 2022-05-26 12:42:19 +02:00			`progress(1.0)`
Add new method adjacency 2021-11-25 20:55:11 +01:00
Reindent code to use just two spaces 2022-05-26 12:42:19 +02:00			`result(`
			`method = "adjacency",`
			`scores = data[, .(gene, score)],`
			`details = list(data = data)`
			`)`
Restructure classes and their responsibilities 2021-12-16 13:01:44 +01:00			`}`
Reindent code to use just two spaces 2022-05-26 12:42:19 +02:00			`)`
			`}`
			`)`
Add new method adjacency 2021-11-25 20:55:11 +01:00			`}`