Restructure classes and their responsibilities

2025-10-26 10:47:25 +01:00 · 2021-12-16 13:01:44 +01:00 · 2021-12-16 13:01:44 +01:00 · e2b93babe5
commit e2b93babe5
parent 01ec301d6d
27 changed files with 974 additions and 634 deletions
--- a/R/adjacency.R
+++ b/R/adjacency.R
@ -1,81 +1,89 @@
-# Score genes based on their proximity to the reference genes.
-#
-# This method finds the distance value with the maximum density for each gene
-# (i.e. the mode of its estimated distribution). Genes are scored by comparing
-# those distance values with the values of the reference genes.
-adjacency <- function(preset, progress = NULL) {
-    species_ids <- preset$species_ids
-    gene_ids <- preset$gene_ids
-    reference_gene_ids <- preset$reference_gene_ids
+#' Score genes based on their proximity to the reference genes.
+#'
+#' This method finds the distance value with the maximum density for each gene
+#' (i.e. the mode of its estimated distribution). Genes are scored by comparing
+#' those distance values with the values of the reference genes.
+#'
+#' @return An object of class `geposan_method`.
+#'
+#' @export
+adjacency <- function() {
+    method(
+        id = "adjacency",
+        name = "Adjacency",
+        description = "Adjacency to reference genes",
+        function(preset, progress) {
+            species_ids <- preset$species_ids
+            gene_ids <- preset$gene_ids
+            reference_gene_ids <- preset$reference_gene_ids

-    cached("adjacency", c(species_ids, gene_ids, reference_gene_ids), {
-        # Get the virtual distance value with the highest density.
-        compute_densest_distance <- function(distances) {
-            if (length(distances) <= 2) {
-                mean(distances)
-            } else {
-                d <- stats::density(distances)
-                d$x[which.max(d$y)]
-            }
+            cached("adjacency", c(species_ids, gene_ids, reference_gene_ids), {
+                # Get the virtual distance value with the highest density.
+                compute_densest_distance <- function(distances) {
+                    if (length(distances) <= 2) {
+                        mean(distances)
+                    } else {
+                        d <- stats::density(distances)
+                        d$x[which.max(d$y)]
+                    }
+                }
+
+                # Filter distances by species and gene and find the distance
+                # with the highest density of values for each gene.
+                data <- geposan::distances[
+                    species %chin% species_ids & gene %chin% gene_ids,
+                    .(densest_distance = compute_densest_distance(distance)),
+                    by = gene
+                ]
+
+                # Compute the absolute value of the difference between the
+                # provided densest distance value in comparison to the mean of
+                # the densest distances of the comparison genes.
+                compute_difference <- function(densest_distance,
+                                               comparison_ids) {
+                    # Get the mean of the densest distances of the reference
+                    # genes.
+                    mean_densest_distance <- data[
+                        gene %chin% comparison_ids,
+                        mean(densest_distance)
+                    ]
+
+                    abs(densest_distance - mean_densest_distance)
+                }
+
+                # Compute the differences to the reference genes.
+                data[
+                    !gene %chin% reference_gene_ids,
+                    difference := compute_difference(
+                        densest_distance,
+                        reference_gene_ids
+                    )
+                ]
+
+                progress(0.5)
+
+                # Exclude the reference gene itself when computing its
+                # difference.
+                data[
+                    gene %chin% reference_gene_ids,
+                    difference := compute_difference(
+                        densest_distance,
+                        reference_gene_ids[reference_gene_ids != gene]
+                    ),
+                    by = gene
+                ]
+
+                # Compute the final score by normalizing the difference.
+                data[, score := 1 - difference / max(difference)]
+
+                progress(1.0)
+
+                result(
+                    method = "adjacency",
+                    scores = data[, .(gene, score)],
+                    details = list(data = data)
+                )
+            })
        }
-
-        # Filter distances by species and gene and find the distance with the
-        # highest density of values for each gene.
-        data <- geposan::distances[
-            species %chin% species_ids & gene %chin% gene_ids,
-            .(densest_distance = compute_densest_distance(distance)),
-            by = gene
-        ]
-
-        # Compute the absolute value of the difference between the provided
-        # densest distance value in comparison to the mean of the densest
-        # distances of the comparison genes.
-        compute_difference <- function(densest_distance, comparison_ids) {
-            # Get the mean of the densest distances of the reference genes.
-            mean_densest_distance <- data[
-                gene %chin% comparison_ids,
-                mean(densest_distance)
-            ]
-
-            abs(densest_distance - mean_densest_distance)
-        }
-
-        # Compute the differences to the reference genes.
-        data[
-            !gene %chin% reference_gene_ids,
-            difference := compute_difference(
-                densest_distance,
-                reference_gene_ids
-            )
-        ]
-
-        if (!is.null(progress)) {
-            progress(0.5)
-        }
-
-        # Exclude the reference gene itself when computing its difference.
-        data[
-            gene %chin% reference_gene_ids,
-            difference := compute_difference(
-                densest_distance,
-                reference_gene_ids[reference_gene_ids != gene]
-            ),
-            by = gene
-        ]
-
-        # Compute the final score by normalizing the difference.
-        data[, score := 1 - difference / max(difference)]
-
-        if (!is.null(progress)) {
-            progress(1.0)
-        }
-
-        structure(
-            list(
-                results = data[, .(gene, score)],
-                details = data
-            ),
-            class = "geposan_method_results"
-        )
-    })
+    )
 }
--- a/R/analyze.R
+++ b/R/analyze.R
@ -1,16 +1,17 @@
-#' Analyze by applying the specified preset.
+#' Analyze genes based on position data.
 #'
 #' @param preset The preset to use which should be created using [preset()].
 #' @param progress A function to be called for progress information. The
 #'   function should accept a number between 0.0 and 1.0 for the current
-#'   progress.
+#'   progress. If no function is provided, a simple text progress bar will be
+#'   shown.
 #'
 #' @returns An object containing the results of the analysis with the following
 #'   items:
 #'   \describe{
 #'     \item{`preset`}{The preset that was used.}
-#'     \item{`weights`}{The optimal weights for ranking the reference genes.}
-#'     \item{`ranking`}{The optimal ranking created using the weights.}
+#'     \item{`scores`}{Table containing all scores for each gene.}
+#'     \item{`results`}{Results from the different methods including details.}
 #'   }
 #'
 #' @export
@ -19,80 +20,69 @@ analyze <- function(preset, progress = NULL) {
        stop("Preset is invalid. Use geposan::preset() to create one.")
    }

-    # Available methods by ID.
-    #
-    # A method describes a way to perform a computation on gene distance data
-    # that results in a single score per gene. The function should accept the
-    # preset to apply (see [preset()]) and an optional progress function (that
-    # may be called with a number between 0.0 and 1.0) as its parameters.
-    #
-    # The function should return a [data.table] with the following columns:
-    #
-    #  - `gene` Gene ID of the processed gene.
-    #  - `score` Score for the gene between 0.0 and 1.0.
-    methods <- list(
-        "clusteriness" = clusteriness,
-        "correlation" = correlation,
-        "neural" = neural,
-        "adjacency" = adjacency,
-        "proximity" = proximity
-    )
+    if (is.null(progress)) {
+        progress_bar <- progress::progress_bar$new()
+        progress_bar$update(0.0)

-    analysis <- cached("analysis", preset, {
-        total_progress <- 0.0
-        method_count <- length(preset$methods)
-        results <- data.table(gene = preset$gene_ids)
-
-        for (method_id in preset$methods) {
-            method_progress <- if (!is.null(progress)) {
-                function(p) {
-                    progress(total_progress + p / method_count)
+        progress <- function(progress_value) {
+            if (!progress_bar$finished) {
+                progress_bar$update(progress_value)
+                if (progress_value >= 1.0) {
+                    progress_bar$terminate()
                }
            }
-
-            method_results <- methods[[method_id]](
-                preset,
-                progress = method_progress
-            )$results
-
-            setnames(method_results, "score", method_id)
-
-            results <- merge(
-                results,
-                method_results,
-                by = "gene"
-            )
-
-            total_progress <- total_progress + 1 / method_count
        }
-
-        results <- structure(
-            results,
-            class = c("geposan_results", class(results))
-        )
-
-        weights <- optimal_weights(
-            results,
-            preset$methods,
-            preset$reference_gene_ids,
-            target = preset$optimization_target
-        )
-
-        ranking <- ranking(results, weights)
-
-        structure(
-            list(
-                preset = preset,
-                weights = weights,
-                ranking = ranking
-            ),
-            class = "geposan_analysis"
-        )
-    })
-
-    if (!is.null(progress)) {
-        progress(1.0)
    }

-    analysis
+    progress_buffer <- 0.0
+    method_count <- length(preset$methods)
+
+    method_progress <- function(progress_value) {
+        progress(progress_buffer + progress_value / method_count)
+    }
+
+    scores <- data.table(gene = preset$gene_id)
+    results <- list()
+
+    for (method in preset$methods) {
+        method_results <- method$func(preset, method_progress)
+
+        scores <- merge(scores, method_results$scores)
+        setnames(scores, "score", method$id)
+
+        results <- c(results, list(method_results))
+
+        progress_buffer <- progress_buffer + 1 / method_count
+        progress(progress_buffer)
+    }
+
+    structure(
+        list(
+            preset = preset,
+            scores = scores,
+            results = results
+        ),
+        class = "geposan_analysis"
+    )
+}
+
+#' Print an analysis object.
+#'
+#' @param x The analysis to print.
+#' @param ... Other parameters.
+#'
+#' @seealso [analyze()]
+#'
+#' @export
+print.geposan_analysis <- function(x, ...) {
+    cat("geposan analysis:\n\n")
+    print(x$preset)
+    cat("\n")
+
+    for (result in x$results) {
+        print(result)
+        cat("\n")
+    }
+
+    invisible(x)
 }
--- a/R/clusteriness.R
+++ b/R/clusteriness.R
@ -1,84 +0,0 @@
-# Perform a cluster analysis.
-#
-# This function will cluster the data using `hclust` and `cutree` (with the
-# specified height). Every cluster with at least two members qualifies for
-# further analysis. Clusters are then ranked based on their size in relation
-# to the number of values. The return value is a final score between zero and
-# one. Lower ranking clusters contribute less to this score.
-#
-# @param data The values that should be scored.
-# @param height The maximum span of values considered to be in one cluster.
-# @param weight The weight that will be given to the next largest cluster in
-#   relation to the previous one. For example, if `weight` is 0.7 (the default),
-#   the first cluster will weigh 1.0, the second 0.7, the third 0.49 etc.
-clusteriness_priv <- function(data, height = 1000000, weight = 0.7) {
-    n <- length(data)
-
-    # Return a score of 0.0 if there is just one or no value at all.
-    if (n < 2) {
-        return(0.0)
-    }
-
-    # Cluster the data and compute the cluster sizes.
-
-    tree <- stats::hclust(stats::dist(data))
-    clusters <- stats::cutree(tree, h = height)
-    cluster_sizes <- sort(tabulate(clusters), decreasing = TRUE)
-
-    # Compute the "clusteriness" score.
-
-    score <- 0.0
-
-    for (i in seq_along(cluster_sizes)) {
-        cluster_size <- cluster_sizes[i]
-
-        if (cluster_size >= 2) {
-            cluster_score <- cluster_size / n
-            score <- score + weight ^ (i - 1) * cluster_score
-        }
-    }
-
-    score
-}
-
-# Process genes clustering their distance to telomeres.
-clusteriness <- function(preset, progress = NULL) {
-    species_ids <- preset$species_ids
-    gene_ids <- preset$gene_ids
-
-    cached("clusteriness", c(species_ids, gene_ids), {
-        results <- data.table(gene = gene_ids)
-
-        # Prefilter the input data by species.
-        distances <- geposan::distances[species %chin% species_ids]
-
-        # Add an index for quickly accessing data per gene.
-        setkey(distances, gene)
-
-        genes_done <- 0
-        genes_total <- length(gene_ids)
-
-        # Perform the cluster analysis for one gene.
-        compute <- function(gene_id) {
-            data <- distances[gene_id, distance]
-            score <- clusteriness_priv(data)
-
-            if (!is.null(progress)) {
-                genes_done <<- genes_done + 1
-                progress(genes_done / genes_total)
-            }
-
-            score
-        }
-
-        structure(
-            list(
-                results = results[,
-                    score := compute(gene),
-                    by = gene
-                ]
-            ),
-            class = "geposan_method_results"
-        )
-    })
-}
--- a/R/clustering.R
+++ b/R/clustering.R
@ -0,0 +1,93 @@
+#' Perform a cluster analysis.
+#'
+#' This function will cluster the data using [stats::hclust()] and
+#' [stats::cutree()]. Every cluster with at least two members qualifies for
+#' further analysis. Clusters are then ranked based on their size in relation
+#' to the total number of values. The return value is a final score between
+#' 0.0 and 1.0. Lower ranking clusters contribute less to this score.
+#'
+#' @param data The values that should be scored.
+#' @param span The maximum span of values considered to be in one cluster.
+#' @param weight The weight that will be given to the next largest cluster in
+#'   relation to the previous one. For example, if `weight` is 0.7 (the
+#'   default), the first cluster will weigh 1.0, the second 0.7, the third 0.49
+#'   etc.
+clusteriness <- function(data, span = 1000000, weight = 0.7) {
+    n <- length(data)
+
+    # Return a score of 0.0 if there is just one or no value at all.
+    if (n < 2) {
+        return(0.0)
+    }
+
+    # Cluster the data and compute the cluster sizes.
+
+    tree <- stats::hclust(stats::dist(data))
+    clusters <- stats::cutree(tree, h = span)
+    cluster_sizes <- sort(tabulate(clusters), decreasing = TRUE)
+
+    # Compute the "clusteriness" score.
+
+    score <- 0.0
+
+    for (i in seq_along(cluster_sizes)) {
+        cluster_size <- cluster_sizes[i]
+
+        if (cluster_size >= 2) {
+            cluster_score <- cluster_size / n
+            score <- score + weight^(i - 1) * cluster_score
+        }
+    }
+
+    score
+}
+
+#' Process genes clustering their distance to telomeres.
+#'
+#' The result will be cached and can be reused for different presets, because
+#' it is independent of the reference genes in use.
+#'
+#' @return An object of class `geposan_method`.
+#'
+#' @seealso [clusteriness()]
+#'
+#' @export
+clustering <- function() {
+    method(
+        id = "clustering",
+        name = "Clustering",
+        description = "Clustering of genes",
+        function(preset, progress) {
+            species_ids <- preset$species_ids
+            gene_ids <- preset$gene_ids
+
+            cached("clustering", c(species_ids, gene_ids), {
+                scores <- data.table(gene = gene_ids)
+
+                # Prefilter the input data by species.
+                distances <- geposan::distances[species %chin% species_ids]
+
+                genes_done <- 0
+                genes_total <- length(gene_ids)
+
+                # Perform the cluster analysis for one gene.
+                compute <- function(gene_id) {
+                    data <- distances[gene == gene_id, distance]
+                    score <- clusteriness(data)
+
+                    genes_done <<- genes_done + 1
+                    progress(genes_done / genes_total)
+
+                    score
+                }
+
+                scores[, score := compute(gene), by = gene]
+
+                result(
+                    method = "clustering",
+                    scores = scores
+                )
+            })
+        }
+    )
+}
--- a/R/correlation.R
+++ b/R/correlation.R
@ -1,88 +1,101 @@
-# Compute the mean correlation coefficient comparing gene distances with a set
-# of reference genes.
-correlation <- function(preset, progress = NULL) {
-    species_ids <- preset$species_ids
-    gene_ids <- preset$gene_ids
-    reference_gene_ids <- preset$reference_gene_ids
+#' Compute the mean correlation coefficient comparing gene distances with a set
+#' of reference genes.
+#'
+#' @return An object of class `geposan_method`.
+#'
+#' @export
+correlation <- function() {
+    method(
+        id = "correlation",
+        name = "Correlation",
+        description = "Correlation with reference genes",
+        function(preset, progress) {
+            species_ids <- preset$species_ids
+            gene_ids <- preset$gene_ids
+            reference_gene_ids <- preset$reference_gene_ids

-    cached(
-        "correlation", c(species_ids, gene_ids, reference_gene_ids), {
-            # Prefilter distances by species.
-            distances <- geposan::distances[species %chin% species_ids]
+            cached(
+                "correlation",
+                c(species_ids, gene_ids, reference_gene_ids),
+                { # nolint
+                    # Prefilter distances by species.
+                    distances <- geposan::distances[species %chin% species_ids]

-            # Tranform data to get species as rows and genes as columns. We
-            # construct columns per species, because it requires fewer
-            # iterations, and transpose the table afterwards.
+                    # Tranform data to get species as rows and genes as columns.
+                    # We construct columns per species, because it requires
+                    # fewer iterations, and transpose the table afterwards.

-            data <- data.table(gene = gene_ids)
+                    data <- data.table(gene = gene_ids)

-            # Make a column containing distance data for each species.
-            for (species_id in species_ids) {
-                species_data <- distances[
-                    species == species_id,
-                    .(gene, distance)
-                ]
+                    # Make a column containing distance data for each species.
+                    for (species_id in species_ids) {
+                        species_data <- distances[
+                            species == species_id,
+                            .(gene, distance)
+                        ]

-                data <- merge(data, species_data, all.x = TRUE)
-                setnames(data, "distance", species_id)
-            }
+                        data <- merge(data, species_data, all.x = TRUE)
+                        setnames(data, "distance", species_id)
+                    }

-            # Transpose to the desired format.
-            data <- transpose(data, make.names = "gene")
+                    # Transpose to the desired format.
+                    data <- transpose(data, make.names = "gene")

-            if (!is.null(progress)) progress(0.33)
+                    progress(0.33)

-            # Take the reference data.
-            reference_data <- data[, ..reference_gene_ids]
+                    # Take the reference data.
+                    reference_data <- data[, ..reference_gene_ids]

-            # Perform the correlation between all possible pairs.
-            results <- stats::cor(
-                data[, ..gene_ids],
-                reference_data,
-                use = "pairwise.complete.obs",
-                method = "spearman"
-            )
+                    # Perform the correlation between all possible pairs.
+                    results <- stats::cor(
+                        data[, ..gene_ids],
+                        reference_data,
+                        use = "pairwise.complete.obs",
+                        method = "spearman"
+                    )

-            results <- data.table(results, keep.rownames = TRUE)
-            setnames(results, "rn", "gene")
+                    results <- data.table(results, keep.rownames = TRUE)
+                    setnames(results, "rn", "gene")

-            # Remove correlations between the reference genes themselves.
-            for (reference_gene_id in reference_gene_ids) {
-                column <- quote(reference_gene_id)
-                results[gene == reference_gene_id, eval(column) := NA]
-            }
+                    # Remove correlations between the reference genes
+                    # themselves.
+                    for (reference_gene_id in reference_gene_ids) {
+                        column <- quote(reference_gene_id)
+                        results[gene == reference_gene_id, eval(column) := NA]
+                    }

-            if (!is.null(progress)) progress(0.66)
+                    progress(0.66)

-            # Compute the final score as the mean of known correlation scores.
-            # Negative correlations will correctly lessen the score, which will
-            # be clamped to zero as its lower bound. Genes with no possible
-            # correlations at all will be assumed to have a score of 0.0.
+                    # Compute the final score as the mean of known correlation
+                    # scores. Negative correlations will correctly lessen the
+                    # score, which will be clamped to zero as its lower bound.
+                    # Genes with no possible correlations at all will be assumed
+                    # to have a score of 0.0.

-            compute_score <- function(scores) {
-                score <- mean(scores, na.rm = TRUE)
+                    compute_score <- function(scores) {
+                        score <- mean(scores, na.rm = TRUE)

-                if (is.na(score) | score < 0.0) {
-                    score <- 0.0
+                        if (is.na(score) | score < 0.0) {
+                            score <- 0.0
+                        }
+
+                        score
+                    }
+
+                    results[,
+                        score := compute_score(as.matrix(.SD)),
+                        .SDcols = reference_gene_ids,
+                        by = gene
+                    ]
+
+                    results[, .(gene, score)]
+
+                    result(
+                        method = "correlation",
+                        scores = results[, .(gene, score)],
+                        details = list(all_correlations = results)
+                    )
                }
-
-                score
-            }
-
-            results[,
-                score := compute_score(as.matrix(.SD)),
-                .SDcols = reference_gene_ids,
-                by = gene
-            ]
-
-            results[, .(gene, score)]
-
-            structure(
-                list(
-                    results = results[, .(gene, score)],
-                    all_correlations = results
-                ),
-                class = "geposan_method_results"
            )
        }
    )
--- a/R/method.R
+++ b/R/method.R
@ -0,0 +1,67 @@
+#' Describe a new method for analyzing gene position data.
+#'
+#' @param id Unique identifier for the method.
+#' @param name Human readable name.
+#' @param description Slightly longer description.
+#' @param func Function to apply the method. The function should accept two
+#'   parameters: an object of class `geposan_preset` as input and a function to
+#'   report progress information to as a numeric value. The return value should
+#'   be an object of class `geposan_result`.
+#'
+#' @return An object of class `geposan_method`.
+#'
+#' @export
+method <- function(id, name, description, func) {
+    stopifnot(is.character(id) & length(id) == 1)
+    stopifnot(is.character(name) & length(name) == 1)
+    stopifnot(is.character(description) & length(description) == 1)
+    stopifnot(is.function(func))
+
+    structure(
+        list(
+            id = id,
+            name = name,
+            description = description,
+            func = func
+        ),
+        class = "geposan_method"
+    )
+}
+
+#' Get a list of all available methods.
+#'
+#' @export
+all_methods <- function() {
+    list(
+        clustering(),
+        correlation(),
+        neural(),
+        adjacency(),
+        proximity()
+    )
+}
+
+#' Print a method object.
+#'
+#' @param x The method to print.
+#' @param ... Other parameters.
+#'
+#' @seealso [method()]
+#'
+#' @export
+print.geposan_method <- function(x, ...) {
+    cat(sprintf(
+        paste0(
+            "geposan method:",
+            "\n  Method ID: %s",
+            "\n  Name: %s",
+            "\n  Description: %s",
+            "\n"
+        ),
+        x$id,
+        x$name,
+        x$description
+    ))
+
+    invisible(x)
+}
--- a/R/neural.R
+++ b/R/neural.R
@ -1,248 +1,254 @@
-# Find genes by training and applying a neural network.
-#
-# @param seed The seed will be used to make the results reproducible.
-# @param n_models This number specifies how many sets of training data should
-#   be created. For each set, there will be a model trained on the remaining
-#   training data and validated using this set. For non-training genes, the
-#   final score will be the mean of the result of applying the different
-#   models.
-neural <- function(preset, progress = NULL, seed = 751833, n_models = 5) {
-    species_ids <- preset$species_ids
-    gene_ids <- preset$gene_ids
-    reference_gene_ids <- preset$reference_gene_ids
+#' Find genes by training and applying a neural network.
+#'
+#' @param seed The seed will be used to make the results reproducible.
+#' @param n_models This number specifies how many sets of training data should
+#'   be created. For each set, there will be a model trained on the remaining
+#'   training data and validated using this set. For non-training genes, the
+#'   final score will be the mean of the result of applying the different
+#'   models. There should be at least two training sets. The analysis will only
+#'   work, if there is at least one reference gene per training set.
+#'
+#' @return An object of class `geposan_method`.
+#'
+#' @export
+neural <- function(seed = 180199, n_models = 5) {
+    method(
+        id = "neural",
+        name = "Neural",
+        description = "Assessment by neural network",
+        function(preset, progress) {
+            species_ids <- preset$species_ids
+            gene_ids <- preset$gene_ids
+            reference_gene_ids <- preset$reference_gene_ids

-    cached(
-        "neural",
-        c(species_ids, gene_ids, reference_gene_ids, seed, n_models),
-        { # nolint
-            reference_count <- length(reference_gene_ids)
-            if (!n_models %in% 2:reference_count) {
-                stop(paste0(
-                    "n_models has to be between 2 and the number of reference ",
-                    "genes."
-                ))
-            }
+            cached(
+                "neural",
+                c(species_ids, gene_ids, reference_gene_ids, seed, n_models),
+                { # nolint
+                    reference_count <- length(reference_gene_ids)
+                    stopifnot(n_models %in% 2:reference_count)

-            # Make results reproducible.
-            tensorflow::set_random_seed(seed)
+                    # Make results reproducible.
+                    tensorflow::set_random_seed(seed)

-            # Step 1: Prepare input data.
-            # ---------------------------
+                    # Step 1: Prepare input data.
+                    # ---------------------------

-            # Prefilter distances by species.
-            distances <- geposan::distances[species %chin% species_ids]
+                    # Prefilter distances by species.
+                    distances <- geposan::distances[species %chin% species_ids]

-            # Input data for the network. This contains the gene ID as an
-            # identifier as well as the per-species gene distances as input
-            # variables.
-            data <- data.table(gene = gene_ids)
+                    # Input data for the network. This contains the gene ID as
+                    # an identifier as well as the per-species gene distances as
+                    # input variables.
+                    data <- data.table(gene = gene_ids)

-            # Buffer to keep track of the names of the input variables.
-            input_vars <- NULL
+                    # Buffer to keep track of the names of the input variables.
+                    input_vars <- NULL

-            # Make a columns containing positions and distances for each
-            # species.
-            for (species_id in species_ids) {
-                species_data <- distances[
-                    species == species_id,
-                    .(gene, distance)
-                ]
+                    # Make a columns containing positions and distances for each
+                    # species.
+                    for (species_id in species_ids) {
+                        species_data <- distances[
+                            species == species_id,
+                            .(gene, distance)
+                        ]

-                # Only include species with at least 25% known values. As
-                # positions and distances always coexist, we don't loose any
-                # data here.
+                        # Only include species with at least 25% known values.
+                        # As positions and distances always coexist, we don't
+                        # loose any data here.

-                species_data <- stats::na.omit(species_data)
+                        species_data <- stats::na.omit(species_data)

-                if (nrow(species_data) >= 0.25 * length(gene_ids)) {
-                    data <- merge(data, species_data, all.x = TRUE)
+                        if (nrow(species_data) >= 0.25 * length(gene_ids)) {
+                            data <- merge(data, species_data, all.x = TRUE)

-                    # Replace missing data with mean values. The neural network
-                    # can't handle NAs in a meaningful way. Choosing extreme
-                    # values here would result in heavily biased results.
-                    # Therefore, the mean value is chosen as a compromise.
-                    # However, this will of course lessen the significance of
-                    # the results.
+                            # Replace missing data with mean values. The neural
+                            # network can't handle NAs in a meaningful way.
+                            # Choosing extreme values here would result in
+                            # heavily biased results. Therefore, the mean value
+                            # is chosen as a compromise. However, this will of
+                            # course lessen the significance of the results.

-                    mean_distance <- round(species_data[, mean(distance)])
-                    data[is.na(distance), `:=`(distance = mean_distance)]
+                            mean_distance <- round(
+                                species_data[, mean(distance)]
+                            )

-                    # Name the new column after the species.
-                    setnames(data, "distance", species_id)
+                            data[is.na(distance), distance := mean_distance]

-                    # Add the input variable to the buffer.
-                    input_vars <- c(input_vars, species_id)
-                }
-            }
+                            # Name the new column after the species.
+                            setnames(data, "distance", species_id)

-            if (!is.null(progress)) {
-                progress(0.1)
-            }
+                            # Add the input variable to the buffer.
+                            input_vars <- c(input_vars, species_id)
+                        }
+                    }

-            # Step 2: Prepare training data.
-            # ------------------------------
+                    progress(0.1)

-            # Take out the reference data.
+                    # Step 2: Prepare training data.
+                    # ------------------------------

-            reference_data <- data[gene %chin% reference_gene_ids]
-            reference_data[, score := 1.0]
+                    # Take out the reference data.

-            # Take out random samples from the remaining genes. This is another
-            # compromise with a negative impact on significance. Because there
-            # is no information on genes with are explicitely *not* TPE-OLD
-            # genes, we have to assume that a random sample of genes has a low
-            # probability of including TPE-OLD genes.
+                    reference_data <- data[gene %chin% reference_gene_ids]
+                    reference_data[, score := 1.0]

-            without_reference_data <- data[!gene %chin% reference_gene_ids]
+                    # Take out random samples from the remaining genes. This is
+                    # another compromise with a negative impact on
+                    # significance. We assume that a random gene is not likely
+                    # to match the reference genes.

-            control_data <- without_reference_data[
-                sample(
-                    nrow(without_reference_data),
-                    reference_count
-                )
-            ]
+                    without_reference_data <- data[
+                        !gene %chin% reference_gene_ids
+                    ]

-            control_data[, score := 0.0]
+                    control_data <- without_reference_data[
+                        sample(
+                            nrow(without_reference_data),
+                            reference_count
+                        )
+                    ]

-            # Split the training data into random sets to have validation data
-            # for each model.
+                    control_data[, score := 0.0]

-            # Scramble the source tables.
-            reference_data <- reference_data[sample(reference_count)]
-            control_data <- control_data[sample(reference_count)]
+                    # Split the training data into random sets to have
+                    # validation data for each model.

-            networks <- list()
+                    # Scramble the source tables.
+                    reference_data <- reference_data[sample(reference_count)]
+                    control_data <- control_data[sample(reference_count)]

-            indices <- seq_len(reference_count)
-            indices_split <- split(indices, indices %% n_models)
+                    networks <- list()

-            for (i in seq_len(n_models)) {
-                training_data <- rbindlist(list(
-                    reference_data[!indices_split[[i]]],
-                    control_data[!indices_split[[i]]]
-                ))
+                    indices <- seq_len(reference_count)
+                    indices_split <- split(indices, indices %% n_models)

-                validation_data <- rbindlist(list(
-                    reference_data[indices_split[[i]]],
-                    control_data[indices_split[[i]]]
-                ))
+                    for (i in seq_len(n_models)) {
+                        training_data <- rbindlist(list(
+                            reference_data[!indices_split[[i]]],
+                            control_data[!indices_split[[i]]]
+                        ))

-                networks[[i]] <- list(
-                    training_data = training_data,
-                    validation_data = validation_data
-                )
-            }
+                        validation_data <- rbindlist(list(
+                            reference_data[indices_split[[i]]],
+                            control_data[indices_split[[i]]]
+                        ))

-            # Step 3: Create, train and apply neural network.
-            # -----------------------------------------------
+                        networks[[i]] <- list(
+                            training_data = training_data,
+                            validation_data = validation_data
+                        )
+                    }

-            # Layers for the neural network.
-            input_layer <- length(input_vars)
-            layer1 <- input_layer
-            layer2 <- 0.5 * input_layer
-            layer3 <- 0.5 * layer2
+                    # Step 3: Create, train and apply neural network.
+                    # -----------------------------------------------

-            # Convert data to matrix and normalize it.
-            to_matrix <- function(data) {
-                data_matrix <- as.matrix(data[, ..input_vars])
-                colnames(data_matrix) <- NULL
-                keras::normalize(data_matrix)
-            }
+                    # Layers for the neural network.
+                    input_layer <- length(input_vars)
+                    layer1 <- input_layer
+                    layer2 <- 0.5 * input_layer
+                    layer3 <- 0.5 * layer2

-            data_matrix <- to_matrix(data)
-            output_vars <- NULL
+                    # Convert data to matrix and normalize it.
+                    to_matrix <- function(data) {
+                        data_matrix <- as.matrix(data[, ..input_vars])
+                        colnames(data_matrix) <- NULL
+                        keras::normalize(data_matrix)
+                    }

-            for (i in seq_along(networks)) {
-                # Create a new model for each training session, because the
-                # model would keep its state across training sessions otherwise.
-                model <- keras::keras_model_sequential() |>
-                    keras::layer_dense(
-                        units = layer1,
-                        activation = "relu",
-                        input_shape = input_layer,
-                    ) |>
-                    keras::layer_dense(
-                        units = layer2,
-                        activation = "relu",
-                        kernel_regularizer = keras::regularizer_l2()
-                    ) |>
-                    keras::layer_dense(
-                        units = layer3,
-                        activation = "relu",
-                        kernel_regularizer = keras::regularizer_l2()
-                    ) |>
-                    keras::layer_dense(
-                        units = 1,
-                        activation = "sigmoid"
-                    ) |>
-                    keras::compile(
-                        loss = keras::loss_mean_absolute_error(),
-                        optimizer = keras::optimizer_adam()
+                    data_matrix <- to_matrix(data)
+                    output_vars <- NULL
+
+                    for (i in seq_along(networks)) {
+                        # Create a new model for each training session, because
+                        # the model would keep its state across training
+                        # sessions otherwise.
+                        model <- keras::keras_model_sequential() |>
+                            keras::layer_dense(
+                                units = layer1,
+                                activation = "relu",
+                                input_shape = input_layer,
+                            ) |>
+                            keras::layer_dense(
+                                units = layer2,
+                                activation = "relu",
+                                kernel_regularizer = keras::regularizer_l2()
+                            ) |>
+                            keras::layer_dense(
+                                units = layer3,
+                                activation = "relu",
+                                kernel_regularizer = keras::regularizer_l2()
+                            ) |>
+                            keras::layer_dense(
+                                units = 1,
+                                activation = "sigmoid"
+                            ) |>
+                            keras::compile(
+                                loss = keras::loss_mean_absolute_error(),
+                                optimizer = keras::optimizer_adam()
+                            )
+
+                        # Train the model.
+
+                        network <- networks[[i]]
+
+                        training_data <- network$training_data
+                        training_matrix <- to_matrix(training_data)
+                        validation_data <- network$validation_data
+                        validation_matrix <- to_matrix(validation_data)
+
+                        fit <- keras::fit(
+                            model,
+                            x = training_matrix,
+                            y = training_data$score,
+                            validation_data = list(
+                                x_val = validation_matrix,
+                                y_val = validation_data$score
+                            ),
+                            epochs = 500,
+                            verbose = FALSE
+                        )
+
+                        # Apply the model.
+
+                        data[, new_score := stats::predict(model, data_matrix)]
+
+                        # Remove the values of the training data itself.
+                        data[gene %chin% training_data$gene, new_score := NA]
+
+                        output_var <- sprintf("score%i", i)
+                        setnames(data, "new_score", output_var)
+                        output_vars <- c(output_vars, output_var)
+
+
+                        # Store the details.
+
+                        networks[[i]]$model <- keras::serialize_model(model)
+                        networks[[i]]$fit <- fit
+
+                        progress(0.1 + i * (0.9 / n_models))
+                    }
+
+                    # Compute the final score as the mean score.
+                    data[,
+                        score := mean(as.numeric(.SD), na.rm = TRUE),
+                        .SDcols = output_vars,
+                        by = gene
+                    ]
+
+                    progress(1.0)
+
+                    result(
+                        method = "neural",
+                        scores = data[, .(gene, score)],
+                        details = list(
+                            seed = seed,
+                            n_models = n_models,
+                            all_results = data[, !..input_vars],
+                            networks = networks
+                        )
                    )
-
-                # Train the model.
-
-                network <- networks[[i]]
-
-                training_data <- network$training_data
-                training_matrix <- to_matrix(training_data)
-                validation_data <- network$validation_data
-                validation_matrix <- to_matrix(validation_data)
-
-                fit <- keras::fit(
-                    model,
-                    x = training_matrix,
-                    y = training_data$score,
-                    validation_data = list(
-                        x_val = validation_matrix,
-                        y_val = validation_data$score
-                    ),
-                    epochs = 500,
-                    verbose = FALSE
-                )
-
-                # Apply the model.
-
-                data[, new_score := stats::predict(model, data_matrix)]
-
-                # Remove the values of the training data itself.
-                data[gene %chin% training_data$gene, new_score := NA]
-
-                output_var <- sprintf("score%i", i)
-                setnames(data, "new_score", output_var)
-                output_vars <- c(output_vars, output_var)
-
-
-                # Store the details.
-
-                networks[[i]]$model <- keras::serialize_model(model)
-                networks[[i]]$fit <- fit
-
-                if (!is.null(progress)) {
-                    progress(0.1 + i * (0.9 / n_models))
                }
-            }
-
-            # Compute the final score as the mean score.
-            data[,
-                score := mean(as.numeric(.SD), na.rm = TRUE),
-                .SDcols = output_vars,
-                by = gene
-            ]
-
-            if (!is.null(progress)) {
-                progress(1.0)
-            }
-
-            structure(
-                list(
-                    results = data[, .(gene, score)],
-                    seed = seed,
-                    n_models = n_models,
-                    all_results = data[, !..input_vars],
-                    networks = networks
-                ),
-                class = "geposan_method_results"
            )
        }
    )
--- a/R/preset.R
+++ b/R/preset.R
@ -5,46 +5,22 @@
 #' reference genes to be able to assess the results later. The genes will be
 #' filtered based on how many species have data for them. Genes which only have
 #' orthologs for less than 25% of the input species will be excluded from the
-#' preset and the analyis.
+#' preset and the analyis. See the different method functions for the available
+#' methods: [clustering()], [correlation()], [neural()], [adjacency()] and
+#' [proximity()].
 #'
-#' Available methods are:
-#'
-#'  - `clusteriness` How much the gene distances to the nearest telomere
-#'    cluster across species.
-#'  - `correlation` The mean correlation of gene distances to the nearest
-#'    telomere across species.
-#'  - `neural` Assessment by neural network trained on the reference genes.
-#'  - `adjacency` Proximity to reference genes.
-#'  - `proximity` Mean proximity to telomeres.
-#'
-#' Available optimization targets are:
-#'
-#'  - `mean` Mean rank of the reference genes.
-#'  - `median` Median rank of the reference genes.
-#'  - `max` First rank of the reference genes.
-#'  - `min` Last rank of the reference genes.
-#'
-#' @param methods Methods to apply.
+#' @param methods List of methods to apply.
 #' @param species_ids IDs of species to include.
 #' @param gene_ids IDs of genes to screen.
 #' @param reference_gene_ids IDs of reference genes to compare to.
-#' @param optimization_target Parameter of the reference genes that the ranking
-#'   should be optimized for.
 #'
 #' @return The preset to use with [analyze()].
 #'
 #' @export
-preset <- function(methods = c(
-                       "clusteriness",
-                       "correlation",
-                       "neural",
-                       "adjacency",
-                       "proximity"
-                   ),
-                   species_ids = NULL,
-                   gene_ids = NULL,
-                   reference_gene_ids = NULL,
-                   optimization_target = "mean_rank") {
+preset <- function(methods = all_methods(),
+                   species_ids = geposan::species$id,
+                   gene_ids = geposan::genes$id,
+                   reference_gene_ids) {
    # Count included species per gene.
    genes_n_species <- geposan::distances[
        species %chin% species_ids,
@ -63,11 +39,10 @@ preset <- function(methods = c(
    # for the object later.
    structure(
        list(
-            methods = sort(methods),
+            methods = methods,
            species_ids = sort(species_ids),
            gene_ids = sort(gene_ids_filtered),
-            reference_gene_ids = sort(reference_gene_ids),
-            optimization_target = optimization_target
+            reference_gene_ids = sort(reference_gene_ids)
        ),
        class = "geposan_preset"
    )
@ -82,25 +57,20 @@ preset <- function(methods = c(
 #'
 #' @export
 print.geposan_preset <- function(x, ...) {
-    cat("geposan preset:")
-    cat("\n  Included methods: ")
-    cat(x$methods, sep = ", ")
-
    cat(sprintf(
-        "\n  Input data: %i species, %i genes",
+        paste0(
+            "geposan preset:",
+            "\n  Included methods: %s",
+            "\n  Number of species: %i",
+            "\n  Number of genes: %i",
+            "\n  Reference genes: %i",
+            "\n"
+        ),
+        paste(sapply(x$methods, function(m) m$id), collapse = ", "),
        length(x$species_ids),
-        length(x$gene_ids)
-    ))
-
-    cat(sprintf(
-        "\n  Comparison data: %i reference genes",
+        length(x$gene_ids),
        length(x$reference_gene_ids)
    ))

-    cat(sprintf(
-        "\n  Optimization target: %s\n",
-        x$optimization_target
-    ))
-
    invisible(x)
 }
--- a/R/proximity.R
+++ b/R/proximity.R
@ -1,34 +1,39 @@
-# Score the mean distance of genes to the telomeres across species.
-#
-# A score will be given to each gene such that 0.0 corresponds to the maximal
-# mean distance across all genes and 1.0 corresponds to a distance of 0.
-proximity <- function(preset, progress = NULL) {
-    species_ids <- preset$species_ids
-    gene_ids <- preset$gene_ids
+#' Score the mean distance of genes to the telomeres across species.
+#'
+#' A score will be given to each gene such that 0.0 corresponds to the maximal
+#' mean distance across all genes and 1.0 corresponds to a distance of 0.
+#'
+#' @return An object of class `geposan_method`.
+#'
+#' @export
+proximity <- function() {
+    method(
+        id = "proximity",
+        name = "Proximity",
+        description = "Proximity to telomeres",
+        function(preset, progress) {
+            species_ids <- preset$species_ids
+            gene_ids <- preset$gene_ids

-    cached("proximity", c(species_ids, gene_ids), {
-        # Prefilter distances by species and gene.
-        data <- geposan::distances[
-            species %chin% preset$species_ids & gene %chin% preset$gene_ids
-        ]
+            cached("proximity", c(species_ids, gene_ids), {
+                # Prefilter distances by species and gene.
+                data <- geposan::distances[
+                    species %chin% preset$species_ids &
+                        gene %chin% preset$gene_ids
+                ]

-        # Compute the score as described above.
-        data <- data[, .(mean_distance = mean(distance)), by = "gene"]
-        max_distance <- data[, max(mean_distance)]
-        data[, score := 1 - mean_distance / max_distance]
+                # Compute the score as described above.
+                data <- data[, .(mean_distance = mean(distance)), by = "gene"]
+                max_distance <- data[, max(mean_distance)]
+                data[, score := 1 - mean_distance / max_distance]

-        if (!is.null(progress)) {
-            # We do everything in one go, so it's not possible to report
-            # detailed progress information. As the method is relatively quick,
-            # this should not be a problem.
-            progress(1.0)
+                progress(1.0)
+
+                result(
+                    method = "proximity",
+                    scores = data[, .(gene, score)]
+                )
+            })
        }
-
-        structure(
-            list(
-                results = data[, .(gene, score)]
-            ),
-            class = "geposan_method_results"
-        )
-    })
+    )
 }
--- a/R/ranking.R
+++ b/R/ranking.R
@ -13,10 +13,10 @@
 #'
 #' @export
 ranking <- function(analysis, weights) {
-    if (inherits(analysis, "geposan_analysis")) {
-        ranking <- copy(analysis$ranking)
-    } else if (inherits(analysis, "geposan_results")) {
-        ranking <- copy(analysis)
+    ranking <- if (inherits(analysis, "geposan_analysis")) {
+        copy(analysis$scores)
+    } else if (inherits(analysis, "geposan_ranking")) {
+        copy(analysis)
    } else {
        stop("Invalid analyis. Use geposan::analyze().")
    }
@ -39,7 +39,7 @@ ranking <- function(analysis, weights) {

    structure(
        ranking,
-        class = c("geposan_ranking", "geposan_results", class(ranking))
+        class = c("geposan_ranking", class(ranking))
    )
 }

@ -60,7 +60,7 @@ ranking <- function(analysis, weights) {
 #' @export
 optimal_weights <- function(analysis, methods, reference_gene_ids,
                            target = "mean") {
-    if (!inherits(analysis, c("geposan_analysis", "geposan_results"))) {
+    if (!inherits(analysis, c("geposan_analysis", "geposan_ranking"))) {
        stop("Invalid analyis. Use geposan::analyze().")
    }

--- a/R/result.R
+++ b/R/result.R
@ -0,0 +1,50 @@
+#' Result of applying a method on gene position data.
+#'
+#' @param method_id ID of the method that produced this result.
+#' @param scores A `data.frame` mapping gene IDs (`gene`) to computed scores
+#'   between 0.0 and 1.0 (`score`).
+#' @param details Optional details that may contain intermediate results as
+#'   well as other information on the method application.
+#'
+#' @return An object of class `geposan_result`.
+#'
+#' @export
+result <- function(method_id, scores, details = list()) {
+    stopifnot(is.data.frame(scores) &
+        c("gene", "score") %chin% colnames(scores))
+    stopifnot(is.list(details))
+
+    structure(
+        list(
+            method_id = method_id,
+            scores = scores,
+            details = details
+        ),
+        class = "geposan_result"
+    )
+}
+
+#' Print a result object.
+#'
+#' @param x The result to print.
+#' @param ... Other parameters.
+#'
+#' @seealso [result()]
+#'
+#' @export
+print.geposan_result <- function(x, ...) {
+    cat(sprintf(
+        paste0(
+            "geposan result:",
+            "\n  Method: %s",
+            "\n  Number of genes: %i",
+            "\n  Available details: %s",
+            "\n"
+        ),
+        x$method_id,
+        nrow(x$scores),
+        paste(names(x$details), collapse = ", ")
+    ))
+
+    invisible(x)
+}