Reindent code to use just two spaces

2025-10-26 10:47:25 +01:00 · 2022-05-26 12:42:19 +02:00 · 2022-05-26 12:42:19 +02:00 · c04b6337e9
commit c04b6337e9
parent a1e6147466
17 changed files with 1583 additions and 1582 deletions
--- a/R/analyze.R
+++ b/R/analyze.R
@ -18,54 +18,54 @@
 #'
 #' @export
 analyze <- function(preset, progress = NULL, include_results = TRUE) {
-    if (!inherits(preset, "geposan_preset")) {
+  if (!inherits(preset, "geposan_preset")) {
-        stop("Preset is invalid. Use geposan::preset() to create one.")
+    stop("Preset is invalid. Use geposan::preset() to create one.")
-    }
+  }
-    if (is.null(progress)) {
+  if (is.null(progress)) {
-        progress_bar <- progress::progress_bar$new()
+    progress_bar <- progress::progress_bar$new()
-        progress_bar$update(0.0)
+    progress_bar$update(0.0)
-        progress <- function(progress_value) {
+    progress <- function(progress_value) {
-            if (!progress_bar$finished) {
+      if (!progress_bar$finished) {
-                progress_bar$update(progress_value)
+        progress_bar$update(progress_value)
-                if (progress_value >= 1.0) {
+        if (progress_value >= 1.0) {
-                    progress_bar$terminate()
+          progress_bar$terminate()
                }
            }
        }
      }
    }
  }
-    progress_buffer <- 0.0
+  progress_buffer <- 0.0
-    method_count <- length(preset$methods)
+  method_count <- length(preset$methods)
-    method_progress <- function(progress_value) {
+  method_progress <- function(progress_value) {
-        progress(progress_buffer + progress_value / method_count)
+    progress(progress_buffer + progress_value / method_count)
-    }
+  }
-    scores <- data.table(gene = preset$gene_id)
+  scores <- data.table(gene = preset$gene_id)
-    results <- list()
+  results <- list()
-    for (method in preset$methods) {
+  for (method in preset$methods) {
-        method_results <- method$func(preset, method_progress)
+    method_results <- method$func(preset, method_progress)
-        scores <- merge(scores, method_results$scores, by = "gene")
+    scores <- merge(scores, method_results$scores, by = "gene")
-        setnames(scores, "score", method$id)
+    setnames(scores, "score", method$id)
-        results <- c(results, list(method_results))
+    results <- c(results, list(method_results))
-        progress_buffer <- progress_buffer + 1 / method_count
+    progress_buffer <- progress_buffer + 1 / method_count
-        progress(progress_buffer)
+    progress(progress_buffer)
-    }
+  }
-    structure(
+  structure(
-        list(
+    list(
-            preset = preset,
+      preset = preset,
-            scores = scores,
+      scores = scores,
-            results = if (include_results) results else NULL
+      results = if (include_results) results else NULL
-        ),
+    ),
-        class = "geposan_analysis"
+    class = "geposan_analysis"
-    )
+  )
 }
 #' Print an analysis object.
@ -77,14 +77,14 @@ analyze <- function(preset, progress = NULL, include_results = TRUE) {
 #'
 #' @export
 print.geposan_analysis <- function(x, ...) {
-    cat("geposan analysis:\n\n")
+  cat("geposan analysis:\n\n")
-    print(x$preset)
+  print(x$preset)
  cat("\n")
  for (result in x$results) {
    print(result)
    cat("\n")
  }
-    for (result in x$results) {
+  invisible(x)
        print(result)
        cat("\n")
    }
    invisible(x)
 }
--- a/R/comparison.R
+++ b/R/comparison.R
@ -18,38 +18,38 @@
 #'
 #' @export
 compare <- function(ranking, comparison_gene_ids) {
-    if (!inherits(ranking, "geposan_ranking")) {
+  if (!inherits(ranking, "geposan_ranking")) {
-        stop("Invalid ranking. Use geposan::ranking().")
+    stop("Invalid ranking. Use geposan::ranking().")
-    }
+  }
-    comparison_ranking <- ranking[gene %chin% comparison_gene_ids]
+  comparison_ranking <- ranking[gene %chin% comparison_gene_ids]
-    quantiles <- data.table(
+  quantiles <- data.table(
-        quantile = c("0%", "25%", "50%", "75%", "100%"),
+    quantile = c("0%", "25%", "50%", "75%", "100%"),
-        score = stats::quantile(comparison_ranking[, score]),
+    score = stats::quantile(comparison_ranking[, score]),
-        rank = stats::quantile(
+    rank = stats::quantile(
-            comparison_ranking[, rank],
+      comparison_ranking[, rank],
-            probs = seq(1, 0, -0.25)
+      probs = seq(1, 0, -0.25)
-        ),
+    ),
-        percentile = stats::quantile(comparison_ranking[, percentile])
+    percentile = stats::quantile(comparison_ranking[, percentile])
-    )
+  )
-    p_value <- stats::wilcox.test(
+  p_value <- stats::wilcox.test(
-        x = comparison_ranking[, score],
+    x = comparison_ranking[, score],
-        y = ranking[!gene %chin% comparison_gene_ids, score],
+    y = ranking[!gene %chin% comparison_gene_ids, score],
-        alternative = "greater"
+    alternative = "greater"
-    )$p.value
+  )$p.value
-    structure(
+  structure(
-        list(
+    list(
-            quantiles = quantiles,
+      quantiles = quantiles,
-            mean_score = comparison_ranking[, mean(score)],
+      mean_score = comparison_ranking[, mean(score)],
-            mean_rank = comparison_ranking[, mean(rank)],
+      mean_rank = comparison_ranking[, mean(rank)],
-            mean_percentile = comparison_ranking[, mean(percentile)],
+      mean_percentile = comparison_ranking[, mean(percentile)],
-            p_value = p_value
+      p_value = p_value
-        ),
+    ),
-        class = "geposan_comparison"
+    class = "geposan_comparison"
-    )
+  )
 }
 #' S3 method to print a comparison object.
@ -61,32 +61,32 @@ compare <- function(ranking, comparison_gene_ids) {
 #'
 #' @export
 print.geposan_comparison <- function(x, ...) {
-    cat("geposan comparison:\n\n")
+  cat("geposan comparison:\n\n")
-    quantiles_formatted <- x$quantiles[, .(
+  quantiles_formatted <- x$quantiles[, .(
-        "Quantile" = quantile,
+    "Quantile" = quantile,
-        "Score" = round(score, 3),
+    "Score" = round(score, 3),
-        "Rank" = rank,
+    "Rank" = rank,
-        "Percentile" = paste0(
+    "Percentile" = paste0(
-            format(round(percentile * 100, 1), nsmall = 1),
+      format(round(percentile * 100, 1), nsmall = 1),
-            "%"
+      "%"
-        )
+    )
-    )]
+  )]
-    print(quantiles_formatted, row.names = FALSE)
+  print(quantiles_formatted, row.names = FALSE)
-    cat(sprintf(
+  cat(sprintf(
-        paste0(
+    paste0(
-            "\n  Mean score: %.3f",
+      "\n  Mean score: %.3f",
-            "\n  Mean rank: %.1f",
+      "\n  Mean rank: %.1f",
-            "\n  Mean percentile: %.1f%%",
+      "\n  Mean percentile: %.1f%%",
-            "\n  p-value for better scores: %.4f\n"
+      "\n  p-value for better scores: %.4f\n"
-        ),
+    ),
-        x$mean_score,
+    x$mean_score,
-        x$mean_rank,
+    x$mean_rank,
-        x$mean_percentile * 100,
+    x$mean_percentile * 100,
-        x$p_value
+    x$p_value
-    ))
+  ))
-    invisible(x)
+  invisible(x)
 }
--- a/R/method.R
+++ b/R/method.R
@ -12,34 +12,34 @@
 #'
 #' @export
 method <- function(id, name, description, func) {
-    stopifnot(is.character(id) & length(id) == 1)
+  stopifnot(is.character(id) & length(id) == 1)
-    stopifnot(is.character(name) & length(name) == 1)
+  stopifnot(is.character(name) & length(name) == 1)
-    stopifnot(is.character(description) & length(description) == 1)
+  stopifnot(is.character(description) & length(description) == 1)
-    stopifnot(is.function(func))
+  stopifnot(is.function(func))
-    structure(
+  structure(
-        list(
+    list(
-            id = id,
+      id = id,
-            name = name,
+      name = name,
-            description = description,
+      description = description,
-            func = func
+      func = func
-        ),
+    ),
-        class = "geposan_method"
+    class = "geposan_method"
-    )
+  )
 }
 #' Get a list of all available methods.
 #'
 #' @export
 all_methods <- function() {
-    list(
+  list(
-        clustering(),
+    clustering(),
-        correlation(),
+    correlation(),
-        neural(),
+    neural(),
-        adjacency(),
+    adjacency(),
-        species_adjacency(),
+    species_adjacency(),
-        proximity()
+    proximity()
-    )
+  )
 }
 #' Print a method object.
@ -51,18 +51,18 @@ all_methods <- function() {
 #'
 #' @export
 print.geposan_method <- function(x, ...) {
-    cat(sprintf(
+  cat(sprintf(
-        paste0(
+    paste0(
-            "geposan method:",
+      "geposan method:",
-            "\n  Method ID: %s",
+      "\n  Method ID: %s",
-            "\n  Name: %s",
+      "\n  Name: %s",
-            "\n  Description: %s",
+      "\n  Description: %s",
-            "\n"
+      "\n"
-        ),
+    ),
-        x$id,
+    x$id,
-        x$name,
+    x$name,
-        x$description
+    x$description
-    ))
+  ))
-    invisible(x)
+  invisible(x)
 }
--- a/R/method_adjacency.R
+++ b/R/method_adjacency.R
@ -12,14 +12,14 @@
 #'
 #' @export
 densest <- function(data) {
-    as.numeric(if (length(data) <= 0) {
+  as.numeric(if (length(data) <= 0) {
-        NULL
+    NULL
-    } else if (length(data) == 1) {
+  } else if (length(data) == 1) {
-        data
+    data
-    } else {
+  } else {
-        density <- stats::density(data)
+    density <- stats::density(data)
-        mean(density$x[density$y == max(density$y)])
+    mean(density$x[density$y == max(density$y)])
-    })
+  })
 }
 #' Score genes based on their proximity to the reference genes.
@ -40,80 +40,80 @@ densest <- function(data) {
 #'
 #' @export
 adjacency <- function(distance_estimate = densest, summarize = stats::median) {
-    method(
+  method(
-        id = "adjacency",
+    id = "adjacency",
-        name = "Adjacency",
+    name = "Adjacency",
-        description = "Adjacency to reference genes",
+    description = "Adjacency to reference genes",
-        function(preset, progress) {
+    function(preset, progress) {
-            species_ids <- preset$species_ids
+      species_ids <- preset$species_ids
-            gene_ids <- preset$gene_ids
+      gene_ids <- preset$gene_ids
-            reference_gene_ids <- preset$reference_gene_ids
+      reference_gene_ids <- preset$reference_gene_ids
-            cached(
+      cached(
-                "adjacency",
+        "adjacency",
-                c(
+        c(
-                    species_ids,
+          species_ids,
-                    gene_ids,
+          gene_ids,
-                    reference_gene_ids,
+          reference_gene_ids,
-                    distance_estimate,
+          distance_estimate,
-                    summarize
+          summarize
-                ),
+        ),
-                { # nolint
+        { # nolint
-                    # Filter distances by species and gene and summarize each
+          # Filter distances by species and gene and summarize each
-                    # gene's distance values using the estimation function.
+          # gene's distance values using the estimation function.
-                    data <- geposan::distances[
+          data <- geposan::distances[
-                        species %chin% species_ids & gene %chin% gene_ids,
+            species %chin% species_ids & gene %chin% gene_ids,
-                        .(distance = as.numeric(distance_estimate(distance))),
+            .(distance = as.numeric(distance_estimate(distance))),
-                        by = gene
+            by = gene
-                    ]
+          ]
-                    # Compute the absolute value of the difference between the
+          # Compute the absolute value of the difference between the
-                    # estimated distances of each gene to the reference genes.
+          # estimated distances of each gene to the reference genes.
-                    compute_difference <- function(distance_value,
+          compute_difference <- function(distance_value,
-                                                   comparison_ids) {
+                                         comparison_ids) {
-                        differences <- data[
+            differences <- data[
-                            gene %chin% comparison_ids,
+              gene %chin% comparison_ids,
-                            .(difference = abs(distance_value - distance))
+              .(difference = abs(distance_value - distance))
-                        ]
+            ]
-                        summarize(differences$difference)
+            summarize(differences$difference)
-                    }
+          }
-                    # Compute the differences to the reference genes.
+          # Compute the differences to the reference genes.
-                    data[
+          data[
-                        !gene %chin% reference_gene_ids,
+            !gene %chin% reference_gene_ids,
-                        difference := compute_difference(
+            difference := compute_difference(
-                            distance,
+              distance,
-                            reference_gene_ids
+              reference_gene_ids
-                        ),
+            ),
-                        by = gene
+            by = gene
-                    ]
+          ]
-                    progress(0.5)
+          progress(0.5)
-                    # Exclude the reference gene itself when computing its
+          # Exclude the reference gene itself when computing its
-                    # difference.
+          # difference.
-                    data[
+          data[
-                        gene %chin% reference_gene_ids,
+            gene %chin% reference_gene_ids,
-                        difference := compute_difference(
+            difference := compute_difference(
-                            distance,
+              distance,
-                            reference_gene_ids[reference_gene_ids != gene]
+              reference_gene_ids[reference_gene_ids != gene]
-                        ),
+            ),
-                        by = gene
+            by = gene
-                    ]
+          ]
-                    # Compute the final score by normalizing the difference.
+          # Compute the final score by normalizing the difference.
-                    data[, score := 1 - difference / max(difference)]
+          data[, score := 1 - difference / max(difference)]
-                    progress(1.0)
+          progress(1.0)
-                    result(
+          result(
-                        method = "adjacency",
+            method = "adjacency",
-                        scores = data[, .(gene, score)],
+            scores = data[, .(gene, score)],
-                        details = list(data = data)
+            details = list(data = data)
-                    )
+          )
                }
            )
        }
-    )
+      )
    }
  )
 }
--- a/R/method_clustering.R
+++ b/R/method_clustering.R
@ -13,33 +13,33 @@
 #'   default), the first cluster will weigh 1.0, the second 0.7, the third 0.49
 #'   etc.
 clusteriness <- function(data, span = 100000, weight = 0.7) {
-    n <- length(data)
+  n <- length(data)
-    # Return a score of 0.0 if there is just one or no value at all.
+  # Return a score of 0.0 if there is just one or no value at all.
-    if (n < 2) {
+  if (n < 2) {
-        return(0.0)
+    return(0.0)
  }
  # Cluster the data and compute the cluster sizes.
  tree <- stats::hclust(stats::dist(data))
  clusters <- stats::cutree(tree, h = span)
  cluster_sizes <- sort(tabulate(clusters), decreasing = TRUE)
  # Compute the "clusteriness" score.
  score <- 0.0
  for (i in seq_along(cluster_sizes)) {
    cluster_size <- cluster_sizes[i]
    if (cluster_size >= 2) {
      cluster_score <- cluster_size / n
      score <- score + weight^(i - 1) * cluster_score
    }
  }
-    # Cluster the data and compute the cluster sizes.
+  score
    tree <- stats::hclust(stats::dist(data))
    clusters <- stats::cutree(tree, h = span)
    cluster_sizes <- sort(tabulate(clusters), decreasing = TRUE)
    # Compute the "clusteriness" score.
    score <- 0.0
    for (i in seq_along(cluster_sizes)) {
        cluster_size <- cluster_sizes[i]
        if (cluster_size >= 2) {
            cluster_score <- cluster_size / n
            score <- score + weight^(i - 1) * cluster_score
        }
    }
    score
 }
 #' Process genes clustering their distance to telomeres.
@ -53,41 +53,41 @@ clusteriness <- function(data, span = 100000, weight = 0.7) {
 #'
 #' @export
 clustering <- function() {
-    method(
+  method(
-        id = "clustering",
+    id = "clustering",
-        name = "Clustering",
+    name = "Clustering",
-        description = "Clustering of genes",
+    description = "Clustering of genes",
-        function(preset, progress) {
+    function(preset, progress) {
-            species_ids <- preset$species_ids
+      species_ids <- preset$species_ids
-            gene_ids <- preset$gene_ids
+      gene_ids <- preset$gene_ids
-            cached("clustering", c(species_ids, gene_ids), {
+      cached("clustering", c(species_ids, gene_ids), {
-                scores <- data.table(gene = gene_ids)
+        scores <- data.table(gene = gene_ids)
-                # Prefilter the input data by species.
+        # Prefilter the input data by species.
-                distances <- geposan::distances[species %chin% species_ids]
+        distances <- geposan::distances[species %chin% species_ids]
-                genes_done <- 0
+        genes_done <- 0
-                genes_total <- length(gene_ids)
+        genes_total <- length(gene_ids)
-                # Perform the cluster analysis for one gene.
+        # Perform the cluster analysis for one gene.
-                compute <- function(gene_id) {
+        compute <- function(gene_id) {
-                    data <- distances[gene == gene_id, distance]
+          data <- distances[gene == gene_id, distance]
-                    score <- clusteriness(data)
+          score <- clusteriness(data)
-                    genes_done <<- genes_done + 1
+          genes_done <<- genes_done + 1
-                    progress(genes_done / genes_total)
+          progress(genes_done / genes_total)
-                    score
+          score
                }
                scores[, score := compute(gene), by = gene]
                result(
                    method = "clustering",
                    scores = scores
                )
            })
        }
-    )
+
        scores[, score := compute(gene), by = gene]
        result(
          method = "clustering",
          scores = scores
        )
      })
    }
  )
 }
--- a/R/method_correlation.R
+++ b/R/method_correlation.R
@ -8,96 +8,97 @@
 #'
 #' @export
 correlation <- function(summarize = stats::median) {
-    method(
+  method(
-        id = "correlation",
+    id = "correlation",
-        name = "Correlation",
+    name = "Correlation",
-        description = "Correlation with reference genes",
+    description = "Correlation with reference genes",
-        function(preset, progress) {
+    function(preset, progress) {
-            species_ids <- preset$species_ids
+      species_ids <- preset$species_ids
-            gene_ids <- preset$gene_ids
+      gene_ids <- preset$gene_ids
-            reference_gene_ids <- preset$reference_gene_ids
+      reference_gene_ids <- preset$reference_gene_ids
-            cached(
+      cached(
-                "correlation",
+        "correlation",
-                c(species_ids, gene_ids, reference_gene_ids, summarize),
+        c(species_ids, gene_ids, reference_gene_ids, summarize),
-                { # nolint
+        { # nolint
-                    # Prefilter distances by species.
+          # Prefilter distances by species.
-                    distances <- geposan::distances[species %chin% species_ids]
+          distances <- geposan::distances[species %chin% species_ids]
-                    # Tranform data to get species as rows and genes as columns.
+          # Tranform data to get species as rows and genes as columns.
-                    # We construct columns per species, because it requires
+          # We construct columns per species, because it requires
-                    # fewer iterations, and transpose the table afterwards.
+          # fewer iterations, and transpose the table afterwards.
-                    data <- data.table(gene = gene_ids)
+          data <- data.table(gene = gene_ids)
-                    # Make a column containing distance data for each species.
+          # Make a column containing distance data for each species.
-                    for (species_id in species_ids) {
+          for (species_id in species_ids) {
-                        species_data <- distances[
+            species_data <- distances[
-                            species == species_id,
+              species == species_id,
-                            .(gene, distance)
+              .(gene, distance)
-                        ]
+            ]
-                        data <- merge(data, species_data, all.x = TRUE)
+            data <- merge(data, species_data, all.x = TRUE)
-                        setnames(data, "distance", species_id)
+            setnames(data, "distance", species_id)
-                    }
+          }
-                    # Transpose to the desired format.
+          # Transpose to the desired format.
-                    data <- transpose(data, make.names = "gene")
+          data <- transpose(data, make.names = "gene")
-                    progress(0.33)
+          progress(0.33)
-                    # Take the reference data.
+          # Take the reference data.
-                    reference_data <- data[, ..reference_gene_ids]
+          reference_data <- data[, ..reference_gene_ids]
-                    # Perform the correlation between all possible pairs.
+          # Perform the correlation between all possible pairs.
-                    results <- stats::cor(
+          results <- stats::cor(
-                        data[, ..gene_ids],
+            data[, ..gene_ids],
-                        reference_data,
+            reference_data,
-                        use = "pairwise.complete.obs",
+            use = "pairwise.complete.obs",
-                        method = "spearman"
+            method = "spearman"
-                    )
+          )
-                    results <- data.table(results, keep.rownames = TRUE)
+          results <- data.table(results, keep.rownames = TRUE)
-                    setnames(results, "rn", "gene")
+          setnames(results, "rn", "gene")
-                    # Remove correlations between the reference genes
+          # Remove correlations between the reference genes
-                    # themselves.
+          # themselves.
-                    for (reference_gene_id in reference_gene_ids) {
+          for (reference_gene_id in reference_gene_ids) {
-                        column <- quote(reference_gene_id)
+            column <- quote(reference_gene_id)
-                        results[gene == reference_gene_id, eval(column) := NA]
+            results[gene == reference_gene_id, eval(column) := NA]
-                    }
+          }
-                    progress(0.66)
+          progress(0.66)
-                    # Combine the correlation coefficients.
+          # Combine the correlation coefficients.
-                    results[,
+          results[,
-                        max_correlation := as.double(summarize(stats::na.omit(
+            max_correlation := as.double(summarize(stats::na.omit(
-                            # Convert the data.table subset into a
+              # Convert the data.table subset into a
-                            # vector to get the correct na.omit
+              # vector to get the correct na.omit
-                            # behavior.
+              # behavior.
-                            as.matrix(.SD)[1, ]
+              as.matrix(.SD)[1, ]
-                        ))),
+            ))),
-                        .SDcols = reference_gene_ids,
+            .SDcols = reference_gene_ids,
-                        by = gene
+            by = gene
-                    ]
+          ]
-                    # Normalize scores.
+          # Normalize scores.
-                    results[,
+          results[
-                        score := (max_correlation - min(max_correlation)) /
+            ,
-                            (max(max_correlation) - min(max_correlation))
+            score := (max_correlation - min(max_correlation)) /
-                    ]
+              (max(max_correlation) - min(max_correlation))
          ]
-                    # Normalize scores.
+          # Normalize scores.
-                    results[, .(gene, score)]
+          results[, .(gene, score)]
-                    result(
+          result(
-                        method = "correlation",
+            method = "correlation",
-                        scores = results[, .(gene, score)],
+            scores = results[, .(gene, score)],
-                        details = list(all_correlations = results)
+            details = list(all_correlations = results)
-                    )
+          )
                }
            )
        }
-    )
+      )
    }
  )
 }
--- a/R/method_neural.R
+++ b/R/method_neural.R
@ -12,244 +12,244 @@
 #'
 #' @export
 neural <- function(seed = 180199, n_models = 5) {
-    method(
+  method(
-        id = "neural",
+    id = "neural",
-        name = "Neural",
+    name = "Neural",
-        description = "Assessment by neural network",
+    description = "Assessment by neural network",
-        function(preset, progress) {
+    function(preset, progress) {
-            species_ids <- preset$species_ids
+      species_ids <- preset$species_ids
-            gene_ids <- preset$gene_ids
+      gene_ids <- preset$gene_ids
-            reference_gene_ids <- preset$reference_gene_ids
+      reference_gene_ids <- preset$reference_gene_ids
-            cached(
+      cached(
-                "neural",
+        "neural",
-                c(species_ids, gene_ids, reference_gene_ids, seed, n_models),
+        c(species_ids, gene_ids, reference_gene_ids, seed, n_models),
-                { # nolint
+        { # nolint
-                    reference_count <- length(reference_gene_ids)
+          reference_count <- length(reference_gene_ids)
-                    stopifnot(n_models %in% 2:reference_count)
+          stopifnot(n_models %in% 2:reference_count)
-                    # Make results reproducible.
+          # Make results reproducible.
-                    tensorflow::set_random_seed(seed)
+          tensorflow::set_random_seed(seed)
-                    # Step 1: Prepare input data.
+          # Step 1: Prepare input data.
-                    # ---------------------------
+          # ---------------------------
-                    # Prefilter distances by species.
+          # Prefilter distances by species.
-                    distances <- geposan::distances[species %chin% species_ids]
+          distances <- geposan::distances[species %chin% species_ids]
-                    # Input data for the network. This contains the gene ID as
+          # Input data for the network. This contains the gene ID as
-                    # an identifier as well as the per-species gene distances as
+          # an identifier as well as the per-species gene distances as
-                    # input variables.
+          # input variables.
-                    data <- data.table(gene = gene_ids)
+          data <- data.table(gene = gene_ids)
-                    # Buffer to keep track of the names of the input variables.
+          # Buffer to keep track of the names of the input variables.
-                    input_vars <- NULL
+          input_vars <- NULL
-                    # Make a columns containing positions and distances for each
+          # Make a columns containing positions and distances for each
-                    # species.
+          # species.
-                    for (species_id in species_ids) {
+          for (species_id in species_ids) {
-                        species_data <- distances[
+            species_data <- distances[
-                            species == species_id,
+              species == species_id,
-                            .(gene, distance)
+              .(gene, distance)
-                        ]
+            ]
-                        # Only include species with at least 25% known values.
+            # Only include species with at least 25% known values.
-                        # As positions and distances always coexist, we don't
+            # As positions and distances always coexist, we don't
-                        # loose any data here.
+            # loose any data here.
-                        species_data <- stats::na.omit(species_data)
+            species_data <- stats::na.omit(species_data)
-                        if (nrow(species_data) >= 0.25 * length(gene_ids)) {
+            if (nrow(species_data) >= 0.25 * length(gene_ids)) {
-                            data <- merge(data, species_data, all.x = TRUE)
+              data <- merge(data, species_data, all.x = TRUE)
-                            # Replace missing data with mean values. The neural
+              # Replace missing data with mean values. The neural
-                            # network can't handle NAs in a meaningful way.
+              # network can't handle NAs in a meaningful way.
-                            # Choosing extreme values here would result in
+              # Choosing extreme values here would result in
-                            # heavily biased results. Therefore, the mean value
+              # heavily biased results. Therefore, the mean value
-                            # is chosen as a compromise. However, this will of
+              # is chosen as a compromise. However, this will of
-                            # course lessen the significance of the results.
+              # course lessen the significance of the results.
-                            mean_distance <- round(
+              mean_distance <- round(
-                                species_data[, mean(distance)]
+                species_data[, mean(distance)]
-                            )
+              )
-                            data[is.na(distance), distance := mean_distance]
+              data[is.na(distance), distance := mean_distance]
-                            # Name the new column after the species.
+              # Name the new column after the species.
-                            setnames(data, "distance", species_id)
+              setnames(data, "distance", species_id)
-                            # Add the input variable to the buffer.
+              # Add the input variable to the buffer.
-                            input_vars <- c(input_vars, species_id)
+              input_vars <- c(input_vars, species_id)
-                        }
+            }
-                    }
+          }
-                    progress(0.1)
+          progress(0.1)
-                    # Step 2: Prepare training data.
+          # Step 2: Prepare training data.
-                    # ------------------------------
+          # ------------------------------
-                    # Take out the reference data.
+          # Take out the reference data.
-                    reference_data <- data[gene %chin% reference_gene_ids]
+          reference_data <- data[gene %chin% reference_gene_ids]
-                    reference_data[, score := 1.0]
+          reference_data[, score := 1.0]
-                    # Take out random samples from the remaining genes. This is
+          # Take out random samples from the remaining genes. This is
-                    # another compromise with a negative impact on
+          # another compromise with a negative impact on
-                    # significance. We assume that a random gene is not likely
+          # significance. We assume that a random gene is not likely
-                    # to match the reference genes.
+          # to match the reference genes.
-                    without_reference_data <- data[
+          without_reference_data <- data[
-                        !gene %chin% reference_gene_ids
+            !gene %chin% reference_gene_ids
-                    ]
+          ]
-                    control_data <- without_reference_data[
+          control_data <- without_reference_data[
-                        sample(
+            sample(
-                            nrow(without_reference_data),
+              nrow(without_reference_data),
-                            reference_count
+              reference_count
                        )
                    ]
                    control_data[, score := 0.0]
                    # Split the training data into random sets to have
                    # validation data for each model.
                    # Scramble the source tables.
                    reference_data <- reference_data[sample(reference_count)]
                    control_data <- control_data[sample(reference_count)]
                    networks <- list()
                    indices <- seq_len(reference_count)
                    indices_split <- split(indices, indices %% n_models)
                    for (i in seq_len(n_models)) {
                        training_data <- rbindlist(list(
                            reference_data[!indices_split[[i]]],
                            control_data[!indices_split[[i]]]
                        ))
                        validation_data <- rbindlist(list(
                            reference_data[indices_split[[i]]],
                            control_data[indices_split[[i]]]
                        ))
                        networks[[i]] <- list(
                            training_data = training_data,
                            validation_data = validation_data
                        )
                    }
                    # Step 3: Create, train and apply neural network.
                    # -----------------------------------------------
                    # Layers for the neural network.
                    input_layer <- length(input_vars)
                    layer1 <- input_layer
                    layer2 <- 0.5 * input_layer
                    layer3 <- 0.5 * layer2
                    # Convert data to matrix and normalize it.
                    to_matrix <- function(data) {
                        data_matrix <- as.matrix(data[, ..input_vars])
                        colnames(data_matrix) <- NULL
                        keras::normalize(data_matrix)
                    }
                    data_matrix <- to_matrix(data)
                    output_vars <- NULL
                    for (i in seq_along(networks)) {
                        # Create a new model for each training session, because
                        # the model would keep its state across training
                        # sessions otherwise.
                        model <- keras::keras_model_sequential() |>
                            keras::layer_dense(
                                units = layer1,
                                activation = "relu",
                                input_shape = input_layer,
                            ) |>
                            keras::layer_dense(
                                units = layer2,
                                activation = "relu",
                                kernel_regularizer = keras::regularizer_l2()
                            ) |>
                            keras::layer_dense(
                                units = layer3,
                                activation = "relu",
                                kernel_regularizer = keras::regularizer_l2()
                            ) |>
                            keras::layer_dense(
                                units = 1,
                                activation = "sigmoid"
                            ) |>
                            keras::compile(
                                loss = keras::loss_mean_absolute_error(),
                                optimizer = keras::optimizer_adam()
                            )
                        # Train the model.
                        network <- networks[[i]]
                        training_data <- network$training_data
                        training_matrix <- to_matrix(training_data)
                        validation_data <- network$validation_data
                        validation_matrix <- to_matrix(validation_data)
                        fit <- keras::fit(
                            model,
                            x = training_matrix,
                            y = training_data$score,
                            validation_data = list(
                                x_val = validation_matrix,
                                y_val = validation_data$score
                            ),
                            epochs = 500,
                            verbose = FALSE
                        )
                        # Apply the model.
                        data[, new_score := stats::predict(model, data_matrix)]
                        # Remove the values of the training data itself.
                        data[gene %chin% training_data$gene, new_score := NA]
                        output_var <- sprintf("score%i", i)
                        setnames(data, "new_score", output_var)
                        output_vars <- c(output_vars, output_var)
                        # Store the details.
                        networks[[i]]$model <- keras::serialize_model(model)
                        networks[[i]]$fit <- fit
                        progress(0.1 + i * (0.9 / n_models))
                    }
                    # Compute the final score as the mean score.
                    data[,
                        score := mean(as.numeric(.SD), na.rm = TRUE),
                        .SDcols = output_vars,
                        by = gene
                    ]
                    progress(1.0)
                    result(
                        method = "neural",
                        scores = data[, .(gene, score)],
                        details = list(
                            seed = seed,
                            n_models = n_models,
                            all_results = data[, !..input_vars],
                            networks = networks
                        )
                    )
                }
            )
          ]
          control_data[, score := 0.0]
          # Split the training data into random sets to have
          # validation data for each model.
          # Scramble the source tables.
          reference_data <- reference_data[sample(reference_count)]
          control_data <- control_data[sample(reference_count)]
          networks <- list()
          indices <- seq_len(reference_count)
          indices_split <- split(indices, indices %% n_models)
          for (i in seq_len(n_models)) {
            training_data <- rbindlist(list(
              reference_data[!indices_split[[i]]],
              control_data[!indices_split[[i]]]
            ))
            validation_data <- rbindlist(list(
              reference_data[indices_split[[i]]],
              control_data[indices_split[[i]]]
            ))
            networks[[i]] <- list(
              training_data = training_data,
              validation_data = validation_data
            )
          }
          # Step 3: Create, train and apply neural network.
          # -----------------------------------------------
          # Layers for the neural network.
          input_layer <- length(input_vars)
          layer1 <- input_layer
          layer2 <- 0.5 * input_layer
          layer3 <- 0.5 * layer2
          # Convert data to matrix and normalize it.
          to_matrix <- function(data) {
            data_matrix <- as.matrix(data[, ..input_vars])
            colnames(data_matrix) <- NULL
            keras::normalize(data_matrix)
          }
          data_matrix <- to_matrix(data)
          output_vars <- NULL
          for (i in seq_along(networks)) {
            # Create a new model for each training session, because
            # the model would keep its state across training
            # sessions otherwise.
            model <- keras::keras_model_sequential() |>
              keras::layer_dense(
                units = layer1,
                activation = "relu",
                input_shape = input_layer,
              ) |>
              keras::layer_dense(
                units = layer2,
                activation = "relu",
                kernel_regularizer = keras::regularizer_l2()
              ) |>
              keras::layer_dense(
                units = layer3,
                activation = "relu",
                kernel_regularizer = keras::regularizer_l2()
              ) |>
              keras::layer_dense(
                units = 1,
                activation = "sigmoid"
              ) |>
              keras::compile(
                loss = keras::loss_mean_absolute_error(),
                optimizer = keras::optimizer_adam()
              )
            # Train the model.
            network <- networks[[i]]
            training_data <- network$training_data
            training_matrix <- to_matrix(training_data)
            validation_data <- network$validation_data
            validation_matrix <- to_matrix(validation_data)
            fit <- keras::fit(
              model,
              x = training_matrix,
              y = training_data$score,
              validation_data = list(
                x_val = validation_matrix,
                y_val = validation_data$score
              ),
              epochs = 500,
              verbose = FALSE
            )
            # Apply the model.
            data[, new_score := stats::predict(model, data_matrix)]
            # Remove the values of the training data itself.
            data[gene %chin% training_data$gene, new_score := NA]
            output_var <- sprintf("score%i", i)
            setnames(data, "new_score", output_var)
            output_vars <- c(output_vars, output_var)
            # Store the details.
            networks[[i]]$model <- keras::serialize_model(model)
            networks[[i]]$fit <- fit
            progress(0.1 + i * (0.9 / n_models))
          }
          # Compute the final score as the mean score.
          data[,
            score := mean(as.numeric(.SD), na.rm = TRUE),
            .SDcols = output_vars,
            by = gene
          ]
          progress(1.0)
          result(
            method = "neural",
            scores = data[, .(gene, score)],
            details = list(
              seed = seed,
              n_models = n_models,
              all_results = data[, !..input_vars],
              networks = networks
            )
          )
        }
-    )
+      )
    }
  )
 }
--- a/R/method_proximity.R
+++ b/R/method_proximity.R
@ -11,38 +11,38 @@
 #'
 #' @export
 proximity <- function(summarize = stats::median) {
-    method(
+  method(
-        id = "proximity",
+    id = "proximity",
-        name = "Proximity",
+    name = "Proximity",
-        description = "Proximity to telomeres",
+    description = "Proximity to telomeres",
-        function(preset, progress) {
+    function(preset, progress) {
-            species_ids <- preset$species_ids
+      species_ids <- preset$species_ids
-            gene_ids <- preset$gene_ids
+      gene_ids <- preset$gene_ids
-            cached("proximity", c(species_ids, gene_ids), {
+      cached("proximity", c(species_ids, gene_ids), {
-                # Prefilter distances by species and gene.
+        # Prefilter distances by species and gene.
-                data <- geposan::distances[
+        data <- geposan::distances[
-                    species %chin% preset$species_ids &
+          species %chin% preset$species_ids &
-                        gene %chin% preset$gene_ids
+            gene %chin% preset$gene_ids
-                ]
+        ]
-                # Compute the score as described above.
+        # Compute the score as described above.
-                data <- data[,
+        data <- data[,
-                    .(combined_distance = as.double(summarize(distance))),
+          .(combined_distance = as.double(summarize(distance))),
-                    by = "gene"
+          by = "gene"
-                ]
+        ]
-                # Normalize scores.
+        # Normalize scores.
-                data[, score := 1 - combined_distance / max(combined_distance)]
+        data[, score := 1 - combined_distance / max(combined_distance)]
-                progress(1.0)
+        progress(1.0)
-                result(
+        result(
-                    method = "proximity",
+          method = "proximity",
-                    scores = data[, .(gene, score)],
+          scores = data[, .(gene, score)],
-                    details = list(data = data)
+          details = list(data = data)
-                )
+        )
-            })
+      })
-        }
+    }
-    )
+  )
 }
--- a/R/method_species_adjacency.R
+++ b/R/method_species_adjacency.R
@ -15,136 +15,136 @@
 #' @export
 species_adjacency <- function(distance_estimate = stats::median,
                              summarize = stats::median) {
-    method(
+  method(
-        id = "species_adjacency",
+    id = "species_adjacency",
-        name = "Species adj.",
+    name = "Species adj.",
-        description = "Species adjacency",
+    description = "Species adjacency",
-        function(preset, progress) {
+    function(preset, progress) {
-            species_ids <- preset$species_ids
+      species_ids <- preset$species_ids
-            gene_ids <- preset$gene_ids
+      gene_ids <- preset$gene_ids
-            reference_gene_ids <- preset$reference_gene_ids
+      reference_gene_ids <- preset$reference_gene_ids
-            cached(
+      cached(
-                "species_adjacency",
+        "species_adjacency",
-                c(
+        c(
-                    species_ids,
+          species_ids,
-                    gene_ids,
+          gene_ids,
-                    reference_gene_ids,
+          reference_gene_ids,
-                    distance_estimate,
+          distance_estimate,
-                    summarize
+          summarize
-                ),
+        ),
-                { # nolint
+        { # nolint
-                    # Prefilter distances.
+          # Prefilter distances.
-                    data <- geposan::distances[
+          data <- geposan::distances[
-                        species %chin% species_ids & gene %chin% gene_ids
+            species %chin% species_ids & gene %chin% gene_ids
-                    ]
+          ]
-                    progress_state <- 0.0
+          progress_state <- 0.0
-                    progress_step <- 0.9 / length(species_ids)
+          progress_step <- 0.9 / length(species_ids)
-                    # Iterate through all species and find the distance
+          # Iterate through all species and find the distance
-                    # estimates within that species.
+          # estimates within that species.
-                    for (species_id in species_ids) {
+          for (species_id in species_ids) {
-                        # For all genes, compute the distance to one reference
+            # For all genes, compute the distance to one reference
-                        # gene at a time in one go.
+            # gene at a time in one go.
-                        for (reference_gene_id in reference_gene_ids) {
+            for (reference_gene_id in reference_gene_ids) {
-                            comparison_distance <- data[
+              comparison_distance <- data[
-                                species == species_id &
+                species == species_id &
-                                    gene == reference_gene_id,
+                  gene == reference_gene_id,
-                                distance
+                distance
-                            ]
+              ]
-                            column <- quote(reference_gene_id)
+              column <- quote(reference_gene_id)
-                            if (length(comparison_distance) != 1) {
+              if (length(comparison_distance) != 1) {
-                                # If we don't have a comparison distance, we
+                # If we don't have a comparison distance, we
-                                # can't compute a difference. This happens, if
+                # can't compute a difference. This happens, if
-                                # the species doesn't have the reference gene.
+                # the species doesn't have the reference gene.
-                                data[
+                data[
-                                    species == species_id &
+                  species == species_id &
-                                        gene %chin% gene_ids,
+                    gene %chin% gene_ids,
-                                    eval(column) := NA_integer_
+                  eval(column) := NA_integer_
-                                ]
+                ]
-                            } else {
+              } else {
-                                data[
+                data[
-                                    species == species_id &
+                  species == species_id &
-                                        gene %chin% gene_ids,
+                    gene %chin% gene_ids,
-                                    eval(column) :=
+                  eval(column) :=
-                                        abs(distance - comparison_distance)
+                    abs(distance - comparison_distance)
-                                ]
+                ]
-                            }
+              }
-                        }
+            }
-                        # Combine the distances to the different reference genes
+            # Combine the distances to the different reference genes
-                        # into one value using the provided function.
+            # into one value using the provided function.
-                        data[
+            data[
-                            species == species_id &
+              species == species_id &
-                                gene %chin% gene_ids,
+                gene %chin% gene_ids,
-                            combined_distance := as.numeric(
+              combined_distance := as.numeric(
-                                distance_estimate(stats::na.omit(
+                distance_estimate(stats::na.omit(
-                                    # Convert the data.table subset into a
+                  # Convert the data.table subset into a
-                                    # vector to get the correct na.omit
+                  # vector to get the correct na.omit
-                                    # behavior.
+                  # behavior.
-                                    as.matrix(.SD)[1, ]
+                  as.matrix(.SD)[1, ]
-                                ))
+                ))
-                            ),
+              ),
-                            .SDcols = reference_gene_ids,
+              .SDcols = reference_gene_ids,
-                            by = gene
+              by = gene
-                        ]
+            ]
-                        progress_state <- progress_state + progress_step
+            progress_state <- progress_state + progress_step
-                        progress(progress_state)
+            progress(progress_state)
-                    }
+          }
-                    progress(0.9)
+          progress(0.9)
-                    # Remove the distances between the reference genes.
+          # Remove the distances between the reference genes.
-                    for (reference_gene_id in reference_gene_ids) {
+          for (reference_gene_id in reference_gene_ids) {
-                        column <- quote(reference_gene_id)
+            column <- quote(reference_gene_id)
-                        data[gene == reference_gene_id, eval(column) := NA]
+            data[gene == reference_gene_id, eval(column) := NA]
-                    }
+          }
-                    # Recompute the combined distance for the reference genes.
+          # Recompute the combined distance for the reference genes.
-                    data[
+          data[
-                        gene %chin% reference_gene_ids,
+            gene %chin% reference_gene_ids,
-                        combined_distance := as.numeric(
+            combined_distance := as.numeric(
-                            distance_estimate(stats::na.omit(
+              distance_estimate(stats::na.omit(
-                                as.matrix(.SD)[1, ])
+                as.matrix(.SD)[1, ]
-                            )
+              ))
-                        ),
+            ),
-                        .SDcols = reference_gene_ids,
+            .SDcols = reference_gene_ids,
-                        by = list(species, gene)
+            by = list(species, gene)
-                    ]
+          ]
-                    # Combine the distances into one value.
+          # Combine the distances into one value.
-                    results <- data[,
+          results <- data[,
-                        .(
+            .(
-                            summarized_distances = as.numeric(
+              summarized_distances = as.numeric(
-                                summarize(stats::na.omit(combined_distance))
+                summarize(stats::na.omit(combined_distance))
-                            )
+              )
-                        ),
+            ),
-                        by = gene
+            by = gene
-                    ]
+          ]
-                    # Compute the final score by normalizing the difference.
+          # Compute the final score by normalizing the difference.
-                    results[
+          results[
-                        ,
+            ,
-                        score := 1 - summarized_distances /
+            score := 1 - summarized_distances /
-                            max(summarized_distances)
+              max(summarized_distances)
-                    ]
+          ]
-                    progress(1.0)
+          progress(1.0)
-                    result(
+          result(
-                        method = "species_adjacency",
+            method = "species_adjacency",
-                        scores = results[, .(gene, score)],
+            scores = results[, .(gene, score)],
-                        details = list(
+            details = list(
-                            data = data,
+              data = data,
-                            results = results
+              results = results
                        )
                    )
                }
            )
          )
        }
-    )
+      )
    }
  )
 }
--- a/R/plots.R
+++ b/R/plots.R
@ -9,7 +9,7 @@ base_color_transparent <- function() "#1964bf80"
 #' Color palette for gene sets.
 #' @noRd
 gene_set_color <- function(index) {
-    c("#FF7F00", "#4DAF4A", "#984EA3")[index]
+  c("#FF7F00", "#4DAF4A", "#984EA3")[index]
 }
 #' Plot gene positions.
@ -22,74 +22,74 @@ gene_set_color <- function(index) {
 #'
 #' @export
 plot_positions <- function(species_ids, gene_sets) {
-    if (!requireNamespace("plotly", quietly = TRUE)) {
+  if (!requireNamespace("plotly", quietly = TRUE)) {
-        stop("Please install \"plotly\" to use this function.")
+    stop("Please install \"plotly\" to use this function.")
-    }
+  }
-    # Prefilter data by species.
+  # Prefilter data by species.
-    data <- geposan::distances[species %chin% species_ids]
+  data <- geposan::distances[species %chin% species_ids]
-    species_max_distance <- data[,
+  species_max_distance <- data[,
-        .(max_distance = max(distance)),
+    .(max_distance = max(distance)),
-        by = species
+    by = species
-    ]
+  ]
-    # Prefilter species.
+  # Prefilter species.
-    species <- geposan::species[id %chin% species_ids]
+  species <- geposan::species[id %chin% species_ids]
-    plot <- plotly::plot_ly() |>
+  plot <- plotly::plot_ly() |>
-        plotly::layout(
+    plotly::layout(
-            xaxis = list(
+      xaxis = list(
-                title = "Species",
+        title = "Species",
-                tickvals = species$id,
+        tickvals = species$id,
-                ticktext = species$name
+        ticktext = species$name
-            ),
+      ),
-            yaxis = list(title = "Distance to telomeres [Bp]"),
+      yaxis = list(title = "Distance to telomeres [Bp]"),
-            bargap = 0.9
+      bargap = 0.9
-        ) |>
+    ) |>
-        plotly::add_bars(
+    plotly::add_bars(
-            data = species_max_distance,
+      data = species_max_distance,
-            x = ~species,
+      x = ~species,
-            y = ~max_distance,
+      y = ~max_distance,
-            name = "All genes",
+      name = "All genes",
-            marker = list(color = base_color())
+      marker = list(color = base_color())
    )
  if (length(gene_sets) > 0) {
    # Include gene information which will be used for labeling
    gene_set_data <- merge(
      data[gene %chin% unlist(gene_sets)],
      geposan::genes,
      by.x = "gene",
      by.y = "id"
    )
    index <- 1
    for (gene_set_name in names(gene_sets)) {
      gene_set <- gene_sets[[gene_set_name]]
      plot <- plot |> plotly::add_markers(
        data = gene_set_data[gene %chin% gene_set],
        x = ~species,
        y = ~distance,
        name = gene_set_name,
        text = ~ glue::glue(
          "<b>{name}</b><br>",
          "{round(distance / 1000000, digits = 2)} MBp"
        ),
        hoverinfo = "text",
        marker = list(
          size = 10,
          color = gene_set_color(index)
        )
      )
-    if (length(gene_sets) > 0) {
+      index <- index + 1
        # Include gene information which will be used for labeling
        gene_set_data <- merge(
            data[gene %chin% unlist(gene_sets)],
            geposan::genes,
            by.x = "gene",
            by.y = "id"
        )
        index <- 1
        for (gene_set_name in names(gene_sets)) {
            gene_set <- gene_sets[[gene_set_name]]
            plot <- plot |> plotly::add_markers(
                data = gene_set_data[gene %chin% gene_set],
                x = ~species,
                y = ~distance,
                name = gene_set_name,
                text = ~ glue::glue(
                    "<b>{name}</b><br>",
                    "{round(distance / 1000000, digits = 2)} MBp"
                ),
                hoverinfo = "text",
                marker = list(
                    size = 10,
                    color = gene_set_color(index)
                )
            )
            index <- index + 1
        }
    }
  }
-    plot
+  plot
 }
@ -108,75 +108,75 @@ plot_positions <- function(species_ids, gene_sets) {
 #'
 #' @export
 plot_rankings <- function(rankings, gene_sets) {
-    if (!requireNamespace("plotly", quietly = TRUE)) {
+  if (!requireNamespace("plotly", quietly = TRUE)) {
-        stop("Please install \"plotly\" to use this function.")
+    stop("Please install \"plotly\" to use this function.")
-    }
+  }
-    plot <- plotly::plot_ly() |>
+  plot <- plotly::plot_ly() |>
-        plotly::layout(
+    plotly::layout(
-            xaxis = list(tickvals = names(rankings)),
+      xaxis = list(tickvals = names(rankings)),
-            yaxis = list(title = "Score")
+      yaxis = list(title = "Score")
    )
  is_first <- TRUE
  for (ranking_name in names(rankings)) {
    ranking <- rankings[[ranking_name]]
    plot <- plot |> plotly::add_trace(
      data = ranking,
      x = ranking_name,
      y = ~score,
      name = "All genes",
      type = "violin",
      spanmode = "hard",
      points = FALSE,
      showlegend = is_first,
      hoverinfo = "skip",
      line = list(color = base_color()),
      fillcolor = base_color_transparent()
    )
    if (length(gene_sets) > 0) {
      gene_set_data <- merge(
        ranking[gene %chin% unlist(gene_sets)],
        geposan::genes,
        by.x = "gene",
        by.y = "id"
      )
      index <- 1
      for (gene_set_name in names(gene_sets)) {
        gene_set <- gene_sets[[gene_set_name]]
        plot <- plot |> plotly::add_markers(
          data = gene_set_data[gene %chin% gene_set],
          x = ranking_name,
          y = ~score,
          name = gene_set_name,
          text = ~ glue::glue(
            "<b>{name}</b><br>",
            "Score: {round(score, digits = 2)}<br>",
            "Rank: {rank}<br>",
            "Percentile: {round(percentile * 100, digits = 2)}%"
          ),
          hoverinfo = "text",
          showlegend = is_first,
          marker = list(
            size = 10,
            color = gene_set_color(index)
          )
        )
-    is_first <- TRUE
+        index <- index + 1
-
+      }
    for (ranking_name in names(rankings)) {
        ranking <- rankings[[ranking_name]]
        plot <- plot |> plotly::add_trace(
            data = ranking,
            x = ranking_name,
            y = ~score,
            name = "All genes",
            type = "violin",
            spanmode = "hard",
            points = FALSE,
            showlegend = is_first,
            hoverinfo = "skip",
            line = list(color = base_color()),
            fillcolor = base_color_transparent()
        )
        if (length(gene_sets) > 0) {
            gene_set_data <- merge(
                ranking[gene %chin% unlist(gene_sets)],
                geposan::genes,
                by.x = "gene",
                by.y = "id"
            )
            index <- 1
            for (gene_set_name in names(gene_sets)) {
                gene_set <- gene_sets[[gene_set_name]]
                plot <- plot |> plotly::add_markers(
                    data = gene_set_data[gene %chin% gene_set],
                    x = ranking_name,
                    y = ~score,
                    name = gene_set_name,
                    text = ~ glue::glue(
                        "<b>{name}</b><br>",
                        "Score: {round(score, digits = 2)}<br>",
                        "Rank: {rank}<br>",
                        "Percentile: {round(percentile * 100, digits = 2)}%"
                    ),
                    hoverinfo = "text",
                    showlegend = is_first,
                    marker = list(
                        size = 10,
                        color = gene_set_color(index)
                    )
                )
                index <- index + 1
            }
        }
        is_first <- FALSE
    }
-    plot
+    is_first <- FALSE
  }
  plot
 }
@ -194,88 +194,88 @@ plot_rankings <- function(rankings, gene_sets) {
 #'
 #' @export
 plot_scores <- function(ranking, gene_sets = NULL, max_rank = NULL) {
-    if (!requireNamespace("plotly", quietly = TRUE)) {
+  if (!requireNamespace("plotly", quietly = TRUE)) {
-        stop("Please install \"plotly\" to use this function.")
+    stop("Please install \"plotly\" to use this function.")
-    }
+  }
-    # To speed up rendering, don't show every single gene.
+  # To speed up rendering, don't show every single gene.
-    n_ranks <- nrow(ranking)
+  n_ranks <- nrow(ranking)
-    sample_ranking <- ranking[seq(1, n_ranks, 5)]
+  sample_ranking <- ranking[seq(1, n_ranks, 5)]
-    plot <- plotly::plot_ly() |>
+  plot <- plotly::plot_ly() |>
-        plotly::add_lines(
+    plotly::add_lines(
-            data = sample_ranking,
+      data = sample_ranking,
-            x = ~percentile,
+      x = ~percentile,
-            y = ~score,
+      y = ~score,
-            name = "All genes",
+      name = "All genes",
-            hoverinfo = "skip",
+      hoverinfo = "skip",
-            line = list(width = 4, color = base_color())
+      line = list(width = 4, color = base_color())
-        ) |>
+    ) |>
-        plotly::layout(
+    plotly::layout(
-            xaxis = list(
+      xaxis = list(
-                title = "Percentile",
+        title = "Percentile",
-                tickformat = ".0%"
+        tickformat = ".0%"
-            ),
+      ),
-            yaxis = list(title = "Score")
+      yaxis = list(title = "Score")
    )
  if (length(gene_sets) > 0) {
    # Include gene information which will be used for labeling
    gene_set_data <- merge(
      ranking[gene %chin% unlist(gene_sets)],
      geposan::genes,
      by.x = "gene",
      by.y = "id"
    )
    index <- 1
    for (gene_set_name in names(gene_sets)) {
      gene_set <- gene_sets[[gene_set_name]]
      plot <- plot |> plotly::add_markers(
        data = gene_set_data[gene %chin% gene_set],
        x = ~percentile,
        y = ~score,
        name = gene_set_name,
        text = ~ glue::glue(
          "<b>{name}</b><br>",
          "Score: {round(score, digits = 2)}<br>",
          "Rank: {rank}<br>",
          "Percentile: {round(percentile * 100, digits = 2)}%"
        ),
        hoverinfo = "text",
        marker = list(
          size = 10,
          color = gene_set_color(index)
        )
      )
-    if (length(gene_sets) > 0) {
+      index <- index + 1
-        # Include gene information which will be used for labeling
+    }
-        gene_set_data <- merge(
+  }
-            ranking[gene %chin% unlist(gene_sets)],
+
-            geposan::genes,
+
-            by.x = "gene",
+  if (!is.null(max_rank)) {
-            by.y = "id"
+    first_not_included_rank <- max_rank + 1
    last_rank <- ranking[, .N]
    if (first_not_included_rank <= last_rank) {
      plot <- plot |> plotly::layout(
        shapes = list(
          type = "rect",
          fillcolor = "black",
          opacity = 0.1,
          x0 = 1 - first_not_included_rank / n_ranks,
          x1 = 1 - last_rank / n_ranks,
          y0 = 0.0,
          y1 = 1.0
        )
-
+      )
        index <- 1
        for (gene_set_name in names(gene_sets)) {
            gene_set <- gene_sets[[gene_set_name]]
            plot <- plot |> plotly::add_markers(
                data = gene_set_data[gene %chin% gene_set],
                x = ~percentile,
                y = ~score,
                name = gene_set_name,
                text = ~ glue::glue(
                    "<b>{name}</b><br>",
                    "Score: {round(score, digits = 2)}<br>",
                    "Rank: {rank}<br>",
                    "Percentile: {round(percentile * 100, digits = 2)}%"
                ),
                hoverinfo = "text",
                marker = list(
                    size = 10,
                    color = gene_set_color(index)
                )
            )
            index <- index + 1
        }
    }
  }
-
+  plot
    if (!is.null(max_rank)) {
        first_not_included_rank <- max_rank + 1
        last_rank <- ranking[, .N]
        if (first_not_included_rank <= last_rank) {
            plot <- plot |> plotly::layout(
                shapes = list(
                    type = "rect",
                    fillcolor = "black",
                    opacity = 0.1,
                    x0 = 1 - first_not_included_rank / n_ranks,
                    x1 = 1 - last_rank / n_ranks,
                    y0 = 0.0,
                    y1 = 1.0
                )
            )
        }
    }
    plot
 }
 #' Visualize a ranking by comparing gene sets in a boxplot.
@ -290,44 +290,44 @@ plot_scores <- function(ranking, gene_sets = NULL, max_rank = NULL) {
 #'
 #' @export
 plot_boxplot <- function(ranking, gene_sets = NULL) {
-    if (!requireNamespace("plotly", quietly = TRUE)) {
+  if (!requireNamespace("plotly", quietly = TRUE)) {
-        stop("Please install \"plotly\" to use this function.")
+    stop("Please install \"plotly\" to use this function.")
  }
  plot <- plotly::plot_ly() |>
    plotly::add_boxplot(
      data = ranking,
      x = "All genes",
      y = ~score,
      name = "All genes",
      showlegend = FALSE,
      line = list(color = base_color())
    ) |>
    plotly::layout(
      xaxis = list(tickvals = c("All genes", names(gene_sets))),
      yaxis = list(title = "Score")
    )
  if (length(gene_sets) > 0) {
    index <- 1
    for (gene_set_name in names(gene_sets)) {
      gene_set <- gene_sets[[gene_set_name]]
      plot <- plot |> plotly::add_boxplot(
        data = ranking[gene %chin% gene_set],
        x = gene_set_name,
        y = ~score,
        name = gene_set_name,
        showlegend = FALSE,
        line = list(color = gene_set_color(index))
      )
      index <- index + 1
    }
  }
-    plot <- plotly::plot_ly() |>
+  plot
        plotly::add_boxplot(
            data = ranking,
            x = "All genes",
            y = ~score,
            name = "All genes",
            showlegend = FALSE,
            line = list(color = base_color())
        ) |>
        plotly::layout(
            xaxis = list(tickvals = c("All genes", names(gene_sets))),
            yaxis = list(title = "Score")
        )
    if (length(gene_sets) > 0) {
        index <- 1
        for (gene_set_name in names(gene_sets)) {
            gene_set <- gene_sets[[gene_set_name]]
            plot <- plot |> plotly::add_boxplot(
                data = ranking[gene %chin% gene_set],
                x = gene_set_name,
                y = ~score,
                name = gene_set_name,
                showlegend = FALSE,
                line = list(color = gene_set_color(index))
            )
            index <- index + 1
        }
    }
    plot
 }
 #' Show the distribution of scores across chromosomes.
@ -340,46 +340,46 @@ plot_boxplot <- function(ranking, gene_sets = NULL) {
 #'
 #' @export
 plot_chromosomes <- function(ranking) {
-    if (!requireNamespace("plotly", quietly = TRUE)) {
+  if (!requireNamespace("plotly", quietly = TRUE)) {
-        stop("Please install \"plotly\" to use this function.")
+    stop("Please install \"plotly\" to use this function.")
-    }
+  }
-    data <- merge(ranking, geposan::genes, by.x = "gene", by.y = "id")
+  data <- merge(ranking, geposan::genes, by.x = "gene", by.y = "id")
-    data <- data[, .(score = mean(score)), by = "chromosome"]
+  data <- data[, .(score = mean(score)), by = "chromosome"]
-    # Get an orderable integer from a chromosome name.
+  # Get an orderable integer from a chromosome name.
-    chromosome_index <- function(chromosome) {
+  chromosome_index <- function(chromosome) {
-        index <- suppressWarnings(as.integer(chromosome))
+    index <- suppressWarnings(as.integer(chromosome))
-        ifelse(
+    ifelse(
-            !is.na(index),
+      !is.na(index),
-            index,
+      index,
-            ifelse(
+      ifelse(
-                chromosome == "X",
+        chromosome == "X",
-                998,
+        998,
-                999
+        999
-            )
+      )
-        )
+    )
-    }
+  }
-    data[, index := chromosome_index(chromosome)]
+  data[, index := chromosome_index(chromosome)]
-    setorder(data, "index")
+  setorder(data, "index")
-    plotly::plot_ly(
+  plotly::plot_ly(
-        data = data,
+    data = data,
-        x = ~chromosome,
+    x = ~chromosome,
-        y = ~score,
+    y = ~score,
-        type = "bar",
+    type = "bar",
-        marker = list(color = base_color())
+    marker = list(color = base_color())
-    ) |>
+  ) |>
-        plotly::layout(
+    plotly::layout(
-            xaxis = list(
+      xaxis = list(
-                title = "Chromosome",
+        title = "Chromosome",
-                categoryorder = "array",
+        categoryorder = "array",
-                categoryarray = ~chromosome
+        categoryarray = ~chromosome
-            ),
+      ),
-            yaxis = list(title = "Mean score")
+      yaxis = list(title = "Mean score")
-        )
+    )
 }
 #' Plot scores in relation to chromosomal position of genes.
@ -398,74 +398,74 @@ plot_chromosomes <- function(ranking) {
 plot_scores_by_position <- function(ranking,
                                    chromosome_name = NULL,
                                    gene_sets = NULL) {
-    if (!requireNamespace("plotly", quietly = TRUE)) {
+  if (!requireNamespace("plotly", quietly = TRUE)) {
-        stop("Please install \"plotly\" to use this function.")
+    stop("Please install \"plotly\" to use this function.")
-    }
+  }
-    distance_data <- if (!is.null(chromosome_name)) {
+  distance_data <- if (!is.null(chromosome_name)) {
-        chromosome_name_ <- chromosome_name
+    chromosome_name_ <- chromosome_name
-        geposan::distances[
+    geposan::distances[
-            species == "hsapiens" &
+      species == "hsapiens" &
-                chromosome_name == chromosome_name_
+        chromosome_name == chromosome_name_
-        ]
+    ]
-    } else {
+  } else {
-        geposan::distances[species == "hsapiens"]
+    geposan::distances[species == "hsapiens"]
-    }
+  }
-    data <- merge(ranking, distance_data, by = "gene")
+  data <- merge(ranking, distance_data, by = "gene")
-    data <- merge(
+  data <- merge(
-        data,
+    data,
-        geposan::genes,
+    geposan::genes,
-        by.x = "gene",
+    by.x = "gene",
-        by.y = "id"
+    by.y = "id"
  )
  data[, `:=`(gene_set = "All genes", color = base_color())]
  index <- 1
  for (gene_set_name in names(gene_sets)) {
    gene_set_genes <- gene_sets[[gene_set_name]]
    data[
      gene %chin% gene_set_genes,
      `:=`(
        gene_set = gene_set_name,
        color = gene_set_color(index)
      )
    ]
    index <- index + 1
  }
  # Use distances instead of positions in case all chromosomes are included.
  if (is.null(chromosome_name)) {
    data[, x := distance]
  } else {
    data[, x := start_position]
  }
  plotly::plot_ly() |>
    plotly::add_markers(
      data = data,
      x = ~x,
      y = ~score,
      name = ~gene_set,
      text = ~ glue::glue(
        "<b>{name}</b><br>",
        if (is.null(chromosome_name)) "Distance: " else "Position: ",
        "{round(x / 1000000, digits = 2)} MBp<br>",
        "Score: {round(score, digits = 2)}<br>",
        "Rank: {rank}<br>",
        "Percentile: {round(percentile * 100, digits = 2)}%"
      ),
      hoverinfo = "text",
    ) |>
    plotly::layout(
      xaxis = list(title = if (is.null(chromosome_name)) {
        "Distance (Bp)"
      } else {
        "Position (Bp)"
      }),
      yaxis = list(title = "Score")
    )
    data[, `:=`(gene_set = "All genes", color = base_color())]
    index <- 1
    for (gene_set_name in names(gene_sets)) {
        gene_set_genes <- gene_sets[[gene_set_name]]
        data[
            gene %chin% gene_set_genes,
            `:=`(
                gene_set = gene_set_name,
                color = gene_set_color(index)
            )
        ]
        index <- index + 1
    }
    # Use distances instead of positions in case all chromosomes are included.
    if (is.null(chromosome_name)) {
        data[, x := distance]
    } else {
        data[, x := start_position]
    }
    plotly::plot_ly() |>
        plotly::add_markers(
            data = data,
            x = ~x,
            y = ~score,
            name = ~gene_set,
            text = ~ glue::glue(
                "<b>{name}</b><br>",
                if (is.null(chromosome_name)) "Distance: " else "Position: ",
                "{round(x / 1000000, digits = 2)} MBp<br>",
                "Score: {round(score, digits = 2)}<br>",
                "Rank: {rank}<br>",
                "Percentile: {round(percentile * 100, digits = 2)}%"
            ),
            hoverinfo = "text",
        ) |>
        plotly::layout(
            xaxis = list(title = if (is.null(chromosome_name)) {
                "Distance (Bp)"
            } else {
                "Position (Bp)"
            }),
            yaxis = list(title = "Score")
        )
 }
--- a/R/preset.R
+++ b/R/preset.R
@ -21,55 +21,55 @@ preset <- function(reference_gene_ids,
                   methods = all_methods(),
                   species_ids = geposan::species$id,
                   gene_ids = geposan::genes$id) {
-    # Count included species per gene.
+  # Count included species per gene.
-    genes_n_species <- geposan::distances[
+  genes_n_species <- geposan::distances[
-        species %chin% species_ids,
+    species %chin% species_ids,
-        .(n_species = .N),
+    .(n_species = .N),
-        by = "gene"
+    by = "gene"
-    ]
+  ]
-    # Filter out genes with less than 25% existing orthologs.
+  # Filter out genes with less than 25% existing orthologs.
-    gene_ids_filtered <- genes_n_species[
+  gene_ids_filtered <- genes_n_species[
-        gene %chin% gene_ids &
+    gene %chin% gene_ids &
-            n_species >= 0.25 * length(species_ids),
+      n_species >= 0.25 * length(species_ids),
-        gene
+    gene
-    ]
+  ]
-    reference_gene_ids_excluded <- reference_gene_ids[
+  reference_gene_ids_excluded <- reference_gene_ids[
-        !reference_gene_ids %chin% gene_ids_filtered
+    !reference_gene_ids %chin% gene_ids_filtered
-    ]
+  ]
-    if (length(reference_gene_ids_excluded > 0)) {
+  if (length(reference_gene_ids_excluded > 0)) {
-        warning(paste0(
+    warning(paste0(
-            "The following reference gene IDs are excluded from the preset ",
+      "The following reference gene IDs are excluded from the preset ",
-            "because they don't have enough data: ",
+      "because they don't have enough data: ",
-            paste(reference_gene_ids_excluded, collapse = ", ")
+      paste(reference_gene_ids_excluded, collapse = ", ")
-        ))
+    ))
-    }
+  }
-    reference_gene_ids_included <- reference_gene_ids[
+  reference_gene_ids_included <- reference_gene_ids[
-        reference_gene_ids %chin% gene_ids_filtered
+    reference_gene_ids %chin% gene_ids_filtered
-    ]
+  ]
-    if (length(reference_gene_ids_included) < 1) {
+  if (length(reference_gene_ids_included) < 1) {
-        stop(paste0(
+    stop(paste0(
-            "There has to be at least one reference gene for the preset to be ",
+      "There has to be at least one reference gene for the preset to be ",
-            "valid. Please note that some methods may require more reference ",
+      "valid. Please note that some methods may require more reference ",
-            "genes."
+      "genes."
-        ))
+    ))
-    }
+  }
-    # The included data gets sorted to be able to produce predictable hashes
+  # The included data gets sorted to be able to produce predictable hashes
-    # for the object later.
+  # for the object later.
-    structure(
+  structure(
-        list(
+    list(
-            reference_gene_ids = sort(reference_gene_ids_included),
+      reference_gene_ids = sort(reference_gene_ids_included),
-            methods = methods,
+      methods = methods,
-            species_ids = sort(species_ids),
+      species_ids = sort(species_ids),
-            gene_ids = sort(gene_ids_filtered)
+      gene_ids = sort(gene_ids_filtered)
-        ),
+    ),
-        class = "geposan_preset"
+    class = "geposan_preset"
-    )
+  )
 }
 #' S3 method to print a preset object.
@ -81,20 +81,20 @@ preset <- function(reference_gene_ids,
 #'
 #' @export
 print.geposan_preset <- function(x, ...) {
-    cat(sprintf(
+  cat(sprintf(
-        paste0(
+    paste0(
-            "geposan preset:",
+      "geposan preset:",
-            "\n  Reference genes: %i",
+      "\n  Reference genes: %i",
-            "\n  Included methods: %s",
+      "\n  Included methods: %s",
-            "\n  Number of species: %i",
+      "\n  Number of species: %i",
-            "\n  Number of genes: %i",
+      "\n  Number of genes: %i",
-            "\n"
+      "\n"
-        ),
+    ),
-        length(x$reference_gene_ids),
+    length(x$reference_gene_ids),
-        paste(sapply(x$methods, function(m) m$id), collapse = ", "),
+    paste(sapply(x$methods, function(m) m$id), collapse = ", "),
-        length(x$species_ids),
+    length(x$species_ids),
-        length(x$gene_ids)
+    length(x$gene_ids)
-    ))
+  ))
-    invisible(x)
+  invisible(x)
 }
--- a/R/ranking.R
+++ b/R/ranking.R
@ -13,35 +13,35 @@
 #'
 #' @export
 ranking <- function(analysis, weights) {
-    ranking <- if (inherits(analysis, "geposan_analysis")) {
+  ranking <- if (inherits(analysis, "geposan_analysis")) {
-        copy(analysis$scores)
+    copy(analysis$scores)
-    } else if (inherits(analysis, "geposan_ranking")) {
+  } else if (inherits(analysis, "geposan_ranking")) {
-        copy(analysis)
+    copy(analysis)
-    } else {
+  } else {
-        stop("Invalid analyis. Use geposan::analyze().")
+    stop("Invalid analyis. Use geposan::analyze().")
-    }
+  }
-    ranking[, score := 0.0]
+  ranking[, score := 0.0]
-    for (method in names(weights)) {
+  for (method in names(weights)) {
-        weighted <- weights[[method]] * ranking[, ..method]
+    weighted <- weights[[method]] * ranking[, ..method]
-        ranking[, score := score + weighted]
+    ranking[, score := score + weighted]
-    }
+  }
-    # Normalize scores to be between 0.0 and 1.0.
+  # Normalize scores to be between 0.0 and 1.0.
-    min_score <- ranking[, min(score)]
+  min_score <- ranking[, min(score)]
-    max_score <- ranking[, max(score)]
+  max_score <- ranking[, max(score)]
-    score_range <- max_score - min_score
+  score_range <- max_score - min_score
-    ranking[, score := (score - min_score) / score_range]
+  ranking[, score := (score - min_score) / score_range]
-    setorder(ranking, -score)
+  setorder(ranking, -score)
-    ranking[, rank := .I]
+  ranking[, rank := .I]
-    ranking[, percentile := 1 - rank / nrow(ranking)]
+  ranking[, percentile := 1 - rank / nrow(ranking)]
-    structure(
+  structure(
-        ranking,
+    ranking,
-        class = c("geposan_ranking", class(ranking))
+    class = c("geposan_ranking", class(ranking))
-    )
+  )
 }
 #' Find the best weights to rank the results.
@ -61,40 +61,40 @@ ranking <- function(analysis, weights) {
 #' @export
 optimal_weights <- function(analysis, methods, reference_gene_ids,
                            target = "mean") {
-    if (!inherits(analysis, c("geposan_analysis", "geposan_ranking"))) {
+  if (!inherits(analysis, c("geposan_analysis", "geposan_ranking"))) {
-        stop("Invalid analyis. Use geposan::analyze().")
+    stop("Invalid analyis. Use geposan::analyze().")
  }
  # Compute the target rank of the reference genes when applying the
  # weights.
  target_rank <- function(factors) {
    data <- ranking(analysis, as.list(factors))
    result <- data[
      gene %chin% reference_gene_ids,
      if (target == "min") {
        min(rank)
      } else if (target == "max") {
        max(rank)
      } else if (target == "mean") {
        mean(rank)
      } else {
        stats::median(rank)
      }
    ]
    if (result > 0) {
      result
    } else {
      Inf
    }
  }
  initial_factors <- rep(1.0, length(methods))
  names(initial_factors) <- methods
-    # Compute the target rank of the reference genes when applying the
+  optimal_factors <- stats::optim(initial_factors, target_rank)$par
    # weights.
    target_rank <- function(factors) {
        data <- ranking(analysis, as.list(factors))
-        result <- data[
+  as.list(optimal_factors / max(abs(optimal_factors)))
            gene %chin% reference_gene_ids,
            if (target == "min") {
                min(rank)
            } else if (target == "max") {
                max(rank)
            } else if (target == "mean") {
                mean(rank)
            } else {
                stats::median(rank)
            }
        ]
        if (result > 0) {
            result
        } else {
            Inf
        }
    }
    initial_factors <- rep(1.0, length(methods))
    names(initial_factors) <- methods
    optimal_factors <- stats::optim(initial_factors, target_rank)$par
    as.list(optimal_factors / max(abs(optimal_factors)))
 }
--- a/R/result.R
+++ b/R/result.R
@ -10,18 +10,18 @@
 #'
 #' @export
 result <- function(method_id, scores, details = list()) {
-    stopifnot(is.data.frame(scores) &
+  stopifnot(is.data.frame(scores) &
-        c("gene", "score") %chin% colnames(scores))
+    c("gene", "score") %chin% colnames(scores))
-    stopifnot(is.list(details))
+  stopifnot(is.list(details))
-    structure(
+  structure(
-        list(
+    list(
-            method_id = method_id,
+      method_id = method_id,
-            scores = scores,
+      scores = scores,
-            details = details
+      details = details
-        ),
+    ),
-        class = "geposan_result"
+    class = "geposan_result"
-    )
+  )
 }
 #' Print a result object.
@ -33,18 +33,18 @@ result <- function(method_id, scores, details = list()) {
 #'
 #' @export
 print.geposan_result <- function(x, ...) {
-    cat(sprintf(
+  cat(sprintf(
-        paste0(
+    paste0(
-            "geposan result:",
+      "geposan result:",
-            "\n  Method: %s",
+      "\n  Method: %s",
-            "\n  Number of genes: %i",
+      "\n  Number of genes: %i",
-            "\n  Available details: %s",
+      "\n  Available details: %s",
-            "\n"
+      "\n"
-        ),
+    ),
-        x$method_id,
+    x$method_id,
-        nrow(x$scores),
+    nrow(x$scores),
-        paste(names(x$details), collapse = ", ")
+    paste(names(x$details), collapse = ", ")
-    ))
+  ))
-    invisible(x)
+  invisible(x)
 }
--- a/R/utils.R
+++ b/R/utils.R
@ -8,25 +8,25 @@
 # @param objects A vector of objects that this expression depends on. The hash
 #   of those objects will be used for identifying the cache file.
 cached <- function(name, objects, expr) {
-    if (!dir.exists("cache")) {
+  if (!dir.exists("cache")) {
-        dir.create("cache")
+    dir.create("cache")
-    }
+  }
-    id <- rlang::hash(objects)
+  id <- rlang::hash(objects)
-    cache_file <- sprintf("cache/%s_%s.rda", name, id)
+  cache_file <- sprintf("cache/%s_%s.rda", name, id)
-    if (file.exists(cache_file)) {
+  if (file.exists(cache_file)) {
-        # If the cache file exists, we restore the data from it.
+    # If the cache file exists, we restore the data from it.
-        load(cache_file)
+    load(cache_file)
-    } else {
+  } else {
-        # If the cache file doesn't exist, we have to do the computation.
+    # If the cache file doesn't exist, we have to do the computation.
-        data <- expr
+    data <- expr
-        # The results are cached for the next run.
+    # The results are cached for the next run.
-        save(data, file = cache_file, compress = "xz")
+    save(data, file = cache_file, compress = "xz")
-    }
+  }
-    data
+  data
 }
 # This is needed to make data.table's symbols available within the package.
--- a/R/validate.R
+++ b/R/validate.R
@ -27,68 +27,68 @@
 #'
 #' @export
 validate <- function(ranking, reference_gene_ids, method_ids, progress = NULL) {
-    if (!inherits(ranking, "geposan_ranking")) {
+  if (!inherits(ranking, "geposan_ranking")) {
-        stop("Ranking is invalid. Use geposan::ranking().")
+    stop("Ranking is invalid. Use geposan::ranking().")
-    }
+  }
-    if (is.null(progress)) {
+  if (is.null(progress)) {
-        progress_bar <- progress::progress_bar$new()
+    progress_bar <- progress::progress_bar$new()
-        progress_bar$update(0.0)
+    progress_bar$update(0.0)
-        progress <- function(progress_value) {
+    progress <- function(progress_value) {
-            if (!progress_bar$finished) {
+      if (!progress_bar$finished) {
-                progress_bar$update(progress_value)
+        progress_bar$update(progress_value)
-                if (progress_value >= 1.0) {
+        if (progress_value >= 1.0) {
-                    progress_bar$terminate()
+          progress_bar$terminate()
                }
            }
        }
      }
    }
  }
-    progress_state <- 0.0
+  progress_state <- 0.0
-    progress_step <- 1.0 / length(reference_gene_ids)
+  progress_step <- 1.0 / length(reference_gene_ids)
-    results <- ranking[gene %chin% reference_gene_ids, .(gene, percentile)]
+  results <- ranking[gene %chin% reference_gene_ids, .(gene, percentile)]
-    for (gene_id in reference_gene_ids) {
+  for (gene_id in reference_gene_ids) {
-        included_gene_ids <- reference_gene_ids[
+    included_gene_ids <- reference_gene_ids[
-            reference_gene_ids != gene_id
+      reference_gene_ids != gene_id
-        ]
+    ]
-        weights <- optimal_weights(
+    weights <- optimal_weights(
-            ranking,
+      ranking,
-            method_ids,
+      method_ids,
-            included_gene_ids
+      included_gene_ids
        )
        ranking_validation <- ranking(ranking, weights)
        results[
            gene == gene_id,
            percentile_validation := ranking_validation[
                gene == gene_id,
                percentile
            ]
        ]
        if (!is.null(progress)) {
            progress_state <- progress_state + progress_step
            progress(progress_state)
        }
    }
    results[, error := percentile - percentile_validation]
    setorder(results, error)
    structure(
        list(
            validation = results,
            mean_percentile_original = results[, mean(percentile)],
            mean_percentile_validation = results[, mean(percentile_validation)],
            mean_error = results[, mean(error)]
        ),
        class = "geposan_validation"
    )
    ranking_validation <- ranking(ranking, weights)
    results[
      gene == gene_id,
      percentile_validation := ranking_validation[
        gene == gene_id,
        percentile
      ]
    ]
    if (!is.null(progress)) {
      progress_state <- progress_state + progress_step
      progress(progress_state)
    }
  }
  results[, error := percentile - percentile_validation]
  setorder(results, error)
  structure(
    list(
      validation = results,
      mean_percentile_original = results[, mean(percentile)],
      mean_percentile_validation = results[, mean(percentile_validation)],
      mean_error = results[, mean(error)]
    ),
    class = "geposan_validation"
  )
 }
 #' S3 method to print a validation object.
@ -100,18 +100,18 @@ validate <- function(ranking, reference_gene_ids, method_ids, progress = NULL) {
 #'
 #' @export
 print.geposan_validation <- function(x, ...) {
-    cat(sprintf(
+  cat(sprintf(
-        paste0(
+    paste0(
-            "geposan validation:",
+      "geposan validation:",
-            "\n  Mean percentile original: %.1f%%",
+      "\n  Mean percentile original: %.1f%%",
-            "\n  Mean percentile validation: %.1f%%",
+      "\n  Mean percentile validation: %.1f%%",
-            "\n  Mean error: %.1f percent points",
+      "\n  Mean error: %.1f percent points",
-            "\n"
+      "\n"
-        ),
+    ),
-        x$mean_percentile_original * 100,
+    x$mean_percentile_original * 100,
-        x$mean_percentile_validation * 100,
+    x$mean_percentile_validation * 100,
-        x$mean_error * 100
+    x$mean_error * 100
-    ))
+  ))
-    invisible(x)
+  invisible(x)
 }
--- a/scripts/chromosome_names.R
+++ b/scripts/chromosome_names.R
@ -5,23 +5,23 @@ ensembl_api_url <- "https://rest.ensembl.org"
 #' Perform a request to the Ensembl REST API.
 ensembl_request <- function(api_path) {
-    content(stop_for_status(GET(
+  content(stop_for_status(GET(
-        paste0(ensembl_api_url, api_path),
+    paste0(ensembl_api_url, api_path),
-        content_type_json()
+    content_type_json()
-    )))
+  )))
 }
 #' Get IDs of all available vertebrates.
 get_species_ids <- function() {
-    species <- ensembl_request("/info/species")$species
+  species <- ensembl_request("/info/species")$species
-    sapply(species, function(species) species$name)
+  sapply(species, function(species) species$name)
 }
 #' Get all chromosomes names for a species.
 get_species_chromosomes <- function(species_id) {
-    chromosomes <- unlist(ensembl_request(
+  chromosomes <- unlist(ensembl_request(
-        paste0("/info/assembly/", species_id)
+    paste0("/info/assembly/", species_id)
-    )$karyotype)
+  )$karyotype)
 }
 #' Get a vector of all available unqiue chromosome names.
@ -29,6 +29,6 @@ get_species_chromosomes <- function(species_id) {
 #' There are multiple names for mitochondrial DNA which have to be removed
 #' manually, unfortunately.
 get_all_chromosomes <- function() {
-    chromosomes <- sapply(get_species_ids(), get_species_chromosomes)
+  chromosomes <- sapply(get_species_ids(), get_species_chromosomes)
-    unique(unlist(chromosomes))
+  unique(unlist(chromosomes))
 }
--- a/scripts/ensembl.R
+++ b/scripts/ensembl.R
@ -12,8 +12,8 @@ ensembl_datasets <- data.table(biomaRt::listDatasets(ensembl))
 # Filter out species ID and name from the result.
 species <- ensembl_datasets[, .(
-    id = stringr::str_match(dataset, "(.*)_gene_ensembl")[, 2],
+  id = stringr::str_match(dataset, "(.*)_gene_ensembl")[, 2],
-    name = stringr::str_match(description, "(.*) genes \\(.*\\)")[, 2]
+  name = stringr::str_match(description, "(.*) genes \\(.*\\)")[, 2]
 )]
 # List of assemblies that the Ensembl Rest API advertises as chromosomes.
@ -23,232 +23,232 @@ species <- ensembl_datasets[, .(
 #
 # See get_all_chromosomes()
 valid_chromosome_names <- c(
-    "1",
+  "1",
-    "2",
+  "2",
-    "3",
+  "3",
-    "4",
+  "4",
-    "5",
+  "5",
-    "6",
+  "6",
-    "7",
+  "7",
-    "8",
+  "8",
-    "9",
+  "9",
-    "10",
+  "10",
-    "11",
+  "11",
-    "12",
+  "12",
-    "13",
+  "13",
-    "14",
+  "14",
-    "15",
+  "15",
-    "16",
+  "16",
-    "17",
+  "17",
-    "18",
+  "18",
-    "19",
+  "19",
-    "20",
+  "20",
-    "X",
+  "X",
-    "Y",
+  "Y",
-    "21",
+  "21",
-    "4A",
+  "4A",
-    "1A",
+  "1A",
-    "22",
+  "22",
-    "23",
+  "23",
-    "24",
+  "24",
-    "25LG1",
+  "25LG1",
-    "25LG2",
+  "25LG2",
-    "26",
+  "26",
-    "27",
+  "27",
-    "28",
+  "28",
-    "LGE22",
+  "LGE22",
-    "Z",
+  "Z",
-    "25",
+  "25",
-    "29",
+  "29",
-    "A1",
+  "A1",
-    "A2",
+  "A2",
-    "A3",
+  "A3",
-    "B1",
+  "B1",
-    "B2",
+  "B2",
-    "B3",
+  "B3",
-    "B4",
+  "B4",
-    "C1",
+  "C1",
-    "C2",
+  "C2",
-    "D1",
+  "D1",
-    "D2",
+  "D2",
-    "D3",
+  "D3",
-    "D4",
+  "D4",
-    "E1",
+  "E1",
-    "E2",
+  "E2",
-    "E3",
+  "E3",
-    "F1",
+  "F1",
-    "F2",
+  "F2",
-    "2A",
+  "2A",
-    "2B",
+  "2B",
-    "LG01",
+  "LG01",
-    "LG02",
+  "LG02",
-    "LG03",
+  "LG03",
-    "LG04",
+  "LG04",
-    "LG05",
+  "LG05",
-    "LG06",
+  "LG06",
-    "LG07",
+  "LG07",
-    "LG08",
+  "LG08",
-    "LG09",
+  "LG09",
-    "LG10",
+  "LG10",
-    "LG11",
+  "LG11",
-    "LG12",
+  "LG12",
-    "LG13",
+  "LG13",
-    "LG14",
+  "LG14",
-    "LG15",
+  "LG15",
-    "LG16",
+  "LG16",
-    "LG17",
+  "LG17",
-    "LG18",
+  "LG18",
-    "LG19",
+  "LG19",
-    "LG20",
+  "LG20",
-    "LG21",
+  "LG21",
-    "LG22",
+  "LG22",
-    "LG23",
+  "LG23",
-    "LG24",
+  "LG24",
-    "LG25",
+  "LG25",
-    "I",
+  "I",
-    "II",
+  "II",
-    "III",
+  "III",
-    "IV",
+  "IV",
-    "V",
+  "V",
-    "VI",
+  "VI",
-    "VII",
+  "VII",
-    "VIII",
+  "VIII",
-    "IX",
+  "IX",
-    "XI",
+  "XI",
-    "XII",
+  "XII",
-    "XIII",
+  "XIII",
-    "XIV",
+  "XIV",
-    "XV",
+  "XV",
-    "XVI",
+  "XVI",
-    "XVII",
+  "XVII",
-    "XVIII",
+  "XVIII",
-    "XIX",
+  "XIX",
-    "XX",
+  "XX",
-    "XXI",
+  "XXI",
-    "XXII",
+  "XXII",
-    "XXIII",
+  "XXIII",
-    "XXIV",
+  "XXIV",
-    "W",
+  "W",
-    "30",
+  "30",
-    "31",
+  "31",
-    "32",
+  "32",
-    "33",
+  "33",
-    "34",
+  "34",
-    "35",
+  "35",
-    "36",
+  "36",
-    "37",
+  "37",
-    "38",
+  "38",
-    "39",
+  "39",
-    "40",
+  "40",
-    "2L",
+  "2L",
-    "2R",
+  "2R",
-    "3L",
+  "3L",
-    "3R",
+  "3R",
-    "LG1",
+  "LG1",
-    "LG2",
+  "LG2",
-    "LG3",
+  "LG3",
-    "LG4",
+  "LG4",
-    "LG5",
+  "LG5",
-    "LG6",
+  "LG6",
-    "LG7",
+  "LG7",
-    "LG8",
+  "LG8",
-    "LG9",
+  "LG9",
-    "LG26",
+  "LG26",
-    "LG27",
+  "LG27",
-    "LG28",
+  "LG28",
-    "LG29",
+  "LG29",
-    "LG30",
+  "LG30",
-    "1a",
+  "1a",
-    "7b",
+  "7b",
-    "22a",
+  "22a",
-    "LGE22C19W28_E50C23",
+  "LGE22C19W28_E50C23",
-    "LGE64",
+  "LGE64",
-    "7a",
+  "7a",
-    "MIC_1",
+  "MIC_1",
-    "MIC_10",
+  "MIC_10",
-    "MIC_11",
+  "MIC_11",
-    "MIC_2",
+  "MIC_2",
-    "MIC_3",
+  "MIC_3",
-    "MIC_4",
+  "MIC_4",
-    "MIC_5",
+  "MIC_5",
-    "MIC_6",
+  "MIC_6",
-    "MIC_7",
+  "MIC_7",
-    "MIC_8",
+  "MIC_8",
-    "MIC_9",
+  "MIC_9",
-    "sgr01",
+  "sgr01",
-    "sgr02",
+  "sgr02",
-    "sgr03",
+  "sgr03",
-    "sgr04",
+  "sgr04",
-    "sgr05",
+  "sgr05",
-    "sgr06",
+  "sgr06",
-    "sgr07",
+  "sgr07",
-    "sgr08",
+  "sgr08",
-    "sgr09",
+  "sgr09",
-    "sgr10",
+  "sgr10",
-    "sgr11",
+  "sgr11",
-    "sgr12",
+  "sgr12",
-    "sgr13",
+  "sgr13",
-    "sgr14",
+  "sgr14",
-    "sgr15",
+  "sgr15",
-    "sgr16",
+  "sgr16",
-    "sgr17",
+  "sgr17",
-    "sgr18",
+  "sgr18",
-    "sgr19",
+  "sgr19",
-    "X1",
+  "X1",
-    "X2",
+  "X2",
-    "X3",
+  "X3",
-    "X4",
+  "X4",
-    "X5",
+  "X5",
-    "a",
+  "a",
-    "b",
+  "b",
-    "c",
+  "c",
-    "d",
+  "d",
-    "f",
+  "f",
-    "g",
+  "g",
-    "h",
+  "h",
-    "41",
+  "41",
-    "42",
+  "42",
-    "43",
+  "43",
-    "44",
+  "44",
-    "45",
+  "45",
-    "46",
+  "46",
-    "47",
+  "47",
-    "48",
+  "48",
-    "49",
+  "49",
-    "50",
+  "50",
-    "LG28B",
+  "LG28B",
-    "LG30F",
+  "LG30F",
-    "LG36F",
+  "LG36F",
-    "LG37M",
+  "LG37M",
-    "LG42F",
+  "LG42F",
-    "LG44F",
+  "LG44F",
-    "LG45M",
+  "LG45M",
-    "LG48F",
+  "LG48F",
-    "LG49B",
+  "LG49B",
-    "LG34",
+  "LG34",
-    "LG35",
+  "LG35",
-    "LG7_11",
+  "LG7_11",
-    "groupI",
+  "groupI",
-    "groupII",
+  "groupII",
-    "groupIII",
+  "groupIII",
-    "groupIV",
+  "groupIV",
-    "groupV",
+  "groupV",
-    "groupVI",
+  "groupVI",
-    "groupVII",
+  "groupVII",
-    "groupVIII",
+  "groupVIII",
-    "groupIX",
+  "groupIX",
-    "groupX",
+  "groupX",
-    "groupXI",
+  "groupXI",
-    "groupXII",
+  "groupXII",
-    "groupXIII",
+  "groupXIII",
-    "groupXIV",
+  "groupXIV",
-    "groupXV",
+  "groupXV",
-    "groupXVI",
+  "groupXVI",
-    "groupXVII",
+  "groupXVII",
-    "groupXVIII",
+  "groupXVIII",
-    "groupXIX",
+  "groupXIX",
-    "groupXX",
+  "groupXX",
-    "groupXXI"
+  "groupXXI"
 )
 #' Get all chromosome names for an Ensembl dataset.
@ -256,8 +256,8 @@ valid_chromosome_names <- c(
 #' The function tries to filter out valid chromosome names from the available
 #' assemblies in the dataset.
 get_chromosome_names <- function(dataset) {
-    chromosome_names <- biomaRt::listFilterOptions(dataset, "chromosome_name")
+  chromosome_names <- biomaRt::listFilterOptions(dataset, "chromosome_name")
-    chromosome_names[chromosome_names %chin% valid_chromosome_names]
+  chromosome_names[chromosome_names %chin% valid_chromosome_names]
 }
 # Retrieve information on human genes. This will only include genes on
@ -267,16 +267,16 @@ rlog::log_info("Retrieving information on human genes")
 dataset <- biomaRt::useDataset("hsapiens_gene_ensembl", mart = ensembl)
 human_data <- data.table(biomaRt::getBM(
-    attributes = c(
+  attributes = c(
-        "ensembl_gene_id",
+    "ensembl_gene_id",
-        "hgnc_symbol",
+    "hgnc_symbol",
-        "chromosome_name",
+    "chromosome_name",
-        "start_position",
+    "start_position",
-        "end_position"
+    "end_position"
-    ),
+  ),
-    filters = "chromosome_name",
+  filters = "chromosome_name",
-    values = get_chromosome_names(dataset),
+  values = get_chromosome_names(dataset),
-    mart = dataset
+  mart = dataset
 ))
 # Remove duplicated gene IDs (at the time of writing, there are a handful).
@ -284,9 +284,9 @@ human_data <- unique(human_data, by = "ensembl_gene_id")
 # Only keep relevant information on genes.
 genes <- human_data[, .(
-    id = ensembl_gene_id,
+  id = ensembl_gene_id,
-    name = hgnc_symbol,
+  name = hgnc_symbol,
-    chromosome = chromosome_name
+  chromosome = chromosome_name
 )]
 # Retrieve gene distance data across species.
@ -296,39 +296,39 @@ distances <- data.table()
 #' Handle data for one species.
 handle_species <- function(species_id, species_data) {
-    chromosomes <- species_data[,
+  chromosomes <- species_data[,
-        .(chromosome_length = max(end_position)),
+    .(chromosome_length = max(end_position)),
-        by = chromosome_name
+    by = chromosome_name
-    ]
+  ]
-    # Store the number of chromosomes in the species table.
+  # Store the number of chromosomes in the species table.
-    species[id == species_id, n_chromosomes := nrow(chromosomes)]
+  species[id == species_id, n_chromosomes := nrow(chromosomes)]
-    # Store the median chromosome length in the species table.
+  # Store the median chromosome length in the species table.
-    species[
+  species[
-        id == species_id,
+    id == species_id,
-        median_chromosome_length := chromosomes[, median(chromosome_length)]
+    median_chromosome_length := chromosomes[, median(chromosome_length)]
-    ]
+  ]
-    # Precompute the genes' distance to the nearest telomere.
+  # Precompute the genes' distance to the nearest telomere.
-    species_distances <- species_data[
+  species_distances <- species_data[
-        chromosomes,
+    chromosomes,
-        .(
+    .(
-            species = species_id,
+      species = species_id,
-            gene = ensembl_gene_id,
+      gene = ensembl_gene_id,
-            chromosome_name = chromosome_name,
+      chromosome_name = chromosome_name,
-            start_position = start_position,
+      start_position = start_position,
-            end_position = end_position,
+      end_position = end_position,
-            distance = pmin(
+      distance = pmin(
-                start_position,
+        start_position,
-                chromosome_length - end_position
+        chromosome_length - end_position
-            )
+      )
-        ),
+    ),
-        on = "chromosome_name"
+    on = "chromosome_name"
-    ]
+  ]
-    # Add species distances to the distances table.
+  # Add species distances to the distances table.
-    distances <<- rbindlist(list(distances, species_distances))
+  distances <<- rbindlist(list(distances, species_distances))
 }
 # Handle the human first, as we already retrieved the data and don't need to
@ -337,67 +337,67 @@ handle_species("hsapiens", human_data)
 # Iterate through all other species and retrieve their distance data.
 for (species_id in species[id != "hsapiens", id]) {
-    rlog::log_info(sprintf("Loading species \"%s\"", species_id))
+  rlog::log_info(sprintf("Loading species \"%s\"", species_id))
-    dataset <- biomaRt::useDataset(
+  dataset <- biomaRt::useDataset(
-        sprintf("%s_gene_ensembl", species_id),
+    sprintf("%s_gene_ensembl", species_id),
-        mart = ensembl
+    mart = ensembl
-    )
+  )
-    # Besides the attributes that are always present, we need to check for
+  # Besides the attributes that are always present, we need to check for
-    # human orthologs. Some species don't have that information and will be
+  # human orthologs. Some species don't have that information and will be
-    # skipped.
+  # skipped.
-    if (!"hsapiens_homolog_ensembl_gene" %chin%
+  if (!"hsapiens_homolog_ensembl_gene" %chin%
-        biomaRt::listAttributes(dataset, what = "name")) {
+    biomaRt::listAttributes(dataset, what = "name")) {
-        rlog::log_info("No data on human orthologs")
+    rlog::log_info("No data on human orthologs")
-        species <- species[id != species_id]
+    species <- species[id != species_id]
-        next
+    next
-    }
+  }
-    chromosome_names <- get_chromosome_names(dataset)
+  chromosome_names <- get_chromosome_names(dataset)
-    # Skip the species, if there are no assembled chromosomes.
+  # Skip the species, if there are no assembled chromosomes.
-    if (length(chromosome_names) <= 0) {
+  if (length(chromosome_names) <= 0) {
-        rlog::log_info("No matching chromosome assemblies")
+    rlog::log_info("No matching chromosome assemblies")
-        species <- species[id != species_id]
+    species <- species[id != species_id]
-        next
+    next
-    }
+  }
-    # Retrieve information on all genes of the current species, that have
+  # Retrieve information on all genes of the current species, that have
-    # human orthologs. This is called "homolog" in the Ensembl schema.
+  # human orthologs. This is called "homolog" in the Ensembl schema.
-    species_distances <- data.table(biomaRt::getBM(
+  species_distances <- data.table(biomaRt::getBM(
-        attributes = c(
+    attributes = c(
-            "hsapiens_homolog_ensembl_gene",
+      "hsapiens_homolog_ensembl_gene",
-            "chromosome_name",
+      "chromosome_name",
-            "start_position",
+      "start_position",
-            "end_position"
+      "end_position"
-        ),
+    ),
-        filters = c("with_hsapiens_homolog", "chromosome_name"),
+    filters = c("with_hsapiens_homolog", "chromosome_name"),
-        values = list(TRUE, chromosome_names),
+    values = list(TRUE, chromosome_names),
-        mart = dataset
+    mart = dataset
-    ))
+  ))
-    # Only include human genes that we have information on.
+  # Only include human genes that we have information on.
-    species_distances <- species_distances[
+  species_distances <- species_distances[
-        hsapiens_homolog_ensembl_gene %chin% genes$id
+    hsapiens_homolog_ensembl_gene %chin% genes$id
-    ]
+  ]
-    # Only include one ortholog per human gene.
+  # Only include one ortholog per human gene.
-    species_distances <- unique(
+  species_distances <- unique(
-        species_distances,
+    species_distances,
-        by = "hsapiens_homolog_ensembl_gene"
+    by = "hsapiens_homolog_ensembl_gene"
-    )
+  )
-    # Rename gene ID column to match the human data.
+  # Rename gene ID column to match the human data.
-    setnames(
+  setnames(
-        species_distances,
+    species_distances,
-        "hsapiens_homolog_ensembl_gene",
+    "hsapiens_homolog_ensembl_gene",
-        "ensembl_gene_id"
+    "ensembl_gene_id"
-    )
+  )
-    handle_species(species_id, species_distances)
+  handle_species(species_id, species_distances)
 }
 # Save data in the appropriate place.