Separate out geposan package

2025-10-26 19:27:24 +01:00 · 2021-10-19 14:15:28 +02:00 · 2021-10-19 14:15:28 +02:00 · dc0265a13f
commit dc0265a13f
parent 68354bf808
16 changed files with 154 additions and 731 deletions
--- a/data.R
+++ b/data.R
@ -0,0 +1,115 @@
 library(data.table)
 #' Species IDs of known replicatively aging species.
 species_ids_replicative <- c(
    "bihybrid",
    "btaurus",
    "bthybrid",
    "cfamiliaris",
    "chircus",
    "cjacchus",
    "clfamiliaris",
    "csabaeus",
    "ecaballus",
    "fcatus",
    "ggorilla",
    "hsapiens",
    "lafricana",
    "mfascicularis",
    "mmulatta",
    "mmurinus",
    "mnemestrina",
    "nleucogenys",
    "oaries",
    "pabelii",
    "panubis",
    "ppaniscus",
    "ptroglodytes",
    "sscrofa",
    "tgelada"
 )
 #' Species from [geposan] and their aging status.
 species <- geposan::species[, .(
    id,
    name,
    replicative = id %chin% species_ids_replicative
 )]
 #' Gene names of genes for verified TPE-OLD genes.
 genes_verified_tpe_old <- c(
    "C1S",
    "DSP",
    "ISG15",
    "SORBS2",
    "TERT"
 )
 #' Gene names of genes with a suggested TPE-OLD.
 genes_suggested_tpe_old <- c(
    "AKAP3",
    "ANO2",
    "CCND2",
    "CD163L1",
    "CD9",
    "FOXM1",
    "GALNT8",
    "NDUFA9",
    "TEAD4",
    "TIGAR",
    "TSPAN9"
 )
 #' Genes from [geposan] and their TPE-OLD status.
 genes <- geposan::genes[, .(
    id,
    name,
    chromosome,
    n_species,
    suggested = name %chin% genes_suggested_tpe_old,
    verified = name %chin% genes_verified_tpe_old
 )]
 #' All available methods from [geposan] and additional information on them.
 methods <- list(
    list(
        id = "clusteriness",
        name = "Clustering",
        description = "Clustering of genes"
    ),
    list(
        id = "correlation",
        name = "Correlation",
        description = "Correlation with known genes"
    ),
    list(
        id = "proximity",
        name = "Proximity",
        description = "Proximity to telomeres"
    ),
    list(
        id = "neural",
        name = "Neural",
        description = "Assessment by neural network"
    )
 )
 #' Gene IDs of known or suggested TPE-OLD genes.
 genes_tpe_old <- genes[suggested | verified == TRUE, id]
 #' Preset for [geposan] including all species and TPE-OLD genes for reference.
 preset_all_species <- geposan::preset(
    methods = c("clusteriness", "correlation", "proximity", "neural"),
    species = species$id,
    genes = genes$id,
    reference_genes = genes_tpe_old
 )
 #' Preset for [geposan] including only replicatively aging species as well as
 #' TPE-OLD genes for reference.
 preset_replicative_species <- geposan::preset(
    methods = c("clusteriness", "correlation", "proximity", "neural"),
    species = species_ids_replicative,
    genes = genes$id,
    reference_genes = genes_tpe_old
 )
--- a/main.R
+++ b/main.R
@ -0,0 +1,9 @@
 library(shiny)
 # Initialize data first.
 source("data.R")
 source("server.R")
 source("ui.R")
 runApp(shinyApp(ui, server))
--- a/optimize.R
+++ b/optimize.R
@ -1,34 +0,0 @@
 #' Find the best weights to rank the data.
 #'
 #' This function ranks the provided data table based on a weighted score
 #' computed from the specified `columns`. It tries to find the optimal weights
 #' that result in a ranking, where the mean rank of the given reference genes
 #' is as high as possible.
 #'
 #' @param data Input data including the columns.
 #' @param colums Columns containing the separate scores between 0.0 and 1.0.
 #' @param reference_gene_ids IDs of the reference genes within the input data.
 #'
 #' @returns Vector of optimal column weights adding up to 1.0.
 optimize_weights <- function(data, columns, reference_gene_ids) {
    #' Compute the mean rank of the reference genes when applying the weights.
    mean_rank <- function(weights) {
        data <- copy(data)
        data[, score := 0.0]
        for (i in seq_along(columns)) {
            column <- columns[i]
            weighted <- weights[i] * data[, ..column]
            data[, score := score + weighted]
        }
        setorder(data, -score)
        data[, rank := .I]
        data[gene %chin% reference_gene_ids, mean(rank)]
    }
    weights <- optim(rep(1.0, length(columns)), mean_rank)$par
    total_weight <- sum(weights)
    weights / total_weight
 }
--- a/process/clusteriness.R
+++ b/process/clusteriness.R
@ -1,56 +0,0 @@
 library(data.table)
 #' Perform a cluster analysis.
 #'
 #' This function will cluster the data using `hclust` and `cutree` (with the
 #' specified height). Every cluster with at least two members qualifies for
 #' further analysis. Clusters are then ranked based on their size in relation
 #' to the number of values. The return value is a final score between zero and
 #' one. Lower ranking clusters contribute less to this score.
 clusteriness <- function(data, height = 1000000) {
    n <- length(data)
    # Return a score of 0.0 if there is just one or no value at all.
    if (n < 2) {
        return(0.0)
    }
    # Cluster the data and compute the cluster sizes.
    tree <- hclust(dist(data))
    clusters <- cutree(tree, h = height)
    cluster_sizes <- sort(tabulate(clusters), decreasing = TRUE)
    # Compute the "clusteriness" score.
    score <- 0.0
    for (i in seq_along(cluster_sizes)) {
        cluster_size <- cluster_sizes[i]
        if (cluster_size >= 2) {
            cluster_score <- cluster_size / n
            score <- score + cluster_score / i
        }
    }
    score
 }
 #' Process genes clustering their distance to telomeres.
 process_clusteriness <- function(distances, gene_ids, preset) {
    results <- data.table(gene = gene_ids)
    # Prefilter the input data by species.
    distances <- distances[species %chin% preset$species_ids]
    # Add an index for quickly accessing data per gene.
    setkey(distances, gene)
    #' Perform the cluster analysis for one gene.
    compute <- function(gene_id) {
        clusteriness(distances[gene_id, distance])
    }
    results[, score := compute(gene), by = 1:nrow(results)]
 }
--- a/process/correlation.R
+++ b/process/correlation.R
@ -1,63 +0,0 @@
 library(data.table)
 #' Compute the mean correlation coefficient comparing gene distances with a set
 #' of reference genes.
 process_correlation <- function(distances, gene_ids, preset) {
    results <- data.table(gene = gene_ids)
    reference_gene_ids <- preset$reference_gene_ids
    reference_count <- length(reference_gene_ids)
    # Prefilter distances by species.
    distances <- distances[species %chin% preset$species_ids]
    # Add an index for quickly accessing data per gene.
    setkey(distances, gene)
    # Prepare the reference genes' data.
    reference_distances <- distances[gene %chin% reference_gene_ids]
    #' Perform the correlation for one gene.
    compute <- function(gene_id) {
        gene_distances <- distances[gene_id]
        gene_species_count <- nrow(gene_distances)
        # Return a score of 0.0 if there is just one or no value at all.
        if (gene_species_count <= 1) {
            return(0.0)
        }
        #' Buffer for the sum of correlation coefficients.
        correlation_sum <- 0
        # Correlate with all reference genes but not with the gene itself.
        for (reference_gene_id in
            reference_gene_ids[reference_gene_ids != gene_id]) {
            data <- merge(
                gene_distances,
                reference_distances[reference_gene_id],
                by = "species"
            )
            # Skip this reference gene, if there are not enough value pairs.
            # This will lessen the final score, because it effectively
            # represents a correlation coefficient of 0.0.
            if (nrow(data) <= 1) {
                next
            }
            # Order data by the reference gene's distance to get a monotonic
            # relation.
            setorder(data, distance.y)
            correlation_sum <- correlation_sum + abs(cor(
                data[, distance.x], data[, distance.y],
                method = "spearman"
            ))
        }
        # Compute the score as the mean correlation coefficient.
        score <- correlation_sum / reference_count
    }
    results[, score := compute(gene), by = 1:nrow(results)]
 }
--- a/process/input.R
+++ b/process/input.R
@ -1,254 +0,0 @@
 library(biomaRt)
 library(data.table)
 library(progress)
 library(rlog)
 library(stringr)
 #' Species IDs of known replicatively aging species.
 species_ids_replicative <- c(
    "bihybrid",
    "btaurus",
    "bthybrid",
    "cfamiliaris",
    "chircus",
    "cjacchus",
    "clfamiliaris",
    "csabaeus",
    "ecaballus",
    "fcatus",
    "ggorilla",
    "hsapiens",
    "lafricana",
    "mfascicularis",
    "mmulatta",
    "mmurinus",
    "mnemestrina",
    "nleucogenys",
    "oaries",
    "pabelii",
    "panubis",
    "ppaniscus",
    "ptroglodytes",
    "sscrofa",
    "tgelada"
 )
 #' Gene names of genes for verified TPE-OLD genes.
 genes_verified_tpe_old <- c(
    "C1S",
    "DSP",
    "ISG15",
    "SORBS2",
    "TERT"
 )
 #' Gene names of genes with a suggested TPE-OLD.
 genes_suggested_tpe_old <- c(
    "AKAP3",
    "ANO2",
    "CCND2",
    "CD163L1",
    "CD9",
    "FOXM1",
    "GALNT8",
    "NDUFA9",
    "TEAD4",
    "TIGAR",
    "TSPAN9"
 )
 #' Shared accessor for the Ensembl API.
 ensembl <- NULL
 #' Get the ensembl accessor and initialize it if necessary.
 get_ensembl <- function() {
    if (is.null(ensembl)) {
        ensembl <<- useEnsembl("ensembl", version = 104)
    }
    ensembl
 }
 #' Get all chromosome names for a Ensembl dataset.
 #'
 #' Valid chromosome names include decimal numbers as well as 'X' and 'Y'.
 get_chromosome_names <- function(dataset) {
    chromosome_names <- listFilterOptions(dataset, "chromosome_name")
    chromosome_names[str_which(chromosome_names, "^[0-9]+|[XY]$")]
 }
 #' Retrieve information on species.
 #'
 #' The result will be a `data.table` with the following columns:
 #'
 #'  - `id` Species ID as presented by Ensembl.
 #'  - `name` Human readable species name.
 #'  - `replicative` Whether the species is likely to be aging replicatively.
 retrieve_species <- function() {
    # Ensembl datasets correspond to distinct species.
    ensembl_datasets <- data.table(listDatasets(get_ensembl()))
    # Filter out species ID and name from the result.
    species <- ensembl_datasets[, .(
        id = str_match(dataset, "(.*)_gene_ensembl")[, 2],
        name = str_match(description, "(.*) genes \\(.*\\)")[, 2]
    )]
    species[, replicative := id %chin% species_ids_replicative]
 }
 #' Retrieve information on human genes.
 #'
 #' This will only include genes on assembled chromosomes. Chromosomes are
 #' filtered based on their name being either a decimal number, 'X' or 'Y'.
 #'
 #' The result will be a `data.table` with the following columns:
 #'
 #'  - `id` Ensembl gene ID.
 #'  - `ǹame` HGNC name of the gene.
 #'  - `chromosome` Human chromosome on which the gene is located.
 retrieve_genes <- function() {
    dataset <- useDataset("hsapiens_gene_ensembl", mart = get_ensembl())
    genes <- data.table(getBM(
        attributes = c("ensembl_gene_id", "hgnc_symbol", "chromosome_name"),
        filters = "chromosome_name",
        values = get_chromosome_names(dataset),
        mart = useDataset("hsapiens_gene_ensembl", mart = get_ensembl())
    ))
    genes[, .(
        id = ensembl_gene_id,
        name = hgnc_symbol,
        chromosome = chromosome_name,
        verified = hgnc_symbol %chin% genes_verified_tpe_old,
        suggested = hgnc_symbol %chin% genes_suggested_tpe_old
    )]
 }
 #' Retrieve gene distance data.
 #'
 #' The data will include all available values for the given species and genes
 #' that are located on assembled chromosomes.
 #'
 #' The result will be a `data.table` with the following columns:
 #'
 #'  - `species` Species ID.
 #'  - `gene` Ensembl gene ID.
 #'  - `distance` Distance to nearest telomere in base pairs.
 retrieve_distances <- function(species_ids, gene_ids) {
    ensembl <- get_ensembl()
    # Exclude the human from the species, in case it is present there.
    species_ids <- species_ids[species_ids != "hsapiens"]
    species_count <- length(species_ids)
    gene_count <- length(gene_ids)
    log_info(sprintf(
        "Retrieving distance data for %i genes from %i species",
        gene_count,
        species_count
    ))
    progress <- progress_bar$new(
        total = gene_count,
        format = "Retrieving distance data [:bar] :percent (ETA :eta)"
    )
    # Special case the human species and retrieve all available distance
    # information.
    dataset <- useDataset("hsapiens_gene_ensembl", mart = ensembl)
    human_distances <- data.table(getBM(
        attributes = c(
            "ensembl_gene_id",
            "chromosome_name",
            "start_position",
            "end_position"
        ),
        filters = "chromosome_name",
        values = get_chromosome_names(dataset),
        mart = dataset
    ))
    # Compute the nearest distance to telomeres.
    human_distances[,
        chromosome_length := max(end_position),
        by = chromosome_name
    ]
    distances <- human_distances[, .(
        species = "hsapiens",
        gene = ensembl_gene_id,
        distance = pmin(
            start_position,
            chromosome_length - end_position
        )
    )]
    for (i in 1:species_count) {
        species_id <- species_ids[i]
        progress$tick()
        dataset <- useDataset(
            sprintf("%s_gene_ensembl", species_id),
            mart = ensembl
        )
        # Besides the attributes that are always present, we need to check for
        # human orthologs. Some species don't have that information and will be
        # skipped.
        if (!"hsapiens_homolog_ensembl_gene" %chin%
            listAttributes(dataset, what = "name")) {
            next
        }
        chromosome_names <- get_chromosome_names(dataset)
        # Skip the species, if there are no assembled chromosomes.
        if (length(chromosome_names) <= 0) {
            next
        }
        # Retrieve information on all genes of the current species, that have
        # human orthologs. This is called "homolog" in the Ensembl schema.
        ensembl_distances <- data.table(getBM(
            attributes = c(
                "hsapiens_homolog_ensembl_gene",
                "chromosome_name",
                "start_position",
                "end_position"
            ),
            filters = c("with_hsapiens_homolog", "chromosome_name"),
            values = list(TRUE, chromosome_names),
            mart = dataset
        ))
        # Precompute the genes' distance to the nearest telomere.
        ensembl_distances[,
            chromosome_length := max(end_position),
            by = chromosome_name
        ]
        species_distances <- ensembl_distances[, .(
            species = species_id,
            gene = hsapiens_homolog_ensembl_gene,
            distance = pmin(
                start_position,
                chromosome_length - end_position
            )
        )]
        distances <- rbindlist(list(distances, species_distances))
    }
    # Arbitrarily exclude duplicated genes.
    # TODO: Consider a refined approach or work out how to include all
    # duplicates.
    unique(distances, by = c("species", "gene"))
 }
--- a/process/methods.R
+++ b/process/methods.R
@ -1,62 +0,0 @@
 source("process/clusteriness.R")
 source("process/correlation.R")
 source("process/neural.R")
 source("process/proximity.R")
 #' Construct a new method.
 #'
 #' A method describes a way to perform a computation on gene distance data that
 #' results in a single score per gene. The function should accept the following
 #' parameters in this order:
 #'
 #'  - `distances` Distance data to use.
 #'  - `gene_ids` Genes to process.
 #'  - `preset` Preset to apply.
 #'
 #' The function should return a `data.table` with the following columns:
 #'
 #'  - `gene` Gene ID of the processed gene.
 #'  - `score` Score for the gene between 0.0 and 1.0.
 #'
 #' @param id Internal identifier for the method.
 #' @param name Human readable name for the method.
 #' @param description Short human readable description.
 #' @param fn Function to perform the computation.
 #'
 #' @return A named list containing the arguments.
 method <- function(id, name, description, fn) {
    list(
        id = id,
        name = name,
        description = description,
        fn = fn
    )
 }
 #' All methods to be included in the analysis.
 methods <- list(
    method(
        "clusteriness",
        "Clustering",
        "Clustering of genes",
        process_clusteriness
    ),
    method(
        "correlation",
        "Correlation",
        "Correlation with known genes",
        process_correlation
    ),
    method(
        "proximity",
        "Proximity",
        "Proximity to telomeres",
        process_proximity
    ),
    method(
        "neural",
        "Neural",
        "Assessment by neural network",
        process_neural
    )
 )
--- a/process/neural.R
+++ b/process/neural.R
@ -1,101 +0,0 @@
 library(data.table)
 library(neuralnet)
 #' Find genes by training a neural network on reference position data.
 #' @param seed A seed to get reproducible results.
 process_neural <- function(distances, gene_ids, preset, seed = 726839) {
    species_ids <- preset$species_ids
    reference_gene_ids <- preset$reference_gene_ids
    set.seed(seed)
    gene_count <- length(gene_ids)
    # Prefilter distances by species.
    distances <- distances[species %chin% species_ids]
    #' Input data for the network. This contains the gene ID as an identifier
    #' as well as the per-species gene distances as input variables.
    data <- data.table(gene = gene_ids)
    #' Buffer to keep track of species included in the computation. Species
    #' from `species_ids` may be excluded if they don't have enough data.
    species_ids_included <- NULL
    for (species_id in species_ids) {
        # Make a column specific to this species.
        species_distances <- distances[species == species_id, .(gene, distance)]
        setnames(species_distances, "distance", species_id)
        # Only include species with at least 25% known values.
        species_distances <- na.omit(species_distances)
        if (nrow(species_distances) >= 0.25 * gene_count) {
            species_ids_included <- append(species_ids_included, species_id)
            data <- merge(data, species_distances, all = TRUE)
        }
    }
    # Replace missing data with mean values. The neural network can't handle
    # NAs in a meaningful way. Choosing extreme values here would result in
    # heavily biased results. Therefore, the mean value is chosen as a
    # compromise. However, this will of course lessen the significance of the
    # results.
    for (species_id in species_ids_included) {
        mean_value <- data[, mean(get(species_id), na.rm = TRUE)]
        data[
            is.na(get(species_id)),
            eval(quote(species_id)) := round(mean_value)
        ]
    }
    # Extract the reference genes.
    reference_data <- data[gene %chin% reference_gene_ids]
    reference_data[, neural := 1.0]
    # Take out random samples from the remaining genes. This is another
    # compromise with a negative impact on significance. Because there is no
    # information on genes with are explicitely *not* TPE-OLD genes, we have to
    # assume that a random sample of genes has a low probability of including
    # TPE-OLD genes.
    without_reference_data <- data[!gene %chin% reference_gene_ids]
    reference_samples <- without_reference_data[
        sample(
            nrow(without_reference_data),
            nrow(reference_data)
        )
    ]
    reference_samples[, neural := 0.0]
    # Merge training data. The training data includes all reference genes as
    # well as an equal number of random sample genes.
    training_data <- rbindlist(list(reference_data, reference_samples))
    # Construct and train the neural network.
    nn_formula <- as.formula(sprintf(
        "neural~%s",
        paste(species_ids_included, collapse = "+")
    ))
    layer1 <- length(species_ids_included) * 0.66
    layer2 <- layer1 * 0.66
    layer3 <- layer2 * 0.66
    nn <- neuralnet(
        nn_formula,
        training_data,
        hidden = c(layer1, layer2, layer3),
        linear.output = FALSE
    )
    # Return the resulting scores given by applying the neural network.
    data[, score := compute(nn, data)$net.result]
    data[, .(gene, score)]
 }
--- a/process/presets.R
+++ b/process/presets.R
@ -1,29 +0,0 @@
 library(data.table)
 #' Create a new preset.
 #'
 #' A preset is a combination of input values to all processing methods. The
 #' preset's hash will be used to cache the results of applying those.
 #'
 #' @param species_ids IDs of species to include.
 #' @param reference_gene_ids Reference genes to use.
 #'
 #' @return A named list containing the arguments.
 preset <- function(species_ids, reference_gene_ids) {
    list(
        species_ids = species_ids,
        reference_gene_ids = reference_gene_ids
    )
 }
 #' A default preset including only replicatively aging species.
 preset_replicative_species <- preset(
    species[replicative == TRUE, id],
    genes[suggested | verified == TRUE, id]
 )
 #' A default preset including all species.
 preset_all_species <-  preset(
    species[, id],
    genes[suggested | verified == TRUE, id]
 )
--- a/process/process.R
+++ b/process/process.R
@ -1,58 +0,0 @@
 library(data.table)
 source("process/util.R")
 # Load input data
 source("process/input.R")
 species <- run_cached("inputs/species", retrieve_species)
 genes <- run_cached("inputs/genes", retrieve_genes)
 distances <- run_cached(
    "inputs/distances",
    retrieve_distances,
    species[, id],
    genes[, id]
 )
 genes <- merge(
    genes,
    distances[, .(n_species = .N), by = "gene"],
    by.x = "id",
    by.y = "gene"
 )
 source("process/methods.R")
 source("process/presets.R")
 #' Apply all methods with the specified preset without caching.
 process_priv <- function(preset) {
    results <- data.table(gene = genes[, id])
    for (method in methods) {
        method_results <- method$fn(distances, genes[, id], preset)
        setnames(method_results, "score", method$id)
        results <- merge(
            results,
            method_results
        )
    }
    results
 }
 #' Apply all methods with the specified preset.
 #'
 #' The result will be cached by the preset's hash and restored from cache, if
 #' possible. The return value is a `data.table` with one row for each gene
 #' identified by it's ID (`gene` column). The additional columns contain the
 #' resulting per method and are named after the method IDs.
 process <- function(preset) {
    run_cached(
        sprintf("results/%s", rlang::hash(preset)),
        process_priv,
        preset
    )
 }
--- a/process/proximity.R
+++ b/process/proximity.R
@ -1,20 +0,0 @@
 library(data.table)
 #' Score the mean distance of genes to the telomeres across species.
 #'
 #' A score will be given to each gene such that 0.0 corresponds to the maximal
 #' mean distance across all genes and 1.0 corresponds to a distance of 0.
 process_proximity <- function(distances, gene_ids, preset) {
    species_count <- length(preset$species_ids)
    # Prefilter distances by species.
    distances <- distances[species %chin% preset$species_ids]
    # Compute the score as described above.
    distances <- distances[, .(mean_distance = mean(distance)), by = "gene"]
    max_distance <- distances[, max(mean_distance)]
    distances[, score := 1 - mean_distance / max_distance]
    distances[, .(gene, score)]
 }
--- a/shiny/server.R
+++ b/shiny/server.R
@ -1,13 +1,14 @@
 library(data.table)
 library(DT)
 library(geposan)
 library(gprofiler2)
 library(plotly)
 library(rclipboard)
 library(shiny)
 source("optimize.R")
 source("rank_plot.R")
 source("scatter_plot.R")
 source("utils.R")
 #' Java script function to replace gene IDs with Ensembl gene links.
 js_link <- JS("function(row, data) {
@ -45,16 +46,19 @@ server <- function(input, output, session) {
            }
        }
-        reference_gene_ids <- genes[suggested | verified == TRUE, id]
+        weights <- geposan::optimize_weights(
-        weights <- optimize_weights(results, method_ids, reference_gene_ids)
+            results,
            method_ids,
            genes_tpe_old
        )
-        mapply(function(method_id, weight) {
+        for (method_id in method_ids) {
            updateSliderInput(
                session,
                sprintf("%s_weight", method_id),
-                value = weight * 100
+                value = weights[[method_id]] * 100
            )
-        }, method_ids, weights)
+        }
    })
    # Observe each method's enable button.
@ -67,14 +71,17 @@ server <- function(input, output, session) {
    #' Rank the results based on the specified weights. Filter out genes with
    #' too few species but don't apply the cut-off score.
    results <- reactive({
-        # Select the species preset.
+        # Select the preset.
-
+        preset <- if (input$species == "all") {
-        results <- if (input$species == "all") {
+            preset_all_species
            process(preset_all_species)
        } else {
-            process(preset_replicative_species)
+            preset_replicative_species
        }
        # Perform the analysis cached based on the preset's hash.
        results <- run_cached(rlang::hash(preset), geposan::analyze, preset)
        # Add all gene information to the results.
        results <- merge(
            results,
            genes,
@ -82,41 +89,21 @@ server <- function(input, output, session) {
            by.y = "id"
        )
-        # Compute scoring factors and the weighted score.
+        # Exclude genes with too few species.
        results <- results[n_species >= input$n_species]
-        total_weight <- 0.0
+        # Rank the results based on the weights.
-        results[, score := 0.0]
+
        weights <- NULL
        for (method in methods) {
            if (input[[method$id]]) {
                weight <- input[[sprintf("%s_weight", method$id)]]
-                total_weight <- total_weight + weight
+                weights[[method$id]] <- weight
                column <- method$id
                weighted <- weight * results[, ..column]
                results[, score := score + weighted]
            }
        }
-        results[, score := score / total_weight]
+        geposan::ranking(results, weights)
        # Exclude genes with too few species.
        results <- results[n_species >= input$n_species]
        # Penalize missing species.
        if (input$penalize) {
            species_count <- if (input$species == "all") {
                nrow(species)
            } else {
                length(species_ids_replicative)
            }
            results <- results[, score := score * n_species / species_count]
        }
        # Order the results based on their score.
        setorder(results, -score, na.last = TRUE)
        results[, rank := .I]
    })
    #' Apply the cut-off score to the ranked results.
--- a/shiny/main.R
+++ b/shiny/main.R
@ -1,7 +0,0 @@
 library(shiny)
 source("process/process.R")
 source("shiny/server.R")
 source("shiny/ui.R")
 runApp(shinyApp(ui, server))
--- a/shiny/ui.R
+++ b/shiny/ui.R
@ -56,11 +56,7 @@ ui <- fluidPage(
                        value = 100
                    )
                )
-            }),
+            })
            checkboxInput(
                "penalize",
                "Penalize missing values"
            ),
        ),
        mainPanel(
            tabsetPanel(
--- a/process/util.R
+++ b/process/util.R