Convert to R package

2025-10-26 11:17:24 +01:00 · 2021-10-20 15:34:52 +02:00 · 2021-10-20 15:34:52 +02:00 · d4611f47cf
commit d4611f47cf
parent b2e2dbf1af
15 changed files with 799 additions and 114 deletions
--- a/R/app.R
+++ b/R/app.R
@ -0,0 +1,8 @@
+#' Run the application server.
+#'
+#' @param port The port to serve the application on.
+#'
+#' @export
+run_app <- function(port = 3464) {
+    shiny::runApp(shiny::shinyApp(ui, server), port = port)
+}
--- a/R/data.R
+++ b/R/data.R
@ -0,0 +1,112 @@
+# Species IDs of known replicatively aging species.
+species_ids_replicative <- c(
+    "bihybrid",
+    "btaurus",
+    "bthybrid",
+    "cfamiliaris",
+    "chircus",
+    "cjacchus",
+    "clfamiliaris",
+    "csabaeus",
+    "ecaballus",
+    "fcatus",
+    "ggorilla",
+    "hsapiens",
+    "lafricana",
+    "mfascicularis",
+    "mmulatta",
+    "mmurinus",
+    "mnemestrina",
+    "nleucogenys",
+    "oaries",
+    "pabelii",
+    "panubis",
+    "ppaniscus",
+    "ptroglodytes",
+    "sscrofa",
+    "tgelada"
+)
+
+# Species from [geposan] and their aging status.
+species <- geposan::species[, .(
+    id,
+    name,
+    replicative = id %chin% species_ids_replicative
+)]
+
+# Gene names of genes for verified TPE-OLD genes.
+genes_verified_tpe_old <- c(
+    "C1S",
+    "DSP",
+    "ISG15",
+    "SORBS2",
+    "TERT"
+)
+
+# Gene names of genes with a suggested TPE-OLD.
+genes_suggested_tpe_old <- c(
+    "AKAP3",
+    "ANO2",
+    "CCND2",
+    "CD163L1",
+    "CD9",
+    "FOXM1",
+    "GALNT8",
+    "NDUFA9",
+    "TEAD4",
+    "TIGAR",
+    "TSPAN9"
+)
+
+# Genes from [geposan] and their TPE-OLD status.
+genes <- geposan::genes[, .(
+    id,
+    name,
+    chromosome,
+    suggested = name %chin% genes_suggested_tpe_old,
+    verified = name %chin% genes_verified_tpe_old
+)]
+
+# All available methods from [geposan] and additional information on them.
+methods <- list(
+    list(
+        id = "clusteriness",
+        name = "Clustering",
+        description = "Clustering of genes"
+    ),
+    list(
+        id = "correlation",
+        name = "Correlation",
+        description = "Correlation with known genes"
+    ),
+    list(
+        id = "proximity",
+        name = "Proximity",
+        description = "Proximity to telomeres"
+    ),
+    list(
+        id = "neural",
+        name = "Neural",
+        description = "Assessment by neural network"
+    )
+)
+
+# Gene IDs of known or suggested TPE-OLD genes.
+genes_tpe_old <- genes[suggested | verified == TRUE, id]
+
+# Preset for [geposan] including all species and TPE-OLD genes for reference.
+preset_all_species <- geposan::preset(
+    methods = c("clusteriness", "correlation", "proximity", "neural"),
+    species = species$id,
+    genes = genes$id,
+    reference_genes = genes_tpe_old
+)
+
+# Preset for [geposan] including only replicatively aging species as well as
+# TPE-OLD genes for reference.
+preset_replicative_species <- geposan::preset(
+    methods = c("clusteriness", "correlation", "proximity", "neural"),
+    species = species_ids_replicative,
+    genes = genes$id,
+    reference_genes = genes_tpe_old
+)
--- a/R/methods.R
+++ b/R/methods.R
@ -0,0 +1,93 @@
+# Construct UI for the methods editor.
+methods_ui <- function(id) {
+    initial_weight <- 100 / length(methods)
+
+    verticalLayout(
+        h3("Methods"),
+        actionButton(
+            NS(id, "optimize_button"),
+            "Find optimal weights",
+            icon = icon("check-double")
+        ),
+        div(style = "margin-top: 16px"),
+        lapply(methods, function(method) {
+            verticalLayout(
+                checkboxInput(
+                    NS(id, method$id),
+                    span(
+                        method$description,
+                        style = "font-weight: bold"
+                    ),
+                    value = TRUE
+                ),
+                sliderInput(
+                    NS(id, sprintf("%s_weight", method$id)),
+                    NULL,
+                    post = "%",
+                    min = 0,
+                    max = 100,
+                    step = 1,
+                    value = initial_weight
+                )
+            )
+        })
+    )
+}
+
+# Construct server for the methods editor.
+#
+# @param analysis The reactive containing the results to be weighted.
+#
+# @return A reactive containing the weighted results.
+methods_server <- function(id, analysis) {
+    moduleServer(id, function(input, output, session) {
+        observeEvent(input$optimize_button, {
+            method_ids <- NULL
+
+            # Only include activated methods.
+            for (method in methods) {
+                if (input[[method$id]]) {
+                    method_ids <- c(method_ids, method$id)
+                }
+            }
+
+            weights <- geposan::optimize_weights(
+                analysis(),
+                method_ids,
+                genes_tpe_old
+            )
+
+            for (method_id in method_ids) {
+                updateSliderInput(
+                    session,
+                    sprintf("%s_weight", method_id),
+                    value = weights[[method_id]] * 100
+                )
+            }
+        })
+
+        # Observe each method's enable button and synchronise the slider state.
+        lapply(methods, function(method) {
+            observeEvent(input[[method$id]], {
+                shinyjs::toggleState(
+                    session$ns(sprintf("%s_weight", method$id))
+                )
+            }, ignoreInit = TRUE)
+        })
+
+        reactive({
+            # Take the actual weights from the sliders.
+
+            weights <- NULL
+
+            for (method in methods) {
+                if (input[[method$id]]) {
+                    weight <- input[[sprintf("%s_weight", method$id)]]
+                    weights[[method$id]] <- weight
+                }
+            }
+
+            geposan::ranking(analysis(), weights)
+        })
+    })
+}
--- a/R/rank_plot.R
+++ b/R/rank_plot.R
@ -0,0 +1,53 @@
+# Draw a plot displaying the rank of reference genes.
+#
+# The input table should contain the following columns:
+#
+#  - `gene` Gene IDs of genes to display.
+#  - `name` Name of genes to display.
+#  - `score` Score of the genes.
+#  - `rank` Rank of the genes based on the score.
+#
+# @param results Results to display.
+# @param reference_gene_ids IDs of reference genes.
+# @param cutoff Cut-off score.
+rank_plot <- function(results, reference_gene_ids, cutoff) {
+    first_not_included_rank <- results[score < cutoff, min(rank)]
+    last_rank <- results[, .N]
+
+    plot <- plotly::plot_ly() |> plotly::add_trace(
+        data = results,
+        x = ~rank,
+        y = ~score,
+        name = "All genes",
+        type = "scatter",
+        mode = "line",
+        hoverinfo = "skip"
+    ) |> plotly::add_trace(
+        data = results[gene %chin% reference_gene_ids],
+        x = ~rank,
+        y = ~score,
+        color = ~gene,
+        name = ~name,
+        width = 10,
+        type = "bar"
+    )  |> plotly::layout(
+        xaxis = list(title = "Ranks"),
+        yaxis = list(title = "Score")
+    )
+
+    if (first_not_included_rank <= last_rank) {
+        plot <- plot |> plotly::layout(
+            shapes = list(
+                type = "rect",
+                fillcolor = "black",
+                opacity = 0.1,
+                x0 = first_not_included_rank,
+                x1 = last_rank,
+                y0 = 0.0,
+                y1 = 1.0
+            )
+        )
+    }
+
+    plot
+}
--- a/R/scatter_plot.R
+++ b/R/scatter_plot.R
@ -0,0 +1,33 @@
+# Draw a scatter plot containing gene positions.
+#
+# @param results Results from [`process_input()`].
+# @param species Species to be displayed.
+# @param genes Genes to be displayed.
+scatter_plot <- function(results, species, genes) {
+    species_ids <- species[, id]
+
+    data <- merge(
+        genes[, .(id, name)],
+        geposan::distances[species %in% species_ids],
+        by.x = "id", by.y = "gene"
+    )
+
+    data[name == "", name := "Unknown"]
+
+    plotly::plot_ly(
+        data = data,
+        x = ~species,
+        y = ~distance,
+        color = ~id,
+        name = ~name,
+        type = "scatter",
+        mode = "markers"
+    ) |> plotly::layout(
+        xaxis = list(
+            title = "Species",
+            tickvals = species_ids,
+            ticktext = species[, name]
+        ),
+        yaxis = list(title = "Distance to telomeres [Bp]")
+    )
+}
--- a/R/server.R
+++ b/R/server.R
@ -0,0 +1,204 @@
+# Java script function to replace gene IDs with Ensembl gene links.
+js_link <- DT::JS("function(row, data) {
+    let id = data[1];
+    var name = data[2];
+    if (!name) name = 'Unknown';
+    let url = `https://www.ensembl.org/Homo_sapiens/Gene/Summary?g=${id}`;
+    $('td:eq(1)', row).html(`<a href=\"${url}\" target=\"_blank\">${name}</a>`);
+}")
+
+server <- function(input, output, session) {
+    # Show the customized slider for setting the required number of species.
+    output$n_species_slider <- renderUI({
+        sliderInput(
+            "n_species",
+            "Required number of species per gene",
+            min = 0,
+            max = if (input$species == "all") {
+                nrow(species)
+            } else {
+                length(species_ids_replicative)
+            },
+            step = 1,
+            value = 10
+        )
+    })
+
+    # Compute the results according to the preset.
+    analysis <- reactive({
+        # Select the preset.
+        preset <- if (input$species == "all") {
+            preset_all_species
+        } else {
+            preset_replicative_species
+        }
+
+        # Perform the analysis cached based on the preset's hash.
+        results <- withProgress(
+            message = "Analyzing genes",
+            value = 0.0, {
+                run_cached(
+                    rlang::hash(preset),
+                    geposan::analyze,
+                    preset,
+                    function(progress) {
+                        setProgress(progress)
+                    }
+                )
+            }
+        )
+
+        # Add all gene information to the results.
+        results <- merge(
+            results,
+            genes,
+            by.x = "gene",
+            by.y = "id"
+        )
+
+        # Count included species from the preset per gene.
+        genes_n_species <- geposan::distances[
+            species %chin% preset$species_ids,
+            .(n_species = .N),
+            by = "gene"
+        ]
+
+        setkey(genes_n_species, gene)
+
+        # Exclude genes with too few species.
+        results[genes_n_species[gene, n_species] >= input$n_species]
+    })
+
+    # Rank the results.
+    results <- methods_server("methods", analysis)
+
+    # Apply the cut-off score to the ranked results.
+    results_filtered <- reactive({
+        results()[score >= input$cutoff / 100]
+    })
+
+    output$genes <- DT::renderDT({
+        method_ids <- sapply(methods, function(method) method$id)
+        method_names <- sapply(methods, function(method) method$name)
+        columns <- c("rank", "gene", "name", "chromosome", method_ids, "score")
+        column_names <- c("", "Gene", "", "Chromosome", method_names, "Score")
+
+        dt <- DT::datatable(
+            results_filtered()[, ..columns],
+            rownames = FALSE,
+            colnames = column_names,
+            style = "bootstrap",
+            fillContainer = TRUE,
+            extensions = "Scroller",
+            options = list(
+                rowCallback = js_link,
+                columnDefs = list(list(visible = FALSE, targets = 2)),
+                deferRender = TRUE,
+                scrollY = 200,
+                scroller = TRUE
+            )
+        )
+
+        DT::formatPercentage(dt, c(method_ids, "score"), digits = 1)
+    })
+
+    output$copy <- renderUI({
+        results <- results_filtered()
+
+        gene_ids <- results[, gene]
+        names <- results[name != "", name]
+
+        genes_text <- paste(gene_ids, collapse = "\n")
+        names_text <- paste(names, collapse = "\n")
+
+        splitLayout(
+            cellWidths = "auto",
+            rclipboard::rclipButton(
+                "copy_ids_button",
+                "Copy gene IDs",
+                genes_text,
+                icon = icon("clipboard")
+            ),
+            rclipboard::rclipButton(
+                "copy_names_button",
+                "Copy gene names",
+                names_text,
+                icon = icon("clipboard")
+            )
+        )
+    })
+
+    output$scatter <- plotly::renderPlotly({
+        results <- results_filtered()
+
+        gene_ids <- results[input$genes_rows_selected, gene]
+        genes <- genes[id %chin% gene_ids]
+
+        species <- if (input$species == "all") {
+            species
+        } else {
+            species[replicative == TRUE]
+        }
+
+        scatter_plot(results, species, genes)
+    })
+
+    output$assessment_synopsis <- renderText({
+        reference_gene_ids <- genes[suggested | verified == TRUE, id]
+
+        included_reference_count <- results_filtered()[
+            gene %chin% reference_gene_ids,
+            .N
+        ]
+
+        reference_results <- results()[gene %chin% reference_gene_ids]
+        total_reference_count <- nrow(reference_results)
+
+        if (total_reference_count > 0) {
+            mean_rank <- as.character(round(
+                reference_results[, mean(rank)],
+                digits = 1
+            ))
+
+            max_rank <- as.character(reference_results[, max(rank)])
+        } else {
+            mean_rank <- "Unknown"
+            max_rank <- "Unknown"
+        }
+
+        sprintf(
+            "Included reference genes: %i/%i<br> \
+            Mean rank of reference genes: %s<br> \
+            Maximum rank of reference genes: %s",
+            included_reference_count,
+            total_reference_count,
+            mean_rank,
+            max_rank
+        )
+    })
+
+    output$rank_plot <- plotly::renderPlotly({
+        rank_plot(
+            results(),
+            genes[suggested | verified == TRUE, id],
+            input$cutoff / 100
+        )
+    })
+
+    output$gost <- plotly::renderPlotly({
+        if (input$enable_gost) {
+            result <- gprofiler2::gost(
+                results_filtered()[, gene],
+                ordered_query = TRUE
+            )
+
+            gprofiler2::gostplot(
+                result,
+                capped = FALSE,
+                interactive = TRUE
+            )
+        } else {
+            NULL
+        }
+    })
+}
--- a/R/ui.R
+++ b/R/ui.R
@ -0,0 +1,80 @@
+ui <- fluidPage(
+    shinyjs::useShinyjs(),
+    rclipboard::rclipboardSetup(),
+    titlePanel("TPE-OLD candidates"),
+    sidebarLayout(
+        sidebarPanel(
+            width = 3,
+            h3("Filter criteria"),
+            selectInput(
+                "species",
+                "Species to include",
+                choices = list(
+                    "Replicatively aging" = "replicative",
+                    "All qualified" = "all"
+                )
+            ),
+            uiOutput("n_species_slider"),
+            sliderInput(
+                "cutoff",
+                "Cut-off score",
+                post = "%",
+                min = 0,
+                max = 100,
+                step = 1,
+                value = 50
+            ),
+            methods_ui("methods")
+        ),
+        mainPanel(
+            tabsetPanel(
+                type = "pills",
+                header = div(style = "margin-top: 16px"),
+                tabPanel(
+                    "Results",
+                    uiOutput("copy"),
+                    div(
+                        style = "margin-top: 16px",
+                        DT::DTOutput("genes", height = "1000px")
+                    )
+                ),
+                tabPanel(
+                    "Positions",
+                    plotly::plotlyOutput(
+                        "scatter",
+                        width = "100%",
+                        height = "600px"
+                    )
+                ),
+                tabPanel(
+                    "Assessment",
+                    htmlOutput("assessment_synopsis"),
+                    div(
+                        style = "margin-top: 16px",
+                        plotly::plotlyOutput(
+                            "rank_plot",
+                            width = "100%",
+                            height = "600px"
+                        )
+                    ),
+                ),
+                tabPanel(
+                    "Analysis",
+                    checkboxInput(
+                        "enable_gost",
+                        "Perform a gene set enrichment analysis on the \
+                        filtered result genes."
+                    ),
+                    conditionalPanel(
+                        "input.enable_gost == true",
+                        plotly::plotlyOutput(
+                            "gost",
+                            width = "100%",
+                            height = "600px"
+                        )
+                    )
+                )
+            )
+        )
+    )
+)
--- a/R/utils.R
+++ b/R/utils.R
@ -0,0 +1,31 @@
+# Run a function caching the result on the file system.
+#
+# The function will be called with the appended arguments. The [`id`] argument
+# will be used to identify the cache file on the file system and in log
+# messages.
+run_cached <- function(id, func, ...) {
+    if (!dir.exists("cache")) {
+        dir.create("cache")
+    }
+
+    cache_file <- paste("cache/", id, ".rds", sep = "")
+
+    if (file.exists(cache_file)) {
+        # If the cache file exists, we restore the data from it.
+        readRDS(cache_file)
+    } else {
+        # If the cache file doesn't exist, we have to do the computation.
+        data <- func(...)
+
+        # The results are cached for the next run.
+        saveRDS(data, cache_file)
+
+        data
+    }
+}
+
+# This is needed to make data.table's and shiny's symbols available within the
+# package.
+#' @import data.table
+#' @import shiny
+NULL