Convert to R package

This commit is contained in:
Elias Projahn 2021-10-20 15:34:52 +02:00
parent b2e2dbf1af
commit d4611f47cf
15 changed files with 799 additions and 114 deletions

8
R/app.R Normal file
View file

@ -0,0 +1,8 @@
#' Run the application server.
#'
#' @param port The port to serve the application on.
#'
#' @export
run_app <- function(port = 3464) {
shiny::runApp(shiny::shinyApp(ui, server), port = port)
}

112
R/data.R Normal file
View file

@ -0,0 +1,112 @@
# Species IDs of known replicatively aging species.
species_ids_replicative <- c(
"bihybrid",
"btaurus",
"bthybrid",
"cfamiliaris",
"chircus",
"cjacchus",
"clfamiliaris",
"csabaeus",
"ecaballus",
"fcatus",
"ggorilla",
"hsapiens",
"lafricana",
"mfascicularis",
"mmulatta",
"mmurinus",
"mnemestrina",
"nleucogenys",
"oaries",
"pabelii",
"panubis",
"ppaniscus",
"ptroglodytes",
"sscrofa",
"tgelada"
)
# Species from [geposan] and their aging status.
species <- geposan::species[, .(
id,
name,
replicative = id %chin% species_ids_replicative
)]
# Gene names of genes for verified TPE-OLD genes.
genes_verified_tpe_old <- c(
"C1S",
"DSP",
"ISG15",
"SORBS2",
"TERT"
)
# Gene names of genes with a suggested TPE-OLD.
genes_suggested_tpe_old <- c(
"AKAP3",
"ANO2",
"CCND2",
"CD163L1",
"CD9",
"FOXM1",
"GALNT8",
"NDUFA9",
"TEAD4",
"TIGAR",
"TSPAN9"
)
# Genes from [geposan] and their TPE-OLD status.
genes <- geposan::genes[, .(
id,
name,
chromosome,
suggested = name %chin% genes_suggested_tpe_old,
verified = name %chin% genes_verified_tpe_old
)]
# All available methods from [geposan] and additional information on them.
methods <- list(
list(
id = "clusteriness",
name = "Clustering",
description = "Clustering of genes"
),
list(
id = "correlation",
name = "Correlation",
description = "Correlation with known genes"
),
list(
id = "proximity",
name = "Proximity",
description = "Proximity to telomeres"
),
list(
id = "neural",
name = "Neural",
description = "Assessment by neural network"
)
)
# Gene IDs of known or suggested TPE-OLD genes.
genes_tpe_old <- genes[suggested | verified == TRUE, id]
# Preset for [geposan] including all species and TPE-OLD genes for reference.
preset_all_species <- geposan::preset(
methods = c("clusteriness", "correlation", "proximity", "neural"),
species = species$id,
genes = genes$id,
reference_genes = genes_tpe_old
)
# Preset for [geposan] including only replicatively aging species as well as
# TPE-OLD genes for reference.
preset_replicative_species <- geposan::preset(
methods = c("clusteriness", "correlation", "proximity", "neural"),
species = species_ids_replicative,
genes = genes$id,
reference_genes = genes_tpe_old
)

93
R/methods.R Normal file
View file

@ -0,0 +1,93 @@
# Construct UI for the methods editor.
methods_ui <- function(id) {
initial_weight <- 100 / length(methods)
verticalLayout(
h3("Methods"),
actionButton(
NS(id, "optimize_button"),
"Find optimal weights",
icon = icon("check-double")
),
div(style = "margin-top: 16px"),
lapply(methods, function(method) {
verticalLayout(
checkboxInput(
NS(id, method$id),
span(
method$description,
style = "font-weight: bold"
),
value = TRUE
),
sliderInput(
NS(id, sprintf("%s_weight", method$id)),
NULL,
post = "%",
min = 0,
max = 100,
step = 1,
value = initial_weight
)
)
})
)
}
# Construct server for the methods editor.
#
# @param analysis The reactive containing the results to be weighted.
#
# @return A reactive containing the weighted results.
methods_server <- function(id, analysis) {
moduleServer(id, function(input, output, session) {
observeEvent(input$optimize_button, {
method_ids <- NULL
# Only include activated methods.
for (method in methods) {
if (input[[method$id]]) {
method_ids <- c(method_ids, method$id)
}
}
weights <- geposan::optimize_weights(
analysis(),
method_ids,
genes_tpe_old
)
for (method_id in method_ids) {
updateSliderInput(
session,
sprintf("%s_weight", method_id),
value = weights[[method_id]] * 100
)
}
})
# Observe each method's enable button and synchronise the slider state.
lapply(methods, function(method) {
observeEvent(input[[method$id]], {
shinyjs::toggleState(
session$ns(sprintf("%s_weight", method$id))
)
}, ignoreInit = TRUE)
})
reactive({
# Take the actual weights from the sliders.
weights <- NULL
for (method in methods) {
if (input[[method$id]]) {
weight <- input[[sprintf("%s_weight", method$id)]]
weights[[method$id]] <- weight
}
}
geposan::ranking(analysis(), weights)
})
})
}

53
R/rank_plot.R Normal file
View file

@ -0,0 +1,53 @@
# Draw a plot displaying the rank of reference genes.
#
# The input table should contain the following columns:
#
# - `gene` Gene IDs of genes to display.
# - `name` Name of genes to display.
# - `score` Score of the genes.
# - `rank` Rank of the genes based on the score.
#
# @param results Results to display.
# @param reference_gene_ids IDs of reference genes.
# @param cutoff Cut-off score.
rank_plot <- function(results, reference_gene_ids, cutoff) {
first_not_included_rank <- results[score < cutoff, min(rank)]
last_rank <- results[, .N]
plot <- plotly::plot_ly() |> plotly::add_trace(
data = results,
x = ~rank,
y = ~score,
name = "All genes",
type = "scatter",
mode = "line",
hoverinfo = "skip"
) |> plotly::add_trace(
data = results[gene %chin% reference_gene_ids],
x = ~rank,
y = ~score,
color = ~gene,
name = ~name,
width = 10,
type = "bar"
) |> plotly::layout(
xaxis = list(title = "Ranks"),
yaxis = list(title = "Score")
)
if (first_not_included_rank <= last_rank) {
plot <- plot |> plotly::layout(
shapes = list(
type = "rect",
fillcolor = "black",
opacity = 0.1,
x0 = first_not_included_rank,
x1 = last_rank,
y0 = 0.0,
y1 = 1.0
)
)
}
plot
}

33
R/scatter_plot.R Normal file
View file

@ -0,0 +1,33 @@
# Draw a scatter plot containing gene positions.
#
# @param results Results from [`process_input()`].
# @param species Species to be displayed.
# @param genes Genes to be displayed.
scatter_plot <- function(results, species, genes) {
species_ids <- species[, id]
data <- merge(
genes[, .(id, name)],
geposan::distances[species %in% species_ids],
by.x = "id", by.y = "gene"
)
data[name == "", name := "Unknown"]
plotly::plot_ly(
data = data,
x = ~species,
y = ~distance,
color = ~id,
name = ~name,
type = "scatter",
mode = "markers"
) |> plotly::layout(
xaxis = list(
title = "Species",
tickvals = species_ids,
ticktext = species[, name]
),
yaxis = list(title = "Distance to telomeres [Bp]")
)
}

204
R/server.R Normal file
View file

@ -0,0 +1,204 @@
# Java script function to replace gene IDs with Ensembl gene links.
js_link <- DT::JS("function(row, data) {
let id = data[1];
var name = data[2];
if (!name) name = 'Unknown';
let url = `https://www.ensembl.org/Homo_sapiens/Gene/Summary?g=${id}`;
$('td:eq(1)', row).html(`<a href=\"${url}\" target=\"_blank\">${name}</a>`);
}")
server <- function(input, output, session) {
# Show the customized slider for setting the required number of species.
output$n_species_slider <- renderUI({
sliderInput(
"n_species",
"Required number of species per gene",
min = 0,
max = if (input$species == "all") {
nrow(species)
} else {
length(species_ids_replicative)
},
step = 1,
value = 10
)
})
# Compute the results according to the preset.
analysis <- reactive({
# Select the preset.
preset <- if (input$species == "all") {
preset_all_species
} else {
preset_replicative_species
}
# Perform the analysis cached based on the preset's hash.
results <- withProgress(
message = "Analyzing genes",
value = 0.0, {
run_cached(
rlang::hash(preset),
geposan::analyze,
preset,
function(progress) {
setProgress(progress)
}
)
}
)
# Add all gene information to the results.
results <- merge(
results,
genes,
by.x = "gene",
by.y = "id"
)
# Count included species from the preset per gene.
genes_n_species <- geposan::distances[
species %chin% preset$species_ids,
.(n_species = .N),
by = "gene"
]
setkey(genes_n_species, gene)
# Exclude genes with too few species.
results[genes_n_species[gene, n_species] >= input$n_species]
})
# Rank the results.
results <- methods_server("methods", analysis)
# Apply the cut-off score to the ranked results.
results_filtered <- reactive({
results()[score >= input$cutoff / 100]
})
output$genes <- DT::renderDT({
method_ids <- sapply(methods, function(method) method$id)
method_names <- sapply(methods, function(method) method$name)
columns <- c("rank", "gene", "name", "chromosome", method_ids, "score")
column_names <- c("", "Gene", "", "Chromosome", method_names, "Score")
dt <- DT::datatable(
results_filtered()[, ..columns],
rownames = FALSE,
colnames = column_names,
style = "bootstrap",
fillContainer = TRUE,
extensions = "Scroller",
options = list(
rowCallback = js_link,
columnDefs = list(list(visible = FALSE, targets = 2)),
deferRender = TRUE,
scrollY = 200,
scroller = TRUE
)
)
DT::formatPercentage(dt, c(method_ids, "score"), digits = 1)
})
output$copy <- renderUI({
results <- results_filtered()
gene_ids <- results[, gene]
names <- results[name != "", name]
genes_text <- paste(gene_ids, collapse = "\n")
names_text <- paste(names, collapse = "\n")
splitLayout(
cellWidths = "auto",
rclipboard::rclipButton(
"copy_ids_button",
"Copy gene IDs",
genes_text,
icon = icon("clipboard")
),
rclipboard::rclipButton(
"copy_names_button",
"Copy gene names",
names_text,
icon = icon("clipboard")
)
)
})
output$scatter <- plotly::renderPlotly({
results <- results_filtered()
gene_ids <- results[input$genes_rows_selected, gene]
genes <- genes[id %chin% gene_ids]
species <- if (input$species == "all") {
species
} else {
species[replicative == TRUE]
}
scatter_plot(results, species, genes)
})
output$assessment_synopsis <- renderText({
reference_gene_ids <- genes[suggested | verified == TRUE, id]
included_reference_count <- results_filtered()[
gene %chin% reference_gene_ids,
.N
]
reference_results <- results()[gene %chin% reference_gene_ids]
total_reference_count <- nrow(reference_results)
if (total_reference_count > 0) {
mean_rank <- as.character(round(
reference_results[, mean(rank)],
digits = 1
))
max_rank <- as.character(reference_results[, max(rank)])
} else {
mean_rank <- "Unknown"
max_rank <- "Unknown"
}
sprintf(
"Included reference genes: %i/%i<br> \
Mean rank of reference genes: %s<br> \
Maximum rank of reference genes: %s",
included_reference_count,
total_reference_count,
mean_rank,
max_rank
)
})
output$rank_plot <- plotly::renderPlotly({
rank_plot(
results(),
genes[suggested | verified == TRUE, id],
input$cutoff / 100
)
})
output$gost <- plotly::renderPlotly({
if (input$enable_gost) {
result <- gprofiler2::gost(
results_filtered()[, gene],
ordered_query = TRUE
)
gprofiler2::gostplot(
result,
capped = FALSE,
interactive = TRUE
)
} else {
NULL
}
})
}

80
R/ui.R Normal file
View file

@ -0,0 +1,80 @@
ui <- fluidPage(
shinyjs::useShinyjs(),
rclipboard::rclipboardSetup(),
titlePanel("TPE-OLD candidates"),
sidebarLayout(
sidebarPanel(
width = 3,
h3("Filter criteria"),
selectInput(
"species",
"Species to include",
choices = list(
"Replicatively aging" = "replicative",
"All qualified" = "all"
)
),
uiOutput("n_species_slider"),
sliderInput(
"cutoff",
"Cut-off score",
post = "%",
min = 0,
max = 100,
step = 1,
value = 50
),
methods_ui("methods")
),
mainPanel(
tabsetPanel(
type = "pills",
header = div(style = "margin-top: 16px"),
tabPanel(
"Results",
uiOutput("copy"),
div(
style = "margin-top: 16px",
DT::DTOutput("genes", height = "1000px")
)
),
tabPanel(
"Positions",
plotly::plotlyOutput(
"scatter",
width = "100%",
height = "600px"
)
),
tabPanel(
"Assessment",
htmlOutput("assessment_synopsis"),
div(
style = "margin-top: 16px",
plotly::plotlyOutput(
"rank_plot",
width = "100%",
height = "600px"
)
),
),
tabPanel(
"Analysis",
checkboxInput(
"enable_gost",
"Perform a gene set enrichment analysis on the \
filtered result genes."
),
conditionalPanel(
"input.enable_gost == true",
plotly::plotlyOutput(
"gost",
width = "100%",
height = "600px"
)
)
)
)
)
)
)

31
R/utils.R Normal file
View file

@ -0,0 +1,31 @@
# Run a function caching the result on the file system.
#
# The function will be called with the appended arguments. The [`id`] argument
# will be used to identify the cache file on the file system and in log
# messages.
run_cached <- function(id, func, ...) {
if (!dir.exists("cache")) {
dir.create("cache")
}
cache_file <- paste("cache/", id, ".rds", sep = "")
if (file.exists(cache_file)) {
# If the cache file exists, we restore the data from it.
readRDS(cache_file)
} else {
# If the cache file doesn't exist, we have to do the computation.
data <- func(...)
# The results are cached for the next run.
saveRDS(data, cache_file)
data
}
}
# This is needed to make data.table's and shiny's symbols available within the
# package.
#' @import data.table
#' @import shiny
NULL