From 68354bf80844a429f039e31390a9f83af4b4659d Mon Sep 17 00:00:00 2001 From: Elias Projahn Date: Sat, 16 Oct 2021 21:46:59 +0200 Subject: [PATCH] Reorganize source files and generalize presets --- init.R | 82 ------------------------ main.R | 6 -- clusteriness.R => process/clusteriness.R | 13 +--- correlation.R => process/correlation.R | 16 +---- input.R => process/input.R | 0 methods.R => process/methods.R | 11 ++-- neural.R => process/neural.R | 15 +---- process/presets.R | 29 +++++++++ process/process.R | 58 +++++++++++++++++ proximity.R => process/proximity.R | 15 +---- util.R => process/util.R | 0 shiny/main.R | 7 ++ server.R => shiny/server.R | 12 +++- ui.R => shiny/ui.R | 2 - 14 files changed, 119 insertions(+), 147 deletions(-) delete mode 100644 init.R delete mode 100644 main.R rename clusteriness.R => process/clusteriness.R (76%) rename correlation.R => process/correlation.R (78%) rename input.R => process/input.R (100%) rename methods.R => process/methods.R (87%) rename neural.R => process/neural.R (86%) create mode 100644 process/presets.R create mode 100644 process/process.R rename proximity.R => process/proximity.R (54%) rename util.R => process/util.R (100%) create mode 100644 shiny/main.R rename server.R => shiny/server.R (96%) rename ui.R => shiny/ui.R (99%) diff --git a/init.R b/init.R deleted file mode 100644 index f630d79..0000000 --- a/init.R +++ /dev/null @@ -1,82 +0,0 @@ -source("input.R") -source("methods.R") -source("util.R") - -# Load input data - -species <- run_cached("input_species", retrieve_species) -genes <- run_cached("input_genes", retrieve_genes) - -distances <- run_cached( - "input_distances", - retrieve_distances, - species[, id], - genes[, id] -) - -all_species <- species[, id] -replicative_species <- species[replicative == TRUE, id] -all_genes <- genes[, id] -tpe_old_genes <- genes[suggested | verified == TRUE, id] - -# Apply all methods for all species - -results_all <- merge( - genes, - distances[, .(n_species = .N), by = 
"gene"], - by.x = "id", - by.y = "gene" -) - -setnames(results_all, "id", "gene") - -for (method in methods) { - method_results <- run_cached( - sprintf("%s_all", method$id), - method$fn, - distances, - all_species, - all_genes, - tpe_old_genes - ) - - setnames(method_results, "score", method$id) - - results_all <- merge( - results_all, - method_results, - ) -} - -# Apply all methods for replicatively aging species - -results_replicative <- merge( - genes, - distances[ - species %chin% species_ids_replicative, - .(n_species = .N), - by = gene - ], - by.x = "id", - by.y = "gene" -) - -setnames(results_replicative, "id", "gene") - -for (method in methods) { - method_results <- run_cached( - sprintf("%s_replicative", method$id), - method$fn, - distances, - replicative_species, - all_genes, - tpe_old_genes - ) - - setnames(method_results, "score", method$id) - - results_replicative <- merge( - results_replicative, - method_results, - ) -} \ No newline at end of file diff --git a/main.R b/main.R deleted file mode 100644 index 6b70e73..0000000 --- a/main.R +++ /dev/null @@ -1,6 +0,0 @@ -library(shiny) - -source("server.R") -source("ui.R") - -runApp(shinyApp(ui, server)) \ No newline at end of file diff --git a/clusteriness.R b/process/clusteriness.R similarity index 76% rename from clusteriness.R rename to process/clusteriness.R index 6af743e..d860bb8 100644 --- a/clusteriness.R +++ b/process/clusteriness.R @@ -38,20 +38,11 @@ clusteriness <- function(data, height = 1000000) { } #' Process genes clustering their distance to telomeres. -#' -#' The return value will be a data.table with the following columns: -#' -#' - `gene` Gene ID of the processed gene. -#' - `score` Score quantidying the gene's clusters. -#' -#' @param distances Gene distance data to use. -#' @param species_ids IDs of species to include in the analysis. -#' @param gene_ids Genes to include in the computation. -process_clusteriness <- function(distances, species_ids, gene_ids, ...) 
{ +process_clusteriness <- function(distances, gene_ids, preset) { results <- data.table(gene = gene_ids) # Prefilter the input data by species. - distances <- distances[species %chin% species_ids] + distances <- distances[species %chin% preset$species_ids] # Add an index for quickly accessing data per gene. setkey(distances, gene) diff --git a/correlation.R b/process/correlation.R similarity index 78% rename from correlation.R rename to process/correlation.R index cb69db1..5cce8f7 100644 --- a/correlation.R +++ b/process/correlation.R @@ -2,23 +2,13 @@ library(data.table) #' Compute the mean correlation coefficient comparing gene distances with a set #' of reference genes. -#' -#' The result will be a data.table with the following columns: -#' -#' - `gene` Gene ID of the processed gene. -#' - `score` Mean correlation coefficient. -#' -#' @param distances Distance data to use. -#' @param species_ids Species, whose data should be included. -#' @param gene_ids Genes to process. -#' @param reference_gene_ids Genes to compare to. -process_correlation <- function(distances, species_ids, gene_ids, - reference_gene_ids) { +process_correlation <- function(distances, gene_ids, preset) { results <- data.table(gene = gene_ids) + reference_gene_ids <- preset$reference_gene_ids reference_count <- length(reference_gene_ids) # Prefilter distances by species. - distances <- distances[species %chin% species_ids] + distances <- distances[species %chin% preset$species_ids] # Add an index for quickly accessing data per gene. 
setkey(distances, gene) diff --git a/input.R b/process/input.R similarity index 100% rename from input.R rename to process/input.R diff --git a/methods.R b/process/methods.R similarity index 87% rename from methods.R rename to process/methods.R index a15d255..a060710 100644 --- a/methods.R +++ b/process/methods.R @@ -1,7 +1,7 @@ -source("clusteriness.R") -source("correlation.R") -source("neural.R") -source("proximity.R") +source("process/clusteriness.R") +source("process/correlation.R") +source("process/neural.R") +source("process/proximity.R") #' Construct a new method. #' @@ -10,9 +10,8 @@ source("proximity.R") #' parameters in this order: #' #' - `distances` Distance data to use. -#' - `species_ids` Species, whose data should be included. #' - `gene_ids` Genes to process. -#' - `reference_gene_ids` Genes to compare to. +#' - `preset` Preset to apply. #' #' The function should return a `data.table` with the following columns: #' diff --git a/neural.R b/process/neural.R similarity index 86% rename from neural.R rename to process/neural.R index f84f7ab..ef137d5 100644 --- a/neural.R +++ b/process/neural.R @@ -2,19 +2,10 @@ library(data.table) library(neuralnet) #' Find genes by training a neural network on reference position data. -#' -#' The result will be a data.table with the following columns: -#' -#' - `gene` Gene ID of the processed gene. -#' - `score` Output score given by the neural network. -#' -#' @param distances Distance data to use. -#' @param species_ids Species, whose data should be included. -#' @param gene_ids Genes to process. This should include the reference genes. -#' @param reference_gene_ids Genes to compare to. #' @param seed A seed to get reproducible results. 
-process_neural <- function(distances, species_ids, gene_ids, - reference_gene_ids, seed = 726839) { +process_neural <- function(distances, gene_ids, preset, seed = 726839) { + species_ids <- preset$species_ids + reference_gene_ids <- preset$reference_gene_ids set.seed(seed) gene_count <- length(gene_ids) diff --git a/process/presets.R b/process/presets.R new file mode 100644 index 0000000..c0efc00 --- /dev/null +++ b/process/presets.R @@ -0,0 +1,29 @@ +library(data.table) + +#' Create a new preset. +#' +#' A preset is a combination of input values to all processing methods. The +#' preset's hash will be used to cache the results of applying those. +#' +#' @param species_ids IDs of species to include. +#' @param reference_gene_ids Reference genes to use. +#' +#' @return A named list containing the arguments. +preset <- function(species_ids, reference_gene_ids) { + list( + species_ids = species_ids, + reference_gene_ids = reference_gene_ids + ) +} + +#' A default preset including only replicatively aging species. +preset_replicative_species <- preset( + species[replicative == TRUE, id], + genes[suggested | verified == TRUE, id] +) + +#' A default preset including all species. 
+preset_all_species <- preset( + species[, id], + genes[suggested | verified == TRUE, id] +) \ No newline at end of file diff --git a/process/process.R b/process/process.R new file mode 100644 index 0000000..0e3a2db --- /dev/null +++ b/process/process.R @@ -0,0 +1,58 @@ +library(data.table) + +source("process/util.R") + +# Load input data + +source("process/input.R") + +species <- run_cached("inputs/species", retrieve_species) +genes <- run_cached("inputs/genes", retrieve_genes) + +distances <- run_cached( + "inputs/distances", + retrieve_distances, + species[, id], + genes[, id] +) + +genes <- merge( + genes, + distances[, .(n_species = .N), by = "gene"], + by.x = "id", + by.y = "gene" +) + +source("process/methods.R") +source("process/presets.R") + +#' Apply all methods with the specified preset without caching. +process_priv <- function(preset) { + results <- data.table(gene = genes[, id]) + + for (method in methods) { + method_results <- method$fn(distances, genes[, id], preset) + setnames(method_results, "score", method$id) + + results <- merge( + results, + method_results + ) + } + + results +} + +#' Apply all methods with the specified preset. +#' +#' The result will be cached by the preset's hash and restored from cache, if +#' possible. The return value is a `data.table` with one row for each gene +#' identified by its ID (`gene` column). The additional columns contain the +#' resulting scores per method and are named after the method IDs. 
+process <- function(preset) { + run_cached( + sprintf("results/%s", rlang::hash(preset)), + process_priv, + preset + ) +} \ No newline at end of file diff --git a/proximity.R b/process/proximity.R similarity index 54% rename from proximity.R rename to process/proximity.R index 7da4363..cef008c 100644 --- a/proximity.R +++ b/process/proximity.R @@ -4,20 +4,11 @@ library(data.table) #' #' A score will be given to each gene such that 0.0 corresponds to the maximal #' mean distance across all genes and 1.0 corresponds to a distance of 0. -#' -#' The result will be a data.table with the following columns: -#' -#' - `gene` Gene ID of the processed gene. -#' - `score` Score for the proximity. -#' -#' @param distances Distance data to use. -#' @param species_ids Species, whose data should be included. -#' @param gene_ids Genes to process. -process_proximity <- function(distances, species_ids, gene_ids, ...) { - species_count <- length(species_ids) +process_proximity <- function(distances, gene_ids, preset) { + species_count <- length(preset$species_ids) # Prefilter distances by species. - distances <- distances[species %chin% species_ids] + distances <- distances[species %chin% preset$species_ids] # Compute the score as described above. 
diff --git a/util.R b/process/util.R similarity index 100% rename from util.R rename to process/util.R diff --git a/shiny/main.R b/shiny/main.R new file mode 100644 index 0000000..39228ac --- /dev/null +++ b/shiny/main.R @@ -0,0 +1,7 @@ +library(shiny) + +source("process/process.R") +source("shiny/server.R") +source("shiny/ui.R") + +runApp(shinyApp(ui, server)) \ No newline at end of file diff --git a/server.R b/shiny/server.R similarity index 96% rename from server.R rename to shiny/server.R index ea2930b..79a5acd 100644 --- a/server.R +++ b/shiny/server.R @@ -5,7 +5,6 @@ library(plotly) library(rclipboard) library(shiny) -source("init.R") source("optimize.R") source("rank_plot.R") source("scatter_plot.R") @@ -71,11 +70,18 @@ server <- function(input, output, session) { # Select the species preset. results <- if (input$species == "all") { - results_all + process(preset_all_species) } else { - results_replicative + process(preset_replicative_species) } + results <- merge( + results, + genes, + by.x = "gene", + by.y = "id" + ) + # Compute scoring factors and the weighted score. total_weight <- 0.0 diff --git a/ui.R b/shiny/ui.R similarity index 99% rename from ui.R rename to shiny/ui.R index 0401597..17e519f 100644 --- a/ui.R +++ b/shiny/ui.R @@ -3,8 +3,6 @@ library(plotly) library(rclipboard) library(shiny) -source("methods.R") - ui <- fluidPage( shinyjs::useShinyjs(), rclipboardSetup(),