mirror of
https://github.com/johrpan/geposanui.git
synced 2025-10-26 19:27:24 +01:00
Reorganize source files and generalize presets
This commit is contained in:
parent
8104e9bd8a
commit
68354bf808
14 changed files with 119 additions and 147 deletions
82
init.R
82
init.R
|
|
@ -1,82 +0,0 @@
|
||||||
source("input.R")
|
|
||||||
source("methods.R")
|
|
||||||
source("util.R")
|
|
||||||
|
|
||||||
# Load input data
|
|
||||||
|
|
||||||
# Species and gene tables, obtained through run_cached().
species <- run_cached("input_species", retrieve_species)
genes <- run_cached("input_genes", retrieve_genes)

# ID vectors that are passed to every method below.
all_species <- species[, id]
all_genes <- genes[, id]
replicative_species <- species[replicative == TRUE, id]

# Reference genes: every gene flagged as suggested or verified.
tpe_old_genes <- genes[suggested | verified == TRUE, id]

# Distance data requested for all species and all genes.
distances <- run_cached(
  "input_distances",
  retrieve_distances,
  all_species,
  all_genes
)
|
|
||||||
|
|
||||||
# Apply all methods for all species
|
|
||||||
|
|
||||||
# Start the result table from the gene table, extended by `n_species`: the
# number of rows in `distances` for each gene.
results_all <- merge(
  x = genes,
  y = distances[, .(n_species = .N), by = "gene"],
  by.x = "id",
  by.y = "gene"
)

# The method results identify genes by a `gene` column, so use the same name.
setnames(results_all, old = "id", new = "gene")
|
|
||||||
|
|
||||||
# Run every method on the data of all species and append its scores to
# `results_all` as a column named after the method's ID.
for (method in methods) {
  # Results are obtained through run_cached() under the name "<method id>_all".
  method_results <- run_cached(
    sprintf("%s_all", method$id),
    method$fn,
    distances,
    all_species,
    all_genes,
    tpe_old_genes
  )

  # Rename the generic `score` column so that the columns of different
  # methods don't collide.
  setnames(method_results, "score", method$id)

  # Join explicitly on the gene ID instead of relying on merge()'s implicit
  # choice of key columns.
  results_all <- merge(results_all, method_results, by = "gene")
}
|
|
||||||
|
|
||||||
# Apply all methods for replicatively aging species
|
|
||||||
|
|
||||||
# Start the result table for the replicatively aging species from the gene
# table, extended by `n_species`: the number of rows in `distances` for each
# gene, counting only rows that belong to one of those species.
results_replicative <- merge(
  genes,
  distances[
    # `species` refers to the column of `distances` here; the ID vector of
    # replicatively aging species is `replicative_species`.
    species %chin% replicative_species,
    .(n_species = .N),
    by = "gene"
  ],
  by.x = "id",
  by.y = "gene"
)

# The method results identify genes by a `gene` column, so use the same name.
setnames(results_replicative, "id", "gene")
|
|
||||||
|
|
||||||
# Run every method on the data of the replicatively aging species and append
# its scores to `results_replicative` as a column named after the method's ID.
for (method in methods) {
  # Results are obtained through run_cached() under the name
  # "<method id>_replicative".
  method_results <- run_cached(
    sprintf("%s_replicative", method$id),
    method$fn,
    distances,
    replicative_species,
    all_genes,
    tpe_old_genes
  )

  # Rename the generic `score` column so that the columns of different
  # methods don't collide.
  setnames(method_results, "score", method$id)

  # Join explicitly on the gene ID instead of relying on merge()'s implicit
  # choice of key columns.
  results_replicative <- merge(
    results_replicative,
    method_results,
    by = "gene"
  )
}
|
|
||||||
6
main.R
6
main.R
|
|
@ -1,6 +0,0 @@
|
||||||
library(shiny)
|
|
||||||
|
|
||||||
source("server.R")
|
|
||||||
source("ui.R")
|
|
||||||
|
|
||||||
runApp(shinyApp(ui, server))
|
|
||||||
|
|
@ -38,20 +38,11 @@ clusteriness <- function(data, height = 1000000) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#' Process genes clustering their distance to telomeres.
|
#' Process genes clustering their distance to telomeres.
|
||||||
#'
|
process_clusteriness <- function(distances, gene_ids, preset) {
|
||||||
#' The return value will be a data.table with the following columns:
|
|
||||||
#'
|
|
||||||
#' - `gene` Gene ID of the processed gene.
|
|
||||||
#' - `score` Score quantifying the gene's clusters.
|
|
||||||
#'
|
|
||||||
#' @param distances Gene distance data to use.
|
|
||||||
#' @param species_ids IDs of species to include in the analysis.
|
|
||||||
#' @param gene_ids Genes to include in the computation.
|
|
||||||
process_clusteriness <- function(distances, species_ids, gene_ids, ...) {
|
|
||||||
results <- data.table(gene = gene_ids)
|
results <- data.table(gene = gene_ids)
|
||||||
|
|
||||||
# Prefilter the input data by species.
|
# Prefilter the input data by species.
|
||||||
distances <- distances[species %chin% species_ids]
|
distances <- distances[species %chin% preset$species_ids]
|
||||||
|
|
||||||
# Add an index for quickly accessing data per gene.
|
# Add an index for quickly accessing data per gene.
|
||||||
setkey(distances, gene)
|
setkey(distances, gene)
|
||||||
|
|
@ -2,23 +2,13 @@ library(data.table)
|
||||||
|
|
||||||
#' Compute the mean correlation coefficient comparing gene distances with a set
|
#' Compute the mean correlation coefficient comparing gene distances with a set
|
||||||
#' of reference genes.
|
#' of reference genes.
|
||||||
#'
|
process_correlation <- function(distances, gene_ids, preset) {
|
||||||
#' The result will be a data.table with the following columns:
|
|
||||||
#'
|
|
||||||
#' - `gene` Gene ID of the processed gene.
|
|
||||||
#' - `score` Mean correlation coefficient.
|
|
||||||
#'
|
|
||||||
#' @param distances Distance data to use.
|
|
||||||
#' @param species_ids Species, whose data should be included.
|
|
||||||
#' @param gene_ids Genes to process.
|
|
||||||
#' @param reference_gene_ids Genes to compare to.
|
|
||||||
process_correlation <- function(distances, species_ids, gene_ids,
|
|
||||||
reference_gene_ids) {
|
|
||||||
results <- data.table(gene = gene_ids)
|
results <- data.table(gene = gene_ids)
|
||||||
|
reference_gene_ids <- preset$reference_gene_ids
|
||||||
reference_count <- length(reference_gene_ids)
|
reference_count <- length(reference_gene_ids)
|
||||||
|
|
||||||
# Prefilter distances by species.
|
# Prefilter distances by species.
|
||||||
distances <- distances[species %chin% species_ids]
|
distances <- distances[species %chin% preset$species_ids]
|
||||||
|
|
||||||
# Add an index for quickly accessing data per gene.
|
# Add an index for quickly accessing data per gene.
|
||||||
setkey(distances, gene)
|
setkey(distances, gene)
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
source("clusteriness.R")
|
source("process/clusteriness.R")
|
||||||
source("correlation.R")
|
source("process/correlation.R")
|
||||||
source("neural.R")
|
source("process/neural.R")
|
||||||
source("proximity.R")
|
source("process/proximity.R")
|
||||||
|
|
||||||
#' Construct a new method.
|
#' Construct a new method.
|
||||||
#'
|
#'
|
||||||
|
|
@ -10,9 +10,8 @@ source("proximity.R")
|
||||||
#' parameters in this order:
|
#' parameters in this order:
|
||||||
#'
|
#'
|
||||||
#' - `distances` Distance data to use.
|
#' - `distances` Distance data to use.
|
||||||
#' - `species_ids` Species, whose data should be included.
|
|
||||||
#' - `gene_ids` Genes to process.
|
#' - `gene_ids` Genes to process.
|
||||||
#' - `reference_gene_ids` Genes to compare to.
|
#' - `preset` Preset to apply.
|
||||||
#'
|
#'
|
||||||
#' The function should return a `data.table` with the following columns:
|
#' The function should return a `data.table` with the following columns:
|
||||||
#'
|
#'
|
||||||
|
|
@ -2,19 +2,10 @@ library(data.table)
|
||||||
library(neuralnet)
|
library(neuralnet)
|
||||||
|
|
||||||
#' Find genes by training a neural network on reference position data.
|
#' Find genes by training a neural network on reference position data.
|
||||||
#'
|
|
||||||
#' The result will be a data.table with the following columns:
|
|
||||||
#'
|
|
||||||
#' - `gene` Gene ID of the processed gene.
|
|
||||||
#' - `score` Output score given by the neural network.
|
|
||||||
#'
|
|
||||||
#' @param distances Distance data to use.
|
|
||||||
#' @param species_ids Species, whose data should be included.
|
|
||||||
#' @param gene_ids Genes to process. This should include the reference genes.
|
|
||||||
#' @param reference_gene_ids Genes to compare to.
|
|
||||||
#' @param seed A seed to get reproducible results.
|
#' @param seed A seed to get reproducible results.
|
||||||
process_neural <- function(distances, species_ids, gene_ids,
|
process_neural <- function(distances, gene_ids, preset, seed = 726839) {
|
||||||
reference_gene_ids, seed = 726839) {
|
species_ids <- preset$species_ids
|
||||||
|
reference_gene_ids <- preset$reference_gene_ids
|
||||||
set.seed(seed)
|
set.seed(seed)
|
||||||
gene_count <- length(gene_ids)
|
gene_count <- length(gene_ids)
|
||||||
|
|
||||||
29
process/presets.R
Normal file
29
process/presets.R
Normal file
|
|
@ -0,0 +1,29 @@
|
||||||
|
library(data.table)
|
||||||
|
|
||||||
|
#' Build a preset.
#'
#' A preset bundles the input values that are shared by all processing
#' methods. Its hash serves as the key for caching the results of applying
#' the methods.
#'
#' @param species_ids IDs of species to include.
#' @param reference_gene_ids Reference genes to use.
#'
#' @return A named list with the elements `species_ids` and
#'   `reference_gene_ids`, holding the respective arguments unchanged.
preset <- function(species_ids, reference_gene_ids) {
  values <- list(species_ids, reference_gene_ids)
  names(values) <- c("species_ids", "reference_gene_ids")
  values
}
|
||||||
|
|
||||||
|
#' Default preset restricted to the species flagged as replicative.
#'
#' The reference genes are all genes flagged as suggested or verified.
preset_replicative_species <- preset(
  species_ids = species[replicative == TRUE, id],
  reference_gene_ids = genes[suggested | verified == TRUE, id]
)
|
||||||
|
|
||||||
|
#' Default preset covering every species in the species table.
#'
#' The reference genes are all genes flagged as suggested or verified.
preset_all_species <- preset(
  species_ids = species[, id],
  reference_gene_ids = genes[suggested | verified == TRUE, id]
)
|
||||||
58
process/process.R
Normal file
58
process/process.R
Normal file
|
|
@ -0,0 +1,58 @@
|
||||||
|
library(data.table)
|
||||||
|
|
||||||
|
source("process/util.R")
|
||||||
|
|
||||||
|
# Load input data
|
||||||
|
|
||||||
|
source("process/input.R")
|
||||||
|
|
||||||
|
# Species and gene tables, obtained through run_cached().
species <- run_cached("inputs/species", retrieve_species)
genes <- run_cached("inputs/genes", retrieve_genes)

# Distance data requested for all species and all genes.
distances <- run_cached(
  "inputs/distances",
  retrieve_distances,
  species[, id],
  genes[, id]
)

# Extend the gene table by `n_species`: the number of rows in `distances`
# for each gene.
genes <- merge(
  x = genes,
  y = distances[, .(n_species = .N), by = "gene"],
  by.x = "id",
  by.y = "gene"
)
|
||||||
|
|
||||||
|
source("process/methods.R")
|
||||||
|
source("process/presets.R")
|
||||||
|
|
||||||
|
#' Apply all methods with the specified preset without caching.
#'
#' Each method in `methods` is called with the global `distances`, the IDs of
#' all genes in the global `genes` table, and the preset. Its `score` column
#' is renamed to the method's ID and joined onto the result by gene ID.
#'
#' @param preset Preset to apply.
#'
#' @return A `data.table` with a `gene` column and one score column per
#'   method, named after the method's ID.
process_priv <- function(preset) {
  # The gene IDs don't change between methods, so extract them once.
  gene_ids <- genes[, id]
  results <- data.table(gene = gene_ids)

  for (method in methods) {
    method_results <- method$fn(distances, gene_ids, preset)

    # Rename the generic `score` column so that the columns of different
    # methods don't collide.
    setnames(method_results, "score", method$id)

    # Join explicitly on the gene ID; without `by`, merge() may fall back to
    # all column names that both tables have in common.
    results <- merge(results, method_results, by = "gene")
  }

  results
}
|
||||||
|
|
||||||
|
#' Apply all methods with the specified preset, using the cache.
#'
#' The results are cached under a name derived from the preset's hash and are
#' restored from the cache, if possible. The return value is a `data.table`
#' with one row for each gene, identified by its ID (`gene` column). The
#' additional columns contain the results per method and are named after the
#' method IDs.
#'
#' @param preset Preset to apply.
process <- function(preset) {
  cache_id <- sprintf("results/%s", rlang::hash(preset))
  run_cached(cache_id, process_priv, preset)
}
|
||||||
|
|
@ -4,20 +4,11 @@ library(data.table)
|
||||||
#'
|
#'
|
||||||
#' A score will be given to each gene such that 0.0 corresponds to the maximal
|
#' A score will be given to each gene such that 0.0 corresponds to the maximal
|
||||||
#' mean distance across all genes and 1.0 corresponds to a distance of 0.
|
#' mean distance across all genes and 1.0 corresponds to a distance of 0.
|
||||||
#'
|
process_proximity <- function(distances, gene_ids, preset) {
|
||||||
#' The result will be a data.table with the following columns:
|
species_count <- length(preset$species_ids)
|
||||||
#'
|
|
||||||
#' - `gene` Gene ID of the processed gene.
|
|
||||||
#' - `score` Score for the proximity.
|
|
||||||
#'
|
|
||||||
#' @param distances Distance data to use.
|
|
||||||
#' @param species_ids Species, whose data should be included.
|
|
||||||
#' @param gene_ids Genes to process.
|
|
||||||
process_proximity <- function(distances, species_ids, gene_ids, ...) {
|
|
||||||
species_count <- length(species_ids)
|
|
||||||
|
|
||||||
# Prefilter distances by species.
|
# Prefilter distances by species.
|
||||||
distances <- distances[species %chin% species_ids]
|
distances <- distances[species %chin% preset$species_ids]
|
||||||
|
|
||||||
# Compute the score as described above.
|
# Compute the score as described above.
|
||||||
|
|
||||||
7
shiny/main.R
Normal file
7
shiny/main.R
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
library(shiny)
|
||||||
|
|
||||||
|
source("process/process.R")
|
||||||
|
source("shiny/server.R")
|
||||||
|
source("shiny/ui.R")
|
||||||
|
|
||||||
|
runApp(shinyApp(ui, server))
|
||||||
|
|
@ -5,7 +5,6 @@ library(plotly)
|
||||||
library(rclipboard)
|
library(rclipboard)
|
||||||
library(shiny)
|
library(shiny)
|
||||||
|
|
||||||
source("init.R")
|
|
||||||
source("optimize.R")
|
source("optimize.R")
|
||||||
source("rank_plot.R")
|
source("rank_plot.R")
|
||||||
source("scatter_plot.R")
|
source("scatter_plot.R")
|
||||||
|
|
@ -71,11 +70,18 @@ server <- function(input, output, session) {
|
||||||
# Select the species preset.
|
# Select the species preset.
|
||||||
|
|
||||||
results <- if (input$species == "all") {
|
results <- if (input$species == "all") {
|
||||||
results_all
|
process(preset_all_species)
|
||||||
} else {
|
} else {
|
||||||
results_replicative
|
process(preset_replicative_species)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
results <- merge(
|
||||||
|
results,
|
||||||
|
genes,
|
||||||
|
by.x = "gene",
|
||||||
|
by.y = "id"
|
||||||
|
)
|
||||||
|
|
||||||
# Compute scoring factors and the weighted score.
|
# Compute scoring factors and the weighted score.
|
||||||
|
|
||||||
total_weight <- 0.0
|
total_weight <- 0.0
|
||||||
|
|
@ -3,8 +3,6 @@ library(plotly)
|
||||||
library(rclipboard)
|
library(rclipboard)
|
||||||
library(shiny)
|
library(shiny)
|
||||||
|
|
||||||
source("methods.R")
|
|
||||||
|
|
||||||
ui <- fluidPage(
|
ui <- fluidPage(
|
||||||
shinyjs::useShinyjs(),
|
shinyjs::useShinyjs(),
|
||||||
rclipboardSetup(),
|
rclipboardSetup(),
|
||||||
Loading…
Add table
Add a link
Reference in a new issue