mirror of
https://github.com/johrpan/geposanui.git
synced 2025-10-26 11:17:24 +01:00
Generalize method definitions
This commit is contained in:
parent
d3edeefbe2
commit
9b0b3c13f5
7 changed files with 137 additions and 169 deletions
|
|
@ -42,12 +42,12 @@ clusteriness <- function(data, height = 1000000) {
|
|||
#' The return value will be a data.table with the following columns:
|
||||
#'
|
||||
#' - `gene` Gene ID of the processed gene.
|
||||
#' - `clusteriness` Score quantifying the gene's clusters.
|
||||
#' - `score` Score quantifying the gene's clusters.
|
||||
#'
|
||||
#' @param distances Gene distance data to use.
|
||||
#' @param species_ids IDs of species to include in the analysis.
|
||||
#' @param gene_ids Genes to include in the computation.
|
||||
process_clustering <- function(distances, species_ids, gene_ids) {
|
||||
process_clusteriness <- function(distances, species_ids, gene_ids, ...) {
|
||||
results <- data.table(gene = gene_ids)
|
||||
|
||||
# Prefilter the input data by species.
|
||||
|
|
@ -61,5 +61,5 @@ process_clustering <- function(distances, species_ids, gene_ids) {
|
|||
clusteriness(distances[gene_id, distance])
|
||||
}
|
||||
|
||||
results[, clusteriness := compute(gene), by = 1:nrow(results)]
|
||||
results[, score := compute(gene), by = 1:nrow(results)]
|
||||
}
|
||||
|
|
@ -6,7 +6,7 @@ library(data.table)
|
|||
#' The result will be a data.table with the following columns:
|
||||
#'
|
||||
#' - `gene` Gene ID of the processed gene.
|
||||
#' - `correlation` Mean correlation coefficient.
|
||||
#' - `score` Mean correlation coefficient.
|
||||
#'
|
||||
#' @param distances Distance data to use.
|
||||
#' @param species_ids Species, whose data should be included.
|
||||
|
|
@ -69,5 +69,5 @@ process_correlation <- function(distances, species_ids, gene_ids,
|
|||
score <- correlation_sum / reference_count
|
||||
}
|
||||
|
||||
results[, correlation := compute(gene), by = 1:nrow(results)]
|
||||
results[, score := compute(gene), by = 1:nrow(results)]
|
||||
}
|
||||
137
init.R
137
init.R
|
|
@ -1,7 +1,5 @@
|
|||
source("clustering.R")
|
||||
source("correlation.R")
|
||||
source("input.R")
|
||||
source("neural.R")
|
||||
source("methods.R")
|
||||
source("util.R")
|
||||
|
||||
# Load input data
|
||||
|
|
@ -16,66 +14,12 @@ distances <- run_cached(
|
|||
genes[, id]
|
||||
)
|
||||
|
||||
# Load processed data
|
||||
|
||||
all_species <- species[, id]
|
||||
replicative_species <- species[replicative == TRUE, id]
|
||||
all_genes <- genes[, id]
|
||||
tpe_old_genes <- genes[suggested | verified == TRUE, id]
|
||||
|
||||
clustering_all <- run_cached(
|
||||
"clustering_all",
|
||||
process_clustering,
|
||||
distances,
|
||||
all_species,
|
||||
all_genes
|
||||
)
|
||||
|
||||
clustering_replicative <- run_cached(
|
||||
"clustering_replicative",
|
||||
process_clustering,
|
||||
distances,
|
||||
replicative_species,
|
||||
all_genes
|
||||
)
|
||||
|
||||
correlation_all <- run_cached(
|
||||
"correlation_all",
|
||||
process_correlation,
|
||||
distances,
|
||||
all_species,
|
||||
all_genes,
|
||||
tpe_old_genes
|
||||
)
|
||||
|
||||
correlation_replicative <- run_cached(
|
||||
"correlation_replicative",
|
||||
process_correlation,
|
||||
distances,
|
||||
replicative_species,
|
||||
all_genes,
|
||||
tpe_old_genes
|
||||
)
|
||||
|
||||
neural_all <- run_cached(
|
||||
"neural_all",
|
||||
process_neural,
|
||||
distances,
|
||||
all_species,
|
||||
all_genes,
|
||||
tpe_old_genes
|
||||
)
|
||||
|
||||
neural_replicative <- run_cached(
|
||||
"neural_replicative",
|
||||
process_neural,
|
||||
distances,
|
||||
replicative_species,
|
||||
all_genes,
|
||||
tpe_old_genes
|
||||
)
|
||||
|
||||
# Merge processed data as well as gene information.
|
||||
# Apply all methods for all species
|
||||
|
||||
results_all <- merge(
|
||||
genes,
|
||||
|
|
@ -84,26 +28,27 @@ results_all <- merge(
|
|||
by.y = "gene"
|
||||
)
|
||||
|
||||
results_all <- merge(
|
||||
results_all,
|
||||
clustering_all,
|
||||
by.x = "id",
|
||||
by.y = "gene"
|
||||
)
|
||||
setnames(results_all, "id", "gene")
|
||||
|
||||
results_all <- merge(
|
||||
results_all,
|
||||
correlation_all,
|
||||
by.x = "id",
|
||||
by.y = "gene"
|
||||
)
|
||||
for (method in methods) {
|
||||
method_results <- run_cached(
|
||||
sprintf("%s_all", method$id),
|
||||
method$fn,
|
||||
distances,
|
||||
all_species,
|
||||
all_genes,
|
||||
tpe_old_genes
|
||||
)
|
||||
|
||||
results_all <- merge(
|
||||
setnames(method_results, "score", method$id)
|
||||
|
||||
results_all <- merge(
|
||||
results_all,
|
||||
neural_all,
|
||||
by.x = "id",
|
||||
by.y = "gene"
|
||||
)
|
||||
method_results,
|
||||
)
|
||||
}
|
||||
|
||||
# Apply all methods for replicatively aging species
|
||||
|
||||
results_replicative <- merge(
|
||||
genes,
|
||||
|
|
@ -116,28 +61,22 @@ results_replicative <- merge(
|
|||
by.y = "gene"
|
||||
)
|
||||
|
||||
results_replicative <- merge(
|
||||
results_replicative,
|
||||
clustering_replicative,
|
||||
by.x = "id",
|
||||
by.y = "gene"
|
||||
)
|
||||
|
||||
results_replicative <- merge(
|
||||
results_replicative,
|
||||
correlation_replicative,
|
||||
by.x = "id",
|
||||
by.y = "gene"
|
||||
)
|
||||
|
||||
results_replicative <- merge(
|
||||
results_replicative,
|
||||
neural_replicative,
|
||||
by.x = "id",
|
||||
by.y = "gene"
|
||||
)
|
||||
|
||||
# Rename `id` columns to `gene`.
|
||||
|
||||
setnames(results_all, "id", "gene")
|
||||
setnames(results_replicative, "id", "gene")
|
||||
|
||||
for (method in methods) {
|
||||
method_results <- run_cached(
|
||||
sprintf("%s_replicative", method$id),
|
||||
method$fn,
|
||||
distances,
|
||||
replicative_species,
|
||||
all_genes,
|
||||
tpe_old_genes
|
||||
)
|
||||
|
||||
setnames(method_results, "score", method$id)
|
||||
|
||||
results_replicative <- merge(
|
||||
results_replicative,
|
||||
method_results,
|
||||
)
|
||||
}
|
||||
56
methods.R
Normal file
56
methods.R
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
source("clusteriness.R")
|
||||
source("correlation.R")
|
||||
source("neural.R")
|
||||
|
||||
#' Construct a new method.
|
||||
#'
|
||||
#' A method describes a way to perform a computation on gene distance data that
|
||||
#' results in a single score per gene. The function should accept the following
|
||||
#' parameters in this order:
|
||||
#'
|
||||
#' - `distances` Distance data to use.
|
||||
#' - `species_ids` Species, whose data should be included.
|
||||
#' - `gene_ids` Genes to process.
|
||||
#' - `reference_gene_ids` Genes to compare to.
|
||||
#'
|
||||
#' The function should return a `data.table` with the following columns:
|
||||
#'
|
||||
#' - `gene` Gene ID of the processed gene.
|
||||
#' - `score` Score for the gene between 0.0 and 1.0.
|
||||
#'
|
||||
#' @param id Internal identifier for the method.
|
||||
#' @param name Human readable name for the method.
|
||||
#' @param description Short human readable description.
|
||||
#' @param fn Function to perform the computation.
|
||||
#'
|
||||
#' @return A named list containing the arguments.
|
||||
method <- function(id, name, description, fn) {
|
||||
list(
|
||||
id = id,
|
||||
name = name,
|
||||
description = description,
|
||||
fn = fn
|
||||
)
|
||||
}
|
||||
|
||||
#' All methods to be included in the analysis.
|
||||
methods <- list(
|
||||
method(
|
||||
"clusteriness",
|
||||
"Clustering",
|
||||
"Clustering of genes",
|
||||
process_clusteriness
|
||||
),
|
||||
method(
|
||||
"correlation",
|
||||
"Correlation",
|
||||
"Correlation with known genes",
|
||||
process_correlation
|
||||
),
|
||||
method(
|
||||
"neural",
|
||||
"Neural",
|
||||
"Assessment by neural network",
|
||||
process_neural
|
||||
)
|
||||
)
|
||||
6
neural.R
6
neural.R
|
|
@ -6,7 +6,7 @@ library(neuralnet)
|
|||
#' The result will be a data.table with the following columns:
|
||||
#'
|
||||
#' - `gene` Gene ID of the processed gene.
|
||||
#' - `neural` Output score given by the neural network.
|
||||
#' - `score` Output score given by the neural network.
|
||||
#'
|
||||
#' @param distances Distance data to use.
|
||||
#' @param species_ids Species, whose data should be included.
|
||||
|
|
@ -105,6 +105,6 @@ process_neural <- function(distances, species_ids, gene_ids,
|
|||
|
||||
# Return the resulting scores given by applying the neural network.
|
||||
|
||||
data[, neural := compute(nn, data)$net.result]
|
||||
data[, .(gene, neural)]
|
||||
data[, score := compute(nn, data)$net.result]
|
||||
data[, .(gene, score)]
|
||||
}
|
||||
55
server.R
55
server.R
|
|
@ -47,16 +47,18 @@ server <- function(input, output) {
|
|||
|
||||
# Compute scoring factors and the weighted score.
|
||||
|
||||
clusteriness_weight <- input$clusteriness / 100
|
||||
correlation_weight <- input$correlation / 100
|
||||
neural_weight <- input$neural / 100
|
||||
total_weight <- clusteriness_weight + correlation_weight + neural_weight
|
||||
clusteriness_factor <- clusteriness_weight / total_weight
|
||||
correlation_factor <- correlation_weight / total_weight
|
||||
neural_factor <- neural_weight / total_weight
|
||||
total_weight <- 0.0
|
||||
results[, score := 0.0]
|
||||
|
||||
results[, score := clusteriness_factor * clusteriness +
|
||||
correlation_factor * correlation + neural_factor * neural]
|
||||
for (method in methods) {
|
||||
weight <- input[[method$id]]
|
||||
total_weight <- total_weight + weight
|
||||
column <- method$id
|
||||
weighted <- weight * results[, ..column]
|
||||
results[, score := score + weighted]
|
||||
}
|
||||
|
||||
results[, score := score / total_weight]
|
||||
|
||||
# Exclude genes with too few species.
|
||||
results <- results[n_species >= input$n_species]
|
||||
|
|
@ -75,33 +77,22 @@ server <- function(input, output) {
|
|||
# Apply the cut-off score.
|
||||
results <- results[score >= input$cutoff / 100]
|
||||
|
||||
# Order the results based on their score. The resulting index will be
|
||||
# used as the "rank".
|
||||
# Order the results based on their score.
|
||||
|
||||
setorder(results, -score, na.last = TRUE)
|
||||
results[, rank := .I]
|
||||
})
|
||||
|
||||
output$genes <- renderDT({
|
||||
method_ids <- sapply(methods, function(method) method$id)
|
||||
method_names <- sapply(methods, function(method) method$name)
|
||||
columns <- c("rank", "gene", "name", method_ids, "score")
|
||||
column_names <- c("", "Gene", "", method_names, "Score")
|
||||
|
||||
dt <- datatable(
|
||||
results()[, .(
|
||||
.I,
|
||||
gene,
|
||||
name,
|
||||
clusteriness,
|
||||
correlation,
|
||||
neural,
|
||||
score
|
||||
)],
|
||||
results()[, ..columns],
|
||||
rownames = FALSE,
|
||||
colnames = c(
|
||||
"",
|
||||
"Gene",
|
||||
"",
|
||||
"Clusters",
|
||||
"Correlation",
|
||||
"Neural",
|
||||
"Score"
|
||||
),
|
||||
colnames = column_names,
|
||||
style = "bootstrap",
|
||||
options = list(
|
||||
rowCallback = js_link,
|
||||
|
|
@ -109,11 +100,7 @@ server <- function(input, output) {
|
|||
)
|
||||
)
|
||||
|
||||
formatPercentage(
|
||||
dt,
|
||||
c("clusteriness", "correlation", "neural", "score"),
|
||||
digits = 1
|
||||
)
|
||||
formatPercentage(dt, c(method_ids, "score"), digits = 1)
|
||||
})
|
||||
|
||||
output$synposis <- renderText({
|
||||
|
|
|
|||
30
ui.R
30
ui.R
|
|
@ -3,6 +3,8 @@ library(plotly)
|
|||
library(rclipboard)
|
||||
library(shiny)
|
||||
|
||||
source("methods.R")
|
||||
|
||||
ui <- fluidPage(
|
||||
rclipboardSetup(),
|
||||
titlePanel("TPE-OLD candidates"),
|
||||
|
|
@ -22,33 +24,17 @@ ui <- fluidPage(
|
|||
),
|
||||
wellPanel(
|
||||
h3("Ranking"),
|
||||
lapply(methods, function(method) {
|
||||
sliderInput(
|
||||
"clusteriness",
|
||||
"Clustering of genes",
|
||||
method$id,
|
||||
method$description,
|
||||
post = "%",
|
||||
min = 0,
|
||||
max = 100,
|
||||
step = 1,
|
||||
value = 58
|
||||
),
|
||||
sliderInput(
|
||||
"correlation",
|
||||
"Correlation with known genes",
|
||||
post = "%",
|
||||
min = 0,
|
||||
max = 100,
|
||||
step = 1,
|
||||
value = 36
|
||||
),
|
||||
sliderInput(
|
||||
"neural",
|
||||
"Assessment by neural network",
|
||||
post = "%",
|
||||
min = 0,
|
||||
max = 100,
|
||||
step = 1,
|
||||
value = 6
|
||||
),
|
||||
value = 100
|
||||
)
|
||||
}),
|
||||
sliderInput(
|
||||
"cutoff",
|
||||
"Cut-off score",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue