mirror of
				https://github.com/johrpan/geposanui.git
				synced 2025-10-26 19:27:24 +01:00 
			
		
		
		
	Generalize method definitions
This commit is contained in:
		
							parent
							
								
									d3edeefbe2
								
							
						
					
					
						commit
						9b0b3c13f5
					
				
					 7 changed files with 137 additions and 169 deletions
				
			
		|  | @ -42,12 +42,12 @@ clusteriness <- function(data, height = 1000000) { | ||||||
| #' The return value will be a data.table with the following columns: | #' The return value will be a data.table with the following columns: | ||||||
| #' | #' | ||||||
| #'  - `gene` Gene ID of the processed gene. | #'  - `gene` Gene ID of the processed gene. | ||||||
| #'  - `clusteriness` Score quantidying the gene's clusters. | #'  - `score` Score quantidying the gene's clusters. | ||||||
| #' | #' | ||||||
| #' @param distances Gene distance data to use. | #' @param distances Gene distance data to use. | ||||||
| #' @param species_ids IDs of species to include in the analysis. | #' @param species_ids IDs of species to include in the analysis. | ||||||
| #' @param gene_ids Genes to include in the computation. | #' @param gene_ids Genes to include in the computation. | ||||||
| process_clustering <- function(distances, species_ids, gene_ids) { | process_clusteriness <- function(distances, species_ids, gene_ids, ...) { | ||||||
|     results <- data.table(gene = gene_ids) |     results <- data.table(gene = gene_ids) | ||||||
| 
 | 
 | ||||||
|     # Prefilter the input data by species. |     # Prefilter the input data by species. | ||||||
|  | @ -61,5 +61,5 @@ process_clustering <- function(distances, species_ids, gene_ids) { | ||||||
|         clusteriness(distances[gene_id, distance]) |         clusteriness(distances[gene_id, distance]) | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     results[, clusteriness := compute(gene), by = 1:nrow(results)] |     results[, score := compute(gene), by = 1:nrow(results)] | ||||||
| } | } | ||||||
|  | @ -6,7 +6,7 @@ library(data.table) | ||||||
| #' The result will be a data.table with the following columns: | #' The result will be a data.table with the following columns: | ||||||
| #' | #' | ||||||
| #'  - `gene` Gene ID of the processed gene. | #'  - `gene` Gene ID of the processed gene. | ||||||
| #'  - `correlation` Mean correlation coefficient. | #'  - `score` Mean correlation coefficient. | ||||||
| #' | #' | ||||||
| #' @param distances Distance data to use. | #' @param distances Distance data to use. | ||||||
| #' @param species_ids Species, whose data should be included. | #' @param species_ids Species, whose data should be included. | ||||||
|  | @ -69,5 +69,5 @@ process_correlation <- function(distances, species_ids, gene_ids, | ||||||
|         score <- correlation_sum / reference_count |         score <- correlation_sum / reference_count | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     results[, correlation := compute(gene), by = 1:nrow(results)] |     results[, score := compute(gene), by = 1:nrow(results)] | ||||||
| } | } | ||||||
							
								
								
									
										137
									
								
								init.R
									
										
									
									
									
								
							
							
						
						
									
										137
									
								
								init.R
									
										
									
									
									
								
							|  | @ -1,7 +1,5 @@ | ||||||
| source("clustering.R") |  | ||||||
| source("correlation.R") |  | ||||||
| source("input.R") | source("input.R") | ||||||
| source("neural.R") | source("methods.R") | ||||||
| source("util.R") | source("util.R") | ||||||
| 
 | 
 | ||||||
| # Load input data | # Load input data | ||||||
|  | @ -16,66 +14,12 @@ distances <- run_cached( | ||||||
|     genes[, id] |     genes[, id] | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| # Load processed data |  | ||||||
| 
 |  | ||||||
| all_species <- species[, id] | all_species <- species[, id] | ||||||
| replicative_species <- species[replicative == TRUE, id] | replicative_species <- species[replicative == TRUE, id] | ||||||
| all_genes <- genes[, id] | all_genes <- genes[, id] | ||||||
| tpe_old_genes <- genes[suggested | verified == TRUE, id] | tpe_old_genes <- genes[suggested | verified == TRUE, id] | ||||||
| 
 | 
 | ||||||
| clustering_all <- run_cached( | # Apply all methods for all species | ||||||
|     "clustering_all", |  | ||||||
|     process_clustering, |  | ||||||
|     distances, |  | ||||||
|     all_species, |  | ||||||
|     all_genes |  | ||||||
| ) |  | ||||||
| 
 |  | ||||||
| clustering_replicative <- run_cached( |  | ||||||
|     "clustering_replicative", |  | ||||||
|     process_clustering, |  | ||||||
|     distances, |  | ||||||
|     replicative_species, |  | ||||||
|     all_genes |  | ||||||
| ) |  | ||||||
| 
 |  | ||||||
| correlation_all <- run_cached( |  | ||||||
|     "correlation_all", |  | ||||||
|     process_correlation, |  | ||||||
|     distances, |  | ||||||
|     all_species, |  | ||||||
|     all_genes, |  | ||||||
|     tpe_old_genes |  | ||||||
| ) |  | ||||||
| 
 |  | ||||||
| correlation_replicative <- run_cached( |  | ||||||
|     "correlation_replicative", |  | ||||||
|     process_correlation, |  | ||||||
|     distances, |  | ||||||
|     replicative_species, |  | ||||||
|     all_genes, |  | ||||||
|     tpe_old_genes |  | ||||||
| ) |  | ||||||
| 
 |  | ||||||
| neural_all <- run_cached( |  | ||||||
|     "neural_all", |  | ||||||
|     process_neural, |  | ||||||
|     distances, |  | ||||||
|     all_species, |  | ||||||
|     all_genes, |  | ||||||
|     tpe_old_genes |  | ||||||
| ) |  | ||||||
| 
 |  | ||||||
| neural_replicative <- run_cached( |  | ||||||
|     "neural_replicative", |  | ||||||
|     process_neural, |  | ||||||
|     distances, |  | ||||||
|     replicative_species, |  | ||||||
|     all_genes, |  | ||||||
|     tpe_old_genes |  | ||||||
| ) |  | ||||||
| 
 |  | ||||||
| # Merge processed data as well as gene information. |  | ||||||
| 
 | 
 | ||||||
| results_all <- merge( | results_all <- merge( | ||||||
|     genes, |     genes, | ||||||
|  | @ -84,26 +28,27 @@ results_all <- merge( | ||||||
|     by.y = "gene" |     by.y = "gene" | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| results_all <- merge( | setnames(results_all, "id", "gene") | ||||||
|     results_all, |  | ||||||
|     clustering_all, |  | ||||||
|     by.x = "id", |  | ||||||
|     by.y = "gene" |  | ||||||
| ) |  | ||||||
| 
 | 
 | ||||||
| results_all <- merge( | for (method in methods) { | ||||||
|     results_all, |     method_results <- run_cached( | ||||||
|     correlation_all, |         sprintf("%s_all", method$id), | ||||||
|     by.x = "id", |         method$fn, | ||||||
|     by.y = "gene" |         distances, | ||||||
| ) |         all_species, | ||||||
|  |         all_genes, | ||||||
|  |         tpe_old_genes | ||||||
|  |     ) | ||||||
| 
 | 
 | ||||||
| results_all <- merge( |     setnames(method_results, "score", method$id) | ||||||
|  | 
 | ||||||
|  |     results_all <- merge( | ||||||
|         results_all, |         results_all, | ||||||
|     neural_all, |         method_results, | ||||||
|     by.x = "id", |     ) | ||||||
|     by.y = "gene" | } | ||||||
| ) | 
 | ||||||
|  | # Apply all methods for replicatively aging species | ||||||
| 
 | 
 | ||||||
| results_replicative <- merge( | results_replicative <- merge( | ||||||
|     genes, |     genes, | ||||||
|  | @ -116,28 +61,22 @@ results_replicative <- merge( | ||||||
|     by.y = "gene" |     by.y = "gene" | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| results_replicative <- merge( |  | ||||||
|     results_replicative, |  | ||||||
|     clustering_replicative, |  | ||||||
|     by.x = "id", |  | ||||||
|     by.y = "gene" |  | ||||||
| ) |  | ||||||
| 
 |  | ||||||
| results_replicative <- merge( |  | ||||||
|     results_replicative, |  | ||||||
|     correlation_replicative, |  | ||||||
|     by.x = "id", |  | ||||||
|     by.y = "gene" |  | ||||||
| ) |  | ||||||
| 
 |  | ||||||
| results_replicative <- merge( |  | ||||||
|     results_replicative, |  | ||||||
|     neural_replicative, |  | ||||||
|     by.x = "id", |  | ||||||
|     by.y = "gene" |  | ||||||
| ) |  | ||||||
| 
 |  | ||||||
| # Rename `id` columns to `gene`. |  | ||||||
| 
 |  | ||||||
| setnames(results_all, "id", "gene") |  | ||||||
| setnames(results_replicative, "id", "gene") | setnames(results_replicative, "id", "gene") | ||||||
|  | 
 | ||||||
|  | for (method in methods) { | ||||||
|  |     method_results <- run_cached( | ||||||
|  |         sprintf("%s_replicative", method$id), | ||||||
|  |         method$fn, | ||||||
|  |         distances, | ||||||
|  |         replicative_species, | ||||||
|  |         all_genes, | ||||||
|  |         tpe_old_genes | ||||||
|  |     ) | ||||||
|  | 
 | ||||||
|  |     setnames(method_results, "score", method$id) | ||||||
|  | 
 | ||||||
|  |     results_replicative <- merge( | ||||||
|  |         results_replicative, | ||||||
|  |         method_results, | ||||||
|  |     ) | ||||||
|  | } | ||||||
							
								
								
									
										56
									
								
								methods.R
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								methods.R
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,56 @@ | ||||||
|  | source("clusteriness.R") | ||||||
|  | source("correlation.R") | ||||||
|  | source("neural.R") | ||||||
|  | 
 | ||||||
|  | #' Construct a new method. | ||||||
|  | #' | ||||||
|  | #' A method describes a way to perform a computation on gene distance data that | ||||||
|  | #' results in a single score per gene. The function should accept the following | ||||||
|  | #' parameters in this order: | ||||||
|  | #' | ||||||
|  | #'  - `distances` Distance data to use. | ||||||
|  | #'  - `species_ids` Species, whose data should be included. | ||||||
|  | #'  - `gene_ids` Genes to process. | ||||||
|  | #'  - `reference_gene_ids` Genes to compare to. | ||||||
|  | #' | ||||||
|  | #' The function should return a `data.table` with the following columns: | ||||||
|  | #' | ||||||
|  | #'  - `gene` Gene ID of the processed gene. | ||||||
|  | #'  - `score` Score for the gene between 0.0 and 1.0. | ||||||
|  | #' | ||||||
|  | #' @param id Internal identifier for the method. | ||||||
|  | #' @param name Human readable name for the method. | ||||||
|  | #' @param description Short human readable description. | ||||||
|  | #' @param fn Function to perform the computation. | ||||||
|  | #' | ||||||
|  | #' @return A named list containing the arguments. | ||||||
|  | method <- function(id, name, description, fn) { | ||||||
|  |     list( | ||||||
|  |         id = id, | ||||||
|  |         name = name, | ||||||
|  |         description = description, | ||||||
|  |         fn = fn | ||||||
|  |     ) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | #' All methods to be included in the analysis. | ||||||
|  | methods <- list( | ||||||
|  |     method( | ||||||
|  |         "clusteriness", | ||||||
|  |         "Clustering", | ||||||
|  |         "Clustering of genes", | ||||||
|  |         process_clusteriness | ||||||
|  |     ), | ||||||
|  |     method( | ||||||
|  |         "correlation", | ||||||
|  |         "Correlation", | ||||||
|  |         "Correlation with known genes", | ||||||
|  |         process_correlation | ||||||
|  |     ), | ||||||
|  |     method( | ||||||
|  |         "neural", | ||||||
|  |         "Neural", | ||||||
|  |         "Assessment by neural network", | ||||||
|  |         process_neural | ||||||
|  |     ) | ||||||
|  | ) | ||||||
							
								
								
									
										6
									
								
								neural.R
									
										
									
									
									
								
							
							
						
						
									
										6
									
								
								neural.R
									
										
									
									
									
								
							|  | @ -6,7 +6,7 @@ library(neuralnet) | ||||||
| #' The result will be a data.table with the following columns: | #' The result will be a data.table with the following columns: | ||||||
| #' | #' | ||||||
| #'  - `gene` Gene ID of the processed gene. | #'  - `gene` Gene ID of the processed gene. | ||||||
| #'  - `neural` Output score given by the neural network. | #'  - `score` Output score given by the neural network. | ||||||
| #' | #' | ||||||
| #' @param distances Distance data to use. | #' @param distances Distance data to use. | ||||||
| #' @param species_ids Species, whose data should be included. | #' @param species_ids Species, whose data should be included. | ||||||
|  | @ -105,6 +105,6 @@ process_neural <- function(distances, species_ids, gene_ids, | ||||||
| 
 | 
 | ||||||
|     # Return the resulting scores given by applying the neural network. |     # Return the resulting scores given by applying the neural network. | ||||||
| 
 | 
 | ||||||
|     data[, neural := compute(nn, data)$net.result] |     data[, score := compute(nn, data)$net.result] | ||||||
|     data[, .(gene, neural)] |     data[, .(gene, score)] | ||||||
| } | } | ||||||
							
								
								
									
										55
									
								
								server.R
									
										
									
									
									
								
							
							
						
						
									
										55
									
								
								server.R
									
										
									
									
									
								
							|  | @ -47,16 +47,18 @@ server <- function(input, output) { | ||||||
| 
 | 
 | ||||||
|         # Compute scoring factors and the weighted score. |         # Compute scoring factors and the weighted score. | ||||||
| 
 | 
 | ||||||
|         clusteriness_weight <- input$clusteriness / 100 |         total_weight <- 0.0 | ||||||
|         correlation_weight <- input$correlation / 100 |         results[, score := 0.0] | ||||||
|         neural_weight <- input$neural / 100 |  | ||||||
|         total_weight <- clusteriness_weight + correlation_weight + neural_weight |  | ||||||
|         clusteriness_factor <- clusteriness_weight / total_weight |  | ||||||
|         correlation_factor <- correlation_weight / total_weight |  | ||||||
|         neural_factor <- neural_weight / total_weight |  | ||||||
| 
 | 
 | ||||||
|         results[, score := clusteriness_factor * clusteriness + |         for (method in methods) { | ||||||
|             correlation_factor * correlation + neural_factor * neural] |             weight <- input[[method$id]] | ||||||
|  |             total_weight <- total_weight + weight | ||||||
|  |             column <- method$id | ||||||
|  |             weighted <- weight * results[, ..column] | ||||||
|  |             results[, score := score + weighted] | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         results[, score := score / total_weight] | ||||||
| 
 | 
 | ||||||
|         # Exclude genes with too few species. |         # Exclude genes with too few species. | ||||||
|         results <- results[n_species >= input$n_species] |         results <- results[n_species >= input$n_species] | ||||||
|  | @ -75,33 +77,22 @@ server <- function(input, output) { | ||||||
|         # Apply the cut-off score. |         # Apply the cut-off score. | ||||||
|         results <- results[score >= input$cutoff / 100] |         results <- results[score >= input$cutoff / 100] | ||||||
| 
 | 
 | ||||||
|         # Order the results based on their score. The resulting index will be |         # Order the results based on their score. | ||||||
|         # used as the "rank". |  | ||||||
| 
 | 
 | ||||||
|         setorder(results, -score, na.last = TRUE) |         setorder(results, -score, na.last = TRUE) | ||||||
|  |         results[, rank := .I] | ||||||
|     }) |     }) | ||||||
| 
 | 
 | ||||||
|     output$genes <- renderDT({ |     output$genes <- renderDT({ | ||||||
|  |         method_ids <- sapply(methods, function(method) method$id) | ||||||
|  |         method_names <- sapply(methods, function(method) method$name) | ||||||
|  |         columns <- c("rank", "gene", "name", method_ids, "score") | ||||||
|  |         column_names <- c("", "Gene", "", method_names, "Score") | ||||||
|  | 
 | ||||||
|         dt <- datatable( |         dt <- datatable( | ||||||
|             results()[, .( |             results()[, ..columns], | ||||||
|                 .I, |  | ||||||
|                 gene, |  | ||||||
|                 name, |  | ||||||
|                 clusteriness, |  | ||||||
|                 correlation, |  | ||||||
|                 neural, |  | ||||||
|                 score |  | ||||||
|             )], |  | ||||||
|             rownames = FALSE, |             rownames = FALSE, | ||||||
|             colnames = c( |             colnames = column_names, | ||||||
|                 "", |  | ||||||
|                 "Gene", |  | ||||||
|                 "", |  | ||||||
|                 "Clusters", |  | ||||||
|                 "Correlation", |  | ||||||
|                 "Neural", |  | ||||||
|                 "Score" |  | ||||||
|             ), |  | ||||||
|             style = "bootstrap", |             style = "bootstrap", | ||||||
|             options = list( |             options = list( | ||||||
|                 rowCallback = js_link, |                 rowCallback = js_link, | ||||||
|  | @ -109,11 +100,7 @@ server <- function(input, output) { | ||||||
|             ) |             ) | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|         formatPercentage( |         formatPercentage(dt, c(method_ids, "score"), digits = 1) | ||||||
|             dt, |  | ||||||
|             c("clusteriness", "correlation", "neural", "score"), |  | ||||||
|             digits = 1 |  | ||||||
|         ) |  | ||||||
|     }) |     }) | ||||||
| 
 | 
 | ||||||
|     output$synposis <- renderText({ |     output$synposis <- renderText({ | ||||||
|  |  | ||||||
							
								
								
									
										30
									
								
								ui.R
									
										
									
									
									
								
							
							
						
						
									
										30
									
								
								ui.R
									
										
									
									
									
								
							|  | @ -3,6 +3,8 @@ library(plotly) | ||||||
| library(rclipboard) | library(rclipboard) | ||||||
| library(shiny) | library(shiny) | ||||||
| 
 | 
 | ||||||
|  | source("methods.R") | ||||||
|  | 
 | ||||||
| ui <- fluidPage( | ui <- fluidPage( | ||||||
|     rclipboardSetup(), |     rclipboardSetup(), | ||||||
|     titlePanel("TPE-OLD candidates"), |     titlePanel("TPE-OLD candidates"), | ||||||
|  | @ -22,33 +24,17 @@ ui <- fluidPage( | ||||||
|         ), |         ), | ||||||
|         wellPanel( |         wellPanel( | ||||||
|             h3("Ranking"), |             h3("Ranking"), | ||||||
|  |             lapply(methods, function(method) { | ||||||
|                 sliderInput( |                 sliderInput( | ||||||
|                 "clusteriness", |                     method$id, | ||||||
|                 "Clustering of genes", |                     method$description, | ||||||
|                     post = "%", |                     post = "%", | ||||||
|                     min = 0, |                     min = 0, | ||||||
|                     max = 100, |                     max = 100, | ||||||
|                     step = 1, |                     step = 1, | ||||||
|                 value = 58 |                     value = 100 | ||||||
|             ), |                 ) | ||||||
|             sliderInput( |             }), | ||||||
|                 "correlation", |  | ||||||
|                 "Correlation with known genes", |  | ||||||
|                 post = "%", |  | ||||||
|                 min = 0, |  | ||||||
|                 max = 100, |  | ||||||
|                 step = 1, |  | ||||||
|                 value = 36 |  | ||||||
|             ), |  | ||||||
|             sliderInput( |  | ||||||
|                 "neural", |  | ||||||
|                 "Assessment by neural network", |  | ||||||
|                 post = "%", |  | ||||||
|                 min = 0, |  | ||||||
|                 max = 100, |  | ||||||
|                 step = 1, |  | ||||||
|                 value = 6 |  | ||||||
|             ), |  | ||||||
|             sliderInput( |             sliderInput( | ||||||
|                 "cutoff", |                 "cutoff", | ||||||
|                 "Cut-off score", |                 "Cut-off score", | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue