mirror of
				https://github.com/johrpan/geposanui.git
				synced 2025-10-26 11:17:24 +01:00 
			
		
		
		
	Generalize method definitions
This commit is contained in:
		
							parent
							
								
									d3edeefbe2
								
							
						
					
					
						commit
						9b0b3c13f5
					
				
					 7 changed files with 137 additions and 169 deletions
				
			
		|  | @ -42,12 +42,12 @@ clusteriness <- function(data, height = 1000000) { | |||
| #' The return value will be a data.table with the following columns: | ||||
| #' | ||||
| #'  - `gene` Gene ID of the processed gene. | ||||
| #'  - `clusteriness` Score quantidying the gene's clusters. | ||||
| #'  - `score` Score quantidying the gene's clusters. | ||||
| #' | ||||
| #' @param distances Gene distance data to use. | ||||
| #' @param species_ids IDs of species to include in the analysis. | ||||
| #' @param gene_ids Genes to include in the computation. | ||||
| process_clustering <- function(distances, species_ids, gene_ids) { | ||||
| process_clusteriness <- function(distances, species_ids, gene_ids, ...) { | ||||
|     results <- data.table(gene = gene_ids) | ||||
| 
 | ||||
|     # Prefilter the input data by species. | ||||
|  | @ -61,5 +61,5 @@ process_clustering <- function(distances, species_ids, gene_ids) { | |||
|         clusteriness(distances[gene_id, distance]) | ||||
|     } | ||||
| 
 | ||||
|     results[, clusteriness := compute(gene), by = 1:nrow(results)] | ||||
|     results[, score := compute(gene), by = 1:nrow(results)] | ||||
| } | ||||
|  | @ -6,7 +6,7 @@ library(data.table) | |||
| #' The result will be a data.table with the following columns: | ||||
| #' | ||||
| #'  - `gene` Gene ID of the processed gene. | ||||
| #'  - `correlation` Mean correlation coefficient. | ||||
| #'  - `score` Mean correlation coefficient. | ||||
| #' | ||||
| #' @param distances Distance data to use. | ||||
| #' @param species_ids Species, whose data should be included. | ||||
|  | @ -69,5 +69,5 @@ process_correlation <- function(distances, species_ids, gene_ids, | |||
|         score <- correlation_sum / reference_count | ||||
|     } | ||||
| 
 | ||||
|     results[, correlation := compute(gene), by = 1:nrow(results)] | ||||
|     results[, score := compute(gene), by = 1:nrow(results)] | ||||
| } | ||||
							
								
								
									
										137
									
								
								init.R
									
										
									
									
									
								
							
							
						
						
									
										137
									
								
								init.R
									
										
									
									
									
								
							|  | @ -1,7 +1,5 @@ | |||
| source("clustering.R") | ||||
| source("correlation.R") | ||||
| source("input.R") | ||||
| source("neural.R") | ||||
| source("methods.R") | ||||
| source("util.R") | ||||
| 
 | ||||
| # Load input data | ||||
|  | @ -16,66 +14,12 @@ distances <- run_cached( | |||
|     genes[, id] | ||||
| ) | ||||
| 
 | ||||
| # Load processed data | ||||
| 
 | ||||
| all_species <- species[, id] | ||||
| replicative_species <- species[replicative == TRUE, id] | ||||
| all_genes <- genes[, id] | ||||
| tpe_old_genes <- genes[suggested | verified == TRUE, id] | ||||
| 
 | ||||
| clustering_all <- run_cached( | ||||
|     "clustering_all", | ||||
|     process_clustering, | ||||
|     distances, | ||||
|     all_species, | ||||
|     all_genes | ||||
| ) | ||||
| 
 | ||||
| clustering_replicative <- run_cached( | ||||
|     "clustering_replicative", | ||||
|     process_clustering, | ||||
|     distances, | ||||
|     replicative_species, | ||||
|     all_genes | ||||
| ) | ||||
| 
 | ||||
| correlation_all <- run_cached( | ||||
|     "correlation_all", | ||||
|     process_correlation, | ||||
|     distances, | ||||
|     all_species, | ||||
|     all_genes, | ||||
|     tpe_old_genes | ||||
| ) | ||||
| 
 | ||||
| correlation_replicative <- run_cached( | ||||
|     "correlation_replicative", | ||||
|     process_correlation, | ||||
|     distances, | ||||
|     replicative_species, | ||||
|     all_genes, | ||||
|     tpe_old_genes | ||||
| ) | ||||
| 
 | ||||
| neural_all <- run_cached( | ||||
|     "neural_all", | ||||
|     process_neural, | ||||
|     distances, | ||||
|     all_species, | ||||
|     all_genes, | ||||
|     tpe_old_genes | ||||
| ) | ||||
| 
 | ||||
| neural_replicative <- run_cached( | ||||
|     "neural_replicative", | ||||
|     process_neural, | ||||
|     distances, | ||||
|     replicative_species, | ||||
|     all_genes, | ||||
|     tpe_old_genes | ||||
| ) | ||||
| 
 | ||||
| # Merge processed data as well as gene information. | ||||
| # Apply all methods for all species | ||||
| 
 | ||||
| results_all <- merge( | ||||
|     genes, | ||||
|  | @ -84,26 +28,27 @@ results_all <- merge( | |||
|     by.y = "gene" | ||||
| ) | ||||
| 
 | ||||
| results_all <- merge( | ||||
|     results_all, | ||||
|     clustering_all, | ||||
|     by.x = "id", | ||||
|     by.y = "gene" | ||||
| ) | ||||
| setnames(results_all, "id", "gene") | ||||
| 
 | ||||
| results_all <- merge( | ||||
|     results_all, | ||||
|     correlation_all, | ||||
|     by.x = "id", | ||||
|     by.y = "gene" | ||||
| ) | ||||
| for (method in methods) { | ||||
|     method_results <- run_cached( | ||||
|         sprintf("%s_all", method$id), | ||||
|         method$fn, | ||||
|         distances, | ||||
|         all_species, | ||||
|         all_genes, | ||||
|         tpe_old_genes | ||||
|     ) | ||||
| 
 | ||||
| results_all <- merge( | ||||
|     setnames(method_results, "score", method$id) | ||||
| 
 | ||||
|     results_all <- merge( | ||||
|         results_all, | ||||
|     neural_all, | ||||
|     by.x = "id", | ||||
|     by.y = "gene" | ||||
| ) | ||||
|         method_results, | ||||
|     ) | ||||
| } | ||||
| 
 | ||||
| # Apply all methods for replicatively aging species | ||||
| 
 | ||||
| results_replicative <- merge( | ||||
|     genes, | ||||
|  | @ -116,28 +61,22 @@ results_replicative <- merge( | |||
|     by.y = "gene" | ||||
| ) | ||||
| 
 | ||||
| results_replicative <- merge( | ||||
|     results_replicative, | ||||
|     clustering_replicative, | ||||
|     by.x = "id", | ||||
|     by.y = "gene" | ||||
| ) | ||||
| 
 | ||||
| results_replicative <- merge( | ||||
|     results_replicative, | ||||
|     correlation_replicative, | ||||
|     by.x = "id", | ||||
|     by.y = "gene" | ||||
| ) | ||||
| 
 | ||||
| results_replicative <- merge( | ||||
|     results_replicative, | ||||
|     neural_replicative, | ||||
|     by.x = "id", | ||||
|     by.y = "gene" | ||||
| ) | ||||
| 
 | ||||
| # Rename `id` columns to `gene`. | ||||
| 
 | ||||
| setnames(results_all, "id", "gene") | ||||
| setnames(results_replicative, "id", "gene") | ||||
| 
 | ||||
| for (method in methods) { | ||||
|     method_results <- run_cached( | ||||
|         sprintf("%s_replicative", method$id), | ||||
|         method$fn, | ||||
|         distances, | ||||
|         replicative_species, | ||||
|         all_genes, | ||||
|         tpe_old_genes | ||||
|     ) | ||||
| 
 | ||||
|     setnames(method_results, "score", method$id) | ||||
| 
 | ||||
|     results_replicative <- merge( | ||||
|         results_replicative, | ||||
|         method_results, | ||||
|     ) | ||||
| } | ||||
							
								
								
									
										56
									
								
								methods.R
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								methods.R
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,56 @@ | |||
| source("clusteriness.R") | ||||
| source("correlation.R") | ||||
| source("neural.R") | ||||
| 
 | ||||
| #' Construct a new method. | ||||
| #' | ||||
| #' A method describes a way to perform a computation on gene distance data that | ||||
| #' results in a single score per gene. The function should accept the following | ||||
| #' parameters in this order: | ||||
| #' | ||||
| #'  - `distances` Distance data to use. | ||||
| #'  - `species_ids` Species, whose data should be included. | ||||
| #'  - `gene_ids` Genes to process. | ||||
| #'  - `reference_gene_ids` Genes to compare to. | ||||
| #' | ||||
| #' The function should return a `data.table` with the following columns: | ||||
| #' | ||||
| #'  - `gene` Gene ID of the processed gene. | ||||
| #'  - `score` Score for the gene between 0.0 and 1.0. | ||||
| #' | ||||
| #' @param id Internal identifier for the method. | ||||
| #' @param name Human readable name for the method. | ||||
| #' @param description Short human readable description. | ||||
| #' @param fn Function to perform the computation. | ||||
| #' | ||||
| #' @return A named list containing the arguments. | ||||
| method <- function(id, name, description, fn) { | ||||
|     list( | ||||
|         id = id, | ||||
|         name = name, | ||||
|         description = description, | ||||
|         fn = fn | ||||
|     ) | ||||
| } | ||||
| 
 | ||||
| #' All methods to be included in the analysis. | ||||
| methods <- list( | ||||
|     method( | ||||
|         "clusteriness", | ||||
|         "Clustering", | ||||
|         "Clustering of genes", | ||||
|         process_clusteriness | ||||
|     ), | ||||
|     method( | ||||
|         "correlation", | ||||
|         "Correlation", | ||||
|         "Correlation with known genes", | ||||
|         process_correlation | ||||
|     ), | ||||
|     method( | ||||
|         "neural", | ||||
|         "Neural", | ||||
|         "Assessment by neural network", | ||||
|         process_neural | ||||
|     ) | ||||
| ) | ||||
							
								
								
									
										6
									
								
								neural.R
									
										
									
									
									
								
							
							
						
						
									
										6
									
								
								neural.R
									
										
									
									
									
								
							|  | @ -6,7 +6,7 @@ library(neuralnet) | |||
| #' The result will be a data.table with the following columns: | ||||
| #' | ||||
| #'  - `gene` Gene ID of the processed gene. | ||||
| #'  - `neural` Output score given by the neural network. | ||||
| #'  - `score` Output score given by the neural network. | ||||
| #' | ||||
| #' @param distances Distance data to use. | ||||
| #' @param species_ids Species, whose data should be included. | ||||
|  | @ -105,6 +105,6 @@ process_neural <- function(distances, species_ids, gene_ids, | |||
| 
 | ||||
|     # Return the resulting scores given by applying the neural network. | ||||
| 
 | ||||
|     data[, neural := compute(nn, data)$net.result] | ||||
|     data[, .(gene, neural)] | ||||
|     data[, score := compute(nn, data)$net.result] | ||||
|     data[, .(gene, score)] | ||||
| } | ||||
							
								
								
									
										55
									
								
								server.R
									
										
									
									
									
								
							
							
						
						
									
										55
									
								
								server.R
									
										
									
									
									
								
							|  | @ -47,16 +47,18 @@ server <- function(input, output) { | |||
| 
 | ||||
|         # Compute scoring factors and the weighted score. | ||||
| 
 | ||||
|         clusteriness_weight <- input$clusteriness / 100 | ||||
|         correlation_weight <- input$correlation / 100 | ||||
|         neural_weight <- input$neural / 100 | ||||
|         total_weight <- clusteriness_weight + correlation_weight + neural_weight | ||||
|         clusteriness_factor <- clusteriness_weight / total_weight | ||||
|         correlation_factor <- correlation_weight / total_weight | ||||
|         neural_factor <- neural_weight / total_weight | ||||
|         total_weight <- 0.0 | ||||
|         results[, score := 0.0] | ||||
| 
 | ||||
|         results[, score := clusteriness_factor * clusteriness + | ||||
|             correlation_factor * correlation + neural_factor * neural] | ||||
|         for (method in methods) { | ||||
|             weight <- input[[method$id]] | ||||
|             total_weight <- total_weight + weight | ||||
|             column <- method$id | ||||
|             weighted <- weight * results[, ..column] | ||||
|             results[, score := score + weighted] | ||||
|         } | ||||
| 
 | ||||
|         results[, score := score / total_weight] | ||||
| 
 | ||||
|         # Exclude genes with too few species. | ||||
|         results <- results[n_species >= input$n_species] | ||||
|  | @ -75,33 +77,22 @@ server <- function(input, output) { | |||
|         # Apply the cut-off score. | ||||
|         results <- results[score >= input$cutoff / 100] | ||||
| 
 | ||||
|         # Order the results based on their score. The resulting index will be | ||||
|         # used as the "rank". | ||||
|         # Order the results based on their score. | ||||
| 
 | ||||
|         setorder(results, -score, na.last = TRUE) | ||||
|         results[, rank := .I] | ||||
|     }) | ||||
| 
 | ||||
|     output$genes <- renderDT({ | ||||
|         method_ids <- sapply(methods, function(method) method$id) | ||||
|         method_names <- sapply(methods, function(method) method$name) | ||||
|         columns <- c("rank", "gene", "name", method_ids, "score") | ||||
|         column_names <- c("", "Gene", "", method_names, "Score") | ||||
| 
 | ||||
|         dt <- datatable( | ||||
|             results()[, .( | ||||
|                 .I, | ||||
|                 gene, | ||||
|                 name, | ||||
|                 clusteriness, | ||||
|                 correlation, | ||||
|                 neural, | ||||
|                 score | ||||
|             )], | ||||
|             results()[, ..columns], | ||||
|             rownames = FALSE, | ||||
|             colnames = c( | ||||
|                 "", | ||||
|                 "Gene", | ||||
|                 "", | ||||
|                 "Clusters", | ||||
|                 "Correlation", | ||||
|                 "Neural", | ||||
|                 "Score" | ||||
|             ), | ||||
|             colnames = column_names, | ||||
|             style = "bootstrap", | ||||
|             options = list( | ||||
|                 rowCallback = js_link, | ||||
|  | @ -109,11 +100,7 @@ server <- function(input, output) { | |||
|             ) | ||||
|         ) | ||||
| 
 | ||||
|         formatPercentage( | ||||
|             dt, | ||||
|             c("clusteriness", "correlation", "neural", "score"), | ||||
|             digits = 1 | ||||
|         ) | ||||
|         formatPercentage(dt, c(method_ids, "score"), digits = 1) | ||||
|     }) | ||||
| 
 | ||||
|     output$synposis <- renderText({ | ||||
|  |  | |||
							
								
								
									
										30
									
								
								ui.R
									
										
									
									
									
								
							
							
						
						
									
										30
									
								
								ui.R
									
										
									
									
									
								
							|  | @ -3,6 +3,8 @@ library(plotly) | |||
| library(rclipboard) | ||||
| library(shiny) | ||||
| 
 | ||||
| source("methods.R") | ||||
| 
 | ||||
| ui <- fluidPage( | ||||
|     rclipboardSetup(), | ||||
|     titlePanel("TPE-OLD candidates"), | ||||
|  | @ -22,33 +24,17 @@ ui <- fluidPage( | |||
|         ), | ||||
|         wellPanel( | ||||
|             h3("Ranking"), | ||||
|             lapply(methods, function(method) { | ||||
|                 sliderInput( | ||||
|                 "clusteriness", | ||||
|                 "Clustering of genes", | ||||
|                     method$id, | ||||
|                     method$description, | ||||
|                     post = "%", | ||||
|                     min = 0, | ||||
|                     max = 100, | ||||
|                     step = 1, | ||||
|                 value = 58 | ||||
|             ), | ||||
|             sliderInput( | ||||
|                 "correlation", | ||||
|                 "Correlation with known genes", | ||||
|                 post = "%", | ||||
|                 min = 0, | ||||
|                 max = 100, | ||||
|                 step = 1, | ||||
|                 value = 36 | ||||
|             ), | ||||
|             sliderInput( | ||||
|                 "neural", | ||||
|                 "Assessment by neural network", | ||||
|                 post = "%", | ||||
|                 min = 0, | ||||
|                 max = 100, | ||||
|                 step = 1, | ||||
|                 value = 6 | ||||
|             ), | ||||
|                     value = 100 | ||||
|                 ) | ||||
|             }), | ||||
|             sliderInput( | ||||
|                 "cutoff", | ||||
|                 "Cut-off score", | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue