| 
									
										
										
										
											2021-10-19 13:39:55 +02:00
										 |  |  | #' Analyze by applying the specified preset. | 
					
						
							|  |  |  | #' | 
					
						
							| 
									
										
										
										
											2021-11-03 14:17:39 +01:00
										 |  |  | #' @param preset The preset to use which should be created using [preset()]. | 
					
						
							| 
									
										
										
										
											2021-10-19 15:03:10 +02:00
										 |  |  | #' @param progress A function to be called for progress information. The | 
					
						
							|  |  |  | #'   function should accept a number between 0.0 and 1.0 for the current | 
					
						
							|  |  |  | #'   progress. | 
					
						
							| 
									
										
										
										
											2021-10-19 13:39:55 +02:00
										 |  |  | #' | 
					
						
							| 
									
										
										
										
											2021-11-12 10:16:11 +01:00
										 |  |  | #' @returns An object containing the results of the analysis with the following | 
					
						
							|  |  |  | #'   items: | 
					
						
							|  |  |  | #'   \describe{ | 
					
						
							|  |  |  | #'     \item{`preset`}{The preset that was used.} | 
					
						
							|  |  |  | #'     \item{`results`}{A [data.table] with one row for each gene identified by | 
					
						
							|  |  |  | #'       it's ID (`gene` column). The additional columns contain the resulting | 
					
						
							|  |  |  | #'       scores per method and are named after the method IDs.} | 
					
						
							|  |  |  | #'   } | 
					
						
							| 
									
										
										
										
											2021-10-19 13:39:55 +02:00
										 |  |  | #' | 
					
						
							|  |  |  | #' @export | 
					
						
							| 
									
										
										
										
											2021-10-19 15:03:10 +02:00
										 |  |  | analyze <- function(preset, progress = NULL) { | 
					
						
							| 
									
										
										
										
											2021-11-03 14:17:39 +01:00
										 |  |  |     if (class(preset) != "geposan_preset") { | 
					
						
							|  |  |  |         stop("Preset is invalid. Use geposan::preset() to create one.") | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-19 13:39:55 +02:00
										 |  |  |     # Available methods by ID. | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # A method describes a way to perform a computation on gene distance data | 
					
						
							|  |  |  |     # that results in a single score per gene. The function should accept the | 
					
						
							| 
									
										
										
										
											2021-10-21 14:26:03 +02:00
										 |  |  |     # preset to apply (see [preset()]) and an optional progress function (that | 
					
						
							|  |  |  |     # may be called with a number between 0.0 and 1.0) as its parameters. | 
					
						
							| 
									
										
										
										
											2021-10-19 13:39:55 +02:00
										 |  |  |     # | 
					
						
							|  |  |  |     # The function should return a [data.table] with the following columns: | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     #  - `gene` Gene ID of the processed gene. | 
					
						
							|  |  |  |     #  - `score` Score for the gene between 0.0 and 1.0. | 
					
						
							|  |  |  |     methods <- list( | 
					
						
							|  |  |  |         "clusteriness" = clusteriness, | 
					
						
							| 
									
										
										
										
											2021-11-05 19:49:54 +01:00
										 |  |  |         "clusteriness_positions" = function(...) { | 
					
						
							|  |  |  |             clusteriness(..., use_positions = TRUE) | 
					
						
							|  |  |  |         }, | 
					
						
							| 
									
										
										
										
											2021-11-14 17:21:39 +01:00
										 |  |  |         "correlation" = correlation, | 
					
						
							| 
									
										
										
										
											2021-11-05 19:49:54 +01:00
										 |  |  |         "correlation_positions" = function(...) { | 
					
						
							|  |  |  |             correlation(..., use_positions = TRUE) | 
					
						
							|  |  |  |         }, | 
					
						
							| 
									
										
										
										
											2021-11-16 16:23:03 +01:00
										 |  |  |         "neural" = neural, | 
					
						
							|  |  |  |         "neural_positions" = function(...) { | 
					
						
							|  |  |  |             neural(..., use_positions = TRUE) | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         "proximity" = proximity | 
					
						
							| 
									
										
										
										
											2021-10-19 13:39:55 +02:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-12 10:16:11 +01:00
										 |  |  |     results <- cached("analysis", preset, { | 
					
						
							| 
									
										
										
										
											2021-10-21 17:25:44 +02:00
										 |  |  |         total_progress <- 0.0 | 
					
						
							| 
									
										
										
										
											2021-11-05 14:33:39 +01:00
										 |  |  |         method_count <- length(preset$methods) | 
					
						
							| 
									
										
										
										
											2021-11-12 10:16:11 +01:00
										 |  |  |         results <- data.table(gene = preset$gene_ids) | 
					
						
							| 
									
										
										
										
											2021-10-19 13:39:55 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-03 14:17:39 +01:00
										 |  |  |         for (method_id in preset$methods) { | 
					
						
							|  |  |  |             method_progress <- if (!is.null(progress)) { | 
					
						
							|  |  |  |                 function(p) { | 
					
						
							|  |  |  |                     progress(total_progress + p / method_count) | 
					
						
							|  |  |  |                 } | 
					
						
							| 
									
										
										
										
											2021-10-21 17:25:44 +02:00
										 |  |  |             } | 
					
						
							| 
									
										
										
										
											2021-10-19 15:03:10 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-05 19:49:54 +01:00
										 |  |  |             method_results <- methods[[method_id]]( | 
					
						
							|  |  |  |                 preset, | 
					
						
							|  |  |  |                 progress = method_progress | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-21 17:25:44 +02:00
										 |  |  |             setnames(method_results, "score", method_id) | 
					
						
							| 
									
										
										
										
											2021-10-19 13:39:55 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-12 18:22:22 +01:00
										 |  |  |             results <- merge( | 
					
						
							| 
									
										
										
										
											2021-11-12 10:16:11 +01:00
										 |  |  |                 results, | 
					
						
							| 
									
										
										
										
											2021-10-21 17:25:44 +02:00
										 |  |  |                 method_results, | 
					
						
							|  |  |  |                 by = "gene" | 
					
						
							|  |  |  |             ) | 
					
						
							| 
									
										
										
										
											2021-10-19 15:03:10 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-21 17:25:44 +02:00
										 |  |  |             total_progress <- total_progress + 1 / method_count | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2021-10-19 15:03:10 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-17 22:57:31 +01:00
										 |  |  |         # Count included species from the preset per gene. | 
					
						
							|  |  |  |         genes_n_species <- geposan::distances[ | 
					
						
							|  |  |  |             species %chin% preset$species_ids, | 
					
						
							|  |  |  |             .(n_species = .N), | 
					
						
							|  |  |  |             by = "gene" | 
					
						
							|  |  |  |         ] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         setkey(genes_n_species, "gene") | 
					
						
							| 
									
										
										
										
											2021-10-19 13:39:55 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-17 22:57:31 +01:00
										 |  |  |         # Return the results for genes with enough species. | 
					
						
							|  |  |  |         results[genes_n_species[gene, n_species] >= preset$min_n_species] | 
					
						
							| 
									
										
										
										
											2021-10-21 17:25:44 +02:00
										 |  |  |     }) | 
					
						
							| 
									
										
										
										
											2021-11-05 14:33:39 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-17 22:57:31 +01:00
										 |  |  |     if (!is.null(progress)) { | 
					
						
							|  |  |  |         progress(1.0) | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-05 14:33:39 +01:00
										 |  |  |     structure( | 
					
						
							| 
									
										
										
										
											2021-11-12 10:16:11 +01:00
										 |  |  |         list( | 
					
						
							|  |  |  |             preset = preset, | 
					
						
							|  |  |  |             results = results | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |         class = "geposan_analysis" | 
					
						
							| 
									
										
										
										
											2021-11-05 14:33:39 +01:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2021-10-19 13:39:55 +02:00
										 |  |  | } |