| 
									
										
										
										
											2021-11-03 14:17:39 +01:00
										 |  |  | #' Create a new preset. | 
					
						
							|  |  |  | #' | 
					
						
							|  |  |  | #' A preset is used to specify which methods and inputs should be used for an | 
					
						
							|  |  |  | #' analysis. Note that the genes to process should normally include the | 
					
						
							| 
									
										
										
										
											2021-11-18 12:30:19 +01:00
										 |  |  | #' reference genes to be able to assess the results later. The genes will be | 
					
						
							|  |  |  | #' filtered based on how many species have data for them. Genes which only have | 
					
						
							|  |  |  | #' orthologs for less than 25% of the input species will be excluded from the | 
					
						
							| 
									
										
										
										
											2021-12-16 13:01:44 +01:00
										 |  |  | #' preset and the analyis. See the different method functions for the available | 
					
						
							|  |  |  | #' methods: [clustering()], [correlation()], [neural()], [adjacency()] and | 
					
						
							|  |  |  | #' [proximity()]. | 
					
						
							| 
									
										
										
										
											2021-11-03 14:17:39 +01:00
										 |  |  | #' | 
					
						
							| 
									
										
										
										
											2021-12-16 13:01:44 +01:00
										 |  |  | #' @param methods List of methods to apply. | 
					
						
							| 
									
										
										
										
											2021-11-03 14:17:39 +01:00
										 |  |  | #' @param species_ids IDs of species to include. | 
					
						
							|  |  |  | #' @param gene_ids IDs of genes to screen. | 
					
						
							|  |  |  | #' @param reference_gene_ids IDs of reference genes to compare to. | 
					
						
							|  |  |  | #' | 
					
						
							|  |  |  | #' @return The preset to use with [analyze()]. | 
					
						
							|  |  |  | #' | 
					
						
							|  |  |  | #' @export | 
					
						
							| 
									
										
										
										
											2021-12-16 13:01:44 +01:00
										 |  |  | preset <- function(methods = all_methods(), | 
					
						
							|  |  |  |                    species_ids = geposan::species$id, | 
					
						
							|  |  |  |                    gene_ids = geposan::genes$id, | 
					
						
							|  |  |  |                    reference_gene_ids) { | 
					
						
							| 
									
										
										
										
											2021-11-18 12:30:19 +01:00
										 |  |  |     # Count included species per gene. | 
					
						
							|  |  |  |     genes_n_species <- geposan::distances[ | 
					
						
							|  |  |  |         species %chin% species_ids, | 
					
						
							|  |  |  |         .(n_species = .N), | 
					
						
							|  |  |  |         by = "gene" | 
					
						
							|  |  |  |     ] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Filter out genes with less than 25% existing orthologs. | 
					
						
							|  |  |  |     gene_ids_filtered <- genes_n_species[ | 
					
						
							| 
									
										
										
										
											2021-11-26 11:41:49 +01:00
										 |  |  |         gene %chin% gene_ids & | 
					
						
							|  |  |  |             n_species >= 0.25 * length(species_ids), | 
					
						
							| 
									
										
										
										
											2021-11-18 12:30:19 +01:00
										 |  |  |         gene | 
					
						
							|  |  |  |     ] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-03 14:17:39 +01:00
										 |  |  |     # The included data gets sorted to be able to produce predictable hashes | 
					
						
							|  |  |  |     # for the object later. | 
					
						
							|  |  |  |     structure( | 
					
						
							|  |  |  |         list( | 
					
						
							| 
									
										
										
										
											2021-12-16 13:01:44 +01:00
										 |  |  |             methods = methods, | 
					
						
							| 
									
										
										
										
											2021-11-03 14:17:39 +01:00
										 |  |  |             species_ids = sort(species_ids), | 
					
						
							| 
									
										
										
										
											2021-11-18 12:30:19 +01:00
										 |  |  |             gene_ids = sort(gene_ids_filtered), | 
					
						
							| 
									
										
										
										
											2021-12-16 13:01:44 +01:00
										 |  |  |             reference_gene_ids = sort(reference_gene_ids) | 
					
						
							| 
									
										
										
										
											2021-11-03 14:17:39 +01:00
										 |  |  |         ), | 
					
						
							|  |  |  |         class = "geposan_preset" | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #' S3 method to print a preset object. | 
					
						
							|  |  |  | #' | 
					
						
							| 
									
										
										
										
											2021-11-06 13:22:57 +01:00
										 |  |  | #' @param x The preset to print. | 
					
						
							|  |  |  | #' @param ... Other parameters. | 
					
						
							|  |  |  | #' | 
					
						
							| 
									
										
										
										
											2021-11-03 14:17:39 +01:00
										 |  |  | #' @seealso [preset()] | 
					
						
							|  |  |  | #' | 
					
						
							|  |  |  | #' @export | 
					
						
							| 
									
										
										
										
											2021-11-06 13:22:57 +01:00
										 |  |  | print.geposan_preset <- function(x, ...) { | 
					
						
							| 
									
										
										
										
											2021-11-03 14:17:39 +01:00
										 |  |  |     cat(sprintf( | 
					
						
							| 
									
										
										
										
											2021-12-16 13:01:44 +01:00
										 |  |  |         paste0( | 
					
						
							|  |  |  |             "geposan preset:", | 
					
						
							|  |  |  |             "\n  Included methods: %s", | 
					
						
							|  |  |  |             "\n  Number of species: %i", | 
					
						
							|  |  |  |             "\n  Number of genes: %i", | 
					
						
							|  |  |  |             "\n  Reference genes: %i", | 
					
						
							|  |  |  |             "\n" | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |         paste(sapply(x$methods, function(m) m$id), collapse = ", "), | 
					
						
							| 
									
										
										
										
											2021-11-06 13:22:57 +01:00
										 |  |  |         length(x$species_ids), | 
					
						
							| 
									
										
										
										
											2021-12-16 13:01:44 +01:00
										 |  |  |         length(x$gene_ids), | 
					
						
							| 
									
										
										
										
											2021-11-06 13:22:57 +01:00
										 |  |  |         length(x$reference_gene_ids) | 
					
						
							| 
									
										
										
										
											2021-11-03 14:17:39 +01:00
										 |  |  |     )) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-06 13:22:57 +01:00
										 |  |  |     invisible(x) | 
					
						
							| 
									
										
										
										
											2021-11-03 14:17:39 +01:00
										 |  |  | } |