#' Score genes based on their proximity to the reference genes.
#'
#' In this case, the distance data that is available for one gene is first
#' combined. The resulting value is compared to the reference genes and
#' determines the gene's score in relation to other genes.
#'
#' In detail, the distance values of each gene (restricted to the species and
#' genes of the preset) are summarized using `distance_estimate`. The
#' summarized values of the reference genes are then combined using the same
#' function, and the absolute difference between each gene's value and that
#' combined value is computed. When a reference gene itself is scored, it is
#' left out of the set of genes that it is compared to. Finally, the
#' differences are divided by the largest difference and subtracted from 1, so
#' that a difference of zero results in a score of 1.
#'
#' @param id Unique ID for the method and its results.
#' @param name Human readable name for the method.
#' @param description Method description.
#' @param distance_estimate A function that will be used to summarize the
#'   distance values for each gene. It is called with a vector of distance
#'   values and its result is converted using [as.numeric()]. See [densest()]
#'   for the default implementation.
#'
#' @return An object of class `geposan_method`.
#'
#' @export
adjacency <- function(id = "adjacency",
                      name = "Adjacency",
                      description = "Adjacency to reference genes",
                      distance_estimate = densest) {
  method(
    id = id,
    name = name,
    description = description,
    function(preset, progress) {
      species_ids <- preset$species_ids
      gene_ids <- preset$gene_ids
      reference_gene_ids <- preset$reference_gene_ids

      # The second argument lists everything that the result depends on.
      cached(
        id,
        c(
          species_ids,
          gene_ids,
          reference_gene_ids,
          distance_estimate
        ),
        { # nolint
          # Filter distances by species and gene and summarize each
          # gene's distance values using the estimation function. The
          # result contains one row per gene.
          data <- geposan::distances[
            species %chin% species_ids & gene %chin% gene_ids,
            .(distance = as.numeric(distance_estimate(distance))),
            by = gene
          ]

          # Compute the absolute value of the difference between the
          # given estimated distances and the combined estimate of the
          # genes in `comparison_ids`.
          compute_difference <- function(distance_values,
                                         comparison_ids) {
            comparison_distance <- data[
              gene %chin% comparison_ids,
              distance_estimate(distance)
            ]

            abs(distance_values - comparison_distance)
          }

          # Compute the differences to the reference genes. All
          # non-reference genes share the same comparison set, so this
          # can be done in a single vectorized step.
          data[
            !gene %chin% reference_gene_ids,
            difference := compute_difference(
              distance,
              reference_gene_ids
            )
          ]

          progress(0.5)

          # Exclude the reference gene itself when computing its
          # difference. This has to be evaluated once per gene: only
          # with `by = gene` is `gene` a single value, which makes
          # `reference_gene_ids != gene` drop exactly that gene.
          # Without the grouping, `gene` would be the whole column and
          # the comparison would be recycled element by element.
          data[
            gene %chin% reference_gene_ids,
            difference := compute_difference(
              distance,
              reference_gene_ids[reference_gene_ids != gene]
            ),
            by = gene
          ]

          # Compute the final score by normalizing the difference.
          data[, score := 1 - difference / max(difference)]

          progress(1.0)

          # NOTE(review): the method name is fixed here and does not
          # follow a custom `id` -- confirm whether `result()` expects
          # the method ID instead.
          result(
            method = "adjacency",
            scores = data[, .(gene, score)],
            details = list(data = data)
          )
        }
      )
    }
  )
}