| 
									
										
										
										
											2022-01-13 18:35:02 +01:00
										 |  |  | #' Find the densest value in the data. | 
					
						
							|  |  |  | #' | 
					
						
							|  |  |  | #' This function assumes that data represents a continuous variable and finds | 
					
						
							|  |  |  | #' a single value with the highest estimated density. This can be used to | 
					
						
							|  |  |  | #' estimate the mode of the data. If there is only one value that value is | 
					
						
							|  |  |  | #' returned. If multiple density maxima with the same density exist, their mean | 
					
						
							|  |  |  | #' is returned. | 
					
						
							|  |  |  | #' | 
					
						
							|  |  |  | #' @param data The input data. | 
					
						
							|  |  |  | #' | 
					
						
							|  |  |  | #' @return The densest value of data. | 
					
						
							|  |  |  | #' | 
					
						
							|  |  |  | #' @export | 
					
						
							|  |  |  | densest <- function(data) { | 
					
						
							|  |  |  |     as.numeric(if (length(data) <= 0) { | 
					
						
							|  |  |  |         NULL | 
					
						
							|  |  |  |     } else if (length(data) == 1) { | 
					
						
							|  |  |  |         data | 
					
						
							|  |  |  |     } else { | 
					
						
							|  |  |  |         density <- stats::density(data) | 
					
						
							|  |  |  |         mean(density$x[density$y == max(density$y)]) | 
					
						
							|  |  |  |     }) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-16 13:01:44 +01:00
										 |  |  | #' Score genes based on their proximity to the reference genes. | 
					
						
							|  |  |  | #' | 
					
						
							| 
									
										
										
										
											2022-01-17 20:11:07 +01:00
										 |  |  | #' In this case, the distance data that is available for one gene is first | 
					
						
							|  |  |  | #' combined. The resulting value is compared to the reference genes and | 
					
						
							|  |  |  | #' determines the gene's score in relation to other genes. | 
					
						
							|  |  |  | #' | 
					
						
							|  |  |  | #' @param distance_estimate A function that will be used to summarize the | 
					
						
							|  |  |  | #'   distance values for each gene. See [densest()] for the default | 
					
						
							|  |  |  | #'   implementation. | 
					
						
							|  |  |  | #' @param summarize A function that will be used to combine the different | 
					
						
							| 
									
										
										
										
											2022-02-24 15:36:07 +01:00
										 |  |  | #'   distances to the reference genes. By default [stats::median()] is used. | 
					
						
							| 
									
										
										
										
											2021-12-16 13:01:44 +01:00
										 |  |  | #' | 
					
						
							|  |  |  | #' @return An object of class `geposan_method`. | 
					
						
							|  |  |  | #' | 
					
						
							| 
									
										
										
										
											2022-01-17 20:11:07 +01:00
										 |  |  | #' @seealso [species_adjacency()] | 
					
						
							|  |  |  | #' | 
					
						
							| 
									
										
										
										
											2021-12-16 13:01:44 +01:00
										 |  |  | #' @export | 
					
						
							| 
									
										
										
										
											2022-02-24 15:36:07 +01:00
										 |  |  | adjacency <- function(distance_estimate = densest, summarize = stats::median) { | 
					
						
							| 
									
										
										
										
											2021-12-16 13:01:44 +01:00
										 |  |  |     method( | 
					
						
							|  |  |  |         id = "adjacency", | 
					
						
							|  |  |  |         name = "Adjacency", | 
					
						
							|  |  |  |         description = "Adjacency to reference genes", | 
					
						
							|  |  |  |         function(preset, progress) { | 
					
						
							|  |  |  |             species_ids <- preset$species_ids | 
					
						
							|  |  |  |             gene_ids <- preset$gene_ids | 
					
						
							|  |  |  |             reference_gene_ids <- preset$reference_gene_ids | 
					
						
							| 
									
										
										
										
											2021-11-25 20:55:11 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-09 20:21:27 +01:00
										 |  |  |             cached( | 
					
						
							|  |  |  |                 "adjacency", | 
					
						
							| 
									
										
										
										
											2022-01-09 20:26:42 +01:00
										 |  |  |                 c( | 
					
						
							|  |  |  |                     species_ids, | 
					
						
							|  |  |  |                     gene_ids, | 
					
						
							|  |  |  |                     reference_gene_ids, | 
					
						
							| 
									
										
										
										
											2022-01-17 20:11:07 +01:00
										 |  |  |                     distance_estimate, | 
					
						
							|  |  |  |                     summarize | 
					
						
							| 
									
										
										
										
											2022-01-09 20:26:42 +01:00
										 |  |  |                 ), | 
					
						
							| 
									
										
										
										
											2022-01-09 20:21:27 +01:00
										 |  |  |                 { # nolint | 
					
						
							|  |  |  |                     # Filter distances by species and gene and summarize each | 
					
						
							|  |  |  |                     # gene's distance values using the estimation function. | 
					
						
							|  |  |  |                     data <- geposan::distances[ | 
					
						
							|  |  |  |                         species %chin% species_ids & gene %chin% gene_ids, | 
					
						
							| 
									
										
										
										
											2022-01-17 20:11:07 +01:00
										 |  |  |                         .(distance = as.numeric(distance_estimate(distance))), | 
					
						
							| 
									
										
										
										
											2022-01-09 20:21:27 +01:00
										 |  |  |                         by = gene | 
					
						
							| 
									
										
										
										
											2021-12-16 13:01:44 +01:00
										 |  |  |                     ] | 
					
						
							| 
									
										
										
										
											2021-11-25 20:55:11 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-09 20:21:27 +01:00
										 |  |  |                     # Compute the absolute value of the difference between the | 
					
						
							|  |  |  |                     # estimated distances of each gene to the reference genes. | 
					
						
							| 
									
										
										
										
											2022-01-09 20:26:42 +01:00
										 |  |  |                     compute_difference <- function(distance_value, | 
					
						
							| 
									
										
										
										
											2022-01-09 20:21:27 +01:00
										 |  |  |                                                    comparison_ids) { | 
					
						
							| 
									
										
										
										
											2022-01-09 20:26:42 +01:00
										 |  |  |                         differences <- data[ | 
					
						
							| 
									
										
										
										
											2022-01-09 20:21:27 +01:00
										 |  |  |                             gene %chin% comparison_ids, | 
					
						
							| 
									
										
										
										
											2022-01-09 20:26:42 +01:00
										 |  |  |                             .(difference = abs(distance_value - distance)) | 
					
						
							| 
									
										
										
										
											2022-01-09 20:21:27 +01:00
										 |  |  |                         ] | 
					
						
							| 
									
										
										
										
											2021-11-25 20:55:11 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-17 20:11:07 +01:00
										 |  |  |                         summarize(differences$difference) | 
					
						
							| 
									
										
										
										
											2022-01-09 20:21:27 +01:00
										 |  |  |                     } | 
					
						
							| 
									
										
										
										
											2021-11-25 20:55:11 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-09 20:21:27 +01:00
										 |  |  |                     # Compute the differences to the reference genes. | 
					
						
							|  |  |  |                     data[ | 
					
						
							|  |  |  |                         !gene %chin% reference_gene_ids, | 
					
						
							|  |  |  |                         difference := compute_difference( | 
					
						
							|  |  |  |                             distance, | 
					
						
							|  |  |  |                             reference_gene_ids | 
					
						
							| 
									
										
										
										
											2022-01-09 20:26:42 +01:00
										 |  |  |                         ), | 
					
						
							|  |  |  |                         by = gene | 
					
						
							| 
									
										
										
										
											2022-01-09 20:21:27 +01:00
										 |  |  |                     ] | 
					
						
							| 
									
										
										
										
											2021-11-25 20:55:11 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-09 20:21:27 +01:00
										 |  |  |                     progress(0.5) | 
					
						
							| 
									
										
										
										
											2021-11-25 20:55:11 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-09 20:21:27 +01:00
										 |  |  |                     # Exclude the reference gene itself when computing its | 
					
						
							|  |  |  |                     # difference. | 
					
						
							|  |  |  |                     data[ | 
					
						
							|  |  |  |                         gene %chin% reference_gene_ids, | 
					
						
							|  |  |  |                         difference := compute_difference( | 
					
						
							|  |  |  |                             distance, | 
					
						
							|  |  |  |                             reference_gene_ids[reference_gene_ids != gene] | 
					
						
							|  |  |  |                         ), | 
					
						
							|  |  |  |                         by = gene | 
					
						
							|  |  |  |                     ] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                     # Compute the final score by normalizing the difference. | 
					
						
							|  |  |  |                     data[, score := 1 - difference / max(difference)] | 
					
						
							| 
									
										
										
										
											2021-11-25 20:55:11 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-09 20:21:27 +01:00
										 |  |  |                     progress(1.0) | 
					
						
							| 
									
										
										
										
											2021-11-25 20:55:11 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-09 20:21:27 +01:00
										 |  |  |                     result( | 
					
						
							|  |  |  |                         method = "adjacency", | 
					
						
							|  |  |  |                         scores = data[, .(gene, score)], | 
					
						
							|  |  |  |                         details = list(data = data) | 
					
						
							|  |  |  |                     ) | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             ) | 
					
						
							| 
									
										
										
										
											2021-12-16 13:01:44 +01:00
										 |  |  |         } | 
					
						
							|  |  |  |     ) | 
					
						
							| 
									
										
										
										
											2021-11-25 20:55:11 +01:00
										 |  |  | } |