mirror of
				https://github.com/johrpan/geposan.git
				synced 2025-10-26 10:47:25 +01:00 
			
		
		
		
	Remove species adjacency method
This commit is contained in:
		
							parent
							
								
									ab545a415c
								
							
						
					
					
						commit
						0ae6836d86
					
				
					 6 changed files with 0 additions and 200 deletions
				
			
		|  | @ -27,6 +27,5 @@ export(preset) | ||||||
| export(proximity) | export(proximity) | ||||||
| export(ranking) | export(ranking) | ||||||
| export(result) | export(result) | ||||||
| export(species_adjacency) |  | ||||||
| export(validate) | export(validate) | ||||||
| import(data.table) | import(data.table) | ||||||
|  |  | ||||||
|  | @ -37,7 +37,6 @@ all_methods <- function() { | ||||||
|     correlation(), |     correlation(), | ||||||
|     neural(), |     neural(), | ||||||
|     adjacency(), |     adjacency(), | ||||||
|     species_adjacency(), |  | ||||||
|     proximity() |     proximity() | ||||||
|   ) |   ) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -39,8 +39,6 @@ densest <- function(data) { | ||||||
| #' | #' | ||||||
| #' @return An object of class `geposan_method`. | #' @return An object of class `geposan_method`. | ||||||
| #' | #' | ||||||
| #' @seealso [species_adjacency()] |  | ||||||
| #' |  | ||||||
| #' @export | #' @export | ||||||
| adjacency <- function(id = "adjacency", | adjacency <- function(id = "adjacency", | ||||||
|                       name = "Adjacency", |                       name = "Adjacency", | ||||||
|  |  | ||||||
|  | @ -1,156 +0,0 @@ | ||||||
| #' Score genes based on their adjacency to the reference genes within species. |  | ||||||
| #' |  | ||||||
| #' For each gene and species, the method will first combine the gene's distances |  | ||||||
| #' to the reference genes within that species. Afterwards, the results are |  | ||||||
| #' summarized across species and determine the gene's score. |  | ||||||
| #' |  | ||||||
| #' @param id Unique ID for the method and its results. |  | ||||||
| #' @param name Human readable name for the method. |  | ||||||
| #' @param description Method description. |  | ||||||
| #' @param distance_estimate Function for combining the distance differences |  | ||||||
| #'   within one species. |  | ||||||
| #' @param summarize Function for summarizing the distance values across species. |  | ||||||
| #' |  | ||||||
| #' @return An object of class `geposan_method`. |  | ||||||
| #' |  | ||||||
| #' @seealso [adjacency()] |  | ||||||
| #' |  | ||||||
| #' @export |  | ||||||
| species_adjacency <- function(id = "species_adjacency", |  | ||||||
|                               name = "Species adj.", |  | ||||||
|                               description = "Species adjacency", |  | ||||||
|                               distance_estimate = stats::median, |  | ||||||
|                               summarize = stats::median) { |  | ||||||
|   method( |  | ||||||
|     id = id, |  | ||||||
|     name = name, |  | ||||||
|     description = description, |  | ||||||
|     function(preset, progress) { |  | ||||||
|       species_ids <- preset$species_ids |  | ||||||
|       gene_ids <- preset$gene_ids |  | ||||||
|       reference_gene_ids <- preset$reference_gene_ids |  | ||||||
| 
 |  | ||||||
|       cached( |  | ||||||
|         "species_adjacency", |  | ||||||
|         c( |  | ||||||
|           species_ids, |  | ||||||
|           gene_ids, |  | ||||||
|           reference_gene_ids, |  | ||||||
|           distance_estimate, |  | ||||||
|           summarize |  | ||||||
|         ), |  | ||||||
|         { # nolint |  | ||||||
|           # Prefilter distances. |  | ||||||
|           data <- geposan::distances[ |  | ||||||
|             species %chin% species_ids & gene %chin% gene_ids |  | ||||||
|           ] |  | ||||||
| 
 |  | ||||||
|           progress_state <- 0.0 |  | ||||||
|           progress_step <- 0.9 / length(species_ids) |  | ||||||
| 
 |  | ||||||
|           # Iterate through all species and find the distance |  | ||||||
|           # estimates within that species. |  | ||||||
|           for (species_id in species_ids) { |  | ||||||
|             # For all genes, compute the distance to one reference |  | ||||||
|             # gene at a time in one go. |  | ||||||
|             for (reference_gene_id in reference_gene_ids) { |  | ||||||
|               comparison_distance <- data[ |  | ||||||
|                 species == species_id & |  | ||||||
|                   gene == reference_gene_id, |  | ||||||
|                 distance |  | ||||||
|               ] |  | ||||||
| 
 |  | ||||||
|               column <- quote(reference_gene_id) |  | ||||||
| 
 |  | ||||||
|               if (length(comparison_distance) != 1) { |  | ||||||
|                 # If we don't have a comparison distance, we |  | ||||||
|                 # can't compute a difference. This happens, if |  | ||||||
|                 # the species doesn't have the reference gene. |  | ||||||
|                 data[ |  | ||||||
|                   species == species_id & |  | ||||||
|                     gene %chin% gene_ids, |  | ||||||
|                   eval(column) := NA_integer_ |  | ||||||
|                 ] |  | ||||||
|               } else { |  | ||||||
|                 data[ |  | ||||||
|                   species == species_id & |  | ||||||
|                     gene %chin% gene_ids, |  | ||||||
|                   eval(column) := |  | ||||||
|                     abs(distance - comparison_distance) |  | ||||||
|                 ] |  | ||||||
|               } |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             # Combine the distances to the different reference genes |  | ||||||
|             # into one value using the provided function. |  | ||||||
|             data[ |  | ||||||
|               species == species_id & |  | ||||||
|                 gene %chin% gene_ids, |  | ||||||
|               combined_distance := as.numeric( |  | ||||||
|                 distance_estimate(stats::na.omit( |  | ||||||
|                   # Convert the data.table subset into a |  | ||||||
|                   # vector to get the correct na.omit |  | ||||||
|                   # behavior. |  | ||||||
|                   as.matrix(.SD)[1, ] |  | ||||||
|                 )) |  | ||||||
|               ), |  | ||||||
|               .SDcols = reference_gene_ids, |  | ||||||
|               by = gene |  | ||||||
|             ] |  | ||||||
| 
 |  | ||||||
|             progress_state <- progress_state + progress_step |  | ||||||
|             progress(progress_state) |  | ||||||
|           } |  | ||||||
| 
 |  | ||||||
|           progress(0.9) |  | ||||||
| 
 |  | ||||||
|           # Remove the distances between the reference genes. |  | ||||||
|           for (reference_gene_id in reference_gene_ids) { |  | ||||||
|             column <- quote(reference_gene_id) |  | ||||||
|             data[gene == reference_gene_id, eval(column) := NA] |  | ||||||
|           } |  | ||||||
| 
 |  | ||||||
|           # Recompute the combined distance for the reference genes. |  | ||||||
|           data[ |  | ||||||
|             gene %chin% reference_gene_ids, |  | ||||||
|             combined_distance := as.numeric( |  | ||||||
|               distance_estimate(stats::na.omit( |  | ||||||
|                 as.matrix(.SD)[1, ] |  | ||||||
|               )) |  | ||||||
|             ), |  | ||||||
|             .SDcols = reference_gene_ids, |  | ||||||
|             by = list(species, gene) |  | ||||||
|           ] |  | ||||||
| 
 |  | ||||||
|           # Combine the distances into one value. |  | ||||||
|           results <- data[, |  | ||||||
|             .( |  | ||||||
|               summarized_distances = as.numeric( |  | ||||||
|                 summarize(stats::na.omit(combined_distance)) |  | ||||||
|               ) |  | ||||||
|             ), |  | ||||||
|             by = gene |  | ||||||
|           ] |  | ||||||
| 
 |  | ||||||
|           # Compute the final score by normalizing the difference. |  | ||||||
|           results[ |  | ||||||
|             , |  | ||||||
|             score := 1 - summarized_distances / |  | ||||||
|               max(summarized_distances) |  | ||||||
|           ] |  | ||||||
| 
 |  | ||||||
|           progress(1.0) |  | ||||||
| 
 |  | ||||||
|           result( |  | ||||||
|             method = "species_adjacency", |  | ||||||
|             scores = results[, .(gene, score)], |  | ||||||
|             details = list( |  | ||||||
|               data = data, |  | ||||||
|               results = results |  | ||||||
|             ) |  | ||||||
|           ) |  | ||||||
|         } |  | ||||||
|       ) |  | ||||||
|     } |  | ||||||
|   ) |  | ||||||
| } |  | ||||||
|  | @ -34,6 +34,3 @@ In this case, the distance data that is available for one gene is first | ||||||
| combined. The resulting value is compared to the reference genes and | combined. The resulting value is compared to the reference genes and | ||||||
| determines the gene's score in relation to other genes. | determines the gene's score in relation to other genes. | ||||||
| } | } | ||||||
| \seealso{ |  | ||||||
| \code{\link[=species_adjacency]{species_adjacency()}} |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  | @ -1,37 +0,0 @@ | ||||||
| % Generated by roxygen2: do not edit by hand |  | ||||||
| % Please edit documentation in R/method_species_adjacency.R |  | ||||||
| \name{species_adjacency} |  | ||||||
| \alias{species_adjacency} |  | ||||||
| \title{Score genes based on their adjacency to the reference genes within species.} |  | ||||||
| \usage{ |  | ||||||
| species_adjacency( |  | ||||||
|   id = "species_adjacency", |  | ||||||
|   name = "Species adj.", |  | ||||||
|   description = "Species adjacency", |  | ||||||
|   distance_estimate = stats::median, |  | ||||||
|   summarize = stats::median |  | ||||||
| ) |  | ||||||
| } |  | ||||||
| \arguments{ |  | ||||||
| \item{id}{Unique ID for the method and its results.} |  | ||||||
| 
 |  | ||||||
| \item{name}{Human readable name for the method.} |  | ||||||
| 
 |  | ||||||
| \item{description}{Method description.} |  | ||||||
| 
 |  | ||||||
| \item{distance_estimate}{Function for combining the distance differences |  | ||||||
| within one species.} |  | ||||||
| 
 |  | ||||||
| \item{summarize}{Function for summarizing the distance values across species.} |  | ||||||
| } |  | ||||||
| \value{ |  | ||||||
| An object of class \code{geposan_method}. |  | ||||||
| } |  | ||||||
| \description{ |  | ||||||
| For each gene and species, the method will first combine the gene's distances |  | ||||||
| to the reference genes within that species. Afterwards, the results are |  | ||||||
| summarized across species and determine the gene's score. |  | ||||||
| } |  | ||||||
| \seealso{ |  | ||||||
| \code{\link[=adjacency]{adjacency()}} |  | ||||||
| } |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue