mirror of
				https://github.com/johrpan/geposanui.git
				synced 2025-10-26 19:27:24 +01:00 
			
		
		
		
	
		
			
	
	
		
			30 lines
		
	
	
	
		
			1,017 B
		
	
	
	
		
			R
		
	
	
	
	
	
		
		
			
		
	
	
			30 lines
		
	
	
	
		
			1,017 B
		
	
	
	
		
			R
		
	
	
	
	
	
|  | library(data.table) | ||
|  | 
 | ||
#' Score the mean distance of genes to the telomeres across species.
#'
#' A score will be given to each gene such that 0.0 corresponds to the maximal
#' mean distance across all genes and 1.0 corresponds to a distance of 0.
#'
#' The result will be a data.table with the following columns:
#'
#'  - `gene` Gene ID of the processed gene.
#'  - `score` Score for the proximity.
#'
#' If no rows remain after filtering by species, an empty data.table with the
#' same two columns is returned. If the maximal mean distance is 0, every gene
#' receives a score of 1.0 instead of `NaN`.
#'
#' @param distances Distance data to use. A data.table providing at least the
#'   columns `species`, `gene` and `distance`.
#' @param species_ids Species, whose data should be included.
#' @param gene_ids Genes to process. NOTE(review): this argument is accepted
#'   but not used for filtering here; all genes present in `distances` for
#'   the selected species are scored. Confirm with callers whether that is
#'   intended.
#' @param ... Accepted for interface compatibility; not used.
#'
#' @return A data.table with the columns `gene` and `score`.
process_proximity <- function(distances, species_ids, gene_ids, ...) {
    # Prefilter distances by species.
    distances <- distances[species %chin% species_ids]

    # Average each gene's distance over the selected species. This creates a
    # new table, so the caller's data is not modified by the `:=` below.
    distances <- distances[, .(mean_distance = mean(distance)), by = "gene"]

    # max() on an empty vector warns and yields -Inf, so only call it when
    # there is something to aggregate.
    max_distance <- if (nrow(distances) > 0L) {
        distances[, max(mean_distance)]
    } else {
        NA_real_
    }

    if (isTRUE(max_distance == 0)) {
        # Distances are assumed to be non-negative, so a maximum of 0 means
        # every mean distance is 0. Dividing would give 0 / 0 = NaN; the
        # documented score for a distance of 0 is 1.
        distances[, score := 1]
    } else {
        # Compute the score as described above.
        distances[, score := 1 - mean_distance / max_distance]
    }

    distances[, .(gene, score)]
}