mirror of
				https://github.com/johrpan/geposanui.git
				synced 2025-10-26 19:27:24 +01:00 
			
		
		
		
	Reinclude human into distance data
This commit is contained in:
		
							parent
							
								
									22b25c9b83
								
							
						
					
					
						commit
						e9c05fdbab
					
				
					 2 changed files with 43 additions and 16 deletions
				
			
		
							
								
								
									
										14
									
								
								init.R
									
										
									
									
									
								
							
							
						
						
									
										14
									
								
								init.R
									
										
									
									
									
								
							|  | @ -5,11 +5,11 @@ source("util.R") | |||
| 
 | ||||
| # Load input data | ||||
| 
 | ||||
| species <- run_cached("input/species", retrieve_species) | ||||
| genes <- run_cached("input/genes", retrieve_genes) | ||||
| species <- run_cached("input_species", retrieve_species) | ||||
| genes <- run_cached("input_genes", retrieve_genes) | ||||
| 
 | ||||
| distances <- run_cached( | ||||
|     "input/distances", | ||||
|     "input_distances", | ||||
|     retrieve_distances, | ||||
|     species[, id], | ||||
|     genes[, id] | ||||
|  | @ -23,7 +23,7 @@ all_genes <- genes[, id] | |||
| tpe_old_genes <- genes[suggested | verified == TRUE, id] | ||||
| 
 | ||||
| clustering_all <- run_cached( | ||||
|     "all_species/clustering", | ||||
|     "clustering_all", | ||||
|     process_clustering, | ||||
|     distances, | ||||
|     all_species, | ||||
|  | @ -31,7 +31,7 @@ clustering_all <- run_cached( | |||
| ) | ||||
| 
 | ||||
| clustering_replicative <- run_cached( | ||||
|     "replicative_species/clustering", | ||||
|     "clustering_replicative", | ||||
|     process_clustering, | ||||
|     distances, | ||||
|     replicative_species, | ||||
|  | @ -39,7 +39,7 @@ clustering_replicative <- run_cached( | |||
| ) | ||||
| 
 | ||||
| correlation_all <- run_cached( | ||||
|     "all_species/correlation", | ||||
|     "correlation_all", | ||||
|     process_correlation, | ||||
|     distances, | ||||
|     all_species, | ||||
|  | @ -48,7 +48,7 @@ correlation_all <- run_cached( | |||
| ) | ||||
| 
 | ||||
| correlation_replicative <- run_cached( | ||||
|     "replicative_species/correlation", | ||||
|     "correlation_replicative", | ||||
|     process_correlation, | ||||
|     distances, | ||||
|     replicative_species, | ||||
|  |  | |||
							
								
								
									
										41
									
								
								input.R
									
										
									
									
									
								
							
							
						
						
									
										41
									
								
								input.R
									
										
									
									
									
								
							|  | @ -115,11 +115,41 @@ retrieve_genes <- function() { | |||
| #'  - `gene` Ensembl gene ID. | ||||
| #'  - `distance` Distance to nearest telomere in base pairs. | ||||
| retrieve_distances <- function(species_ids, gene_ids) { | ||||
|     distances <- data.table( | ||||
|         species = character(), | ||||
|         gene = character(), | ||||
|         distance = integer() | ||||
|     # Special case the human species and retrieve all available distance | ||||
|     # information. | ||||
| 
 | ||||
|     ensembl <- useDataset("hsapiens_gene_ensembl", mart = ensembl) | ||||
| 
 | ||||
|     human_distances <- data.table(getBM( | ||||
|         attributes = c( | ||||
|             "ensembl_gene_id", | ||||
|             "chromosome_name", | ||||
|             "start_position", | ||||
|             "end_position" | ||||
|         ), | ||||
|         mart = ensembl | ||||
|     )) | ||||
| 
 | ||||
|     human_distances[, | ||||
|         chromosome_length := max(end_position), | ||||
|         by = chromosome_name | ||||
|     ] | ||||
| 
 | ||||
|     # Keep only the relevant information (see below). | ||||
|     distances <- human_distances[ | ||||
|         chromosome_length > 15000000, | ||||
|         .( | ||||
|             species = "hsapiens", | ||||
|             gene = ensembl_gene_id, | ||||
|             distance = pmin( | ||||
|                 start_position, | ||||
|                 chromosome_length - end_position | ||||
|             ) | ||||
|         ) | ||||
|     ] | ||||
| 
 | ||||
|     # Exclude the human from the species, in case it is present there. | ||||
|     species_ids <- species_ids[species_ids != "hsapiens"] | ||||
| 
 | ||||
|     species_count <- length(species_ids) | ||||
| 
 | ||||
|  | @ -156,10 +186,7 @@ retrieve_distances <- function(species_ids, gene_ids) { | |||
|                 "start_position", | ||||
|                 "end_position" | ||||
|             ), | ||||
|             mart = useDataset( | ||||
|                 sprintf("%s_gene_ensembl", species_id), | ||||
|             mart = ensembl | ||||
|             ) | ||||
|         )) | ||||
| 
 | ||||
|         ensembl_distances[, | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue