mirror of
				https://github.com/johrpan/geposanui.git
				synced 2025-10-26 19:27:24 +01:00 
			
		
		
		
	Reinclude human into distance data
This commit is contained in:
		
							parent
							
								
									22b25c9b83
								
							
						
					
					
						commit
						e9c05fdbab
					
				
					 2 changed files with 43 additions and 16 deletions
				
			
		
							
								
								
									
										14
									
								
								init.R
									
										
									
									
									
								
							
							
						
						
									
										14
									
								
								init.R
									
										
									
									
									
								
							|  | @ -5,11 +5,11 @@ source("util.R") | ||||||
| 
 | 
 | ||||||
| # Load input data | # Load input data | ||||||
| 
 | 
 | ||||||
| species <- run_cached("input/species", retrieve_species) | species <- run_cached("input_species", retrieve_species) | ||||||
| genes <- run_cached("input/genes", retrieve_genes) | genes <- run_cached("input_genes", retrieve_genes) | ||||||
| 
 | 
 | ||||||
| distances <- run_cached( | distances <- run_cached( | ||||||
|     "input/distances", |     "input_distances", | ||||||
|     retrieve_distances, |     retrieve_distances, | ||||||
|     species[, id], |     species[, id], | ||||||
|     genes[, id] |     genes[, id] | ||||||
|  | @ -23,7 +23,7 @@ all_genes <- genes[, id] | ||||||
| tpe_old_genes <- genes[suggested | verified == TRUE, id] | tpe_old_genes <- genes[suggested | verified == TRUE, id] | ||||||
| 
 | 
 | ||||||
| clustering_all <- run_cached( | clustering_all <- run_cached( | ||||||
|     "all_species/clustering", |     "clustering_all", | ||||||
|     process_clustering, |     process_clustering, | ||||||
|     distances, |     distances, | ||||||
|     all_species, |     all_species, | ||||||
|  | @ -31,7 +31,7 @@ clustering_all <- run_cached( | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| clustering_replicative <- run_cached( | clustering_replicative <- run_cached( | ||||||
|     "replicative_species/clustering", |     "clustering_replicative", | ||||||
|     process_clustering, |     process_clustering, | ||||||
|     distances, |     distances, | ||||||
|     replicative_species, |     replicative_species, | ||||||
|  | @ -39,7 +39,7 @@ clustering_replicative <- run_cached( | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| correlation_all <- run_cached( | correlation_all <- run_cached( | ||||||
|     "all_species/correlation", |     "correlation_all", | ||||||
|     process_correlation, |     process_correlation, | ||||||
|     distances, |     distances, | ||||||
|     all_species, |     all_species, | ||||||
|  | @ -48,7 +48,7 @@ correlation_all <- run_cached( | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| correlation_replicative <- run_cached( | correlation_replicative <- run_cached( | ||||||
|     "replicative_species/correlation", |     "correlation_replicative", | ||||||
|     process_correlation, |     process_correlation, | ||||||
|     distances, |     distances, | ||||||
|     replicative_species, |     replicative_species, | ||||||
|  |  | ||||||
							
								
								
									
										45
									
								
								input.R
									
										
									
									
									
								
							
							
						
						
									
										45
									
								
								input.R
									
										
									
									
									
								
							|  | @ -115,11 +115,41 @@ retrieve_genes <- function() { | ||||||
| #'  - `gene` Ensembl gene ID. | #'  - `gene` Ensembl gene ID. | ||||||
| #'  - `distance` Distance to nearest telomere in base pairs. | #'  - `distance` Distance to nearest telomere in base pairs. | ||||||
| retrieve_distances <- function(species_ids, gene_ids) { | retrieve_distances <- function(species_ids, gene_ids) { | ||||||
|     distances <- data.table( |     # Special case the human species and retrieve all available distance | ||||||
|         species = character(), |     # information. | ||||||
|         gene = character(), | 
 | ||||||
|         distance = integer() |     ensembl <- useDataset("hsapiens_gene_ensembl", mart = ensembl) | ||||||
|     ) | 
 | ||||||
|  |     human_distances <- data.table(getBM( | ||||||
|  |         attributes = c( | ||||||
|  |             "ensembl_gene_id", | ||||||
|  |             "chromosome_name", | ||||||
|  |             "start_position", | ||||||
|  |             "end_position" | ||||||
|  |         ), | ||||||
|  |         mart = ensembl | ||||||
|  |     )) | ||||||
|  | 
 | ||||||
|  |     human_distances[, | ||||||
|  |         chromosome_length := max(end_position), | ||||||
|  |         by = chromosome_name | ||||||
|  |     ] | ||||||
|  | 
 | ||||||
|  |     # Keep only the relevant information (see below). | ||||||
|  |     distances <- human_distances[ | ||||||
|  |         chromosome_length > 15000000, | ||||||
|  |         .( | ||||||
|  |             species = "hsapiens", | ||||||
|  |             gene = ensembl_gene_id, | ||||||
|  |             distance = pmin( | ||||||
|  |                 start_position, | ||||||
|  |                 chromosome_length - end_position | ||||||
|  |             ) | ||||||
|  |         ) | ||||||
|  |     ] | ||||||
|  | 
 | ||||||
|  |     # Exclude human from the species list, in case it is present there. | ||||||
|  |     species_ids <- species_ids[species_ids != "hsapiens"] | ||||||
| 
 | 
 | ||||||
|     species_count <- length(species_ids) |     species_count <- length(species_ids) | ||||||
| 
 | 
 | ||||||
|  | @ -156,10 +186,7 @@ retrieve_distances <- function(species_ids, gene_ids) { | ||||||
|                 "start_position", |                 "start_position", | ||||||
|                 "end_position" |                 "end_position" | ||||||
|             ), |             ), | ||||||
|             mart = useDataset( |             mart = ensembl | ||||||
|                 sprintf("%s_gene_ensembl", species_id), |  | ||||||
|                 mart = ensembl |  | ||||||
|             ) |  | ||||||
|         )) |         )) | ||||||
| 
 | 
 | ||||||
|         ensembl_distances[, |         ensembl_distances[, | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue