mirror of
				https://github.com/johrpan/geposan.git
				synced 2025-10-26 10:47:25 +01:00 
			
		
		
		
	neural: Re-add distances as input data
This commit is contained in:
		
							parent
							
								
									88d6837fee
								
							
						
					
					
						commit
						f84d37dd30
					
				
					 2 changed files with 24 additions and 11 deletions
				
			
		|  | @ -40,8 +40,11 @@ analyze <- function(preset, progress = NULL) { | |||
|         "correlation_positions" = function(...) { | ||||
|             correlation(..., use_positions = TRUE) | ||||
|         }, | ||||
|         "proximity" = proximity, | ||||
|         "neural" = neural | ||||
|         "neural" = neural, | ||||
|         "neural_positions" = function(...) { | ||||
|             neural(..., use_positions = TRUE) | ||||
|         }, | ||||
|         "proximity" = proximity | ||||
|     ) | ||||
| 
 | ||||
|     results <- cached("analysis", preset, { | ||||
|  |  | |||
							
								
								
									
										22
									
								
								R/neural.R
									
										
									
									
									
								
							
							
						
						
									
										22
									
								
								R/neural.R
									
										
									
									
									
								
							|  | @ -1,14 +1,17 @@ | |||
| # Find genes by training a neural network on reference distances or positions. | ||||
| # | ||||
| # @param seed A seed to get reproducible results. | ||||
| neural <- function(preset, progress = NULL, seed = 448077) { | ||||
| neural <- function(preset, | ||||
|                    use_positions = FALSE, | ||||
|                    progress = NULL, | ||||
|                    seed = 448077) { | ||||
|     species_ids <- preset$species_ids | ||||
|     gene_ids <- preset$gene_ids | ||||
|     reference_gene_ids <- preset$reference_gene_ids | ||||
| 
 | ||||
|     cached( | ||||
|         "neural", | ||||
|         c(species_ids, gene_ids, reference_gene_ids), | ||||
|         c(species_ids, gene_ids, reference_gene_ids, use_positions), | ||||
|         { # nolint | ||||
|             set.seed(seed) | ||||
|             gene_count <- length(gene_ids) | ||||
|  | @ -28,10 +31,17 @@ neural <- function(preset, progress = NULL, seed = 448077) { | |||
| 
 | ||||
|             # Make a column containing positions for each species. | ||||
|             for (species_id in species_ids) { | ||||
|                 species_data <- distances[ | ||||
|                 species_data <- if (use_positions) { | ||||
|                     setnames(distances[ | ||||
|                         species == species_id, | ||||
|                         .(gene, position) | ||||
|                     ], "position", "distance") | ||||
|                 } else { | ||||
|                     distances[ | ||||
|                         species == species_id, | ||||
|                         .(gene, distance) | ||||
|                     ] | ||||
|                 } | ||||
| 
 | ||||
|                 # Only include species with at least 25% known values. | ||||
| 
 | ||||
|  | @ -48,11 +58,11 @@ neural <- function(preset, progress = NULL, seed = 448077) { | |||
|                     # However, this will of course lessen the significance of | ||||
|                     # the results. | ||||
| 
 | ||||
|                     mean_position <- round(species_data[, mean(position)]) | ||||
|                     data[is.na(position), position := mean_position] | ||||
|                     mean_distance <- round(species_data[, mean(distance)]) | ||||
|                     data[is.na(distance), distance := mean_distance] | ||||
| 
 | ||||
|                     # Name the new column after the species. | ||||
|                     setnames(data, "position", species_id) | ||||
|                     setnames(data, "distance", species_id) | ||||
|                 } | ||||
|             } | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue