mirror of
				https://github.com/johrpan/geposan.git
				synced 2025-10-26 18:57:25 +01:00 
			
		
		
		
	Remove position analysis
This commit is contained in:
		
							parent
							
								
									255123c74f
								
							
						
					
					
						commit
						599f09a52f
					
				
					 5 changed files with 15 additions and 52 deletions
				
			
		|  | @ -32,13 +32,7 @@ analyze <- function(preset, progress = NULL) { | ||||||
|     #  - `score` Score for the gene between 0.0 and 1.0. |     #  - `score` Score for the gene between 0.0 and 1.0. | ||||||
|     methods <- list( |     methods <- list( | ||||||
|         "clusteriness" = clusteriness, |         "clusteriness" = clusteriness, | ||||||
|         "clusteriness_positions" = function(...) { |  | ||||||
|             clusteriness(..., use_positions = TRUE) |  | ||||||
|         }, |  | ||||||
|         "correlation" = correlation, |         "correlation" = correlation, | ||||||
|         "correlation_positions" = function(...) { |  | ||||||
|             correlation(..., use_positions = TRUE) |  | ||||||
|         }, |  | ||||||
|         "neural" = neural, |         "neural" = neural, | ||||||
|         "proximity" = proximity |         "proximity" = proximity | ||||||
|     ) |     ) | ||||||
|  |  | ||||||
|  | @ -36,11 +36,11 @@ clusteriness_priv <- function(data, height = 1000000) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| # Process genes clustering their distance to telomeres. | # Process genes clustering their distance to telomeres. | ||||||
| clusteriness <- function(preset, use_positions = FALSE, progress = NULL) { | clusteriness <- function(preset, progress = NULL) { | ||||||
|     species_ids <- preset$species_ids |     species_ids <- preset$species_ids | ||||||
|     gene_ids <- preset$gene_ids |     gene_ids <- preset$gene_ids | ||||||
| 
 | 
 | ||||||
|     cached("clusteriness", c(species_ids, gene_ids, use_positions), { |     cached("clusteriness", c(species_ids, gene_ids), { | ||||||
|         results <- data.table(gene = gene_ids) |         results <- data.table(gene = gene_ids) | ||||||
| 
 | 
 | ||||||
|         # Prefilter the input data by species. |         # Prefilter the input data by species. | ||||||
|  | @ -54,12 +54,7 @@ clusteriness <- function(preset, use_positions = FALSE, progress = NULL) { | ||||||
| 
 | 
 | ||||||
|         # Perform the cluster analysis for one gene. |         # Perform the cluster analysis for one gene. | ||||||
|         compute <- function(gene_id) { |         compute <- function(gene_id) { | ||||||
|             data <- if (use_positions) { |             data <- distances[gene_id, distance] | ||||||
|                 distances[gene_id, position] |  | ||||||
|             } else { |  | ||||||
|                 distances[gene_id, distance] |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             score <- clusteriness_priv(data) |             score <- clusteriness_priv(data) | ||||||
| 
 | 
 | ||||||
|             if (!is.null(progress)) { |             if (!is.null(progress)) { | ||||||
|  |  | ||||||
|  | @ -1,14 +1,12 @@ | ||||||
| # Compute the mean correlation coefficient comparing gene distances with a set | # Compute the mean correlation coefficient comparing gene distances with a set | ||||||
| # of reference genes. | # of reference genes. | ||||||
| correlation <- function(preset, use_positions = FALSE, progress = NULL) { | correlation <- function(preset, progress = NULL) { | ||||||
|     species_ids <- preset$species_ids |     species_ids <- preset$species_ids | ||||||
|     gene_ids <- preset$gene_ids |     gene_ids <- preset$gene_ids | ||||||
|     reference_gene_ids <- preset$reference_gene_ids |     reference_gene_ids <- preset$reference_gene_ids | ||||||
| 
 | 
 | ||||||
|     cached( |     cached( | ||||||
|         "correlation", |         "correlation", c(species_ids, gene_ids, reference_gene_ids), { | ||||||
|         c(species_ids, gene_ids, reference_gene_ids, use_positions), |  | ||||||
|         { # nolint |  | ||||||
|             # Prefilter distances by species. |             # Prefilter distances by species. | ||||||
|             distances <- geposan::distances[species %chin% species_ids] |             distances <- geposan::distances[species %chin% species_ids] | ||||||
| 
 | 
 | ||||||
|  | @ -20,17 +18,10 @@ correlation <- function(preset, use_positions = FALSE, progress = NULL) { | ||||||
| 
 | 
 | ||||||
|             # Make a column containing distance data for each species. |             # Make a column containing distance data for each species. | ||||||
|             for (species_id in species_ids) { |             for (species_id in species_ids) { | ||||||
|                 species_data <- if (use_positions) { |                 species_data <- distances[ | ||||||
|                     setnames(distances[ |                     species == species_id, | ||||||
|                         species == species_id, |                     .(gene, distance) | ||||||
|                         .(gene, position) |                 ] | ||||||
|                     ], "position", "distance") |  | ||||||
|                 } else { |  | ||||||
|                     distances[ |  | ||||||
|                         species == species_id, |  | ||||||
|                         .(gene, distance) |  | ||||||
|                     ] |  | ||||||
|                 } |  | ||||||
| 
 | 
 | ||||||
|                 data <- merge(data, species_data, all.x = TRUE) |                 data <- merge(data, species_data, all.x = TRUE) | ||||||
|                 setnames(data, "distance", species_id) |                 setnames(data, "distance", species_id) | ||||||
|  |  | ||||||
							
								
								
									
										27
									
								
								R/neural.R
									
										
									
									
									
								
							
							
						
						
									
										27
									
								
								R/neural.R
									
										
									
									
									
								
							|  | @ -25,10 +25,7 @@ neural <- function(preset, progress = NULL, seed = 49641) { | ||||||
|         # Make a columns containing positions and distances for each |         # Make a columns containing positions and distances for each | ||||||
|         # species. |         # species. | ||||||
|         for (species_id in species_ids) { |         for (species_id in species_ids) { | ||||||
|             species_data <- distances[ |             species_data <- distances[species == species_id, .(gene, distance)] | ||||||
|                 species == species_id, |  | ||||||
|                 .(gene, position, distance) |  | ||||||
|             ] |  | ||||||
| 
 | 
 | ||||||
|             # Only include species with at least 25% known values. As |             # Only include species with at least 25% known values. As | ||||||
|             # positions and distances always coexist, we don't loose any |             # positions and distances always coexist, we don't loose any | ||||||
|  | @ -46,26 +43,14 @@ neural <- function(preset, progress = NULL, seed = 49641) { | ||||||
|                 # However, this will of course lessen the significance of |                 # However, this will of course lessen the significance of | ||||||
|                 # the results. |                 # the results. | ||||||
| 
 | 
 | ||||||
|                 mean_position <- round(species_data[, mean(position)]) |  | ||||||
|                 mean_distance <- round(species_data[, mean(distance)]) |                 mean_distance <- round(species_data[, mean(distance)]) | ||||||
|  |                 data[is.na(distance), `:=`(distance = mean_distance)] | ||||||
| 
 | 
 | ||||||
|                 data[is.na(distance), `:=`( |                 # Name the new column after the species. | ||||||
|                     position = mean_position, |                 setnames(data, "distance", species_id) | ||||||
|                     distance = mean_distance |  | ||||||
|                 )] |  | ||||||
| 
 | 
 | ||||||
|                 input_position <- sprintf("%s_position", species_id) |                 # Add the input variable to the buffer. | ||||||
|                 input_distance <- sprintf("%s_distance", species_id) |                 input_vars <- c(input_vars, species_id) | ||||||
| 
 |  | ||||||
|                 # Name the new columns after the species. |  | ||||||
|                 setnames( |  | ||||||
|                     data, |  | ||||||
|                     c("position", "distance"), |  | ||||||
|                     c(input_position, input_distance) |  | ||||||
|                 ) |  | ||||||
| 
 |  | ||||||
|                 # Add the input variables to the buffer. |  | ||||||
|                 input_vars <- c(input_vars, input_position, input_distance) |  | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -40,9 +40,7 @@ | ||||||
| #' @export | #' @export | ||||||
| preset <- function(methods = c( | preset <- function(methods = c( | ||||||
|                        "clusteriness", |                        "clusteriness", | ||||||
|                        "clusteriness_positions", |  | ||||||
|                        "correlation", |                        "correlation", | ||||||
|                        "correlation_positions", |  | ||||||
|                        "neural", |                        "neural", | ||||||
|                        "proximity" |                        "proximity" | ||||||
|                    ), |                    ), | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue