mirror of
				https://github.com/johrpan/geposanui.git
				synced 2025-10-26 19:27:24 +01:00 
			
		
		
		
	Simplify data format and correct scale labels
This commit is contained in:
		
							parent
							
								
									495524a0ac
								
							
						
					
					
						commit
						400ca776e0
					
				
					 2 changed files with 22 additions and 28 deletions
				
			
		
							
								
								
									
										18
									
								
								data.R
									
										
									
									
									
								
							
							
						
						
									
										18
									
								
								data.R
									
										
									
									
									
								
							|  | @ -41,7 +41,11 @@ load_data <- function(path) { | ||||||
|         median_distance = numeric() |         median_distance = numeric() | ||||||
|     ) |     ) | ||||||
| 
 | 
 | ||||||
|     distances <- data.table(geneid = integer()) |     distances <- data.table( | ||||||
|  |         species = character(), | ||||||
|  |         gene = integer(), | ||||||
|  |         distance = integer() | ||||||
|  |     ) | ||||||
| 
 | 
 | ||||||
|     # Each file will contain data on one species. |     # Each file will contain data on one species. | ||||||
|     file_names <- list.files(paste(path, "genomes", sep = "/")) |     file_names <- list.files(paste(path, "genomes", sep = "/")) | ||||||
|  | @ -63,13 +67,19 @@ load_data <- function(path) { | ||||||
|                 median_distance = median(species_distances[, dist]) |                 median_distance = median(species_distances[, dist]) | ||||||
|             ))) |             ))) | ||||||
| 
 | 
 | ||||||
|             # Column names have to be unique for each species. |             species_distances <- data.table( | ||||||
|             setnames(species_distances, "dist", species_id) |                 species = species_id, | ||||||
|  |                 gene = species_distances[, geneid], | ||||||
|  |                 distance = species_distances[, dist] | ||||||
|  |             ) | ||||||
| 
 | 
 | ||||||
|             distances <- merge(distances, species_distances, all = TRUE) |             distances <- rbindlist(list(distances, species_distances)) | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     # Order species by their median distance. | ||||||
|  |     setorder(species, median_distance) | ||||||
|  | 
 | ||||||
|     list( |     list( | ||||||
|         genes = genes, |         genes = genes, | ||||||
|         species = species, |         species = species, | ||||||
|  |  | ||||||
|  | @ -3,44 +3,28 @@ library(ggplot2) | ||||||
| 
 | 
 | ||||||
| #' Draw a scatter plot containing gene positions. | #' Draw a scatter plot containing gene positions. | ||||||
| scatter_plot <- function(gene_ids, data) { | scatter_plot <- function(gene_ids, data) { | ||||||
|     species <- data$species |  | ||||||
|     setorder(species, median_distance) |  | ||||||
| 
 |  | ||||||
|     distances <- data$distances[geneid %in% gene_ids] |  | ||||||
| 
 |  | ||||||
|     plot <- ggplot() + |     plot <- ggplot() + | ||||||
|         scale_x_continuous( |         scale_x_discrete( | ||||||
|             name = "Species", |             name = "Species", | ||||||
|             breaks = seq_len(nrow(species)), |             breaks = data$species$id, | ||||||
|             labels = species$label |             labels = data$species$label | ||||||
|         ) + |         ) + | ||||||
|         scale_y_continuous(name = "Distance to telomeres [Mbp]") + |         scale_y_continuous(name = "Distance to telomeres [Mbp]") | ||||||
|         geom_line( |  | ||||||
|             species, |  | ||||||
|             mapping = aes( |  | ||||||
|                 x = as.numeric(rownames(species)), |  | ||||||
|                 y = median_distance / 1000000 |  | ||||||
|             ) |  | ||||||
|         ) |  | ||||||
| 
 | 
 | ||||||
|     colors <- rainbow(length(gene_ids)) |     colors <- rainbow(length(gene_ids)) | ||||||
| 
 | 
 | ||||||
|     for (i in seq_len(length(gene_ids))) { |     for (i in seq_len(length(gene_ids))) { | ||||||
|         gene_id <- gene_ids[i] |         gene_id <- gene_ids[i] | ||||||
| 
 | 
 | ||||||
|         gene_distances <- data.table( |  | ||||||
|             index = as.numeric(rownames(species)), |  | ||||||
|             distance = unlist(distances[geneid == gene_id, -1]) |  | ||||||
|         ) |  | ||||||
| 
 |  | ||||||
|         plot <- plot + |         plot <- plot + | ||||||
|             geom_point( |             geom_point( | ||||||
|                 gene_distances, |                 data$distances[gene == gene_id], | ||||||
|                 mapping = aes( |                 mapping = aes( | ||||||
|                     x = index, |                     x = species, | ||||||
|                     y = distance / 1000000, |                     y = distance / 1000000, | ||||||
|                 ), |                 ), | ||||||
|                 color = colors[i] |                 color = colors[i], | ||||||
|  |                 size = 4 | ||||||
|             ) |             ) | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue