mirror of
				https://github.com/johrpan/geposanui.git
				synced 2025-10-26 19:27:24 +01:00 
			
		
		
		
	Base clusteriness on species count
This commit is contained in:
		
							parent
							
								
									8b727a0329
								
							
						
					
					
						commit
						397b8d0ba2
					
				
					 2 changed files with 7 additions and 7 deletions
				
			
		
							
								
								
									
										12
									
								
								clustering.R
									
										
									
									
									
								
							
							
						
						
									
										12
									
								
								clustering.R
									
										
									
									
									
								
							|  | @ -7,9 +7,10 @@ library(rlog) | ||||||
| #' This function will cluster the data using `hclust` and `cutree` (with the | #' This function will cluster the data using `hclust` and `cutree` (with the | ||||||
| #' specified height). Every cluster with at least two members qualifies for | #' specified height). Every cluster with at least two members qualifies for | ||||||
| #' further analysis. Clusters are then ranked based on their size in relation | #' further analysis. Clusters are then ranked based on their size in relation | ||||||
| #' to the total number of values. The return value is a final score between | #' to the total number of possible values (`n`). The return value is a final | ||||||
| #' zero and one. Lower ranking clusters contribute less to this score. | #' score between zero and one. Lower ranking clusters contribute less to this | ||||||
| clusteriness <- function(data, height = 1000000) { | #' score. | ||||||
|  | clusteriness <- function(data, n, height = 1000000) { | ||||||
|     # Cluster the data and compute the cluster sizes. |     # Cluster the data and compute the cluster sizes. | ||||||
| 
 | 
 | ||||||
|     tree <- hclust(dist(data)) |     tree <- hclust(dist(data)) | ||||||
|  | @ -19,7 +20,6 @@ clusteriness <- function(data, height = 1000000) { | ||||||
|     # Compute the "clusteriness" score. |     # Compute the "clusteriness" score. | ||||||
| 
 | 
 | ||||||
|     score <- 0.0 |     score <- 0.0 | ||||||
|     n <- length(data) |  | ||||||
| 
 | 
 | ||||||
|     for (i in seq_along(cluster_sizes)) { |     for (i in seq_along(cluster_sizes)) { | ||||||
|         cluster_size <- cluster_sizes[i] |         cluster_size <- cluster_sizes[i] | ||||||
|  | @ -70,11 +70,11 @@ process_clustering <- function(distances, species_ids, gene_ids) { | ||||||
|             .(species, distance) |             .(species, distance) | ||||||
|         ] |         ] | ||||||
| 
 | 
 | ||||||
|         if (data[, .N] < 12) { |         if (data[, .N] < 10) { | ||||||
|             next |             next | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         score <- clusteriness(data[, distance]) |         score <- clusteriness(data[, distance], length(species_ids)) | ||||||
| 
 | 
 | ||||||
|         results[ |         results[ | ||||||
|             gene == gene_id, |             gene == gene_id, | ||||||
|  |  | ||||||
|  | @ -41,7 +41,7 @@ process_correlation <- function(distances, species_ids, gene_ids, | ||||||
|         gene_id <- gene_ids[i] |         gene_id <- gene_ids[i] | ||||||
|         gene_distances <- distances[gene == gene_id] |         gene_distances <- distances[gene == gene_id] | ||||||
| 
 | 
 | ||||||
|         if (nrow(gene_distances) < 12) { |         if (nrow(gene_distances) < 10) { | ||||||
|             next |             next | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue