From b69b3e9d2d518fd276cb21e06143d6763db37e3a Mon Sep 17 00:00:00 2001 From: Elias Projahn Date: Thu, 26 Aug 2021 14:37:17 +0200 Subject: [PATCH] Always use the largest cluster --- process.R | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/process.R b/process.R index 317a54d..342a0fb 100644 --- a/process.R +++ b/process.R @@ -35,7 +35,14 @@ process_input <- function(input) { clusters <- hclust(dist(distances[, distance])) clusters_cut <- cutree(clusters, h = 1000000) - cluster <- distances[which(clusters_cut == 1)] + + # Find the largest cluster + cluster_indices <- unique(clusters_cut) + cluster_index <- cluster_indices[ + which.max(tabulate(match(clusters_cut, cluster_indices))) + ] + + cluster <- distances[which(clusters_cut == cluster_index)] results[ gene == gene_id,