Always use the largest cluster

2025-10-26 11:17:24 +01:00 · 2021-08-26 14:37:17 +02:00 · 2021-08-26 14:37:17 +02:00 · b69b3e9d2d
commit b69b3e9d2d
parent 1c88458ce7
1 changed files with 8 additions and 1 deletions
--- a/process.R
+++ b/process.R
@@ -35,7 +35,14 @@ process_input <- function(input) {

        clusters <- hclust(dist(distances[, distance]))
        clusters_cut <- cutree(clusters, h = 1000000)
-        cluster <- distances[which(clusters_cut == 1)]
+
+        # Find the largest cluster
+        cluster_indices <- unique(clusters_cut)
+        cluster_index <- cluster_indices[
+            which.max(tabulate(match(clusters_cut, cluster_indices)))
+        ]
+
+        cluster <- distances[which(clusters_cut == cluster_index)]

        results[
            gene == gene_id,