clustering: Expose clusteriness parameters

2025-10-25 19:37:23 +02:00 · 2022-06-22 11:24:30 +02:00 · 2022-06-22 11:24:30 +02:00 · ab545a415c
commit ab545a415c
parent c6ca93b009
2 changed files with 49 additions and 26 deletions
--- a/R/method_clustering.R
+++ b/R/method_clustering.R
@@ -68,7 +68,9 @@ clusteriness <- function(data,
 #' Process genes clustering their distance to telomeres.
 #'
 #' The result will be cached and can be reused for different presets, because
-#' it is independent of the reference genes in use.
+#' it is independent of the reference genes in use. Most parameters of the
+#' [clusteriness()] function are exposed here. See its documentation for more
+#' information.
 #'
 #' @param id Unique ID for the method and its results.
 #' @param name Human readable name for the method.
@@ -80,8 +82,12 @@ clusteriness <- function(data,
 #'
 #' @export
 clustering <- function(id = "clustering",
-    name = "Clustering",
-                       description = "Clustering of genes") {
+                       name = "Clustering",
+                       description = "Clustering of genes",
+                       span = 100000,
+                       weight = 0.7,
+                       n_clusters = NULL,
+                       relation = NULL) {
  method(
    id = id,
    name = name,
@@ -90,33 +96,44 @@ clustering <- function(id = "clustering",
      species_ids <- preset$species_ids
      gene_ids <- preset$gene_ids

-      cached("clustering", c(species_ids, gene_ids), {
-        scores <- data.table(gene = gene_ids)
+      cached(
+        "clustering",
+        c(species_ids, gene_ids, span, weight, n_clusters, relation),
+        { # nolint
+          scores <- data.table(gene = gene_ids)

-        # Prefilter the input data by species.
-        distances <- geposan::distances[species %chin% species_ids]
+          # Prefilter the input data by species.
+          distances <- geposan::distances[species %chin% species_ids]

-        genes_done <- 0
-        genes_total <- length(gene_ids)
+          genes_done <- 0
+          genes_total <- length(gene_ids)

-        # Perform the cluster analysis for one gene.
-        compute <- function(gene_id) {
-          data <- distances[gene == gene_id, distance]
-          score <- clusteriness(data)
+          # Perform the cluster analysis for one gene.
+          compute <- function(gene_id) {
+            data <- distances[gene == gene_id, distance]

-          genes_done <<- genes_done + 1
-          progress(genes_done / genes_total)
+            score <- clusteriness(
+              data,
+              span = span,
+              weight = weight,
+              n_clusters = n_clusters,
+              relation = relation
+            )

-          score
+            genes_done <<- genes_done + 1
+            progress(genes_done / genes_total)
+
+            score
+          }
+
+          scores[, score := compute(gene), by = gene]
+
+          result(
+            method = "clustering",
+            scores = scores
+          )
        }
-
-        scores[, score := compute(gene), by = gene]
-
-        result(
-          method = "clustering",
-          scores = scores
-        )
-      })
+      )
    }
  )
 }
--- a/man/clustering.Rd
+++ b/man/clustering.Rd
@@ -7,7 +7,11 @@
 clustering(
  id = "clustering",
  name = "Clustering",
-  description = "Clustering of genes"
+  description = "Clustering of genes",
+  span = 1e+05,
+  weight = 0.7,
+  n_clusters = NULL,
+  relation = NULL
 )
 }
 \arguments{
@@ -22,7 +26,9 @@ An object of class \code{geposan_method}.
 }
 \description{
 The result will be cached and can be reused for different presets, because
-it is independent of the reference genes in use.
+it is independent of the reference genes in use. Most parameters of the
+\code{\link[=clusteriness]{clusteriness()}} function are exposed here. See its documentation for more
+information.
 }
 \seealso{
 \code{\link[=clusteriness]{clusteriness()}}