Allow limiting the number of clusters for clusteriness

2025-10-25 19:37:23 +02:00 · 2022-06-16 19:45:59 +02:00 · 2022-06-16 19:45:59 +02:00 · 2529f35660
commit 2529f35660
parent 3df4ec5d89
3 changed files with 21 additions and 2 deletions
--- a/1
+++ b/1
@ -9,6 +9,7 @@ S3method(print,geposan_validation)
 export(adjacency)
 export(all_methods)
 export(analyze)
+export(clusteriness)
 export(clustering)
 export(compare)
 export(correlation)
--- a/R/method_clustering.R
+++ b/R/method_clustering.R
@ -12,7 +12,13 @@
 #'   relation to the previous one. For example, if `weight` is 0.7 (the
 #'   default), the first cluster will weigh 1.0, the second 0.7, the third 0.49
 #'   etc.
-clusteriness <- function(data, span = 100000, weight = 0.7) {
+#' @param n_clusters Maximum number of clusters that should be taken into
+#'   account. By default, all clusters will be regarded.
+#'
+#' @return A score between 0.0 and 1.0 summarizing how much the data clusters.
+#'
+#' @export
+clusteriness <- function(data, span = 100000, weight = 0.7, n_clusters = NULL) {
  n <- length(data)

  # Return a score of 0.0 if there is just one or no value at all.
@ -31,6 +37,12 @@ clusteriness <- function(data, span = 100000, weight = 0.7) {
  score <- 0.0

  for (i in seq_along(cluster_sizes)) {
+    if (!is.null(n_clusters)) {
+      if (i > n_clusters) {
+        break
+      }
+    }
+
    cluster_size <- cluster_sizes[i]

    if (cluster_size >= 2) {
--- a/man/clusteriness.Rd
+++ b/man/clusteriness.Rd
@ -4,7 +4,7 @@
 \alias{clusteriness}
 \title{Perform a cluster analysis.}
 \usage{
-clusteriness(data, span = 1e+05, weight = 0.7)
+clusteriness(data, span = 1e+05, weight = 0.7, n_clusters = NULL)
 }
 \arguments{
 \item{data}{The values that should be scored.}
@ -15,6 +15,12 @@ clusteriness(data, span = 1e+05, weight = 0.7)
 relation to the previous one. For example, if \code{weight} is 0.7 (the
 default), the first cluster will weigh 1.0, the second 0.7, the third 0.49
 etc.}
+
+\item{n_clusters}{Maximum number of clusters that should be taken into
+account. By default, all clusters will be regarded.}
+}
+\value{
+A score between 0.0 and 1.0 summarizing how much the data clusters.
 }
 \description{
 This function will cluster the data using \code{\link[stats:hclust]{stats::hclust()}} and