mirror of
https://github.com/johrpan/geposan.git
synced 2025-10-26 02:37:25 +01:00
Allow limiting the number of clusters for clusteriness
This commit is contained in:
parent
3df4ec5d89
commit
2529f35660
3 changed files with 21 additions and 2 deletions
|
|
@ -9,6 +9,7 @@ S3method(print,geposan_validation)
|
||||||
export(adjacency)
|
export(adjacency)
|
||||||
export(all_methods)
|
export(all_methods)
|
||||||
export(analyze)
|
export(analyze)
|
||||||
|
export(clusteriness)
|
||||||
export(clustering)
|
export(clustering)
|
||||||
export(compare)
|
export(compare)
|
||||||
export(correlation)
|
export(correlation)
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,13 @@
|
||||||
#' relation to the previous one. For example, if `weight` is 0.7 (the
|
#' relation to the previous one. For example, if `weight` is 0.7 (the
|
||||||
#' default), the first cluster will weigh 1.0, the second 0.7, the third 0.49
|
#' default), the first cluster will weigh 1.0, the second 0.7, the third 0.49
|
||||||
#' etc.
|
#' etc.
|
||||||
clusteriness <- function(data, span = 100000, weight = 0.7) {
|
#' @param n_clusters Maximum number of clusters that should be taken into
|
||||||
|
#' account. By default (`NULL`), all clusters are considered.
|
||||||
|
#'
|
||||||
|
#' @return A score between 0.0 and 1.0 summarizing how much the data clusters.
|
||||||
|
#'
|
||||||
|
#' @export
|
||||||
|
clusteriness <- function(data, span = 100000, weight = 0.7, n_clusters = NULL) {
|
||||||
n <- length(data)
|
n <- length(data)
|
||||||
|
|
||||||
# Return a score of 0.0 if there is just one or no value at all.
|
# Return a score of 0.0 if there is just one or no value at all.
|
||||||
|
|
@ -31,6 +37,12 @@ clusteriness <- function(data, span = 100000, weight = 0.7) {
|
||||||
score <- 0.0
|
score <- 0.0
|
||||||
|
|
||||||
for (i in seq_along(cluster_sizes)) {
|
for (i in seq_along(cluster_sizes)) {
|
||||||
|
if (!is.null(n_clusters)) {
|
||||||
|
if (i > n_clusters) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
cluster_size <- cluster_sizes[i]
|
cluster_size <- cluster_sizes[i]
|
||||||
|
|
||||||
if (cluster_size >= 2) {
|
if (cluster_size >= 2) {
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@
|
||||||
\alias{clusteriness}
|
\alias{clusteriness}
|
||||||
\title{Perform a cluster analysis.}
|
\title{Perform a cluster analysis.}
|
||||||
\usage{
|
\usage{
|
||||||
clusteriness(data, span = 1e+05, weight = 0.7)
|
clusteriness(data, span = 1e+05, weight = 0.7, n_clusters = NULL)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{data}{The values that should be scored.}
|
\item{data}{The values that should be scored.}
|
||||||
|
|
@ -15,6 +15,12 @@ clusteriness(data, span = 1e+05, weight = 0.7)
|
||||||
relation to the previous one. For example, if \code{weight} is 0.7 (the
|
relation to the previous one. For example, if \code{weight} is 0.7 (the
|
||||||
default), the first cluster will weigh 1.0, the second 0.7, the third 0.49
|
default), the first cluster will weigh 1.0, the second 0.7, the third 0.49
|
||||||
etc.}
|
etc.}
|
||||||
|
|
||||||
|
\item{n_clusters}{Maximum number of clusters that should be taken into
|
||||||
|
account. By default (\code{NULL}), all clusters are considered.}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
A score between 0.0 and 1.0 summarizing how much the data clusters.
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
This function will cluster the data using \code{\link[stats:hclust]{stats::hclust()}} and
|
This function will cluster the data using \code{\link[stats:hclust]{stats::hclust()}} and
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue