mirror of
				https://github.com/johrpan/geposan.git
				synced 2025-10-25 19:37:23 +02:00 
			
		
		
		
	Allow to set relation for cluster size
This commit is contained in:
		
							parent
							
								
									fddd0c3fa0
								
							
						
					
					
						commit
						0e4f4621ed
					
				
					 2 changed files with 24 additions and 3 deletions
				
			
		|  | @ -14,11 +14,18 @@ | |||
| #'   etc. | ||||
| #' @param n_clusters Maximum number of clusters that should be taken into | ||||
| #'   account. By default, all clusters will be regarded. | ||||
| #' @param relation Number of items that the cluster size should be based on. | ||||
| #'   This should always be at least the length of the data. By default, the length | ||||
| #'   of the data is used. | ||||
| #' | ||||
| #' @return A score between 0.0 and 1.0 summarizing how much the data clusters. | ||||
| #' | ||||
| #' @export | ||||
| clusteriness <- function(data, span = 100000, weight = 0.7, n_clusters = NULL) { | ||||
| clusteriness <- function(data, | ||||
|                          span = 100000, | ||||
|                          weight = 0.7, | ||||
|                          n_clusters = NULL, | ||||
|                          relation = NULL) { | ||||
|   n <- length(data) | ||||
| 
 | ||||
|   # Return a score of 0.0 if there is just one or no value at all. | ||||
|  | @ -26,6 +33,10 @@ clusteriness <- function(data, span = 100000, weight = 0.7, n_clusters = NULL) { | |||
|     return(0.0) | ||||
|   } | ||||
| 
 | ||||
|   if (is.null(relation)) { | ||||
|     relation <- n | ||||
|   } | ||||
| 
 | ||||
|   # Cluster the data and compute the cluster sizes. | ||||
| 
 | ||||
|   tree <- stats::hclust(stats::dist(data)) | ||||
|  | @ -46,7 +57,7 @@ clusteriness <- function(data, span = 100000, weight = 0.7, n_clusters = NULL) { | |||
|     cluster_size <- cluster_sizes[i] | ||||
| 
 | ||||
|     if (cluster_size >= 2) { | ||||
|       cluster_score <- cluster_size / n | ||||
|       cluster_score <- cluster_size / relation | ||||
|       score <- score + weight^(i - 1) * cluster_score | ||||
|     } | ||||
|   } | ||||
|  |  | |||
|  | @ -4,7 +4,13 @@ | |||
| \alias{clusteriness} | ||||
| \title{Perform a cluster analysis.} | ||||
| \usage{ | ||||
| clusteriness(data, span = 1e+05, weight = 0.7, n_clusters = NULL) | ||||
| clusteriness( | ||||
|   data, | ||||
|   span = 1e+05, | ||||
|   weight = 0.7, | ||||
|   n_clusters = NULL, | ||||
|   relation = NULL | ||||
| ) | ||||
| } | ||||
| \arguments{ | ||||
| \item{data}{The values that should be scored.} | ||||
|  | @ -18,6 +24,10 @@ etc.} | |||
| 
 | ||||
| \item{n_clusters}{Maximum number of clusters that should be taken into | ||||
| account. By default, all clusters will be regarded.} | ||||
| 
 | ||||
| \item{relation}{Number of items that the cluster size should be based on. | ||||
| This should always be at least the length of the data. By default, the length | ||||
| of the data is used.} | ||||
| } | ||||
| \value{ | ||||
| A score between 0.0 and 1.0 summarizing how much the data clusters. | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue