mirror of
https://github.com/johrpan/geposan.git
synced 2025-10-26 18:57:25 +01:00
clusteriness: Remove n_clusters and relation
This commit is contained in:
parent
0ae6836d86
commit
260705785a
3 changed files with 8 additions and 50 deletions
|
|
@ -12,20 +12,11 @@
|
||||||
#' relation to the previous one. For example, if `weight` is 0.7 (the
|
#' relation to the previous one. For example, if `weight` is 0.7 (the
|
||||||
#' default), the first cluster will weigh 1.0, the second 0.7, the third 0.49
|
#' default), the first cluster will weigh 1.0, the second 0.7, the third 0.49
|
||||||
#' etc.
|
#' etc.
|
||||||
#' @param n_clusters Maximum number of clusters that should be taken into
|
|
||||||
#' account. By default, all clusters will be regarded.
|
|
||||||
#' @param relation Number of items that the cluster size should be based on.
|
|
||||||
#' This should always be at least the length of the data. By default, the length
|
|
||||||
#' of the data is used.
|
|
||||||
#'
|
#'
|
||||||
#' @return A score between 0.0 and 1.0 summarizing how much the data clusters.
|
#' @return A score between 0.0 and 1.0 summarizing how much the data clusters.
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
clusteriness <- function(data,
|
clusteriness <- function(data, span = 100000, weight = 0.7) {
|
||||||
span = 100000,
|
|
||||||
weight = 0.7,
|
|
||||||
n_clusters = NULL,
|
|
||||||
relation = NULL) {
|
|
||||||
n <- length(data)
|
n <- length(data)
|
||||||
|
|
||||||
# Return a score of 0.0 if there is just one or no value at all.
|
# Return a score of 0.0 if there is just one or no value at all.
|
||||||
|
|
@ -33,10 +24,6 @@ clusteriness <- function(data,
|
||||||
return(0.0)
|
return(0.0)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is.null(relation)) {
|
|
||||||
relation <- n
|
|
||||||
}
|
|
||||||
|
|
||||||
# Cluster the data and compute the cluster sizes.
|
# Cluster the data and compute the cluster sizes.
|
||||||
|
|
||||||
tree <- stats::hclust(stats::dist(data))
|
tree <- stats::hclust(stats::dist(data))
|
||||||
|
|
@ -48,17 +35,11 @@ clusteriness <- function(data,
|
||||||
score <- 0.0
|
score <- 0.0
|
||||||
|
|
||||||
for (i in seq_along(cluster_sizes)) {
|
for (i in seq_along(cluster_sizes)) {
|
||||||
if (!is.null(n_clusters)) {
|
|
||||||
if (i > n_clusters) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
cluster_size <- cluster_sizes[i]
|
cluster_size <- cluster_sizes[i]
|
||||||
|
|
||||||
if (cluster_size >= 2) {
|
if (cluster_size >= 2) {
|
||||||
cluster_score <- cluster_size / relation
|
cluster_score <- cluster_size / n
|
||||||
score <- score + weight^(i - 1) * cluster_score
|
score <- score + weight^(i - 1) * cluster_score # nolint
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -85,9 +66,7 @@ clustering <- function(id = "clustering",
|
||||||
name = "Clustering",
|
name = "Clustering",
|
||||||
description = "Clustering of genes",
|
description = "Clustering of genes",
|
||||||
span = 100000,
|
span = 100000,
|
||||||
weight = 0.7,
|
weight = 0.7) {
|
||||||
n_clusters = NULL,
|
|
||||||
relation = NULL) {
|
|
||||||
method(
|
method(
|
||||||
id = id,
|
id = id,
|
||||||
name = name,
|
name = name,
|
||||||
|
|
@ -98,7 +77,7 @@ clustering <- function(id = "clustering",
|
||||||
|
|
||||||
cached(
|
cached(
|
||||||
"clustering",
|
"clustering",
|
||||||
c(species_ids, gene_ids, span, weight, n_clusters, relation),
|
c(species_ids, gene_ids, span, weight),
|
||||||
{ # nolint
|
{ # nolint
|
||||||
scores <- data.table(gene = gene_ids)
|
scores <- data.table(gene = gene_ids)
|
||||||
|
|
||||||
|
|
@ -112,13 +91,7 @@ clustering <- function(id = "clustering",
|
||||||
compute <- function(gene_id) {
|
compute <- function(gene_id) {
|
||||||
data <- distances[gene == gene_id, distance]
|
data <- distances[gene == gene_id, distance]
|
||||||
|
|
||||||
score <- clusteriness(
|
score <- clusteriness(data, span = span, weight = weight)
|
||||||
data,
|
|
||||||
span = span,
|
|
||||||
weight = weight,
|
|
||||||
n_clusters = n_clusters,
|
|
||||||
relation = relation
|
|
||||||
)
|
|
||||||
|
|
||||||
genes_done <<- genes_done + 1
|
genes_done <<- genes_done + 1
|
||||||
progress(genes_done / genes_total)
|
progress(genes_done / genes_total)
|
||||||
|
|
|
||||||
|
|
@ -4,13 +4,7 @@
|
||||||
\alias{clusteriness}
|
\alias{clusteriness}
|
||||||
\title{Perform a cluster analysis.}
|
\title{Perform a cluster analysis.}
|
||||||
\usage{
|
\usage{
|
||||||
clusteriness(
|
clusteriness(data, span = 1e+05, weight = 0.7)
|
||||||
data,
|
|
||||||
span = 1e+05,
|
|
||||||
weight = 0.7,
|
|
||||||
n_clusters = NULL,
|
|
||||||
relation = NULL
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{data}{The values that should be scored.}
|
\item{data}{The values that should be scored.}
|
||||||
|
|
@ -21,13 +15,6 @@ clusteriness(
|
||||||
relation to the previous one. For example, if \code{weight} is 0.7 (the
|
relation to the previous one. For example, if \code{weight} is 0.7 (the
|
||||||
default), the first cluster will weigh 1.0, the second 0.7, the third 0.49
|
default), the first cluster will weigh 1.0, the second 0.7, the third 0.49
|
||||||
etc.}
|
etc.}
|
||||||
|
|
||||||
\item{n_clusters}{Maximum number of clusters that should be taken into
|
|
||||||
account. By default, all clusters will be regarded.}
|
|
||||||
|
|
||||||
\item{relation}{Number of items that the cluster size should be based on.
|
|
||||||
This should always be at least the length of the data. By default, the length
|
|
||||||
of the data is used.}
|
|
||||||
}
|
}
|
||||||
\value{
|
\value{
|
||||||
A score between 0.0 and 1.0 summarizing how much the data clusters.
|
A score between 0.0 and 1.0 summarizing how much the data clusters.
|
||||||
|
|
|
||||||
|
|
@ -9,9 +9,7 @@ clustering(
|
||||||
name = "Clustering",
|
name = "Clustering",
|
||||||
description = "Clustering of genes",
|
description = "Clustering of genes",
|
||||||
span = 1e+05,
|
span = 1e+05,
|
||||||
weight = 0.7,
|
weight = 0.7
|
||||||
n_clusters = NULL,
|
|
||||||
relation = NULL
|
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue