clustering: Expose clusteriness parameters

This commit is contained in:
Elias Projahn 2022-06-22 11:24:30 +02:00
parent c6ca93b009
commit ab545a415c
2 changed files with 49 additions and 26 deletions

View file

@ -68,7 +68,9 @@ clusteriness <- function(data,
#' Process genes by clustering their distances to telomeres.
#'
#' The result will be cached and can be reused for different presets, because
#' it is independent of the reference genes in use.
#' it is independent of the reference genes in use. Most parameters of the
#' [clusteriness()] function are exposed here. See its documentation for more
#' information.
#'
#' @param id Unique ID for the method and its results.
#' @param name Human readable name for the method.
@ -80,8 +82,12 @@ clusteriness <- function(data,
#'
#' @export
clustering <- function(id = "clustering",
name = "Clustering",
description = "Clustering of genes") {
name = "Clustering",
description = "Clustering of genes",
span = 100000,
weight = 0.7,
n_clusters = NULL,
relation = NULL) {
# span, weight, n_clusters and relation are passed through unchanged to
# clusteriness() for every gene (see the compute() helper below).
method(
id = id,
name = name,
@ -90,33 +96,44 @@ clustering <- function(id = "clustering",
species_ids <- preset$species_ids
gene_ids <- preset$gene_ids
cached("clustering", c(species_ids, gene_ids), {
scores <- data.table(gene = gene_ids)
cached(
"clustering",
# The clusteriness parameters are part of the vector handed to cached(),
# presumably so results for different settings are stored separately.
# NOTE(review): c() drops NULL and coerces everything to one type, so the
# NULL defaults leave no trace in this vector -- confirm that distinct
# parameter combinations cannot end up with the same key.
c(species_ids, gene_ids, span, weight, n_clusters, relation),
{ # nolint
scores <- data.table(gene = gene_ids)
# Prefilter the input data by species.
distances <- geposan::distances[species %chin% species_ids]
# Prefilter the input data by species.
distances <- geposan::distances[species %chin% species_ids]
genes_done <- 0
genes_total <- length(gene_ids)
genes_done <- 0
genes_total <- length(gene_ids)
# Perform the cluster analysis for one gene.
compute <- function(gene_id) {
data <- distances[gene == gene_id, distance]
score <- clusteriness(data)
# Perform the cluster analysis for one gene.
compute <- function(gene_id) {
data <- distances[gene == gene_id, distance]
genes_done <<- genes_done + 1
progress(genes_done / genes_total)
score <- clusteriness(
data,
span = span,
weight = weight,
n_clusters = n_clusters,
relation = relation
)
score
# `<<-` updates the counter defined outside compute() so that the reported
# progress accumulates across genes.
genes_done <<- genes_done + 1
progress(genes_done / genes_total)
score
}
# Call compute() once per gene group and store the result in `score`.
scores[, score := compute(gene), by = gene]
result(
method = "clustering",
scores = scores
)
}
scores[, score := compute(gene), by = gene]
result(
method = "clustering",
scores = scores
)
})
)
}
)
}

View file

@ -7,7 +7,11 @@
clustering(
id = "clustering",
name = "Clustering",
description = "Clustering of genes"
description = "Clustering of genes",
span = 1e+05,
weight = 0.7,
n_clusters = NULL,
relation = NULL
)
}
\arguments{
@ -22,7 +26,9 @@ An object of class \code{geposan_method}.
}
\description{
The result will be cached and can be reused for different presets, because
it is independent of the reference genes in use.
it is independent of the reference genes in use. Most parameters of the
\code{\link[=clusteriness]{clusteriness()}} function are exposed here. See its documentation for more
information.
}
\seealso{
\code{\link[=clusteriness]{clusteriness()}}