mirror of
https://github.com/johrpan/geposanui.git
synced 2025-10-26 19:27:24 +01:00
Base clusteriness on species count
This commit is contained in:
parent
8b727a0329
commit
397b8d0ba2
2 changed files with 7 additions and 7 deletions
12
clustering.R
12
clustering.R
|
|
@ -7,9 +7,10 @@ library(rlog)
|
||||||
#' This function will cluster the data using `hclust` and `cutree` (with the
|
#' This function will cluster the data using `hclust` and `cutree` (with the
|
||||||
#' specified height). Every cluster with at least two members qualifies for
|
#' specified height). Every cluster with at least two members qualifies for
|
||||||
#' further analysis. Clusters are then ranked based on their size in relation
|
#' further analysis. Clusters are then ranked based on their size in relation
|
||||||
#' to the total number of values. The return value is a final score between
|
#' to the total number of possible values (`n`). The return value is a final
|
||||||
#' zero and one. Lower ranking clusters contribute less to this score.
|
#' score between zero and one. Lower ranking clusters contribute less to this
|
||||||
clusteriness <- function(data, height = 1000000) {
|
#' score.
|
||||||
|
clusteriness <- function(data, n, height = 1000000) {
|
||||||
# Cluster the data and compute the cluster sizes.
|
# Cluster the data and compute the cluster sizes.
|
||||||
|
|
||||||
tree <- hclust(dist(data))
|
tree <- hclust(dist(data))
|
||||||
|
|
@ -19,7 +20,6 @@ clusteriness <- function(data, height = 1000000) {
|
||||||
# Compute the "clusteriness" score.
|
# Compute the "clusteriness" score.
|
||||||
|
|
||||||
score <- 0.0
|
score <- 0.0
|
||||||
n <- length(data)
|
|
||||||
|
|
||||||
for (i in seq_along(cluster_sizes)) {
|
for (i in seq_along(cluster_sizes)) {
|
||||||
cluster_size <- cluster_sizes[i]
|
cluster_size <- cluster_sizes[i]
|
||||||
|
|
@ -70,11 +70,11 @@ process_clustering <- function(distances, species_ids, gene_ids) {
|
||||||
.(species, distance)
|
.(species, distance)
|
||||||
]
|
]
|
||||||
|
|
||||||
if (data[, .N] < 12) {
|
if (data[, .N] < 10) {
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
|
|
||||||
score <- clusteriness(data[, distance])
|
score <- clusteriness(data[, distance], length(species_ids))
|
||||||
|
|
||||||
results[
|
results[
|
||||||
gene == gene_id,
|
gene == gene_id,
|
||||||
|
|
|
||||||
|
|
@ -41,7 +41,7 @@ process_correlation <- function(distances, species_ids, gene_ids,
|
||||||
gene_id <- gene_ids[i]
|
gene_id <- gene_ids[i]
|
||||||
gene_distances <- distances[gene == gene_id]
|
gene_distances <- distances[gene == gene_id]
|
||||||
|
|
||||||
if (nrow(gene_distances) < 12) {
|
if (nrow(gene_distances) < 10) {
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue