Add new correlation method

This commit is contained in:
Elias Projahn 2021-09-18 23:10:52 +02:00
parent 1cea6c3631
commit 9d6b2e4d50
4 changed files with 176 additions and 63 deletions

97
init.R Normal file
View file

@ -0,0 +1,97 @@
source("clustering.R")
source("correlation.R")
source("input.R")
source("util.R")
# Input data: the species table, the gene table, and the distance data for
# every species/gene identifier pair. Each retrieval is routed through
# `run_cached` together with an "input/..." key.
# NOTE(review): presumably `run_cached` stores/reuses the result under that
# key -- confirm against its definition.
species <- run_cached("input/species", retrieve_species)
genes <- run_cached("input/genes", retrieve_genes)

# The distance retrieval additionally receives the `id` columns of both tables.
distances <- run_cached(
  "input/distances",
  retrieve_distances,
  species[, id], genes[, id]
)
# Load processed data

# Identifier vectors that select the inputs of the processing steps:
# every species, only the species flagged `replicative`, and every gene.
all_species <- species[, id]
replicative_species <- species[replicative == TRUE, id]
all_genes <- genes[, id]

# Genes that are flagged `suggested` or `verified`. `==` binds tighter than
# `|`, so the comparison applies to `verified` only; the parentheses merely
# make that grouping explicit.
tpe_old_genes <- genes[suggested | (verified == TRUE), id]
# Clustering results, computed once across all species and once restricted to
# the replicative species. Both runs use the same distance data and the full
# gene list; only the species selection and the cache key differ.
clustering_all <- run_cached(
  "all_species/clustering",
  process_clustering,
  distances, all_species, all_genes
)

clustering_replicative <- run_cached(
  "replicative_species/clustering",
  process_clustering,
  distances, replicative_species, all_genes
)
# Correlation results, again once for all species and once for the replicative
# species only. Besides the distance data and the full gene list, the
# processing step is handed `tpe_old_genes`.
# NOTE(review): presumably `tpe_old_genes` serves as the reference gene set
# for the correlation -- confirm in correlation.R.
correlation_all <- run_cached(
  "all_species/correlation",
  process_correlation,
  distances, all_species, all_genes, tpe_old_genes
)

correlation_replicative <- run_cached(
  "replicative_species/correlation",
  process_correlation,
  distances, replicative_species, all_genes, tpe_old_genes
)
# Combine the gene table with both per-gene result tables for each species
# set: clustering is merged in first, correlation second. `genes` is matched
# on its `id` column, the result tables on their `gene` column. No `all`
# argument is passed, so only genes present in every merged table remain.
results_all <- merge(
  merge(genes, clustering_all, by.x = "id", by.y = "gene"),
  correlation_all,
  by.x = "id",
  by.y = "gene"
)

results_replicative <- merge(
  merge(genes, clustering_replicative, by.x = "id", by.y = "gene"),
  correlation_replicative,
  by.x = "id",
  by.y = "gene"
)
# Finalize the table for all species: the identifier column `id` is renamed
# to `gene`, and the rows are put into descending `cluster_length` order as an
# initial sorting.
setnames(results_all, "id", "gene")
setorder(results_all, -cluster_length)

# Same treatment for the table restricted to replicative species.
setnames(results_replicative, "id", "gene")
setorder(results_replicative, -cluster_length)