# geposanui/init.R
#
# Loads the input data, runs the clustering and correlation processing through
# `run_cached` and merges the results with the gene information.

source("clustering.R")
source("correlation.R")
source("input.R")
source("util.R")
# Load input data.
#
# Each data set is obtained through `run_cached`, which is called with a name,
# a retrieval function and any further arguments for that function.
# NOTE(review): `run_cached` and the `retrieve_*` functions are defined in the
# sourced files; presumably the result is cached under the given name, so a
# name must stay unique per data set. Confirm against util.R.

# Species table; the `id` and `replicative` columns are used further below.
species <- run_cached("input_species", retrieve_species)

# Gene table; the `id`, `suggested` and `verified` columns are used below.
genes <- run_cached("input_genes", retrieve_genes)

# Distance data, requested for the IDs of all species and all genes.
# `species[, id]` relies on data.table-style column evaluation.
distances <- run_cached(
  "input_distances",
  retrieve_distances,
  species[, id],
  genes[, id]
)
# Derive the ID vectors that the processing steps operate on.

# Genes: every gene ID, plus the IDs of genes that are flagged as suggested
# or verified.
all_genes <- genes[, id]
tpe_old_genes <- genes[suggested | (verified == TRUE), id]

# Species: every species ID, plus the IDs of species flagged as replicative.
all_species <- species[, id]
replicative_species <- species[replicative == TRUE, id]
# Clustering results computed from the distances of all species and all genes.
# The result is later merged on its `gene` column.
# NOTE(review): `process_clustering` is defined in a sourced file; the first
# argument is presumably the cache name used by `run_cached`.
clustering_all <- run_cached(
  "clustering_all",
  process_clustering,
  distances,
  all_species,
  all_genes
)
# Clustering results restricted to the replicative species (all genes).
# The result is later merged on its `gene` column.
# NOTE(review): `process_clustering` is defined in a sourced file; the first
# argument is presumably the cache name used by `run_cached`.
clustering_replicative <- run_cached(
  "clustering_replicative",
  process_clustering,
  distances,
  replicative_species,
  all_genes
)
# Correlation results computed from the distances of all species and all
# genes, with the suggested/verified genes passed as an additional gene set.
# The result is later merged on its `gene` column.
# NOTE(review): `process_correlation` is defined in a sourced file; how it
# uses `tpe_old_genes` (presumably as the reference set) should be confirmed
# there.
correlation_all <- run_cached(
  "correlation_all",
  process_correlation,
  distances,
  all_species,
  all_genes,
  tpe_old_genes
)
# Correlation results restricted to the replicative species (all genes), with
# the suggested/verified genes passed as an additional gene set.
# The result is later merged on its `gene` column.
# NOTE(review): `process_correlation` is defined in a sourced file; how it
# uses `tpe_old_genes` (presumably as the reference set) should be confirmed
# there.
correlation_replicative <- run_cached(
  "correlation_replicative",
  process_correlation,
  distances,
  replicative_species,
  all_genes,
  tpe_old_genes
)
# Combine the gene information with the clustering and correlation results
# that are based on all species. Both joins match the `id` column of the left
# table against the `gene` column of the right table; the inner merge attaches
# the clustering results, the outer merge adds the correlation results.
results_all <- merge(
  merge(genes, clustering_all, by.x = "id", by.y = "gene"),
  correlation_all,
  by.x = "id",
  by.y = "gene"
)
# Combine the gene information with the clustering and correlation results
# that are based on the replicative species only. Both joins match the `id`
# column of the left table against the `gene` column of the right table; the
# inner merge attaches the clustering results, the outer merge adds the
# correlation results.
results_replicative <- merge(
  merge(genes, clustering_replicative, by.x = "id", by.y = "gene"),
  correlation_replicative,
  by.x = "id",
  by.y = "gene"
)
# The merged tables carry the gene identifier in `id`; rename that column to
# `gene` in both result tables.
# NOTE(review): `setnames` is presumably data.table's, loaded by one of the
# sourced files, in which case the rename happens in place.
setnames(results_replicative, old = "id", new = "gene")
setnames(results_all, old = "id", new = "gene")