2021-09-18 23:10:52 +02:00
|
|
|
source("clustering.R")
|
|
|
|
|
source("correlation.R")
|
|
|
|
|
source("input.R")
|
2021-10-05 18:30:12 +02:00
|
|
|
source("neural.R")
|
2021-09-18 23:10:52 +02:00
|
|
|
source("util.R")
|
|
|
|
|
|
|
|
|
|
# Load input data
|
|
|
|
|
|
2021-09-19 12:07:59 +02:00
|
|
|
# Each input table is obtained via `run_cached`, which is given a name for
# the result and the function that retrieves it (presumably the name is the
# cache key -- confirm against the definition of `run_cached`).
species <- run_cached("input_species", retrieve_species)
genes <- run_cached("input_genes", retrieve_genes)

# The distance retrieval additionally receives the IDs of all species and
# all genes loaded above.
distances <- run_cached(
  "input_distances", retrieve_distances,
  species[, id], genes[, id]
)
|
|
|
|
|
|
|
|
|
|
# Load processed data
|
|
|
|
|
|
|
|
|
|
# ID vectors covering every species and every gene.
all_species <- species[, id]
all_genes <- genes[, id]

# IDs of the species flagged as replicative.
replicative_species <- species[replicative == TRUE, id]

# IDs of the genes that are flagged as suggested or as verified. The
# parentheses only spell out the operator precedence (`==` binds tighter
# than `|`).
tpe_old_genes <- genes[suggested | (verified == TRUE), id]
|
|
|
|
|
|
|
|
|
|
# Clustering over all species.
clustering_all <- run_cached(
  "clustering_all", process_clustering,
  distances, all_species, all_genes
)

# Clustering restricted to the replicative species; the gene set is unchanged.
clustering_replicative <- run_cached(
  "clustering_replicative", process_clustering,
  distances, replicative_species, all_genes
)
|
|
|
|
|
|
|
|
|
|
# Correlation over all species. In addition to the inputs used for
# clustering, `process_correlation` is given `tpe_old_genes`.
correlation_all <- run_cached(
  "correlation_all", process_correlation,
  distances, all_species, all_genes, tpe_old_genes
)

# Correlation restricted to the replicative species.
correlation_replicative <- run_cached(
  "correlation_replicative", process_correlation,
  distances, replicative_species, all_genes, tpe_old_genes
)
|
|
|
|
|
|
2021-10-05 18:30:12 +02:00
|
|
|
# Neural processing over all species; takes the same four inputs as the
# correlation step.
neural_all <- run_cached(
  "neural_all", process_neural,
  distances, all_species, all_genes, tpe_old_genes
)

# Neural processing restricted to the replicative species.
neural_replicative <- run_cached(
  "neural_replicative", process_neural,
  distances, replicative_species, all_genes, tpe_old_genes
)
|
|
|
|
|
|
2021-09-18 23:10:52 +02:00
|
|
|
# Merge processed data as well as gene information.
|
|
|
|
|
|
|
|
|
|
# Build the combined result table for all species. Starting from `genes`,
# each table in the list is merged onto the accumulated result in order,
# matching its `gene` column against the `id` column of the result.
results_all <- Reduce(
  function(merged, addition) {
    merge(merged, addition, by.x = "id", by.y = "gene")
  },
  list(
    # Number of rows in `distances` per gene.
    distances[, .(n_species = .N), by = "gene"],
    clustering_all,
    correlation_all,
    neural_all
  ),
  genes
)
|
|
|
|
|
|
2021-09-18 23:10:52 +02:00
|
|
|
# Build the combined result table for the replicative species. Starting from
# `genes`, each table in the list is merged onto the accumulated result in
# order, matching its `gene` column against the `id` column of the result.
results_replicative <- Reduce(
  function(merged, addition) {
    merge(merged, addition, by.x = "id", by.y = "gene")
  },
  list(
    # Number of rows in `distances` per gene, counting only rows whose
    # species is in `replicative_species` -- the same ID vector that is
    # passed to the replicative clustering, correlation and neural steps.
    distances[
      species %chin% replicative_species,
      .(n_species = .N),
      by = "gene"
    ],
    clustering_replicative,
    correlation_replicative,
    neural_replicative
  ),
  genes
)
|
|
|
|
|
|
2021-09-18 23:10:52 +02:00
|
|
|
# Rename `id` columns to `gene`.
|
|
|
|
|
|
|
|
|
|
# `setnames` renames the column on the given table directly, so no
# reassignment is needed.
setnames(results_all, old = "id", new = "gene")
setnames(results_replicative, old = "id", new = "gene")
|