# geposanui/init.R

# Load the helper scripts this file depends on. The paths are relative, so
# they are resolved against the current working directory.
source("clustering.R")
source("correlation.R")
source("input.R")
source("neural.R")
source("util.R")
# Load input data
#
# NOTE(review): `run_cached` and the `retrieve_*` functions are defined
# outside this file. Presumably `run_cached(name, fn, ...)` evaluates
# `fn(...)` and reuses a stored result under `name` — confirm there.
species <- run_cached("input_species", retrieve_species)
genes <- run_cached("input_genes", retrieve_genes)

# Distances are retrieved for the IDs of all loaded species and genes.
distances <- run_cached(
  "input_distances",
  retrieve_distances,
  species[, id],
  genes[, id]
)
# Load processed data
#
# ID vectors that select the species and gene subsets handed to the
# processing steps below.

# Every known species and gene.
all_species <- species[, id]
all_genes <- genes[, id]

# Only the species whose `replicative` column is TRUE.
replicative_species <- species[replicative == TRUE, id]

# Genes flagged as `suggested` or `verified`. `==` binds tighter than `|`,
# so the parentheses only spell out the existing evaluation order.
tpe_old_genes <- genes[suggested | (verified == TRUE), id]
# Clustering results, once for all species and once restricted to the
# replicative species. Both runs use the full gene list.
# NOTE(review): `process_clustering` comes from a sourced file; its result
# is later merged on a `gene` column — confirm the exact columns there.
clustering_all <- run_cached(
  "clustering_all",
  process_clustering,
  distances,
  all_species,
  all_genes
)

clustering_replicative <- run_cached(
  "clustering_replicative",
  process_clustering,
  distances,
  replicative_species,
  all_genes
)
# Correlation results, once for all species and once restricted to the
# replicative species. `tpe_old_genes` is passed as an extra argument.
# NOTE(review): presumably these are the reference genes that the other
# genes are correlated against — confirm in the sourced implementation.
correlation_all <- run_cached(
  "correlation_all",
  process_correlation,
  distances,
  all_species,
  all_genes,
  tpe_old_genes
)

correlation_replicative <- run_cached(
  "correlation_replicative",
  process_correlation,
  distances,
  replicative_species,
  all_genes,
  tpe_old_genes
)
# Neural results for both species sets. The arguments mirror the
# correlation step: distances, species IDs, gene IDs and `tpe_old_genes`.
# NOTE(review): `process_neural` is defined in a sourced file; what it
# computes cannot be seen from here.

# All species.
neural_all <- run_cached(
  "neural_all",
  process_neural,
  distances,
  all_species,
  all_genes,
  tpe_old_genes
)

# Replicative species only.
neural_replicative <- run_cached(
  "neural_replicative",
  process_neural,
  distances,
  replicative_species,
  all_genes,
  tpe_old_genes
)
# Merge processed data as well as gene information.
#
# Start from the gene table and attach `n_species`, the number of rows in
# `distances` for each gene. `merge()` defaults to `all = FALSE`, so genes
# without a match on the other side are dropped at every step.
results_all <- merge(
  genes,
  distances[, .(n_species = .N), by = "gene"],
  by.x = "id",
  by.y = "gene"
)

# Attach the clustering results.
results_all <- merge(
  results_all,
  clustering_all,
  by.x = "id",
  by.y = "gene"
)

# Attach the correlation results.
results_all <- merge(
  results_all,
  correlation_all,
  by.x = "id",
  by.y = "gene"
)

# Attach the neural results.
results_all <- merge(
  results_all,
  neural_all,
  by.x = "id",
  by.y = "gene"
)
# Same merge chain as for `results_all`, restricted to the replicative
# species.
#
# `n_species` counts, per gene, the rows of `distances` whose species is in
# `replicative_species`. The filter previously referenced
# `species_ids_replicative`, which is never defined in this file; the vector
# built above and used by the other replicative steps is
# `replicative_species`.
# NOTE(review): inside the brackets `species` presumably resolves to a
# column of `distances`, not to the global `species` table — confirm.
results_replicative <- merge(
  genes,
  distances[
    species %chin% replicative_species,
    .(n_species = .N),
    by = "gene"
  ],
  by.x = "id",
  by.y = "gene"
)

# Attach the clustering results.
results_replicative <- merge(
  results_replicative,
  clustering_replicative,
  by.x = "id",
  by.y = "gene"
)

# Attach the correlation results.
results_replicative <- merge(
  results_replicative,
  correlation_replicative,
  by.x = "id",
  by.y = "gene"
)

# Attach the neural results.
results_replicative <- merge(
  results_replicative,
  neural_replicative,
  by.x = "id",
  by.y = "gene"
)
# Rename `id` columns to `gene`.
# `setnames()` renames in place, so no reassignment is needed.
setnames(results_all, old = "id", new = "gene")
setnames(results_replicative, old = "id", new = "gene")