Simplify data format and correct scale labels

This commit is contained in:
Elias Projahn 2021-08-16 17:21:01 +02:00
parent 495524a0ac
commit 400ca776e0
2 changed files with 22 additions and 28 deletions

18
data.R
View file

@ -41,7 +41,11 @@ load_data <- function(path) {
median_distance = numeric()
)
distances <- data.table(geneid = integer())
distances <- data.table(
species = character(),
gene = integer(),
distance = integer()
)
# Each file will contain data on one species.
file_names <- list.files(paste(path, "genomes", sep = "/"))
@ -63,13 +67,19 @@ load_data <- function(path) {
median_distance = median(species_distances[, dist])
)))
# Column names have to be unique for each species.
setnames(species_distances, "dist", species_id)
species_distances <- data.table(
species = species_id,
gene = species_distances[, geneid],
distance = species_distances[, dist]
)
distances <- merge(distances, species_distances, all = TRUE)
distances <- rbindlist(list(distances, species_distances))
}
}
# Order species by their median distance.
setorder(species, median_distance)
list(
genes = genes,
species = species,