mirror of
https://github.com/johrpan/geposanui.git
synced 2025-10-26 11:17:24 +01:00
Simplify data format and correct scale labels
This commit is contained in:
parent
495524a0ac
commit
400ca776e0
2 changed files with 22 additions and 28 deletions
18
data.R
18
data.R
|
|
@ -41,7 +41,11 @@ load_data <- function(path) {
|
|||
median_distance = numeric()
|
||||
)
|
||||
|
||||
distances <- data.table(geneid = integer())
|
||||
distances <- data.table(
|
||||
species = character(),
|
||||
gene = integer(),
|
||||
distance = integer()
|
||||
)
|
||||
|
||||
# Each file will contain data on one species.
|
||||
file_names <- list.files(paste(path, "genomes", sep = "/"))
|
||||
|
|
@ -63,13 +67,19 @@ load_data <- function(path) {
|
|||
median_distance = median(species_distances[, dist])
|
||||
)))
|
||||
|
||||
# Column names have to be unique for each species.
|
||||
setnames(species_distances, "dist", species_id)
|
||||
species_distances <- data.table(
|
||||
species = species_id,
|
||||
gene = species_distances[, geneid],
|
||||
distance = species_distances[, dist]
|
||||
)
|
||||
|
||||
distances <- merge(distances, species_distances, all = TRUE)
|
||||
distances <- rbindlist(list(distances, species_distances))
|
||||
}
|
||||
}
|
||||
|
||||
# Order species by their median distance.
|
||||
setorder(species, median_distance)
|
||||
|
||||
list(
|
||||
genes = genes,
|
||||
species = species,
|
||||
|
|
|
|||
|
|
@ -3,44 +3,28 @@ library(ggplot2)
|
|||
|
||||
#' Draw a scatter plot containing gene positions.
|
||||
scatter_plot <- function(gene_ids, data) {
|
||||
species <- data$species
|
||||
setorder(species, median_distance)
|
||||
|
||||
distances <- data$distances[geneid %in% gene_ids]
|
||||
|
||||
plot <- ggplot() +
|
||||
scale_x_continuous(
|
||||
scale_x_discrete(
|
||||
name = "Species",
|
||||
breaks = seq_len(nrow(species)),
|
||||
labels = species$label
|
||||
breaks = data$species$id,
|
||||
labels = data$species$label
|
||||
) +
|
||||
scale_y_continuous(name = "Distance to telomeres [Mbp]") +
|
||||
geom_line(
|
||||
species,
|
||||
mapping = aes(
|
||||
x = as.numeric(rownames(species)),
|
||||
y = median_distance / 1000000
|
||||
)
|
||||
)
|
||||
scale_y_continuous(name = "Distance to telomeres [Mbp]")
|
||||
|
||||
colors <- rainbow(length(gene_ids))
|
||||
|
||||
for (i in seq_len(length(gene_ids))) {
|
||||
gene_id <- gene_ids[i]
|
||||
|
||||
gene_distances <- data.table(
|
||||
index = as.numeric(rownames(species)),
|
||||
distance = unlist(distances[geneid == gene_id, -1])
|
||||
)
|
||||
|
||||
plot <- plot +
|
||||
geom_point(
|
||||
gene_distances,
|
||||
data$distances[gene == gene_id],
|
||||
mapping = aes(
|
||||
x = index,
|
||||
x = species,
|
||||
y = distance / 1000000,
|
||||
),
|
||||
color = colors[i]
|
||||
color = colors[i],
|
||||
size = 4
|
||||
)
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue