mirror of
https://github.com/johrpan/geposanui.git
synced 2025-10-26 19:27:24 +01:00
Simplify data format and correct scale labels
This commit is contained in:
parent
495524a0ac
commit
400ca776e0
2 changed files with 22 additions and 28 deletions
18
data.R
18
data.R
|
|
@ -41,7 +41,11 @@ load_data <- function(path) {
|
||||||
median_distance = numeric()
|
median_distance = numeric()
|
||||||
)
|
)
|
||||||
|
|
||||||
distances <- data.table(geneid = integer())
|
distances <- data.table(
|
||||||
|
species = character(),
|
||||||
|
gene = integer(),
|
||||||
|
distance = integer()
|
||||||
|
)
|
||||||
|
|
||||||
# Each file will contain data on one species.
|
# Each file will contain data on one species.
|
||||||
file_names <- list.files(paste(path, "genomes", sep = "/"))
|
file_names <- list.files(paste(path, "genomes", sep = "/"))
|
||||||
|
|
@ -63,13 +67,19 @@ load_data <- function(path) {
|
||||||
median_distance = median(species_distances[, dist])
|
median_distance = median(species_distances[, dist])
|
||||||
)))
|
)))
|
||||||
|
|
||||||
# Column names have to be unique for each species.
|
species_distances <- data.table(
|
||||||
setnames(species_distances, "dist", species_id)
|
species = species_id,
|
||||||
|
gene = species_distances[, geneid],
|
||||||
|
distance = species_distances[, dist]
|
||||||
|
)
|
||||||
|
|
||||||
distances <- merge(distances, species_distances, all = TRUE)
|
distances <- rbindlist(list(distances, species_distances))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Order species by their median distance.
|
||||||
|
setorder(species, median_distance)
|
||||||
|
|
||||||
list(
|
list(
|
||||||
genes = genes,
|
genes = genes,
|
||||||
species = species,
|
species = species,
|
||||||
|
|
|
||||||
|
|
@ -3,44 +3,28 @@ library(ggplot2)
|
||||||
|
|
||||||
#' Draw a scatter plot containing gene positions.
|
#' Draw a scatter plot containing gene positions.
|
||||||
scatter_plot <- function(gene_ids, data) {
|
scatter_plot <- function(gene_ids, data) {
|
||||||
species <- data$species
|
|
||||||
setorder(species, median_distance)
|
|
||||||
|
|
||||||
distances <- data$distances[geneid %in% gene_ids]
|
|
||||||
|
|
||||||
plot <- ggplot() +
|
plot <- ggplot() +
|
||||||
scale_x_continuous(
|
scale_x_discrete(
|
||||||
name = "Species",
|
name = "Species",
|
||||||
breaks = seq_len(nrow(species)),
|
breaks = data$species$id,
|
||||||
labels = species$label
|
labels = data$species$label
|
||||||
) +
|
) +
|
||||||
scale_y_continuous(name = "Distance to telomeres [Mbp]") +
|
scale_y_continuous(name = "Distance to telomeres [Mbp]")
|
||||||
geom_line(
|
|
||||||
species,
|
|
||||||
mapping = aes(
|
|
||||||
x = as.numeric(rownames(species)),
|
|
||||||
y = median_distance / 1000000
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
colors <- rainbow(length(gene_ids))
|
colors <- rainbow(length(gene_ids))
|
||||||
|
|
||||||
for (i in seq_len(length(gene_ids))) {
|
for (i in seq_len(length(gene_ids))) {
|
||||||
gene_id <- gene_ids[i]
|
gene_id <- gene_ids[i]
|
||||||
|
|
||||||
gene_distances <- data.table(
|
|
||||||
index = as.numeric(rownames(species)),
|
|
||||||
distance = unlist(distances[geneid == gene_id, -1])
|
|
||||||
)
|
|
||||||
|
|
||||||
plot <- plot +
|
plot <- plot +
|
||||||
geom_point(
|
geom_point(
|
||||||
gene_distances,
|
data$distances[gene == gene_id],
|
||||||
mapping = aes(
|
mapping = aes(
|
||||||
x = index,
|
x = species,
|
||||||
y = distance / 1000000,
|
y = distance / 1000000,
|
||||||
),
|
),
|
||||||
color = colors[i]
|
color = colors[i],
|
||||||
|
size = 4
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue