geposan/R/plots.R

802 lines
20 KiB
R
Raw Normal View History

2022-03-01 15:49:04 +01:00
#' Base color for the plots.
#' @noRd
base_color <- function() "#1964bf"
#' Transparent version of the base color.
#' @noRd
base_color_transparent <- function() "#1964bf80"
#' Color palette for gene sets.
#' @noRd
gene_set_color <- function(index) {
2022-05-26 12:42:19 +02:00
c("#FF7F00", "#4DAF4A", "#984EA3")[index]
2022-03-01 15:49:04 +01:00
}
2021-11-15 15:12:13 +01:00
#' Plot gene positions.
#'
#' This function requires the package `plotly`.
#'
#' @param species_ids IDs of species to show in the plot.
#' @param gene_sets A list of gene sets (containing vectors of gene IDs) that
2021-12-02 17:23:18 +01:00
#' will be highlighted in the plot. The names will be used as labels.
2022-05-28 17:01:31 +02:00
#' @param reference_gene_ids Optionally, a set of reference genes that will be
#' used to reorder the species.
2021-11-15 15:12:13 +01:00
#'
#' @export
2022-05-28 17:01:31 +02:00
plot_positions <- function(species_ids, gene_sets, reference_gene_ids = NULL) {
2022-05-26 12:42:19 +02:00
if (!requireNamespace("plotly", quietly = TRUE)) {
stop("Please install \"plotly\" to use this function.")
}
# Prefilter data by species.
data <- geposan::distances[species %chin% species_ids]
species_max_distance <- data[,
.(max_distance = max(distance)),
by = species
]
# Prefilter species.
species <- geposan::species[id %chin% species_ids]
2022-05-28 17:01:31 +02:00
# Sort species if reference genes have been provided.
if (!is.null(reference_gene_ids)) {
species_median_distance <- data[
gene %chin% reference_gene_ids,
.(median_distance = as.numeric(stats::median(distance))),
by = species
]
species <- merge(
species,
species_median_distance,
by.x = "id",
by.y = "species",
all.x = TRUE
)
setorder(species, median_distance)
}
2022-05-26 12:42:19 +02:00
plot <- plotly::plot_ly() |>
plotly::layout(
2022-05-28 16:15:33 +02:00
xaxis = list(title = "Distance to telomeres [Bp]"),
yaxis = list(
2022-05-26 12:42:19 +02:00
title = "Species",
2022-05-28 17:01:31 +02:00
type = "category",
categoryorder = "array",
categoryarray = species$id,
tickmode = "array",
2022-05-26 12:42:19 +02:00
tickvals = species$id,
ticktext = species$name
),
bargap = 0.9
) |>
plotly::add_bars(
data = species_max_distance,
2022-05-28 16:15:33 +02:00
x = ~max_distance,
y = ~species,
2022-05-26 12:42:19 +02:00
name = "All genes",
marker = list(color = base_color())
)
2021-11-15 15:12:13 +01:00
2022-05-26 12:42:19 +02:00
if (length(gene_sets) > 0) {
# Include gene information which will be used for labeling
gene_set_data <- merge(
data[gene %chin% unlist(gene_sets)],
geposan::genes,
by.x = "gene",
by.y = "id"
)
2021-11-15 15:12:13 +01:00
2022-05-26 12:42:19 +02:00
index <- 1
2021-12-02 17:23:18 +01:00
2022-05-26 12:42:19 +02:00
for (gene_set_name in names(gene_sets)) {
gene_set <- gene_sets[[gene_set_name]]
plot <- plot |> plotly::add_markers(
data = gene_set_data[gene %chin% gene_set],
2022-05-28 16:15:33 +02:00
x = ~distance,
y = ~species,
2022-05-26 12:42:19 +02:00
name = gene_set_name,
text = ~ glue::glue(
"<b>{name}</b><br>",
"{round(distance / 1000000, digits = 2)} MBp"
),
hoverinfo = "text",
marker = list(
size = 5,
2022-05-26 12:42:19 +02:00
color = gene_set_color(index)
2021-12-02 17:23:18 +01:00
)
2022-05-26 12:42:19 +02:00
)
2021-12-02 17:23:18 +01:00
2022-05-26 12:42:19 +02:00
index <- index + 1
2021-11-15 15:12:13 +01:00
}
2022-05-26 12:42:19 +02:00
}
2021-11-15 15:12:13 +01:00
2022-05-26 12:42:19 +02:00
plot
2021-11-15 15:12:13 +01:00
}
2021-12-02 15:28:03 +01:00
#' Plot a side-by-side comparison of multiple rankings.
#'
#' Each ranking's scores will be shown as a vertical violin plot without any
#' additional markings. The gene sets will be shown as markers on top of the
#' density visualization.
#'
#' This function requires the package `plotly`.
#'
#' @param rankings A named list of rankings to display. The names will be shown
#' as labels in the plot.
#' @param gene_sets A named list of vectors of gene IDs to highlight. The names
#' will be used to distinguish the sets and in the legend.
#'
#' @export
plot_rankings <- function(rankings, gene_sets) {
2022-05-26 12:42:19 +02:00
if (!requireNamespace("plotly", quietly = TRUE)) {
stop("Please install \"plotly\" to use this function.")
}
plot <- plotly::plot_ly() |>
plotly::layout(
xaxis = list(tickvals = names(rankings)),
yaxis = list(title = "Score")
)
2021-12-02 15:28:03 +01:00
2022-05-26 12:42:19 +02:00
is_first <- TRUE
for (ranking_name in names(rankings)) {
ranking <- rankings[[ranking_name]]
plot <- plot |> plotly::add_trace(
data = ranking,
x = ranking_name,
y = ~score,
name = "All genes",
type = "violin",
spanmode = "hard",
points = FALSE,
showlegend = is_first,
hoverinfo = "skip",
line = list(color = base_color()),
fillcolor = base_color_transparent()
)
2021-12-02 15:28:03 +01:00
2022-05-26 12:42:19 +02:00
if (length(gene_sets) > 0) {
gene_set_data <- merge(
ranking[gene %chin% unlist(gene_sets)],
geposan::genes,
by.x = "gene",
by.y = "id"
)
index <- 1
for (gene_set_name in names(gene_sets)) {
gene_set <- gene_sets[[gene_set_name]]
plot <- plot |> plotly::add_markers(
data = gene_set_data[gene %chin% gene_set],
x = ranking_name,
y = ~score,
name = gene_set_name,
text = ~ glue::glue(
"<b>{name}</b><br>",
"Score: {round(score, digits = 2)}<br>",
"Rank: {rank}<br>",
"Percentile: {round(percentile * 100, digits = 2)}%"
),
hoverinfo = "text",
showlegend = is_first,
marker = list(
size = 10,
color = gene_set_color(index)
)
2021-12-02 15:28:03 +01:00
)
2022-05-26 12:42:19 +02:00
index <- index + 1
}
2021-12-02 15:28:03 +01:00
}
2022-05-26 12:42:19 +02:00
is_first <- FALSE
}
plot
2021-12-02 15:28:03 +01:00
}
2022-08-14 16:20:54 +02:00
#' Plot a scatter plot to compare two rankings.
#'
#' This function requires the package `plotly`.
#'
#' @param ranking_x The ranking to be shown on the X axis.
#' @param ranking_y The ranking to be shown on the Y axis.
#' @param name_x Title of the X axis.
#' @param name_y Title of the Y axis.
#' @param gene_sets A named list of vectors of gene IDs to highlight. The names
#' will be used to distinguish the sets and in the legend.
#' @param use_ranks Show ranks instead of scores.
#' @param use_sample Limit genes outside of the gene sets to a random sample.
2022-08-14 16:20:54 +02:00
#'
#' @export
plot_rankings_correlation <- function(ranking_x,
ranking_y,
name_x,
name_y,
gene_sets = NULL,
use_ranks = TRUE,
use_sample = TRUE) {
2022-08-14 16:20:54 +02:00
if (!requireNamespace("plotly", quietly = TRUE)) {
stop("Please install \"plotly\" to use this function.")
}
data <- merge(ranking_x, ranking_y, by = "gene")
data <- merge(data, geposan::genes, by.x = "gene", by.y = "id")
data[, `:=`(
x = if (use_ranks) rank.x else score.x,
y = if (use_ranks) rank.y else score.y
)]
model <- stats::lm(y ~ x, data)
model_data <- data.table(x = seq(min(data$x), max(data$x), length = 100))
model_data[, c("y", "lower", "upper") := data.table(
stats::predict(model, model_data, interval = "confidence")
)]
fig <- plotly::plot_ly()
2022-08-14 16:20:54 +02:00
if (use_sample) {
fig <- fig |> plotly::add_markers(
data = data[!gene %chin% unlist(gene_sets)][sample(.N, 1000)],
2022-08-14 16:20:54 +02:00
x = ~x,
y = ~y,
name = "All genes",
marker = list(
color = base_color(),
size = 5
),
hoverinfo = "skip"
)
} else {
fig <- fig |> plotly::add_markers(
data = data,
x = ~x,
y = ~y,
name = "All genes",
marker = list(
color = base_color(),
size = 5
),
text = ~ glue::glue(
"<b>{name}</b>",
"<br>",
"{name_x}: {round(x, digits = 2)} ",
"({round(percentile.x * 100, digits = 2)}%)<br>",
"{name_y}: {round(y, digits = 2)} ",
"({round(percentile.y * 100, digits = 2)}%)"
),
hoverinfo = "text"
)
}
fig <- fig |>
2022-08-14 16:20:54 +02:00
plotly::add_lines(
data = model_data,
x = ~x,
y = ~y,
line = list(color = base_color()),
showlegend = FALSE,
hoverinfo = "skip"
) |>
plotly::add_ribbons(
data = model_data,
x = ~x,
ymin = ~lower,
ymax = ~upper,
fillcolor = base_color_transparent(),
line = list(width = 0),
showlegend = FALSE,
hoverinfo = "skip"
)
gene_set_index <- 1
2022-08-14 16:20:54 +02:00
for (gene_set_name in names(gene_sets)) {
gene_set <- gene_sets[[gene_set_name]]
fig <- fig |>
plotly::add_markers(
data = data[gene %chin% gene_set],
x = ~x,
y = ~y,
name = gene_set_name,
marker = list(
color = gene_set_color(gene_set_index),
size = 8
),
text = ~ glue::glue(
"<b>{name}</b>",
"<br>",
"{name_x}: {round(x, digits = 2)} ",
"({round(percentile.x * 100, digits = 2)}%)<br>",
"{name_y}: {round(y, digits = 2)} ",
"({round(percentile.y * 100, digits = 2)}%)"
),
hoverinfo = "text"
2022-08-14 16:20:54 +02:00
)
gene_set_index <- gene_set_index + 1
}
fig <- fig |> plotly::layout(
xaxis = list(title = name_x),
yaxis = list(title = name_y)
)
if (use_ranks) {
fig <- fig |> plotly::layout(
xaxis = list(autorange = "reversed"),
yaxis = list(autorange = "reversed")
)
}
fig
}
2021-12-02 15:28:03 +01:00
2021-11-06 13:03:36 +01:00
#' Plot a ranking as a scatter plot of scores.
#'
#' This function requires the package `plotly`.
#'
#' @param ranking The ranking to visualize.
2021-12-02 17:23:18 +01:00
#' @param gene_sets A named list of gene sets (containing vectors of gene IDs)
#' that will be highlighted in the plot. The names will be used in the legend.
2021-11-06 13:03:36 +01:00
#'
#' @seealso ranking()
#'
#' @export
plot_scores <- function(ranking, gene_sets = NULL) {
2022-05-26 12:42:19 +02:00
if (!requireNamespace("plotly", quietly = TRUE)) {
stop("Please install \"plotly\" to use this function.")
}
# To speed up rendering, don't show every single gene.
n_ranks <- nrow(ranking)
sample_ranking <- ranking[seq(1, n_ranks, 5)]
plot <- plotly::plot_ly() |>
plotly::add_lines(
data = sample_ranking,
x = ~percentile,
y = ~score,
name = "All genes",
hoverinfo = "skip",
line = list(width = 4, color = base_color())
) |>
plotly::layout(
xaxis = list(
title = "Percentile",
tickformat = ".0%"
),
yaxis = list(title = "Score"),
shapes = list(
vline(0.95),
vline(0.75),
vline(0.50),
vline(0.25),
vline(0.05)
),
annotations = list(
vlineannotation(0.95),
vlineannotation(0.75),
vlineannotation(0.50),
vlineannotation(0.25),
vlineannotation(0.05)
)
2022-05-26 12:42:19 +02:00
)
2021-11-06 13:03:36 +01:00
2022-05-26 12:42:19 +02:00
if (length(gene_sets) > 0) {
# Include gene information which will be used for labeling
gene_set_data <- merge(
ranking[gene %chin% unlist(gene_sets)],
geposan::genes,
by.x = "gene",
by.y = "id"
)
2021-11-06 13:03:36 +01:00
2022-05-26 12:42:19 +02:00
index <- 1
2021-11-06 13:03:36 +01:00
2022-05-26 12:42:19 +02:00
for (gene_set_name in names(gene_sets)) {
gene_set <- gene_sets[[gene_set_name]]
2021-11-15 09:21:37 +01:00
2022-05-26 12:42:19 +02:00
plot <- plot |> plotly::add_markers(
data = gene_set_data[gene %chin% gene_set],
x = ~percentile,
y = ~score,
name = gene_set_name,
text = ~ glue::glue(
"<b>{name}</b><br>",
"Score: {round(score, digits = 2)}<br>",
"Rank: {rank}<br>",
"Percentile: {round(percentile * 100, digits = 2)}%"
),
hoverinfo = "text",
marker = list(
size = 10,
color = gene_set_color(index)
)
)
2021-11-15 09:21:37 +01:00
2022-05-26 12:42:19 +02:00
index <- index + 1
}
}
plot
2021-11-06 13:03:36 +01:00
}
#' Visualize a ranking by comparing gene sets in a boxplot.
#'
#' This function requires the package `plotly`.
#'
#' @param ranking The ranking to visualize.
2021-12-02 17:23:18 +01:00
#' @param gene_sets A named list of gene sets (containing vectors of gene IDs)
#' that will be shown as separate boxes. The names will be used as labels.
2021-11-06 13:03:36 +01:00
#'
#' @seealso ranking()
#'
#' @export
2021-12-02 17:23:18 +01:00
plot_boxplot <- function(ranking, gene_sets = NULL) {
2022-05-26 12:42:19 +02:00
if (!requireNamespace("plotly", quietly = TRUE)) {
stop("Please install \"plotly\" to use this function.")
}
plot <- plotly::plot_ly() |>
plotly::add_boxplot(
data = ranking,
2022-08-18 11:57:46 +02:00
x = ~score,
y = "All genes",
2022-05-26 12:42:19 +02:00
name = "All genes",
showlegend = FALSE,
2022-08-18 11:57:46 +02:00
boxpoints = FALSE,
2022-05-26 12:42:19 +02:00
line = list(color = base_color())
) |>
plotly::layout(
2022-08-18 11:57:46 +02:00
xaxis = list(title = "Score"),
yaxis = list(tickvals = c("All genes", names(gene_sets)))
2022-05-26 12:42:19 +02:00
)
2021-11-06 13:03:36 +01:00
2022-05-26 12:42:19 +02:00
if (length(gene_sets) > 0) {
index <- 1
2021-12-02 17:23:18 +01:00
2022-05-26 12:42:19 +02:00
for (gene_set_name in names(gene_sets)) {
gene_set <- gene_sets[[gene_set_name]]
plot <- plot |> plotly::add_boxplot(
data = ranking[gene %chin% gene_set],
2022-08-18 11:57:46 +02:00
x = ~score,
y = gene_set_name,
2022-05-26 12:42:19 +02:00
name = gene_set_name,
showlegend = FALSE,
2022-08-18 11:57:46 +02:00
boxpoints = FALSE,
2022-05-26 12:42:19 +02:00
line = list(color = gene_set_color(index))
)
index <- index + 1
2021-11-06 13:03:36 +01:00
}
2022-05-26 12:42:19 +02:00
}
2021-11-06 13:03:36 +01:00
2022-05-26 12:42:19 +02:00
plot
2021-11-06 13:03:36 +01:00
}
2021-11-22 14:10:08 +01:00
#' Show the distribution of scores across chromosomes.
#'
#' This function requires the package `plotly`.
#'
#' @param ranking The ranking to visualize.
#'
#' @seealso ranking()
#'
#' @export
plot_chromosomes <- function(ranking) {
2022-05-26 12:42:19 +02:00
if (!requireNamespace("plotly", quietly = TRUE)) {
stop("Please install \"plotly\" to use this function.")
}
2023-11-10 14:43:48 +01:00
data <- merge(
geposan::distances[
species == "9606",
.(
gene,
chromosome,
position = (start_position + end_position) / 2
2022-05-26 12:42:19 +02:00
)
2023-11-10 14:43:48 +01:00
],
ranking,
by = "gene"
)[, .(chromosome, position, score)]
data <- merge(data, geposan::chromosomes, by.x = "chromosome", by.y = "id")
global_max_position <- data[, max(position)]
centromeres <- fread("
name, start, end
1, 122026460, 125184587
2, 92188146, 94090557
3, 90772459, 93655574
4, 49708101, 51743951
5, 46485901, 50059807
6, 58553889, 59829934
7, 58169654, 60828234
8, 44033745, 45877265
9, 43236168, 45518558
10, 39686683, 41593521
11, 51078349, 54425074
12, 34769408, 37185252
13, 16000001, 18051248
14, 16000001, 18173523
15, 17000001, 19725254
16, 36311159, 38280682
17, 22813680, 26885980
18, 15460900, 20861206
19, 24498981, 27190874
20, 26436233, 30038348
21, 10864561, 12915808
22, 12954789, 15054318
X, 58605580, 62412542
Y, 10316945, 10544039")
rows_chromosomes <- list(
list("1", "2", "3", "4", "5"),
list("6", "7", "8", "9", "10", "11", "12"),
list("13", "14", "15", "16", "17", "18"),
list("19", "20", "21", "22", "X", "Y")
)
2022-05-26 12:42:19 +02:00
2023-11-10 14:43:48 +01:00
cell_width <- floor(100 / max(lengths(rows_chromosomes)))
row_height <- floor(100 / length(rows_chromosomes))
rows_figs <- list()
2022-05-26 12:42:19 +02:00
2023-11-10 14:43:48 +01:00
for (chromosomes in rows_chromosomes) {
row_figs <- list()
n_chromosomes <- length(chromosomes)
first <- TRUE
for (chromosome_name in chromosomes) {
chromosome_data <- data[name == chromosome_name]
# Center chromosomes horizontally
offset <- chromosome_data[, (global_max_position - max(position)) / 2]
chromosome_data[, position := position + offset]
model <- stats::loess(
score ~ position,
chromosome_data
)
positions <- seq(0, global_max_position, by = global_max_position / 100)
scores <- stats::predict(model, positions)
centromere_position <- offset + centromeres[
name == chromosome_name,
(start + end) / 2
]
centromere_score <- scores[!is.na(scores)][
which.min(abs(positions[!is.na(scores)] - centromere_position))
]
row_figs <- c(row_figs, list(htmltools::div(
style = glue::glue("width: {cell_width}%; height: {row_height}%"),
2023-11-11 15:50:37 +01:00
plotly::plot_ly(
width = 150,
height = 150
) |>
2023-11-10 14:43:48 +01:00
plotly::add_lines(
x = positions,
y = scores,
line = list(width = 4, color = base_color())
) |>
plotly::add_markers(
x = centromere_position,
y = centromere_score,
marker = list(
size = 12,
color = "white",
line = list(width = 4, color = base_color())
)
) |>
plotly::layout(
annotations = list(
x = 0.5,
y = 0.8,
text = chromosome_name,
xref = "paper",
yref = "paper",
xanchor = "center",
showarrow = FALSE,
font = list(size = 16)
),
xaxis = list(
range = c(0, global_max_position),
showgrid = FALSE,
zeroline = FALSE,
visible = FALSE
),
yaxis = list(
range = c(0, 1),
showgrid = FALSE,
zeroline = FALSE,
visible = FALSE
),
showlegend = FALSE,
margin = list(
b = 0,
l = 0,
r = 0,
t = 0
)
2023-11-11 15:50:37 +01:00
) |>
plotly::config(
staticPlot = TRUE
2023-11-10 14:43:48 +01:00
)
)))
first <- FALSE
}
rows_figs <- c(rows_figs, list(htmltools::div(
style = "display: flex; justify-content: space-between",
row_figs
)))
}
htmltools::browsable(htmltools::div(
style = "display: flex; flex-direction: column",
rows_figs
))
2021-11-22 14:10:08 +01:00
}
2022-05-17 21:48:30 +02:00
#' Plot scores in relation to chromosomal position of genes.
#'
#' @param ranking The ranking to visualize.
#' @param chromosome_name The chromosome to visualize. If this is `NULL` all,
#' chromosomes will be included and the x-axis will show distances instead of
#' positions.
#' @param gene_sets Named list of vectors of genes to highlight. The list names
#' will be used as labels.
2022-05-17 21:48:30 +02:00
#'
#' @return A `plotly` figure.
#' @seealso ranking()
#'
#' @export
plot_scores_by_position <- function(ranking,
chromosome_name = NULL,
gene_sets = NULL) {
2022-05-26 12:42:19 +02:00
if (!requireNamespace("plotly", quietly = TRUE)) {
stop("Please install \"plotly\" to use this function.")
}
distance_data <- if (!is.null(chromosome_name)) {
chromosome_id <- geposan::chromosomes[
species == "9606" & name == chromosome_name,
id
2022-05-26 12:42:19 +02:00
]
geposan::distances[species == "9606" & chromosome == chromosome_id]
2022-05-26 12:42:19 +02:00
} else {
geposan::distances[species == "9606"]
2022-05-26 12:42:19 +02:00
}
data <- merge(ranking, distance_data, by = "gene")
data <- merge(
data,
geposan::genes,
by.x = "gene",
by.y = "id"
)
# Use distances instead of positions in case all chromosomes are included.
if (is.null(chromosome_name)) {
data[, x := distance]
} else {
data[, x := (start_position + end_position) / 2]
2022-05-26 12:42:19 +02:00
}
fig <- plotly::plot_ly() |>
2022-05-26 12:42:19 +02:00
plotly::add_markers(
data = data[!gene %chin% unlist(gene_sets)],
2022-05-26 12:42:19 +02:00
x = ~x,
y = ~score,
name = "All genes",
2022-05-26 12:42:19 +02:00
text = ~ glue::glue(
"<b>{name}</b><br>",
if (is.null(chromosome_name)) "Distance: " else "Position: ",
"{round(x / 1000000, digits = 2)} MBp<br>",
"Score: {round(score, digits = 2)}<br>",
"Rank: {rank}<br>",
"Percentile: {round(percentile * 100, digits = 2)}%"
),
marker = list(
color = base_color(),
size = 5
),
hoverinfo = "text"
2022-05-26 12:42:19 +02:00
) |>
plotly::layout(
xaxis = list(title = if (is.null(chromosome_name)) {
"Distance (Bp)"
} else {
"Position (Bp)"
}),
yaxis = list(title = "Score")
)
index <- 1
for (gene_set_name in names(gene_sets)) {
gene_set_genes <- gene_sets[[gene_set_name]]
fig <- fig |>
plotly::add_markers(
data = data[gene %chin% gene_set_genes],
x = ~x,
y = ~score,
name = gene_set_name,
text = ~ glue::glue(
"<b>{name}</b><br>",
if (is.null(chromosome_name)) "Distance: " else "Position: ",
"{round(x / 1000000, digits = 2)} MBp<br>",
"Score: {round(score, digits = 2)}<br>",
"Rank: {rank}<br>",
"Percentile: {round(percentile * 100, digits = 2)}%"
),
marker = list(
color = gene_set_color(index),
size = 8
),
hoverinfo = "text"
)
index <- index + 1
}
fig
2022-05-17 21:48:30 +02:00
}
#' Helper function for creating a vertical line for plotly.
#' @noRd
vline <- function(x) {
list(
type = "line",
y0 = 0,
y1 = 1,
yref = "paper",
x0 = x,
x1 = x,
line = list(
color = "#00000080",
opacity = 0.5,
dash = "dot"
)
)
}
#' Helper function for creating annotations for lines created using [vline()].
#' @noRd
vlineannotation <- function(x) {
list(
text = glue::glue("{round(x * 100)}%"),
showarrow = FALSE,
yref = "paper",
x = x,
y = 1,
xanchor = "left",
xshift = 4,
align = "left",
font = list(
color = "#00000080"
)
)
}