plots: Refactor and improve

This commit is contained in:
Elias Projahn 2021-12-02 17:23:18 +01:00
parent f997b5fdd7
commit a347bf0ad4
5 changed files with 125 additions and 138 deletions

View file

@ -12,4 +12,5 @@ export(plot_rankings)
export(plot_scores) export(plot_scores)
export(preset) export(preset)
export(ranking) export(ranking)
export(validate)
import(data.table) import(data.table)

193
R/plots.R
View file

@ -4,62 +4,63 @@
#' #'
#' @param species_ids IDs of species to show in the plot. #' @param species_ids IDs of species to show in the plot.
#' @param gene_sets A list of gene sets (containing vectors of gene IDs) that #' @param gene_sets A list of gene sets (containing vectors of gene IDs) that
#' will be highlighted in the plot. #' will be highlighted in the plot. The names will be used as labels.
#' @param labels Labels for the gene sets. This is required if gene sets are
#' given and has to have the same length.
#' @param use_positions Whether to display positions instead of distances.
#' #'
#' @export #' @export
plot_positions <- function(species_ids, plot_positions <- function(species_ids, gene_sets) {
gene_sets,
labels,
use_positions = FALSE) {
if (!requireNamespace("plotly", quietly = TRUE)) { if (!requireNamespace("plotly", quietly = TRUE)) {
stop("Please install \"plotly\" to use this function.") stop("Please install \"plotly\" to use this function.")
} }
data <- merge( # Prefilter data by species.
geposan::distances[gene %chin% unlist(gene_sets) & data <- geposan::distances[species %chin% species_ids]
species %chin% species_ids],
geposan::genes[, .(id, name)],
by.x = "gene", by.y = "id"
)
if (use_positions) { sample_data <- data[sample(nrow(data), 1000)]
data[, value := position]
} else {
data[, value := distance]
}
# Add labels for each gene set.
for (i in seq_along(gene_sets)) {
data[gene %chin% gene_sets[[i]], label := labels[i]]
}
# Prefilter species.
species <- geposan::species[id %chin% species_ids] species <- geposan::species[id %chin% species_ids]
yaxis_title <- if (use_positions) { plot <- plotly::plot_ly(colors = "Set2") |>
"Position [Bp]" plotly::add_markers(
} else { data = sample_data,
"Distance to telomeres [Bp]"
}
plotly::plot_ly(
data = data,
x = ~species, x = ~species,
y = ~value, y = ~distance,
color = ~label, color = "All genes",
text = ~name, hoverinfo = "skip"
type = "scatter", ) |>
mode = "markers" plotly::layout(
) |> plotly::layout(
xaxis = list( xaxis = list(
title = "Species", title = "Species",
tickvals = species$id, tickvals = species$id,
ticktext = species$name ticktext = species$name
), ),
yaxis = list(title = yaxis_title) yaxis = list(title = "Distance to telomeres [Bp]")
) )
if (length(gene_sets) > 0) {
# Include gene information which will be used for labeling
gene_set_data <- merge(
data[gene %chin% unlist(gene_sets)],
geposan::genes,
by.x = "gene",
by.y = "id"
)
for (gene_set_name in names(gene_sets)) {
gene_set <- gene_sets[[gene_set_name]]
plot <- plot |> plotly::add_markers(
data = gene_set_data[gene %chin% gene_set],
x = ~species,
y = ~distance,
text = ~name,
color = gene_set_name,
marker = list(size = 10, opacity = 0.66)
)
}
}
plot
} }
@ -84,10 +85,7 @@ plot_rankings <- function(rankings, gene_sets) {
plot <- plotly::plot_ly(colors = "Set2") |> plot <- plotly::plot_ly(colors = "Set2") |>
plotly::layout( plotly::layout(
xaxis = list( xaxis = list(tickvals = names(rankings)),
title = "Ranking",
tickvals = names(rankings)
),
yaxis = list(title = "Score") yaxis = list(title = "Score")
) )
@ -96,13 +94,6 @@ plot_rankings <- function(rankings, gene_sets) {
for (ranking_name in names(rankings)) { for (ranking_name in names(rankings)) {
ranking <- rankings[[ranking_name]] ranking <- rankings[[ranking_name]]
data <- merge(
ranking,
geposan::genes,
by.x = "gene",
by.y = "id"
)
plot <- plot |> plotly::add_trace( plot <- plot |> plotly::add_trace(
data = ranking, data = ranking,
x = ranking_name, x = ranking_name,
@ -115,11 +106,19 @@ plot_rankings <- function(rankings, gene_sets) {
hoverinfo = "skip" hoverinfo = "skip"
) )
if (length(gene_sets) > 0) {
gene_set_data <- merge(
ranking[gene %chin% unlist(gene_sets)],
geposan::genes,
by.x = "gene",
by.y = "id"
)
for (gene_set_name in names(gene_sets)) { for (gene_set_name in names(gene_sets)) {
gene_set <- gene_sets[[gene_set_name]] gene_set <- gene_sets[[gene_set_name]]
plot <- plot |> plotly::add_markers( plot <- plot |> plotly::add_markers(
data = data[gene %chin% gene_set], data = gene_set_data[gene %chin% gene_set],
x = ranking_name, x = ranking_name,
y = ~score, y = ~score,
text = ~name, text = ~name,
@ -128,6 +127,7 @@ plot_rankings <- function(rankings, gene_sets) {
marker = list(size = 20, opacity = 0.66) marker = list(size = 20, opacity = 0.66)
) )
} }
}
is_first <- FALSE is_first <- FALSE
} }
@ -141,32 +141,25 @@ plot_rankings <- function(rankings, gene_sets) {
#' This function requires the package `plotly`. #' This function requires the package `plotly`.
#' #'
#' @param ranking The ranking to visualize. #' @param ranking The ranking to visualize.
#' @param gene_sets A list of gene sets (containing vectors of gene IDs) that #' @param gene_sets A named list of gene sets (containing vectors of gene IDs)
#' will be highlighted in the plot. #' that will be highlighted in the plot. The names will be used in the legend.
#' @param labels Labels for the gene sets. This is required if gene sets are #' @param max_rank The maximum rank of included genes. All genes that are ranked
#' given and has to have the same length. #' lower will appear greyed out.
#' @param max_rank The maximum rank of the highlighted genes. All genes that
#' are ranked lower will appear greyed out.
#' #'
#' @seealso ranking() #' @seealso ranking()
#' #'
#' @export #' @export
plot_scores <- function(ranking, plot_scores <- function(ranking, gene_sets = NULL, max_rank = NULL) {
gene_sets = NULL,
labels = NULL,
max_rank = NULL) {
if (!requireNamespace("plotly", quietly = TRUE)) { if (!requireNamespace("plotly", quietly = TRUE)) {
stop("Please install \"plotly\" to use this function.") stop("Please install \"plotly\" to use this function.")
} }
plot <- plotly::plot_ly() |> plot <- plotly::plot_ly(colors = "Set2") |>
plotly::add_trace( plotly::add_markers(
data = ranking, data = ranking,
x = ~rank, x = ~rank,
y = ~score, y = ~score,
color = "All genes", color = "All genes",
type = "scatter",
mode = "markers",
hoverinfo = "skip" hoverinfo = "skip"
) |> ) |>
plotly::layout( plotly::layout(
@ -175,33 +168,27 @@ plot_scores <- function(ranking,
) )
if (length(gene_sets) > 0) { if (length(gene_sets) > 0) {
# Take out the genes to be highlighted.
gene_set_data <- ranking[gene %chin% unlist(gene_sets)]
# Add labels for each gene set.
for (i in seq_along(gene_sets)) {
gene_set_data[gene %chin% gene_sets[[i]], label := labels[i]]
}
# Include gene information which will be used for labeling # Include gene information which will be used for labeling
gene_set_data <- merge( gene_set_data <- merge(
gene_set_data, ranking[gene %chin% unlist(gene_sets)],
geposan::genes, geposan::genes,
by.x = "gene", by.x = "gene",
by.y = "id" by.y = "id"
) )
plot <- plot |> plotly::add_trace( for (gene_set_name in names(gene_sets)) {
data = gene_set_data, gene_set <- gene_sets[[gene_set_name]]
plot <- plot |> plotly::add_markers(
data = gene_set_data[gene %chin% gene_set],
x = ~rank, x = ~rank,
y = ~score, y = ~score,
color = ~label,
text = ~name, text = ~name,
type = "scatter", color = gene_set_name,
mode = "markers", marker = list(size = 20, opacity = 0.66)
marker = list(size = 20)
) )
} }
}
if (!is.null(max_rank)) { if (!is.null(max_rank)) {
@ -231,35 +218,45 @@ plot_scores <- function(ranking,
#' This function requires the package `plotly`. #' This function requires the package `plotly`.
#' #'
#' @param ranking The ranking to visualize. #' @param ranking The ranking to visualize.
#' @param gene_sets A list of gene sets (containing vectors of gene IDs) that #' @param gene_sets A named list of gene sets (containing vectors of gene IDs)
#' will be shown as separate boxes. #' that will be shown as separate boxes. The names will be used as labels.
#' @param labels Labels for the gene sets. This is required if gene sets are
#' given and has to have the same length.
#' #'
#' @seealso ranking() #' @seealso ranking()
#' #'
#' @export #' @export
plot_boxplot <- function(ranking, gene_sets = NULL, labels = NULL) { plot_boxplot <- function(ranking, gene_sets = NULL) {
if (!requireNamespace("plotly", quietly = TRUE)) { if (!requireNamespace("plotly", quietly = TRUE)) {
stop("Please install \"plotly\" to use this function.") stop("Please install \"plotly\" to use this function.")
} }
data <- copy(ranking) plot <- plotly::plot_ly(colors = "Set2") |>
plotly::add_boxplot(
data = ranking,
x = "All genes",
y = ~score,
color = "All genes",
showlegend = FALSE
) |>
plotly::layout(
xaxis = list(tickvals = c("All genes", names(gene_sets))),
yaxis = list(title = "Score")
)
# Add labels for each gene set. if (length(gene_sets) > 0) {
for (i in seq_along(gene_sets)) { for (gene_set_name in names(gene_sets)) {
data[gene %chin% gene_sets[[i]], label := labels[i]] gene_set <- gene_sets[[gene_set_name]]
plot <- plot |> plotly::add_boxplot(
data = ranking[gene %chin% gene_set],
x = gene_set_name,
y = ~score,
color = gene_set_name,
showlegend = FALSE
)
}
} }
# Label the other genes. plot
data[!gene %chin% unlist(gene_sets), label := "Other genes"]
plotly::plot_ly(
data = data,
y = ~score,
color = ~label,
type = "box"
)
} }
#' Show the distribution of scores across chromosomes. #' Show the distribution of scores across chromosomes.

View file

@ -4,16 +4,13 @@
\alias{plot_boxplot} \alias{plot_boxplot}
\title{Visualize a ranking by comparing gene sets in a boxplot.} \title{Visualize a ranking by comparing gene sets in a boxplot.}
\usage{ \usage{
plot_boxplot(ranking, gene_sets = NULL, labels = NULL) plot_boxplot(ranking, gene_sets = NULL)
} }
\arguments{ \arguments{
\item{ranking}{The ranking to visualize.} \item{ranking}{The ranking to visualize.}
\item{gene_sets}{A list of gene sets (containing vectors of gene IDs) that \item{gene_sets}{A named list of gene sets (containing vectors of gene IDs)
will be shown as separate boxes.} that will be shown as separate boxes. The names will be used as labels.}
\item{labels}{Labels for the gene sets. This is required if gene sets are
given and has to have the same length.}
} }
\description{ \description{
This function requires the package \code{plotly}. This function requires the package \code{plotly}.

View file

@ -4,18 +4,13 @@
\alias{plot_positions} \alias{plot_positions}
\title{Plot gene positions.} \title{Plot gene positions.}
\usage{ \usage{
plot_positions(species_ids, gene_sets, labels, use_positions = FALSE) plot_positions(species_ids, gene_sets)
} }
\arguments{ \arguments{
\item{species_ids}{IDs of species to show in the plot.} \item{species_ids}{IDs of species to show in the plot.}
\item{gene_sets}{A list of gene sets (containing vectors of gene IDs) that \item{gene_sets}{A list of gene sets (containing vectors of gene IDs) that
will be highlighted in the plot.} will be highlighted in the plot. The names will be used as labels.}
\item{labels}{Labels for the gene sets. This is required if gene sets are
given and has to have the same length.}
\item{use_positions}{Whether to display positions instead of distances.}
} }
\description{ \description{
This function requires the package \code{plotly}. This function requires the package \code{plotly}.

View file

@ -4,19 +4,16 @@
\alias{plot_scores} \alias{plot_scores}
\title{Plot a ranking as a scatter plot of scores.} \title{Plot a ranking as a scatter plot of scores.}
\usage{ \usage{
plot_scores(ranking, gene_sets = NULL, labels = NULL, max_rank = NULL) plot_scores(ranking, gene_sets = NULL, max_rank = NULL)
} }
\arguments{ \arguments{
\item{ranking}{The ranking to visualize.} \item{ranking}{The ranking to visualize.}
\item{gene_sets}{A list of gene sets (containing vectors of gene IDs) that \item{gene_sets}{A named list of gene sets (containing vectors of gene IDs)
will be highlighted in the plot.} that will be highlighted in the plot. The names will be used in the legend.}
\item{labels}{Labels for the gene sets. This is required if gene sets are \item{max_rank}{The maximum rank of included genes. All genes that are ranked
given and has to have the same length.} lower will appear greyed out.}
\item{max_rank}{The maximum rank of the highlighted genes. All genes that
are ranked lower will appear greyed out.}
} }
\description{ \description{
This function requires the package \code{plotly}. This function requires the package \code{plotly}.