mirror of
https://github.com/johrpan/ubigen.git
synced 2025-10-26 19:57:24 +01:00
scripts: Add normal GSEA
This commit is contained in:
parent
54b3315041
commit
7a21e15c0e
2 changed files with 201 additions and 45 deletions
187
scripts/gsea.R
187
scripts/gsea.R
|
|
@ -1,59 +1,156 @@
|
|||
# This script performs a gene set enrichment analysis (GSEA) across the whole
|
||||
# ranking of ubiquituos genes using g:Profiler.
|
||||
|
||||
# Size of each gene bucket. The GSEA is done once for each bucket within the
|
||||
# ranking.
|
||||
bucket_size <- 500
|
||||
|
||||
library(data.table)
|
||||
library(here)
|
||||
|
||||
i_am("scripts/gsea.R")
|
||||
file_path <- here("scripts", "output", "ubigen_gsea.Rds")
|
||||
image_path <- here("scripts", "output", "ubigen_gsea.svg")
|
||||
|
||||
# The result will be saved in `file_path` to avoid unnecessary API calls.
|
||||
result <- if (file.exists(file_path)) {
|
||||
readRDS(file_path)
|
||||
} else {
|
||||
data <- copy(ubigen::genes)
|
||||
data[, bucket := ceiling(rank / bucket_size)]
|
||||
ranking_gtex <- ubigen::rank_genes(ubigen::gtex_all)
|
||||
ranking_cmap <- ubigen::rank_genes(ubigen::cmap)
|
||||
|
||||
result <- data[, .(analysis = list(gprofiler2::gost(gene))), by = bucket]
|
||||
saveRDS(result, file = file_path)
|
||||
data <- merge(
|
||||
ranking_gtex[, .(gene, score, percentile)],
|
||||
ranking_cmap[, .(gene, score, percentile)],
|
||||
by = "gene",
|
||||
suffixes = c(x = "_gtex", y = "_cmap")
|
||||
)
|
||||
|
||||
result
|
||||
data[, score := score_gtex * score_cmap]
|
||||
setorder(data, -score)
|
||||
data[, percentile := (.N - .I) / .N]
|
||||
|
||||
gsea_1_0 <- gprofiler2::gost(
|
||||
data[percentile_gtex >= 0.95 & percentile_cmap < 0.95, gene],
|
||||
domain_scope = "custom_annotated",
|
||||
custom_bg = data[, gene]
|
||||
)
|
||||
|
||||
gsea_1_1 <- gprofiler2::gost(
|
||||
data[percentile_gtex >= 0.95 & percentile_cmap >= 0.95, gene],
|
||||
domain_scope = "custom_annotated",
|
||||
custom_bg = data[, gene]
|
||||
)
|
||||
|
||||
# This code is based on gostplot.R from the gprofiler2 package.
|
||||
|
||||
gsea_sources <- c(
|
||||
"GO:MF",
|
||||
"GO:BP",
|
||||
"GO:CC",
|
||||
"KEGG",
|
||||
"REAC",
|
||||
"WP",
|
||||
"TF",
|
||||
"MIRNA",
|
||||
"HPA",
|
||||
"CORUM",
|
||||
"HP"
|
||||
)
|
||||
|
||||
gsea_source_colors <- data.table(
|
||||
source = gsea_sources,
|
||||
color = c(
|
||||
"#dc3912",
|
||||
"#ff9900",
|
||||
"#109618",
|
||||
"#dd4477",
|
||||
"#3366cc",
|
||||
"#0099c6",
|
||||
"#5574a6",
|
||||
"#22aa99",
|
||||
"#6633cc",
|
||||
"#66aa00",
|
||||
"#990099"
|
||||
)
|
||||
)
|
||||
|
||||
lerp <- function(x) {
|
||||
(x - min(x)) / (max(x) - min(x))
|
||||
}
|
||||
|
||||
result[, result := lapply(analysis, function(a) a$result)]
|
||||
result <- result[, rbindlist(result), by = bucket]
|
||||
gsea_plot <- function(
|
||||
gsea_result,
|
||||
sources = c("GO:MF", "GO:BP", "GO:CC", "KEGG", "REAC", "WP", "TF", "HP")) {
|
||||
source_data <- gsea_source_colors[source %chin% sources]
|
||||
|
||||
data <- result[, .(count = .N), by = c("bucket", "source")]
|
||||
data[, total := sum(count), by = bucket]
|
||||
source_data[,
|
||||
width := gsea_result$meta$result_metadata[[source]]$number_of_terms,
|
||||
by = source
|
||||
]
|
||||
|
||||
smooth_model <- loess(total ~ bucket, data, span = 0.3)
|
||||
bucket_smoothed <- seq(1, nrow(data), 0.1)
|
||||
total_smoothed <- predict(smooth_model, bucket_smoothed)
|
||||
source_data[seq_len(.N - 1), width := width + 2000]
|
||||
source_data[, source_x := cumsum(width) - width]
|
||||
source_data[, source_center := source_x + width / 2]
|
||||
|
||||
fig <- plotly::plot_ly(data) |>
|
||||
plotly::add_bars(
|
||||
x = ~bucket,
|
||||
y = ~count,
|
||||
color = ~source
|
||||
) |>
|
||||
plotly::add_lines(
|
||||
x = bucket_smoothed,
|
||||
y = total_smoothed,
|
||||
name = "All (smoothed)"
|
||||
) |>
|
||||
plotly::layout(
|
||||
xaxis = list(title = glue::glue("Bucket of genes (n = {bucket_size})")),
|
||||
yaxis = list(title = "Number of associated terms"),
|
||||
barmode = "stack",
|
||||
legend = list(title = list(text = "<b>Source of term</b>"))
|
||||
)
|
||||
data <- gsea_result$result |> as.data.table()
|
||||
data <- merge(data, source_data, by = "source")
|
||||
data[, x := source_x + source_order]
|
||||
data[, y := -log10(p_value)]
|
||||
data[y > 16, y := 17]
|
||||
|
||||
plotly::save_image(fig, image_path, width = 1200, height = 800)
|
||||
plotly::plot_ly() |>
|
||||
plotly::add_markers(
|
||||
data = data,
|
||||
x = ~x,
|
||||
y = ~y,
|
||||
text = ~term_name,
|
||||
marker = list(
|
||||
size = ~ 4 + 6 * lerp(term_size),
|
||||
color = ~color,
|
||||
line = list(width = 0)
|
||||
),
|
||||
cliponaxis = FALSE
|
||||
) |>
|
||||
plotly::layout(
|
||||
xaxis = list(
|
||||
title = "",
|
||||
range = c(0, source_data[.N, source_x + width]),
|
||||
tickmode = "array",
|
||||
tickvals = source_data[, source_center],
|
||||
ticktext = source_data[, source],
|
||||
showgrid = FALSE,
|
||||
zeroline = FALSE
|
||||
),
|
||||
yaxis = list(
|
||||
title = "−log₁₀(p)",
|
||||
range = c(0, 18),
|
||||
tickmode = "array",
|
||||
tickvals = c(2, 4, 6, 8, 10, 12, 14, 16),
|
||||
ticktext = c("2", "4", "6", "8", "10", "12", "14", "≥ 16")
|
||||
),
|
||||
font = list(size = 8),
|
||||
margin = list(
|
||||
pad = 2,
|
||||
l = 0,
|
||||
r = 0,
|
||||
t = 0,
|
||||
b = 0
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
gsea_plot_ranking <- fig
|
||||
usethis::use_data(gsea_plot_ranking, internal = TRUE, overwrite = TRUE)
|
||||
fig_gsea_1_0 <- gsea_plot(gsea_1_0)
|
||||
fig_gsea_1_1 <- gsea_plot(gsea_1_1)
|
||||
|
||||
# Plotly specifies all sizes in pixels, including font size. Because of
|
||||
# that, we can actually think of these pixels as points. One point is defined as
|
||||
# 1/72 inch and SVG uses 96 DPI as the standard resolution.
|
||||
#
|
||||
# 1 plotly pixel = 1 point = 1/72 inch = 1 1/3 actual pixels
|
||||
#
|
||||
# So, we specify width and height in points (= plotly pixels) and scale up the
|
||||
# image by 96/72 to convert everything from points to pixels at 96 DPI.
|
||||
|
||||
plotly::save_image(
|
||||
fig_gsea_1_0,
|
||||
file = here("scripts/output/gsea_1_0.svg"),
|
||||
width = 6.27 * 72,
|
||||
height = 3.135 * 72,
|
||||
scale = 96 / 72
|
||||
)
|
||||
|
||||
plotly::save_image(
|
||||
fig_gsea_1_1,
|
||||
file = here("scripts/output/gsea_1_1.svg"),
|
||||
width = 6.27 * 72,
|
||||
height = 3.135 * 72,
|
||||
scale = 96 / 72
|
||||
)
|
||||
|
|
|
|||
59
scripts/sliding_gsea.R
Normal file
59
scripts/sliding_gsea.R
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
# This script performs a gene set enrichment analysis (GSEA) across the whole
|
||||
# ranking of ubiquituos genes using g:Profiler.
|
||||
|
||||
# Size of each gene bucket. The GSEA is done once for each bucket within the
|
||||
# ranking.
|
||||
bucket_size <- 500
|
||||
|
||||
library(data.table)
|
||||
library(here)
|
||||
|
||||
i_am("scripts/gsea.R")
|
||||
file_path <- here("scripts", "output", "ubigen_gsea.Rds")
|
||||
image_path <- here("scripts", "output", "ubigen_gsea.svg")
|
||||
|
||||
# The result will be saved in `file_path` to avoid unnecessary API calls.
|
||||
result <- if (file.exists(file_path)) {
|
||||
readRDS(file_path)
|
||||
} else {
|
||||
data <- copy(ubigen::genes)
|
||||
data[, bucket := ceiling(rank / bucket_size)]
|
||||
|
||||
result <- data[, .(analysis = list(gprofiler2::gost(gene))), by = bucket]
|
||||
saveRDS(result, file = file_path)
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
result[, result := lapply(analysis, function(a) a$result)]
|
||||
result <- result[, rbindlist(result), by = bucket]
|
||||
|
||||
data <- result[, .(count = .N), by = c("bucket", "source")]
|
||||
data[, total := sum(count), by = bucket]
|
||||
|
||||
smooth_model <- loess(total ~ bucket, data, span = 0.3)
|
||||
bucket_smoothed <- seq(1, nrow(data), 0.1)
|
||||
total_smoothed <- predict(smooth_model, bucket_smoothed)
|
||||
|
||||
fig <- plotly::plot_ly(data) |>
|
||||
plotly::add_bars(
|
||||
x = ~bucket,
|
||||
y = ~count,
|
||||
color = ~source
|
||||
) |>
|
||||
plotly::add_lines(
|
||||
x = bucket_smoothed,
|
||||
y = total_smoothed,
|
||||
name = "All (smoothed)"
|
||||
) |>
|
||||
plotly::layout(
|
||||
xaxis = list(title = glue::glue("Bucket of genes (n = {bucket_size})")),
|
||||
yaxis = list(title = "Number of associated terms"),
|
||||
barmode = "stack",
|
||||
legend = list(title = list(text = "<b>Source of term</b>"))
|
||||
)
|
||||
|
||||
plotly::save_image(fig, image_path, width = 1200, height = 800)
|
||||
|
||||
gsea_plot_ranking <- fig
|
||||
usethis::use_data(gsea_plot_ranking, internal = TRUE, overwrite = TRUE)
|
||||
Loading…
Add table
Add a link
Reference in a new issue