library(data.table)
library(here)

i_am("scripts/gsea.R")

# Gene rankings ----

# Rank the genes separately for each of the two datasets.
ranking_gtex <- ubigen::rank_genes(ubigen::gtex_all)
ranking_cmap <- ubigen::rank_genes(ubigen::cmap)

# Keep only genes present in both rankings. The per-dataset columns are
# disambiguated with the suffixes `_gtex` and `_cmap`.
data <- merge(
  ranking_gtex[, .(gene, score, percentile)],
  ranking_cmap[, .(gene, score, percentile)],
  by = "gene",
  suffixes = c(x = "_gtex", y = "_cmap")
)

# Combined score: product of both scores. After sorting by descending score,
# the first row receives the percentile (N - 1) / N and the last row 0.
data[, score := score_gtex * score_cmap]
setorder(data, -score)
data[, percentile := (.N - .I) / .N]

# Enrichment analyses ----

# Genes within the top 5 % for GTEx, but not for CMap. All genes of the merged
# table serve as the custom background.
gsea_1_0 <- gprofiler2::gost(
  data[percentile_gtex >= 0.95 & percentile_cmap < 0.95, gene],
  domain_scope = "custom_annotated",
  custom_bg = data[, gene]
)

# Genes within the top 5 % for both GTEx and CMap.
gsea_1_1 <- gprofiler2::gost(
  data[percentile_gtex >= 0.95 & percentile_cmap >= 0.95, gene],
  domain_scope = "custom_annotated",
  custom_bg = data[, gene]
)

# Plotting ----

# This code is based on gostplot.R from the gprofiler2 package.

gsea_sources <- c(
  "GO:MF", "GO:BP", "GO:CC", "KEGG", "REAC", "WP", "TF", "MIRNA", "HPA",
  "CORUM", "HP"
)

# One fixed color per annotation source, in the order of `gsea_sources`.
gsea_source_colors <- data.table(
  source = gsea_sources,
  color = c(
    "#dc3912", "#ff9900", "#109618", "#dd4477", "#3366cc", "#0099c6",
    "#5574a6", "#22aa99", "#6633cc", "#66aa00", "#990099"
  )
)

# Linearly rescale a numeric vector to the interval [0, 1].
#
# x: numeric vector.
#
# Returns a vector of the same length as `x`. Empty input yields an empty
# vector. If all values are identical, the range is zero and there is nothing
# to interpolate; every element is then mapped to 0 instead of NaN. Missing
# values propagate as they did before (the whole result is NA).
lerp <- function(x) {
  if (length(x) == 0L) {
    return(numeric(0))
  }

  bounds <- range(x)
  span <- bounds[2] - bounds[1]

  # Exact comparison is intended: the span is exactly zero if and only if the
  # minimum and the maximum are the same value.
  if (!is.na(span) && span == 0) {
    return(rep(0, length(x)))
  }

  (x - bounds[1]) / span
}

# Create a Manhattan-style plot of enrichment results.
#
# gsea_result: result of `gprofiler2::gost()`. Its `result` table and
#   `meta$result_metadata` entries are read.
# sources: character vector with the annotation sources to include. They are
#   shown in the order of `gsea_sources`.
#
# Returns a plotly figure with one marker per term. Terms are positioned along
# the x axis by source; the y axis shows -log10(p), where everything above 16
# is collapsed onto a single row at 17.
gsea_plot <- function(
    gsea_result,
    sources = c("GO:MF", "GO:BP", "GO:CC", "KEGG", "REAC", "WP", "TF", "HP")) {
  # `gprofiler2::gost()` returns NULL if the query yielded no results. Fail
  # with an explicit message instead of an obscure error further down.
  if (is.null(gsea_result)) {
    stop("`gsea_result` is NULL: there are no results to plot.", call. = FALSE)
  }

  # Subsetting creates a copy, so the `:=` calls below do not modify
  # `gsea_source_colors`.
  source_data <- gsea_source_colors[source %chin% sources]

  if (nrow(source_data) == 0L) {
    stop("`sources` does not contain any known source.", call. = FALSE)
  }

  # The width of each source's section is its total number of terms.
  source_data[,
    width := gsea_result$meta$result_metadata[[source]]$number_of_terms,
    by = source
  ]

  # Add a gap of 2000 units after every source except the last one.
  source_data[seq_len(.N - 1), width := width + 2000]

  # Left edge and center of each section along the x axis.
  source_data[, source_x := cumsum(width) - width]
  source_data[, source_center := source_x + width / 2]

  terms <- gsea_result$result |>
    as.data.table()

  # This attaches color and offset per source and drops terms from sources
  # that were not requested.
  terms <- merge(terms, source_data, by = "source")

  terms[, x := source_x + source_order]
  terms[, y := -log10(p_value)]

  # Collapse everything beyond the last tick ("≥ 16") onto one row.
  terms[y > 16, y := 17]

  plotly::plot_ly() |>
    plotly::add_markers(
      data = terms,
      x = ~x,
      y = ~y,
      text = ~term_name,
      marker = list(
        # Marker diameters range from 4 (smallest term) to 10 (largest term).
        size = ~ 4 + 6 * lerp(term_size),
        color = ~color,
        line = list(width = 0)
      ),
      cliponaxis = FALSE
    ) |>
    plotly::layout(
      xaxis = list(
        title = "",
        range = c(0, source_data[.N, source_x + width]),
        tickmode = "array",
        tickvals = source_data[, source_center],
        ticktext = source_data[, source],
        showgrid = FALSE,
        zeroline = FALSE
      ),
      yaxis = list(
        title = "−log₁₀(p)",
        range = c(0, 18),
        tickmode = "array",
        tickvals = c(2, 4, 6, 8, 10, 12, 14, 16),
        ticktext = c("2", "4", "6", "8", "10", "12", "14", "≥ 16")
      ),
      font = list(size = 8),
      margin = list(
        pad = 2,
        l = 0,
        r = 0,
        t = 0,
        b = 0
      )
    )
}

fig_gsea_1_0 <- gsea_plot(gsea_1_0)
fig_gsea_1_1 <- gsea_plot(gsea_1_1)

# Output ----

# Plotly specifies all sizes in pixels, including font size. Because of
# that, we can actually think of these pixels as points. One point is defined as
# 1/72 inch and SVG uses 96 DPI as the standard resolution.
#
# 1 plotly pixel = 1 point = 1/72 inch = 1 1/3 actual pixels
#
# So, we specify width and height in points (= plotly pixels) and scale up the
# image by 96/72 to convert everything from points to pixels at 96 DPI.

# Make sure that the output directory exists before writing into it.
dir.create(here("scripts/output"), recursive = TRUE, showWarnings = FALSE)

plotly::save_image(
  fig_gsea_1_0,
  file = here("scripts/output/gsea_1_0.svg"),
  width = 6.27 * 72,
  height = 3.135 * 72,
  scale = 96 / 72
)

plotly::save_image(
  fig_gsea_1_1,
  file = here("scripts/output/gsea_1_1.svg"),
  width = 6.27 * 72,
  height = 3.135 * 72,
  scale = 96 / 72
)