From ed796853afbc1402d1d6e31db9a0288e24525cd1 Mon Sep 17 00:00:00 2001 From: Elias Projahn Date: Mon, 9 Sep 2024 10:48:03 +0200 Subject: [PATCH] scripts: Add gsea_vs_cmap.R --- scripts/gsea_vs_cmap.R | 152 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100644 scripts/gsea_vs_cmap.R diff --git a/scripts/gsea_vs_cmap.R b/scripts/gsea_vs_cmap.R new file mode 100644 index 0000000..410b273 --- /dev/null +++ b/scripts/gsea_vs_cmap.R @@ -0,0 +1,152 @@ +library(data.table) +library(here) + +i_am("scripts/gsea_vs_cmap.R") + +ranking_gtex <- ubigen::rank_genes(ubigen::gtex_all) +ranking_cmap <- ubigen::rank_genes(ubigen::cmap) + +data <- merge( + ranking_gtex[, .(gene, score, percentile)], + ranking_cmap[, .(gene, score, percentile)], + by = "gene", + suffixes = c(x = "_gtex", y = "_cmap") +) + +cat("N =", nrow(data)) + +# OPTICS clustering + +result_optics <- dbscan::optics( + data[, .(score_gtex, score_cmap)], + minPts = 0.01 * nrow(data) +) + +result_cluster <- result_optics |> dbscan::extractXi(0.05) +data[, cluster := result_cluster$cluster] + +cluster_means <- data[, + .( + score_gtex = mean(score_gtex), + score_cmap = mean(score_cmap) + ), + by = cluster +] + +cluster_means[, distance_1_1 := sqrt( + (1.0 - score_gtex)^2 + (1.0 - score_cmap)^2 +)] + +cluster_1_1 <- cluster_means[which.min(distance_1_1), cluster] +genes_optics <- data[cluster == cluster_1_1, gene] +write(genes_optics, here("scripts/output/genes_optics.txt")) + +# Percentiles + +genes_1_1 <- data[percentile_gtex >= 0.95 & percentile_cmap >= 0.95, gene] +genes_1_0 <- data[percentile_gtex >= 0.95 & percentile_cmap < 0.95, gene] + +write(genes_1_1, here("scripts/output/genes_1_1.txt")) +write(genes_1_0, here("scripts/output/genes_1_0.txt")) + +threshold_gtex <- data[percentile_gtex >= 0.95, min(score_gtex)] +threshold_cmap <- data[percentile_cmap >= 0.95, min(score_cmap)] + +fig <- plotly::plot_ly() |> + plotly::add_markers( + data = data, + x = ~score_gtex, + y = ~score_cmap, + name = ~cluster, + showlegend = FALSE, + marker = list(size = 4), + cliponaxis = FALSE + ) |> + plotly::layout( + xaxis = list( + title = "Ranking based on GTEx", + range = c(0, 1) + ), + yaxis = list( + title = "Ranking based on CMap", + range = c(0, 1) + ), + annotations = list( + list( + text = "95%", + x = threshold_gtex, + y = 1, + xshift = 2, + yshift = 3, + yref = "paper", + xanchor = "left", + yanchor = "top", + showarrow = FALSE + ), + list( + text = "95%", + x = 1, + y = threshold_cmap, + yshift = 2, + xref = "paper", + xanchor = "right", + yanchor = "bottom", + showarrow = FALSE + ) + ), + shapes = list( + list( + type = "line", + y0 = 0, + y1 = 1, + yref = "paper", + x0 = threshold_gtex, + x1 = threshold_gtex, + line = list( + color = "#00000080", + opacity = 0.5, + width = 1, + dash = "dot" + ) + ), + list( + type = "line", + y0 = threshold_cmap, + y1 = threshold_cmap, + x0 = 0, + x1 = 1, + xref = "paper", + line = list( + color = "#00000080", + width = 1, + opacity = 0.5, + dash = "dot" + ) + ) + ), + font = list(size = 8), + margin = list( + pad = 2, + l = 36, + r = 0, + t = 0, + b = 36 + ) + ) + +# Plotly specifies all sizes in pixels, including font size. Because of +# that, we can actually think of these pixels as points. One point is defined as +# 1/72 inch and SVG uses 96 DPI as the standard resolution. +# +# 1 plotly pixel = 1 point = 1/72 inch = 1 1/3 actual pixels +# +# So, we specify width and height in points (= plotly pixels) and scale up the +# image by 96/72 to convert everything from points to pixels at 96 DPI. + +plotly::save_image( + fig, + file = here("scripts/output/gsea_vs_cmap.svg"), + width = 6.27 * 72, + height = 6.27 * 72, + scale = 96 / 72 +)