From ba1f9f3a7b9ded5e50d72f0768e02d702698eb73 Mon Sep 17 00:00:00 2001
From: Elias Projahn <elias@johrpan.de>
Date: Wed, 15 Jun 2022 10:25:11 +0200
Subject: [PATCH] Add script for performing a bucket-wise GSEA

---
 .Rbuildignore      |  1 +
 scripts/.gitignore |  1 +
 scripts/gsea.R     | 42 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+)
 create mode 100644 scripts/.gitignore
 create mode 100644 scripts/gsea.R

diff --git a/.Rbuildignore b/.Rbuildignore
index 5163d0b..590a58f 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -1 +1,2 @@
+^scripts$
 ^LICENSE\.md$
diff --git a/scripts/.gitignore b/scripts/.gitignore
new file mode 100644
index 0000000..16be8f2
--- /dev/null
+++ b/scripts/.gitignore
@@ -0,0 +1 @@
+/output/
diff --git a/scripts/gsea.R b/scripts/gsea.R
new file mode 100644
index 0000000..53cb72b
--- /dev/null
+++ b/scripts/gsea.R
@@ -0,0 +1,42 @@
+# This script performs a gene set enrichment analysis (GSEA) across the whole
+# ranking of ubiquituos genes using g:Profiler.
+
+# Size of each gene bucket. The GSEA is done once for each bucket within the
+# ranking.
+bucket_size <- 500
+
+library(data.table)
+library(here)
+
+i_am("scripts/gsea.R")
+file_path <- here("scripts", "output", "ubigen_gsea.Rds")
+image_path <- here("scripts", "output", "ubigen_gsea.svg")
+
+# The result will be saved in `file_path` to avoid unnecessary API calls.
+result <- if (file.exists(file_path)) {
+  readRDS(file_path)
+} else {
+  data <- copy(ubigen::genes)
+  data[, bucket := ceiling(rank / bucket_size)]
+
+  result <- data[, .(analysis = list(gprofiler2::gost(gene))), by = bucket]
+  result[, count := nrow(analysis[[1]]$result), by = bucket]
+  result[is.na(count), count := 0]
+
+  saveRDS(result, file = file_path)
+
+  result
+}
+
+fig <- plotly::plot_ly() |>
+  plotly::add_bars(
+    data = result,
+    x = ~bucket,
+    y = ~count
+  ) |>
+  plotly::layout(
+    xaxis = list(title = "Bucket of genes (n = 500)"),
+    yaxis = list(title = "Number of associated terms")
+  )
+
+plotly::save_image(fig, image_path, width = 1200, height = 800)