From ba1f9f3a7b9ded5e50d72f0768e02d702698eb73 Mon Sep 17 00:00:00 2001 From: Elias Projahn Date: Wed, 15 Jun 2022 10:25:11 +0200 Subject: [PATCH] Add script for performing a bucket-wise GSEA --- .Rbuildignore | 1 + scripts/.gitignore | 1 + scripts/gsea.R | 42 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+) create mode 100644 scripts/.gitignore create mode 100644 scripts/gsea.R diff --git a/.Rbuildignore b/.Rbuildignore index 5163d0b..590a58f 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1 +1,2 @@ +^scripts$ ^LICENSE\.md$ diff --git a/scripts/.gitignore b/scripts/.gitignore new file mode 100644 index 0000000..16be8f2 --- /dev/null +++ b/scripts/.gitignore @@ -0,0 +1 @@ +/output/ diff --git a/scripts/gsea.R b/scripts/gsea.R new file mode 100644 index 0000000..53cb72b --- /dev/null +++ b/scripts/gsea.R @@ -0,0 +1,42 @@ +# This script performs a gene set enrichment analysis (GSEA) across the whole +# ranking of ubiquituos genes using g:Profiler. + +# Size of each gene bucket. The GSEA is done once for each bucket within the +# ranking. +bucket_size <- 500 + +library(data.table) +library(here) + +i_am("scripts/gsea.R") +file_path <- here("scripts", "output", "ubigen_gsea.Rds") +image_path <- here("scripts", "output", "ubigen_gsea.svg") + +# The result will be saved in `file_path` to avoid unnecessary API calls. +result <- if (file.exists(file_path)) { + readRDS(file_path) +} else { + data <- copy(ubigen::genes) + data[, bucket := ceiling(rank / bucket_size)] + + result <- data[, .(analysis = list(gprofiler2::gost(gene))), by = bucket] + result[, count := nrow(analysis[[1]]$result), by = bucket] + result[is.na(count), count := 0] + + saveRDS(result, file = file_path) + + result +} + +fig <- plotly::plot_ly() |> + plotly::add_bars( + data = result, + x = ~bucket, + y = ~count + ) |> + plotly::layout( + xaxis = list(title = "Bucket of genes (n = 500)"), + yaxis = list(title = "Number of associated terms") + ) + +plotly::save_image(fig, image_path, width = 1200, height = 800)