| 
									
										
										
										
											2024-12-01 17:04:19 +01:00
										 |  |  | # This script performs a gene set enrichment analysis (GSEA) across the whole | 
					
						
							|  |  |  | # ranking of ubiquituos genes using g:Profiler. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Size of each gene bucket. The GSEA is done once for each bucket within the | 
					
						
							|  |  |  | # ranking. | 
					
						
							|  |  |  | bucket_size <- 500 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | library(data.table) | 
					
						
							|  |  |  | library(here) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-12-18 13:16:31 +01:00
										 |  |  | i_am("scripts/sliding_gsea_package.R") | 
					
						
							| 
									
										
										
										
											2024-12-01 17:04:19 +01:00
										 |  |  | file_path <- here("scripts", "output", "ubigen_gsea.Rds") | 
					
						
							|  |  |  | image_path <- here("scripts", "output", "ubigen_gsea.svg") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # The result will be saved in `file_path` to avoid unnecessary API calls. | 
					
						
							|  |  |  | result <- if (file.exists(file_path)) { | 
					
						
							|  |  |  |   readRDS(file_path) | 
					
						
							|  |  |  | } else { | 
					
						
							|  |  |  |   data <- copy(ubigen::genes) | 
					
						
							|  |  |  |   data[, bucket := ceiling(rank / bucket_size)] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   result <- data[, .(analysis = list(gprofiler2::gost(gene))), by = bucket] | 
					
						
							|  |  |  |   saveRDS(result, file = file_path) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   result | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | result[, result := lapply(analysis, function(a) a$result)] | 
					
						
							|  |  |  | result <- result[, rbindlist(result), by = bucket] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | data <- result[, .(count = .N), by = c("bucket", "source")] | 
					
						
							|  |  |  | data[, total := sum(count), by = bucket] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | smooth_model <- loess(total ~ bucket, data, span = 0.3) | 
					
						
							|  |  |  | bucket_smoothed <- seq(1, nrow(data), 0.1) | 
					
						
							|  |  |  | total_smoothed <- predict(smooth_model, bucket_smoothed) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | fig <- plotly::plot_ly(data) |> | 
					
						
							|  |  |  |   plotly::add_bars( | 
					
						
							|  |  |  |     x = ~bucket, | 
					
						
							|  |  |  |     y = ~count, | 
					
						
							|  |  |  |     color = ~source | 
					
						
							|  |  |  |   ) |> | 
					
						
							|  |  |  |   plotly::add_lines( | 
					
						
							|  |  |  |     x = bucket_smoothed, | 
					
						
							|  |  |  |     y = total_smoothed, | 
					
						
							|  |  |  |     name = "All (smoothed)" | 
					
						
							|  |  |  |   ) |> | 
					
						
							|  |  |  |   plotly::layout( | 
					
						
							|  |  |  |     xaxis = list(title = glue::glue("Bucket of genes (n = {bucket_size})")), | 
					
						
							|  |  |  |     yaxis = list(title = "Number of associated terms"), | 
					
						
							|  |  |  |     barmode = "stack", | 
					
						
							|  |  |  |     legend = list(title = list(text = "<b>Source of term</b>")) | 
					
						
							|  |  |  |   ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | plotly::save_image(fig, image_path, width = 1200, height = 800) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | gsea_plot_ranking <- fig | 
					
						
							|  |  |  | usethis::use_data(gsea_plot_ranking, internal = TRUE, overwrite = TRUE) |