mirror of
				https://github.com/johrpan/ubigen.git
				synced 2025-10-26 03:37:24 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			303 lines
		
	
	
	
		
			6.8 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
			
		
		
	
	
			303 lines
		
	
	
	
		
			6.8 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
| library(data.table)
 | |
| library(here)
 | |
| 
 | |
| i_am("scripts/comparison.R")
 | |
| 
 | |
# Read one published gene list. `scan()` returns every whitespace-separated
# token of the file as one element of a character vector.
read_gene_list <- function(path) {
  scan(here(path), what = character())
}

w2000 <- read_gene_list("scripts/input/datasets/warrington_2000.txt")
z2008 <- read_gene_list("scripts/input/datasets/zhu_2008.txt")
e2013 <- read_gene_list("scripts/input/datasets/eisenberg_2013.txt")
c2011 <- read_gene_list("scripts/input/datasets/chang_2011.txt")
j2022 <- read_gene_list("scripts/input/datasets/joshi_2022.txt")

# All gene lists, keyed by the label that identifies them in the Venn
# diagram and in the per-dataset tables built further down.
datasets <- setNames(
  list(w2000, z2008, e2013, c2011, j2022),
  c(
    "Warrington 2000",
    "Zhu 2008",
    "Eisenberg 2013",
    "Chang 2011",
    "Joshi 2022"
  )
)
 | |
| 
 | |
# Draw the Venn diagram of all datasets. `filename = NULL` is passed so that
# the drawing is handed back to this script, which saves it below.
venn_grobs <- VennDiagram::venn.diagram(
  datasets,
  filename = NULL,
  disable.logging = TRUE
)

# Spell out `filename` and `plot`. The previous call passed `file = ...`,
# which only worked because `file` partially matches `filename` and thereby
# pushed the piped drawing into the `plot` argument.
ggplot2::ggsave(
  filename = here("scripts/output/venn.svg"),
  plot = venn_grobs,
  device = "svg"
)

# One row per region of the Venn diagram, including the genes it contains.
partitions <- data.table(VennDiagram::get.venn.partitions(datasets))

# NOTE(review): this takes the genes of the first partition row, presumably
# the intersection of all datasets -- confirm against get.venn.partitions().
genes_venn <- partitions[1]$..values..[[1]]
write(genes_venn, file = here("scripts/output/genes_venn.txt"))
 | |
| 
 | |
# Gene sets table; the `type` column separates the "literature" genes from
# the "expression" genes (called "Recommended" in the figures below).
gene_sets <- fread(here("scripts/input/gene_sets.csv"))

genes_literature <- unique(gene_sets[type == "literature", gene])
genes_recommended <- unique(gene_sets[type == "expression", gene])

# Running number per literature gene; used as the text label of that gene in
# the comparison scatter plot.
genes_literature_ids <- data.table(
  gene = genes_literature,
  literature_id = seq_along(genes_literature)
)

# NOTE(review): neither ranking is referenced again in this script -- confirm
# whether these two calls are still needed.
ranking_gtex <- ubigen::rank_genes(ubigen::gtex_all)
ranking_cmap <- ubigen::rank_genes(ubigen::cmap)

# Per-gene table that all later steps read their `group`, `score_*` and
# `percentile_*` columns from.
data <- fread(here("scripts/output/gsea_vs_cmap_groups.csv"))
 | |
| 
 | |
# Table of the literature genes: HGNC symbol, the labels of the gene sets
# that list the gene ("source") and the matching columns of `data`.
genes_table <- gene_sets[type == "literature"]

# Map the gene identifiers to HGNC symbols. The symbols are looked up through
# the `input` column of the result instead of being assigned by position:
# g:Convert drops genes without a target by default and may return several
# rows for one gene, so its rows need not line up with `genes_table`.
conversion <- gprofiler2::gconvert(
  genes_table[, unique(gene)],
  target = "HGNC"
)
hgnc_symbols <- conversion$target[match(genes_table$gene, conversion$input)]

# Fail with a clear message instead of a length mismatch or silent NA groups.
if (length(hgnc_symbols) != nrow(genes_table) || anyNA(hgnc_symbols)) {
  stop(
    "Could not map every literature gene to an HGNC symbol.",
    call. = FALSE
  )
}

genes_table[, hgnc_symbol := hgnc_symbols]

# Collapse to one entry per symbol, joining the labels of all gene sets that
# mention it.
genes_table <- genes_table[,
  .(
    gene = unique(gene),
    source = paste(label, collapse = ", ")
  ),
  by = hgnc_symbol
]

# `sort = FALSE` keeps the row order of `genes_table` in the merged result.
genes_table <- merge(genes_table, data, by = "gene", sort = FALSE)
fwrite(genes_table, file = here("scripts/output/genes_table.csv"))
 | |
| 
 | |
# Every gene set that is compared: the published datasets first, followed by
# the three derived sets.
# NOTE(review): the "Venn 88" label is fixed text and is not derived from
# length(genes_venn) -- confirm that it still matches.
compared_sets <- c(
  datasets,
  list(
    "Venn 88" = genes_venn,
    "Recommended" = genes_recommended,
    "Literature" = genes_literature
  )
)

# Long table with one row per (gene set, gene) pair.
datasets_data <- rbindlist(lapply(names(compared_sets), function(set_name) {
  data.table(
    dataset = set_name,
    gene = compared_sets[[set_name]]
  )
}))

# Attach the per-gene columns of `data`; genes that are missing from `data`
# are dropped by this inner join.
datasets_data <- merge(datasets_data, data, by = "gene")

# Number of genes per gene set and group, and that number as a share of all
# genes of the gene set.
datasets_table <- datasets_data[, .(count = .N), by = .(dataset, group)]
datasets_table[, total := sum(count), by = dataset][
  ,
  proportion := count / total
]
 | |
| 
 | |
# Bar chart of each gene set's proportion of genes in group "1_1".
highlight_data <- datasets_table[group == "1_1"]

fig_11 <- plotly::plot_ly()

fig_11 <- plotly::add_bars(
  fig_11,
  data = highlight_data,
  x = ~dataset,
  y = ~proportion
)

fig_11 <- plotly::layout(
  fig_11,
  xaxis = list(
    # Category order: gene sets in the order they appear in the table.
    categoryarray = unique(highlight_data$dataset),
    title = ""
  ),
  yaxis = list(
    range = c(0.0, 1.0),
    title = "",
    # Show the proportions as whole percentages.
    tickformat = ".0%"
  ),
  font = list(size = 8),
  margin = list(pad = 2, l = 0, r = 0, t = 0, b = 36)
)

highlight_size <- 3.135 * 72

plotly::save_image(
  fig_11,
  file = here(glue::glue("scripts/output/gene_sets_highlight_1_1.svg")),
  width = highlight_size,
  height = highlight_size,
  scale = 96 / 72
)
 | |
| 
 | |
# Bar chart of every gene set's proportion of genes within one group.
plot_group <- function(group_value) {
  plotly::plot_ly() |>
    plotly::add_bars(
      data = datasets_table[group == group_value],
      x = ~dataset,
      color = ~dataset,
      y = ~proportion
    ) |>
    plotly::layout(
      xaxis = list(
        categoryarray = datasets_table[, unique(dataset)],
        title = ""
      ),
      yaxis = list(
        range = c(0.0, 1.0),
        title = ""
      ),
      font = list(size = 8),
      margin = list(
        pad = 2,
        l = 0,
        r = 0,
        t = 0,
        b = 36
      )
    )
}

group_values <- datasets_table[, unique(group)]

# One plot per group, built in a single pass instead of growing the list
# with c() on every loop iteration.
group_plots <- lapply(group_values, plot_group)

# Export each group's plot without its legend.
for (i in seq_along(group_values)) {
  group_value <- group_values[[i]]

  plotly::save_image(
    plotly::hide_legend(group_plots[[i]]),
    file = here(glue::glue("scripts/output/gene_sets_{group_value}.svg")),
    width = 3 * 72,
    height = 4 * 72,
    scale = 96 / 72
  )
}

# The legend export is the last group's plot with its legend left visible.
# The plot is selected explicitly rather than through the loop variable that
# is left over after the loop, and the export is skipped when there is no
# group at all.
if (length(group_plots) > 0) {
  plotly::save_image(
    group_plots[[length(group_plots)]],
    file = here("scripts/output/gene_sets_legend.svg"),
    width = 6.27 * 72,
    height = 6.27 * 72,
    scale = 96 / 72
  )
}
 | |
| 
 | |
# Count, for every gene in `data`, how many of the published datasets list
# it. The column starts at zero and is incremented once per dataset.
data[, count := 0]

for (listed_genes in datasets) {
  data[gene %chin% listed_genes, count := count + 1]
}

# Smallest score among the rows whose percentile is at least 0.95, computed
# separately for the GTEx and the CMap columns.
threshold_gtex <- min(data[percentile_gtex >= 0.95, score_gtex])
threshold_cmap <- min(data[percentile_cmap >= 0.95, score_cmap])
 | |
| 
 | |
# Scatter plot of `score_gtex` against `score_cmap` for all genes in `data`,
# drawn in three layers:
#   1. genes listed by no dataset (small grey markers),
#   2. genes listed by at least one dataset (coloured by `count`),
#   3. literature genes (green text showing their `literature_id`).
# Dotted lines and "95%" annotations mark both thresholds.

genes_unlisted <- data[count == 0 & !(gene %chin% genes_literature)]
genes_listed <- data[count >= 1 & !(gene %chin% genes_literature)]
genes_labelled <- merge(
  data[gene %chin% genes_literature],
  genes_literature_ids
)

# Both axes show scores within the fixed range from 0 to 1.
score_axis <- function(title) {
  list(
    title = title,
    range = c(0, 1)
  )
}

# Line style shared by both threshold lines.
# NOTE(review): `opacity` is carried over unchanged; plotly documents opacity
# as an attribute of the shape rather than of its line, so it may have no
# effect here -- confirm.
threshold_line <- list(
  color = "#00000080",
  opacity = 0.5,
  width = 1,
  dash = "dot"
)

# Label at the top of the vertical GTEx threshold line.
annotation_gtex <- list(
  text = "95%",
  x = threshold_gtex,
  y = 1,
  xshift = 2,
  yshift = 3,
  yref = "paper",
  xanchor = "left",
  yanchor = "top",
  showarrow = FALSE
)

# Label at the right end of the horizontal CMap threshold line.
annotation_cmap <- list(
  text = "95%",
  x = 1,
  y = threshold_cmap,
  yshift = 2,
  xref = "paper",
  xanchor = "right",
  yanchor = "bottom",
  showarrow = FALSE
)

# Vertical line across the full plot height at the GTEx threshold.
shape_gtex <- list(
  type = "line",
  y0 = 0,
  y1 = 1,
  yref = "paper",
  x0 = threshold_gtex,
  x1 = threshold_gtex,
  line = threshold_line
)

# Horizontal line across the full plot width at the CMap threshold.
shape_cmap <- list(
  type = "line",
  y0 = threshold_cmap,
  y1 = threshold_cmap,
  x0 = 0,
  x1 = 1,
  xref = "paper",
  line = threshold_line
)

fig <- plotly::plot_ly()

fig <- plotly::add_markers(
  fig,
  data = genes_unlisted,
  x = ~score_gtex,
  y = ~score_cmap,
  marker = list(
    size = 2,
    color = "#919191",
    opacity = 0.5
  ),
  cliponaxis = FALSE
)

fig <- plotly::add_markers(
  fig,
  data = genes_listed,
  x = ~score_gtex,
  y = ~score_cmap,
  color = ~count,
  marker = list(
    size = 4,
    opacity = 0.8
  ),
  cliponaxis = FALSE
)

fig <- plotly::add_text(
  fig,
  data = genes_labelled,
  x = ~score_gtex,
  y = ~score_cmap,
  text = ~ as.character(literature_id),
  textfont = list(
    size = 8,
    color = "green"
  )
)

fig <- plotly::layout(
  fig,
  xaxis = score_axis("Ranking based on GTEx"),
  yaxis = score_axis("Ranking based on CMap"),
  annotations = list(annotation_gtex, annotation_cmap),
  shapes = list(shape_gtex, shape_cmap),
  font = list(size = 8),
  margin = list(
    pad = 2,
    l = 36,
    r = 0,
    t = 0,
    b = 36
  )
)

fig <- plotly::hide_legend(fig)
 | |
| 
 | |
# Export the comparison figure twice with identical size and scale: once
# with the colour bar hidden and once with it left in place.
comparison_size <- 6.27 * 72
comparison_scale <- 96 / 72

plotly::save_image(
  plotly::hide_colorbar(fig),
  file = here("scripts/output/comparison.svg"),
  width = comparison_size,
  height = comparison_size,
  scale = comparison_scale
)

plotly::save_image(
  fig,
  file = here("scripts/output/comparison_legend.svg"),
  width = comparison_size,
  height = comparison_size,
  scale = comparison_scale
)
 |