diff --git a/scripts/comparison.R b/scripts/comparison.R index 6177a2c..fc309f5 100644 --- a/scripts/comparison.R +++ b/scripts/comparison.R @@ -24,14 +24,114 @@ partitions <- VennDiagram::get.venn.partitions(datasets) |> data.table() genes_venn <- partitions[1]$..values..[[1]] write(genes_venn, file = here("scripts/output/genes_venn.txt")) +gene_sets <- fread(here("scripts/input/gene_sets.csv")) +genes_literature <- gene_sets[type == "literature", unique(gene)] +genes_recommended <- gene_sets[type == "expression", unique(gene)] +genes_literature_ids <- data.table( + gene = genes_literature, + literature_id = seq_along(genes_literature) +) + ranking_gtex <- ubigen::rank_genes(ubigen::gtex_all) ranking_cmap <- ubigen::rank_genes(ubigen::cmap) -data <- merge( - ranking_gtex[, .(gene, score, percentile)], - ranking_cmap[, .(gene, score, percentile)], - by = "gene", - suffixes = c(x = "_gtex", y = "_cmap") +data <- fread(here("scripts/output/gsea_vs_cmap_groups.csv")) + +genes_table <- gene_sets[type == "literature"] +genes_table[, hgnc_symbol := gprofiler2::gconvert(gene, target = "HGNC")$target] +genes_table <- genes_table[, + .( + gene = unique(gene), + source = paste(label, collapse = ", ") + ), + by = hgnc_symbol +] +genes_table <- merge(genes_table, data, by = "gene", sort = FALSE) +fwrite(genes_table, file = here("scripts/output/genes_table.csv")) + +datasets_data <- rbindlist(lapply(names(datasets), function(name) { + data.table( + dataset = name, + gene = datasets[[name]] + ) +})) + +datasets_data <- rbind( + datasets_data, + data.table( + dataset = "Venn", + gene = genes_venn + ) +) + +datasets_data <- rbind( + datasets_data, + data.table( + dataset = "Recommended", + gene = genes_recommended + ) +) + +datasets_data <- rbind( + datasets_data, + data.table( + dataset = "Literature", + gene = genes_literature + ) +) + +datasets_data <- merge(datasets_data, data, by = "gene") + +datasets_table <- datasets_data[, .(count = .N), by = c("dataset", "group")] +datasets_table[, total := sum(count), by = dataset] +datasets_table[, proportion := count / total] + +group_plots <- list() + +for (group_value in datasets_table[, unique(group)]) { + group_plot <- plotly::plot_ly() |> + plotly::add_bars( + data = datasets_table[group == group_value], + x = ~dataset, + color = ~dataset, + y = ~proportion + ) |> + plotly::layout( + xaxis = list( + categoryarray = datasets_table[, unique(dataset)], + title = "" + ), + yaxis = list( + range = c(0.0, 1.0), + title = "" + ), + font = list(size = 8), + margin = list( + pad = 2, + l = 0, + r = 0, + t = 0, + b = 36 + ) + ) + + plotly::save_image( + group_plot |> plotly::hide_legend(), + file = here(glue::glue("scripts/output/gene_sets_{group_value}.svg")), + width = 3 * 72, + height = 4 * 72, + scale = 96 / 72 + ) + + group_plots <- c(group_plots, list(group_plot)) +} + +plotly::save_image( + group_plot, + file = here(glue::glue("scripts/output/gene_sets_legend.svg")), + width = 6.27 * 72, + height = 6.27 * 72, + scale = 96 / 72 ) data[, count := 0] @@ -45,7 +145,7 @@ threshold_cmap <- data[percentile_cmap >= 0.95, min(score_cmap)] fig <- plotly::plot_ly() |> plotly::add_markers( - data = data[count >= 1], + data = data[count >= 1 & !(gene %chin% genes_literature)], x = ~score_gtex, y = ~score_cmap, color = ~count, @@ -56,6 +156,19 @@ fig <- plotly::plot_ly() |> ), cliponaxis = FALSE ) |> + plotly::add_text( + data = merge( + data[gene %chin% genes_literature], + genes_literature_ids + ), + x = ~score_gtex, + y = ~score_cmap, + text = ~ as.character(literature_id), + textfont = list( + size = 8, + color = "green" + ) + ) |> plotly::layout( xaxis = list( title = "Ranking based on GTEx",