scripts: Add new comparison plots

This commit is contained in:
Elias Projahn 2024-11-24 11:01:27 +01:00
parent 23af4ba644
commit bc5c07a48e

View file

@ -24,14 +24,114 @@ partitions <- VennDiagram::get.venn.partitions(datasets) |> data.table()
genes_venn <- partitions[1]$..values..[[1]]
write(genes_venn, file = here("scripts/output/genes_venn.txt"))
gene_sets <- fread(here("scripts/input/gene_sets.csv"))
genes_literature <- gene_sets[type == "literature", unique(gene)]
genes_recommended <- gene_sets[type == "expression", unique(gene)]
genes_literature_ids <- data.table(
gene = genes_literature,
literature_id = seq_along(genes_literature)
)
ranking_gtex <- ubigen::rank_genes(ubigen::gtex_all)
ranking_cmap <- ubigen::rank_genes(ubigen::cmap)
data <- merge(
ranking_gtex[, .(gene, score, percentile)],
ranking_cmap[, .(gene, score, percentile)],
by = "gene",
suffixes = c(x = "_gtex", y = "_cmap")
data <- fread(here("scripts/output/gsea_vs_cmap_groups.csv"))
genes_table <- gene_sets[type == "literature"]
genes_table[, hgnc_symbol := gprofiler2::gconvert(gene, target = "HGNC")$target]
genes_table <- genes_table[,
.(
gene = unique(gene),
source = paste(label, collapse = ", ")
),
by = hgnc_symbol
]
genes_table <- merge(genes_table, data, by = "gene", sort = FALSE)
fwrite(genes_table, file = here("scripts/output/genes_table.csv"))
datasets_data <- rbindlist(lapply(names(datasets), function(name) {
data.table(
dataset = name,
gene = datasets[[name]]
)
}))
datasets_data <- rbind(
datasets_data,
data.table(
dataset = "Venn",
gene = genes_venn
)
)
datasets_data <- rbind(
datasets_data,
data.table(
dataset = "Recommended",
gene = genes_recommended
)
)
datasets_data <- rbind(
datasets_data,
data.table(
dataset = "Literature",
gene = genes_literature
)
)
datasets_data <- merge(datasets_data, data, by = "gene")
datasets_table <- datasets_data[, .(count = .N), by = c("dataset", "group")]
datasets_table[, total := sum(count), by = dataset]
datasets_table[, proportion := count / total]
group_plots <- list()
for (group_value in datasets_table[, unique(group)]) {
group_plot <- plotly::plot_ly() |>
plotly::add_bars(
data = datasets_table[group == group_value],
x = ~dataset,
color = ~dataset,
y = ~proportion
) |>
plotly::layout(
xaxis = list(
categoryarray = datasets_table[, unique(dataset)],
title = ""
),
yaxis = list(
range = c(0.0, 1.0),
title = ""
),
font = list(size = 8),
margin = list(
pad = 2,
l = 0,
r = 0,
t = 0,
b = 36
)
)
plotly::save_image(
group_plot |> plotly::hide_legend(),
file = here(glue::glue("scripts/output/gene_sets_{group_value}.svg")),
width = 3 * 72,
height = 4 * 72,
scale = 96 / 72
)
group_plots <- c(group_plots, list(group_plot))
}
plotly::save_image(
group_plot,
file = here(glue::glue("scripts/output/gene_sets_legend.svg")),
width = 6.27 * 72,
height = 6.27 * 72,
scale = 96 / 72
)
data[, count := 0]
@ -45,7 +145,7 @@ threshold_cmap <- data[percentile_cmap >= 0.95, min(score_cmap)]
fig <- plotly::plot_ly() |>
plotly::add_markers(
data = data[count >= 1],
data = data[count >= 1 & !(gene %chin% genes_literature)],
x = ~score_gtex,
y = ~score_cmap,
color = ~count,
@ -56,6 +156,19 @@ fig <- plotly::plot_ly() |>
),
cliponaxis = FALSE
) |>
plotly::add_text(
data = merge(
data[gene %chin% genes_literature],
genes_literature_ids
),
x = ~score_gtex,
y = ~score_cmap,
text = ~ as.character(literature_id),
textfont = list(
size = 8,
color = "green"
)
) |>
plotly::layout(
xaxis = list(
title = "Ranking based on GTEx",