# ubigen/scripts/comparison.R
#
# 260 lines
# 5.9 KiB
# R
# Raw Normal View History

library(data.table)
library(here)
i_am("scripts/comparison.R")
# Read the published gene lists. `scan()` with `what = character()` returns
# the whitespace-separated tokens of each file as a character vector.
# NOTE(review): presumably one gene identifier per token — confirm against
# the input files.
w2000 <- scan(
  here("scripts/input/datasets/warrington_2000.txt"),
  what = character()
)
z2008 <- scan(
  here("scripts/input/datasets/zhu_2008.txt"),
  what = character()
)
e2013 <- scan(
  here("scripts/input/datasets/eisenberg_2013.txt"),
  what = character()
)
c2011 <- scan(
  here("scripts/input/datasets/chang_2011.txt"),
  what = character()
)
j2022 <- scan(
  here("scripts/input/datasets/joshi_2022.txt"),
  what = character()
)

# Collect the gene lists under the labels used for tables and figures.
datasets <- setNames(
  list(w2000, z2008, e2013, c2011, j2022),
  c(
    "Warrington 2000",
    "Zhu 2008",
    "Eisenberg 2013",
    "Chang 2011",
    "Joshi 2022"
  )
)
# 2024-10-02 11:01:58 +02:00
# Draw a Venn diagram of the published gene lists. With `filename = NULL`,
# `venn.diagram()` returns the grid object instead of writing a file itself.
venn_grob <- VennDiagram::venn.diagram(
  datasets,
  filename = NULL,
  disable.logging = TRUE
)

# Save the diagram as SVG. `filename` and `plot` are passed by their full
# names so the call does not depend on partial matching of `file` against
# `filename` or on the grob landing positionally in `plot`.
ggplot2::ggsave(
  filename = here("scripts/output/venn.svg"),
  plot = venn_grob,
  device = "svg"
)

# Tabulate the regions of the diagram and keep the genes of the first one.
# NOTE(review): this assumes the first row returned by
# `get.venn.partitions()` is the intersection of all lists — confirm.
partitions <- data.table(VennDiagram::get.venn.partitions(datasets))
genes_venn <- partitions[1]$..values..[[1]]
write(genes_venn, file = here("scripts/output/genes_venn.txt"))
# 2024-11-24 11:01:27 +01:00
# Load the curated gene sets and split them by the value of `type`.
gene_sets <- fread(here("scripts/input/gene_sets.csv"))
genes_literature <- unique(gene_sets[type == "literature"][["gene"]])
genes_recommended <- unique(gene_sets[type == "expression"][["gene"]])

# Give every literature gene a running number in order of first appearance.
genes_literature_ids <- data.table(
  gene = genes_literature,
  literature_id = seq_along(genes_literature)
)

# Gene rankings computed from the `gtex_all` and `cmap` objects of ubigen.
ranking_gtex <- ubigen::rank_genes(ubigen::gtex_all)
ranking_cmap <- ubigen::rank_genes(ubigen::cmap)
# 2024-11-24 11:01:27 +01:00
# Per-gene results read from a previously written CSV.
# NOTE(review): presumably produced by another script of this project —
# confirm which one has to run first.
data <- fread(here("scripts/output/gsea_vs_cmap_groups.csv"))

genes_table <- gene_sets[type == "literature"]

# Convert the unique gene identifiers to HGNC and look every row up by its
# input identifier. Assigning `gconvert(...)$target` directly is only correct
# if the service returns exactly one row per queried gene in query order:
# unmapped identifiers are dropped and ambiguous ones yield several rows,
# which makes the assignment fail or silently misaligns the symbols.
conversion <- gprofiler2::gconvert(
  unique(genes_table[["gene"]]),
  target = "HGNC"
)
symbol_lookup <- if (is.null(conversion)) {
  # `gconvert()` found nothing at all; every gene stays unmapped.
  character()
} else {
  setNames(conversion$target, conversion$input)
}

# Indexing by name picks the first mapping of a gene and NA if there is none.
genes_table[, hgnc_symbol := unname(symbol_lookup[gene])]

unmapped_genes <- genes_table[is.na(hgnc_symbol), unique(gene)]
if (length(unmapped_genes) > 0) {
  warning(
    "No HGNC symbol found for: ",
    paste(unmapped_genes, collapse = ", "),
    call. = FALSE
  )
}

# One row per gene and symbol, listing all labels that mention the symbol.
# Unmapped genes are grouped by their own identifier so that they are not
# merged into a single NA group.
genes_table <- genes_table[,
  .(
    hgnc_symbol = hgnc_symbol[1],
    gene = unique(gene),
    source = paste(label, collapse = ", ")
  ),
  by = .(symbol_key = fcoalesce(hgnc_symbol, gene))
]
genes_table[, symbol_key := NULL]

# Attach the per-gene results, keeping the order of `genes_table`.
genes_table <- merge(genes_table, data, by = "gene", sort = FALSE)
fwrite(genes_table, file = here("scripts/output/genes_table.csv"))
# All gene collections that are compared: the published lists followed by
# the Venn intersection, the recommended genes and the literature genes.
gene_collections <- c(
  datasets,
  list(
    "Venn" = genes_venn,
    "Recommended" = genes_recommended,
    "Literature" = genes_literature
  )
)

# Long table with one row per (collection, gene) pair.
datasets_data <- rbindlist(Map(
  function(label, genes) {
    data.table(
      dataset = label,
      gene = genes
    )
  },
  names(gene_collections),
  gene_collections
))

# Attach the per-gene results; genes that are missing from `data` drop out.
datasets_data <- merge(datasets_data, data, by = "gene")

# Number of genes per collection and group, and the share of each group
# within its collection.
datasets_table <- datasets_data[, .(count = .N), by = .(dataset, group)]
datasets_table[, total := sum(count), by = dataset]
datasets_table[, proportion := count / total]
# Layout pieces shared by all per-group bar charts.
bar_x_axis <- list(
  categoryarray = datasets_table[, unique(dataset)],
  title = ""
)
bar_y_axis <- list(
  range = c(0.0, 1.0),
  title = ""
)
bar_margin <- list(
  pad = 2,
  l = 0,
  r = 0,
  t = 0,
  b = 36
)

# One bar chart per group showing the proportion of each gene collection
# that falls into the group. Every chart is exported without its legend.
group_plots <- list()
for (group_value in datasets_table[, unique(group)]) {
  group_rows <- datasets_table[group == group_value]

  group_bars <- plotly::add_bars(
    plotly::plot_ly(),
    data = group_rows,
    x = ~dataset,
    color = ~dataset,
    y = ~proportion
  )
  group_plot <- plotly::layout(
    group_bars,
    xaxis = bar_x_axis,
    yaxis = bar_y_axis,
    font = list(size = 8),
    margin = bar_margin
  )

  plotly::save_image(
    plotly::hide_legend(group_plot),
    file = here(glue::glue("scripts/output/gene_sets_{group_value}.svg")),
    width = 3 * 72,
    height = 4 * 72,
    scale = 96 / 72
  )

  group_plots[[length(group_plots) + 1L]] <- group_plot
}

# Export the chart of the last group once more with its legend visible.
plotly::save_image(
  group_plot,
  file = here("scripts/output/gene_sets_legend.svg"),
  width = 6.27 * 72,
  height = 6.27 * 72,
  scale = 96 / 72
)
# For every gene in `data`, count how many of the published lists contain it.
data[, count := 0]
for (listed_genes in datasets) {
  data[gene %chin% listed_genes, count := count + 1]
}

# Smallest score among the rows whose percentile is at least 0.95, separately
# for the GTEx and CMap columns.
threshold_gtex <- min(data[percentile_gtex >= 0.95][["score_gtex"]])
threshold_cmap <- min(data[percentile_cmap >= 0.95][["score_cmap"]])
# Literature genes together with their running number. The join column is
# named explicitly so that additional shared columns cannot change the join.
literature_points <- merge(
  data[gene %chin% genes_literature],
  genes_literature_ids,
  by = "gene"
)

# Line style shared by both threshold lines.
threshold_line <- list(
  color = "#00000080",
  opacity = 0.5,
  width = 1,
  dash = "dot"
)

# Scatter plot of `score_gtex` against `score_cmap`. Genes contained in at
# least one published list are drawn as markers colored by the number of
# lists; literature genes are drawn as their running number instead. Dotted
# lines labelled "95%" are placed at the two score thresholds.
fig <- plotly::plot_ly() |>
  plotly::add_markers(
    data = data[count >= 1 & !(gene %chin% genes_literature)],
    x = ~score_gtex,
    y = ~score_cmap,
    color = ~count,
    colors = c("#7d19bf", "#ff7f2a"),
    marker = list(
      size = 4,
      opacity = 0.8
    ),
    cliponaxis = FALSE
  ) |>
  plotly::add_text(
    data = literature_points,
    x = ~score_gtex,
    y = ~score_cmap,
    text = ~ as.character(literature_id),
    textfont = list(
      size = 8,
      color = "green"
    )
  ) |>
  plotly::layout(
    xaxis = list(
      title = "Ranking based on GTEx",
      range = c(0, 1)
    ),
    yaxis = list(
      title = "Ranking based on CMap",
      range = c(0, 1)
    ),
    annotations = list(
      # Label of the vertical (GTEx) threshold line, at the top of the plot.
      list(
        text = "95%",
        x = threshold_gtex,
        y = 1,
        xshift = 2,
        yshift = 3,
        yref = "paper",
        xanchor = "left",
        yanchor = "top",
        showarrow = FALSE
      ),
      # Label of the horizontal (CMap) threshold line, at the right edge.
      list(
        text = "95%",
        x = 1,
        y = threshold_cmap,
        yshift = 2,
        xref = "paper",
        xanchor = "right",
        yanchor = "bottom",
        showarrow = FALSE
      )
    ),
    shapes = list(
      # Vertical line spanning the full plot height at the GTEx threshold.
      list(
        type = "line",
        y0 = 0,
        y1 = 1,
        yref = "paper",
        x0 = threshold_gtex,
        x1 = threshold_gtex,
        line = threshold_line
      ),
      # Horizontal line spanning the full plot width at the CMap threshold.
      list(
        type = "line",
        y0 = threshold_cmap,
        y1 = threshold_cmap,
        x0 = 0,
        x1 = 1,
        xref = "paper",
        line = threshold_line
      )
    ),
    font = list(size = 8),
    margin = list(
      pad = 2,
      l = 36,
      r = 0,
      t = 0,
      b = 36
    )
  ) |>
  plotly::hide_legend()
# Export size shared by both versions of the comparison figure.
comparison_size <- 6.27 * 72
comparison_scale <- 96 / 72

# Version without the color bar.
plotly::save_image(
  plotly::hide_colorbar(fig),
  file = here("scripts/output/comparison.svg"),
  width = comparison_size,
  height = comparison_size,
  scale = comparison_scale
)

# Version that keeps the color bar.
plotly::save_image(
  fig,
  file = here("scripts/output/comparison_legend.svg"),
  width = comparison_size,
  height = comparison_size,
  scale = comparison_scale
)