ubigen/scripts/gsea_vs_cmap.R

159 lines
3.8 KiB
R
Raw Normal View History

2024-09-09 10:48:03 +02:00
library(data.table)
library(here)
i_am("scripts/gsea_vs_cmap.R")
ranking_gtex <- ubigen::rank_genes(ubigen::gtex_all)
ranking_cmap <- ubigen::rank_genes(ubigen::cmap)
data <- merge(
ranking_gtex[, .(gene, score, percentile)],
ranking_cmap[, .(gene, score, percentile)],
by = "gene",
suffixes = c(x = "_gtex", y = "_cmap")
)
cat("N =", nrow(data))
# OPTICS clustering
result_optics <- dbscan::optics(
data[, .(score_gtex, score_cmap)],
minPts = 0.01 * nrow(data)
)
result_cluster <- result_optics |> dbscan::extractXi(0.05)
data[, cluster := result_cluster$cluster]
cluster_means <- data[,
.(
score_gtex = mean(score_gtex),
score_cmap = mean(score_cmap)
),
by = cluster
]
cluster_means[, distance_1_1 := sqrt(
(1.0 - score_gtex)^2 + (1.0 - score_cmap)^2
)]
cluster_1_1 <- cluster_means[which.min(distance_1_1), cluster]
genes_optics <- data[cluster == cluster_1_1, gene]
write(genes_optics, here("scripts/output/genes_optics.txt"))
# Percentiles
2024-11-16 11:05:36 +01:00
data[percentile_gtex < 0.95 & percentile_cmap < 0.95, group := "0_0"]
data[percentile_gtex < 0.95 & percentile_cmap >= 0.95, group := "0_1"]
data[percentile_gtex >= 0.95 & percentile_cmap < 0.95, group := "1_0"]
data[percentile_gtex >= 0.95 & percentile_cmap >= 0.95, group := "1_1"]
2024-09-09 10:48:03 +02:00
2024-11-16 11:05:36 +01:00
fwrite(data, file = here("scripts/output/gsea_vs_cmap_groups.csv"))
write(data[group == "0_0", gene], here("scripts/output/genes_0_0.txt"))
write(data[group == "0_1", gene], here("scripts/output/genes_0_1.txt"))
write(data[group == "1_0", gene], here("scripts/output/genes_1_0.txt"))
write(data[group == "1_1", gene], here("scripts/output/genes_1_1.txt"))
2024-09-09 10:48:03 +02:00
threshold_gtex <- data[percentile_gtex >= 0.95, min(score_gtex)]
threshold_cmap <- data[percentile_cmap >= 0.95, min(score_cmap)]
fig <- plotly::plot_ly() |>
plotly::add_markers(
data = data,
x = ~score_gtex,
y = ~score_cmap,
name = ~cluster,
showlegend = FALSE,
marker = list(size = 4),
cliponaxis = FALSE
) |>
plotly::layout(
xaxis = list(
title = "Ranking based on GTEx",
range = c(0, 1)
),
yaxis = list(
title = "Ranking based on CMap",
range = c(0, 1)
),
annotations = list(
list(
text = "95%",
x = threshold_gtex,
y = 1,
xshift = 2,
yshift = 3,
yref = "paper",
xanchor = "left",
yanchor = "top",
showarrow = FALSE
),
list(
text = "95%",
x = 1,
y = threshold_cmap,
yshift = 2,
xref = "paper",
xanchor = "right",
yanchor = "bottom",
showarrow = FALSE
)
),
shapes = list(
list(
type = "line",
y0 = 0,
y1 = 1,
yref = "paper",
x0 = threshold_gtex,
x1 = threshold_gtex,
line = list(
color = "#00000080",
opacity = 0.5,
width = 1,
dash = "dot"
)
),
list(
type = "line",
y0 = threshold_cmap,
y1 = threshold_cmap,
x0 = 0,
x1 = 1,
xref = "paper",
line = list(
color = "#00000080",
width = 1,
opacity = 0.5,
dash = "dot"
)
)
),
font = list(size = 8),
margin = list(
pad = 2,
l = 36,
r = 0,
t = 0,
b = 36
)
)
# Plotly specifies all sizes in pixels, including font size. Because of
# that, we can actually think of these pixels as points. One point is defined as
# 1/72 inch and SVG uses 96 DPI as the standard resolution.
#
# 1 plotly pixel = 1 point = 1/72 inch = 1 1/3 actual pixels
#
# So, we specify width and height in points (= plotly pixels) and scale up the
# image by 96/72 to convert everything from points to pixels at 96 DPI.
plotly::save_image(
fig,
file = here("scripts/output/gsea_vs_cmap.svg"),
width = 6.27 * 72,
height = 6.27 * 72,
scale = 96 / 72
)