mirror of
				https://github.com/johrpan/ubigen.git
				synced 2025-10-26 11:47:24 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			158 lines
		
	
	
	
		
			3.8 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
			
		
		
	
	
			158 lines
		
	
	
	
		
			3.8 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
| library(data.table)
 | |
| library(here)
 | |
| 
 | |
| i_am("scripts/gtex_vs_cmap.R")
 | |
| 
 | |
| ranking_gtex <- ubigen::rank_genes(ubigen::gtex_all)
 | |
| ranking_cmap <- ubigen::rank_genes(ubigen::cmap)
 | |
| 
 | |
| data <- merge(
 | |
|   ranking_gtex[, .(gene, score, percentile)],
 | |
|   ranking_cmap[, .(gene, score, percentile)],
 | |
|   by = "gene",
 | |
|   suffixes = c(x = "_gtex", y = "_cmap")
 | |
| )
 | |
| 
 | |
| cat("N =", nrow(data))
 | |
| 
 | |
| # OPTICS clustering
 | |
| 
 | |
| result_optics <- dbscan::optics(
 | |
|   data[, .(score_gtex, score_cmap)],
 | |
|   minPts = 0.01 * nrow(data)
 | |
| )
 | |
| 
 | |
| result_cluster <- result_optics |> dbscan::extractXi(0.05)
 | |
| data[, cluster := result_cluster$cluster]
 | |
| 
 | |
| cluster_means <- data[,
 | |
|   .(
 | |
|     score_gtex = mean(score_gtex),
 | |
|     score_cmap = mean(score_cmap)
 | |
|   ),
 | |
|   by = cluster
 | |
| ]
 | |
| 
 | |
| cluster_means[, distance_1_1 := sqrt(
 | |
|   (1.0 - score_gtex)^2 + (1.0 - score_cmap)^2
 | |
| )]
 | |
| 
 | |
| cluster_1_1 <- cluster_means[which.min(distance_1_1), cluster]
 | |
| genes_optics <- data[cluster == cluster_1_1, gene]
 | |
| write(genes_optics, here("scripts/output/genes_optics.txt"))
 | |
| 
 | |
| # Percentiles
 | |
| 
 | |
| data[percentile_gtex < 0.95 & percentile_cmap < 0.95, group := "0_0"]
 | |
| data[percentile_gtex < 0.95 & percentile_cmap >= 0.95, group := "0_1"]
 | |
| data[percentile_gtex >= 0.95 & percentile_cmap < 0.95, group := "1_0"]
 | |
| data[percentile_gtex >= 0.95 & percentile_cmap >= 0.95, group := "1_1"]
 | |
| 
 | |
| fwrite(data, file = here("scripts/output/gtex_vs_cmap_groups.csv"))
 | |
| 
 | |
| write(data[group == "0_0", gene], here("scripts/output/genes_0_0.txt"))
 | |
| write(data[group == "0_1", gene], here("scripts/output/genes_0_1.txt"))
 | |
| write(data[group == "1_0", gene], here("scripts/output/genes_1_0.txt"))
 | |
| write(data[group == "1_1", gene], here("scripts/output/genes_1_1.txt"))
 | |
| 
 | |
| threshold_gtex <- data[percentile_gtex >= 0.95, min(score_gtex)]
 | |
| threshold_cmap <- data[percentile_cmap >= 0.95, min(score_cmap)]
 | |
| 
 | |
| fig <- plotly::plot_ly() |>
 | |
|   plotly::add_markers(
 | |
|     data = data,
 | |
|     x = ~score_gtex,
 | |
|     y = ~score_cmap,
 | |
|     name = ~cluster,
 | |
|     showlegend = FALSE,
 | |
|     marker = list(size = 4),
 | |
|     cliponaxis = FALSE
 | |
|   ) |>
 | |
|   plotly::layout(
 | |
|     xaxis = list(
 | |
|       title = "Ranking based on GTEx",
 | |
|       range = c(0, 1)
 | |
|     ),
 | |
|     yaxis = list(
 | |
|       title = "Ranking based on CMap",
 | |
|       range = c(0, 1)
 | |
|     ),
 | |
|     annotations = list(
 | |
|       list(
 | |
|         text = "95%",
 | |
|         x = threshold_gtex,
 | |
|         y = 1,
 | |
|         xshift = 2,
 | |
|         yshift = 3,
 | |
|         yref = "paper",
 | |
|         xanchor = "left",
 | |
|         yanchor = "top",
 | |
|         showarrow = FALSE
 | |
|       ),
 | |
|       list(
 | |
|         text = "95%",
 | |
|         x = 1,
 | |
|         y = threshold_cmap,
 | |
|         yshift = 2,
 | |
|         xref = "paper",
 | |
|         xanchor = "right",
 | |
|         yanchor = "bottom",
 | |
|         showarrow = FALSE
 | |
|       )
 | |
|     ),
 | |
|     shapes = list(
 | |
|       list(
 | |
|         type = "line",
 | |
|         y0 = 0,
 | |
|         y1 = 1,
 | |
|         yref = "paper",
 | |
|         x0 = threshold_gtex,
 | |
|         x1 = threshold_gtex,
 | |
|         line = list(
 | |
|           color = "#00000080",
 | |
|           opacity = 0.5,
 | |
|           width = 1,
 | |
|           dash = "dot"
 | |
|         )
 | |
|       ),
 | |
|       list(
 | |
|         type = "line",
 | |
|         y0 = threshold_cmap,
 | |
|         y1 = threshold_cmap,
 | |
|         x0 = 0,
 | |
|         x1 = 1,
 | |
|         xref = "paper",
 | |
|         line = list(
 | |
|           color = "#00000080",
 | |
|           width = 1,
 | |
|           opacity = 0.5,
 | |
|           dash = "dot"
 | |
|         )
 | |
|       )
 | |
|     ),
 | |
|     font = list(size = 8),
 | |
|     margin = list(
 | |
|       pad = 2,
 | |
|       l = 36,
 | |
|       r = 0,
 | |
|       t = 0,
 | |
|       b = 36
 | |
|     )
 | |
|   )
 | |
| 
 | |
| # Plotly specifies all sizes in pixels, including font size. Because of
 | |
| # that, we can actually think of these pixels as points. One point is defined as
 | |
| # 1/72 inch and SVG uses 96 DPI as the standard resolution.
 | |
| #
 | |
| # 1 plotly pixel = 1 point = 1/72 inch = 1 1/3 actual pixels
 | |
| #
 | |
| # So, we specify width and height in points (= plotly pixels) and scale up the
 | |
| # image by 96/72 to convert everything from points to pixels at 96 DPI.
 | |
| 
 | |
| plotly::save_image(
 | |
|   fig,
 | |
|   file = here("scripts/output/gtex_vs_cmap.svg"),
 | |
|   width = 6.27 * 72,
 | |
|   height = 6.27 * 72,
 | |
|   scale = 96 / 72
 | |
| )
 |