mirror of
				https://github.com/johrpan/ubigen.git
				synced 2025-10-26 19:57:24 +01:00 
			
		
		
		
	Add drug plots
This commit is contained in:
		
							parent
							
								
									cf8e9e79d5
								
							
						
					
					
						commit
						785b748ba4
					
				
					 8 changed files with 365 additions and 185 deletions
				
			
		|  | @ -237,5 +237,6 @@ server <- function(custom_dataset = NULL) { | ||||||
|     }) |     }) | ||||||
| 
 | 
 | ||||||
|     output$gsea_plot_ranking <- plotly::renderPlotly(gsea_plot_ranking) |     output$gsea_plot_ranking <- plotly::renderPlotly(gsea_plot_ranking) | ||||||
|  |     output$fig_drug_scores <- plotly::renderPlotly(fig_drug_scores) | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
							
								
								
									
										
											BIN
										
									
								
								R/sysdata.rda
									
										
									
									
									
								
							
							
						
						
									
										
											BIN
										
									
								
								R/sysdata.rda
									
										
									
									
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										16
									
								
								R/ui.R
									
										
									
									
									
								
							
							
						
						
									
										16
									
								
								R/ui.R
									
										
									
									
									
								
							|  | @ -272,7 +272,21 @@ ui <- function(custom_dataset = NULL) { | ||||||
|             "Note: Click on the legend items to toggle single sources. A ", |             "Note: Click on the legend items to toggle single sources. A ", | ||||||
|             "double-click will isolate a single source of interest." |             "double-click will isolate a single source of interest." | ||||||
|           ))), |           ))), | ||||||
|           plotly::plotlyOutput("gsea_plot_ranking", height = "600px") |           plotly::plotlyOutput("gsea_plot_ranking", height = "600px"), | ||||||
|  |           h2("Drug effects"), | ||||||
|  |           p(HTML(paste0( | ||||||
|  |             "Scores for drugs based on the genes that are significantly ", | ||||||
|  |             "influenced by them. To compute a score for each drug, the scores ", | ||||||
|  |             "of all influenced genes based on “GTEx (all)” (X-axis) and ", | ||||||
|  |             "“CMap” (Y-axis) are averaged with weights based on the fold ", | ||||||
|  |             "change of the interactions. The position of each drug in this ", | ||||||
|  |             "plot is therefore a result of how ubiquitous the genes that it ", | ||||||
|  |             "influences are." | ||||||
|  |           ))), | ||||||
|  |           p(HTML(paste0( | ||||||
|  |             "Note: Hover over the markers to see drug names." | ||||||
|  |           ))), | ||||||
|  |           plotly::plotlyOutput("fig_drug_scores", height = "1200px") | ||||||
|         ) |         ) | ||||||
|       ), |       ), | ||||||
|       tabPanel( |       tabPanel( | ||||||
|  |  | ||||||
|  | @ -1,136 +0,0 @@ | ||||||
| library(data.table) |  | ||||||
| library(here) |  | ||||||
| 
 |  | ||||||
| i_am("scripts/cmap_drugs_analysis.R") |  | ||||||
| 
 |  | ||||||
| data <- fread(here("scripts/output/cmap_drugs.csv")) |  | ||||||
| 
 |  | ||||||
| data[, c("drug", "concentration", "cell_line") := |  | ||||||
|   tstrsplit(drug, "_", fixed = TRUE)] |  | ||||||
| 
 |  | ||||||
| data[, concentration := as.double(concentration)] |  | ||||||
| 
 |  | ||||||
| data <- data[, |  | ||||||
|   .(abs_mean_change = mean(abs(mean_change))), |  | ||||||
|   by = .(drug, group) |  | ||||||
| ] |  | ||||||
| 
 |  | ||||||
| # Source: PubChem ID list upload based on identifiers converted from CMap |  | ||||||
| # drug names using the PubChem ID exchange. |  | ||||||
| pubchem_data <- fread(here("scripts/input/pubchem_data.csv")) |  | ||||||
| 
 |  | ||||||
| pubchem_data <- pubchem_data[, .(cid, cmpdname, annotation)] |  | ||||||
| pubchem_data <- unique(pubchem_data, by = "cid") |  | ||||||
| pubchem_data <- pubchem_data[, |  | ||||||
|   .( |  | ||||||
|     cmpdname, |  | ||||||
|     annotation = strsplit(annotation, "|", fixed = TRUE) |> unlist() |  | ||||||
|   ), |  | ||||||
|   by = cid |  | ||||||
| ] |  | ||||||
| 
 |  | ||||||
| # Filter for WHO ATC annotations |  | ||||||
| pubchem_data <- pubchem_data[stringr::str_detect(annotation, "^[A-Z] - ")] |  | ||||||
| 
 |  | ||||||
| # Extract ATC levels |  | ||||||
| 
 |  | ||||||
| pubchem_data[, atc_1 := stringr::str_match( |  | ||||||
|   annotation, |  | ||||||
|   "^[A-Z] - ([^>]*)" |  | ||||||
| )[, 2] |> stringr::str_trim()] |  | ||||||
| 
 |  | ||||||
| pubchem_data[, atc_2 := stringr::str_match( |  | ||||||
|   annotation, |  | ||||||
|   "> [A-Z][0-9][0-9] - ([^>]*)" |  | ||||||
| )[, 2] |> stringr::str_trim()] |  | ||||||
| 
 |  | ||||||
| pubchem_data[, atc_3 := stringr::str_match( |  | ||||||
|   annotation, |  | ||||||
|   "> [A-Z][0-9][0-9][A-Z] - ([^>]*)" |  | ||||||
| )[, 2] |> stringr::str_trim()] |  | ||||||
| 
 |  | ||||||
| # Source: PubChem ID exchange |  | ||||||
| drugs_pubchem_mapping <- fread(here("scripts/input/drugs_pubchem.tsv")) |> |  | ||||||
|   na.omit() |  | ||||||
| 
 |  | ||||||
| data <- merge(data, drugs_pubchem_mapping, by = "drug", allow.cartesian = TRUE) |  | ||||||
| data <- merge(data, pubchem_data, by = "cid", allow.cartesian = TRUE) |  | ||||||
| data[, drug_category := atc_1] |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| # Select top drug categories |  | ||||||
| 
 |  | ||||||
| results_drug_categories <- data[, |  | ||||||
|   .(score = mean(abs_mean_change)), |  | ||||||
|   by = .(group, drug_category) |  | ||||||
| ] |  | ||||||
| 
 |  | ||||||
| results_drug_categories <- results_drug_categories[, |  | ||||||
|   .(mean_score = mean(score)), |  | ||||||
|   by = drug_category |  | ||||||
| ] |  | ||||||
| 
 |  | ||||||
| setorder(results_drug_categories, -mean_score) |  | ||||||
| top_drug_categories <- results_drug_categories[1:7, drug_category] |  | ||||||
| drug_categories <- c(top_drug_categories, "Other") |  | ||||||
| 
 |  | ||||||
| # Merge other drug categories |  | ||||||
| 
 |  | ||||||
| data[!(drug_category %chin% top_drug_categories), drug_category := "Other"] |  | ||||||
| 
 |  | ||||||
| # Recompute results with new categories |  | ||||||
| 
 |  | ||||||
| results <- data[, |  | ||||||
|   .(score = mean(abs_mean_change)), |  | ||||||
|   by = .(group, drug_category) |  | ||||||
| ] |  | ||||||
| 
 |  | ||||||
| group_plots <- list() |  | ||||||
| 
 |  | ||||||
| for (group_value in results[, unique(group)]) { |  | ||||||
|   group_plot <- plotly::plot_ly() |> |  | ||||||
|     plotly::add_bars( |  | ||||||
|       data = results[group == group_value], |  | ||||||
|       x = ~drug_category, |  | ||||||
|       y = ~score, |  | ||||||
|       color = ~drug_category |  | ||||||
|     ) |> |  | ||||||
|     plotly::layout( |  | ||||||
|       xaxis = list( |  | ||||||
|         categoryarray = drug_categories, |  | ||||||
|         title = "", |  | ||||||
|         showticklabels = FALSE |  | ||||||
|       ), |  | ||||||
|       yaxis = list( |  | ||||||
|         range = c(0.0, 0.03), |  | ||||||
|         nticks = 4, |  | ||||||
|         title = "" |  | ||||||
|       ), |  | ||||||
|       font = list(size = 8), |  | ||||||
|       margin = list( |  | ||||||
|         pad = 2, |  | ||||||
|         l = 48, |  | ||||||
|         r = 0, |  | ||||||
|         t = 0, |  | ||||||
|         b = 36 |  | ||||||
|       ) |  | ||||||
|     ) |  | ||||||
| 
 |  | ||||||
|   plotly::save_image( |  | ||||||
|     group_plot |> plotly::hide_legend(), |  | ||||||
|     file = here(glue::glue("scripts/output/drug_categories_{group_value}.svg")), |  | ||||||
|     width = 3 * 72, |  | ||||||
|     height = 4 * 72, |  | ||||||
|     scale = 96 / 72 |  | ||||||
|   ) |  | ||||||
| 
 |  | ||||||
|   group_plots <- c(group_plots, list(group_plot)) |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| plotly::save_image( |  | ||||||
|   group_plot, |  | ||||||
|   file = here(glue::glue("scripts/output/drug_categories_legend.svg")), |  | ||||||
|   width = 6.27 * 72, |  | ||||||
|   height = 6.27 * 72, |  | ||||||
|   scale = 96 / 72 |  | ||||||
| ) |  | ||||||
|  | @ -1,47 +0,0 @@ | ||||||
| library(data.table) |  | ||||||
| library(gprofiler2) |  | ||||||
| library(here) |  | ||||||
| 
 |  | ||||||
| i_am("scripts/cmap_drugs_input.R") |  | ||||||
| 
 |  | ||||||
| # Source: custom |  | ||||||
| load(here("scripts", "input", "CMap_20180808.RData")) |  | ||||||
| 
 |  | ||||||
| data <- CMap$"HT_HG-U133A" |  | ||||||
| rm(CMap) |  | ||||||
| 
 |  | ||||||
| transcripts <- dimnames(data)$transcripts |  | ||||||
| genes <- gconvert( |  | ||||||
|   transcripts, |  | ||||||
|   numeric_ns = "ENTREZGENE_ACC", |  | ||||||
|   mthreshold = 1, |  | ||||||
|   filter_na = FALSE |  | ||||||
| )$target |  | ||||||
| dimnames(data)[[1]] <- genes |  | ||||||
| 
 |  | ||||||
| data_drugs <- as.data.table(data) |  | ||||||
| data_drugs <- na.omit(data_drugs) |  | ||||||
| data_drugs <- data_drugs[data == "logFoldChange", .(transcripts, drugs, value)] |  | ||||||
| 
 |  | ||||||
| setnames( |  | ||||||
|   data_drugs, |  | ||||||
|   c("transcripts", "drugs", "value"), |  | ||||||
|   c("gene", "drug", "change") |  | ||||||
| ) |  | ||||||
| 
 |  | ||||||
| genes_0_0 <- scan(here("scripts/output/genes_0_0.txt"), what = character()) |  | ||||||
| genes_0_1 <- scan(here("scripts/output/genes_0_1.txt"), what = character()) |  | ||||||
| genes_1_0 <- scan(here("scripts/output/genes_1_0.txt"), what = character()) |  | ||||||
| genes_1_1 <- scan(here("scripts/output/genes_1_1.txt"), what = character()) |  | ||||||
| 
 |  | ||||||
| data_drugs[gene %chin% genes_0_0, group := "genes_0_0"] |  | ||||||
| data_drugs[gene %chin% genes_0_1, group := "genes_0_1"] |  | ||||||
| data_drugs[gene %chin% genes_1_0, group := "genes_1_0"] |  | ||||||
| data_drugs[gene %chin% genes_1_1, group := "genes_1_1"] |  | ||||||
| 
 |  | ||||||
| data_drugs <- na.omit(data_drugs) |  | ||||||
| 
 |  | ||||||
| results <- data_drugs[, .(mean_change = mean(change)), by = .(drug, group)] |  | ||||||
| fwrite(results, file = here("scripts/output/cmap_drugs.csv")) |  | ||||||
| 
 |  | ||||||
| write(data_drugs[, unique(drug)], file = here("scripts/output/drugs.txt")) |  | ||||||
							
								
								
									
										237
									
								
								scripts/drugs_analysis.R
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										237
									
								
								scripts/drugs_analysis.R
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,237 @@ | ||||||
|  | library(data.table) | ||||||
|  | library(here) | ||||||
|  | 
 | ||||||
|  | i_am("scripts/drugs_analysis.R") | ||||||
|  | 
 | ||||||
|  | drugs_cmap <- fread(here("scripts/output/drugs_cmap.csv")) | ||||||
|  | 
 | ||||||
|  | # Only keep significant changes | ||||||
|  | drugs_cmap <- drugs_cmap[p_value <= 0.05] | ||||||
|  | 
 | ||||||
|  | # Keep one row per gene and drug, with the most significant change. | ||||||
|  | setkey(drugs_cmap, gene, drug, p_value) | ||||||
|  | drugs_cmap <- drugs_cmap[ | ||||||
|  |   rowid(gene, drug) == 1, | ||||||
|  |   .(gene, drug, log_fold_change, p_value) | ||||||
|  | ] | ||||||
|  | 
 | ||||||
|  | drugs_cmap[, negative_log_10_p := -log10(p_value)] | ||||||
|  | 
 | ||||||
|  | ranking_data <- fread(here("scripts/output/gsea_vs_cmap_groups.csv")) | ||||||
|  | n_ubiquitous <- ranking_data[percentile_gtex >= 0.95, .N] | ||||||
|  | n_non_ubiquitous <- ranking_data[percentile_gtex < 0.95, .N] | ||||||
|  | data <- merge(drugs_cmap, ranking_data, by = "gene") | ||||||
|  | 
 | ||||||
|  | drugs <- fread(here("scripts/output/drugs.csv"), na.strings = "") | ||||||
|  | data <- merge(data, drugs, by = "drug", all.x = TRUE, allow.cartesian = TRUE) | ||||||
|  | 
 | ||||||
|  | # Use CMap names as fallback (for drugs not present in drugs.csv above) | ||||||
|  | data[is.na(name), name := stringr::str_to_sentence(drug)] | ||||||
|  | 
 | ||||||
|  | # Figures for single drugs | ||||||
|  | 
 | ||||||
|  | results_drugs <- unique(data, by = c("drug", "gene")) | ||||||
|  | results_drugs[, | ||||||
|  |   `:=`( | ||||||
|  |     proportion_ubiquitous = | ||||||
|  |       .SD[percentile_gtex >= 0.95, .N / n_ubiquitous], | ||||||
|  |     proportion_non_ubiquitous = | ||||||
|  |       .SD[percentile_gtex < 0.95, .N / n_non_ubiquitous], | ||||||
|  |     drug_score_gtex = weighted.mean(score_gtex, abs(log_fold_change)), | ||||||
|  |     drug_score_cmap = weighted.mean(score_cmap, abs(log_fold_change)) | ||||||
|  |   ), | ||||||
|  |   by = drug | ||||||
|  | ] | ||||||
|  | 
 | ||||||
|  | results_drugs[, bias := proportion_ubiquitous / proportion_non_ubiquitous] | ||||||
|  | setorder(results_drugs, -bias) | ||||||
|  | 
 | ||||||
|  | results_drugs_unique <- unique(results_drugs, by = "drug") | ||||||
|  | 
 | ||||||
|  | # Exclude some exotic drugs | ||||||
|  | results_drugs_unique <- results_drugs_unique[!is.na(indication)] | ||||||
|  | 
 | ||||||
|  | n_drugs <- nrow(results_drugs_unique) | ||||||
|  | selected_drugs <- c( | ||||||
|  |   results_drugs_unique[1:10, drug], | ||||||
|  |   results_drugs_unique[(n_drugs - 9):n_drugs, drug] | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | fig_drug_scores_new <- plotly::plot_ly(results_drugs_unique) |> | ||||||
|  |   plotly::add_markers( | ||||||
|  |     x = ~drug_score_gtex, | ||||||
|  |     y = ~drug_score_cmap, | ||||||
|  |     text = ~name, | ||||||
|  |     marker = list(size = 4) | ||||||
|  |   ) |> | ||||||
|  |   plotly::layout( | ||||||
|  |     xaxis = list( | ||||||
|  |       title = "Score based on GTEx (all)" | ||||||
|  |     ), | ||||||
|  |     yaxis = list( | ||||||
|  |       title = "Score based on CMap" | ||||||
|  |     ) | ||||||
|  |   ) | ||||||
|  | 
 | ||||||
|  | # To not overwrite other data: | ||||||
|  | load(here("R/sysdata.rda")) | ||||||
|  | fig_drug_scores <- fig_drug_scores_new | ||||||
|  | 
 | ||||||
|  | usethis::use_data( | ||||||
|  |   fig_drug_scores, | ||||||
|  |   gsea_plot_ranking, # From R/sysdata.rda | ||||||
|  |   internal = TRUE, | ||||||
|  |   overwrite = TRUE | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | results_drugs_unique <- results_drugs_unique[drug %chin% selected_drugs] | ||||||
|  | 
 | ||||||
|  | fig_drugs <- plotly::plot_ly(results_drugs_unique) |> | ||||||
|  |   plotly::add_bars( | ||||||
|  |     x = ~proportion_ubiquitous, | ||||||
|  |     y = ~name | ||||||
|  |   ) |> | ||||||
|  |   plotly::add_bars( | ||||||
|  |     x = ~ -proportion_non_ubiquitous, | ||||||
|  |     y = ~name | ||||||
|  |   ) |> | ||||||
|  |   plotly::layout( | ||||||
|  |     xaxis = list( | ||||||
|  |       range = c(-0.8, 0.8), | ||||||
|  |       title = "Proportion of genes that are influenced significantly", | ||||||
|  |       tickformat = ".0%" | ||||||
|  |     ), | ||||||
|  |     yaxis = list( | ||||||
|  |       categoryarray = rev(results_drugs_unique[, name]), | ||||||
|  |       title = "" | ||||||
|  |     ), | ||||||
|  |     barmode = "relative", | ||||||
|  |     showlegend = FALSE, | ||||||
|  |     font = list(size = 8), | ||||||
|  |     margin = list( | ||||||
|  |       pad = 2, | ||||||
|  |       l = 0, | ||||||
|  |       r = 0, | ||||||
|  |       t = 0, | ||||||
|  |       b = 36 | ||||||
|  |     ) | ||||||
|  |   ) | ||||||
|  | 
 | ||||||
|  | # Figure for mechanisms of action | ||||||
|  | 
 | ||||||
|  | results_moa <- unique( | ||||||
|  |   data[!is.na(mechanism_of_action) & mechanism_of_action != "Unknown"], | ||||||
|  |   by = c("drug", "gene", "mechanism_of_action") | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | results_moa <- results_moa[, | ||||||
|  |   .( | ||||||
|  |     percentile_gtex = percentile_gtex[1], | ||||||
|  |     log_fold_change = mean(log_fold_change), | ||||||
|  |     score_gtex = mean(score_gtex) | ||||||
|  |   ), | ||||||
|  |   by = c("mechanism_of_action", "gene") | ||||||
|  | ] | ||||||
|  | 
 | ||||||
|  | results_moa[, | ||||||
|  |   `:=`( | ||||||
|  |     proportion_ubiquitous = .SD[percentile_gtex >= 0.95, .N / n_ubiquitous], | ||||||
|  |     proportion_non_ubiquitous = | ||||||
|  |       .SD[percentile_gtex < 0.95, .N / n_non_ubiquitous], | ||||||
|  |     moa_score = weighted.mean(score_gtex, abs(log_fold_change)) | ||||||
|  |   ), | ||||||
|  |   by = mechanism_of_action | ||||||
|  | ] | ||||||
|  | 
 | ||||||
|  | results_moa[, bias := proportion_ubiquitous / proportion_non_ubiquitous] | ||||||
|  | setorder(results_moa, -bias) | ||||||
|  | 
 | ||||||
|  | results_moa_unique <- unique(results_moa, by = "mechanism_of_action") | ||||||
|  | n_moa <- nrow(results_moa_unique) | ||||||
|  | selected_moas <- c( | ||||||
|  |   results_moa_unique[1:10, mechanism_of_action], | ||||||
|  |   results_moa_unique[(n_moa - 9):n_moa, mechanism_of_action] | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | results_moa_unique <- | ||||||
|  |   results_moa_unique[mechanism_of_action %chin% selected_moas] | ||||||
|  | 
 | ||||||
|  | fig_moas <- plotly::plot_ly(results_moa_unique) |> | ||||||
|  |   plotly::add_bars( | ||||||
|  |     x = ~proportion_ubiquitous, | ||||||
|  |     y = ~mechanism_of_action | ||||||
|  |   ) |> | ||||||
|  |   plotly::add_bars( | ||||||
|  |     x = ~ -proportion_non_ubiquitous, | ||||||
|  |     y = ~mechanism_of_action | ||||||
|  |   ) |> | ||||||
|  |   plotly::layout( | ||||||
|  |     xaxis = list( | ||||||
|  |       range = c(-0.8, 0.8), | ||||||
|  |       title = "Proportion of genes that are influenced significantly", | ||||||
|  |       tickformat = ".0%" | ||||||
|  |     ), | ||||||
|  |     yaxis = list( | ||||||
|  |       categoryarray = rev(results_moa_unique[, mechanism_of_action]), | ||||||
|  |       title = "" | ||||||
|  |     ), | ||||||
|  |     barmode = "relative", | ||||||
|  |     showlegend = FALSE, | ||||||
|  |     font = list(size = 8), | ||||||
|  |     margin = list( | ||||||
|  |       pad = 2, | ||||||
|  |       l = 0, | ||||||
|  |       r = 0, | ||||||
|  |       t = 0, | ||||||
|  |       b = 36 | ||||||
|  |     ) | ||||||
|  |   ) | ||||||
|  | 
 | ||||||
|  | plotly::save_image( | ||||||
|  |   fig_drug_scores |> plotly::layout( | ||||||
|  |     font = list(size = 8), | ||||||
|  |     margin = list( | ||||||
|  |       pad = 2, | ||||||
|  |       l = 36, | ||||||
|  |       r = 0, | ||||||
|  |       t = 0, | ||||||
|  |       b = 36 | ||||||
|  |     ) | ||||||
|  |   ), | ||||||
|  |   file = here("scripts/output/drug_scores.svg"), | ||||||
|  |   width = 6.27 * 72, | ||||||
|  |   height = 6.27 * 72, | ||||||
|  |   scale = 96 / 72 | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | plotly::save_image( | ||||||
|  |   fig_drugs, | ||||||
|  |   file = here("scripts/output/drugs_labels.svg"), | ||||||
|  |   width = 3.135 * 72, | ||||||
|  |   height = 6.27 * 72, | ||||||
|  |   scale = 96 / 72 | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | plotly::save_image( | ||||||
|  |   fig_drugs |> plotly::layout(yaxis = list(showticklabels = FALSE)), | ||||||
|  |   file = here("scripts/output/drugs.svg"), | ||||||
|  |   width = 3.135 * 72, | ||||||
|  |   height = 6.27 * 72, | ||||||
|  |   scale = 96 / 72 | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | plotly::save_image( | ||||||
|  |   fig_moas, | ||||||
|  |   file = here("scripts/output/moas_labels.svg"), | ||||||
|  |   width = 3.135 * 72, | ||||||
|  |   height = 6.27 * 72, | ||||||
|  |   scale = 96 / 72 | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | plotly::save_image( | ||||||
|  |   fig_moas |> plotly::layout(yaxis = list(showticklabels = FALSE)), | ||||||
|  |   file = here("scripts/output/moas.svg"), | ||||||
|  |   width = 3.135 * 72, | ||||||
|  |   height = 6.27 * 72, | ||||||
|  |   scale = 96 / 72 | ||||||
|  | ) | ||||||
							
								
								
									
										103
									
								
								scripts/drugs_input.R
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										103
									
								
								scripts/drugs_input.R
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,103 @@ | ||||||
|  | library(data.table) | ||||||
|  | library(here) | ||||||
|  | 
 | ||||||
|  | i_am("scripts/drugs_input.R") | ||||||
|  | 
 | ||||||
|  | # Source: PubChem ID exchange based on CMap drug identifiers. | ||||||
|  | drugs_cmap_pubchem <- fread(here("scripts/input/drugs_cmap_pubchem.tsv")) | ||||||
|  | drugs_cmap_pubchem <- na.omit(drugs_cmap_pubchem) | ||||||
|  | 
 | ||||||
|  | # Source: UniChem ID mapping | ||||||
|  | drugs_chembl_pubchem <- fread(here("scripts/input/drugs_chembl_pubchem.tsv")) | ||||||
|  | 
 | ||||||
|  | # Source: ChEMBL SQLite database | ||||||
|  | # SELECT DISTINCT | ||||||
|  | #   chembl_id, | ||||||
|  | #   synonyms AS name, | ||||||
|  | #   mesh_heading AS indication, | ||||||
|  | #   mechanism_of_action | ||||||
|  | # FROM molecule_dictionary | ||||||
|  | #   LEFT JOIN drug_indication | ||||||
|  | #     ON molecule_dictionary.molregno = drug_indication.molregno | ||||||
|  | #   LEFT JOIN drug_mechanism | ||||||
|  | #     ON molecule_dictionary.molregno = drug_mechanism.molregno | ||||||
|  | #   LEFT JOIN ( | ||||||
|  | #       SELECT molregno, synonyms FROM molecule_synonyms WHERE syn_type == 'INN' | ||||||
|  | #     ) AS molecule_synonyms | ||||||
|  | #     ON molecule_dictionary.molregno = molecule_synonyms.molregno | ||||||
|  | #   WHERE name IS NOT NULL | ||||||
|  | #     OR indication IS NOT NULL | ||||||
|  | #     OR mechanism_of_action IS NOT NULL; | ||||||
|  | drugs_chembl <- fread(here("scripts/input/drugs_chembl.csv")) | ||||||
|  | 
 | ||||||
|  | # Source: PubChem ID list upload based on identifiers converted from CMap | ||||||
|  | # drug names using the PubChem ID exchange. | ||||||
|  | drugs_pubchem <- fread(here("scripts/input/drugs_pubchem.csv")) | ||||||
|  | 
 | ||||||
|  | drugs_pubchem <- drugs_pubchem[, .(cid, cmpdname, annotation)] | ||||||
|  | drugs_pubchem <- unique(drugs_pubchem, by = "cid") | ||||||
|  | drugs_pubchem <- drugs_pubchem[, | ||||||
|  |   .( | ||||||
|  |     cmpdname, | ||||||
|  |     annotation = strsplit(annotation, "|", fixed = TRUE) |> unlist() | ||||||
|  |   ), | ||||||
|  |   by = cid | ||||||
|  | ] | ||||||
|  | 
 | ||||||
|  | # Filter for WHO ATC annotations | ||||||
|  | drugs_pubchem <- drugs_pubchem[stringr::str_detect(annotation, "^[A-Z] - ")] | ||||||
|  | 
 | ||||||
|  | # Extract ATC levels | ||||||
|  | 
 | ||||||
|  | drugs_pubchem[, atc_1 := stringr::str_match( | ||||||
|  |   annotation, | ||||||
|  |   "^[A-Z] - ([^>]*)" | ||||||
|  | )[, 2] |> stringr::str_trim()] | ||||||
|  | 
 | ||||||
|  | drugs_pubchem[, atc_2 := stringr::str_match( | ||||||
|  |   annotation, | ||||||
|  |   "> [A-Z][0-9][0-9] - ([^>]*)" | ||||||
|  | )[, 2] |> stringr::str_trim()] | ||||||
|  | 
 | ||||||
|  | drugs_pubchem[, atc_3 := stringr::str_match( | ||||||
|  |   annotation, | ||||||
|  |   "> [A-Z][0-9][0-9][A-Z] - ([^>]*)" | ||||||
|  | )[, 2] |> stringr::str_trim()] | ||||||
|  | 
 | ||||||
|  | drugs_pubchem <- drugs_pubchem[, .(cid, cmpdname, atc_1, atc_2, atc_3)] | ||||||
|  | setnames(drugs_pubchem, c("cid", "cmpdname"), c("pubchem_cid", "pubchem_name")) | ||||||
|  | 
 | ||||||
|  | drugs <- merge( | ||||||
|  |   drugs_cmap_pubchem, | ||||||
|  |   drugs_chembl_pubchem, | ||||||
|  |   by = "pubchem_cid", | ||||||
|  |   all.x = TRUE | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | drugs <- merge( | ||||||
|  |   drugs, | ||||||
|  |   drugs_chembl, | ||||||
|  |   by = "chembl_id", | ||||||
|  |   all.x = TRUE | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | drugs <- merge( | ||||||
|  |   drugs, | ||||||
|  |   drugs_pubchem, | ||||||
|  |   by = "pubchem_cid", | ||||||
|  |   all.x = TRUE, | ||||||
|  |   allow.cartesian = TRUE | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | # Prefer INN name, then PubChem, then CMap: | ||||||
|  | drugs[name == "", name := NA] | ||||||
|  | drugs[is.na(name), name := pubchem_name] | ||||||
|  | drugs[name == "", name := NA] | ||||||
|  | drugs[is.na(name), name := stringr::str_to_sentence(drug)] | ||||||
|  | drugs[, pubchem_name := NULL] | ||||||
|  | 
 | ||||||
|  | # Clean up empty values: | ||||||
|  | drugs[indication == "", indication := NA] | ||||||
|  | drugs[mechanism_of_action == "", mechanism_of_action := NA] | ||||||
|  | 
 | ||||||
|  | fwrite(drugs, file = here("scripts/output/drugs.csv")) | ||||||
|  | @ -55,5 +55,13 @@ fig <- plotly::plot_ly(data) |> | ||||||
| 
 | 
 | ||||||
| plotly::save_image(fig, image_path, width = 1200, height = 800) | plotly::save_image(fig, image_path, width = 1200, height = 800) | ||||||
| 
 | 
 | ||||||
|  | # To not overwrite other data: | ||||||
|  | load(here("R/sysdata.rda")) | ||||||
| gsea_plot_ranking <- fig | gsea_plot_ranking <- fig | ||||||
| usethis::use_data(gsea_plot_ranking, internal = TRUE, overwrite = TRUE) | 
 | ||||||
|  | usethis::use_data( | ||||||
|  |   gsea_plot_ranking, | ||||||
|  |   fig_drug_scores, # From R/sysdata.rda | ||||||
|  |   internal = TRUE, | ||||||
|  |   overwrite = TRUE | ||||||
|  | ) | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue