diff --git a/R/server.R b/R/server.R index f3f6290..6c56b6a 100644 --- a/R/server.R +++ b/R/server.R @@ -237,5 +237,6 @@ server <- function(custom_dataset = NULL) { }) output$gsea_plot_ranking <- plotly::renderPlotly(gsea_plot_ranking) + output$fig_drug_scores <- plotly::renderPlotly(fig_drug_scores) } } diff --git a/R/sysdata.rda b/R/sysdata.rda index 58d77ef..c0e0234 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/R/ui.R b/R/ui.R index d8d336f..56fa0ed 100644 --- a/R/ui.R +++ b/R/ui.R @@ -272,7 +272,21 @@ ui <- function(custom_dataset = NULL) { "Note: Click on the legend items to toggle single sources. A ", "double-click will isolate a single source of interest." ))), - plotly::plotlyOutput("gsea_plot_ranking", height = "600px") + plotly::plotlyOutput("gsea_plot_ranking", height = "600px"), + h2("Drug effects"), + p(HTML(paste0( + "Scores for drugs based on the genes that are significantly ", + "influenced by them. To compute a score for each drug, the scores ", + "of all influenced genes based on “GTEx (all)” (X-axis) and ", + "“CMap” (Y-axis) are averaged with weights based on the fold ", + "change of the interactions. The position of each drug in this ", + "plot is therefore a result of how ubiquitous the genes that it ", + "influences are." + ))), + p(HTML(paste0( + "Note: Hover over the markers to see drug names." + ))), + plotly::plotlyOutput("fig_drug_scores", height = "1200px") ) ), tabPanel( diff --git a/scripts/cmap_drugs_analysis.R b/scripts/cmap_drugs_analysis.R deleted file mode 100644 index b563a13..0000000 --- a/scripts/cmap_drugs_analysis.R +++ /dev/null @@ -1,136 +0,0 @@ -library(data.table) -library(here) - -i_am("scripts/cmap_drugs_analysis.R") - -data <- fread(here("scripts/output/cmap_drugs.csv")) - -data[, c("drug", "concentration", "cell_line") := - tstrsplit(drug, "_", fixed = TRUE)] - -data[, concentration := as.double(concentration)] - -data <- data[, - .(abs_mean_change = mean(abs(mean_change))), - by = .(drug, group) -] - -# Source: PubChem ID list upload based on identifiers converted from CMap -# drug names using the PubChem ID exchange. -pubchem_data <- fread(here("scripts/input/pubchem_data.csv")) - -pubchem_data <- pubchem_data[, .(cid, cmpdname, annotation)] -pubchem_data <- unique(pubchem_data, by = "cid") -pubchem_data <- pubchem_data[, - .( - cmpdname, - annotation = strsplit(annotation, "|", fixed = TRUE) |> unlist() - ), - by = cid -] - -# Filter for WHO ATC annotations -pubchem_data <- pubchem_data[stringr::str_detect(annotation, "^[A-Z] - ")] - -# Extract ATC levels - -pubchem_data[, atc_1 := stringr::str_match( - annotation, - "^[A-Z] - ([^>]*)" -)[, 2] |> stringr::str_trim()] - -pubchem_data[, atc_2 := stringr::str_match( - annotation, - "> [A-Z][0-9][0-9] - ([^>]*)" -)[, 2] |> stringr::str_trim()] - -pubchem_data[, atc_3 := stringr::str_match( - annotation, - "> [A-Z][0-9][0-9][A-Z] - ([^>]*)" -)[, 2] |> stringr::str_trim()] - -# Source: PubChem ID exchange -drugs_pubchem_mapping <- fread(here("scripts/input/drugs_pubchem.tsv")) |> - na.omit() - -data <- merge(data, drugs_pubchem_mapping, by = "drug", allow.cartesian = TRUE) -data <- merge(data, pubchem_data, by = "cid", allow.cartesian = TRUE) -data[, drug_category := atc_1] - - -# Select top drug categories - -results_drug_categories <- data[, - .(score = mean(abs_mean_change)), - by = .(group, drug_category) -] - -results_drug_categories <- results_drug_categories[, - .(mean_score = mean(score)), - by = drug_category -] - -setorder(results_drug_categories, -mean_score) -top_drug_categories <- results_drug_categories[1:7, drug_category] -drug_categories <- c(top_drug_categories, "Other") - -# Merge other drug categories - -data[!(drug_category %chin% top_drug_categories), drug_category := "Other"] - -# Recompute results with new categories - -results <- data[, - .(score = mean(abs_mean_change)), - by = .(group, drug_category) -] - -group_plots <- list() - -for (group_value in results[, unique(group)]) { - group_plot <- plotly::plot_ly() |> - plotly::add_bars( - data = results[group == group_value], - x = ~drug_category, - y = ~score, - color = ~drug_category - ) |> - plotly::layout( - xaxis = list( - categoryarray = drug_categories, - title = "", - showticklabels = FALSE - ), - yaxis = list( - range = c(0.0, 0.03), - nticks = 4, - title = "" - ), - font = list(size = 8), - margin = list( - pad = 2, - l = 48, - r = 0, - t = 0, - b = 36 - ) - ) - - plotly::save_image( - group_plot |> plotly::hide_legend(), - file = here(glue::glue("scripts/output/drug_categories_{group_value}.svg")), - width = 3 * 72, - height = 4 * 72, - scale = 96 / 72 - ) - - group_plots <- c(group_plots, list(group_plot)) -} - -plotly::save_image( - group_plot, - file = here(glue::glue("scripts/output/drug_categories_legend.svg")), - width = 6.27 * 72, - height = 6.27 * 72, - scale = 96 / 72 -) diff --git a/scripts/cmap_drugs_input.R b/scripts/cmap_drugs_input.R deleted file mode 100644 index 2a01ffc..0000000 --- a/scripts/cmap_drugs_input.R +++ /dev/null @@ -1,47 +0,0 @@ -library(data.table) -library(gprofiler2) -library(here) - -i_am("scripts/cmap_drugs_input.R") - -# Source: custom -load(here("scripts", "input", "CMap_20180808.RData")) - -data <- CMap$"HT_HG-U133A" -rm(CMap) - -transcripts <- dimnames(data)$transcripts -genes <- gconvert( - transcripts, - numeric_ns = "ENTREZGENE_ACC", - mthreshold = 1, - filter_na = FALSE -)$target -dimnames(data)[[1]] <- genes - -data_drugs <- as.data.table(data) -data_drugs <- na.omit(data_drugs) -data_drugs <- data_drugs[data == "logFoldChange", .(transcripts, drugs, value)] - -setnames( - data_drugs, - c("transcripts", "drugs", "value"), - c("gene", "drug", "change") -) - -genes_0_0 <- scan(here("scripts/output/genes_0_0.txt"), what = character()) -genes_0_1 <- scan(here("scripts/output/genes_0_1.txt"), what = character()) -genes_1_0 <- scan(here("scripts/output/genes_1_0.txt"), what = character()) -genes_1_1 <- scan(here("scripts/output/genes_1_1.txt"), what = character()) - -data_drugs[gene %chin% genes_0_0, group := "genes_0_0"] -data_drugs[gene %chin% genes_0_1, group := "genes_0_1"] -data_drugs[gene %chin% genes_1_0, group := "genes_1_0"] -data_drugs[gene %chin% genes_1_1, group := "genes_1_1"] - -data_drugs <- na.omit(data_drugs) - -results <- data_drugs[, .(mean_change = mean(change)), by = .(drug, group)] -fwrite(results, file = here("scripts/output/cmap_drugs.csv")) - -write(data_drugs[, unique(drug)], file = here("scripts/output/drugs.txt")) diff --git a/scripts/drugs_analysis.R b/scripts/drugs_analysis.R new file mode 100644 index 0000000..c661b88 --- /dev/null +++ b/scripts/drugs_analysis.R @@ -0,0 +1,237 @@ +library(data.table) +library(here) + +i_am("scripts/drugs_analysis.R") + +drugs_cmap <- fread(here("scripts/output/drugs_cmap.csv")) + +# Only keep significant changes +drugs_cmap <- drugs_cmap[p_value <= 0.05] + +# Keep one row per gene and drug, with the most significant change. +setkey(drugs_cmap, gene, drug, p_value) +drugs_cmap <- drugs_cmap[ + rowid(gene, drug) == 1, + .(gene, drug, log_fold_change, p_value) +] + +drugs_cmap[, negative_log_10_p := -log10(p_value)] + +ranking_data <- fread(here("scripts/output/gsea_vs_cmap_groups.csv")) +n_ubiquitous <- ranking_data[percentile_gtex >= 0.95, .N] +n_non_ubiquitous <- ranking_data[percentile_gtex < 0.95, .N] +data <- merge(drugs_cmap, ranking_data, by = "gene") + +drugs <- fread(here("scripts/output/drugs.csv"), na.strings = "") +data <- merge(data, drugs, by = "drug", all.x = TRUE, allow.cartesian = TRUE) + +# Use CMap names as fallback (for drugs not present in drugs.csv above) +data[is.na(name), name := stringr::str_to_sentence(drug)] + +# Figures for single drugs + +results_drugs <- unique(data, by = c("drug", "gene")) +results_drugs[, + `:=`( + proportion_ubiquitous = + .SD[percentile_gtex >= 0.95, .N / n_ubiquitous], + proportion_non_ubiquitous = + .SD[percentile_gtex < 0.95, .N / n_non_ubiquitous], + drug_score_gtex = weighted.mean(score_gtex, abs(log_fold_change)), + drug_score_cmap = weighted.mean(score_cmap, abs(log_fold_change)) + ), + by = drug +] + +results_drugs[, bias := proportion_ubiquitous / proportion_non_ubiquitous] +setorder(results_drugs, -bias) + +results_drugs_unique <- unique(results_drugs, by = "drug") + +# Exclude some exotic drugs +results_drugs_unique <- results_drugs_unique[!is.na(indication)] + +n_drugs <- nrow(results_drugs_unique) +selected_drugs <- c( + results_drugs_unique[1:10, drug], + results_drugs_unique[(n_drugs - 9):n_drugs, drug] +) + +fig_drug_scores_new <- plotly::plot_ly(results_drugs_unique) |> + plotly::add_markers( + x = ~drug_score_gtex, + y = ~drug_score_cmap, + text = ~name, + marker = list(size = 4) + ) |> + plotly::layout( + xaxis = list( + title = "Score based on GTEx (all)" + ), + yaxis = list( + title = "Score based on CMap" + ) + ) + +# To not overwrite other data: +load(here("R/sysdata.rda")) +fig_drug_scores <- fig_drug_scores_new + +usethis::use_data( + fig_drug_scores, + gsea_plot_ranking, # From R/sysdata.rda + internal = TRUE, + overwrite = TRUE +) + +results_drugs_unique <- results_drugs_unique[drug %chin% selected_drugs] + +fig_drugs <- plotly::plot_ly(results_drugs_unique) |> + plotly::add_bars( + x = ~proportion_ubiquitous, + y = ~name + ) |> + plotly::add_bars( + x = ~ -proportion_non_ubiquitous, + y = ~name + ) |> + plotly::layout( + xaxis = list( + range = c(-0.8, 0.8), + title = "Proportion of genes that are influenced significantly", + tickformat = ".0%" + ), + yaxis = list( + categoryarray = rev(results_drugs_unique[, name]), + title = "" + ), + barmode = "relative", + showlegend = FALSE, + font = list(size = 8), + margin = list( + pad = 2, + l = 0, + r = 0, + t = 0, + b = 36 + ) + ) + +# Figure for mechanisms of action + +results_moa <- unique( + data[!is.na(mechanism_of_action) & mechanism_of_action != "Unknown"], + by = c("drug", "gene", "mechanism_of_action") +) + +results_moa <- results_moa[, + .( + percentile_gtex = percentile_gtex[1], + log_fold_change = mean(log_fold_change), + score_gtex = mean(score_gtex) + ), + by = c("mechanism_of_action", "gene") +] + +results_moa[, + `:=`( + proportion_ubiquitous = .SD[percentile_gtex >= 0.95, .N / n_ubiquitous], + proportion_non_ubiquitous = + .SD[percentile_gtex < 0.95, .N / n_non_ubiquitous], + moa_score = weighted.mean(score_gtex, abs(log_fold_change)) + ), + by = mechanism_of_action +] + +results_moa[, bias := proportion_ubiquitous / proportion_non_ubiquitous] +setorder(results_moa, -bias) + +results_moa_unique <- unique(results_moa, by = "mechanism_of_action") +n_moa <- nrow(results_moa_unique) +selected_moas <- c( + results_moa_unique[1:10, mechanism_of_action], + results_moa_unique[(n_moa - 9):n_moa, mechanism_of_action] +) + +results_moa_unique <- + results_moa_unique[mechanism_of_action %chin% selected_moas] + +fig_moas <- plotly::plot_ly(results_moa_unique) |> + plotly::add_bars( + x = ~proportion_ubiquitous, + y = ~mechanism_of_action + ) |> + plotly::add_bars( + x = ~ -proportion_non_ubiquitous, + y = ~mechanism_of_action + ) |> + plotly::layout( + xaxis = list( + range = c(-0.8, 0.8), + title = "Proportion of genes that are influenced significantly", + tickformat = ".0%" + ), + yaxis = list( + categoryarray = rev(results_moa_unique[, mechanism_of_action]), + title = "" + ), + barmode = "relative", + showlegend = FALSE, + font = list(size = 8), + margin = list( + pad = 2, + l = 0, + r = 0, + t = 0, + b = 36 + ) + ) + +plotly::save_image( + fig_drug_scores |> plotly::layout( + font = list(size = 8), + margin = list( + pad = 2, + l = 36, + r = 0, + t = 0, + b = 36 + ) + ), + file = here("scripts/output/drug_scores.svg"), + width = 6.27 * 72, + height = 6.27 * 72, + scale = 96 / 72 +) + +plotly::save_image( + fig_drugs, + file = here("scripts/output/drugs_labels.svg"), + width = 3.135 * 72, + height = 6.27 * 72, + scale = 96 / 72 +) + +plotly::save_image( + fig_drugs |> plotly::layout(yaxis = list(showticklabels = FALSE)), + file = here("scripts/output/drugs.svg"), + width = 3.135 * 72, + height = 6.27 * 72, + scale = 96 / 72 +) + +plotly::save_image( + fig_moas, + file = here("scripts/output/moas_labels.svg"), + width = 3.135 * 72, + height = 6.27 * 72, + scale = 96 / 72 +) + +plotly::save_image( + fig_moas |> plotly::layout(yaxis = list(showticklabels = FALSE)), + file = here("scripts/output/moas.svg"), + width = 3.135 * 72, + height = 6.27 * 72, + scale = 96 / 72 +) diff --git a/scripts/drugs_input.R b/scripts/drugs_input.R new file mode 100644 index 0000000..7971a8f --- /dev/null +++ b/scripts/drugs_input.R @@ -0,0 +1,103 @@ +library(data.table) +library(here) + +i_am("scripts/drugs_input.R") + +# Source: PubChem ID exchange based on CMap drug identifiers. +drugs_cmap_pubchem <- fread(here("scripts/input/drugs_cmap_pubchem.tsv")) +drugs_cmap_pubchem <- na.omit(drugs_cmap_pubchem) + +# Source: UniChem ID mapping +drugs_chembl_pubchem <- fread(here("scripts/input/drugs_chembl_pubchem.tsv")) + +# Source: ChEMBL SQLite database +# SELECT DISTINCT +# chembl_id, +# synonyms AS name, +# mesh_heading AS indication, +# mechanism_of_action +# FROM molecule_dictionary +# LEFT JOIN drug_indication +# ON molecule_dictionary.molregno = drug_indication.molregno +# LEFT JOIN drug_mechanism +# ON molecule_dictionary.molregno = drug_mechanism.molregno +# LEFT JOIN ( +# SELECT molregno, synonyms FROM molecule_synonyms WHERE syn_type == 'INN' +# ) AS molecule_synonyms +# ON molecule_dictionary.molregno = molecule_synonyms.molregno +# WHERE name IS NOT NULL +# OR indication IS NOT NULL +# OR mechanism_of_action IS NOT NULL; +drugs_chembl <- fread(here("scripts/input/drugs_chembl.csv")) + +# Source: PubChem ID list upload based on identifiers converted from CMap +# drug names using the PubChem ID exchange. +drugs_pubchem <- fread(here("scripts/input/drugs_pubchem.csv")) + +drugs_pubchem <- drugs_pubchem[, .(cid, cmpdname, annotation)] +drugs_pubchem <- unique(drugs_pubchem, by = "cid") +drugs_pubchem <- drugs_pubchem[, + .( + cmpdname, + annotation = strsplit(annotation, "|", fixed = TRUE) |> unlist() + ), + by = cid +] + +# Filter for WHO ATC annotations +drugs_pubchem <- drugs_pubchem[stringr::str_detect(annotation, "^[A-Z] - ")] + +# Extract ATC levels + +drugs_pubchem[, atc_1 := stringr::str_match( + annotation, + "^[A-Z] - ([^>]*)" +)[, 2] |> stringr::str_trim()] + +drugs_pubchem[, atc_2 := stringr::str_match( + annotation, + "> [A-Z][0-9][0-9] - ([^>]*)" +)[, 2] |> stringr::str_trim()] + +drugs_pubchem[, atc_3 := stringr::str_match( + annotation, + "> [A-Z][0-9][0-9][A-Z] - ([^>]*)" +)[, 2] |> stringr::str_trim()] + +drugs_pubchem <- drugs_pubchem[, .(cid, cmpdname, atc_1, atc_2, atc_3)] +setnames(drugs_pubchem, c("cid", "cmpdname"), c("pubchem_cid", "pubchem_name")) + +drugs <- merge( + drugs_cmap_pubchem, + drugs_chembl_pubchem, + by = "pubchem_cid", + all.x = TRUE +) + +drugs <- merge( + drugs, + drugs_chembl, + by = "chembl_id", + all.x = TRUE +) + +drugs <- merge( + drugs, + drugs_pubchem, + by = "pubchem_cid", + all.x = TRUE, + allow.cartesian = TRUE +) + +# Prefer INN name, then PubChem, then CMap: +drugs[name == "", name := NA] +drugs[is.na(name), name := pubchem_name] +drugs[name == "", name := NA] +drugs[is.na(name), name := stringr::str_to_sentence(drug)] +drugs[, pubchem_name := NULL] + +# Clean up empty values: +drugs[indication == "", indication := NA] +drugs[mechanism_of_action == "", mechanism_of_action := NA] + +fwrite(drugs, file = here("scripts/output/drugs.csv")) diff --git a/scripts/sliding_gsea_package.R b/scripts/sliding_gsea_package.R index 8bcf352..2adcb3f 100644 --- a/scripts/sliding_gsea_package.R +++ b/scripts/sliding_gsea_package.R @@ -55,5 +55,13 @@ fig <- plotly::plot_ly(data) |> plotly::save_image(fig, image_path, width = 1200, height = 800) +# To not overwrite other data: +load(here("R/sysdata.rda")) gsea_plot_ranking <- fig -usethis::use_data(gsea_plot_ranking, internal = TRUE, overwrite = TRUE) + +usethis::use_data( + gsea_plot_ranking, + fig_drug_scores, # From R/sysdata.rda + internal = TRUE, + overwrite = TRUE +)