From 8358f7e5e71a59c09fb78e0469ccae8703acc032 Mon Sep 17 00:00:00 2001 From: Elias Projahn Date: Sun, 24 Nov 2024 11:01:50 +0100 Subject: [PATCH] scripts: Add analysis of drug effects --- scripts/cmap_drugs_analysis.R | 136 ++++++++++++++++++++++++++++++++++ scripts/cmap_drugs_input.R | 47 ++++++++++++ 2 files changed, 183 insertions(+) create mode 100644 scripts/cmap_drugs_analysis.R create mode 100644 scripts/cmap_drugs_input.R diff --git a/scripts/cmap_drugs_analysis.R b/scripts/cmap_drugs_analysis.R new file mode 100644 index 0000000..b563a13 --- /dev/null +++ b/scripts/cmap_drugs_analysis.R @@ -0,0 +1,136 @@ +library(data.table) +library(here) + +i_am("scripts/cmap_drugs_analysis.R") + +data <- fread(here("scripts/output/cmap_drugs.csv")) + +data[, c("drug", "concentration", "cell_line") := + tstrsplit(drug, "_", fixed = TRUE)] + +data[, concentration := as.double(concentration)] + +data <- data[, + .(abs_mean_change = mean(abs(mean_change))), + by = .(drug, group) +] + +# Source: PubChem ID list upload based on identifiers converted from CMap +# drug names using the PubChem ID exchange. +pubchem_data <- fread(here("scripts/input/pubchem_data.csv")) + +pubchem_data <- pubchem_data[, .(cid, cmpdname, annotation)] +pubchem_data <- unique(pubchem_data, by = "cid") +pubchem_data <- pubchem_data[, + .( + cmpdname, + annotation = strsplit(annotation, "|", fixed = TRUE) |> unlist() + ), + by = cid +] + +# Filter for WHO ATC annotations +pubchem_data <- pubchem_data[stringr::str_detect(annotation, "^[A-Z] - ")] + +# Extract ATC levels + +pubchem_data[, atc_1 := stringr::str_match( + annotation, + "^[A-Z] - ([^>]*)" +)[, 2] |> stringr::str_trim()] + +pubchem_data[, atc_2 := stringr::str_match( + annotation, + "> [A-Z][0-9][0-9] - ([^>]*)" +)[, 2] |> stringr::str_trim()] + +pubchem_data[, atc_3 := stringr::str_match( + annotation, + "> [A-Z][0-9][0-9][A-Z] - ([^>]*)" +)[, 2] |> stringr::str_trim()] + +# Source: PubChem ID exchange +drugs_pubchem_mapping <- fread(here("scripts/input/drugs_pubchem.tsv")) |> + na.omit() + +data <- merge(data, drugs_pubchem_mapping, by = "drug", allow.cartesian = TRUE) +data <- merge(data, pubchem_data, by = "cid", allow.cartesian = TRUE) +data[, drug_category := atc_1] + + +# Select top drug categories + +results_drug_categories <- data[, + .(score = mean(abs_mean_change)), + by = .(group, drug_category) +] + +results_drug_categories <- results_drug_categories[, + .(mean_score = mean(score)), + by = drug_category +] + +setorder(results_drug_categories, -mean_score) +top_drug_categories <- results_drug_categories[1:7, drug_category] +drug_categories <- c(top_drug_categories, "Other") + +# Merge other drug categories + +data[!(drug_category %chin% top_drug_categories), drug_category := "Other"] + +# Recompute results with new categories + +results <- data[, + .(score = mean(abs_mean_change)), + by = .(group, drug_category) +] + +group_plots <- list() + +for (group_value in results[, unique(group)]) { + group_plot <- plotly::plot_ly() |> + plotly::add_bars( + data = results[group == group_value], + x = ~drug_category, + y = ~score, + color = ~drug_category + ) |> + plotly::layout( + xaxis = list( + categoryarray = drug_categories, + title = "", + showticklabels = FALSE + ), + yaxis = list( + range = c(0.0, 0.03), + nticks = 4, + title = "" + ), + font = list(size = 8), + margin = list( + pad = 2, + l = 48, + r = 0, + t = 0, + b = 36 + ) + ) + + plotly::save_image( + group_plot |> plotly::hide_legend(), + file = here(glue::glue("scripts/output/drug_categories_{group_value}.svg")), + width = 3 * 72, + height = 4 * 72, + scale = 96 / 72 + ) + + group_plots <- c(group_plots, list(group_plot)) +} + +plotly::save_image( + group_plot, + file = here(glue::glue("scripts/output/drug_categories_legend.svg")), + width = 6.27 * 72, + height = 6.27 * 72, + scale = 96 / 72 +) diff --git a/scripts/cmap_drugs_input.R b/scripts/cmap_drugs_input.R new file mode 100644 index 0000000..2a01ffc --- /dev/null +++ b/scripts/cmap_drugs_input.R @@ -0,0 +1,47 @@ +library(data.table) +library(gprofiler2) +library(here) + +i_am("scripts/cmap_drugs_input.R") + +# Source: custom +load(here("scripts", "input", "CMap_20180808.RData")) + +data <- CMap$"HT_HG-U133A" +rm(CMap) + +transcripts <- dimnames(data)$transcripts +genes <- gconvert( + transcripts, + numeric_ns = "ENTREZGENE_ACC", + mthreshold = 1, + filter_na = FALSE +)$target +dimnames(data)[[1]] <- genes + +data_drugs <- as.data.table(data) +data_drugs <- na.omit(data_drugs) +data_drugs <- data_drugs[data == "logFoldChange", .(transcripts, drugs, value)] + +setnames( + data_drugs, + c("transcripts", "drugs", "value"), + c("gene", "drug", "change") +) + +genes_0_0 <- scan(here("scripts/output/genes_0_0.txt"), what = character()) +genes_0_1 <- scan(here("scripts/output/genes_0_1.txt"), what = character()) +genes_1_0 <- scan(here("scripts/output/genes_1_0.txt"), what = character()) +genes_1_1 <- scan(here("scripts/output/genes_1_1.txt"), what = character()) + +data_drugs[gene %chin% genes_0_0, group := "genes_0_0"] +data_drugs[gene %chin% genes_0_1, group := "genes_0_1"] +data_drugs[gene %chin% genes_1_0, group := "genes_1_0"] +data_drugs[gene %chin% genes_1_1, group := "genes_1_1"] + +data_drugs <- na.omit(data_drugs) + +results <- data_drugs[, .(mean_change = mean(change)), by = .(drug, group)] +fwrite(results, file = here("scripts/output/cmap_drugs.csv")) + +write(data_drugs[, unique(drug)], file = here("scripts/output/drugs.txt"))