# ubigen/scripts/cmap_drugs_analysis.R

library(data.table)
library(here)

# Declare this script's location within the project so that `here()` can
# resolve project-relative paths.
i_am("scripts/cmap_drugs_analysis.R")

# Load the CMap drug results.
data <- fread(here("scripts/output/cmap_drugs.csv"))

# Split the combined `drug` identifier on "_" into its three parts. `drug` is
# overwritten with the first part; the concentration is parsed as a number.
data[
  ,
  c("drug", "concentration", "cell_line") := tstrsplit(drug, "_", fixed = TRUE)
][
  ,
  concentration := as.double(concentration)
]

# Collapse to one row per (drug, group): the mean absolute change across all
# remaining rows (i.e. across concentrations and cell lines).
data <- data[
  ,
  .(abs_mean_change = mean(abs(mean_change))),
  by = c("drug", "group")
]

# Source: PubChem ID list upload based on identifiers converted from CMap
# drug names using the PubChem ID exchange.
#
# Keep only the columns needed below and a single row per compound ID.
pubchem_data <- unique(
  fread(here("scripts/input/pubchem_data.csv"))[
    ,
    .(cid, cmpdname, annotation)
  ],
  by = "cid"
)

# The `annotation` field packs several annotations separated by "|". Expand
# it so that every (compound, annotation) pair gets its own row; `cmpdname`
# is repeated for each annotation of the compound.
pubchem_data <- pubchem_data[
  ,
  .(
    cmpdname,
    annotation = unlist(strsplit(annotation, "|", fixed = TRUE))
  ),
  by = cid
]

# Filter for WHO ATC annotations: only annotations starting with a single
# capital letter followed by " - " are kept.
is_atc_annotation <- stringr::str_detect(pubchem_data$annotation, "^[A-Z] - ")
pubchem_data <- pubchem_data[is_atc_annotation]

# Extract ATC levels
#
# Each pattern captures the description that follows the level's code, up to
# the next ">" separator. Annotations lacking a level yield NA for it.
atc_level_patterns <- c(
  atc_1 = "^[A-Z] - ([^>]*)",
  atc_2 = "> [A-Z][0-9][0-9] - ([^>]*)",
  atc_3 = "> [A-Z][0-9][0-9][A-Z] - ([^>]*)"
)

pubchem_data[, (names(atc_level_patterns)) := lapply(
  atc_level_patterns,
  \(pattern) stringr::str_trim(stringr::str_match(annotation, pattern)[, 2])
)]

# Source: PubChem ID exchange
#
# Rows with any missing value (e.g. drugs without a PubChem ID) are dropped.
drugs_pubchem_mapping <- na.omit(
  fread(here("scripts/input/drugs_pubchem.tsv"))
)

# Attach compound IDs to the drug scores, then the ATC annotations to the
# compound IDs. Both joins are inner joins and may multiply rows:
# `pubchem_data` holds one row per (cid, annotation).
# NOTE(review): a drug with several annotations in the same ATC level 1
# category contributes several rows to that category's mean below — confirm
# that this weighting is intended.
data <- merge(
  merge(data, drugs_pubchem_mapping, by = "drug", allow.cartesian = TRUE),
  pubchem_data,
  by = "cid",
  allow.cartesian = TRUE
)

# Categories are defined by the first (most general) ATC level.
data[, drug_category := atc_1]


# Select top drug categories

# Number of highest-scoring categories that are shown individually; all
# remaining categories are merged into "Other".
n_top_categories <- 7

# Score every category within each group first ...
results_drug_categories <- data[,
  .(score = mean(abs_mean_change)),
  by = .(group, drug_category)
]

# ... then average those per-group scores, so that each group contributes
# equally to a category's overall score regardless of its number of rows.
results_drug_categories <- results_drug_categories[,
  .(mean_score = mean(score)),
  by = drug_category
]

setorder(results_drug_categories, -mean_score)

# Use head() instead of indexing with `1:7`: indexing past the last row of a
# data.table yields NA entries, which would end up as bogus categories when
# fewer than `n_top_categories` categories exist.
top_drug_categories <- head(
  results_drug_categories[["drug_category"]],
  n_top_categories
)
drug_categories <- c(top_drug_categories, "Other")

# Merge other drug categories
#
# Every category that is not among the top categories is relabelled "Other".
is_minor_category <- !(data[["drug_category"]] %chin% top_drug_categories)
data[is_minor_category, drug_category := "Other"]

# Recompute results with new categories
#
# One score per (group, category): the mean of the per-drug absolute changes.
results <- data[
  ,
  .(score = mean(abs_mean_change)),
  by = c("group", "drug_category")
]

# Render one bar chart of category scores per group and export each as SVG,
# followed by one larger export that keeps the legend.

# Layout pieces shared by all plots (hoisted out of the loop).
x_axis <- list(
  # Bars follow the ranking order of the categories, with "Other" last.
  categoryarray = drug_categories,
  title = "",
  showticklabels = FALSE
)
y_axis <- list(
  # Fixed range, presumably so that all group plots share one scale; note that
  # bars exceeding 0.03 would be cut off.
  range = c(0.0, 0.03),
  nticks = 4,
  title = ""
)
plot_margin <- list(
  pad = 2,
  l = 48,
  r = 0,
  t = 0,
  b = 36
)

group_values <- results[, unique(group)]

# Fail early with a clear message instead of an "object not found" error at
# the legend export below.
if (length(group_values) == 0) {
  stop("No groups found in `results`; nothing to plot.", call. = FALSE)
}

# Preallocate instead of growing the list on every iteration.
group_plots <- vector("list", length(group_values))

for (i in seq_along(group_values)) {
  group_value <- group_values[[i]]

  group_plot <- plotly::plot_ly() |>
    plotly::add_bars(
      data = results[group == group_value],
      x = ~drug_category,
      y = ~score,
      color = ~drug_category
    ) |>
    plotly::layout(
      xaxis = x_axis,
      yaxis = y_axis,
      font = list(size = 8),
      margin = plot_margin
    )

  # The per-group image omits the legend; it is exported separately below.
  # Sizes are presumably inches times 72, rescaled by 96 / 72 — TODO confirm.
  plotly::save_image(
    group_plot |> plotly::hide_legend(),
    file = here(glue::glue("scripts/output/drug_categories_{group_value}.svg")),
    width = 3 * 72,
    height = 4 * 72,
    scale = 96 / 72
  )

  group_plots[[i]] <- group_plot
}

# Export the last group's plot with its legend visible. It is taken from the
# collected plots rather than from the loop variable left over after the loop.
plotly::save_image(
  group_plots[[length(group_plots)]],
  file = here("scripts/output/drug_categories_legend.svg"),
  width = 6.27 * 72,
  height = 6.27 * 72,
  scale = 96 / 72
)