mirror of
https://github.com/johrpan/ubigen.git
synced 2025-10-26 19:57:24 +01:00
Add drug plots
This commit is contained in:
parent
cf8e9e79d5
commit
785b748ba4
8 changed files with 365 additions and 185 deletions
|
|
@ -237,5 +237,6 @@ server <- function(custom_dataset = NULL) {
|
||||||
})
|
})
|
||||||
|
|
||||||
output$gsea_plot_ranking <- plotly::renderPlotly(gsea_plot_ranking)
|
output$gsea_plot_ranking <- plotly::renderPlotly(gsea_plot_ranking)
|
||||||
|
output$fig_drug_scores <- plotly::renderPlotly(fig_drug_scores)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
BIN
R/sysdata.rda
BIN
R/sysdata.rda
Binary file not shown.
16
R/ui.R
16
R/ui.R
|
|
@ -272,7 +272,21 @@ ui <- function(custom_dataset = NULL) {
|
||||||
"Note: Click on the legend items to toggle single sources. A ",
|
"Note: Click on the legend items to toggle single sources. A ",
|
||||||
"double-click will isolate a single source of interest."
|
"double-click will isolate a single source of interest."
|
||||||
))),
|
))),
|
||||||
plotly::plotlyOutput("gsea_plot_ranking", height = "600px")
|
plotly::plotlyOutput("gsea_plot_ranking", height = "600px"),
|
||||||
|
h2("Drug effects"),
|
||||||
|
p(HTML(paste0(
|
||||||
|
"Scores for drugs based on the genes that are significantly ",
|
||||||
|
"influenced by them. To compute a score for each drug, the scores ",
|
||||||
|
"of all influenced genes based on “GTEx (all)” (X-axis) and ",
|
||||||
|
"“CMap” (Y-axis) are averaged with weights based on the fold ",
|
||||||
|
"change of the interactions. The position of each drug in this ",
|
||||||
|
"plot is therefore a result of how ubiquitous the genes that it ",
|
||||||
|
"influences are."
|
||||||
|
))),
|
||||||
|
p(HTML(paste0(
|
||||||
|
"Note: Hover over the markers to see drug names."
|
||||||
|
))),
|
||||||
|
plotly::plotlyOutput("fig_drug_scores", height = "1200px")
|
||||||
)
|
)
|
||||||
),
|
),
|
||||||
tabPanel(
|
tabPanel(
|
||||||
|
|
|
||||||
|
|
@ -1,136 +0,0 @@
|
||||||
library(data.table)
|
|
||||||
library(here)
|
|
||||||
|
|
||||||
i_am("scripts/cmap_drugs_analysis.R")
|
|
||||||
|
|
||||||
# Load the per-drug, per-group mean changes written by the CMap input script.
data <- fread(here("scripts/output/cmap_drugs.csv"))

# The `drug` column is split on "_" into three parts which become the drug
# name, the concentration and the cell line.
id_parts <- tstrsplit(data[["drug"]], "_", fixed = TRUE)
data[, c("drug", "concentration", "cell_line") := id_parts]

# The split yields character columns; the concentration is stored as a number.
set(data, j = "concentration", value = as.double(data[["concentration"]]))

# Collapse concentrations and cell lines: one absolute mean change per drug
# and gene group.
data <- data[,
  .(abs_mean_change = mean(abs(mean_change))),
  by = c("drug", "group")
]
|
|
||||||
|
|
||||||
# Source: PubChem ID list upload based on identifiers converted from CMap
# drug names using the PubChem ID exchange.
pubchem_data <- fread(here("scripts/input/pubchem_data.csv"))

# Keep one row per compound ID, then expand the "|"-separated annotation
# column so that every annotation entry gets its own row.
pubchem_data <- unique(
  pubchem_data[, .(cid, cmpdname, annotation)],
  by = "cid"
)

pubchem_data <- pubchem_data[,
  .(
    cmpdname,
    annotation = unlist(strsplit(annotation, "|", fixed = TRUE))
  ),
  by = cid
]

# Filter for WHO ATC annotations
is_atc_annotation <- stringr::str_detect(
  pubchem_data[["annotation"]],
  "^[A-Z] - "
)
pubchem_data <- pubchem_data[is_atc_annotation]

# Extract ATC levels

# Returns the trimmed first capture group of `pattern` for each annotation
# (NA where the pattern does not match).
extract_atc_name <- function(annotation, pattern) {
  stringr::str_trim(stringr::str_match(annotation, pattern)[, 2])
}

pubchem_data[, atc_1 := extract_atc_name(annotation, "^[A-Z] - ([^>]*)")]

pubchem_data[, atc_2 := extract_atc_name(
  annotation,
  "> [A-Z][0-9][0-9] - ([^>]*)"
)]

pubchem_data[, atc_3 := extract_atc_name(
  annotation,
  "> [A-Z][0-9][0-9][A-Z] - ([^>]*)"
)]
|
|
||||||
|
|
||||||
# Source: PubChem ID exchange
drugs_pubchem_mapping <- fread(here("scripts/input/drugs_pubchem.tsv")) |>
  na.omit()

# Attach PubChem compound IDs and their ATC annotations. One drug can map to
# several annotation rows, hence `allow.cartesian`.
data <- merge(data, drugs_pubchem_mapping, by = "drug", allow.cartesian = TRUE)
data <- merge(data, pubchem_data, by = "cid", allow.cartesian = TRUE)
data[, drug_category := atc_1]

# Select top drug categories

# Number of categories that keep their own name; all others become "Other".
n_top_categories <- 7L

results_drug_categories <- data[,
  .(score = mean(abs_mean_change)),
  by = .(group, drug_category)
]

results_drug_categories <- results_drug_categories[,
  .(mean_score = mean(score)),
  by = drug_category
]

setorder(results_drug_categories, -mean_score)

# `head()` instead of `[1:7]`: indexing past the last row of a data.table
# yields NA entries when fewer categories than requested are available.
top_drug_categories <- head(
  results_drug_categories[, drug_category],
  n_top_categories
)
drug_categories <- c(top_drug_categories, "Other")

# Merge other drug categories

data[!(drug_category %chin% top_drug_categories), drug_category := "Other"]

# Recompute results with new categories

results <- data[,
  .(score = mean(abs_mean_change)),
  by = .(group, drug_category)
]
|
|
||||||
|
|
||||||
# Render one bar chart of category scores per gene group and export each as
# an SVG without legend. Afterwards the last chart is exported once more with
# its legend so that the legend can be reused for all charts.
group_values <- results[, unique(group)]

# Fail early with a clear message instead of an "object not found" error at
# the legend export below.
if (length(group_values) == 0) {
  stop("No groups found in `results`; nothing to plot.", call. = FALSE)
}

# Preallocated instead of growing the list with c() on every iteration.
group_plots <- vector("list", length(group_values))

for (i in seq_along(group_values)) {
  group_value <- group_values[[i]]

  group_plot <- plotly::plot_ly() |>
    plotly::add_bars(
      data = results[group == group_value],
      x = ~drug_category,
      y = ~score,
      color = ~drug_category
    ) |>
    plotly::layout(
      xaxis = list(
        categoryarray = drug_categories,
        title = "",
        showticklabels = FALSE
      ),
      yaxis = list(
        # Fixed range so that the charts of all groups are comparable.
        range = c(0.0, 0.03),
        nticks = 4,
        title = ""
      ),
      font = list(size = 8),
      margin = list(
        pad = 2,
        l = 48,
        r = 0,
        t = 0,
        b = 36
      )
    )

  plotly::save_image(
    group_plot |> plotly::hide_legend(),
    file = here(glue::glue("scripts/output/drug_categories_{group_value}.svg")),
    width = 3 * 72,
    height = 4 * 72,
    scale = 96 / 72
  )

  group_plots[[i]] <- group_plot
}

# Export the last chart including its legend. The path is a constant, so no
# string interpolation is needed.
plotly::save_image(
  group_plots[[length(group_plots)]],
  file = here("scripts/output/drug_categories_legend.svg"),
  width = 6.27 * 72,
  height = 6.27 * 72,
  scale = 96 / 72
)
|
|
||||||
|
|
@ -1,47 +0,0 @@
|
||||||
library(data.table)
|
|
||||||
library(gprofiler2)
|
|
||||||
library(here)
|
|
||||||
|
|
||||||
i_am("scripts/cmap_drugs_input.R")
|
|
||||||
|
|
||||||
# Source: custom
load(here("scripts", "input", "CMap_20180808.RData"))

# Only one element of the loaded `CMap` object is used; the rest is dropped
# from memory right away.
data <- CMap$`HT_HG-U133A`
rm(CMap)

# Replace the identifiers of the first dimension with the converted targets.
# NOTE(review): this relies on gconvert() returning exactly one target per
# input, in input order (mthreshold = 1, filter_na = FALSE) - confirm.
transcripts <- dimnames(data)[["transcripts"]]
conversion <- gconvert(
  transcripts,
  numeric_ns = "ENTREZGENE_ACC",
  mthreshold = 1,
  filter_na = FALSE
)
genes <- conversion$target
dimnames(data)[[1]] <- genes

# Long format: one row per combination of the array dimensions. Rows with
# missing values (including identifiers without a conversion target) are
# dropped, then only the log fold change entries are kept.
# NOTE(review): `data` in the filter below is presumably a column created
# from an array dimension of that name - confirm.
data_drugs <- as.data.table(data) |>
  na.omit()

data_drugs <- data_drugs[
  data == "logFoldChange",
  .(gene = transcripts, drug = drugs, change = value)
]
|
|
||||||
|
|
||||||
# Gene lists defining the four gene groups, one identifier per entry.
genes_0_0 <- scan(here("scripts/output/genes_0_0.txt"), what = character())
genes_0_1 <- scan(here("scripts/output/genes_0_1.txt"), what = character())
genes_1_0 <- scan(here("scripts/output/genes_1_0.txt"), what = character())
genes_1_1 <- scan(here("scripts/output/genes_1_1.txt"), what = character())

# Label every row with the group of its gene. The groups are applied in this
# order, so a gene listed in several files ends up in the last matching one.
gene_groups <- list(
  genes_0_0 = genes_0_0,
  genes_0_1 = genes_0_1,
  genes_1_0 = genes_1_0,
  genes_1_1 = genes_1_1
)

for (group_name in names(gene_groups)) {
  data_drugs[gene %chin% gene_groups[[group_name]], group := group_name]
}

# Rows whose gene belongs to none of the groups have no label and are removed.
data_drugs <- na.omit(data_drugs)

# Mean change per drug and gene group.
results <- data_drugs[,
  .(mean_change = mean(change)),
  by = c("drug", "group")
]
fwrite(results, file = here("scripts/output/cmap_drugs.csv"))

# List of all drugs that remain after filtering.
write(unique(data_drugs[["drug"]]), file = here("scripts/output/drugs.txt"))
|
|
||||||
237
scripts/drugs_analysis.R
Normal file
237
scripts/drugs_analysis.R
Normal file
|
|
@ -0,0 +1,237 @@
|
||||||
|
library(data.table)
|
||||||
|
library(here)
|
||||||
|
|
||||||
|
i_am("scripts/drugs_analysis.R")
|
||||||
|
|
||||||
|
# Gene expression changes per drug; only significant changes are kept.
significant_changes <-
  fread(here("scripts/output/drugs_cmap.csv"))[p_value <= 0.05]

# Sort by gene, drug and p-value so that the first row of every gene/drug
# combination is the one with the most significant change ...
setkeyv(significant_changes, c("gene", "drug", "p_value"))

# ... and keep exactly that row, restricted to the columns used below.
drugs_cmap <- unique(significant_changes, by = c("gene", "drug"))
drugs_cmap <- drugs_cmap[, .(gene, drug, log_fold_change, p_value)]

drugs_cmap[, negative_log_10_p := -log10(p_value)]
|
||||||
|
|
||||||
|
# Gene rankings and drug metadata.
ranking_data <- fread(here("scripts/output/gsea_vs_cmap_groups.csv"))
drugs <- fread(here("scripts/output/drugs.csv"), na.strings = "")

# Total numbers of genes at or above and below the 95th GTEx percentile.
# These are the denominators for the proportions computed per drug later on.
n_ubiquitous <- nrow(ranking_data[percentile_gtex >= 0.95])
n_non_ubiquitous <- nrow(ranking_data[percentile_gtex < 0.95])

# Attach the ranking to every gene/drug change, then the drug metadata.
# Drugs without metadata are kept (all.x); one drug may have several
# metadata rows, hence `allow.cartesian`.
data <- drugs_cmap |>
  merge(ranking_data, by = "gene") |>
  merge(drugs, by = "drug", all.x = TRUE, allow.cartesian = TRUE)

# Use CMap names as fallback (for drugs not present in drugs.csv above)
name_missing <- is.na(data[["name"]])
data[name_missing, name := stringr::str_to_sentence(drug)]
|
||||||
|
|
||||||
|
# Figures for single drugs

# One row per drug and gene; the per-drug statistics are added to every row
# of the respective drug.
results_drugs <- unique(data, by = c("drug", "gene"))
results_drugs[,
  `:=`(
    # Share of all (non-)ubiquitous genes that the drug influences. Counting
    # with sum() gives the same result as subsetting .SD per group (missing
    # percentiles are not counted) without building a subset for every drug.
    proportion_ubiquitous =
      sum(percentile_gtex >= 0.95, na.rm = TRUE) / n_ubiquitous,
    proportion_non_ubiquitous =
      sum(percentile_gtex < 0.95, na.rm = TRUE) / n_non_ubiquitous,
    # Gene scores averaged with the absolute fold change as weight.
    drug_score_gtex = weighted.mean(score_gtex, abs(log_fold_change)),
    drug_score_cmap = weighted.mean(score_cmap, abs(log_fold_change))
  ),
  by = drug
]

# Ratio of the two proportions; infinite for drugs that influence no
# non-ubiquitous gene. Sorted so that the highest ratio comes first.
results_drugs[, bias := proportion_ubiquitous / proportion_non_ubiquitous]
setorder(results_drugs, -bias)

results_drugs_unique <- unique(results_drugs, by = "drug")

# Exclude some exotic drugs
results_drugs_unique <- results_drugs_unique[!is.na(indication)]

n_drugs <- nrow(results_drugs_unique)

# Select the drugs at both ends of the ranking. head()/tail() are used
# instead of `1:10` and `(n_drugs - 9):n_drugs`, which produce NA rows or an
# index error when fewer than ten drugs remain; unique() removes the overlap
# that occurs with fewer than twenty drugs.
n_selected_per_end <- 10L
selected_drugs <- unique(c(
  head(results_drugs_unique[, drug], n_selected_per_end),
  tail(results_drugs_unique[, drug], n_selected_per_end)
))
|
||||||
|
|
||||||
|
# Scatter plot of all drugs: weighted gene score based on GTEx (X-axis)
# against the weighted gene score based on CMap (Y-axis). The drug name is
# attached as marker text.
fig_drug_scores_new <- plotly::plot_ly(results_drugs_unique) |>
  plotly::add_markers(
    x = ~drug_score_gtex,
    y = ~drug_score_cmap,
    text = ~name,
    marker = list(size = 4)
  ) |>
  plotly::layout(
    xaxis = list(
      title = "Score based on GTEx (all)"
    ),
    yaxis = list(
      title = "Score based on CMap"
    )
  )

# To not overwrite other data: the internal package data is written as a
# whole, so the objects that are already stored have to be saved again.
# They are loaded into a separate environment, because load() into the
# global environment would restore every stored object and silently replace
# variables of this script that happen to share a name.
sysdata <- new.env(parent = emptyenv())
load(here("R/sysdata.rda"), envir = sysdata)

# Fails with a clear error if the object is not part of R/sysdata.rda.
gsea_plot_ranking <- get("gsea_plot_ranking", envir = sysdata, inherits = FALSE)
fig_drug_scores <- fig_drug_scores_new

usethis::use_data(
  fig_drug_scores,
  gsea_plot_ranking, # From R/sysdata.rda
  internal = TRUE,
  overwrite = TRUE
)
|
||||||
|
|
||||||
|
# Restrict the per-drug table to the drugs selected for the figure.
results_drugs_unique <- results_drugs_unique[drug %chin% selected_drugs]

# Diverging bar chart: proportion of ubiquitous genes to the right,
# proportion of non-ubiquitous genes (negated) to the left.
drugs_xaxis <- list(
  range = c(-0.8, 0.8),
  title = "Proportion of genes that are influenced significantly",
  tickformat = ".0%"
)

# Reversed table order on the Y-axis.
drugs_yaxis <- list(
  categoryarray = rev(results_drugs_unique[["name"]]),
  title = ""
)

drugs_margin <- list(pad = 2, l = 0, r = 0, t = 0, b = 36)

fig_drugs <- plotly::plot_ly(results_drugs_unique)
fig_drugs <- plotly::add_bars(
  fig_drugs,
  x = ~proportion_ubiquitous,
  y = ~name
)
fig_drugs <- plotly::add_bars(
  fig_drugs,
  x = ~ -proportion_non_ubiquitous,
  y = ~name
)
fig_drugs <- plotly::layout(
  fig_drugs,
  xaxis = drugs_xaxis,
  yaxis = drugs_yaxis,
  barmode = "relative",
  showlegend = FALSE,
  font = list(size = 8),
  margin = drugs_margin
)
|
||||||
|
|
||||||
|
# Figure for mechanisms of action

# Only rows with a known mechanism of action, one per drug, gene and
# mechanism.
results_moa <- unique(
  data[!is.na(mechanism_of_action) & mechanism_of_action != "Unknown"],
  by = c("drug", "gene", "mechanism_of_action")
)

# Collapse the drugs sharing a mechanism: one row per mechanism and gene
# with the fold change and score averaged across those drugs.
results_moa <- results_moa[,
  .(
    percentile_gtex = percentile_gtex[1],
    log_fold_change = mean(log_fold_change),
    score_gtex = mean(score_gtex)
  ),
  by = c("mechanism_of_action", "gene")
]

results_moa[,
  `:=`(
    # Share of all (non-)ubiquitous genes influenced via this mechanism.
    # Counting with sum() gives the same result as subsetting .SD per group
    # (missing percentiles are not counted) without building a subset for
    # every mechanism.
    proportion_ubiquitous =
      sum(percentile_gtex >= 0.95, na.rm = TRUE) / n_ubiquitous,
    proportion_non_ubiquitous =
      sum(percentile_gtex < 0.95, na.rm = TRUE) / n_non_ubiquitous,
    moa_score = weighted.mean(score_gtex, abs(log_fold_change))
  ),
  by = mechanism_of_action
]

# Ratio of the two proportions, highest first.
results_moa[, bias := proportion_ubiquitous / proportion_non_ubiquitous]
setorder(results_moa, -bias)

results_moa_unique <- unique(results_moa, by = "mechanism_of_action")
n_moa <- nrow(results_moa_unique)

# Select the mechanisms at both ends of the ranking. head()/tail() are used
# instead of `1:10` and `(n_moa - 9):n_moa`, which produce NA rows or an
# index error when fewer than ten mechanisms remain; unique() removes the
# overlap that occurs with fewer than twenty mechanisms.
n_selected_per_end <- 10L
selected_moas <- unique(c(
  head(results_moa_unique[, mechanism_of_action], n_selected_per_end),
  tail(results_moa_unique[, mechanism_of_action], n_selected_per_end)
))

results_moa_unique <-
  results_moa_unique[mechanism_of_action %chin% selected_moas]
|
||||||
|
|
||||||
|
# Diverging bar chart per mechanism of action: proportion of ubiquitous
# genes to the right, proportion of non-ubiquitous genes (negated) to the
# left.
moas_xaxis <- list(
  range = c(-0.8, 0.8),
  title = "Proportion of genes that are influenced significantly",
  tickformat = ".0%"
)

# Reversed table order on the Y-axis.
moas_yaxis <- list(
  categoryarray = rev(results_moa_unique[["mechanism_of_action"]]),
  title = ""
)

moas_margin <- list(pad = 2, l = 0, r = 0, t = 0, b = 36)

fig_moas <- plotly::plot_ly(results_moa_unique)
fig_moas <- plotly::add_bars(
  fig_moas,
  x = ~proportion_ubiquitous,
  y = ~mechanism_of_action
)
fig_moas <- plotly::add_bars(
  fig_moas,
  x = ~ -proportion_non_ubiquitous,
  y = ~mechanism_of_action
)
fig_moas <- plotly::layout(
  fig_moas,
  xaxis = moas_xaxis,
  yaxis = moas_yaxis,
  barmode = "relative",
  showlegend = FALSE,
  font = list(size = 8),
  margin = moas_margin
)
|
||||||
|
|
||||||
|
# Writes `figure` to `path`. All exported figures share the same height and
# export scale and differ only in their width.
save_figure_svg <- function(figure, path, width) {
  plotly::save_image(
    figure,
    file = path,
    width = width,
    height = 6.27 * 72,
    scale = 96 / 72
  )
}

full_width <- 6.27 * 72
half_width <- 3.135 * 72

# Figure variants without Y-axis tick labels.
hidden_labels <- list(showticklabels = FALSE)
fig_drugs_unlabeled <- plotly::layout(fig_drugs, yaxis = hidden_labels)
fig_moas_unlabeled <- plotly::layout(fig_moas, yaxis = hidden_labels)

# The score plot gets a smaller font and tight margins for the export only;
# `fig_drug_scores` itself is not modified.
fig_drug_scores_export <- plotly::layout(
  fig_drug_scores,
  font = list(size = 8),
  margin = list(
    pad = 2,
    l = 36,
    r = 0,
    t = 0,
    b = 36
  )
)

save_figure_svg(
  fig_drug_scores_export,
  here("scripts/output/drug_scores.svg"),
  full_width
)

# Every bar chart is exported twice: with and without Y-axis tick labels.
save_figure_svg(
  fig_drugs,
  here("scripts/output/drugs_labels.svg"),
  half_width
)

save_figure_svg(
  fig_drugs_unlabeled,
  here("scripts/output/drugs.svg"),
  half_width
)

save_figure_svg(
  fig_moas,
  here("scripts/output/moas_labels.svg"),
  half_width
)

save_figure_svg(
  fig_moas_unlabeled,
  here("scripts/output/moas.svg"),
  half_width
)
|
||||||
103
scripts/drugs_input.R
Normal file
103
scripts/drugs_input.R
Normal file
|
|
@ -0,0 +1,103 @@
|
||||||
|
library(data.table)
|
||||||
|
library(here)
|
||||||
|
|
||||||
|
i_am("scripts/drugs_input.R")
|
||||||
|
|
||||||
|
# Mapping from CMap drug identifiers to PubChem compound IDs; rows with
# missing values are dropped.
# Source: PubChem ID exchange based on CMap drug identifiers.
drugs_cmap_pubchem <- na.omit(
  fread(here("scripts/input/drugs_cmap_pubchem.tsv"))
)

# Mapping between ChEMBL and PubChem identifiers.
# Source: UniChem ID mapping
drugs_chembl_pubchem <- fread(here("scripts/input/drugs_chembl_pubchem.tsv"))

# Names, indications and mechanisms of action per ChEMBL ID.
# Source: ChEMBL SQLite database, exported with the following query:
#
#   SELECT DISTINCT
#       chembl_id,
#       synonyms AS name,
#       mesh_heading AS indication,
#       mechanism_of_action
#   FROM molecule_dictionary
#       LEFT JOIN drug_indication
#       ON molecule_dictionary.molregno = drug_indication.molregno
#       LEFT JOIN drug_mechanism
#       ON molecule_dictionary.molregno = drug_mechanism.molregno
#       LEFT JOIN (
#           SELECT molregno, synonyms FROM molecule_synonyms
#           WHERE syn_type == 'INN'
#       ) AS molecule_synonyms
#       ON molecule_dictionary.molregno = molecule_synonyms.molregno
#   WHERE name IS NOT NULL
#       OR indication IS NOT NULL
#       OR mechanism_of_action IS NOT NULL;
drugs_chembl <- fread(here("scripts/input/drugs_chembl.csv"))
|
||||||
|
|
||||||
|
# Source: PubChem ID list upload based on identifiers converted from CMap
# drug names using the PubChem ID exchange.
drugs_pubchem <- fread(here("scripts/input/drugs_pubchem.csv"))

# Keep one row per compound ID, then expand the "|"-separated annotation
# column so that every annotation entry gets its own row.
drugs_pubchem <- unique(
  drugs_pubchem[, .(cid, cmpdname, annotation)],
  by = "cid"
)

drugs_pubchem <- drugs_pubchem[,
  .(
    cmpdname,
    annotation = unlist(strsplit(annotation, "|", fixed = TRUE))
  ),
  by = cid
]

# Filter for WHO ATC annotations
is_atc_annotation <- stringr::str_detect(
  drugs_pubchem[["annotation"]],
  "^[A-Z] - "
)
drugs_pubchem <- drugs_pubchem[is_atc_annotation]

# Extract ATC levels

# Returns the trimmed first capture group of `pattern` for each annotation
# (NA where the pattern does not match).
extract_atc_name <- function(annotation, pattern) {
  stringr::str_trim(stringr::str_match(annotation, pattern)[, 2])
}

drugs_pubchem[, atc_1 := extract_atc_name(annotation, "^[A-Z] - ([^>]*)")]

drugs_pubchem[, atc_2 := extract_atc_name(
  annotation,
  "> [A-Z][0-9][0-9] - ([^>]*)"
)]

drugs_pubchem[, atc_3 := extract_atc_name(
  annotation,
  "> [A-Z][0-9][0-9][A-Z] - ([^>]*)"
)]

# Drop the raw annotation and use the column names expected by the merges
# with the other drug tables.
drugs_pubchem <- drugs_pubchem[, .(
  pubchem_cid = cid,
  pubchem_name = cmpdname,
  atc_1,
  atc_2,
  atc_3
)]
|
||||||
|
|
||||||
|
# Combine all sources, starting from the CMap drugs. Every step is a left
# join, so CMap drugs without a match in the other sources are kept.
drugs_with_chembl_id <- merge(
  drugs_cmap_pubchem,
  drugs_chembl_pubchem,
  by = "pubchem_cid",
  all.x = TRUE
)

drugs_with_chembl_data <- merge(
  drugs_with_chembl_id,
  drugs_chembl,
  by = "chembl_id",
  all.x = TRUE
)

# A compound can have several ATC annotation rows, hence `allow.cartesian`.
drugs <- merge(
  drugs_with_chembl_data,
  drugs_pubchem,
  by = "pubchem_cid",
  all.x = TRUE,
  allow.cartesian = TRUE
)

# Prefer INN name, then PubChem, then CMap:
# A name counts as missing if it is NA or empty.
drugs[is.na(name) | name == "", name := pubchem_name]
drugs[is.na(name) | name == "", name := stringr::str_to_sentence(drug)]
drugs[, pubchem_name := NULL]

# Clean up empty values:
for (column in c("indication", "mechanism_of_action")) {
  drugs[get(column) == "", (column) := NA]
}

fwrite(drugs, file = here("scripts/output/drugs.csv"))
|
||||||
|
|
@ -55,5 +55,13 @@ fig <- plotly::plot_ly(data) |>
|
||||||
|
|
||||||
plotly::save_image(fig, image_path, width = 1200, height = 800)
|
plotly::save_image(fig, image_path, width = 1200, height = 800)
|
||||||
|
|
||||||
|
# To not overwrite other data:
|
||||||
|
load(here("R/sysdata.rda"))
|
||||||
gsea_plot_ranking <- fig
|
gsea_plot_ranking <- fig
|
||||||
usethis::use_data(gsea_plot_ranking, internal = TRUE, overwrite = TRUE)
|
|
||||||
|
usethis::use_data(
|
||||||
|
gsea_plot_ranking,
|
||||||
|
fig_drug_scores, # From R/sysdata.rda
|
||||||
|
internal = TRUE,
|
||||||
|
overwrite = TRUE
|
||||||
|
)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue