library(data.table)
library(here)

i_am("scripts/comparison.R")

w2000 <- scan(here("scripts/input/datasets/warrington_2000.txt"), character())
z2008 <- scan(here("scripts/input/datasets/zhu_2008.txt"), character())
e2013 <- scan(here("scripts/input/datasets/eisenberg_2013.txt"), character())
c2011 <- scan(here("scripts/input/datasets/chang_2011.txt"), character())
j2022 <- scan(here("scripts/input/datasets/joshi_2022.txt"), character())

datasets <- list(
  "Warrington 2000" = w2000,
  "Zhu 2008" = z2008,
  "Eisenberg 2013" = e2013,
  "Chang 2011" = c2011,
  "Joshi 2022" = j2022
)

VennDiagram::venn.diagram(datasets, filename = NULL, disable.logging = TRUE) |>
  ggplot2::ggsave(file = here("scripts/output/venn.svg"), device = "svg")

partitions <- VennDiagram::get.venn.partitions(datasets) |> data.table()
genes_venn <- partitions[1]$..values..[[1]]
write(genes_venn, file = here("scripts/output/genes_venn.txt"))

gene_sets <- fread(here("scripts/input/gene_sets.csv"))
genes_literature <- gene_sets[type == "literature", unique(gene)]
genes_recommended <- gene_sets[type == "expression", unique(gene)]
genes_literature_ids <- data.table(
  gene = genes_literature,
  literature_id = seq_along(genes_literature)
)

ranking_gtex <- ubigen::rank_genes(ubigen::gtex_all)
ranking_cmap <- ubigen::rank_genes(ubigen::cmap)

data <- fread(here("scripts/output/gsea_vs_cmap_groups.csv"))

genes_table <- gene_sets[type == "literature"]
genes_table[, hgnc_symbol := gprofiler2::gconvert(gene, target = "HGNC")$target]
genes_table <- genes_table[,
  .(
    gene = unique(gene),
    source = paste(label, collapse = ", ")
  ),
  by = hgnc_symbol
]
genes_table <- merge(genes_table, data, by = "gene", sort = FALSE)
fwrite(genes_table, file = here("scripts/output/genes_table.csv"))

datasets_data <- rbindlist(lapply(names(datasets), function(name) {
  data.table(
    dataset = name,
    gene = datasets[[name]]
  )
}))

datasets_data <- rbind(
  datasets_data,
  data.table(
    dataset = "Venn",
    gene = genes_venn
  )
)

datasets_data <- rbind(
  datasets_data,
  data.table(
    dataset = "Recommended",
    gene = genes_recommended
  )
)

datasets_data <- rbind(
  datasets_data,
  data.table(
    dataset = "Literature",
    gene = genes_literature
  )
)

datasets_data <- merge(datasets_data, data, by = "gene")

datasets_table <- datasets_data[, .(count = .N), by = c("dataset", "group")]
datasets_table[, total := sum(count), by = dataset]
datasets_table[, proportion := count / total]

group_plots <- list()

for (group_value in datasets_table[, unique(group)]) {
  group_plot <- plotly::plot_ly() |>
    plotly::add_bars(
      data = datasets_table[group == group_value],
      x = ~dataset,
      color = ~dataset,
      y = ~proportion
    ) |>
    plotly::layout(
      xaxis = list(
        categoryarray = datasets_table[, unique(dataset)],
        title = ""
      ),
      yaxis = list(
        range = c(0.0, 1.0),
        title = ""
      ),
      font = list(size = 8),
      margin = list(
        pad = 2,
        l = 0,
        r = 0,
        t = 0,
        b = 36
      )
    )

  plotly::save_image(
    group_plot |> plotly::hide_legend(),
    file = here(glue::glue("scripts/output/gene_sets_{group_value}.svg")),
    width = 3 * 72,
    height = 4 * 72,
    scale = 96 / 72
  )

  group_plots <- c(group_plots, list(group_plot))
}

plotly::save_image(
  group_plot,
  file = here(glue::glue("scripts/output/gene_sets_legend.svg")),
  width = 6.27 * 72,
  height = 6.27 * 72,
  scale = 96 / 72
)

data[, count := 0]

for (dataset in datasets) {
  data[gene %chin% dataset, count := count + 1]
}

threshold_gtex <- data[percentile_gtex >= 0.95, min(score_gtex)]
threshold_cmap <- data[percentile_cmap >= 0.95, min(score_cmap)]

fig <- plotly::plot_ly() |>
  plotly::add_markers(
    data = data[count >= 1 & !(gene %chin% genes_literature)],
    x = ~score_gtex,
    y = ~score_cmap,
    color = ~count,
    colors = c("#7d19bf", "#ff7f2a"),
    marker = list(
      size = 4,
      opacity = 0.8
    ),
    cliponaxis = FALSE
  ) |>
  plotly::add_text(
    data = merge(
      data[gene %chin% genes_literature],
      genes_literature_ids
    ),
    x = ~score_gtex,
    y = ~score_cmap,
    text = ~ as.character(literature_id),
    textfont = list(
      size = 8,
      color = "green"
    )
  ) |>
  plotly::layout(
    xaxis = list(
      title = "Ranking based on GTEx",
      range = c(0, 1)
    ),
    yaxis = list(
      title = "Ranking based on CMap",
      range = c(0, 1)
    ),
    annotations = list(
      list(
        text = "95%",
        x = threshold_gtex,
        y = 1,
        xshift = 2,
        yshift = 3,
        yref = "paper",
        xanchor = "left",
        yanchor = "top",
        showarrow = FALSE
      ),
      list(
        text = "95%",
        x = 1,
        y = threshold_cmap,
        yshift = 2,
        xref = "paper",
        xanchor = "right",
        yanchor = "bottom",
        showarrow = FALSE
      )
    ),
    shapes = list(
      list(
        type = "line",
        y0 = 0,
        y1 = 1,
        yref = "paper",
        x0 = threshold_gtex,
        x1 = threshold_gtex,
        line = list(
          color = "#00000080",
          opacity = 0.5,
          width = 1,
          dash = "dot"
        )
      ),
      list(
        type = "line",
        y0 = threshold_cmap,
        y1 = threshold_cmap,
        x0 = 0,
        x1 = 1,
        xref = "paper",
        line = list(
          color = "#00000080",
          width = 1,
          opacity = 0.5,
          dash = "dot"
        )
      )
    ),
    font = list(size = 8),
    margin = list(
      pad = 2,
      l = 36,
      r = 0,
      t = 0,
      b = 36
    )
  ) |>
  plotly::hide_legend()

plotly::save_image(
  fig |> plotly::hide_colorbar(),
  file = here("scripts/output/comparison.svg"),
  width = 6.27 * 72,
  height = 6.27 * 72,
  scale = 96 / 72
)

plotly::save_image(
  fig,
  file = here("scripts/output/comparison_legend.svg"),
  width = 6.27 * 72,
  height = 6.27 * 72,
  scale = 96 / 72
)