mirror of
https://github.com/johrpan/ubigen.git
synced 2025-10-26 19:57:24 +01:00
Allow selecting the expression dataset
This commit is contained in:
parent
2f24812c90
commit
510fafeb6e
15 changed files with 110 additions and 57 deletions
|
|
@ -1,12 +0,0 @@
|
|||
# This script reads the input data (See input.R) and performs various
|
||||
# computations on it in order to later use the results for computing scores
|
||||
# for ubiquitously expressed genes.
|
||||
|
||||
library(data.table)
|
||||
library(here)
|
||||
|
||||
# Declare where this script lives so that here() resolves paths relative to
# the project root.
i_am("scripts/input.R")

# Locations of the input data and of the file the results are written to.
input_file <- here("scripts", "input", "data_long.csv")
output_file <- here("scripts", "output", "results.csv")

# Load the input data, run the analysis and persist the results as CSV.
data <- fread(input_file)
results <- ubigen::analyze(data)
fwrite(results, file = output_file)
|
||||
32
scripts/gtex_all.R
Normal file
32
scripts/gtex_all.R
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
# This script analyzes the input data (See input/data_long.csv) and stores the
|
||||
# results, along with the genes table, as datasets within the package.
|
||||
|
||||
library(data.table)
|
||||
library(here)
|
||||
|
||||
# Declare this script's own location so that here() resolves paths relative to
# the project root.
i_am("scripts/gtex_all.R")

data <- fread(here("scripts", "input", "data_long.csv"))
gtex_all <- ubigen::analyze(data)

# To save memory, the data includes fake IDs for genes. The actual Ensembl IDs
# are part of the separate genes table.
genes <- fread(here("scripts", "input", "genes.csv"))

# Reintroduce the actual gene IDs. The merged table has to be assigned back to
# `gtex_all`, because that is the object that gets stored in the package;
# otherwise the stored dataset would still carry the fake IDs only.
setnames(gtex_all, "gene", "id")

gtex_all <- merge(
  gtex_all,
  genes[, .(id, gene)],
  by = "id",
  all.x = TRUE,
  sort = FALSE
)

# The fake ID has served its purpose as the merge key.
gtex_all[, id := NULL]

usethis::use_data(gtex_all, overwrite = TRUE)

# The fake IDs are only needed for the merge above, so they are dropped from
# the genes table before it is stored as well.
genes[, id := NULL]
usethis::use_data(genes, overwrite = TRUE)
|
||||
12
scripts/hpa.R
Normal file
12
scripts/hpa.R
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
library(data.table)
|
||||
library(here)
|
||||
|
||||
# Declare where this script lives so that here() resolves paths relative to
# the project root.
i_am("scripts/hpa.R")

# Source: https://www.proteinatlas.org/download/rna_tissue_hpa.tsv.zip
hpa_file <- here("scripts", "input", "rna_tissue_hpa.tsv")
data <- fread(hpa_file)

# Rename the columns that are passed on to the analysis.
setnames(
  data,
  old = c("Gene", "Tissue", "nTPM"),
  new = c("gene", "sample", "expression")
)

# Remove the remaining columns from the table.
data[, c("Gene name", "TPM", "pTPM") := NULL]

# Analyze the data and store the result as a dataset within the package.
hpa_tissues <- ubigen::analyze(data)
usethis::use_data(hpa_tissues, overwrite = TRUE)
|
||||
|
|
@ -1,39 +0,0 @@
|
|||
# This script uses the results (See results.csv) and computes a score for each
|
||||
# gene. This is the data that will be used in the package.
|
||||
|
||||
library(data.table)
|
||||
library(here)
|
||||
|
||||
# Declare where this script lives so that here() resolves paths relative to
# the project root.
i_am("scripts/input.R")

# To save memory, the data includes fake IDs for genes. The actual Ensembl IDs
# are part of the separate genes table.
genes <- fread(here("scripts", "input", "genes.csv"))
data <- fread(here("scripts", "output", "results.csv"))

# Rank the data using default parameters.
data <- ubigen::rank_genes(data = data)

# Reintroduce gene IDs and HGNC symbols by joining the genes table on the
# fake ID, keeping every row and the existing row order of the ranked data.
setnames(data, old = "gene", new = "id")
data <- merge(data, genes, by = "id", all.x = TRUE, sort = FALSE)

setnames(data, old = "hgnc_symbol", new = "hgnc_name")
data[, id := NULL]

# Remove duplicates. Only the first row of each duplicated gene is kept, which
# is the best one given the order of the ranked data.
data <- data[!duplicated(data, by = "gene")]

# Reassign ranks, because duplicates may have been removed.
data[, rank := seq_len(.N)]

fwrite(data, file = here("scripts", "output", "genes.csv"))
|
||||
Loading…
Add table
Add a link
Reference in a new issue