Allow selecting the expression dataset

This commit is contained in:
Elias Projahn 2022-12-01 21:23:46 +01:00
parent 2f24812c90
commit 510fafeb6e
15 changed files with 110 additions and 57 deletions

View file

@ -1,12 +0,0 @@
# This script reads the long-format input data and passes it to
# ubigen::analyze(). The results are written to a CSV file so that they can
# later be used for computing scores for ubiquitously expressed genes.

library(data.table)
library(here)

# NOTE(review): this declares the script's location as scripts/input.R —
# confirm that this matches the actual path of this file.
i_am("scripts/input.R")

input_path <- here("scripts", "input", "data_long.csv")
output_path <- here("scripts", "output", "results.csv")

expression_data <- fread(input_path)
analysis_results <- ubigen::analyze(expression_data)

fwrite(analysis_results, file = output_path)

32
scripts/gtex_all.R Normal file
View file

@ -0,0 +1,32 @@
# This script analyzes the expression data in data_long.csv and stores the
# results (`gtex_all`) as well as the gene table (`genes`) as package data.

library(data.table)
library(here)

# Declare the location of this script relative to the project root.
i_am("scripts/gtex_all.R")

data <- fread(here("scripts", "input", "data_long.csv"))
gtex_all <- ubigen::analyze(data)

# To save memory, the data includes fake IDs for genes. The actual Ensembl IDs
# are part of the separate genes table.
genes <- fread(here("scripts", "input", "genes.csv"))

# Replace the fake IDs with the actual gene IDs. The merged table has to be
# assigned back to `gtex_all`, because that is the object that is saved below.
# `sort = FALSE` keeps the row order of the analysis results and `all.x = TRUE`
# keeps rows without a match in the genes table.
setnames(gtex_all, "gene", "id")
gtex_all <- merge(
  gtex_all,
  genes[, .(id, gene)],
  by = "id",
  all.x = TRUE,
  sort = FALSE
)
gtex_all[, id := NULL]

usethis::use_data(gtex_all, overwrite = TRUE)

# The fake IDs are no longer needed once the results carry the actual gene IDs.
genes[, id := NULL]
usethis::use_data(genes, overwrite = TRUE)

12
scripts/hpa.R Normal file
View file

@ -0,0 +1,12 @@
# This script reads the tissue expression table listed under "Source" below,
# brings it into the column layout passed to ubigen::analyze() and stores the
# analysis results as package data (`hpa_tissues`).

library(data.table)
library(here)

i_am("scripts/hpa.R")

# Source: https://www.proteinatlas.org/download/rna_tissue_hpa.tsv.zip
hpa_path <- here("scripts", "input", "rna_tissue_hpa.tsv")
hpa_expression <- fread(hpa_path)

# Rename the columns that are kept for the analysis.
setnames(
  hpa_expression,
  old = c("Gene", "Tissue", "nTPM"),
  new = c("gene", "sample", "expression")
)

# Drop the remaining columns by reference.
hpa_expression[, c("Gene name", "TPM", "pTPM") := NULL]

hpa_tissues <- ubigen::analyze(hpa_expression)
usethis::use_data(hpa_tissues, overwrite = TRUE)

View file

@ -1,39 +0,0 @@
# This script takes the analysis results (see results.csv), ranks the genes
# and writes the ranking including the actual gene IDs to genes.csv. This is
# the data that will be used in the package.

library(data.table)
library(here)

# NOTE(review): this declares the script's location as scripts/input.R —
# confirm that this matches the actual path of this file.
i_am("scripts/input.R")

# To save memory, the data includes fake IDs for genes. The actual Ensembl IDs
# are part of the separate genes table.
gene_table <- fread(here("scripts", "input", "genes.csv"))
analysis_results <- fread(here("scripts", "output", "results.csv"))

# Rank the genes. Apart from the data, no arguments are passed, so the
# defaults of ubigen::rank_genes() apply.
ranking <- ubigen::rank_genes(data = analysis_results)

# Swap the fake IDs for the gene IDs and HGNC symbols from the genes table.
# `sort = FALSE` keeps the row order of the ranking.
setnames(ranking, "gene", "id")
ranking <- merge(ranking, gene_table, by = "id", all.x = TRUE, sort = FALSE)
setnames(ranking, "hgnc_symbol", "hgnc_name")
ranking[, id := NULL]

# Remove duplicates. unique() keeps the first row of each duplicated gene,
# which is the best one as long as the rows are still ordered by rank.
ranking <- unique(ranking, by = "gene")

# Reassign ranks as row numbers, because duplicates may have been removed.
ranking[, rank := .I]

fwrite(ranking, file = here("scripts", "output", "genes.csv"))