Allow selecting the expression dataset

This commit is contained in:
Elias Projahn 2022-12-01 21:23:46 +01:00
parent 2f24812c90
commit 510fafeb6e
15 changed files with 110 additions and 57 deletions

View file

@ -1,2 +1,9 @@
#' A `data.table` containing data on genes and their expression behavior. #' A `data.table` containing mappings of Ensembl gene ID to HGNC symbol.
"genes" "genes"
#' The results from the analysis across all GTEx samples.
"gtex_all"
#' The results from the analysis across Human Protein Atlas' tissue aggregated
#' data.
"hpa_tissues"

View file

@ -10,7 +10,7 @@
#' percentiles for each gene. #' percentiles for each gene.
#' #'
#' @export #' @export
rank_genes <- function(data = ubigen::genes, rank_genes <- function(data = ubigen::gtex_all,
cross_sample_metric = "above_95", cross_sample_metric = "above_95",
cross_sample_weight = 0.5, cross_sample_weight = 0.5,
level_metric = "median_expression_normalized", level_metric = "median_expression_normalized",

View file

@ -1,8 +1,19 @@
#' Server implementing the main user interface. #' Server implementing the main user interface.
#' @noRd #' @noRd
server <- function(input, output, session) { server <- function(input, output, session) {
dataset <- reactive({
analysis <- if (input$dataset == "hpa_tissues") {
ubigen::hpa_tissues
} else {
ubigen::gtex_all
}
merge(analysis, ubigen::genes, by = "gene")
})
ranked_data <- reactive({ ranked_data <- reactive({
rank_genes( rank_genes(
data = dataset(),
cross_sample_metric = input$cross_sample_metric, cross_sample_metric = input$cross_sample_metric,
cross_sample_weight = input$cross_sample_weight, cross_sample_weight = input$cross_sample_weight,
level_metric = input$level_metric, level_metric = input$level_metric,

8
R/ui.R
View file

@ -19,6 +19,14 @@ ui <- function() {
h3("Your genes"), h3("Your genes"),
gene_selector_ui("custom_genes"), gene_selector_ui("custom_genes"),
h3("Method"), h3("Method"),
selectInput(
"dataset",
label = strong("Expression dataset"),
list(
"GTEx (all samples)" = "gtex_all",
"Human Protein Atlas (tissues)" = "hpa_tissues"
)
),
selectInput( selectInput(
"cross_sample_metric", "cross_sample_metric",
verticalLayout( verticalLayout(

Binary file not shown.

BIN
data/gtex_all.rda Normal file

Binary file not shown.

BIN
data/hpa_tissues.rda Normal file

Binary file not shown.

View file

@ -3,14 +3,14 @@
\docType{data} \docType{data}
\name{genes} \name{genes}
\alias{genes} \alias{genes}
\title{A \code{data.table} containing data on genes and their expression behavior.} \title{A \code{data.table} containing mappings of Ensembl gene ID to HGNC symbol.}
\format{ \format{
An object of class \code{data.table} (inherits from \code{data.frame}) with 55507 rows and 20 columns. An object of class \code{data.table} (inherits from \code{data.frame}) with 55507 rows and 2 columns.
} }
\usage{ \usage{
genes genes
} }
\description{ \description{
A \code{data.table} containing data on genes and their expression behavior. A \code{data.table} containing mappings of Ensembl gene ID to HGNC symbol.
} }
\keyword{datasets} \keyword{datasets}

16
man/gtex_all.Rd Normal file
View file

@ -0,0 +1,16 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data.R
\docType{data}
\name{gtex_all}
\alias{gtex_all}
\title{The results from the analysis across all GTEx samples.}
\format{
An object of class \code{data.table} (inherits from \code{data.frame}) with 55507 rows and 17 columns.
}
\usage{
gtex_all
}
\description{
The results from the analysis across all GTEx samples.
}
\keyword{datasets}

18
man/hpa_tissues.Rd Normal file
View file

@ -0,0 +1,18 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data.R
\docType{data}
\name{hpa_tissues}
\alias{hpa_tissues}
\title{The results from the analysis across Human Protein Atlas' tissue aggregated
data.}
\format{
An object of class \code{data.table} (inherits from \code{data.frame}) with 20090 rows and 17 columns.
}
\usage{
hpa_tissues
}
\description{
The results from the analysis across Human Protein Atlas' tissue aggregated
data.
}
\keyword{datasets}

View file

@ -5,7 +5,7 @@
\title{Rank genes based on how ubiquitous they are.} \title{Rank genes based on how ubiquitous they are.}
\usage{ \usage{
rank_genes( rank_genes(
data = ubigen::genes, data = ubigen::gtex_all,
cross_sample_metric = "above_95", cross_sample_metric = "above_95",
cross_sample_weight = 0.5, cross_sample_weight = 0.5,
level_metric = "median_expression_normalized", level_metric = "median_expression_normalized",

View file

@ -1,12 +0,0 @@
# This script reads the input data (see input.R) and performs various
# computations on it in order to later use the results for computing scores
# for ubiquitously expressed genes.

library(data.table)
library(here)

i_am("scripts/input.R")

# Resolve both locations relative to the project root before doing any work.
input_path <- here("scripts", "input", "data_long.csv")
output_path <- here("scripts", "output", "results.csv")

# Run the analysis on the input table and write the results as CSV.
results <- ubigen::analyze(fread(input_path))
fwrite(results, file = output_path)

32
scripts/gtex_all.R Normal file
View file

@ -0,0 +1,32 @@
# This script analyzes the input expression data (see input/data_long.csv) and
# stores the results as the package dataset `gtex_all`. It also stores the
# gene table (see input/genes.csv) as the package dataset `genes`.

library(data.table)
library(here)

# Declare the location of this script relative to the project root.
i_am("scripts/gtex_all.R")

data <- fread(here("scripts", "input", "data_long.csv"))
gtex_all <- ubigen::analyze(data)

# To save memory, the data includes fake IDs for genes. The actual Ensembl IDs
# are part of the separate genes table.
genes <- fread(here("scripts", "input", "genes.csv"))

# Replace the fake IDs with the actual gene IDs. The merged table has to be
# assigned back to `gtex_all`, because that is the object that is saved below.
# Otherwise the saved dataset would keep the fake `id` column and lack the
# `gene` column.
setnames(gtex_all, "gene", "id")
gtex_all <- merge(
  gtex_all,
  genes[, .(id, gene)],
  by = "id",
  all.x = TRUE,
  sort = FALSE
)
gtex_all[, id := NULL]

usethis::use_data(gtex_all, overwrite = TRUE)

# The fake IDs are not needed in the package data.
genes[, id := NULL]
usethis::use_data(genes, overwrite = TRUE)

12
scripts/hpa.R Normal file
View file

@ -0,0 +1,12 @@
# This script analyzes the Human Protein Atlas tissue expression data and
# stores the results as the package dataset `hpa_tissues`.

library(data.table)
library(here)

i_am("scripts/hpa.R")

# Source: https://www.proteinatlas.org/download/rna_tissue_hpa.tsv.zip
input_path <- here("scripts", "input", "rna_tissue_hpa.tsv")
data <- fread(input_path)

# Drop the columns that are not passed on to the analysis.
data[, c("Gene name", "TPM", "pTPM") := NULL]

# Rename the columns that are kept for the analysis.
setnames(
  data,
  old = c("Gene", "Tissue", "nTPM"),
  new = c("gene", "sample", "expression")
)

hpa_tissues <- ubigen::analyze(data)
usethis::use_data(hpa_tissues, overwrite = TRUE)

View file

@ -1,39 +0,0 @@
# This script takes the analysis results (see results.csv) and computes a
# score for each gene. The output is the data that will be used in the
# package.

library(data.table)
library(here)

i_am("scripts/input.R")

# To save memory, the data includes fake IDs for genes. The actual Ensembl IDs
# are part of the separate genes table.
genes <- fread(here("scripts", "input", "genes.csv"))
results <- fread(here("scripts", "output", "results.csv"))

# Rank the data using default parameters.
ranked <- ubigen::rank_genes(data = results)

# Reintroduce gene IDs and HGNC symbols. The existing row order is kept
# (`sort = FALSE`).
setnames(ranked, "gene", "id")
ranked <- merge(ranked, genes, by = "id", all.x = TRUE, sort = FALSE)
setnames(ranked, "hgnc_symbol", "hgnc_name")
ranked[, id := NULL]

# Remove duplicates. This will keep the best row for each duplicated gene.
ranked <- unique(ranked, by = "gene")

# Reassign ranks, because duplicates may have been removed.
ranked[, rank := .I]

fwrite(ranked, file = here("scripts", "output", "genes.csv"))