mirror of
https://github.com/johrpan/ubigen.git
synced 2025-10-26 19:57:24 +01:00
Allow selecting the expression dataset
This commit is contained in:
parent
2f24812c90
commit
510fafeb6e
15 changed files with 110 additions and 57 deletions
9
R/data.R
9
R/data.R
|
|
@ -1,2 +1,9 @@
|
||||||
#' A `data.table` containing data on genes and their expression behavior.
|
#' A `data.table` containing mappings of Ensembl gene ID to HGNC symbol.
|
||||||
"genes"
|
"genes"
|
||||||
|
|
||||||
|
#' The results from the analysis across all GTEx samples.
|
||||||
|
"gtex_all"
|
||||||
|
|
||||||
|
#' The results from the analysis across Human Protein Atlas' tissue aggregated
|
||||||
|
#' data.
|
||||||
|
"hpa_tissues"
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@
|
||||||
#' percentiles for each gene.
|
#' percentiles for each gene.
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
rank_genes <- function(data = ubigen::genes,
|
rank_genes <- function(data = ubigen::gtex_all,
|
||||||
cross_sample_metric = "above_95",
|
cross_sample_metric = "above_95",
|
||||||
cross_sample_weight = 0.5,
|
cross_sample_weight = 0.5,
|
||||||
level_metric = "median_expression_normalized",
|
level_metric = "median_expression_normalized",
|
||||||
|
|
|
||||||
11
R/server.R
11
R/server.R
|
|
@ -1,8 +1,19 @@
|
||||||
#' Server implementing the main user interface.
|
#' Server implementing the main user interface.
|
||||||
#' @noRd
|
#' @noRd
|
||||||
server <- function(input, output, session) {
|
server <- function(input, output, session) {
|
||||||
|
dataset <- reactive({
|
||||||
|
analysis <- if (input$dataset == "hpa_tissues") {
|
||||||
|
ubigen::hpa_tissues
|
||||||
|
} else {
|
||||||
|
ubigen::gtex_all
|
||||||
|
}
|
||||||
|
|
||||||
|
merge(analysis, ubigen::genes, by = "gene")
|
||||||
|
})
|
||||||
|
|
||||||
ranked_data <- reactive({
|
ranked_data <- reactive({
|
||||||
rank_genes(
|
rank_genes(
|
||||||
|
data = dataset(),
|
||||||
cross_sample_metric = input$cross_sample_metric,
|
cross_sample_metric = input$cross_sample_metric,
|
||||||
cross_sample_weight = input$cross_sample_weight,
|
cross_sample_weight = input$cross_sample_weight,
|
||||||
level_metric = input$level_metric,
|
level_metric = input$level_metric,
|
||||||
|
|
|
||||||
8
R/ui.R
8
R/ui.R
|
|
@ -19,6 +19,14 @@ ui <- function() {
|
||||||
h3("Your genes"),
|
h3("Your genes"),
|
||||||
gene_selector_ui("custom_genes"),
|
gene_selector_ui("custom_genes"),
|
||||||
h3("Method"),
|
h3("Method"),
|
||||||
|
selectInput(
|
||||||
|
"dataset",
|
||||||
|
label = strong("Expression dataset"),
|
||||||
|
list(
|
||||||
|
"GTEx (all samples)" = "gtex_all",
|
||||||
|
"Human Protein Atlas (tissues)" = "hpa_tissues"
|
||||||
|
)
|
||||||
|
),
|
||||||
selectInput(
|
selectInput(
|
||||||
"cross_sample_metric",
|
"cross_sample_metric",
|
||||||
verticalLayout(
|
verticalLayout(
|
||||||
|
|
|
||||||
BIN
data/genes.rda
BIN
data/genes.rda
Binary file not shown.
BIN
data/gtex_all.rda
Normal file
BIN
data/gtex_all.rda
Normal file
Binary file not shown.
BIN
data/hpa_tissues.rda
Normal file
BIN
data/hpa_tissues.rda
Normal file
Binary file not shown.
|
|
@ -3,14 +3,14 @@
|
||||||
\docType{data}
|
\docType{data}
|
||||||
\name{genes}
|
\name{genes}
|
||||||
\alias{genes}
|
\alias{genes}
|
||||||
\title{A \code{data.table} containing data on genes and their expression behavior.}
|
\title{A \code{data.table} containing mappings of Ensembl gene ID to HGNC symbol.}
|
||||||
\format{
|
\format{
|
||||||
An object of class \code{data.table} (inherits from \code{data.frame}) with 55507 rows and 20 columns.
|
An object of class \code{data.table} (inherits from \code{data.frame}) with 55507 rows and 2 columns.
|
||||||
}
|
}
|
||||||
\usage{
|
\usage{
|
||||||
genes
|
genes
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
A \code{data.table} containing data on genes and their expression behavior.
|
A \code{data.table} containing mappings of Ensembl gene ID to HGNC symbol.
|
||||||
}
|
}
|
||||||
\keyword{datasets}
|
\keyword{datasets}
|
||||||
|
|
|
||||||
16
man/gtex_all.Rd
Normal file
16
man/gtex_all.Rd
Normal file
|
|
@ -0,0 +1,16 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/data.R
|
||||||
|
\docType{data}
|
||||||
|
\name{gtex_all}
|
||||||
|
\alias{gtex_all}
|
||||||
|
\title{The results from the analysis across all GTEx samples.}
|
||||||
|
\format{
|
||||||
|
An object of class \code{data.table} (inherits from \code{data.frame}) with 55507 rows and 17 columns.
|
||||||
|
}
|
||||||
|
\usage{
|
||||||
|
gtex_all
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
The results from the analysis across all GTEx samples.
|
||||||
|
}
|
||||||
|
\keyword{datasets}
|
||||||
18
man/hpa_tissues.Rd
Normal file
18
man/hpa_tissues.Rd
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/data.R
|
||||||
|
\docType{data}
|
||||||
|
\name{hpa_tissues}
|
||||||
|
\alias{hpa_tissues}
|
||||||
|
\title{The results from the analysis across Human Protein Atlas' tissue aggregated
|
||||||
|
data.}
|
||||||
|
\format{
|
||||||
|
An object of class \code{data.table} (inherits from \code{data.frame}) with 20090 rows and 17 columns.
|
||||||
|
}
|
||||||
|
\usage{
|
||||||
|
hpa_tissues
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
The results from the analysis across Human Protein Atlas' tissue aggregated
|
||||||
|
data.
|
||||||
|
}
|
||||||
|
\keyword{datasets}
|
||||||
|
|
@ -5,7 +5,7 @@
|
||||||
\title{Rank genes based on how ubiquitous they are.}
|
\title{Rank genes based on how ubiquitous they are.}
|
||||||
\usage{
|
\usage{
|
||||||
rank_genes(
|
rank_genes(
|
||||||
data = ubigen::genes,
|
data = ubigen::gtex_all,
|
||||||
cross_sample_metric = "above_95",
|
cross_sample_metric = "above_95",
|
||||||
cross_sample_weight = 0.5,
|
cross_sample_weight = 0.5,
|
||||||
level_metric = "median_expression_normalized",
|
level_metric = "median_expression_normalized",
|
||||||
|
|
|
||||||
|
|
@ -1,12 +0,0 @@
|
||||||
# This script reads the input data (See input.R) and performs various
|
|
||||||
# computations on it in order to later use the results for computing scores
|
|
||||||
# for ubiquitously expressed genes.
|
|
||||||
|
|
||||||
library(data.table)
|
|
||||||
library(here)
|
|
||||||
|
|
||||||
i_am("scripts/input.R")
|
|
||||||
|
|
||||||
data <- fread(here("scripts", "input", "data_long.csv"))
|
|
||||||
results <- ubigen::analyze(data)
|
|
||||||
fwrite(results, file = here("scripts", "output", "results.csv"))
|
|
||||||
32
scripts/gtex_all.R
Normal file
32
scripts/gtex_all.R
Normal file
|
|
@ -0,0 +1,32 @@
|
||||||
|
# This script reads the GTEx input data (scripts/input/data_long.csv), runs the
# ubigen analysis on it and stores the results as the package datasets
# `gtex_all` and `genes`.

library(data.table)
library(here)

i_am("scripts/gtex_all.R")

data <- fread(here("scripts", "input", "data_long.csv"))
gtex_all <- ubigen::analyze(data)

# To save memory, the data includes fake IDs for genes. The actual Ensembl IDs
# are part of the separate genes table.

genes <- fread(here("scripts", "input", "genes.csv"))

setnames(gtex_all, "gene", "id")

# Replace the fake IDs with the actual gene IDs. The merged table has to be
# assigned back to `gtex_all`, because that is the object that gets saved
# below; otherwise the stored dataset would only contain the fake `id` column
# and could not be joined with `genes` by "gene".
gtex_all <- merge(
  gtex_all,
  genes[, .(id, gene)],
  by = "id",
  all.x = TRUE,
  sort = FALSE
)

gtex_all[, id := NULL]

usethis::use_data(gtex_all, overwrite = TRUE)

# The fake IDs are only needed for the merge above.
genes[, id := NULL]
usethis::use_data(genes, overwrite = TRUE)
|
||||||
12
scripts/hpa.R
Normal file
12
scripts/hpa.R
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
# Analyze the tissue RNA expression data from the Human Protein Atlas and
# store the results as the package dataset `hpa_tissues`.

library(data.table)
library(here)

i_am("scripts/hpa.R")

# Source: https://www.proteinatlas.org/download/rna_tissue_hpa.tsv.zip
hpa_input <- fread(here("scripts", "input", "rna_tissue_hpa.tsv"))

# Rename the columns handed to the analysis to gene/sample/expression.
setnames(
  hpa_input,
  old = c("Gene", "Tissue", "nTPM"),
  new = c("gene", "sample", "expression")
)

# Drop the remaining columns by reference.
hpa_input[, c("Gene name", "TPM", "pTPM") := NULL]

hpa_tissues <- ubigen::analyze(hpa_input)
usethis::use_data(hpa_tissues, overwrite = TRUE)
|
||||||
|
|
@ -1,39 +0,0 @@
|
||||||
# This script uses the results (See results.csv) and computes a score for each
|
|
||||||
# gene. This is the data that will be used in the package.
|
|
||||||
|
|
||||||
library(data.table)
|
|
||||||
library(here)
|
|
||||||
|
|
||||||
i_am("scripts/input.R")
|
|
||||||
|
|
||||||
# To save memory, the data includes fake IDs for genes. The actual Ensembl IDs
|
|
||||||
# are part of the separate genes table.
|
|
||||||
|
|
||||||
genes <- fread(here("scripts", "input", "genes.csv"))
|
|
||||||
data <- fread(here("scripts", "output", "results.csv"))
|
|
||||||
|
|
||||||
# Rank the data using default parameters.
|
|
||||||
data <- ubigen::rank_genes(data = data)
|
|
||||||
|
|
||||||
# Reintroduce gene IDs and HGNC symbols.
|
|
||||||
|
|
||||||
setnames(data, "gene", "id")
|
|
||||||
|
|
||||||
data <- merge(
|
|
||||||
data,
|
|
||||||
genes,
|
|
||||||
by = "id",
|
|
||||||
all.x = TRUE,
|
|
||||||
sort = FALSE
|
|
||||||
)
|
|
||||||
|
|
||||||
setnames(data, "hgnc_symbol", "hgnc_name")
|
|
||||||
data[, id := NULL]
|
|
||||||
|
|
||||||
# Remove duplicates. This will keep the best row for each duplicated gene.
|
|
||||||
data <- unique(data, by = "gene")
|
|
||||||
|
|
||||||
# Reassign ranks, because duplicates may have been removed.
|
|
||||||
data[, rank := .I]
|
|
||||||
|
|
||||||
fwrite(data, file = here("scripts", "output", "genes.csv"))
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue