mirror of
https://github.com/johrpan/ubigen.git
synced 2025-10-26 19:57:24 +01:00
Allow selecting the expression dataset
This commit is contained in:
parent
2f24812c90
commit
510fafeb6e
15 changed files with 110 additions and 57 deletions
9
R/data.R
9
R/data.R
|
|
@ -1,2 +1,9 @@
|
|||
#' A `data.table` containing data on genes and their expression behavior.
|
||||
#' A `data.table` containing mappings of Ensembl gene ID to HGNC symbol.
|
||||
"genes"
|
||||
|
||||
#' The results from the analysis across all GTEx samples.
|
||||
"gtex_all"
|
||||
|
||||
#' The results from the analysis across Human Protein Atlas' tissue aggregated
|
||||
#' data.
|
||||
"hpa_tissues"
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@
|
|||
#' percentiles for each gene.
|
||||
#'
|
||||
#' @export
|
||||
rank_genes <- function(data = ubigen::genes,
|
||||
rank_genes <- function(data = ubigen::gtex_all,
|
||||
cross_sample_metric = "above_95",
|
||||
cross_sample_weight = 0.5,
|
||||
level_metric = "median_expression_normalized",
|
||||
|
|
|
|||
11
R/server.R
11
R/server.R
|
|
@ -1,8 +1,19 @@
|
|||
#' Server implementing the main user interface.
|
||||
#' @noRd
|
||||
server <- function(input, output, session) {
|
||||
dataset <- reactive({
|
||||
analysis <- if (input$dataset == "hpa_tissues") {
|
||||
ubigen::hpa_tissues
|
||||
} else {
|
||||
ubigen::gtex_all
|
||||
}
|
||||
|
||||
merge(analysis, ubigen::genes, by = "gene")
|
||||
})
|
||||
|
||||
ranked_data <- reactive({
|
||||
rank_genes(
|
||||
data = dataset(),
|
||||
cross_sample_metric = input$cross_sample_metric,
|
||||
cross_sample_weight = input$cross_sample_weight,
|
||||
level_metric = input$level_metric,
|
||||
|
|
|
|||
8
R/ui.R
8
R/ui.R
|
|
@ -19,6 +19,14 @@ ui <- function() {
|
|||
h3("Your genes"),
|
||||
gene_selector_ui("custom_genes"),
|
||||
h3("Method"),
|
||||
selectInput(
|
||||
"dataset",
|
||||
label = strong("Expression dataset"),
|
||||
list(
|
||||
"GTEx (all samples)" = "gtex_all",
|
||||
"Human Protein Atlas (tissues)" = "hpa_tissues"
|
||||
)
|
||||
),
|
||||
selectInput(
|
||||
"cross_sample_metric",
|
||||
verticalLayout(
|
||||
|
|
|
|||
BIN
data/genes.rda
BIN
data/genes.rda
Binary file not shown.
BIN
data/gtex_all.rda
Normal file
BIN
data/gtex_all.rda
Normal file
Binary file not shown.
BIN
data/hpa_tissues.rda
Normal file
BIN
data/hpa_tissues.rda
Normal file
Binary file not shown.
|
|
@ -3,14 +3,14 @@
|
|||
\docType{data}
|
||||
\name{genes}
|
||||
\alias{genes}
|
||||
\title{A \code{data.table} containing data on genes and their expression behavior.}
|
||||
\title{A \code{data.table} containing mappings of Ensembl gene ID to HGNC symbol.}
|
||||
\format{
|
||||
An object of class \code{data.table} (inherits from \code{data.frame}) with 55507 rows and 20 columns.
|
||||
An object of class \code{data.table} (inherits from \code{data.frame}) with 55507 rows and 2 columns.
|
||||
}
|
||||
\usage{
|
||||
genes
|
||||
}
|
||||
\description{
|
||||
A \code{data.table} containing data on genes and their expression behavior.
|
||||
A \code{data.table} containing mappings of Ensembl gene ID to HGNC symbol.
|
||||
}
|
||||
\keyword{datasets}
|
||||
|
|
|
|||
16
man/gtex_all.Rd
Normal file
16
man/gtex_all.Rd
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/data.R
|
||||
\docType{data}
|
||||
\name{gtex_all}
|
||||
\alias{gtex_all}
|
||||
\title{The results from the analysis across all GTEx samples.}
|
||||
\format{
|
||||
An object of class \code{data.table} (inherits from \code{data.frame}) with 55507 rows and 17 columns.
|
||||
}
|
||||
\usage{
|
||||
gtex_all
|
||||
}
|
||||
\description{
|
||||
The results from the analysis across all GTEx samples.
|
||||
}
|
||||
\keyword{datasets}
|
||||
18
man/hpa_tissues.Rd
Normal file
18
man/hpa_tissues.Rd
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/data.R
|
||||
\docType{data}
|
||||
\name{hpa_tissues}
|
||||
\alias{hpa_tissues}
|
||||
\title{The results from the analysis across Human Protein Atlas' tissue aggregated
|
||||
data.}
|
||||
\format{
|
||||
An object of class \code{data.table} (inherits from \code{data.frame}) with 20090 rows and 17 columns.
|
||||
}
|
||||
\usage{
|
||||
hpa_tissues
|
||||
}
|
||||
\description{
|
||||
The results from the analysis across Human Protein Atlas' tissue aggregated
|
||||
data.
|
||||
}
|
||||
\keyword{datasets}
|
||||
|
|
@ -5,7 +5,7 @@
|
|||
\title{Rank genes based on how ubiquitous they are.}
|
||||
\usage{
|
||||
rank_genes(
|
||||
data = ubigen::genes,
|
||||
data = ubigen::gtex_all,
|
||||
cross_sample_metric = "above_95",
|
||||
cross_sample_weight = 0.5,
|
||||
level_metric = "median_expression_normalized",
|
||||
|
|
|
|||
|
|
@ -1,12 +0,0 @@
|
|||
# This script reads the input data (See input.R) and performs various
|
||||
# computations on it in order to later use the results for computing scores
|
||||
# for ubiquitously expressed genes.
|
||||
|
||||
library(data.table)
|
||||
library(here)
|
||||
|
||||
i_am("scripts/input.R")
|
||||
|
||||
data <- fread(here("scripts", "input", "data_long.csv"))
|
||||
results <- ubigen::analyze(data)
|
||||
fwrite(results, file = here("scripts", "output", "results.csv"))
|
||||
32
scripts/gtex_all.R
Normal file
32
scripts/gtex_all.R
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
# This script runs the ubigen analysis on the long-format input data
# (scripts/input/data_long.csv) and stores the result as the `gtex_all`
# dataset of the package. It also stores the `genes` table, which is read from
# scripts/input/genes.csv.

library(data.table)
library(here)

# Declare this script's own location within the project, so that `here()`
# resolves paths relative to the project root.
i_am("scripts/gtex_all.R")

data <- fread(here("scripts", "input", "data_long.csv"))
gtex_all <- ubigen::analyze(data)

# To save memory, the data includes fake IDs for genes. The actual Ensembl IDs
# are part of the separate genes table.

genes <- fread(here("scripts", "input", "genes.csv"))

# The analysis results carry the fake ID in the `gene` column. Rename it to
# `id` so that it can be joined against the genes table.
setnames(gtex_all, "gene", "id")

# Reintroduce the real gene IDs. The result has to be assigned back to
# `gtex_all`, because that is the object that gets saved below; otherwise the
# stored dataset would keep the fake `id` column and lack the `gene` column.
gtex_all <- merge(
  gtex_all,
  genes[, .(id, gene)],
  by = "id",
  all.x = TRUE,
  sort = FALSE
)

# The fake ID is not needed anymore once the real gene ID is present.
gtex_all[, id := NULL]

usethis::use_data(gtex_all, overwrite = TRUE)

# The genes table is stored without the fake ID as well.
genes[, id := NULL]
usethis::use_data(genes, overwrite = TRUE)
|
||||
12
scripts/hpa.R
Normal file
12
scripts/hpa.R
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
# This script runs the ubigen analysis on the Human Protein Atlas tissue RNA
# data and stores the result as the `hpa_tissues` dataset of the package.

library(data.table)
library(here)

i_am("scripts/hpa.R")

# Source: https://www.proteinatlas.org/download/rna_tissue_hpa.tsv.zip
data <- fread(here("scripts", "input", "rna_tissue_hpa.tsv"))

# Rename the columns that are kept for the analysis.
setnames(
  data,
  old = c("Gene", "Tissue", "nTPM"),
  new = c("gene", "sample", "expression")
)

# Drop the remaining columns in place.
data[, c("Gene name", "TPM", "pTPM") := NULL]

hpa_tissues <- ubigen::analyze(data)
usethis::use_data(hpa_tissues, overwrite = TRUE)
|
||||
|
|
@ -1,39 +0,0 @@
|
|||
# This script uses the results (See results.csv) and computes a score for each
|
||||
# gene. This is the data that will be used in the package.
|
||||
|
||||
library(data.table)
|
||||
library(here)
|
||||
|
||||
i_am("scripts/input.R")
|
||||
|
||||
# To save memory, the data includes fake IDs for genes. The actual Ensembl IDs
|
||||
# are part of the separate genes table.
|
||||
|
||||
genes <- fread(here("scripts", "input", "genes.csv"))
|
||||
data <- fread(here("scripts", "output", "results.csv"))
|
||||
|
||||
# Rank the data using default parameters.
|
||||
data <- ubigen::rank_genes(data = data)
|
||||
|
||||
# Reintroduce gene IDs and HGNC symbols.
|
||||
|
||||
setnames(data, "gene", "id")
|
||||
|
||||
data <- merge(
|
||||
data,
|
||||
genes,
|
||||
by = "id",
|
||||
all.x = TRUE,
|
||||
sort = FALSE
|
||||
)
|
||||
|
||||
setnames(data, "hgnc_symbol", "hgnc_name")
|
||||
data[, id := NULL]
|
||||
|
||||
# Remove duplicates. This will keep the best row for each duplicated gene.
|
||||
data <- unique(data, by = "gene")
|
||||
|
||||
# Reassign ranks, because duplicates may have been removed.
|
||||
data[, rank := .I]
|
||||
|
||||
fwrite(data, file = here("scripts", "output", "genes.csv"))
|
||||
Loading…
Add table
Add a link
Reference in a new issue