mirror of
https://github.com/johrpan/geposanui.git
synced 2025-10-26 11:17:24 +01:00
Reinclude human into distance data
This commit is contained in:
parent
22b25c9b83
commit
e9c05fdbab
2 changed files with 43 additions and 16 deletions
14
init.R
14
init.R
|
|
@ -5,11 +5,11 @@ source("util.R")
|
|||
|
||||
# Load input data
|
||||
|
||||
species <- run_cached("input/species", retrieve_species)
|
||||
genes <- run_cached("input/genes", retrieve_genes)
|
||||
species <- run_cached("input_species", retrieve_species)
|
||||
genes <- run_cached("input_genes", retrieve_genes)
|
||||
|
||||
distances <- run_cached(
|
||||
"input/distances",
|
||||
"input_distances",
|
||||
retrieve_distances,
|
||||
species[, id],
|
||||
genes[, id]
|
||||
|
|
@ -23,7 +23,7 @@ all_genes <- genes[, id]
|
|||
tpe_old_genes <- genes[suggested | verified == TRUE, id]
|
||||
|
||||
clustering_all <- run_cached(
|
||||
"all_species/clustering",
|
||||
"clustering_all",
|
||||
process_clustering,
|
||||
distances,
|
||||
all_species,
|
||||
|
|
@ -31,7 +31,7 @@ clustering_all <- run_cached(
|
|||
)
|
||||
|
||||
clustering_replicative <- run_cached(
|
||||
"replicative_species/clustering",
|
||||
"clustering_replicative",
|
||||
process_clustering,
|
||||
distances,
|
||||
replicative_species,
|
||||
|
|
@ -39,7 +39,7 @@ clustering_replicative <- run_cached(
|
|||
)
|
||||
|
||||
correlation_all <- run_cached(
|
||||
"all_species/correlation",
|
||||
"correlation_all",
|
||||
process_correlation,
|
||||
distances,
|
||||
all_species,
|
||||
|
|
@ -48,7 +48,7 @@ correlation_all <- run_cached(
|
|||
)
|
||||
|
||||
correlation_replicative <- run_cached(
|
||||
"replicative_species/correlation",
|
||||
"correlation_replicative",
|
||||
process_correlation,
|
||||
distances,
|
||||
replicative_species,
|
||||
|
|
|
|||
45
input.R
45
input.R
|
|
@ -115,11 +115,41 @@ retrieve_genes <- function() {
|
|||
#' - `gene` Ensembl gene ID.
|
||||
#' - `distance` Distance to nearest telomere in base pairs.
|
||||
retrieve_distances <- function(species_ids, gene_ids) {
|
||||
distances <- data.table(
|
||||
species = character(),
|
||||
gene = character(),
|
||||
distance = integer()
|
||||
)
|
||||
# Special case the human species and retrieve all available distance
|
||||
# information.
|
||||
|
||||
ensembl <- useDataset("hsapiens_gene_ensembl", mart = ensembl)
|
||||
|
||||
human_distances <- data.table(getBM(
|
||||
attributes = c(
|
||||
"ensembl_gene_id",
|
||||
"chromosome_name",
|
||||
"start_position",
|
||||
"end_position"
|
||||
),
|
||||
mart = ensembl
|
||||
))
|
||||
|
||||
human_distances[,
|
||||
chromosome_length := max(end_position),
|
||||
by = chromosome_name
|
||||
]
|
||||
|
||||
# Keep only the relevant information (see below).
|
||||
distances <- human_distances[
|
||||
chromosome_length > 15000000,
|
||||
.(
|
||||
species = "hsapiens",
|
||||
gene = ensembl_gene_id,
|
||||
distance = pmin(
|
||||
start_position,
|
||||
chromosome_length - end_position
|
||||
)
|
||||
)
|
||||
]
|
||||
|
||||
# Exclude the human from the species, in case it is present there.
|
||||
species_ids <- species_ids[species_ids != "hsapiens"]
|
||||
|
||||
species_count <- length(species_ids)
|
||||
|
||||
|
|
@ -156,10 +186,7 @@ retrieve_distances <- function(species_ids, gene_ids) {
|
|||
"start_position",
|
||||
"end_position"
|
||||
),
|
||||
mart = useDataset(
|
||||
sprintf("%s_gene_ensembl", species_id),
|
||||
mart = ensembl
|
||||
)
|
||||
mart = ensembl
|
||||
))
|
||||
|
||||
ensembl_distances[,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue