mirror of
https://github.com/johrpan/geposanui.git
synced 2025-10-26 11:17:24 +01:00
Enhance progress information
This commit is contained in:
parent
8e54dacd3d
commit
ba7c624705
3 changed files with 49 additions and 26 deletions
22
clustering.R
22
clustering.R
|
|
@ -1,4 +1,5 @@
|
|||
library(data.table)
|
||||
library(progress)
|
||||
library(rlog)
|
||||
|
||||
#' Process genes clustering their distance to telomeres.
|
||||
|
|
@ -17,14 +18,21 @@ process_clustering <- function(distances, species_ids, gene_ids) {
|
|||
results <- data.table(gene = gene_ids)
|
||||
gene_count <- length(gene_ids)
|
||||
|
||||
for (i in 1:gene_count) {
|
||||
gene_id <- gene_ids[i]
|
||||
log_info(sprintf(
|
||||
"Clustering %i genes from %i species",
|
||||
gene_count,
|
||||
length(species_ids)
|
||||
))
|
||||
|
||||
log_info(sprintf(
|
||||
"[%3i%%] Processing gene \"%s\"",
|
||||
round(i / gene_count * 100),
|
||||
gene_id
|
||||
))
|
||||
progress <- progress_bar$new(
|
||||
total = gene_count,
|
||||
format = "Clustering genes [:bar] :percent (ETA :eta)"
|
||||
)
|
||||
|
||||
for (i in 1:gene_count) {
|
||||
progress$tick()
|
||||
|
||||
gene_id <- gene_ids[i]
|
||||
|
||||
data <- distances[
|
||||
species %chin% species_ids & gene == gene_id,
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
library(data.table)
|
||||
library(progress)
|
||||
library(rlog)
|
||||
|
||||
#' Compute the mean correlation coefficient comparing gene distances with a set
|
||||
|
|
@ -15,24 +16,29 @@ library(rlog)
|
|||
#' @param reference_gene_ids Genes to compare to.
|
||||
process_correlation <- function(distances, species_ids, gene_ids,
|
||||
reference_gene_ids) {
|
||||
log_info("Processing genes for correlation")
|
||||
|
||||
results <- data.table(gene = gene_ids)
|
||||
gene_count <- length(gene_ids)
|
||||
reference_count <- length(reference_gene_ids)
|
||||
|
||||
log_info(sprintf(
|
||||
"Correlating %i genes from %i species with %i reference genes",
|
||||
gene_count,
|
||||
length(species_ids),
|
||||
reference_count
|
||||
))
|
||||
|
||||
progress <- progress_bar$new(
|
||||
total = gene_count,
|
||||
format = "Correlating genes [:bar] :percent (ETA :eta)"
|
||||
)
|
||||
|
||||
# Prefilter distances by species.
|
||||
distances <- distances[species %chin% species_ids]
|
||||
|
||||
for (i in 1:gene_count) {
|
||||
progress$tick()
|
||||
|
||||
gene_id <- gene_ids[i]
|
||||
|
||||
log_info(sprintf(
|
||||
"[%3i%%] Processing gene \"%s\"",
|
||||
round(i / gene_count * 100),
|
||||
gene_id
|
||||
))
|
||||
|
||||
gene_distances <- distances[gene == gene_id]
|
||||
|
||||
if (nrow(gene_distances) < 12) {
|
||||
|
|
|
|||
29
input.R
29
input.R
|
|
@ -1,5 +1,6 @@
|
|||
library(biomaRt)
|
||||
library(data.table)
|
||||
library(progress)
|
||||
library(rlog)
|
||||
library(stringr)
|
||||
|
||||
|
|
@ -115,6 +116,23 @@ retrieve_genes <- function() {
|
|||
#' - `gene` Ensembl gene ID.
|
||||
#' - `distance` Distance to nearest telomere in base pairs.
|
||||
retrieve_distances <- function(species_ids, gene_ids) {
|
||||
# Exclude the human from the species, in case it is present there.
|
||||
species_ids <- species_ids[species_ids != "hsapiens"]
|
||||
|
||||
species_count <- length(species_ids)
|
||||
gene_count <- length(gene_ids)
|
||||
|
||||
log_info(sprintf(
|
||||
"Retrieving distance data for %i genes from %i species",
|
||||
gene_count,
|
||||
species_count
|
||||
))
|
||||
|
||||
progress <- progress_bar$new(
|
||||
total = gene_count,
|
||||
format = "Retrieving distance data [:bar] :percent (ETA :eta)"
|
||||
)
|
||||
|
||||
# Special case the human species and retrieve all available distance
|
||||
# information.
|
||||
|
||||
|
|
@ -148,19 +166,10 @@ retrieve_distances <- function(species_ids, gene_ids) {
|
|||
)
|
||||
]
|
||||
|
||||
# Exclude the human from the species, in case it is present there.
|
||||
species_ids <- species_ids[species_ids != "hsapiens"]
|
||||
|
||||
species_count <- length(species_ids)
|
||||
|
||||
for (i in 1:species_count) {
|
||||
species_id <- species_ids[i]
|
||||
|
||||
log_info(sprintf(
|
||||
"[%3i%%] Loading species \"%s\"",
|
||||
round(i / species_count * 100),
|
||||
species_id
|
||||
))
|
||||
progress$tick()
|
||||
|
||||
ensembl <- useDataset(
|
||||
sprintf("%s_gene_ensembl", species_id),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue