mirror of
https://github.com/johrpan/geposanui.git
synced 2025-10-26 19:27:24 +01:00
Enhance progress information
This commit is contained in:
parent
8e54dacd3d
commit
ba7c624705
3 changed files with 49 additions and 26 deletions
20
clustering.R
20
clustering.R
|
|
@ -1,4 +1,5 @@
|
||||||
library(data.table)
|
library(data.table)
|
||||||
|
library(progress)
|
||||||
library(rlog)
|
library(rlog)
|
||||||
|
|
||||||
#' Process genes clustering their distance to telomeres.
|
#' Process genes clustering their distance to telomeres.
|
||||||
|
|
@ -17,15 +18,22 @@ process_clustering <- function(distances, species_ids, gene_ids) {
|
||||||
results <- data.table(gene = gene_ids)
|
results <- data.table(gene = gene_ids)
|
||||||
gene_count <- length(gene_ids)
|
gene_count <- length(gene_ids)
|
||||||
|
|
||||||
for (i in 1:gene_count) {
|
|
||||||
gene_id <- gene_ids[i]
|
|
||||||
|
|
||||||
log_info(sprintf(
|
log_info(sprintf(
|
||||||
"[%3i%%] Processing gene \"%s\"",
|
"Clustering %i genes from %i species",
|
||||||
round(i / gene_count * 100),
|
gene_count,
|
||||||
gene_id
|
length(species_ids)
|
||||||
))
|
))
|
||||||
|
|
||||||
|
progress <- progress_bar$new(
|
||||||
|
total = gene_count,
|
||||||
|
format = "Clustering genes [:bar] :percent (ETA :eta)"
|
||||||
|
)
|
||||||
|
|
||||||
|
for (i in 1:gene_count) {
|
||||||
|
progress$tick()
|
||||||
|
|
||||||
|
gene_id <- gene_ids[i]
|
||||||
|
|
||||||
data <- distances[
|
data <- distances[
|
||||||
species %chin% species_ids & gene == gene_id,
|
species %chin% species_ids & gene == gene_id,
|
||||||
.(species, distance)
|
.(species, distance)
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
library(data.table)
|
library(data.table)
|
||||||
|
library(progress)
|
||||||
library(rlog)
|
library(rlog)
|
||||||
|
|
||||||
#' Compute the mean correlation coefficient comparing gene distances with a set
|
#' Compute the mean correlation coefficient comparing gene distances with a set
|
||||||
|
|
@ -15,24 +16,29 @@ library(rlog)
|
||||||
#' @param reference_gene_ids Genes to compare to.
|
#' @param reference_gene_ids Genes to compare to.
|
||||||
process_correlation <- function(distances, species_ids, gene_ids,
|
process_correlation <- function(distances, species_ids, gene_ids,
|
||||||
reference_gene_ids) {
|
reference_gene_ids) {
|
||||||
log_info("Processing genes for correlation")
|
|
||||||
|
|
||||||
results <- data.table(gene = gene_ids)
|
results <- data.table(gene = gene_ids)
|
||||||
gene_count <- length(gene_ids)
|
gene_count <- length(gene_ids)
|
||||||
reference_count <- length(reference_gene_ids)
|
reference_count <- length(reference_gene_ids)
|
||||||
|
|
||||||
|
log_info(sprintf(
|
||||||
|
"Correlating %i genes from %i species with %i reference genes",
|
||||||
|
gene_count,
|
||||||
|
length(species_ids),
|
||||||
|
reference_count
|
||||||
|
))
|
||||||
|
|
||||||
|
progress <- progress_bar$new(
|
||||||
|
total = gene_count,
|
||||||
|
format = "Correlating genes [:bar] :percent (ETA :eta)"
|
||||||
|
)
|
||||||
|
|
||||||
# Prefilter distances by species.
|
# Prefilter distances by species.
|
||||||
distances <- distances[species %chin% species_ids]
|
distances <- distances[species %chin% species_ids]
|
||||||
|
|
||||||
for (i in 1:gene_count) {
|
for (i in 1:gene_count) {
|
||||||
|
progress$tick()
|
||||||
|
|
||||||
gene_id <- gene_ids[i]
|
gene_id <- gene_ids[i]
|
||||||
|
|
||||||
log_info(sprintf(
|
|
||||||
"[%3i%%] Processing gene \"%s\"",
|
|
||||||
round(i / gene_count * 100),
|
|
||||||
gene_id
|
|
||||||
))
|
|
||||||
|
|
||||||
gene_distances <- distances[gene == gene_id]
|
gene_distances <- distances[gene == gene_id]
|
||||||
|
|
||||||
if (nrow(gene_distances) < 12) {
|
if (nrow(gene_distances) < 12) {
|
||||||
|
|
|
||||||
29
input.R
29
input.R
|
|
@ -1,5 +1,6 @@
|
||||||
library(biomaRt)
|
library(biomaRt)
|
||||||
library(data.table)
|
library(data.table)
|
||||||
|
library(progress)
|
||||||
library(rlog)
|
library(rlog)
|
||||||
library(stringr)
|
library(stringr)
|
||||||
|
|
||||||
|
|
@ -115,6 +116,23 @@ retrieve_genes <- function() {
|
||||||
#' - `gene` Ensembl gene ID.
|
#' - `gene` Ensembl gene ID.
|
||||||
#' - `distance` Distance to nearest telomere in base pairs.
|
#' - `distance` Distance to nearest telomere in base pairs.
|
||||||
retrieve_distances <- function(species_ids, gene_ids) {
|
retrieve_distances <- function(species_ids, gene_ids) {
|
||||||
|
# Exclude the human from the species, in case it is present there.
|
||||||
|
species_ids <- species_ids[species_ids != "hsapiens"]
|
||||||
|
|
||||||
|
species_count <- length(species_ids)
|
||||||
|
gene_count <- length(gene_ids)
|
||||||
|
|
||||||
|
log_info(sprintf(
|
||||||
|
"Retrieving distance data for %i genes from %i species",
|
||||||
|
gene_count,
|
||||||
|
species_count
|
||||||
|
))
|
||||||
|
|
||||||
|
progress <- progress_bar$new(
|
||||||
|
total = gene_count,
|
||||||
|
format = "Retrieving distance data [:bar] :percent (ETA :eta)"
|
||||||
|
)
|
||||||
|
|
||||||
# Special case the human species and retrieve all available distance
|
# Special case the human species and retrieve all available distance
|
||||||
# information.
|
# information.
|
||||||
|
|
||||||
|
|
@ -148,19 +166,10 @@ retrieve_distances <- function(species_ids, gene_ids) {
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
# Exclude the human from the species, in case it is present there.
|
|
||||||
species_ids <- species_ids[species_ids != "hsapiens"]
|
|
||||||
|
|
||||||
species_count <- length(species_ids)
|
|
||||||
|
|
||||||
for (i in 1:species_count) {
|
for (i in 1:species_count) {
|
||||||
species_id <- species_ids[i]
|
species_id <- species_ids[i]
|
||||||
|
|
||||||
log_info(sprintf(
|
progress$tick()
|
||||||
"[%3i%%] Loading species \"%s\"",
|
|
||||||
round(i / species_count * 100),
|
|
||||||
species_id
|
|
||||||
))
|
|
||||||
|
|
||||||
ensembl <- useDataset(
|
ensembl <- useDataset(
|
||||||
sprintf("%s_gene_ensembl", species_id),
|
sprintf("%s_gene_ensembl", species_id),
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue