From 599f09a52ff386bc0c76685d77daefffade71d75 Mon Sep 17 00:00:00 2001 From: Elias Projahn Date: Mon, 22 Nov 2021 15:16:05 +0100 Subject: [PATCH] Remove position analysis --- R/analyze.R | 6 ------ R/clusteriness.R | 11 +++-------- R/correlation.R | 21 ++++++--------------- R/neural.R | 27 ++++++--------------------- R/preset.R | 2 -- 5 files changed, 15 insertions(+), 52 deletions(-) diff --git a/R/analyze.R b/R/analyze.R index 6db623b..e443ca0 100644 --- a/R/analyze.R +++ b/R/analyze.R @@ -32,13 +32,7 @@ analyze <- function(preset, progress = NULL) { # - `score` Score for the gene between 0.0 and 1.0. methods <- list( "clusteriness" = clusteriness, - "clusteriness_positions" = function(...) { - clusteriness(..., use_positions = TRUE) - }, "correlation" = correlation, - "correlation_positions" = function(...) { - correlation(..., use_positions = TRUE) - }, "neural" = neural, "proximity" = proximity ) diff --git a/R/clusteriness.R b/R/clusteriness.R index f2490a1..d884000 100644 --- a/R/clusteriness.R +++ b/R/clusteriness.R @@ -36,11 +36,11 @@ clusteriness_priv <- function(data, height = 1000000) { } # Process genes clustering their distance to telomeres. -clusteriness <- function(preset, use_positions = FALSE, progress = NULL) { +clusteriness <- function(preset, progress = NULL) { species_ids <- preset$species_ids gene_ids <- preset$gene_ids - cached("clusteriness", c(species_ids, gene_ids, use_positions), { + cached("clusteriness", c(species_ids, gene_ids), { results <- data.table(gene = gene_ids) # Prefilter the input data by species. @@ -54,12 +54,7 @@ clusteriness <- function(preset, use_positions = FALSE, progress = NULL) { # Perform the cluster analysis for one gene. compute <- function(gene_id) { - data <- if (use_positions) { - distances[gene_id, position] - } else { - distances[gene_id, distance] - } - + data <- distances[gene_id, distance] score <- clusteriness_priv(data) if (!is.null(progress)) { diff --git a/R/correlation.R b/R/correlation.R index e0057a8..60831a1 100644 --- a/R/correlation.R +++ b/R/correlation.R @@ -1,14 +1,12 @@ # Compute the mean correlation coefficient comparing gene distances with a set # of reference genes. -correlation <- function(preset, use_positions = FALSE, progress = NULL) { +correlation <- function(preset, progress = NULL) { species_ids <- preset$species_ids gene_ids <- preset$gene_ids reference_gene_ids <- preset$reference_gene_ids cached( - "correlation", - c(species_ids, gene_ids, reference_gene_ids, use_positions), - { # nolint + "correlation", c(species_ids, gene_ids, reference_gene_ids), { # Prefilter distances by species. distances <- geposan::distances[species %chin% species_ids] @@ -20,17 +18,10 @@ correlation <- function(preset, use_positions = FALSE, progress = NULL) { # Make a column containing distance data for each species. for (species_id in species_ids) { - species_data <- if (use_positions) { - setnames(distances[ - species == species_id, - .(gene, position) - ], "position", "distance") - } else { - distances[ - species == species_id, - .(gene, distance) - ] - } + species_data <- distances[ + species == species_id, + .(gene, distance) + ] data <- merge(data, species_data, all.x = TRUE) setnames(data, "distance", species_id) diff --git a/R/neural.R b/R/neural.R index 91f332c..c36bc98 100644 --- a/R/neural.R +++ b/R/neural.R @@ -25,10 +25,7 @@ neural <- function(preset, progress = NULL, seed = 49641) { # Make a columns containing positions and distances for each # species. for (species_id in species_ids) { - species_data <- distances[ - species == species_id, - .(gene, position, distance) - ] + species_data <- distances[species == species_id, .(gene, distance)] # Only include species with at least 25% known values. As # positions and distances always coexist, we don't loose any @@ -46,26 +43,14 @@ neural <- function(preset, progress = NULL, seed = 49641) { # However, this will of course lessen the significance of # the results. - mean_position <- round(species_data[, mean(position)]) mean_distance <- round(species_data[, mean(distance)]) + data[is.na(distance), `:=`(distance = mean_distance)] - data[is.na(distance), `:=`( - position = mean_position, - distance = mean_distance - )] + # Name the new column after the species. + setnames(data, "distance", species_id) - input_position <- sprintf("%s_position", species_id) - input_distance <- sprintf("%s_distance", species_id) - - # Name the new columns after the species. - setnames( - data, - c("position", "distance"), - c(input_position, input_distance) - ) - - # Add the input variables to the buffer. - input_vars <- c(input_vars, input_position, input_distance) + # Add the input variable to the buffer. + input_vars <- c(input_vars, species_id) } } diff --git a/R/preset.R b/R/preset.R index cc21615..e3b0edd 100644 --- a/R/preset.R +++ b/R/preset.R @@ -40,9 +40,7 @@ #' @export preset <- function(methods = c( "clusteriness", - "clusteriness_positions", "correlation", - "correlation_positions", "neural", "proximity" ),