From 7b9a42215e1c32a7ee0acd755c5734412a8df731 Mon Sep 17 00:00:00 2001
From: Elias Projahn
Date: Fri, 15 Oct 2021 09:27:35 +0200
Subject: [PATCH] Add new method proximity

---
 methods.R   |  7 +++++++
 proximity.R | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+)
 create mode 100644 proximity.R

diff --git a/methods.R b/methods.R
index 8d91478..a15d255 100644
--- a/methods.R
+++ b/methods.R
@@ -1,6 +1,7 @@
 source("clusteriness.R")
 source("correlation.R")
 source("neural.R")
+source("proximity.R")
 
 #' Construct a new method.
 #'
@@ -47,6 +48,12 @@ methods <- list(
         "Correlation with known genes",
         process_correlation
     ),
+    method(
+        "proximity",
+        "Proximity",
+        "Proximity to telomeres",
+        process_proximity
+    ),
     method(
         "neural",
         "Neural",
diff --git a/proximity.R b/proximity.R
new file mode 100644
index 0000000..7da4363
--- /dev/null
+++ b/proximity.R
@@ -0,0 +1,43 @@
+library(data.table)
+
+#' Score the mean distance of genes to the telomeres across species.
+#'
+#' A score will be given to each gene such that 0.0 corresponds to the maximal
+#' mean distance across all processed genes and 1.0 corresponds to a distance
+#' of 0. If the maximal mean distance is itself 0, every gene scores 1.0.
+#'
+#' The result will be a data.table with the following columns:
+#'
+#' - `gene` Gene ID of the processed gene.
+#' - `score` Score for the proximity.
+#'
+#' Genes without any distance data for the selected species are not part of
+#' the result.
+#'
+#' @param distances Distance data to use. A data.table with at least the
+#'   columns `species`, `gene` and `distance`.
+#' @param species_ids Species, whose data should be included.
+#' @param gene_ids Genes to process.
+#' @param ... Additional arguments are accepted but not used.
+process_proximity <- function(distances, species_ids, gene_ids, ...) {
+    # Prefilter distances by species and gene.
+    distances <- distances[species %chin% species_ids & gene %chin% gene_ids]
+
+    # Without any data, max() below would warn and return -Inf.
+    if (nrow(distances) == 0) {
+        return(data.table(gene = character(), score = numeric()))
+    }
+
+    # Compute the score as described above.
+    scores <- distances[, .(mean_distance = mean(distance)), by = "gene"]
+    max_distance <- scores[, max(mean_distance)]
+
+    if (isTRUE(max_distance == 0)) {
+        # Every mean distance is 0; dividing would yield NaN instead of 1.0.
+        scores[, score := 1.0]
+    } else {
+        scores[, score := 1 - mean_distance / max_distance]
+    }
+
+    scores[, .(gene, score)]
+}