mirror of
https://github.com/johrpan/geposanui.git
synced 2025-10-26 11:17:24 +01:00
Add new method proximity
This commit is contained in:
parent
9b0b3c13f5
commit
7b9a42215e
2 changed files with 36 additions and 0 deletions
|
|
@ -1,6 +1,7 @@
|
||||||
source("clusteriness.R")
|
source("clusteriness.R")
|
||||||
source("correlation.R")
|
source("correlation.R")
|
||||||
source("neural.R")
|
source("neural.R")
|
||||||
|
source("proximity.R")
|
||||||
|
|
||||||
#' Construct a new method.
|
#' Construct a new method.
|
||||||
#'
|
#'
|
||||||
|
|
@ -47,6 +48,12 @@ methods <- list(
|
||||||
"Correlation with known genes",
|
"Correlation with known genes",
|
||||||
process_correlation
|
process_correlation
|
||||||
),
|
),
|
||||||
|
method(
|
||||||
|
"proximity",
|
||||||
|
"Proximity",
|
||||||
|
"Proximity to telomeres",
|
||||||
|
process_proximity
|
||||||
|
),
|
||||||
method(
|
method(
|
||||||
"neural",
|
"neural",
|
||||||
"Neural",
|
"Neural",
|
||||||
|
|
|
||||||
29
proximity.R
Normal file
29
proximity.R
Normal file
|
|
@ -0,0 +1,29 @@
|
||||||
|
library(data.table)
|
||||||
|
|
||||||
|
#' Score the mean distance of genes to the telomeres across species.
#'
#' A score will be given to each gene such that 0.0 corresponds to the maximal
#' mean distance across all genes and 1.0 corresponds to a distance of 0.
#'
#' The result will be a data.table with the following columns:
#'
#'  - `gene` Gene ID of the processed gene.
#'  - `score` Score for the proximity.
#'
#' @param distances Distance data to use. The columns `species`, `gene` and
#'   `distance` are read from it.
#' @param species_ids Species, whose data should be included.
#' @param gene_ids Genes to process.
#'   NOTE(review): this argument is currently not used for filtering; a score
#'   is returned for every gene that remains in `distances` after the species
#'   filter. Confirm whether callers expect a restriction to `gene_ids`.
#' @param ... Accepted for a uniform method signature; ignored here.
#'
#' @return A data.table with the columns `gene` and `score`. It has zero rows
#'   if no distance data is available for the requested species.
process_proximity <- function(distances, species_ids, gene_ids, ...) {
  # Prefilter distances by species.
  distances <- distances[species %chin% species_ids]

  # Without any data there is nothing to score. Return early, because max()
  # on an empty vector would emit a warning and yield -Inf.
  if (nrow(distances) == 0) {
    return(distances[, .(gene, score = numeric())])
  }

  # Compute the score as described above.

  distances <- distances[, .(mean_distance = mean(distance)), by = "gene"]
  max_distance <- distances[, max(mean_distance)]

  if (isTRUE(max_distance == 0)) {
    # Every gene has a mean distance of exactly 0. Dividing by the maximum
    # would result in 0 / 0 = NaN, while a distance of 0 is defined to
    # correspond to a score of 1.0.
    distances[, score := 1]
  } else {
    distances[, score := 1 - mean_distance / max_distance]
  }

  distances[, .(gene, score)]
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue