mirror of
https://github.com/johrpan/geposan.git
synced 2025-10-26 18:57:25 +01:00
Initial commit
This commit is contained in:
commit
c52d42c2b6
24 changed files with 1350 additions and 0 deletions
72
R/analyze.R
Normal file
72
R/analyze.R
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
#' Create a new preset.
#'
#' A preset is used to specify which methods and inputs should be used for an
#' analysis. Note that the genes to process should normally include the
#' reference genes to be able to assess the results later.
#'
#' Available methods are:
#'
#' - `clusteriness` How much the gene distances cluster across species.
#' - `correlation` The mean correlation with the reference genes.
#' - `proximity` Mean proximity to telomeres.
#' - `neural` Assessment by neural network.
#'
#' @param methods IDs of methods to apply.
#' @param species IDs of species to include.
#' @param genes IDs of genes to screen.
#' @param reference_genes IDs of reference genes to compare to.
#'
#' @return The preset to use with [analyze()]: a named list with the elements
#'   `method_ids`, `species_ids`, `gene_ids` and `reference_gene_ids`.
#'
#' @export
preset <- function(methods, species, genes, reference_genes) {
  # The arguments are stored unchanged under their `*_ids` names; no
  # validation of the IDs happens at this point.
  list(
    method_ids = methods,
    species_ids = species,
    gene_ids = genes,
    reference_gene_ids = reference_genes
  )
}
|
||||
|
||||
#' Analyze by applying the specified preset.
#'
#' @param preset The preset to use which can be created using [preset()].
#'
#' @return A [data.table] with one row for each gene identified by its ID
#'   (`gene` column). The additional columns contain the resulting scores per
#'   method and are named after the method IDs.
#'
#' @export
analyze <- function(preset) {
  # Available methods by ID.
  #
  # A method describes a way to perform a computation on gene distance data
  # that results in a single score per gene. The function is called with the
  # distance data as its first argument and the preset to apply as its second
  # argument (see [preset()]).
  #
  # The function should return a [data.table] with the following columns:
  #
  # - `gene` Gene ID of the processed gene.
  # - `score` Score for the gene between 0.0 and 1.0.
  methods <- list(
    "clusteriness" = clusteriness,
    "correlation" = correlation,
    "proximity" = proximity,
    "neural" = neural
  )

  # Fail early with a readable message. Without this check an unknown ID makes
  # `methods[[method_id]]` evaluate to NULL and the call below aborts with
  # "attempt to apply non-function".
  unknown_ids <- setdiff(preset$method_ids, names(methods))
  if (length(unknown_ids) > 0) {
    stop(
      "Unknown method ID(s): ",
      paste(unknown_ids, collapse = ", "),
      call. = FALSE
    )
  }

  # `genes` and `distances` are not defined in this function; they are
  # resolved from the enclosing scope.
  # NOTE(review): presumably package-level datasets - confirm.
  results <- data.table(gene = genes$id)

  for (method_id in preset$method_ids) {
    method_results <- methods[[method_id]](distances, preset)

    # Rename the generic `score` column to the method ID so that every method
    # contributes its own column to the merged table.
    setnames(method_results, "score", method_id)

    # NOTE(review): `merge` is called with its default `all = FALSE`, so only
    # genes present in both tables are kept - confirm that every method
    # returns a row for each gene.
    results <- merge(results, method_results, by = "gene")
  }

  results
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue