From 55958e0d852a4e90d2976da32d374ff946f4e82c Mon Sep 17 00:00:00 2001 From: Elias Projahn Date: Wed, 3 Nov 2021 14:17:39 +0100 Subject: [PATCH] preset: Turn into S3 class --- NAMESPACE | 1 + R/analyze.R | 44 ++++++------------------- R/preset.R | 66 +++++++++++++++++++++++++++++++++++++ man/analyze.Rd | 2 +- man/distances.Rd | 3 +- man/preset.Rd | 17 ++++++---- man/print.geposan_preset.Rd | 14 ++++++++ man/species.Rd | 2 +- 8 files changed, 106 insertions(+), 43 deletions(-) create mode 100644 R/preset.R create mode 100644 man/print.geposan_preset.Rd diff --git a/NAMESPACE b/NAMESPACE index 1d3b3d4..011c4ce 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +S3method(print,geposan_preset) export(analyze) export(optimize_weights) export(preset) diff --git a/R/analyze.R b/R/analyze.R index b8930e3..e416050 100644 --- a/R/analyze.R +++ b/R/analyze.R @@ -1,36 +1,6 @@ -#' Create a new preset. -#' -#' A preset is used to specify which methods and inputs should be used for an -#' analysis. Note that the genes to process should normally include the -#' reference genes to be able to assess the results later. -#' -#' Available methods are: -#' -#' - `clusteriness` How much the gene distances cluster across species. -#' - `correlation` The mean correlation with the reference genes. -#' - `proximity` Mean proximity to telomeres. -#' - `neural` Assessment by neural network. -#' -#' @param methods IDs of methods to apply. -#' @param species IDs of species to include. -#' @param genes IDs of genes to screen. -#' @param reference_genes IDs of reference genes to compare to. -#' -#' @return The preset to use with [analyze()]. -#' -#' @export -preset <- function(methods, species, genes, reference_genes) { - list( - method_ids = sort(methods), - species_ids = sort(species), - gene_ids = sort(genes), - reference_gene_ids = sort(reference_genes) - ) -} - #' Analyze by applying the specified preset. 
#' -#' @param preset The preset to use which can be created using [preset()]. +#' @param preset The preset to use which should be created using [preset()]. #' @param progress A function to be called for progress information. The #' function should accept a number between 0.0 and 1.0 for the current #' progress. @@ -41,6 +11,10 @@ preset <- function(methods, species, genes, reference_genes) { #' #' @export analyze <- function(preset, progress = NULL) { + if (!inherits(preset, "geposan_preset")) { + stop("Preset is invalid. Use geposan::preset() to create one.") + } + # Available methods by ID. # # A method describes a way to perform a computation on gene distance data @@ -64,9 +38,11 @@ analyze <- function(preset, progress = NULL) { - method_count <- length(preset$method_ids) + method_count <- length(preset$methods) results <- data.table(gene = preset$gene_ids) - for (method_id in preset$method_ids) { - method_progress <- if (!is.null(progress)) function(p) { - progress(total_progress + p / method_count) + for (method_id in preset$methods) { + method_progress <- if (!is.null(progress)) { + function(p) { + progress(total_progress + p / method_count) + } } method_results <- methods[[method_id]](preset, method_progress) diff --git a/R/preset.R b/R/preset.R new file mode 100644 index 0000000..527928a --- /dev/null +++ b/R/preset.R @@ -0,0 +1,66 @@ +#' Create a new preset. +#' +#' A preset is used to specify which methods and inputs should be used for an +#' analysis. Note that the genes to process should normally include the +#' reference genes to be able to assess the results later. +#' +#' Available methods are: +#' +#' - `clusteriness` How much the gene distances cluster across species. +#' - `correlation` The mean correlation with the reference genes. +#' - `proximity` Mean proximity to telomeres. +#' - `neural` Assessment by neural network. +#' +#' @param methods Methods to apply. +#' @param species_ids IDs of species to include. +#' @param gene_ids IDs of genes to screen. 
+#' @param reference_gene_ids IDs of reference genes to compare to. +#' +#' @return The preset to use with [analyze()]. +#' +#' @export +preset <- function(methods = c( + "clusteriness", + "correlation", + "neural", + "proximity" + ), + species_ids = NULL, + gene_ids = NULL, + reference_gene_ids = NULL) { + # The included data gets sorted to be able to produce predictable hashes + # for the object later. + structure( + list( + methods = sort(methods), + species_ids = sort(species_ids), + gene_ids = sort(gene_ids), + reference_gene_ids = sort(reference_gene_ids) + ), + class = "geposan_preset" + ) +} + +#' S3 method to print a preset object. +#' +#' @seealso [preset()] +#' +#' @export +print.geposan_preset <- function(preset, ...) { + cat("geposan preset:") + cat("\n Included methods: ") + cat(preset$methods, sep = ", ") + + cat(sprintf( + "\n Input data: %i species, %i genes", + length(preset$species_ids), + length(preset$gene_ids) + )) + + cat(sprintf( + "\n Comparison data: %i reference genes\n", + length(preset$reference_gene_ids) + )) + + invisible(preset) +} diff --git a/man/analyze.Rd b/man/analyze.Rd index c814c50..6d6cf1a 100644 --- a/man/analyze.Rd +++ b/man/analyze.Rd @@ -7,7 +7,7 @@ analyze(preset, progress = NULL) } \arguments{ -\item{preset}{The preset to use which can be created using \code{\link[=preset]{preset()}}.} +\item{preset}{The preset to use which should be created using \code{\link[=preset]{preset()}}.} \item{progress}{A function to be called for progress information. 
The function should accept a number between 0.0 and 1.0 for the current diff --git a/man/distances.Rd b/man/distances.Rd index 5ca856f..284b4ef 100644 --- a/man/distances.Rd +++ b/man/distances.Rd @@ -5,10 +5,11 @@ \alias{distances} \title{Information on gene positions across species.} \format{ -A \link{data.table} with 1390730 rows and 3 variables: +A \link{data.table} with 1506182 rows and 4 variables: \describe{ \item{species}{Species ID} \item{gene}{Gene ID} +\item{position}{Gene start position} \item{distance}{Distance to nearest telomere} } } diff --git a/man/preset.Rd b/man/preset.Rd index ea8c743..e30b1cd 100644 --- a/man/preset.Rd +++ b/man/preset.Rd @@ -1,19 +1,24 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/analyze.R +% Please edit documentation in R/preset.R \name{preset} \alias{preset} \title{Create a new preset.} \usage{ -preset(methods, species, genes, reference_genes) +preset( + methods = c("clusteriness", "correlation", "neural", "proximity"), + species_ids = NULL, + gene_ids = NULL, + reference_gene_ids = NULL +) } \arguments{ -\item{methods}{IDs of methods to apply.} +\item{methods}{Methods to apply.} -\item{species}{IDs of species to include.} +\item{species_ids}{IDs of species to include.} -\item{genes}{IDs of genes to screen.} +\item{gene_ids}{IDs of genes to screen.} -\item{reference_genes}{IDs of reference genes to compare to.} +\item{reference_gene_ids}{IDs of reference genes to compare to.} } \value{ The preset to use with \code{\link[=analyze]{analyze()}}. diff --git a/man/print.geposan_preset.Rd b/man/print.geposan_preset.Rd new file mode 100644 index 0000000..dce4ed2 --- /dev/null +++ b/man/print.geposan_preset.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preset.R +\name{print.geposan_preset} +\alias{print.geposan_preset} +\title{S3 method to print a preset object.} +\usage{ +\method{print}{geposan_preset}(preset, ...) 
+} +\description{ +S3 method to print a preset object. +} +\seealso{ +\code{\link[=preset]{preset()}} +} diff --git a/man/species.Rd b/man/species.Rd index 66e5386..8185473 100644 --- a/man/species.Rd +++ b/man/species.Rd @@ -5,7 +5,7 @@ \alias{species} \title{Information on included species from the Ensembl database.} \format{ -A \link{data.table} with 91 rows and 2 variables: +A \link{data.table} with 99 rows and 2 variables: \describe{ \item{id}{Unique species ID} \item{name}{Human readable species name}