preset: Turn into S3 class

This commit is contained in:
Elias Projahn 2021-11-03 14:17:39 +01:00
parent 6494ae8200
commit 55958e0d85
8 changed files with 106 additions and 43 deletions

View file

@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand # Generated by roxygen2: do not edit by hand
S3method(print,geposan_preset)
export(analyze) export(analyze)
export(optimize_weights) export(optimize_weights)
export(preset) export(preset)

View file

@ -1,36 +1,6 @@
#' Create a new preset.
#'
#' A preset bundles the methods and the input data that an analysis should
#' use. The genes to process should normally include the reference genes so
#' that the results can be assessed afterwards.
#'
#' Available methods are:
#'
#' - `clusteriness` How much the gene distances cluster across species.
#' - `correlation` The mean correlation with the reference genes.
#' - `proximity` Mean proximity to telomeres.
#' - `neural` Assessment by neural network.
#'
#' @param methods IDs of methods to apply.
#' @param species IDs of species to include.
#' @param genes IDs of genes to screen.
#' @param reference_genes IDs of reference genes to compare to.
#'
#' @return A named list with the sorted elements `method_ids`, `species_ids`,
#'   `gene_ids` and `reference_gene_ids`, to be used with [analyze()].
#'
#' @export
preset <- function(methods, species, genes, reference_genes) {
    # Every input is stored in sorted order.
    sorted_methods <- sort(methods)
    sorted_species <- sort(species)
    sorted_genes <- sort(genes)
    sorted_reference_genes <- sort(reference_genes)

    list(
        method_ids = sorted_methods,
        species_ids = sorted_species,
        gene_ids = sorted_genes,
        reference_gene_ids = sorted_reference_genes
    )
}
#' Analyze by applying the specified preset. #' Analyze by applying the specified preset.
#' #'
#' @param preset The preset to use which can be created using [preset()]. #' @param preset The preset to use which should be created using [preset()].
#' @param progress A function to be called for progress information. The #' @param progress A function to be called for progress information. The
#' function should accept a number between 0.0 and 1.0 for the current #' function should accept a number between 0.0 and 1.0 for the current
#' progress. #' progress.
@ -41,6 +11,10 @@ preset <- function(methods, species, genes, reference_genes) {
#' #'
#' @export #' @export
analyze <- function(preset, progress = NULL) { analyze <- function(preset, progress = NULL) {
if (class(preset) != "geposan_preset") {
stop("Preset is invalid. Use geposan::preset() to create one.")
}
# Available methods by ID. # Available methods by ID.
# #
# A method describes a way to perform a computation on gene distance data # A method describes a way to perform a computation on gene distance data
@ -64,10 +38,12 @@ analyze <- function(preset, progress = NULL) {
method_count <- length(preset$method_ids) method_count <- length(preset$method_ids)
results <- data.table(gene = preset$gene_ids) results <- data.table(gene = preset$gene_ids)
for (method_id in preset$method_ids) { for (method_id in preset$methods) {
method_progress <- if (!is.null(progress)) function(p) { method_progress <- if (!is.null(progress)) {
function(p) {
progress(total_progress + p / method_count) progress(total_progress + p / method_count)
} }
}
method_results <- methods[[method_id]](preset, method_progress) method_results <- methods[[method_id]](preset, method_progress)
setnames(method_results, "score", method_id) setnames(method_results, "score", method_id)

66
R/preset.R Normal file
View file

@ -0,0 +1,66 @@
#' Create a new preset.
#'
#' A preset bundles the methods and the input data that an analysis should
#' use. The genes to process should normally include the reference genes so
#' that the results can be assessed afterwards.
#'
#' Available methods are:
#'
#' - `clusteriness` How much the gene distances cluster across species.
#' - `correlation` The mean correlation with the reference genes.
#' - `proximity` Mean proximity to telomeres.
#' - `neural` Assessment by neural network.
#'
#' @param methods Methods to apply.
#' @param species_ids IDs of species to include.
#' @param gene_ids IDs of genes to screen.
#' @param reference_gene_ids IDs of reference genes to compare to.
#'
#' @return An object of class `geposan_preset` to use with [analyze()]. It is
#'   a list with the sorted elements `methods`, `species_ids`, `gene_ids` and
#'   `reference_gene_ids`.
#'
#' @export
preset <- function(methods = c(
                       "clusteriness",
                       "correlation",
                       "neural",
                       "proximity"
                   ),
                   species_ids = NULL,
                   gene_ids = NULL,
                   reference_gene_ids = NULL) {
    fields <- list(
        methods = methods,
        species_ids = species_ids,
        gene_ids = gene_ids,
        reference_gene_ids = reference_gene_ids
    )

    # Each field is sorted so that hashes computed for the object later are
    # predictable regardless of the order in which IDs were passed.
    fields <- lapply(fields, sort)

    class(fields) <- "geposan_preset"
    fields
}
#' S3 method to print a preset object.
#'
#' Writes a short summary to the console: the included methods and the number
#' of species, genes and reference genes stored in the preset.
#'
#' @param preset The preset to print, as created by [preset()].
#' @param ... Unused; accepted so the signature accepts the extra arguments
#'   that the `print` generic may pass on.
#'
#' @return The preset, invisibly.
#'
#' @seealso [preset()]
#'
#' @export
print.geposan_preset <- function(preset, ...) {
    cat("geposan preset:")
    cat("\n Included methods: ")
    # The constructor stores the method names in the `methods` element. A
    # lookup of `method_ids` yields NULL and would print an empty list.
    cat(preset$methods, sep = ", ")
    cat(sprintf(
        "\n Input data: %i species, %i genes",
        length(preset$species_ids),
        length(preset$gene_ids)
    ))
    cat(sprintf(
        "\n Comparison data: %i reference genes\n",
        length(preset$reference_gene_ids)
    ))

    # Returned invisibly so that printing does not echo the object again.
    invisible(preset)
}

View file

@ -7,7 +7,7 @@
analyze(preset, progress = NULL) analyze(preset, progress = NULL)
} }
\arguments{ \arguments{
\item{preset}{The preset to use which can be created using \code{\link[=preset]{preset()}}.} \item{preset}{The preset to use which should be created using \code{\link[=preset]{preset()}}.}
\item{progress}{A function to be called for progress information. The \item{progress}{A function to be called for progress information. The
function should accept a number between 0.0 and 1.0 for the current function should accept a number between 0.0 and 1.0 for the current

View file

@ -5,10 +5,11 @@
\alias{distances} \alias{distances}
\title{Information on gene positions across species.} \title{Information on gene positions across species.}
\format{ \format{
A \link{data.table} with 1390730 rows and 3 variables: A \link{data.table} with 1506182 rows and 4 variables:
\describe{ \describe{
\item{species}{Species ID} \item{species}{Species ID}
\item{gene}{Gene ID} \item{gene}{Gene ID}
\item{position}{Gene start position}
\item{distance}{Distance to nearest telomere} \item{distance}{Distance to nearest telomere}
} }
} }

View file

@ -1,19 +1,24 @@
% Generated by roxygen2: do not edit by hand % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/analyze.R % Please edit documentation in R/preset.R
\name{preset} \name{preset}
\alias{preset} \alias{preset}
\title{Create a new preset.} \title{Create a new preset.}
\usage{ \usage{
preset(methods, species, genes, reference_genes) preset(
methods = c("clusteriness", "correlation", "neural", "proximity"),
species_ids = NULL,
gene_ids = NULL,
reference_gene_ids = NULL
)
} }
\arguments{ \arguments{
\item{methods}{IDs of methods to apply.} \item{methods}{Methods to apply.}
\item{species}{IDs of species to include.} \item{species_ids}{IDs of species to include.}
\item{genes}{IDs of genes to screen.} \item{gene_ids}{IDs of genes to screen.}
\item{reference_genes}{IDs of reference genes to compare to.} \item{reference_gene_ids}{IDs of reference genes to compare to.}
} }
\value{ \value{
The preset to use with \code{\link[=analyze]{analyze()}}. The preset to use with \code{\link[=analyze]{analyze()}}.

View file

@ -0,0 +1,14 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/preset.R
\name{print.geposan_preset}
\alias{print.geposan_preset}
\title{S3 method to print a preset object.}
\usage{
\method{print}{geposan_preset}(preset, ...)
}
\description{
S3 method to print a preset object.
}
\seealso{
\code{\link[=preset]{preset()}}
}

View file

@ -5,7 +5,7 @@
\alias{species} \alias{species}
\title{Information on included species from the Ensembl database.} \title{Information on included species from the Ensembl database.}
\format{ \format{
A \link{data.table} with 91 rows and 2 variables: A \link{data.table} with 99 rows and 2 variables:
\describe{ \describe{
\item{id}{Unique species ID} \item{id}{Unique species ID}
\item{name}{Human readable species name} \item{name}{Human readable species name}