Restructure classes and their responsibilities

2025-10-26 02:37:25 +01:00 · 2021-12-16 13:01:44 +01:00 · 2021-12-16 13:01:44 +01:00 · e2b93babe5
commit e2b93babe5
parent 01ec301d6d
27 changed files with 974 additions and 634 deletions
--- a/man/adjacency.Rd
+++ b/man/adjacency.Rd
@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/adjacency.R
+\name{adjacency}
+\alias{adjacency}
+\title{Score genes based on their proximity to the reference genes.}
+\usage{
+adjacency()
+}
+\value{
+An object of class \code{geposan_method}.
+}
+\description{
+This method finds the distance value with the maximum density for each gene
+(i.e. the mode of its estimated distribution). Genes are scored by comparing
+those distance values with the values of the reference genes.
+}
--- a/man/all_methods.Rd
+++ b/man/all_methods.Rd
@ -0,0 +1,11 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/method.R
+\name{all_methods}
+\alias{all_methods}
+\title{Get a list of all available methods.}
+\usage{
+all_methods()
+}
+\description{
+Get a list of all available methods.
+}
--- a/man/analyze.Rd
+++ b/man/analyze.Rd
@ -2,7 +2,7 @@
 % Please edit documentation in R/analyze.R
 \name{analyze}
 \alias{analyze}
-\title{Analyze by applying the specified preset.}
+\title{Analyze genes based on position data.}
 \usage{
 analyze(preset, progress = NULL)
 }
@ -11,17 +11,18 @@ analyze(preset, progress = NULL)

 \item{progress}{A function to be called for progress information. The
 function should accept a number between 0.0 and 1.0 for the current
-progress.}
+progress. If no function is provided, a simple text progress bar will be
+shown.}
 }
 \value{
 An object containing the results of the analysis with the following
 items:
 \describe{
 \item{\code{preset}}{The preset that was used.}
-\item{\code{weights}}{The optimal weights for ranking the reference genes.}
-\item{\code{ranking}}{The optimal ranking created using the weights.}
+\item{\code{scores}}{Table containing all scores for each gene.}
+\item{\code{results}}{Results from the different methods including details.}
 }
 }
 \description{
-Analyze by applying the specified preset.
+Analyze genes based on position data.
 }
--- a/man/clusteriness.Rd
+++ b/man/clusteriness.Rd
@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/clustering.R
+\name{clusteriness}
+\alias{clusteriness}
+\title{Perform a cluster analysis.}
+\usage{
+clusteriness(data, span = 1e+06, weight = 0.7)
+}
+\arguments{
+\item{data}{The values that should be scored.}
+
+\item{span}{The maximum span of values considered to be in one cluster.}
+
+\item{weight}{The weight that will be given to the next largest cluster in
+relation to the previous one. For example, if \code{weight} is 0.7 (the
+default), the first cluster will weigh 1.0, the second 0.7, the third 0.49
+etc.}
+}
+\description{
+This function will cluster the data using \code{\link[stats:hclust]{stats::hclust()}} and
+\code{\link[stats:cutree]{stats::cutree()}}. Every cluster with at least two members qualifies for
+further analysis. Clusters are then ranked based on their size in relation
+to the total number of values. The return value is a final score between
+0.0 and 1.0. Lower ranking clusters contribute less to this score.
+}
--- a/man/clustering.Rd
+++ b/man/clustering.Rd
@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/clustering.R
+\name{clustering}
+\alias{clustering}
+\title{Process genes by clustering their distances to telomeres.}
+\usage{
+clustering()
+}
+\value{
+An object of class \code{geposan_method}.
+}
+\description{
+The result will be cached and can be reused for different presets, because
+it is independent of the reference genes in use.
+}
+\seealso{
+\code{\link[=clusteriness]{clusteriness()}}
+}
--- a/man/correlation.Rd
+++ b/man/correlation.Rd
@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/correlation.R
+\name{correlation}
+\alias{correlation}
+\title{Compute the mean correlation coefficient comparing gene distances with a set
+of reference genes.}
+\usage{
+correlation()
+}
+\value{
+An object of class \code{geposan_method}.
+}
+\description{
+Compute the mean correlation coefficient comparing gene distances with a set
+of reference genes.
+}
--- a/man/method.Rd
+++ b/man/method.Rd
@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/method.R
+\name{method}
+\alias{method}
+\title{Describe a new method for analyzing gene position data.}
+\usage{
+method(id, name, description, func)
+}
+\arguments{
+\item{id}{Unique identifier for the method.}
+
+\item{name}{Human readable name.}
+
+\item{description}{Slightly longer description.}
+
+\item{func}{Function to apply the method. The function should accept two
+parameters: an object of class \code{geposan_preset} as input and a callback
+that receives progress information as a numeric value. The return value
+should be an object of class \code{geposan_result}.}
+}
+\value{
+An object of class \code{geposan_method}.
+}
+\description{
+Describe a new method for analyzing gene position data.
+}
--- a/man/neural.Rd
+++ b/man/neural.Rd
@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/neural.R
+\name{neural}
+\alias{neural}
+\title{Find genes by training and applying a neural network.}
+\usage{
+neural(seed = 180199, n_models = 5)
+}
+\arguments{
+\item{seed}{The seed will be used to make the results reproducible.}
+
+\item{n_models}{This number specifies how many sets of training data should
+be created. For each set, there will be a model trained on the remaining
+training data and validated using this set. For non-training genes, the
+final score will be the mean of the result of applying the different
+models. There should be at least two training sets. The analysis will only
+work if there is at least one reference gene per training set.}
+}
+\value{
+An object of class \code{geposan_method}.
+}
+\description{
+Find genes by training and applying a neural network.
+}
--- a/man/preset.Rd
+++ b/man/preset.Rd
@ -5,24 +5,20 @@
 \title{Create a new preset.}
 \usage{
 preset(
-  methods = c("clusteriness", "correlation", "neural", "adjacency", "proximity"),
-  species_ids = NULL,
-  gene_ids = NULL,
-  reference_gene_ids = NULL,
-  optimization_target = "mean_rank"
+  methods = all_methods(),
+  species_ids = geposan::species$id,
+  gene_ids = geposan::genes$id,
+  reference_gene_ids
 )
 }
 \arguments{
-\item{methods}{Methods to apply.}
+\item{methods}{List of methods to apply.}

 \item{species_ids}{IDs of species to include.}

 \item{gene_ids}{IDs of genes to screen.}

 \item{reference_gene_ids}{IDs of reference genes to compare to.}
-
-\item{optimization_target}{Parameter of the reference genes that the ranking
-should be optimized for.}
 }
 \value{
 The preset to use with \code{\link[=analyze]{analyze()}}.
@ -33,25 +29,7 @@ analysis. Note that the genes to process should normally include the
 reference genes to be able to assess the results later. The genes will be
 filtered based on how many species have data for them. Genes which only have
 orthologs for less than 25\% of the input species will be excluded from the
-preset and the analyis.
-}
-\details{
-Available methods are:
-\itemize{
-\item \code{clusteriness} How much the gene distances to the nearest telomere
-cluster across species.
-\item \code{correlation} The mean correlation of gene distances to the nearest
-telomere across species.
-\item \code{neural} Assessment by neural network trained on the reference genes.
-\item \code{adjacency} Proximity to reference genes.
-\item \code{proximity} Mean proximity to telomeres.
-}
-
-Available optimization targets are:
-\itemize{
-\item \code{mean} Mean rank of the reference genes.
-\item \code{median} Median rank of the reference genes.
-\item \code{max} First rank of the reference genes.
-\item \code{min} Last rank of the reference genes.
-}
+preset and the analysis. See the different method functions for the available
+methods: \code{\link[=clustering]{clustering()}}, \code{\link[=correlation]{correlation()}}, \code{\link[=neural]{neural()}}, \code{\link[=adjacency]{adjacency()}} and
+\code{\link[=proximity]{proximity()}}.
 }
--- a/man/print.geposan_analysis.Rd
+++ b/man/print.geposan_analysis.Rd
@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/analyze.R
+\name{print.geposan_analysis}
+\alias{print.geposan_analysis}
+\title{Print an analysis object.}
+\usage{
+\method{print}{geposan_analysis}(x, ...)
+}
+\arguments{
+\item{x}{The analysis to print.}
+
+\item{...}{Other parameters.}
+}
+\description{
+Print an analysis object.
+}
+\seealso{
+\code{\link[=analyze]{analyze()}}
+}
--- a/man/print.geposan_method.Rd
+++ b/man/print.geposan_method.Rd
@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/method.R
+\name{print.geposan_method}
+\alias{print.geposan_method}
+\title{Print a method object.}
+\usage{
+\method{print}{geposan_method}(x, ...)
+}
+\arguments{
+\item{x}{The method to print.}
+
+\item{...}{Other parameters.}
+}
+\description{
+Print a method object.
+}
+\seealso{
+\code{\link[=method]{method()}}
+}
--- a/man/print.geposan_result.Rd
+++ b/man/print.geposan_result.Rd
@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/result.R
+\name{print.geposan_result}
+\alias{print.geposan_result}
+\title{Print a result object.}
+\usage{
+\method{print}{geposan_result}(x, ...)
+}
+\arguments{
+\item{x}{The result to print.}
+
+\item{...}{Other parameters.}
+}
+\description{
+Print a result object.
+}
+\seealso{
+\code{\link[=result]{result()}}
+}
--- a/man/proximity.Rd
+++ b/man/proximity.Rd
@ -0,0 +1,15 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/proximity.R
+\name{proximity}
+\alias{proximity}
+\title{Score the mean distance of genes to the telomeres across species.}
+\usage{
+proximity()
+}
+\value{
+An object of class \code{geposan_method}.
+}
+\description{
+A score will be given to each gene such that 0.0 corresponds to the maximal
+mean distance across all genes and 1.0 corresponds to a distance of 0.
+}
--- a/man/result.Rd
+++ b/man/result.Rd
@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/result.R
+\name{result}
+\alias{result}
+\title{Result of applying a method on gene position data.}
+\usage{
+result(method_id, scores, details = list())
+}
+\arguments{
+\item{method_id}{ID of the method that produced this result.}
+
+\item{scores}{A \code{data.frame} mapping gene IDs (\code{gene}) to computed scores
+between 0.0 and 1.0 (\code{score}).}
+
+\item{details}{Optional details that may contain intermediate results as
+well as other information on the method application.}
+}
+\value{
+An object of class \code{geposan_result}.
+}
+\description{
+Result of applying a method on gene position data.
+}