preset: Filter species in addition to genes

This commit is contained in:
Elias Projahn 2022-05-30 13:49:52 +02:00
parent 9e96c54f23
commit 3217c9bd29
4 changed files with 49 additions and 48 deletions

View file

@ -4,12 +4,7 @@
\alias{neural}
\title{Find genes by training and applying a neural network.}
\usage{
neural(
seed = 180199,
n_models = 5,
gene_requirement = 0.5,
control_ratio = 0.5
)
neural(seed = 180199, n_models = 5, control_ratio = 0.5)
}
\arguments{
\item{seed}{The seed will be used to make the results reproducible.}
@ -21,9 +16,6 @@ final score will be the mean of the result of applying the different
models. There should be at least two training sets. The analysis will only
work if there is at least one reference gene per training set.}
\item{gene_requirement}{Minimum proportion of genes from the preset that a
species has to have in order to be included in the models.}
\item{control_ratio}{The proportion of random control genes that is included
in the training data sets in addition to the reference genes. This should
be a numeric value between 0.0 and 1.0.}

View file

@ -8,7 +8,9 @@ preset(
reference_gene_ids,
methods = all_methods(),
species_ids = geposan::species$id,
gene_ids = geposan::genes$id
gene_ids = geposan::genes$id,
species_requirement = 0.25,
gene_requirement = 0.5
)
}
\arguments{
@ -19,6 +21,12 @@ preset(
\item{species_ids}{IDs of species to include.}
\item{gene_ids}{IDs of genes to screen.}
\item{species_requirement}{The proportion of species in which a gene has to
have orthologs in order for the gene to qualify.}
\item{gene_requirement}{The proportion of genes that a species has to have
in order for the species to be included in the analysis.}
}
\value{
The preset to use with \code{\link[=analyze]{analyze()}}.
@ -27,9 +35,8 @@ The preset to use with \code{\link[=analyze]{analyze()}}.
A preset is used to specify which methods and inputs should be used for an
analysis. Note that the genes to process should normally include the
reference genes to be able to assess the results later. The genes will be
filtered based on how many species have data for them. Genes which only have
orthologs for less than 25\% of the input species will be excluded from the
preset and the analysis. See the different method functions for the available
methods: \code{\link[=clustering]{clustering()}}, \code{\link[=correlation]{correlation()}}, \code{\link[=neural]{neural()}}, \code{\link[=adjacency]{adjacency()}} and
\code{\link[=species_adjacency]{species_adjacency()}}.
filtered based on how many species have data for them. Afterwards, species
that still have many missing genes will also be excluded. See the different
method functions for the available methods: \code{\link[=clustering]{clustering()}}, \code{\link[=correlation]{correlation()}},
\code{\link[=neural]{neural()}}, \code{\link[=adjacency]{adjacency()}} and \code{\link[=species_adjacency]{species_adjacency()}}.
}