#' Create a new preset. #' #' A preset is used to specify which methods and inputs should be used for an #' analysis. Note that the genes to process should normally include the #' reference genes to be able to assess the results later. The genes will be #' filtered based on how many species have data for them. Genes which only have #' orthologs for less than 25% of the input species will be excluded from the #' preset and the analyis. See the different method functions for the available #' methods: [clustering()], [correlation()], [neural()], [adjacency()] and #' [proximity()]. #' #' @param reference_gene_ids IDs of reference genes to compare to. #' @param methods List of methods to apply. #' @param species_ids IDs of species to include. #' @param gene_ids IDs of genes to screen. #' #' @return The preset to use with [analyze()]. #' #' @export preset <- function(reference_gene_ids, methods = all_methods(), species_ids = geposan::species$id, gene_ids = geposan::genes$id) { # Count included species per gene. genes_n_species <- geposan::distances[ species %chin% species_ids, .(n_species = .N), by = "gene" ] # Filter out genes with less than 25% existing orthologs. gene_ids_filtered <- genes_n_species[ gene %chin% gene_ids & n_species >= 0.25 * length(species_ids), gene ] # The included data gets sorted to be able to produce predictable hashes # for the object later. structure( list( reference_gene_ids = sort(reference_gene_ids), methods = methods, species_ids = sort(species_ids), gene_ids = sort(gene_ids_filtered) ), class = "geposan_preset" ) } #' S3 method to print a preset object. #' #' @param x The preset to print. #' @param ... Other parameters. #' #' @seealso [preset()] #' #' @export print.geposan_preset <- function(x, ...) { cat(sprintf( paste0( "geposan preset:", "\n Reference genes: %i", "\n Included methods: %s", "\n Number of species: %i", "\n Number of genes: %i", "\n" ), length(x$reference_gene_ids), paste(sapply(x$methods, function(m) m$id), collapse = ", "), length(x$species_ids), length(x$gene_ids) )) invisible(x) }