mirror of
				https://github.com/johrpan/geposan.git
				synced 2025-10-26 18:57:25 +01:00 
			
		
		
		
	Handle caching
This commit is contained in:
		
							parent
							
								
									b8365e0efb
								
							
						
					
					
						commit
						df6e23d219
					
				
					 7 changed files with 247 additions and 191 deletions
				
			
		|  | @ -22,7 +22,8 @@ Depends: | ||||||
|     R (>= 2.10) |     R (>= 2.10) | ||||||
| Imports: | Imports: | ||||||
|     data.table, |     data.table, | ||||||
|     neuralnet |     neuralnet, | ||||||
|  |     rlang | ||||||
| Suggests: | Suggests: | ||||||
|     biomaRt, |     biomaRt, | ||||||
|     rlog, |     rlog, | ||||||
|  |  | ||||||
							
								
								
									
										46
									
								
								R/analyze.R
									
										
									
									
									
								
							
							
						
						
									
										46
									
								
								R/analyze.R
									
										
									
									
									
								
							|  | @ -59,30 +59,32 @@ analyze <- function(preset, progress = NULL) { | ||||||
|         "neural" = neural |         "neural" = neural | ||||||
|     ) |     ) | ||||||
| 
 | 
 | ||||||
|     total_progress <- 0.0 |     cached("results", preset, { | ||||||
|     method_count <- length(preset$method_ids) |         total_progress <- 0.0 | ||||||
|     results <- data.table(gene = preset$gene_ids) |         method_count <- length(preset$method_ids) | ||||||
|  |         results <- data.table(gene = preset$gene_ids) | ||||||
| 
 | 
 | ||||||
|     for (method_id in preset$method_ids) { |         for (method_id in preset$method_ids) { | ||||||
|         method_progress <- if (!is.null(progress)) function(p) { |             method_progress <- if (!is.null(progress)) function(p) { | ||||||
|             progress(total_progress + p / method_count) |                 progress(total_progress + p / method_count) | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             method_results <- methods[[method_id]](preset, method_progress) | ||||||
|  |             setnames(method_results, "score", method_id) | ||||||
|  | 
 | ||||||
|  |             results <- merge( | ||||||
|  |                 results, | ||||||
|  |                 method_results, | ||||||
|  |                 by = "gene" | ||||||
|  |             ) | ||||||
|  | 
 | ||||||
|  |             total_progress <- total_progress + 1 / method_count | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         method_results <- methods[[method_id]](preset, method_progress) |         if (!is.null(progress)) { | ||||||
|         setnames(method_results, "score", method_id) |             progress(1.0) | ||||||
|  |         } | ||||||
| 
 | 
 | ||||||
|         results <- merge( |         results | ||||||
|             results, |     }) | ||||||
|             method_results, |  | ||||||
|             by = "gene" |  | ||||||
|         ) |  | ||||||
| 
 |  | ||||||
|         total_progress <- total_progress + 1 / method_count |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     if (!is.null(progress)) { |  | ||||||
|         progress(1.0) |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     results |  | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -37,28 +37,33 @@ clusteriness_priv <- function(data, height = 1000000) { | ||||||
| 
 | 
 | ||||||
| # Process genes clustering their distance to telomeres. | # Process genes clustering their distance to telomeres. | ||||||
| clusteriness <- function(preset, progress = NULL) { | clusteriness <- function(preset, progress = NULL) { | ||||||
|     results <- data.table(gene = preset$gene_ids) |     species_ids <- preset$species_ids | ||||||
|  |     gene_ids <- preset$gene_ids | ||||||
| 
 | 
 | ||||||
|     # Prefilter the input data by species. |     cached("clusteriness", c(species_ids, gene_ids), { | ||||||
|     distances <- geposan::distances[species %chin% preset$species_ids] |         results <- data.table(gene = gene_ids) | ||||||
| 
 | 
 | ||||||
|     # Add an index for quickly accessing data per gene. |         # Prefilter the input data by species. | ||||||
|     setkey(distances, gene) |         distances <- geposan::distances[species %chin% species_ids] | ||||||
| 
 | 
 | ||||||
|     genes_done <- 0 |         # Add an index for quickly accessing data per gene. | ||||||
|     genes_total <- length(preset$gene_ids) |         setkey(distances, gene) | ||||||
| 
 | 
 | ||||||
|     # Perform the cluster analysis for one gene. |         genes_done <- 0 | ||||||
|     compute <- function(gene_id) { |         genes_total <- length(gene_ids) | ||||||
|         score <- clusteriness_priv(distances[gene_id, distance]) |  | ||||||
| 
 | 
 | ||||||
|         if (!is.null(progress)) { |         # Perform the cluster analysis for one gene. | ||||||
|             genes_done <<- genes_done + 1 |         compute <- function(gene_id) { | ||||||
|             progress(genes_done / genes_total) |             score <- clusteriness_priv(distances[gene_id, distance]) | ||||||
|  | 
 | ||||||
|  |             if (!is.null(progress)) { | ||||||
|  |                 genes_done <<- genes_done + 1 | ||||||
|  |                 progress(genes_done / genes_total) | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             score | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         score |         results[, score := compute(gene), by = 1:nrow(results)] | ||||||
|     } |     }) | ||||||
| 
 |  | ||||||
|     results[, score := compute(gene), by = 1:nrow(results)] |  | ||||||
| } | } | ||||||
|  |  | ||||||
							
								
								
									
										120
									
								
								R/correlation.R
									
										
									
									
									
								
							
							
						
						
									
										120
									
								
								R/correlation.R
									
										
									
									
									
								
							|  | @ -5,69 +5,75 @@ correlation <- function(preset, progress = NULL) { | ||||||
|     gene_ids <- preset$gene_ids |     gene_ids <- preset$gene_ids | ||||||
|     reference_gene_ids <- preset$reference_gene_ids |     reference_gene_ids <- preset$reference_gene_ids | ||||||
| 
 | 
 | ||||||
|     # Prefilter distances by species. |     cached("correlation", c(species_ids, gene_ids, reference_gene_ids), { | ||||||
|     distances <- geposan::distances[species %chin% species_ids] |         # Prefilter distances by species. | ||||||
|  |         distances <- geposan::distances[species %chin% species_ids] | ||||||
| 
 | 
 | ||||||
|     # Tranform data to get species as rows and genes as columns. We construct |         # Tranform data to get species as rows and genes as columns. We | ||||||
|     # columns per species, because it requires fewer iterations, and transpose |         # construct columns per species, because it requires fewer iterations, | ||||||
|     # the table afterwards. |         # and transpose the table afterwards. | ||||||
| 
 | 
 | ||||||
|     data <- data.table(gene = gene_ids) |         data <- data.table(gene = gene_ids) | ||||||
| 
 | 
 | ||||||
|     # Make a column containing distance data for each species. |         # Make a column containing distance data for each species. | ||||||
|     for (species_id in species_ids) { |         for (species_id in species_ids) { | ||||||
|         species_distances <- distances[species == species_id, .(gene, distance)] |             species_distances <- distances[ | ||||||
|         data <- merge(data, species_distances, all.x = TRUE) |                 species == species_id, | ||||||
|         setnames(data, "distance", species_id) |                 .(gene, distance) | ||||||
|     } |             ] | ||||||
| 
 | 
 | ||||||
|     # Transpose to the desired format. |             data <- merge(data, species_distances, all.x = TRUE) | ||||||
|     data <- transpose(data, make.names = "gene") |             setnames(data, "distance", species_id) | ||||||
| 
 |  | ||||||
|     if (!is.null(progress)) progress(0.33) |  | ||||||
| 
 |  | ||||||
|     # Take the reference data. |  | ||||||
|     reference_data <- data[, ..reference_gene_ids] |  | ||||||
| 
 |  | ||||||
|     # Perform the correlation between all possible pairs. |  | ||||||
|     results <- stats::cor( |  | ||||||
|         data[, ..gene_ids], |  | ||||||
|         reference_data, |  | ||||||
|         use = "pairwise.complete.obs", |  | ||||||
|         method = "spearman" |  | ||||||
|     ) |  | ||||||
| 
 |  | ||||||
|     results <- data.table(results, keep.rownames = TRUE) |  | ||||||
|     setnames(results, "rn", "gene") |  | ||||||
| 
 |  | ||||||
|     # Remove correlations between the reference genes themselves. |  | ||||||
|     for (reference_gene_id in reference_gene_ids) { |  | ||||||
|         column <- quote(reference_gene_id) |  | ||||||
|         results[gene == reference_gene_id, eval(column) := NA] |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     if (!is.null(progress)) progress(0.66) |  | ||||||
| 
 |  | ||||||
|     # Compute the final score as the mean of known correlation scores. Negative |  | ||||||
|     # correlations will correctly lessen the score, which will be clamped to |  | ||||||
|     # zero as its lower bound. Genes with no possible correlations at all will |  | ||||||
|     # be assumed to have a score of 0.0. |  | ||||||
| 
 |  | ||||||
|     compute_score <- function(scores) { |  | ||||||
|         score <- mean(scores, na.rm = TRUE) |  | ||||||
| 
 |  | ||||||
|         if (is.na(score) | score < 0.0) { |  | ||||||
|             score <- 0.0 |  | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         score |         # Transpose to the desired format. | ||||||
|     } |         data <- transpose(data, make.names = "gene") | ||||||
| 
 | 
 | ||||||
|     results[, |         if (!is.null(progress)) progress(0.33) | ||||||
|         score := compute_score(as.matrix(.SD)), |  | ||||||
|         .SDcols = reference_gene_ids, |  | ||||||
|         by = gene |  | ||||||
|     ] |  | ||||||
| 
 | 
 | ||||||
|     results[, .(gene, score)] |         # Take the reference data. | ||||||
|  |         reference_data <- data[, ..reference_gene_ids] | ||||||
|  | 
 | ||||||
|  |         # Perform the correlation between all possible pairs. | ||||||
|  |         results <- stats::cor( | ||||||
|  |             data[, ..gene_ids], | ||||||
|  |             reference_data, | ||||||
|  |             use = "pairwise.complete.obs", | ||||||
|  |             method = "spearman" | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         results <- data.table(results, keep.rownames = TRUE) | ||||||
|  |         setnames(results, "rn", "gene") | ||||||
|  | 
 | ||||||
|  |         # Remove correlations between the reference genes themselves. | ||||||
|  |         for (reference_gene_id in reference_gene_ids) { | ||||||
|  |             column <- quote(reference_gene_id) | ||||||
|  |             results[gene == reference_gene_id, eval(column) := NA] | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         if (!is.null(progress)) progress(0.66) | ||||||
|  | 
 | ||||||
|  |         # Compute the final score as the mean of known correlation scores. | ||||||
|  |         # Negative correlations will correctly lessen the score, which will be | ||||||
|  |         # clamped to zero as its lower bound. Genes with no possible | ||||||
|  |         # correlations at all will be assumed to have a score of 0.0. | ||||||
|  | 
 | ||||||
|  |         compute_score <- function(scores) { | ||||||
|  |             score <- mean(scores, na.rm = TRUE) | ||||||
|  | 
 | ||||||
|  |             if (is.na(score) | score < 0.0) { | ||||||
|  |                 score <- 0.0 | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             score | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         results[, | ||||||
|  |             score := compute_score(as.matrix(.SD)), | ||||||
|  |             .SDcols = reference_gene_ids, | ||||||
|  |             by = gene | ||||||
|  |         ] | ||||||
|  | 
 | ||||||
|  |         results[, .(gene, score)] | ||||||
|  |     }) | ||||||
| } | } | ||||||
|  |  | ||||||
							
								
								
									
										164
									
								
								R/neural.R
									
										
									
									
									
								
							
							
						
						
									
										164
									
								
								R/neural.R
									
										
									
									
									
								
							|  | @ -3,106 +3,112 @@ | ||||||
| # @param seed A seed to get reproducible results. | # @param seed A seed to get reproducible results. | ||||||
| neural <- function(preset, progress = NULL, seed = 448077) { | neural <- function(preset, progress = NULL, seed = 448077) { | ||||||
|     species_ids <- preset$species_ids |     species_ids <- preset$species_ids | ||||||
|  |     gene_ids <- preset$gene_ids | ||||||
|     reference_gene_ids <- preset$reference_gene_ids |     reference_gene_ids <- preset$reference_gene_ids | ||||||
| 
 | 
 | ||||||
|     set.seed(seed) |     cached("neural", c(species_ids, gene_ids, reference_gene_ids), { | ||||||
|     gene_count <- length(preset$gene_ids) |         set.seed(seed) | ||||||
|  |         gene_count <- length(gene_ids) | ||||||
| 
 | 
 | ||||||
|     # Prefilter distances by species. |         # Prefilter distances by species. | ||||||
|     distances <- geposan::distances[species %chin% species_ids] |         distances <- geposan::distances[species %chin% species_ids] | ||||||
| 
 | 
 | ||||||
|     # Input data for the network. This contains the gene ID as an identifier |         # Input data for the network. This contains the gene ID as an identifier | ||||||
|     # as well as the per-species gene distances as input variables. |         # as well as the per-species gene distances as input variables. | ||||||
|     data <- data.table(gene = preset$gene_ids) |         data <- data.table(gene = gene_ids) | ||||||
| 
 | 
 | ||||||
|     # Buffer to keep track of species included in the computation. Species |         # Buffer to keep track of species included in the computation. Species | ||||||
|     # from `species_ids` may be excluded if they don't have enough data. |         # from `species_ids` may be excluded if they don't have enough data. | ||||||
|     species_ids_included <- NULL |         species_ids_included <- NULL | ||||||
| 
 | 
 | ||||||
|     # Make a column containing distance data for each species. |         # Make a column containing distance data for each species. | ||||||
|     for (species_id in species_ids) { |         for (species_id in species_ids) { | ||||||
|         species_distances <- distances[species == species_id, .(gene, distance)] |             species_distances <- distances[ | ||||||
|  |                 species == species_id, | ||||||
|  |                 .(gene, distance) | ||||||
|  |             ] | ||||||
| 
 | 
 | ||||||
|         # Only include species with at least 25% known values. |             # Only include species with at least 25% known values. | ||||||
| 
 | 
 | ||||||
|         species_distances <- stats::na.omit(species_distances) |             species_distances <- stats::na.omit(species_distances) | ||||||
| 
 | 
 | ||||||
|         if (nrow(species_distances) >= 0.25 * gene_count) { |             if (nrow(species_distances) >= 0.25 * gene_count) { | ||||||
|             species_ids_included <- c(species_ids_included, species_id) |                 species_ids_included <- c(species_ids_included, species_id) | ||||||
|             data <- merge(data, species_distances, all.x = TRUE) |                 data <- merge(data, species_distances, all.x = TRUE) | ||||||
| 
 | 
 | ||||||
|             # Replace missing data with mean values. The neural network can't |                 # Replace missing data with mean values. The neural network | ||||||
|             # handle NAs in a meaningful way. Choosing extreme values here |                 # can't handle NAs in a meaningful way. Choosing extreme values | ||||||
|             # would result in heavily biased results. Therefore, the mean value |                 # here would result in heavily biased results. Therefore, the | ||||||
|             # is chosen as a compromise. However, this will of course lessen the |                 # mean value is chosen as a compromise. However, this will of | ||||||
|             # significance of the results. |                 # course lessen the significance of the results. | ||||||
| 
 | 
 | ||||||
|             mean_distance <- round(species_distances[, mean(distance)]) |                 mean_distance <- round(species_distances[, mean(distance)]) | ||||||
|             data[is.na(distance), distance := mean_distance] |                 data[is.na(distance), distance := mean_distance] | ||||||
| 
 | 
 | ||||||
|             # Name the new column after the species. |                 # Name the new column after the species. | ||||||
|             setnames(data, "distance", species_id) |                 setnames(data, "distance", species_id) | ||||||
|  |             } | ||||||
|         } |         } | ||||||
|     } |  | ||||||
| 
 | 
 | ||||||
|     # Extract the reference genes. |         # Extract the reference genes. | ||||||
| 
 | 
 | ||||||
|     reference_data <- data[gene %chin% reference_gene_ids] |         reference_data <- data[gene %chin% reference_gene_ids] | ||||||
|     reference_data[, neural := 1.0] |         reference_data[, neural := 1.0] | ||||||
| 
 | 
 | ||||||
|     # Take out random samples from the remaining genes. This is another |         # Take out random samples from the remaining genes. This is another | ||||||
|     # compromise with a negative impact on significance. Because there is no |         # compromise with a negative impact on significance. Because there is | ||||||
|     # information on genes with are explicitely *not* TPE-OLD genes, we have to |         # no information on genes with are explicitely *not* TPE-OLD genes, we | ||||||
|     # assume that a random sample of genes has a low probability of including |         # have to assume that a random sample of genes has a low probability of | ||||||
|     # TPE-OLD genes. |         # including TPE-OLD genes. | ||||||
| 
 | 
 | ||||||
|     without_reference_data <- data[!gene %chin% reference_gene_ids] |         without_reference_data <- data[!gene %chin% reference_gene_ids] | ||||||
| 
 | 
 | ||||||
|     reference_samples <- without_reference_data[ |         reference_samples <- without_reference_data[ | ||||||
|         sample( |             sample( | ||||||
|             nrow(without_reference_data), |                 nrow(without_reference_data), | ||||||
|             nrow(reference_data) |                 nrow(reference_data) | ||||||
|  |             ) | ||||||
|  |         ] | ||||||
|  | 
 | ||||||
|  |         reference_samples[, neural := 0.0] | ||||||
|  | 
 | ||||||
|  |         # Merge training data. The training data includes all reference genes as | ||||||
|  |         # well as an equal number of random sample genes. | ||||||
|  |         training_data <- rbindlist(list(reference_data, reference_samples)) | ||||||
|  | 
 | ||||||
|  |         # Construct and train the neural network. | ||||||
|  | 
 | ||||||
|  |         nn_formula <- stats::as.formula(sprintf( | ||||||
|  |             "neural~%s", | ||||||
|  |             paste(species_ids_included, collapse = "+") | ||||||
|  |         )) | ||||||
|  | 
 | ||||||
|  |         layer1 <- length(species_ids) * 0.66 | ||||||
|  |         layer2 <- layer1 * 0.66 | ||||||
|  |         layer3 <- layer2 * 0.66 | ||||||
|  | 
 | ||||||
|  |         nn <- neuralnet::neuralnet( | ||||||
|  |             nn_formula, | ||||||
|  |             training_data, | ||||||
|  |             hidden = c(layer1, layer2, layer3), | ||||||
|  |             linear.output = FALSE | ||||||
|         ) |         ) | ||||||
|     ] |  | ||||||
| 
 | 
 | ||||||
|     reference_samples[, neural := 0.0] |         if (!is.null(progress)) { | ||||||
|  |             # We do everything in one go, so it's not possible to report | ||||||
|  |             # detailed progress information. As the method is relatively quick, | ||||||
|  |             # this should not be a problem. | ||||||
|  |             progress(0.5) | ||||||
|  |         } | ||||||
| 
 | 
 | ||||||
|     # Merge training data. The training data includes all reference genes as |         # Apply the neural network. | ||||||
|     # well as an equal number of random sample genes. |         data[, score := neuralnet::compute(nn, data)$net.result] | ||||||
|     training_data <- rbindlist(list(reference_data, reference_samples)) |  | ||||||
| 
 | 
 | ||||||
|     # Construct and train the neural network. |         if (!is.null(progress)) { | ||||||
|  |             # See above. | ||||||
|  |             progress(1.0) | ||||||
|  |         } | ||||||
| 
 | 
 | ||||||
|     nn_formula <- stats::as.formula(sprintf( |         data[, .(gene, score)] | ||||||
|         "neural~%s", |     }) | ||||||
|         paste(species_ids_included, collapse = "+") |  | ||||||
|     )) |  | ||||||
| 
 |  | ||||||
|     layer1 <- length(species_ids) * 0.66 |  | ||||||
|     layer2 <- layer1 * 0.66 |  | ||||||
|     layer3 <- layer2 * 0.66 |  | ||||||
| 
 |  | ||||||
|     nn <- neuralnet::neuralnet( |  | ||||||
|         nn_formula, |  | ||||||
|         training_data, |  | ||||||
|         hidden = c(layer1, layer2, layer3), |  | ||||||
|         linear.output = FALSE |  | ||||||
|     ) |  | ||||||
| 
 |  | ||||||
|     if (!is.null(progress)) { |  | ||||||
|         # We do everything in one go, so it's not possible to report detailed |  | ||||||
|         # progress information. As the method is relatively quick, this should |  | ||||||
|         # not be a problem. |  | ||||||
|         progress(0.5) |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     # Apply the neural network. |  | ||||||
|     data[, score := neuralnet::compute(nn, data)$net.result] |  | ||||||
| 
 |  | ||||||
|     if (!is.null(progress)) { |  | ||||||
|         # See above. |  | ||||||
|         progress(1.0) |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     data[, .(gene, score)] |  | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -3,23 +3,28 @@ | ||||||
| # A score will be given to each gene such that 0.0 corresponds to the maximal | # A score will be given to each gene such that 0.0 corresponds to the maximal | ||||||
| # mean distance across all genes and 1.0 corresponds to a distance of 0. | # mean distance across all genes and 1.0 corresponds to a distance of 0. | ||||||
| proximity <- function(preset, progress = NULL) { | proximity <- function(preset, progress = NULL) { | ||||||
|     # Prefilter distances by species and gene. |     species_ids <- preset$species_ids | ||||||
|     distances <- geposan::distances[ |     gene_ids <- preset$gene_ids | ||||||
|         species %chin% preset$species_ids & gene %chin% preset$gene_ids |  | ||||||
|     ] |  | ||||||
| 
 | 
 | ||||||
|     # Compute the score as described above. |     cached("proximity", c(species_ids, gene_ids), { | ||||||
|  |         # Prefilter distances by species and gene. | ||||||
|  |         distances <- geposan::distances[ | ||||||
|  |             species %chin% preset$species_ids & gene %chin% preset$gene_ids | ||||||
|  |         ] | ||||||
| 
 | 
 | ||||||
|     distances <- distances[, .(mean_distance = mean(distance)), by = "gene"] |         # Compute the score as described above. | ||||||
|     max_distance <- distances[, max(mean_distance)] |  | ||||||
|     distances[, score := 1 - mean_distance / max_distance] |  | ||||||
| 
 | 
 | ||||||
|     if (!is.null(progress)) { |         distances <- distances[, .(mean_distance = mean(distance)), by = "gene"] | ||||||
|         # We do everything in one go, so it's not possible to report detailed |         max_distance <- distances[, max(mean_distance)] | ||||||
|         # progress information. As the method is relatively quick, this should |         distances[, score := 1 - mean_distance / max_distance] | ||||||
|         # not be a problem. |  | ||||||
|         progress(1.0) |  | ||||||
|     } |  | ||||||
| 
 | 
 | ||||||
|     distances[, .(gene, score)] |         if (!is.null(progress)) { | ||||||
|  |             # We do everything in one go, so it's not possible to report | ||||||
|  |             # detailed progress information. As the method is relatively quick, | ||||||
|  |             # this should not be a problem. | ||||||
|  |             progress(1.0) | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         distances[, .(gene, score)] | ||||||
|  |     }) | ||||||
| } | } | ||||||
|  |  | ||||||
							
								
								
									
										31
									
								
								R/utils.R
									
										
									
									
									
								
							
							
						
						
									
										31
									
								
								R/utils.R
									
										
									
									
									
								
							|  | @ -1,3 +1,34 @@ | ||||||
|  | # Cache the value of an expression on the file system. | ||||||
|  | # | ||||||
|  | # The expression will be evaluated if there is no matching cache file found. | ||||||
|  | # The cache files will be located in a directory "cache" located in the current | ||||||
|  | # working directory. | ||||||
|  | # | ||||||
|  | # @param name Human readable part of the cache file name. | ||||||
|  | # @param objects A vector of objects that this expression depends on. The hash | ||||||
|  | #   of those objects will be used for identifying the cache file. | ||||||
|  | cached <- function(name, objects, expr) { | ||||||
|  |     if (!dir.exists("cache")) { | ||||||
|  |         dir.create("cache") | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     id <- rlang::hash(objects) | ||||||
|  |     cache_file <- sprintf("cache/%s_%s.rda", name, id) | ||||||
|  | 
 | ||||||
|  |     if (file.exists(cache_file)) { | ||||||
|  |         # If the cache file exists, we restore the data from it. | ||||||
|  |         load(cache_file) | ||||||
|  |     } else { | ||||||
|  |         # If the cache file doesn't exist, we have to do the computation. | ||||||
|  |         data <- expr | ||||||
|  | 
 | ||||||
|  |         # The results are cached for the next run. | ||||||
|  |         save(data, file = cache_file, compress = "xz") | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     data | ||||||
|  | } | ||||||
|  | 
 | ||||||
| # This is needed to make data.table's symbols available within the package. | # This is needed to make data.table's symbols available within the package. | ||||||
| #' @import data.table | #' @import data.table | ||||||
| NULL | NULL | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue