| 
									
										
										
										
											2022-01-26 11:38:39 +01:00
										 |  |  | #' Perform cross-validation for the ranking. | 
					
						
							| 
									
										
										
										
											2021-12-06 10:50:18 +01:00
										 |  |  | #' | 
					
						
							| 
									
										
										
										
											2022-01-26 11:38:39 +01:00
										 |  |  | #' This function reoptimizes the ranking leaving out one of the original | 
					
						
							| 
									
										
										
										
											2021-12-06 10:50:18 +01:00
										 |  |  | #' reference genes at a time. | 
					
						
							|  |  |  | #' | 
					
						
							| 
									
										
										
										
											2022-01-26 11:38:39 +01:00
										 |  |  | #' @param ranking The ranking to validate. | 
					
						
							|  |  |  | #' @param reference_gene_ids The reference gene IDs whose ranking should be | 
					
						
							|  |  |  | #'   validated. | 
					
						
							|  |  |  | #' @param method_ids IDs of the methods that were used. | 
					
						
							| 
									
										
										
										
											2021-12-06 10:50:18 +01:00
										 |  |  | #' @param progress An optional progress function that should accept a single | 
					
						
							|  |  |  | #'   value between 0.0 and 1.0 for progress information. | 
					
						
							|  |  |  | #' | 
					
						
							| 
									
										
										
										
											2022-01-26 11:38:39 +01:00
										 |  |  | #' @returns A validation object with the following items: | 
					
						
							|  |  |  | #'   \describe{ | 
					
						
							|  |  |  | #'     \item{`validation`}{A `data.table` containing percentiles of the | 
					
						
							|  |  |  | #'       comparison genes from the original ranking as well as their validation. | 
					
						
							|  |  |  | #'     } | 
					
						
							|  |  |  | #'     \item{`mean_score`}{The mean score of the genes.} | 
					
						
							|  |  |  | #'     \item{`mean_percentile_original`}{The mean percentile of the genes in | 
					
						
							|  |  |  | #'       the original ranking. | 
					
						
							|  |  |  | #'     } | 
					
						
							|  |  |  | #'     \item{`mean_percentile_validation`}{The mean percentile of the genes | 
					
						
							|  |  |  | #'       when optimizing without themselves. | 
					
						
							|  |  |  | #'     } | 
					
						
							|  |  |  | #'     \item{`mean_error`}{The mean absolute error.} | 
					
						
							|  |  |  | #'   } | 
					
						
							| 
									
										
										
										
											2021-12-06 10:50:18 +01:00
										 |  |  | #' | 
					
						
							|  |  |  | #' @export | 
					
						
							| 
									
										
										
										
											2022-01-26 11:38:39 +01:00
										 |  |  | validate <- function(ranking, reference_gene_ids, method_ids, progress = NULL) { | 
					
						
							|  |  |  |     if (!inherits(ranking, "geposan_ranking")) { | 
					
						
							|  |  |  |         stop("Ranking is invalid. Use geposan::ranking().") | 
					
						
							| 
									
										
										
										
											2021-12-06 10:50:18 +01:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-26 11:38:39 +01:00
										 |  |  |     if (is.null(progress)) { | 
					
						
							|  |  |  |         progress_bar <- progress::progress_bar$new() | 
					
						
							|  |  |  |         progress_bar$update(0.0) | 
					
						
							| 
									
										
										
										
											2021-12-06 10:50:18 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-26 11:38:39 +01:00
										 |  |  |         progress <- function(progress_value) { | 
					
						
							|  |  |  |             if (!progress_bar$finished) { | 
					
						
							|  |  |  |                 progress_bar$update(progress_value) | 
					
						
							|  |  |  |                 if (progress_value >= 1.0) { | 
					
						
							|  |  |  |                     progress_bar$terminate() | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2021-12-06 10:50:18 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-26 11:38:39 +01:00
										 |  |  |     progress_state <- 0.0 | 
					
						
							|  |  |  |     progress_step <- 1.0 / length(reference_gene_ids) | 
					
						
							| 
									
										
										
										
											2021-12-06 10:50:18 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-26 11:38:39 +01:00
										 |  |  |     results <- ranking[gene %chin% reference_gene_ids, .(gene, percentile)] | 
					
						
							| 
									
										
										
										
											2021-12-06 10:50:18 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-26 11:38:39 +01:00
										 |  |  |     for (gene_id in reference_gene_ids) { | 
					
						
							|  |  |  |         included_gene_ids <- reference_gene_ids[ | 
					
						
							|  |  |  |             reference_gene_ids != gene_id | 
					
						
							|  |  |  |         ] | 
					
						
							| 
									
										
										
										
											2021-12-06 10:50:18 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-26 11:38:39 +01:00
										 |  |  |         weights <- optimal_weights( | 
					
						
							|  |  |  |             ranking, | 
					
						
							|  |  |  |             method_ids, | 
					
						
							|  |  |  |             included_gene_ids | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2021-12-06 10:50:18 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-26 11:38:39 +01:00
										 |  |  |         ranking_validation <- ranking(ranking, weights) | 
					
						
							| 
									
										
										
										
											2021-12-06 10:50:18 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-26 11:38:39 +01:00
										 |  |  |         results[ | 
					
						
							|  |  |  |             gene == gene_id, | 
					
						
							|  |  |  |             percentile_validation := ranking_validation[ | 
					
						
							|  |  |  |                 gene == gene_id, | 
					
						
							|  |  |  |                 percentile | 
					
						
							|  |  |  |             ] | 
					
						
							|  |  |  |         ] | 
					
						
							| 
									
										
										
										
											2021-12-06 10:50:18 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-26 11:38:39 +01:00
										 |  |  |         if (!is.null(progress)) { | 
					
						
							|  |  |  |             progress_state <- progress_state + progress_step | 
					
						
							|  |  |  |             progress(progress_state) | 
					
						
							| 
									
										
										
										
											2021-12-06 10:50:18 +01:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2022-01-26 11:38:39 +01:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2021-12-06 10:50:18 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-26 11:38:39 +01:00
										 |  |  |     results[, error := percentile - percentile_validation] | 
					
						
							|  |  |  |     setorder(results, error) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     structure( | 
					
						
							|  |  |  |         list( | 
					
						
							|  |  |  |             validation = results, | 
					
						
							|  |  |  |             mean_percentile_original = results[, mean(percentile)], | 
					
						
							|  |  |  |             mean_percentile_validation = results[, mean(percentile_validation)], | 
					
						
							|  |  |  |             mean_error = results[, mean(error)] | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |         class = "geposan_validation" | 
					
						
							|  |  |  |     ) | 
					
						
							| 
									
										
										
										
											2021-12-06 10:50:18 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #' S3 method to print a validation object. | 
					
						
							|  |  |  | #' | 
					
						
							|  |  |  | #' @param x The validation to print. | 
					
						
							|  |  |  | #' @param ... Other parameters. | 
					
						
							|  |  |  | #' | 
					
						
							|  |  |  | #' @seealso [validate()] | 
					
						
							|  |  |  | #' | 
					
						
							|  |  |  | #' @export | 
					
						
							|  |  |  | print.geposan_validation <- function(x, ...) { | 
					
						
							|  |  |  |     cat(sprintf( | 
					
						
							|  |  |  |         paste0( | 
					
						
							| 
									
										
										
										
											2022-01-26 11:38:39 +01:00
										 |  |  |             "geposan validation:", | 
					
						
							|  |  |  |             "\n  Mean percentile original: %.1f%%", | 
					
						
							|  |  |  |             "\n  Mean percentile validation: %.1f%%", | 
					
						
							|  |  |  |             "\n  Mean error: %.1f percent points", | 
					
						
							| 
									
										
										
										
											2021-12-06 10:50:18 +01:00
										 |  |  |             "\n" | 
					
						
							|  |  |  |         ), | 
					
						
							| 
									
										
										
										
											2022-01-26 11:38:39 +01:00
										 |  |  |         x$mean_percentile_original * 100, | 
					
						
							|  |  |  |         x$mean_percentile_validation * 100, | 
					
						
							|  |  |  |         x$mean_error * 100 | 
					
						
							| 
									
										
										
										
											2021-12-06 10:50:18 +01:00
										 |  |  |     )) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     invisible(x) | 
					
						
							|  |  |  | } |