#' Rank genes based on how ubiquitous they are.
#'
#' This function will compute a weighted average across multiple metrics that
#' define how ubiquitous a gene is based on its expression across samples. The
#' weighted sum of the three metrics is divided by the sum of the absolute
#' weights and then rescaled so that the lowest score becomes 0.0 and the
#' highest score becomes 1.0.
#'
#' Rows for which no score can be computed (the score is `NA` or `NaN`, for
#' example because one of the metrics is missing or because all weights are
#' zero) are removed from the result. If all computed scores are identical,
#' they are set to 0.0 instead of being divided by a range of zero.
#'
#' @param data The input data to use. This should either be the result of a
#'   previous call to this function or the return value of [analyze()].
#' @param cross_sample_metric Name of the column that should be used as the
#'   metric measuring the expression across samples.
#' @param cross_sample_weight Relative weight that should be assigned to the
#'   cross sample metric.
#' @param level_metric Name of the column that should be used to represent
#'   overall expression levels.
#' @param level_weight Relative weight that should be assigned to the level
#'   metric.
#' @param variation_metric Name of the column that should be used as the metric
#'   representing variation in expression.
#' @param variation_weight Relative weight that should be assigned to the
#'   variation metric.
#'
#' @return A `data.table` with gene data as well as the scores, ranks and
#'   percentiles for each gene. Rows are sorted by descending `score`, `rank`
#'   is the resulting row number and `percentile` is one minus the rank
#'   divided by the number of rows.
#'
#' @export
rank_genes <- function(data = ubigen::gtex_all,
                       cross_sample_metric = "above_95",
                       cross_sample_weight = 0.5,
                       level_metric = "median_expression_normalized",
                       level_weight = 0.25,
                       variation_metric = "qcv_expression_normalized",
                       variation_weight = -0.25) {
  # Look the metrics up as columns only. Resolving the names with `get()`
  # would silently fall back to variables of the same name outside of `data`.
  metrics <- c(cross_sample_metric, level_metric, variation_metric)
  missing_metrics <- setdiff(metrics, names(data))

  if (length(missing_metrics) > 0) {
    stop(
      "Metric column(s) not found in `data`: ",
      paste(missing_metrics, collapse = ", "),
      call. = FALSE
    )
  }

  # All following modifications happen by reference, so work on a copy to
  # leave the caller's table untouched.
  data <- copy(data)

  total_weight <- abs(cross_sample_weight) +
    abs(level_weight) +
    abs(variation_weight)

  scores <- (cross_sample_weight * data[[cross_sample_metric]] +
    level_weight * data[[level_metric]] +
    variation_weight * data[[variation_metric]]) /
    total_weight

  # Normalize scores to be between 0.0 and 1.0. This is skipped if there is
  # no score at all, which avoids calling `min()`/`max()` without values.
  has_score <- !is.na(scores)

  if (any(has_score)) {
    lowest <- min(scores[has_score])
    score_range <- max(scores[has_score]) - lowest

    if (isTRUE(score_range == 0)) {
      # All scores are tied. Dividing by the range would turn every score
      # into NaN and the filter below would then remove every gene.
      scores[has_score] <- 0
    } else {
      scores <- (scores - lowest) / score_range
    }
  }

  data.table::set(data, j = "score", value = scores)

  # Drop rows without a score. According to the original comment these are
  # the genes that are not expressed at all.
  data <- data[!is.na(score)]

  setorder(data, -score)
  data[, rank := .I]

  # `.N` equals the highest rank, but is also defined for an empty table.
  data[, percentile := 1 - rank / .N]

  data
}