From aee77e8bb528a15dcfdd079b01d13b99a67fae69 Mon Sep 17 00:00:00 2001 From: Elias Projahn Date: Sun, 14 Aug 2022 17:50:59 +0200 Subject: [PATCH] distance: Switch from median to density estimate --- R/method_adjacency.R | 24 ------------------------ R/method_distance.R | 2 +- R/utils.R | 24 ++++++++++++++++++++++++ man/densest.Rd | 2 +- man/distance.Rd | 2 +- 5 files changed, 27 insertions(+), 27 deletions(-) diff --git a/R/method_adjacency.R b/R/method_adjacency.R index 9fb73cf..ed0b925 100644 --- a/R/method_adjacency.R +++ b/R/method_adjacency.R @@ -1,27 +1,3 @@ -#' Find the densest value in the data. -#' -#' This function assumes that data represents a continuous variable and finds -#' a single value with the highest estimated density. This can be used to -#' estimate the mode of the data. If there is only one value that value is -#' returned. If multiple density maxima with the same density exist, their mean -#' is returned. -#' -#' @param data The input data. -#' -#' @return The densest value of data. -#' -#' @export -densest <- function(data) { - as.numeric(if (length(data) <= 0) { - NULL - } else if (length(data) == 1) { - data - } else { - density <- stats::density(data) - mean(density$x[density$y == max(density$y)]) - }) -} - #' Score genes based on their proximity to the reference genes. #' #' In this case, the distance data that is available for one gene is first diff --git a/R/method_distance.R b/R/method_distance.R index e81bc35..217fa01 100644 --- a/R/method_distance.R +++ b/R/method_distance.R @@ -16,7 +16,7 @@ distance <- function(id = "distance", name = "Distance", description = "Distance to telomeres", - summarize = stats::median) { + summarize = densest) { method( id = id, name = name, diff --git a/R/utils.R b/R/utils.R index 473059d..af4b9bb 100644 --- a/R/utils.R +++ b/R/utils.R @@ -40,6 +40,30 @@ num <- function(number, digits) { format(round(number, digits = digits), nsmall = digits) } +#' Find the densest value in the data. +#' +#' This function assumes that data represents a continuous variable and finds +#' a single value with the highest estimated density. This can be used to +#' estimate the mode of the data. If there is only one value that value is +#' returned. If multiple density maxima with the same density exist, their mean +#' is returned. +#' +#' @param data The input data. +#' +#' @return The densest value of data. +#' +#' @export +densest <- function(data) { + as.numeric(if (length(data) <= 0) { + NULL + } else if (length(data) == 1) { + data + } else { + density <- stats::density(data) + mean(density$x[density$y == max(density$y)]) + }) +} + # This is needed to make data.table's symbols available within the package. #' @import data.table NULL diff --git a/man/densest.Rd b/man/densest.Rd index 181a270..1dfee5a 100644 --- a/man/densest.Rd +++ b/man/densest.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/method_adjacency.R +% Please edit documentation in R/utils.R \name{densest} \alias{densest} \title{Find the densest value in the data.} diff --git a/man/distance.Rd b/man/distance.Rd index 2633ccb..3c54f8f 100644 --- a/man/distance.Rd +++ b/man/distance.Rd @@ -8,7 +8,7 @@ distance( id = "distance", name = "Distance", description = "Distance to telomeres", - summarize = stats::median + summarize = densest ) } \arguments{