From 8cda3e8e83246559bc8c7abcaef9740060546cae Mon Sep 17 00:00:00 2001
From: Elias Projahn <elias@johrpan.de>
Date: Thu, 13 Jan 2022 18:35:02 +0100
Subject: [PATCH] adjacency: Revert to density estimate

This reverts commit 23bb499d3a38a62cf8a7111cd0c2b5bc2784064c.
---
 NAMESPACE        |  1 +
 R/adjacency.R    | 28 ++++++++++++++++++++++++++--
 man/adjacency.Rd |  4 ++--
 man/densest.Rd   | 21 +++++++++++++++++++++
 4 files changed, 50 insertions(+), 4 deletions(-)
 create mode 100644 man/densest.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 49d5bdf..685d468 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -12,6 +12,7 @@ export(analyze)
 export(clustering)
 export(compare)
 export(correlation)
+export(densest)
 export(method)
 export(neural)
 export(optimal_weights)
diff --git a/R/adjacency.R b/R/adjacency.R
index 06c47fd..6d2a1e6 100644
--- a/R/adjacency.R
+++ b/R/adjacency.R
@@ -1,7 +1,31 @@
+#' Find the densest value in the data.
+#'
+#' This function assumes that data represents a continuous variable and finds
+#' a single value with the highest estimated density. This can be used to
+#' estimate the mode of the data. If there is only one value that value is
+#' returned. If multiple density maxima with the same density exist, their mean
+#' is returned.
+#'
+#' @param data The input data.
+#'
+#' @return The densest value of data.
+#'
+#' @export
+densest <- function(data) {
+    as.numeric(if (length(data) <= 0) {  # result is always coerced to numeric
+        NULL  # empty input: as.numeric(NULL) yields numeric(0)
+    } else if (length(data) == 1) {
+        data  # a single value is returned as is, without estimating density
+    } else {
+        density <- stats::density(data)  # density estimate, default settings
+        mean(density$x[density$y == max(density$y)])  # mean of tied maxima
+    })
+}
+
 #' Score genes based on their proximity to the reference genes.
 #'
 #' @param estimate A function that will be used to summarize the distance
-#'   values for each gene. By default, [median()] is used.
+#'   values for each gene. See [densest()] for the default implementation.
 #' @param combination A function that will be used to combine the different
 #'   distances to the reference genes. By default [min()] is used. That means
 #'   the distance to the nearest reference gene will be scored.
@@ -9,7 +33,7 @@
 #' @return An object of class `geposan_method`.
 #'
 #' @export
-adjacency <- function(estimate = stats::median, combination = min) {
+adjacency <- function(estimate = densest, combination = min) {
     method(
         id = "adjacency",
         name = "Adjacency",
diff --git a/man/adjacency.Rd b/man/adjacency.Rd
index bc0a724..f68d759 100644
--- a/man/adjacency.Rd
+++ b/man/adjacency.Rd
@@ -4,11 +4,11 @@
 \alias{adjacency}
 \title{Score genes based on their proximity to the reference genes.}
 \usage{
-adjacency(estimate = stats::median, combination = min)
+adjacency(estimate = densest, combination = min)
 }
 \arguments{
 \item{estimate}{A function that will be used to summarize the distance
-values for each gene. By default, \code{\link[=median]{median()}} is used.}
+values for each gene. See \code{\link[=densest]{densest()}} for the default implementation.}
 
 \item{combination}{A function that will be used to combine the different
 distances to the reference genes. By default \code{\link[=min]{min()}} is used. That means
diff --git a/man/densest.Rd b/man/densest.Rd
new file mode 100644
index 0000000..252c6f1
--- /dev/null
+++ b/man/densest.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/adjacency.R
+\name{densest}
+\alias{densest}
+\title{Find the densest value in the data.}
+\usage{
+densest(data)
+}
+\arguments{
+\item{data}{The input data.}
+}
+\value{
+The densest value of data.
+}
+\description{
+This function assumes that data represents a continuous variable and finds
+a single value with the highest estimated density. This can be used to
+estimate the mode of the data. If there is only one value that value is
+returned. If multiple density maxima with the same density exist, their mean
+is returned.
+}