From e9ae78848a58725c2ba7af8bf5c8c3081ab54da7 Mon Sep 17 00:00:00 2001 From: "Martin R. Smith" Date: Mon, 17 Aug 2020 08:15:48 +0100 Subject: [PATCH] Reduce LAP comparison precision (#33) * Remove soft_lap. Precision 4 works; let's conservatively go to 8. * Update metatext * Arithmetic sequence * Document --- R/lap.R | 10 ++++++++++ man/LAPJV.Rd | 10 ++++++++++ src/tree_distances.cpp | 10 +++++----- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/R/lap.R b/R/lap.R index c7e28156..1950263f 100644 --- a/R/lap.R +++ b/R/lap.R @@ -9,6 +9,16 @@ #' The Jonker & Volgenant approach is a faster alternative to the Hungarian #' algorithm (Munkres 1957), which is implemented in `clue::solve_LSAP()`. #' +#' Note: the JV algorithm expects integers. In order to apply the function +#' to a non-integer _n_, as in the tree distance calculations in this package, +#' each _n_ is multiplied by the largest available integer before applying +#' the JV algorithm. If two values of _n_ exhibit a trivial difference -- +#' e.g. due to floating point errors -- then this can lead to interminable +#' run times. (If numbers of the magnitude of billions differ only in their +#' last significant digit, then the JV algorithm may undergo billions of +#' iterations.) To avoid this, integers over 2^22 that differ by a value of +#' 8 or less are treated as equal. +#' #' NB. At present, only square matrices are supported; if you need support for #' non-square matrices, drop a note at #' [issue #25](https://github.com/ms609/TreeDist/issues/25) diff --git a/man/LAPJV.Rd b/man/LAPJV.Rd index 78f823bf..55a1b66a 100644 --- a/man/LAPJV.Rd +++ b/man/LAPJV.Rd @@ -24,6 +24,16 @@ column, such that the cost of the matching is minimized. The Jonker & Volgenant approach is a faster alternative to the Hungarian algorithm (Munkres 1957), which is implemented in \code{clue::solve_LSAP()}. +Note: the JV algorithm expects integers. In order to apply the function +to a non-integer \emph{n}, as in the tree distance calculations in this package, +each \emph{n} is multiplied by the largest available integer before applying +the JV algorithm. If two values of \emph{n} exhibit a trivial difference -- +e.g. due to floating point errors -- then this can lead to interminable +run times. (If numbers of the magnitude of billions differ only in their +last significant digit, then the JV algorithm may undergo billions of +iterations.) To avoid this, integers over 2^22 that differ by a value of +8 or less are treated as equal. + NB. At present, only square matrices are supported; if you need support for non-square matrices, drop a note at \href{https://github.com/ms609/TreeDist/issues/25}{issue #25} diff --git a/src/tree_distances.cpp b/src/tree_distances.cpp index a194634c..9795d984 100644 --- a/src/tree_distances.cpp +++ b/src/tree_distances.cpp @@ -449,13 +449,13 @@ List cpp_mutual_clustering (const RawMatrix x, const RawMatrix y, } else { score[ai][bi] = max_score - // Division by n_tips converts n(A&B) to P(A&B) for each ic_element - cost((max_score / double(n_tips)) * ( + cost(max_score * (( // 0 < Sum of IC_elements <= n_tips ic_element(a_and_b, na, nb, n_tips) + - ic_element(a_and_B, na, nB, n_tips) + - ic_element(A_and_b, nA, nb, n_tips) + - ic_element(A_and_B, nA, nB, n_tips) - ) + ic_element(a_and_B, na, nB, n_tips) + + ic_element(A_and_b, nA, nb, n_tips) + + ic_element(A_and_B, nA, nB, n_tips) + ) / n_tips) ); } }