From a102f61477a99bba239838c049d1d9f8f902f3d7 Mon Sep 17 00:00:00 2001
From: n-kall <n-kall@github.com>
Date: Thu, 16 May 2024 11:22:01 +0300
Subject: [PATCH 1/4] fix eof newline issue in doc template

---
 man-roxygen/ref-vehtari-paretosmooth-2022.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man-roxygen/ref-vehtari-paretosmooth-2022.R b/man-roxygen/ref-vehtari-paretosmooth-2022.R
index 6f87d07..addcf4c 100644
--- a/man-roxygen/ref-vehtari-paretosmooth-2022.R
+++ b/man-roxygen/ref-vehtari-paretosmooth-2022.R
@@ -2,4 +2,4 @@
 #' Aki Vehtari, Daniel Simpson, Andrew Gelman, Yuling Yao and
 #' Jonah Gabry (2024). Pareto Smoothed Importance Sampling.
 #' *Journal of Machine Learning Research*, 25(72):1-58.
-#' [PDF](https://jmlr.org/papers/v25/19-556.html)
\ No newline at end of file
+#' [PDF](https://jmlr.org/papers/v25/19-556.html)

From 8aea0c1b966abf96fc21de24e5ed7edd6d2de054 Mon Sep 17 00:00:00 2001
From: n-kall <n-kall@github.com>
Date: Thu, 16 May 2024 12:21:44 +0300
Subject: [PATCH 2/4] pareto_* functions now return unnamed scalars rather than
 named

fixes #346, fixes #359
---
 R/pareto_smooth.R                   | 84 +++++++++++++++++------------
 tests/testthat/test-pareto_smooth.R | 17 ++----
 2 files changed, 55 insertions(+), 46 deletions(-)

diff --git a/R/pareto_smooth.R b/R/pareto_smooth.R
index dca53cc..18817fa 100644
--- a/R/pareto_smooth.R
+++ b/R/pareto_smooth.R
@@ -9,7 +9,7 @@
 #' @template args-pareto
 #' @template args-methods-dots
 #' @template ref-vehtari-paretosmooth-2022
-#' @return `khat` estimated Generalized Pareto Distribution shape parameter k
+#' @template return-conv
 #'
 #' @seealso [`pareto_diags`] for additional related diagnostics, and
 #'   [`pareto_smooth`] for Pareto smoothed draws.
@@ -41,7 +41,7 @@ pareto_khat.default <- function(x,
     smooth_draws = FALSE,
     are_log_weights = are_log_weights,
     ...)
-  return(smoothed$diagnostics)
+  return(smoothed$diagnostics$khat)
 }
 
 #' @rdname pareto_khat
@@ -49,19 +49,17 @@ pareto_khat.default <- function(x,
 pareto_khat.rvar <- function(x, ...) {
   draws_diags <- summarise_rvar_by_element_with_chains(
     x,
-    pareto_smooth.default,
-    return_k = TRUE,
-    smooth_draws = FALSE,
+    pareto_khat.default,
     ...
   )
   dim(draws_diags) <- dim(draws_diags) %||% length(draws_diags)
   margins <- seq_along(dim(draws_diags))
 
   diags <- list(
-    khat = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$khat)
+    khat = apply(draws_diags, margins, function(x) x[[1]])
   )
 
-  diags
+  diags$khat
 }
 
 
@@ -107,8 +105,10 @@ pareto_khat.rvar <- function(x, ...) {
 #'  when the sample size is increased, compared to the central limit
 #'  theorem convergence rate. See Appendix B in Vehtari et al. (2024).
 #'
-#' @seealso [`pareto_khat`] for only calculating khat, and
-#'   [`pareto_smooth`] for Pareto smoothed draws.
+#' @seealso [`pareto_khat`], [`pareto_min_ss`],
+#'   [`pareto_khat_threshold`], and [`pareto_convergence_rate`] for
+#'   individual diagnostics; and [`pareto_smooth`] for Pareto smoothing
+#'   draws.
 #' @examples
 #' mu <- extract_variable_matrix(example_draws(), "mu")
 #' pareto_diags(mu)
@@ -151,10 +151,7 @@ pareto_diags.default <- function(x,
 pareto_diags.rvar <- function(x, ...) {
   draws_diags <- summarise_rvar_by_element_with_chains(
     x,
-    pareto_smooth.default,
-    return_k = TRUE,
-    smooth_draws = FALSE,
-    extra_diags = TRUE,
+    pareto_diags.default,
     ...
   )
 
@@ -162,10 +159,10 @@ pareto_diags.rvar <- function(x, ...) {
   margins <- seq_along(dim(draws_diags))
 
   diags <- list(
-    khat = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$khat),
-    min_ss = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$min_ss),
-    khat_threshold = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$khat_threshold),
-    convergence_rate = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$convergence_rate)
+    khat = apply(draws_diags, margins, function(x) x[[1]]$khat),
+    min_ss = apply(draws_diags, margins, function(x) x[[1]]$min_ss),
+    khat_threshold = apply(draws_diags, margins, function(x) x[[1]]$khat_threshold),
+    convergence_rate = apply(draws_diags, margins, function(x) x[[1]]$convergence_rate)
   )
 
   diags
@@ -192,13 +189,20 @@ pareto_diags.rvar <- function(x, ...) {
 #' @template ref-vehtari-paretosmooth-2022
 #' @return Either a vector `x` of smoothed values or a named list
 #'   containing the vector `x` and a named list `diagnostics`
-#'   containing Pareto smoothing diagnostics: * `khat`: estimated
-#'   Pareto k shape parameter, and optionally * `min_ss`: minimum
-#'   sample size for reliable Pareto smoothed estimate *
-#'   `khat_threshold`: khat-threshold for reliable Pareto smoothed
-#'   estimates * `convergence_rate`: Relative convergence rate for
+#'   containing numeric values:
+#'
+#' * `khat`: estimated Pareto k shape parameter, and optionally
+#' * `min_ss`: minimum sample size for reliable Pareto smoothed
+#'   estimate
+#' * `khat_threshold`: sample size specific khat threshold for
+#'   reliable Pareto smoothed estimates
+#' * `convergence_rate`: Relative convergence rate for
 #'   Pareto smoothed estimates
 #'
+#' If any of the draws is non-finite, that is, `NA`, `NaN`, `Inf`, or
+#' `-Inf`, Pareto smoothing will not be performed, and the original
+#' draws will be returned and and diagnostics will be `NA` (numeric).
+#'
 #' @seealso [`pareto_khat`] for only calculating khat, and
 #'   [`pareto_diags`] for additional diagnostics.
 #' @examples
@@ -265,13 +269,27 @@ pareto_smooth.default <- function(x,
   verbose <- as_one_logical(verbose)
   are_log_weights <- as_one_logical(are_log_weights)
 
+  if (extra_diags) {
+    return_k <- TRUE
+  }
+
   # check for infinite or na values
   if (should_return_NA(x)) {
     warning_no_call("Input contains infinite or NA values, or is constant. Fitting of generalized Pareto distribution not performed.")
     if (!return_k) {
       out <- x
+    } else if (!extra_diags) {
+      out <- list(x = x, diagnostics = list(khat = NA_real_))
     } else {
-      out <- list(x = x, diagnostics = NA_real_)
+      out <- list(
+        x = x,
+        diagnostics = list(
+          khat = NA_real_,
+          min_ss = NA_real_,
+          khat_threshold = NA_real_,
+          convergence_rate = NA_real_
+        )
+      )
     }
     return(out)
   }
@@ -379,13 +397,13 @@ pareto_khat_threshold <- function(x, ...) {
 #' @rdname pareto_diags
 #' @export
 pareto_khat_threshold.default <- function(x, ...) {
-  c(khat_threshold = ps_khat_threshold(length(x)))
+  ps_khat_threshold(length(x))
 }
 
 #' @rdname pareto_diags
 #' @export
 pareto_khat_threshold.rvar <- function(x, ...) {
-  c(khat_threshold = ps_khat_threshold(ndraws(x)))
+  ps_khat_threshold(ndraws(x))
 }
 
 #' @rdname pareto_diags
@@ -397,15 +415,15 @@ pareto_min_ss <- function(x, ...) {
 #' @rdname pareto_diags
 #' @export
 pareto_min_ss.default <- function(x, ...) {
-  k <- pareto_khat(x)$k
-  c(min_ss = ps_min_ss(k))
+  k <- pareto_khat(x)
+  ps_min_ss(k)
 }
 
 #' @rdname pareto_diags
 #' @export
 pareto_min_ss.rvar <- function(x, ...) {
-  k <- pareto_khat(x)$k
-  c(min_ss = ps_min_ss(k))
+  k <- pareto_khat(x)
+  ps_min_ss(k)
 }
 
 #' @rdname pareto_diags
@@ -417,15 +435,15 @@ pareto_convergence_rate <- function(x, ...) {
 #' @rdname pareto_diags
 #' @export
 pareto_convergence_rate.default <- function(x, ...) {
-  k <- pareto_khat(x)$khat
-  c(convergence_rate = ps_convergence_rate(k, length(x)))
+  k <- pareto_khat(x)
+  ps_convergence_rate(k, length(x))
 }
 
 #' @rdname pareto_diags
 #' @export
 pareto_convergence_rate.rvar <- function(x, ...) {
   k <- pareto_khat(x)
-  c(convergence_rate = ps_convergence_rate(k, ndraws(x)))
+  ps_convergence_rate(k, ndraws(x))
 }
 
 
@@ -616,7 +634,7 @@ ps_convergence_rate <- function(k, ndraws, ...) {
 }
 
 #' Pareto tail length
-#' 
+#'
 #' Calculate the tail length from number of draws and relative efficiency
 #' r_eff. See Appendix H in Vehtari et al. (2024). This function is
 #' used internally and is exported to be available for other packages.
diff --git a/tests/testthat/test-pareto_smooth.R b/tests/testthat/test-pareto_smooth.R
index 7a6c939..8ea6639 100644
--- a/tests/testthat/test-pareto_smooth.R
+++ b/tests/testthat/test-pareto_smooth.R
@@ -1,12 +1,3 @@
-test_that("pareto_khat returns expected reasonable values", {
-  tau <- extract_variable_matrix(example_draws(), "tau")
-
-  pk <- pareto_khat(tau)
-  expect_true(names(pk) == "khat")
-
-})
-
-
 test_that("pareto_khat handles constant tail correctly", {
 
   # left tail is constant, so khat should be NA, but for "both" it
@@ -30,8 +21,8 @@ test_that("pareto_khat handles tail argument", {
   pkl <- pareto_khat(tau, tail = "left")
   pkr <- pareto_khat(tau, tail = "right")
   pkb <- pareto_khat(tau)
-  expect_true(pkl$khat < pkr$khat)
-  expect_equal(pkr$khat, pkb$khat)
+  expect_true(pkl < pkr)
+  expect_equal(pkr, pkb)
 })
 
 test_that("pareto_khat handles ndraws_tail argument", {
@@ -39,7 +30,7 @@ test_that("pareto_khat handles ndraws_tail argument", {
   tau <- extract_variable_matrix(example_draws(), "tau")
   pk10 <- pareto_khat(tau, tail = "right", ndraws_tail = 10)
   pk25 <- pareto_khat(tau, tail = "right", ndraws_tail = 25)
-  expect_true(pk10$khat > pk25$khat)
+  expect_true(pk10 > pk25)
 
   expect_warning(pareto_khat(tau, tail = "both", ndraws_tail = 201),
                  "Number of tail draws cannot be more than half ",
@@ -57,7 +48,7 @@ test_that("pareto_khat handles r_eff argument", {
   tau <- extract_variable_matrix(example_draws(), "tau")
   pk1 <- pareto_khat(tau, r_eff = 1)
   pk0.6 <- pareto_khat(tau, r_eff = 0.6)
-  expect_true(pk1$khat < pk0.6$khat)
+  expect_true(pk1 < pk0.6)
 
 })
 

From 9d59c189db6a0d8c556a0a3001d4e1b5ddd06e4b Mon Sep 17 00:00:00 2001
From: n-kall <n-kall@github.com>
Date: Thu, 16 May 2024 12:27:54 +0300
Subject: [PATCH 3/4] render docs

---
 man/pareto_diags.Rd  |  6 ++++--
 man/pareto_khat.Rd   | 14 +++++++++++++-
 man/pareto_smooth.Rd | 18 +++++++++++++-----
 3 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/man/pareto_diags.Rd b/man/pareto_diags.Rd
index 31420b9..dcd0194 100644
--- a/man/pareto_diags.Rd
+++ b/man/pareto_diags.Rd
@@ -138,8 +138,10 @@ Jonah Gabry (2024). Pareto Smoothed Importance Sampling.
 \href{https://jmlr.org/papers/v25/19-556.html}{PDF}
 }
 \seealso{
-\code{\link{pareto_khat}} for only calculating khat, and
-\code{\link{pareto_smooth}} for Pareto smoothed draws.
+\code{\link{pareto_khat}}, \code{\link{pareto_min_ss}},
+\code{\link{pareto_khat_threshold}}, and \code{\link{pareto_convergence_rate}} for
+individual diagnostics; and \code{\link{pareto_smooth}} for Pareto smoothing
+draws.
 
 Other diagnostics: 
 \code{\link{ess_basic}()},
diff --git a/man/pareto_khat.Rd b/man/pareto_khat.Rd
index 9df455d..c419b1d 100644
--- a/man/pareto_khat.Rd
+++ b/man/pareto_khat.Rd
@@ -58,7 +58,19 @@ printed. Default is \code{FALSE}.}
 draws are log weights, and only right tail will be smoothed.}
 }
 \value{
-\code{khat} estimated Generalized Pareto Distribution shape parameter k
+If the input is an array, returns a single numeric value. If any of the draws
+is non-finite, that is, \code{NA}, \code{NaN}, \code{Inf}, or \code{-Inf}, the returned output
+will be (numeric) \code{NA}. Also, if all draws within any of the chains of a
+variable are the same (constant), the returned output will be (numeric) \code{NA}
+as well. The reason for the latter is that, for constant draws, we cannot
+distinguish between variables that are supposed to be constant (e.g., a
+diagonal element of a correlation matrix is always 1) or variables that just
+happened to be constant because of a failure of convergence or other problems
+in the sampling process.
+
+If the input is an \code{\link{rvar}}, returns an array of the same dimensions as the
+\code{\link{rvar}}, where each element is equal to the value that would be returned by
+passing the draws array for that element of the \code{\link{rvar}} to this function.
 }
 \description{
 Estimate Pareto k value by fitting a Generalized Pareto
diff --git a/man/pareto_smooth.Rd b/man/pareto_smooth.Rd
index d89b1ab..48ae217 100644
--- a/man/pareto_smooth.Rd
+++ b/man/pareto_smooth.Rd
@@ -72,13 +72,21 @@ draws are log weights, and only right tail will be smoothed.}
 \value{
 Either a vector \code{x} of smoothed values or a named list
 containing the vector \code{x} and a named list \code{diagnostics}
-containing Pareto smoothing diagnostics: * \code{khat}: estimated
-Pareto k shape parameter, and optionally * \code{min_ss}: minimum
-sample size for reliable Pareto smoothed estimate *
-\code{khat_threshold}: khat-threshold for reliable Pareto smoothed
-estimates * \code{convergence_rate}: Relative convergence rate for
+containing numeric values:
+\itemize{
+\item \code{khat}: estimated Pareto k shape parameter, and optionally
+\item \code{min_ss}: minimum sample size for reliable Pareto smoothed
+estimate
+\item \code{khat_threshold}: sample size specific khat threshold for
+reliable Pareto smoothed estimates
+\item \code{convergence_rate}: Relative convergence rate for
 Pareto smoothed estimates
 }
+
+If any of the draws is non-finite, that is, \code{NA}, \code{NaN}, \code{Inf}, or
+\code{-Inf}, Pareto smoothing will not be performed, and the original
+draws will be returned and and diagnostics will be \code{NA} (numeric).
+}
 \description{
 Smooth the tail draws of x by replacing tail draws by order
 statistics of a generalized Pareto distribution fit to the

From 173f7d7d7e007714dd54a059b10324d6923b612d Mon Sep 17 00:00:00 2001
From: n-kall <n-kall@github.com>
Date: Thu, 16 May 2024 12:46:01 +0300
Subject: [PATCH 4/4] don't pass dots from pareto_khat

---
 R/pareto_smooth.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/pareto_smooth.R b/R/pareto_smooth.R
index 18817fa..4854cec 100644
--- a/R/pareto_smooth.R
+++ b/R/pareto_smooth.R
@@ -39,8 +39,8 @@ pareto_khat.default <- function(x,
     verbose = verbose,
     return_k = TRUE,
     smooth_draws = FALSE,
-    are_log_weights = are_log_weights,
-    ...)
+    are_log_weights = are_log_weights
+  )
   return(smoothed$diagnostics$khat)
 }