From a102f61477a99bba239838c049d1d9f8f902f3d7 Mon Sep 17 00:00:00 2001 From: n-kall Date: Thu, 16 May 2024 11:22:01 +0300 Subject: [PATCH 1/4] fix eof newline issue in doc template --- man-roxygen/ref-vehtari-paretosmooth-2022.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man-roxygen/ref-vehtari-paretosmooth-2022.R b/man-roxygen/ref-vehtari-paretosmooth-2022.R index 6f87d07..addcf4c 100644 --- a/man-roxygen/ref-vehtari-paretosmooth-2022.R +++ b/man-roxygen/ref-vehtari-paretosmooth-2022.R @@ -2,4 +2,4 @@ #' Aki Vehtari, Daniel Simpson, Andrew Gelman, Yuling Yao and #' Jonah Gabry (2024). Pareto Smoothed Importance Sampling. #' *Journal of Machine Learning Research*, 25(72):1-58. -#' [PDF](https://jmlr.org/papers/v25/19-556.html) \ No newline at end of file +#' [PDF](https://jmlr.org/papers/v25/19-556.html) From 8aea0c1b966abf96fc21de24e5ed7edd6d2de054 Mon Sep 17 00:00:00 2001 From: n-kall Date: Thu, 16 May 2024 12:21:44 +0300 Subject: [PATCH 2/4] pareto_* functions now return unnamed scalars rather than named fixes #346, fixes #359 --- R/pareto_smooth.R | 84 +++++++++++++++++------------ tests/testthat/test-pareto_smooth.R | 17 ++---- 2 files changed, 55 insertions(+), 46 deletions(-) diff --git a/R/pareto_smooth.R b/R/pareto_smooth.R index dca53cc..18817fa 100644 --- a/R/pareto_smooth.R +++ b/R/pareto_smooth.R @@ -9,7 +9,7 @@ #' @template args-pareto #' @template args-methods-dots #' @template ref-vehtari-paretosmooth-2022 -#' @return `khat` estimated Generalized Pareto Distribution shape parameter k +#' @template return-conv #' #' @seealso [`pareto_diags`] for additional related diagnostics, and #' [`pareto_smooth`] for Pareto smoothed draws. @@ -41,7 +41,7 @@ pareto_khat.default <- function(x, smooth_draws = FALSE, are_log_weights = are_log_weights, ...) - return(smoothed$diagnostics) + return(smoothed$diagnostics$khat) } #' @rdname pareto_khat @@ -49,19 +49,17 @@ pareto_khat.default <- function(x, pareto_khat.rvar <- function(x, ...) { draws_diags <- summarise_rvar_by_element_with_chains( x, - pareto_smooth.default, - return_k = TRUE, - smooth_draws = FALSE, + pareto_khat.default, ... ) dim(draws_diags) <- dim(draws_diags) %||% length(draws_diags) margins <- seq_along(dim(draws_diags)) diags <- list( - khat = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$khat) + khat = apply(draws_diags, margins, function(x) x[[1]]) ) - diags + diags$khat } @@ -107,8 +105,10 @@ pareto_khat.rvar <- function(x, ...) { #' when the sample size is increased, compared to the central limit #' theorem convergence rate. See Appendix B in Vehtari et al. (2024). #' -#' @seealso [`pareto_khat`] for only calculating khat, and -#' [`pareto_smooth`] for Pareto smoothed draws. +#' @seealso [`pareto_khat`], [`pareto_min_ss`], +#' [`pareto_khat_threshold`], and [`pareto_convergence_rate`] for +#' individual diagnostics; and [`pareto_smooth`] for Pareto smoothing +#' draws. #' @examples #' mu <- extract_variable_matrix(example_draws(), "mu") #' pareto_diags(mu) @@ -151,10 +151,7 @@ pareto_diags.default <- function(x, pareto_diags.rvar <- function(x, ...) { draws_diags <- summarise_rvar_by_element_with_chains( x, - pareto_smooth.default, - return_k = TRUE, - smooth_draws = FALSE, - extra_diags = TRUE, + pareto_diags.default, ... ) @@ -162,10 +159,10 @@ pareto_diags.rvar <- function(x, ...) { margins <- seq_along(dim(draws_diags)) diags <- list( - khat = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$khat), - min_ss = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$min_ss), - khat_threshold = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$khat_threshold), - convergence_rate = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$convergence_rate) + khat = apply(draws_diags, margins, function(x) x[[1]]$khat), + min_ss = apply(draws_diags, margins, function(x) x[[1]]$min_ss), + khat_threshold = apply(draws_diags, margins, function(x) x[[1]]$khat_threshold), + convergence_rate = apply(draws_diags, margins, function(x) x[[1]]$convergence_rate) ) diags @@ -192,13 +189,20 @@ pareto_diags.rvar <- function(x, ...) { #' @template ref-vehtari-paretosmooth-2022 #' @return Either a vector `x` of smoothed values or a named list #' containing the vector `x` and a named list `diagnostics` -#' containing Pareto smoothing diagnostics: * `khat`: estimated -#' Pareto k shape parameter, and optionally * `min_ss`: minimum -#' sample size for reliable Pareto smoothed estimate * -#' `khat_threshold`: khat-threshold for reliable Pareto smoothed -#' estimates * `convergence_rate`: Relative convergence rate for +#' containing numeric values: +#' +#' * `khat`: estimated Pareto k shape parameter, and optionally +#' * `min_ss`: minimum sample size for reliable Pareto smoothed +#' estimate +#' * `khat_threshold`: sample size specific khat threshold for +#' reliable Pareto smoothed estimates +#' * `convergence_rate`: Relative convergence rate for #' Pareto smoothed estimates #' +#' If any of the draws is non-finite, that is, `NA`, `NaN`, `Inf`, or +#' `-Inf`, Pareto smoothing will not be performed, and the original +#' draws will be returned and and diagnostics will be `NA` (numeric). +#' #' @seealso [`pareto_khat`] for only calculating khat, and #' [`pareto_diags`] for additional diagnostics. #' @examples @@ -265,13 +269,27 @@ pareto_smooth.default <- function(x, verbose <- as_one_logical(verbose) are_log_weights <- as_one_logical(are_log_weights) + if (extra_diags) { + return_k <- TRUE + } + # check for infinite or na values if (should_return_NA(x)) { warning_no_call("Input contains infinite or NA values, or is constant. Fitting of generalized Pareto distribution not performed.") if (!return_k) { out <- x + } else if (!extra_diags) { + out <- list(x = x, diagnostics = list(khat = NA_real_)) } else { - out <- list(x = x, diagnostics = NA_real_) + out <- list( + x = x, + diagnostics = list( + khat = NA_real_, + min_ss = NA_real_, + khat_threshold = NA_real_, + convergence_rate = NA_real_ + ) + ) } return(out) } @@ -379,13 +397,13 @@ pareto_khat_threshold <- function(x, ...) { #' @rdname pareto_diags #' @export pareto_khat_threshold.default <- function(x, ...) { - c(khat_threshold = ps_khat_threshold(length(x))) + ps_khat_threshold(length(x)) } #' @rdname pareto_diags #' @export pareto_khat_threshold.rvar <- function(x, ...) { - c(khat_threshold = ps_khat_threshold(ndraws(x))) + ps_khat_threshold(ndraws(x)) } #' @rdname pareto_diags @@ -397,15 +415,15 @@ pareto_min_ss <- function(x, ...) { #' @rdname pareto_diags #' @export pareto_min_ss.default <- function(x, ...) { - k <- pareto_khat(x)$k - c(min_ss = ps_min_ss(k)) + k <- pareto_khat(x) + ps_min_ss(k) } #' @rdname pareto_diags #' @export pareto_min_ss.rvar <- function(x, ...) { - k <- pareto_khat(x)$k - c(min_ss = ps_min_ss(k)) + k <- pareto_khat(x) + ps_min_ss(k) } #' @rdname pareto_diags @@ -417,15 +435,15 @@ pareto_convergence_rate <- function(x, ...) { #' @rdname pareto_diags #' @export pareto_convergence_rate.default <- function(x, ...) { - k <- pareto_khat(x)$khat - c(convergence_rate = ps_convergence_rate(k, length(x))) + k <- pareto_khat(x) + ps_convergence_rate(k, length(x)) } #' @rdname pareto_diags #' @export pareto_convergence_rate.rvar <- function(x, ...) { k <- pareto_khat(x) - c(convergence_rate = ps_convergence_rate(k, ndraws(x))) + ps_convergence_rate(k, ndraws(x)) } @@ -616,7 +634,7 @@ ps_convergence_rate <- function(k, ndraws, ...) { } #' Pareto tail length -#' +#' #' Calculate the tail length from number of draws and relative efficiency #' r_eff. See Appendix H in Vehtari et al. (2024). This function is #' used internally and is exported to be available for other packages. diff --git a/tests/testthat/test-pareto_smooth.R b/tests/testthat/test-pareto_smooth.R index 7a6c939..8ea6639 100644 --- a/tests/testthat/test-pareto_smooth.R +++ b/tests/testthat/test-pareto_smooth.R @@ -1,12 +1,3 @@ -test_that("pareto_khat returns expected reasonable values", { - tau <- extract_variable_matrix(example_draws(), "tau") - - pk <- pareto_khat(tau) - expect_true(names(pk) == "khat") - -}) - - test_that("pareto_khat handles constant tail correctly", { # left tail is constant, so khat should be NA, but for "both" it @@ -30,8 +21,8 @@ test_that("pareto_khat handles tail argument", { pkl <- pareto_khat(tau, tail = "left") pkr <- pareto_khat(tau, tail = "right") pkb <- pareto_khat(tau) - expect_true(pkl$khat < pkr$khat) - expect_equal(pkr$khat, pkb$khat) + expect_true(pkl < pkr) + expect_equal(pkr, pkb) }) test_that("pareto_khat handles ndraws_tail argument", { @@ -39,7 +30,7 @@ test_that("pareto_khat handles ndraws_tail argument", { tau <- extract_variable_matrix(example_draws(), "tau") pk10 <- pareto_khat(tau, tail = "right", ndraws_tail = 10) pk25 <- pareto_khat(tau, tail = "right", ndraws_tail = 25) - expect_true(pk10$khat > pk25$khat) + expect_true(pk10 > pk25) expect_warning(pareto_khat(tau, tail = "both", ndraws_tail = 201), "Number of tail draws cannot be more than half ", @@ -57,7 +48,7 @@ test_that("pareto_khat handles r_eff argument", { tau <- extract_variable_matrix(example_draws(), "tau") pk1 <- pareto_khat(tau, r_eff = 1) pk0.6 <- pareto_khat(tau, r_eff = 0.6) - expect_true(pk1$khat < pk0.6$khat) + expect_true(pk1 < pk0.6) }) From 9d59c189db6a0d8c556a0a3001d4e1b5ddd06e4b Mon Sep 17 00:00:00 2001 From: n-kall Date: Thu, 16 May 2024 12:27:54 +0300 Subject: [PATCH 3/4] render docs --- man/pareto_diags.Rd | 6 ++++-- man/pareto_khat.Rd | 14 +++++++++++++- man/pareto_smooth.Rd | 18 +++++++++++++----- 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/man/pareto_diags.Rd b/man/pareto_diags.Rd index 31420b9..dcd0194 100644 --- a/man/pareto_diags.Rd +++ b/man/pareto_diags.Rd @@ -138,8 +138,10 @@ Jonah Gabry (2024). Pareto Smoothed Importance Sampling. \href{https://jmlr.org/papers/v25/19-556.html}{PDF} } \seealso{ -\code{\link{pareto_khat}} for only calculating khat, and -\code{\link{pareto_smooth}} for Pareto smoothed draws. +\code{\link{pareto_khat}}, \code{\link{pareto_min_ss}}, +\code{\link{pareto_khat_threshold}}, and \code{\link{pareto_convergence_rate}} for +individual diagnostics; and \code{\link{pareto_smooth}} for Pareto smoothing +draws. Other diagnostics: \code{\link{ess_basic}()}, diff --git a/man/pareto_khat.Rd b/man/pareto_khat.Rd index 9df455d..c419b1d 100644 --- a/man/pareto_khat.Rd +++ b/man/pareto_khat.Rd @@ -58,7 +58,19 @@ printed. Default is \code{FALSE}.} draws are log weights, and only right tail will be smoothed.} } \value{ -\code{khat} estimated Generalized Pareto Distribution shape parameter k +If the input is an array, returns a single numeric value. If any of the draws +is non-finite, that is, \code{NA}, \code{NaN}, \code{Inf}, or \code{-Inf}, the returned output +will be (numeric) \code{NA}. Also, if all draws within any of the chains of a +variable are the same (constant), the returned output will be (numeric) \code{NA} +as well. The reason for the latter is that, for constant draws, we cannot +distinguish between variables that are supposed to be constant (e.g., a +diagonal element of a correlation matrix is always 1) or variables that just +happened to be constant because of a failure of convergence or other problems +in the sampling process. + +If the input is an \code{\link{rvar}}, returns an array of the same dimensions as the +\code{\link{rvar}}, where each element is equal to the value that would be returned by +passing the draws array for that element of the \code{\link{rvar}} to this function. } \description{ Estimate Pareto k value by fitting a Generalized Pareto diff --git a/man/pareto_smooth.Rd b/man/pareto_smooth.Rd index d89b1ab..48ae217 100644 --- a/man/pareto_smooth.Rd +++ b/man/pareto_smooth.Rd @@ -72,13 +72,21 @@ draws are log weights, and only right tail will be smoothed.} \value{ Either a vector \code{x} of smoothed values or a named list containing the vector \code{x} and a named list \code{diagnostics} -containing Pareto smoothing diagnostics: * \code{khat}: estimated -Pareto k shape parameter, and optionally * \code{min_ss}: minimum -sample size for reliable Pareto smoothed estimate * -\code{khat_threshold}: khat-threshold for reliable Pareto smoothed -estimates * \code{convergence_rate}: Relative convergence rate for +containing numeric values: +\itemize{ +\item \code{khat}: estimated Pareto k shape parameter, and optionally +\item \code{min_ss}: minimum sample size for reliable Pareto smoothed +estimate +\item \code{khat_threshold}: sample size specific khat threshold for +reliable Pareto smoothed estimates +\item \code{convergence_rate}: Relative convergence rate for Pareto smoothed estimates } + +If any of the draws is non-finite, that is, \code{NA}, \code{NaN}, \code{Inf}, or +\code{-Inf}, Pareto smoothing will not be performed, and the original +draws will be returned and and diagnostics will be \code{NA} (numeric). +} \description{ Smooth the tail draws of x by replacing tail draws by order statistics of a generalized Pareto distribution fit to the From 173f7d7d7e007714dd54a059b10324d6923b612d Mon Sep 17 00:00:00 2001 From: n-kall Date: Thu, 16 May 2024 12:46:01 +0300 Subject: [PATCH 4/4] don't pass dots from pareto_khat --- R/pareto_smooth.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/pareto_smooth.R b/R/pareto_smooth.R index 18817fa..4854cec 100644 --- a/R/pareto_smooth.R +++ b/R/pareto_smooth.R @@ -39,8 +39,8 @@ pareto_khat.default <- function(x, verbose = verbose, return_k = TRUE, smooth_draws = FALSE, - are_log_weights = are_log_weights, - ...) + are_log_weights = are_log_weights + ) return(smoothed$diagnostics$khat) }