diff --git a/NAMESPACE b/NAMESPACE index c81bc588..ace458d1 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -223,6 +223,12 @@ S3method(order_draws,draws_list) S3method(order_draws,draws_matrix) S3method(order_draws,draws_rvars) S3method(order_draws,rvar) +S3method(pareto_diags,default) +S3method(pareto_diags,rvar) +S3method(pareto_khat,default) +S3method(pareto_khat,rvar) +S3method(pareto_smooth,default) +S3method(pareto_smooth,rvar) S3method(pillar_shaft,rvar) S3method(print,draws_array) S3method(print,draws_df) @@ -447,6 +453,9 @@ export(ndraws) export(niterations) export(nvariables) export(order_draws) +export(pareto_diags) +export(pareto_khat) +export(pareto_smooth) export(quantile2) export(r_scale) export(rdo) diff --git a/R/gpd.R b/R/gpd.R new file mode 100644 index 00000000..924f8c3f --- /dev/null +++ b/R/gpd.R @@ -0,0 +1,81 @@ +#' Quantile function for generalized pareto +#' @noRd +qgeneralized_pareto <- function(p, mu = 0, sigma = 1, k = 0, lower.tail = TRUE, log.p = FALSE) { + stopifnot(length(mu) == 1 && length(sigma) == 1 && length(k) == 1) + if (is.na(sigma) || sigma <= 0) { + return(rep(NaN, length(p))) + } + if (log.p) { + p <- exp(p) + } + if (!lower.tail) { + p <- 1 - p + } + if (k == 0) { + q <- mu - sigma * log1p(-p) + } else { + q <- mu + sigma * expm1(-k * log1p(-p)) / k + } + q +} + +#' Estimate parameters of the Generalized Pareto distribution +#' +#' Given a sample \eqn{x}, Estimate the parameters \eqn{k} and \eqn{\sigma} of +#' the generalized Pareto distribution (GPD), assuming the location parameter is +#' 0. By default the fit uses a prior for \eqn{k} (this is in addition to the prior described by Zhang and Stephens, 2009), which will stabilize +#' estimates for very small sample sizes (and low effective sample sizes in the +#' case of MCMC samples). The weakly informative prior is a Gaussian prior +#' centered at 0.5 (see details in Vehtari et al., 2022). +#' +#' @param x A numeric vector. The sample from which to estimate the parameters. +#' @param wip Logical indicating whether to adjust \eqn{k} based on a weakly +#' informative Gaussian prior centered on 0.5. Defaults to `TRUE`. +#' @param min_grid_pts The minimum number of grid points used in the fitting +#' algorithm. The actual number used is `min_grid_pts + floor(sqrt(length(x)))`. +#' @param sort_x If `TRUE` (the default), the first step in the fitting +#' algorithm is to sort the elements of `x`. If `x` is already +#' sorted in ascending order then `sort_x` can be set to `FALSE` to +#' skip the initial sorting step. +#' @return A named list with components `k` and `sigma`. +#' +#' @details Here the parameter \eqn{k} is the negative of \eqn{k} in Zhang & +#' Stephens (2009). +#' +#' +#' @references +#' Zhang, J., and Stephens, M. A. (2009). A new and efficient estimation method +#' for the generalized Pareto distribution. *Technometrics* **51**, 316-325. +#' +#' @noRd +gpdfit <- function(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE) { + # see section 4 of Zhang and Stephens (2009) + if (sort_x) { + x <- sort.int(x) + } + N <- length(x) + prior <- 3 + M <- min_grid_pts + floor(sqrt(N)) + jj <- seq_len(M) + xstar <- x[floor(N / 4 + 0.5)] # first quartile of sample + theta <- 1 / x[N] + (1 - sqrt(M / (jj - 0.5))) / prior / xstar + k <- matrixStats::rowMeans2(log1p(-theta %o% x)) + l_theta <- N * (log(-theta / k) - k - 1) # profile log-lik + w_theta <- exp(l_theta - matrixStats::logSumExp(l_theta)) # normalize + theta_hat <- sum(theta * w_theta) + k_hat <- mean.default(log1p(-theta_hat * x)) + sigma_hat <- -k_hat / theta_hat + + # adjust k_hat based on weakly informative prior, Gaussian centered on 0.5. + # this stabilizes estimates for very small Monte Carlo sample sizes and low neff (see Vehtari et al., 2022 for details) + if (wip) { + k_hat <- (k_hat * N + 0.5 * 10) / (N + 10) + } + + if (is.na(k_hat)) { + k_hat <- Inf + sigma_hat <- NaN + } + + list(k = k_hat, sigma = sigma_hat) +} diff --git a/R/pareto_smooth.R b/R/pareto_smooth.R new file mode 100644 index 00000000..228aede9 --- /dev/null +++ b/R/pareto_smooth.R @@ -0,0 +1,534 @@ +#' Pareto khat diagnostic +#' +#' Estimate Pareto k value by fitting a Generalized Pareto +#' Distribution to one or two tails of x. This can be used to estimate +#' the number of fractional moments that is useful for convergence +#' diagnostics. For further details see Vehtari et al. (2022). +#' +#' @template args-pareto +#' @template args-methods-dots +#' @template ref-vehtari-paretosmooth-2022 +#' @return `khat` estimated Generalized Pareto Distribution shape parameter k +#' @examples +#' mu <- extract_variable_matrix(example_draws(), "mu") +#' pareto_khat(mu) +#' +#' d <- as_draws_rvars(example_draws("multi_normal")) +#' pareto_khat(d$Sigma) +#' @export +pareto_khat <- function(x, ...) UseMethod("pareto_khat") + +#' @rdname pareto_khat +#' @export +pareto_khat.default <- function(x, + tail = c("both", "right", "left"), + r_eff = NULL, + ndraws_tail = NULL, + verbose = FALSE, + ...) { + smoothed <- pareto_smooth.default( + x, + tail = tail, + r_eff = r_eff, + ndraws_tail = ndraws_tail, + verbose = verbose, + return_k = TRUE, + smooth_draws = FALSE, + ...) + return(smoothed$diagnostics) +} + +#' @rdname pareto_khat +#' @export +pareto_khat.rvar <- function(x, ...) { + draws_diags <- summarise_rvar_by_element_with_chains( + x, + pareto_smooth.default, + return_k = TRUE, + smooth_draws = FALSE, + ... + ) + dim(draws_diags) <- dim(draws_diags) %||% length(draws_diags) + margins <- seq_along(dim(draws_diags)) + + diags <- list( + khat = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$khat) + ) + + diags +} + + +#' Pareto smoothing diagnostics +#' +#' Compute diagnostics for Pareto smoothing the tail draws of x by +#' replacing tail draws by order statistics of a generalized Pareto +#' distribution fit to the tail(s). +#' +#' @template args-pareto +#' @template args-methods-dots +#' @template ref-vehtari-paretosmooth-2022 +#' @return List of Pareto smoothing diagnostics: +#' * `khat`: estimated Pareto k shape parameter, +#' * `min_ss`: minimum sample size for reliable Pareto smoothed estimate, +#' * `khat_threshold`: khat-threshold for reliable Pareto smoothed estimate, +#' * `convergence_rate`: Pareto smoothed estimate RMSE convergence rate. +#' +#' @details When the fitted Generalized Pareto Distribution is used to +#' smooth the tail values and these smoothed values are used to +#' compute expectations, the following diagnostics can give further +#' information about the reliability of these estimates. +#' +#' * `min_ss`: Minimum sample size for reliable Pareto smoothed +#' estimate. If the actual sample size is greater than `min_ss`, then +#' Pareto smoothed estimates can be considered reliable. If the actual +#' sample size is lower than `min_ss`, increasing the sample size +#' might result in more reliable estimates. For further details, see +#' Section 3.2.3, Equation 11 in Vehtari et al. (2022). +#' +#' * `khat_threshold`: Threshold below which k-hat values result in +#' reliable Pareto smoothed estimates. The threshold is lower for +#' smaller effective sample sizes. If k-hat is larger than the +#' threshold, increasing the total sample size may improve reliability +#' of estimates. For further details, see Section 3.2.4, Equation 13 +#' in Vehtari et al. (2022). +#' +#' * `convergence_rate`: Relative convergence rate compared to the +#' central limit theorem. Applicable only if the actual sample size +#' is sufficiently large (greater than `min_ss`). The convergence +#' rate tells the rate at which the variance of an estimate reduces +#' when the sample size is increased, compared to the central limit +#' theorem convergence rate. See Appendix B in Vehtari et al. (2022). +#' +#' @examples +#' mu <- extract_variable_matrix(example_draws(), "mu") +#' pareto_diags(mu) +#' +#' d <- as_draws_rvars(example_draws("multi_normal")) +#' pareto_diags(d$Sigma) +#' @export +pareto_diags <- function(x, ...) UseMethod("pareto_diags") + + +#' @rdname pareto_diags +#' @export +pareto_diags.default <- function(x, + tail = c("both", "right", "left"), + r_eff = NULL, + ndraws_tail = NULL, + verbose = FALSE, + ...) { + + smoothed <- pareto_smooth.default( + x, + tail = tail, + r_eff = r_eff, + ndraws_tail = ndraws_tail, + return_k = TRUE, + extra_diags = TRUE, + verbose = verbose, + smooth_draws = FALSE, + ...) + + return(smoothed$diagnostics) + +} + + +#' @rdname pareto_diags +#' @export +pareto_diags.rvar <- function(x, ...) { + draws_diags <- summarise_rvar_by_element_with_chains( + x, + pareto_smooth.default, + return_k = TRUE, + smooth_draws = FALSE, + extra_diags = TRUE, + ... + ) + + dim(draws_diags) <- dim(draws_diags) %||% length(draws_diags) + margins <- seq_along(dim(draws_diags)) + + diags <- list( + khat = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$khat), + min_ss = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$min_ss), + khat_threshold = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$khat_threshold), + convergence_rate = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$convergence_rate) + ) + + diags +} + + + +#' Pareto smoothing +#' +#' Smooth the tail draws of x by replacing tail draws by order +#' statistics of a generalized Pareto distribution fit to the +#' tail(s). For further details see Vehtari et al. (2022). +#' +#' @template args-pareto +#' @param return_k (logical) Should the Pareto khat be included in +#' output? If `TRUE`, output will be a list containing of smoothed +#' draws and diagnostics. Default is `TRUE`. +#' @param extra_diags (logical) Should extra Pareto khat diagnostics +#' be included in output? If `TRUE`, `min_ss`, `khat_threshold` and +#' `convergence_rate` for the estimated k value will be +#' returned. Default is `FALSE`. +#' @template args-methods-dots +#' @template ref-vehtari-paretosmooth-2022 +#' @return Either a vector `x` of smoothed values or a named list +#' containing the vector `x` and a named list `diagnostics` containing Pareto smoothing +#' diagnostics: +#' * `khat`: estimated Pareto k shape parameter, and +#' optionally +#' * `min_ss`: minimum sample size for reliable Pareto +#' smoothed estimate +#' * `khat_threshold`: khat-threshold for reliable +#' Pareto smoothed estimates +#' * `convergence_rate`: Relative convergence rate for Pareto smoothed estimates +#' +#' @examples +#' mu <- extract_variable_matrix(example_draws(), "mu") +#' pareto_smooth(mu) +#' +#' d <- as_draws_rvars(example_draws("multi_normal")) +#' pareto_smooth(d$Sigma) +#' @export +pareto_smooth <- function(x, ...) UseMethod("pareto_smooth") + +#' @rdname pareto_smooth +#' @export +pareto_smooth.rvar <- function(x, return_k = TRUE, extra_diags = FALSE, ...) { + + if (extra_diags) { + return_k <- TRUE + } + + draws_diags <- summarise_rvar_by_element_with_chains(x, pareto_smooth.default, return_k = return_k, extra_diags = extra_diags, ...) + dim(draws_diags) <- dim(draws_diags) %||% length(draws_diags) + margins <- seq_along(dim(draws_diags)) + + if (return_k) { + if (extra_diags) { + + diags <- list( + khat = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$khat), + min_ss = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$min_ss), + khat_threshold = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$khat_threshold), + convergence_rate = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$convergence_rate) + ) + } else { + diags <- list( + khat = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$khat) + ) + } + out <- list( + x = rvar(apply(draws_diags, margins, function(x) x[[1]]$x), nchains = nchains(x)), + diagnostics = diags + ) + } else { + out <- rvar(apply(draws_diags, margins, function(x) x[[1]]), nchains = nchains(x)) + } + out +} + +#' @rdname pareto_smooth +#' @export +pareto_smooth.default <- function(x, + tail = c("both", "right", "left"), + r_eff = NULL, + ndraws_tail = NULL, + return_k = TRUE, + extra_diags = FALSE, + verbose = FALSE, + ...) { + + checkmate::assert_number(ndraws_tail, null.ok = TRUE) + checkmate::assert_number(r_eff, null.ok = TRUE) + checkmate::assert_logical(extra_diags) + checkmate::assert_logical(return_k) + checkmate::assert_logical(verbose) + + # check for infinite or na values + if (should_return_NA(x)) { + warning_no_call("Input contains infinite or NA values, Pareto smoothing not performed.") + return(list(x = x, diagnostics = NA_real_)) + } + + tail <- match.arg(tail) + S <- length(x) + + # automatically calculate relative efficiency + if (is.null(r_eff)) { + r_eff <- ess_tail(x) / S + } + + # automatically calculate tail length + if (is.null(ndraws_tail)) { + ndraws_tail <- ps_tail_length(S, r_eff) + } + + if (tail == "both") { + + if (ndraws_tail > S / 2) { + warning("Number of tail draws cannot be more than half ", + "the total number of draws if both tails are fit, ", + "changing to ", S / 2, ".") + ndraws_tail <- S / 2 + } + + if (ndraws_tail < 5) { + warning("Number of tail draws cannot be less than 5. ", + "Changing to ", 5, ".") + ndraws_tail <- 5 + } + + # left tail + smoothed <- .pareto_smooth_tail( + x, + ndraws_tail = ndraws_tail, + tail = "left", + ... + ) + left_k <- smoothed$k + + # right tail + smoothed <-.pareto_smooth_tail( + x = smoothed$x, + ndraws_tail = ndraws_tail, + tail = "right", + ... + ) + right_k <- smoothed$k + + k <- max(left_k, right_k) + x <- smoothed$x + } else { + + smoothed <- .pareto_smooth_tail( + x, + ndraws_tail = ndraws_tail, + tail = tail, + ... + ) + k <- smoothed$k + x <- smoothed$x + } + + diags_list <- list(khat = k) + + if (extra_diags) { + ext_diags <- .pareto_smooth_extra_diags(k, S) + diags_list <- c(diags_list, ext_diags) + } + + if (verbose) { + if (!extra_diags) { + diags_list <- .pareto_smooth_extra_diags(diags_list$khat, length(x)) + } + pareto_k_diagmsg( + diags = diags_list + ) + } + + if (return_k) { + out <- list(x = x, diagnostics = diags_list) + } else { + out <- x + } + + return(out) +} + +#' Pareto smooth tail +#' internal function to pareto smooth the tail of a vector +#' @noRd +.pareto_smooth_tail <- function(x, + ndraws_tail, + smooth_draws = TRUE, + tail = c("right", "left"), + ... + ) { + + tail <- match.arg(tail) + + S <- length(x) + tail_ids <- seq(S - ndraws_tail + 1, S) + + + if (tail == "left") { + x <- -x + } + + ord <- sort.int(x, index.return = TRUE) + draws_tail <- ord$x[tail_ids] + cutoff <- ord$x[min(tail_ids) - 1] # largest value smaller than tail values + + max_tail <- max(draws_tail) + min_tail <- min(draws_tail) + + if (ndraws_tail >= 5) { + ord <- sort.int(x, index.return = TRUE) + if (abs(max_tail - min_tail) < .Machine$double.eps / 100) { + warning_no_call( + "Can't fit generalized Pareto distribution ", + "because all tail values are the same." + ) + smoothed <- NULL + k <- NA + } else { + # save time not sorting since x already sorted + fit <- gpdfit(draws_tail - cutoff, sort_x = FALSE) + k <- fit$k + sigma <- fit$sigma + if (is.finite(k) && smooth_draws) { + p <- (seq_len(ndraws_tail) - 0.5) / ndraws_tail + smoothed <- qgeneralized_pareto(p = p, mu = cutoff, k = k, sigma = sigma) + } else { + smoothed <- NULL + } + } + } else { + warning_no_call( + "Can't fit generalized Pareto distribution ", + "because ndraws_tail is less than 5." + ) + smoothed <- NULL + k <- NA + } + + # truncate at max of raw draws + if (!is.null(smoothed)) { + smoothed[smoothed > max_tail] <- max_tail + x[ord$ix[tail_ids]] <- smoothed + } + + if (tail == "left") { + x <- -x + } + + out <- list(x = x, k = k) + + return(out) +} + +#' Extra pareto-k diagnostics +#' +#' internal function to calculate the extra diagnostics for a given +#' pareto k and sample size S +#' @noRd +.pareto_smooth_extra_diags <- function(k, S, ...) { + + min_ss <- ps_min_ss(k) + + khat_threshold <- ps_khat_threshold(S) + + convergence_rate <- ps_convergence_rate(k, S) + + other_diags <- list( + min_ss = min_ss, + khat_threshold = khat_threshold, + convergence_rate = convergence_rate + ) +} + +#' Pareto-smoothing minimum sample-size +#' +#' Given Pareto-k computes the minimum sample size for reliable Pareto +#' smoothed estimate (to have small probability of large error) +#' Equation (11) in PSIS paper +#' @param k pareto k value +#' @param ... unused +#' @return minimum sample size +#' @noRd +ps_min_ss <- function(k, ...) { + if (k < 1) { + out <- 10^(1 / (1 - max(0, k))) + } else { + out <- Inf + } + out +} + + +#' Pareto-smoothing k-hat threshold +#' +#' Given sample size S computes khat threshold for reliable Pareto +#' smoothed estimate (to have small probability of large error). See +#' section 3.2.4, equation (13). +#' @param S sample size +#' @param ... unused +#' @return threshold +#' @noRd +ps_khat_threshold <- function(S, ...) { + 1 - 1 / log10(S) +} + +#' Pareto-smoothing convergence rate +#' +#' Given S and scalar or array of k's, compute the relative +#' convergence rate of PSIS estimate RMSE +#' @param k pareto-k values +#' @param S sample size +#' @param ... unused +#' @return convergence rate +#' @noRd +ps_convergence_rate <- function(k, S, ...) { + # allow array of k's + rate <- numeric(length(k)) + # k<0 bounded distribution + rate[k < 0] <- 1 + # k>0 non-finite mean + rate[k > 1] <- 0 + # limit value at k=1/2 + rate[k == 0.5] <- 1 - 1 / log(S) + # smooth approximation for the rest (see Appendix of PSIS paper) + ki <- (k > 0 & k < 1 & k != 0.5) + kk <- k[ki] + rate[ki] <- pmax( + 0, + (2 * (kk - 1) * S^(2 * kk + 1) + (1 - 2 * kk) * S^(2 * kk) + S^2) / + ((S - 1) * (S - S^(2 * kk))) + ) + rate +} + +#' Calculate the tail length from S and r_eff +#' Appendix H in PSIS paper +#' @noRd +ps_tail_length <- function(S, r_eff, ...) { + ifelse(S > 225, ceiling(3 * sqrt(S / r_eff)), S / 5) +} + +#' Pareto-k diagnostic message +#' +#' Given S and scalar and k, form a diagnostic message string +#' @param diags (numeric) named vector of diagnostic values +#' @param ... unused +#' @return diagnostic message +#' @noRd +pareto_k_diagmsg <- function(diags, ...) { + khat <- diags$khat + min_ss <- diags$min_ss + khat_threshold <- diags$khat_threshold + convergence_rate <- diags$convergence_rate + msg <- NULL + if (khat > 1) { + msg <- paste0(msg,'All estimates are unreliable. If the distribution of ratios is bounded,\n', + 'further draws may improve the estimates, but it is not possible to predict\n', + 'whether any feasible sample size is sufficient.') + } else { + if (khat > khat_threshold) { + msg <- paste0(msg, 'S is too small, and sample size larger than ', round(min_ss, 0), ' is needed for reliable results.\n') + } else { + msg <- paste0(msg, 'To halve the RMSE, approximately ', round(2^(2/convergence_rate),1), ' times bigger S is needed.\n') + } + if (khat > 0.7) { + msg <- paste0(msg, 'Bias dominates RMSE, and the variance based MCSE is underestimated.\n') + } + } + message(msg) + invisible(diags) +} diff --git a/man-roxygen/args-pareto.R b/man-roxygen/args-pareto.R new file mode 100644 index 00000000..a406dbde --- /dev/null +++ b/man-roxygen/args-pareto.R @@ -0,0 +1,20 @@ +#' @param x (multiple options) One of: +#' - A matrix of draws for a single variable (iterations x chains). See +#' [extract_variable_matrix()]. +#' - An [`rvar`]. +#' @param tail (string) The tail to diagnose/smooth: +#' * `"right"`: diagnose/smooth only the right (upper) tail +#' * `"left"`: diagnose/smooth only the left (lower) tail +#' * `"both"`: diagnose/smooth both tails and return the maximum k-hat value +#' +#' The default is `"both"`. +#' @param ndraws_tail (numeric) number of draws for the tail. If +#' `ndraws_tail` is not specified, it will be calculated as +#' ceiling(3 * sqrt(length(x) / r_eff)) if length(x) > 225 and +#' length(x) / 5 otherwise (see Appendix H in Vehtari et al. (2022)). +#' @param r_eff (numeric) relative effective sample size estimate. If +#' `r_eff` is omitted, it will be calculated assuming the draws are +#' from MCMC. +#' @param verbose (logical) Should diagnostic messages be printed? If +#' `TRUE`, messages related to Pareto diagnostics will be +#' printed. Default is `FALSE`. diff --git a/man-roxygen/ref-vehtari-paretosmooth-2022.R b/man-roxygen/ref-vehtari-paretosmooth-2022.R new file mode 100644 index 00000000..267ec29a --- /dev/null +++ b/man-roxygen/ref-vehtari-paretosmooth-2022.R @@ -0,0 +1,4 @@ +#' @references +#' Aki Vehtari, Daniel Simpson, Andrew Gelman, Yuling Yao and +#' Jonah Gabry (2022). Pareto Smoothed Importance Sampling. +#' arxiv:arXiv:1507.02646 diff --git a/man/pareto_diags.Rd b/man/pareto_diags.Rd new file mode 100644 index 00000000..c2558a9a --- /dev/null +++ b/man/pareto_diags.Rd @@ -0,0 +1,105 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pareto_smooth.R +\name{pareto_diags} +\alias{pareto_diags} +\alias{pareto_diags.default} +\alias{pareto_diags.rvar} +\title{Pareto smoothing diagnostics} +\usage{ +pareto_diags(x, ...) + +\method{pareto_diags}{default}( + x, + tail = c("both", "right", "left"), + r_eff = NULL, + ndraws_tail = NULL, + verbose = FALSE, + ... +) + +\method{pareto_diags}{rvar}(x, ...) +} +\arguments{ +\item{x}{(multiple options) One of: +\itemize{ +\item A matrix of draws for a single variable (iterations x chains). See +\code{\link[=extract_variable_matrix]{extract_variable_matrix()}}. +\item An \code{\link{rvar}}. +}} + +\item{...}{Arguments passed to individual methods (if applicable).} + +\item{tail}{(string) The tail to diagnose/smooth: +\itemize{ +\item \code{"right"}: diagnose/smooth only the right (upper) tail +\item \code{"left"}: diagnose/smooth only the left (lower) tail +\item \code{"both"}: diagnose/smooth both tails and return the maximum k-hat value +} + +The default is \code{"both"}.} + +\item{r_eff}{(numeric) relative effective sample size estimate. If +\code{r_eff} is omitted, it will be calculated assuming the draws are +from MCMC.} + +\item{ndraws_tail}{(numeric) number of draws for the tail. If +\code{ndraws_tail} is not specified, it will be calculated as +ceiling(3 * sqrt(length(x) / r_eff)) if length(x) > 225 and +length(x) / 5 otherwise (see Appendix H in Vehtari et al. (2022)).} + +\item{verbose}{(logical) Should diagnostic messages be printed? If +\code{TRUE}, messages related to Pareto diagnostics will be +printed. Default is \code{FALSE}.} +} +\value{ +List of Pareto smoothing diagnostics: +\itemize{ +\item \code{khat}: estimated Pareto k shape parameter, +\item \code{min_ss}: minimum sample size for reliable Pareto smoothed estimate, +\item \code{khat_threshold}: khat-threshold for reliable Pareto smoothed estimate, +\item \code{convergence_rate}: Pareto smoothed estimate RMSE convergence rate. +} +} +\description{ +Compute diagnostics for Pareto smoothing the tail draws of x by +replacing tail draws by order statistics of a generalized Pareto +distribution fit to the tail(s). +} +\details{ +When the fitted Generalized Pareto Distribution is used to +smooth the tail values and these smoothed values are used to +compute expectations, the following diagnostics can give further +information about the reliability of these estimates. +\itemize{ +\item \code{min_ss}: Minimum sample size for reliable Pareto smoothed +estimate. If the actual sample size is greater than \code{min_ss}, then +Pareto smoothed estimates can be considered reliable. If the actual +sample size is lower than \code{min_ss}, increasing the sample size +might result in more reliable estimates. For further details, see +Section 3.2.3, Equation 11 in Vehtari et al. (2022). +\item \code{khat_threshold}: Threshold below which k-hat values result in +reliable Pareto smoothed estimates. The threshold is lower for +smaller effective sample sizes. If k-hat is larger than the +threshold, increasing the total sample size may improve reliability +of estimates. For further details, see Section 3.2.4, Equation 13 +in Vehtari et al. (2022). +\item \code{convergence_rate}: Relative convergence rate compared to the +central limit theorem. Applicable only if the actual sample size +is sufficiently large (greater than \code{min_ss}). The convergence +rate tells the rate at which the variance of an estimate reduces +when the sample size is increased, compared to the central limit +theorem convergence rate. See Appendix B in Vehtari et al. (2022). +} +} +\examples{ +mu <- extract_variable_matrix(example_draws(), "mu") +pareto_diags(mu) + +d <- as_draws_rvars(example_draws("multi_normal")) +pareto_diags(d$Sigma) +} +\references{ +Aki Vehtari, Daniel Simpson, Andrew Gelman, Yuling Yao and +Jonah Gabry (2022). Pareto Smoothed Importance Sampling. +arxiv:arXiv:1507.02646 +} diff --git a/man/pareto_khat.Rd b/man/pareto_khat.Rd new file mode 100644 index 00000000..c3383eb4 --- /dev/null +++ b/man/pareto_khat.Rd @@ -0,0 +1,74 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pareto_smooth.R +\name{pareto_khat} +\alias{pareto_khat} +\alias{pareto_khat.default} +\alias{pareto_khat.rvar} +\title{Pareto khat diagnostic} +\usage{ +pareto_khat(x, ...) + +\method{pareto_khat}{default}( + x, + tail = c("both", "right", "left"), + r_eff = NULL, + ndraws_tail = NULL, + verbose = FALSE, + ... +) + +\method{pareto_khat}{rvar}(x, ...) +} +\arguments{ +\item{x}{(multiple options) One of: +\itemize{ +\item A matrix of draws for a single variable (iterations x chains). See +\code{\link[=extract_variable_matrix]{extract_variable_matrix()}}. +\item An \code{\link{rvar}}. +}} + +\item{...}{Arguments passed to individual methods (if applicable).} + +\item{tail}{(string) The tail to diagnose/smooth: +\itemize{ +\item \code{"right"}: diagnose/smooth only the right (upper) tail +\item \code{"left"}: diagnose/smooth only the left (lower) tail +\item \code{"both"}: diagnose/smooth both tails and return the maximum k-hat value +} + +The default is \code{"both"}.} + +\item{r_eff}{(numeric) relative effective sample size estimate. If +\code{r_eff} is omitted, it will be calculated assuming the draws are +from MCMC.} + +\item{ndraws_tail}{(numeric) number of draws for the tail. If +\code{ndraws_tail} is not specified, it will be calculated as +ceiling(3 * sqrt(length(x) / r_eff)) if length(x) > 225 and +length(x) / 5 otherwise (see Appendix H in Vehtari et al. (2022)).} + +\item{verbose}{(logical) Should diagnostic messages be printed? If +\code{TRUE}, messages related to Pareto diagnostics will be +printed. Default is \code{FALSE}.} +} +\value{ +\code{khat} estimated Generalized Pareto Distribution shape parameter k +} +\description{ +Estimate Pareto k value by fitting a Generalized Pareto +Distribution to one or two tails of x. This can be used to estimate +the number of fractional moments that is useful for convergence +diagnostics. For further details see Vehtari et al. (2022). +} +\examples{ +mu <- extract_variable_matrix(example_draws(), "mu") +pareto_khat(mu) + +d <- as_draws_rvars(example_draws("multi_normal")) +pareto_khat(d$Sigma) +} +\references{ +Aki Vehtari, Daniel Simpson, Andrew Gelman, Yuling Yao and +Jonah Gabry (2022). Pareto Smoothed Importance Sampling. +arxiv:arXiv:1507.02646 +} diff --git a/man/pareto_smooth.Rd b/man/pareto_smooth.Rd new file mode 100644 index 00000000..259421de --- /dev/null +++ b/man/pareto_smooth.Rd @@ -0,0 +1,95 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pareto_smooth.R +\name{pareto_smooth} +\alias{pareto_smooth} +\alias{pareto_smooth.rvar} +\alias{pareto_smooth.default} +\title{Pareto smoothing} +\usage{ +pareto_smooth(x, ...) + +\method{pareto_smooth}{rvar}(x, return_k = TRUE, extra_diags = FALSE, ...) + +\method{pareto_smooth}{default}( + x, + tail = c("both", "right", "left"), + r_eff = NULL, + ndraws_tail = NULL, + return_k = TRUE, + extra_diags = FALSE, + verbose = FALSE, + ... +) +} +\arguments{ +\item{x}{(multiple options) One of: +\itemize{ +\item A matrix of draws for a single variable (iterations x chains). See +\code{\link[=extract_variable_matrix]{extract_variable_matrix()}}. +\item An \code{\link{rvar}}. +}} + +\item{...}{Arguments passed to individual methods (if applicable).} + +\item{return_k}{(logical) Should the Pareto khat be included in +output? If \code{TRUE}, output will be a list containing of smoothed +draws and diagnostics. Default is \code{TRUE}.} + +\item{extra_diags}{(logical) Should extra Pareto khat diagnostics +be included in output? If \code{TRUE}, \code{min_ss}, \code{khat_threshold} and +\code{convergence_rate} for the estimated k value will be +returned. Default is \code{FALSE}.} + +\item{tail}{(string) The tail to diagnose/smooth: +\itemize{ +\item \code{"right"}: diagnose/smooth only the right (upper) tail +\item \code{"left"}: diagnose/smooth only the left (lower) tail +\item \code{"both"}: diagnose/smooth both tails and return the maximum k-hat value +} + +The default is \code{"both"}.} + +\item{r_eff}{(numeric) relative effective sample size estimate. If +\code{r_eff} is omitted, it will be calculated assuming the draws are +from MCMC.} + +\item{ndraws_tail}{(numeric) number of draws for the tail. If +\code{ndraws_tail} is not specified, it will be calculated as +ceiling(3 * sqrt(length(x) / r_eff)) if length(x) > 225 and +length(x) / 5 otherwise (see Appendix H in Vehtari et al. (2022)).} + +\item{verbose}{(logical) Should diagnostic messages be printed? If +\code{TRUE}, messages related to Pareto diagnostics will be +printed. Default is \code{FALSE}.} +} +\value{ +Either a vector \code{x} of smoothed values or a named list +containing the vector \code{x} and a named list \code{diagnostics} containing Pareto smoothing +diagnostics: +\itemize{ +\item \code{khat}: estimated Pareto k shape parameter, and +optionally +\item \code{min_ss}: minimum sample size for reliable Pareto +smoothed estimate +\item \code{khat_threshold}: khat-threshold for reliable +Pareto smoothed estimates +\item \code{convergence_rate}: Relative convergence rate for Pareto smoothed estimates +} +} +\description{ +Smooth the tail draws of x by replacing tail draws by order +statistics of a generalized Pareto distribution fit to the +tail(s). For further details see Vehtari et al. (2022). +} +\examples{ +mu <- extract_variable_matrix(example_draws(), "mu") +pareto_smooth(mu) + +d <- as_draws_rvars(example_draws("multi_normal")) +pareto_smooth(d$Sigma) +} +\references{ +Aki Vehtari, Daniel Simpson, Andrew Gelman, Yuling Yao and +Jonah Gabry (2022). Pareto Smoothed Importance Sampling. +arxiv:arXiv:1507.02646 +} diff --git a/man/set_variables.Rd b/man/set_variables.Rd index fbbccc78..ba67f440 100644 --- a/man/set_variables.Rd +++ b/man/set_variables.Rd @@ -25,7 +25,7 @@ when using pipe operators. x <- as_draws(matrix(rnorm(100), ncol = 2)) variables(x) -set_variables(x, c("theta[1]", "theta[2]")) +x <- set_variables(x, c("theta[1]", "theta[2]")) variables(x) # this is equivalent to diff --git a/tests/testthat/test-pareto_smooth.R b/tests/testthat/test-pareto_smooth.R new file mode 100644 index 00000000..dff22ee1 --- /dev/null +++ b/tests/testthat/test-pareto_smooth.R @@ -0,0 +1,194 @@ +test_that("pareto_khat returns expected reasonable values", { + tau <- extract_variable_matrix(example_draws(), "tau") + + pk <- pareto_khat(tau) + expect_true(names(pk) == "khat") + +}) + +test_that("pareto_khat handles tail argument", { + + # as tau is bounded (0, Inf) the left pareto k should be lower than + # right + tau <- extract_variable_matrix(example_draws(), "tau") + pkl <- pareto_khat(tau, tail = "left") + pkr <- pareto_khat(tau, tail = "right") + pkb <- pareto_khat(tau) + expect_true(pkl$khat < pkr$khat) + expect_equal(pkr$khat, pkb$khat) +}) + +test_that("pareto_khat handles ndraws_tail argument", { + + tau <- extract_variable_matrix(example_draws(), "tau") + pk10 <- pareto_khat(tau, tail = "right", ndraws_tail = 10) + pk25 <- pareto_khat(tau, tail = "right", ndraws_tail = 25) + expect_true(pk10$khat > pk25$khat) + + expect_warning(pareto_khat(tau, tail = "both", ndraws_tail = 201), + "Number of tail draws cannot be more than half ", + "the total number of draws if both tails are fit, ", + "changing to 200.") + + expect_warning(pareto_khat(tau, tail = "both", ndraws_tail = 4), + "Number of tail draws cannot be less than 5. Changing to 5.") + +}) + + +test_that("pareto_khat handles r_eff argument", { + + tau <- extract_variable_matrix(example_draws(), "tau") + pk1 <- pareto_khat(tau, r_eff = 1) + pk0.6 <- pareto_khat(tau, r_eff = 0.6) + expect_true(pk1$khat < pk0.6$khat) + +}) + + +test_that("pareto_khat diagnostics messages are as expected", { + + diags <- list( + khat = 0.5, + min_ss = 10, + khat_threshold = 0.55, + convergence_rate = 0.99 + ) + + expect_message(pareto_k_diagmsg(diags), + paste0('To halve the RMSE, approximately 4.1 times bigger S is needed.')) + + diags$khat <- 0.6 + + expect_message(pareto_k_diagmsg(diags), + paste0('S is too small, and sample size larger than 10 is needed for reliable results.\n')) + + diags$khat <- 0.71 + diags$khat_threshold <- 0.8 + + expect_message(pareto_k_diagmsg(diags), + paste0('To halve the RMSE, approximately 4.1 times bigger S is needed.\n', 'Bias dominates RMSE, and the variance based MCSE is underestimated.\n')) + + + diags$khat <- 1.1 + + expect_message(pareto_k_diagmsg(diags), + paste0('All estimates are unreliable. If the distribution of ratios is bounded,\n', + 'further draws may improve the estimates, but it is not possible to predict\n', + 'whether any feasible sample size is sufficient.')) + +}) + +test_that("pareto_diags returns expected diagnostics", { + + tau <- extract_variable_matrix(example_draws(), "tau") + + pk <- pareto_diags(tau) + expect_true(all(names(pk) == c("khat", "min_ss", "khat_threshold", "convergence_rate"))) +}) + + +test_that("pareto_khat diagnostics handles verbose argument", { + tau <- extract_variable_matrix(example_draws(), "tau") + + expect_message(pareto_khat(tau, extra_diags = TRUE, verbose = TRUE)) + expect_message(pareto_khat(tau, extra_diags = FALSE, verbose = TRUE)) + expect_no_message(pareto_khat(tau, extra_diags = TRUE, verbose = FALSE)) + +}) + +test_that("pareto_khat diagnostics accept vectors as input", { + set.seed(1234) + x <- rnorm(1000) + + pk <- pareto_khat(x) + expect_true(pk < 1) +}) + +test_that("pareto_khat diagnostics handle special cases correctly", { + set.seed(1234) + + x <- c(rnorm(50), NA) + expect_warning(pareto_khat(x)) + expect_true(is.na(suppressWarnings(pareto_khat(x)))) + + x <- c(rnorm(50), Inf) + expect_warning(pareto_khat(x)) + expect_true(is.na(suppressWarnings(pareto_khat(x)))) + + x <- rep(1, 50) + expect_warning(pareto_khat(x)) + expect_true(is.na(suppressWarnings(pareto_khat(x)))) + +}) + + +test_that("pareto_khat functions work with matrix with chains", { + + tau_chains <- extract_variable_matrix(example_draws(), "tau") + tau_nochains <- extract_variable(example_draws(), "tau") + + expect_equal(pareto_khat(tau_chains, ndraws_tail = 20), + pareto_khat(tau_nochains, ndraws_tail = 20)) + + ps_chains <- pareto_smooth(tau_chains, ndraws_tail = 20) + ps_nochains <- pareto_smooth(tau_nochains, ndraws_tail = 20) + + expect_equal(as.numeric(ps_chains$x), as.numeric(ps_nochains$x)) + +}) + +test_that("pareto_khat functions work with rvars with and without chains", { + + tau_chains <- extract_variable_matrix(example_draws(), "tau") + tau_nochains <- extract_variable(example_draws(), "tau") + + tau_rvar_chains <- rvar(tau_chains, with_chains = TRUE) + tau_rvar_nochains <- rvar(tau_nochains) + + expect_equal(pareto_khat(tau_chains, ndraws_tail = 20), + pareto_khat(tau_rvar_chains, ndraws_tail = 20)) + + expect_equal(pareto_khat(tau_rvar_chains, ndraws_tail = 20), + pareto_khat(tau_rvar_nochains, ndraws_tail = 20)) + + + expect_equal(pareto_diags(tau_chains, ndraws_tail = 20), + pareto_diags(tau_rvar_chains, ndraws_tail = 20)) + + expect_equal(pareto_diags(tau_rvar_chains, ndraws_tail = 20), + pareto_diags(tau_rvar_nochains, ndraws_tail = 20)) + + ps_chains <- pareto_smooth(tau_chains, ndraws_tail = 20) + ps_rvar_chains <- pareto_smooth(tau_rvar_chains, ndraws_tail = 20) + + ps_nochains <- pareto_smooth(tau_nochains, ndraws_tail = 20) + ps_rvar_nochains <- pareto_smooth(tau_rvar_nochains, ndraws_tail = 20) + + expect_equal(ps_rvar_chains$x, rvar(ps_chains$x, with_chains = TRUE)) + + expect_equal(ps_rvar_nochains$x, rvar(ps_nochains$x)) + + + ps_chains <- pareto_smooth(tau_chains, ndraws_tail = 20, extra_diags = TRUE) + ps_rvar_chains <- pareto_smooth(tau_rvar_chains, ndraws_tail = 20, extra_diags = TRUE) + + ps_nochains <- pareto_smooth(tau_nochains, ndraws_tail = 20, extra_diags = TRUE) + ps_rvar_nochains <- pareto_smooth(tau_rvar_nochains, ndraws_tail = 20, extra_diags = TRUE) + + expect_equal(ps_rvar_chains$x, rvar(ps_chains$x, with_chains = TRUE)) + + expect_equal(ps_rvar_nochains$x, rvar(ps_nochains$x)) + +}) + +test_that("pareto_smooth returns x with smoothed tail", { + tau <- extract_variable_matrix(example_draws(), "tau") + + tau_smoothed <- pareto_smooth(tau, ndraws_tail = 10, tail = "right")$x + + expect_equal(sort(tau)[1:390], sort(tau_smoothed)[1:390]) + + expect_false(isTRUE(all.equal(sort(tau), sort(tau_smoothed)))) + +})