diff --git a/NAMESPACE b/NAMESPACE
index c81bc588..ace458d1 100755
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -223,6 +223,12 @@ S3method(order_draws,draws_list)
 S3method(order_draws,draws_matrix)
 S3method(order_draws,draws_rvars)
 S3method(order_draws,rvar)
+S3method(pareto_diags,default)
+S3method(pareto_diags,rvar)
+S3method(pareto_khat,default)
+S3method(pareto_khat,rvar)
+S3method(pareto_smooth,default)
+S3method(pareto_smooth,rvar)
 S3method(pillar_shaft,rvar)
 S3method(print,draws_array)
 S3method(print,draws_df)
@@ -447,6 +453,9 @@ export(ndraws)
 export(niterations)
 export(nvariables)
 export(order_draws)
+export(pareto_diags)
+export(pareto_khat)
+export(pareto_smooth)
 export(quantile2)
 export(r_scale)
 export(rdo)
diff --git a/R/gpd.R b/R/gpd.R
new file mode 100644
index 00000000..924f8c3f
--- /dev/null
+++ b/R/gpd.R
@@ -0,0 +1,81 @@
+#' Quantile function for generalized pareto
+#' @noRd
+qgeneralized_pareto <- function(p, mu = 0, sigma = 1, k = 0, lower.tail = TRUE, log.p = FALSE) {
+  stopifnot(length(mu) == 1 && length(sigma) == 1 && length(k) == 1)
+  if (is.na(sigma) || sigma <= 0) {
+    return(rep(NaN, length(p)))
+  }
+  if (log.p) {
+    p <- exp(p)
+  }
+  if (!lower.tail) {
+    p <- 1 - p
+  }
+  if (k == 0) {
+    q <-  mu - sigma * log1p(-p)
+  } else {
+    q <- mu + sigma * expm1(-k * log1p(-p)) / k
+  }
+  q
+}
+
+#' Estimate parameters of the Generalized Pareto distribution
+#'
+#' Given a sample \eqn{x}, Estimate the parameters \eqn{k} and \eqn{\sigma} of
+#' the generalized Pareto distribution (GPD), assuming the location parameter is
+#' 0. By default the fit uses a prior for \eqn{k} (this is in addition to the prior described by Zhang and Stephens, 2009), which will stabilize
+#' estimates for very small sample sizes (and low effective sample sizes in the
+#' case of MCMC samples). The weakly informative prior is a Gaussian prior
+#' centered at 0.5 (see details in Vehtari et al., 2022).
+#'
+#' @param x A numeric vector. The sample from which to estimate the parameters.
+#' @param wip Logical indicating whether to adjust \eqn{k} based on a weakly
+#'   informative Gaussian prior centered on 0.5. Defaults to `TRUE`.
+#' @param min_grid_pts The minimum number of grid points used in the fitting
+#'   algorithm. The actual number used is `min_grid_pts + floor(sqrt(length(x)))`.
+#' @param sort_x If `TRUE` (the default), the first step in the fitting
+#'   algorithm is to sort the elements of `x`. If `x` is already
+#'   sorted in ascending order then `sort_x` can be set to `FALSE` to
+#'   skip the initial sorting step.
+#' @return A named list with components `k` and `sigma`.
+#'
+#' @details Here the parameter \eqn{k} is the negative of \eqn{k} in Zhang &
+#'   Stephens (2009).
+#'
+#'
+#' @references
+#' Zhang, J., and Stephens, M. A. (2009). A new and efficient estimation method
+#' for the generalized Pareto distribution. *Technometrics* **51**, 316-325.
+#'
+#' @noRd
+gpdfit <- function(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE) {
+  # see section 4 of Zhang and Stephens (2009)
+  if (sort_x) {
+    x <- sort.int(x)
+  }
+  N <- length(x)
+  prior <- 3
+  M <- min_grid_pts + floor(sqrt(N))
+  jj <- seq_len(M)
+  xstar <- x[floor(N / 4 + 0.5)] # first quartile of sample
+  theta <- 1 / x[N] + (1 - sqrt(M / (jj - 0.5))) / prior / xstar
+  k <- matrixStats::rowMeans2(log1p(-theta %o% x))
+  l_theta <- N * (log(-theta / k) - k - 1) # profile log-lik
+  w_theta <- exp(l_theta - matrixStats::logSumExp(l_theta)) # normalize
+  theta_hat <- sum(theta * w_theta)
+  k_hat <- mean.default(log1p(-theta_hat * x))
+  sigma_hat <- -k_hat / theta_hat
+
+  # adjust k_hat based on weakly informative prior, Gaussian centered on 0.5.
+  # this stabilizes estimates for very small Monte Carlo sample sizes and low neff (see Vehtari et al., 2022 for details)
+  if (wip) {
+    k_hat <- (k_hat * N + 0.5 * 10) / (N + 10)
+  }
+
+  if (is.na(k_hat)) {
+    k_hat <- Inf
+    sigma_hat <- NaN
+  }
+
+  list(k = k_hat, sigma = sigma_hat)
+}
diff --git a/R/pareto_smooth.R b/R/pareto_smooth.R
new file mode 100644
index 00000000..228aede9
--- /dev/null
+++ b/R/pareto_smooth.R
@@ -0,0 +1,534 @@
+#' Pareto khat diagnostic
+#'
+#' Estimate Pareto k value by fitting a Generalized Pareto
+#' Distribution to one or two tails of x. This can be used to estimate
+#' the number of fractional moments that is useful for convergence
+#' diagnostics. For further details see Vehtari et al. (2022).
+#'
+#' @template args-pareto
+#' @template args-methods-dots
+#' @template ref-vehtari-paretosmooth-2022
+#' @return `khat` estimated Generalized Pareto Distribution shape parameter k
+#' @examples
+#' mu <- extract_variable_matrix(example_draws(), "mu")
+#' pareto_khat(mu)
+#'
+#' d <- as_draws_rvars(example_draws("multi_normal"))
+#' pareto_khat(d$Sigma)
+#' @export
+pareto_khat <- function(x, ...) UseMethod("pareto_khat")
+
+#' @rdname pareto_khat
+#' @export
+pareto_khat.default <- function(x,
+                                tail = c("both", "right", "left"),
+                                r_eff = NULL,
+                                ndraws_tail = NULL,
+                                verbose = FALSE,
+                                ...) {
+  smoothed <- pareto_smooth.default(
+    x,
+    tail = tail,
+    r_eff = r_eff,
+    ndraws_tail = ndraws_tail,
+    verbose = verbose,
+    return_k = TRUE,
+    smooth_draws = FALSE,
+    ...)
+  return(smoothed$diagnostics)
+}
+
+#' @rdname pareto_khat
+#' @export
+pareto_khat.rvar <- function(x, ...) {
+  draws_diags <- summarise_rvar_by_element_with_chains(
+    x,
+    pareto_smooth.default,
+    return_k = TRUE,
+    smooth_draws = FALSE,
+    ...
+  )
+  dim(draws_diags) <- dim(draws_diags) %||% length(draws_diags)
+  margins <- seq_along(dim(draws_diags))
+
+  diags <- list(
+    khat = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$khat)
+  )
+
+  diags
+}
+
+
+#' Pareto smoothing diagnostics
+#'
+#' Compute diagnostics for Pareto smoothing the tail draws of x by
+#' replacing tail draws by order statistics of a generalized Pareto
+#' distribution fit to the tail(s).
+#'
+#' @template args-pareto
+#' @template args-methods-dots
+#' @template ref-vehtari-paretosmooth-2022
+#' @return List of Pareto smoothing diagnostics:
+#'  * `khat`: estimated Pareto k shape parameter,
+#'  * `min_ss`: minimum sample size for reliable Pareto smoothed estimate,
+#'  * `khat_threshold`: khat-threshold for reliable Pareto smoothed estimate,
+#'  * `convergence_rate`: Pareto smoothed estimate RMSE convergence rate.
+#'
+#' @details When the fitted Generalized Pareto Distribution is used to
+#'   smooth the tail values and these smoothed values are used to
+#'   compute expectations, the following diagnostics can give further
+#'   information about the reliability of these estimates.
+#'
+#'  * `min_ss`: Minimum sample size for reliable Pareto smoothed
+#' estimate. If the actual sample size is greater than `min_ss`, then
+#' Pareto smoothed estimates can be considered reliable. If the actual
+#' sample size is lower than `min_ss`, increasing the sample size
+#' might result in more reliable estimates. For further details, see
+#' Section 3.2.3, Equation 11 in Vehtari et al. (2022).
+#'
+#'  * `khat_threshold`: Threshold below which k-hat values result in
+#' reliable Pareto smoothed estimates. The threshold is lower for
+#' smaller effective sample sizes. If k-hat is larger than the
+#' threshold, increasing the total sample size may improve reliability
+#' of estimates. For further details, see Section 3.2.4, Equation 13
+#' in Vehtari et al. (2022).
+#'
+#'  * `convergence_rate`: Relative convergence rate compared to the
+#'  central limit theorem. Applicable only if the actual sample size
+#'  is sufficiently large (greater than `min_ss`). The convergence
+#'  rate tells the rate at which the variance of an estimate reduces
+#'  when the sample size is increased, compared to the central limit
+#'  theorem convergence rate. See Appendix B in Vehtari et al. (2022).
+#'
+#' @examples
+#' mu <- extract_variable_matrix(example_draws(), "mu")
+#' pareto_diags(mu)
+#'
+#' d <- as_draws_rvars(example_draws("multi_normal"))
+#' pareto_diags(d$Sigma)
+#' @export
+pareto_diags <- function(x, ...) UseMethod("pareto_diags")
+
+
+#' @rdname pareto_diags
+#' @export
+pareto_diags.default <- function(x,
+                                  tail = c("both", "right", "left"),
+                                  r_eff = NULL,
+                                  ndraws_tail = NULL,
+                                  verbose = FALSE,
+                                  ...) {
+
+  smoothed <- pareto_smooth.default(
+    x,
+    tail = tail,
+    r_eff = r_eff,
+    ndraws_tail = ndraws_tail,
+    return_k = TRUE,
+    extra_diags = TRUE,
+    verbose = verbose,
+    smooth_draws = FALSE,
+    ...)
+
+  return(smoothed$diagnostics)
+
+}
+
+
+#' @rdname pareto_diags
+#' @export
+pareto_diags.rvar <- function(x, ...) {
+  draws_diags <- summarise_rvar_by_element_with_chains(
+    x,
+    pareto_smooth.default,
+    return_k = TRUE,
+    smooth_draws = FALSE,
+    extra_diags = TRUE,
+    ...
+  )
+
+  dim(draws_diags) <- dim(draws_diags) %||% length(draws_diags)
+  margins <- seq_along(dim(draws_diags))
+
+  diags <- list(
+    khat = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$khat),
+    min_ss = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$min_ss),
+    khat_threshold = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$khat_threshold),
+    convergence_rate = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$convergence_rate)
+  )
+
+  diags
+}
+
+
+
+#' Pareto smoothing
+#'
+#' Smooth the tail draws of x by replacing tail draws by order
+#' statistics of a generalized Pareto distribution fit to the
+#' tail(s). For further details see Vehtari et al. (2022).
+#'
+#' @template args-pareto
+#' @param return_k (logical) Should the Pareto khat be included in
+#'   output? If `TRUE`, output will be a list containing of smoothed
+#'   draws and diagnostics. Default is `TRUE`.
+#' @param extra_diags (logical) Should extra Pareto khat diagnostics
+#'   be included in output? If `TRUE`, `min_ss`, `khat_threshold` and
+#'   `convergence_rate` for the estimated k value will be
+#'   returned. Default is `FALSE`.
+#' @template args-methods-dots
+#' @template ref-vehtari-paretosmooth-2022
+#' @return Either a vector `x` of smoothed values or a named list
+#'   containing the vector `x` and a named list `diagnostics` containing Pareto smoothing
+#'   diagnostics:
+#' * `khat`: estimated Pareto k shape parameter, and
+#'   optionally
+#' * `min_ss`: minimum sample size for reliable Pareto
+#'   smoothed estimate
+#' * `khat_threshold`: khat-threshold for reliable
+#'   Pareto smoothed estimates
+#' * `convergence_rate`: Relative convergence rate for Pareto smoothed estimates
+#'
+#' @examples
+#' mu <- extract_variable_matrix(example_draws(), "mu")
+#' pareto_smooth(mu)
+#'
+#' d <- as_draws_rvars(example_draws("multi_normal"))
+#' pareto_smooth(d$Sigma)
+#' @export
+pareto_smooth <- function(x, ...) UseMethod("pareto_smooth")
+
+#' @rdname pareto_smooth
+#' @export
+pareto_smooth.rvar <- function(x, return_k = TRUE, extra_diags = FALSE, ...) {
+
+  if (extra_diags) {
+    return_k <- TRUE
+  }
+
+  draws_diags <- summarise_rvar_by_element_with_chains(x, pareto_smooth.default, return_k = return_k, extra_diags = extra_diags, ...)
+  dim(draws_diags) <- dim(draws_diags) %||% length(draws_diags)
+  margins <- seq_along(dim(draws_diags))
+
+  if (return_k) {
+    if (extra_diags) {
+
+      diags <- list(
+        khat = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$khat),
+        min_ss = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$min_ss),
+        khat_threshold = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$khat_threshold),
+        convergence_rate = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$convergence_rate)
+      )
+    } else {
+      diags <- list(
+        khat = apply(draws_diags, margins, function(x) x[[1]]$diagnostics$khat)
+      )
+    }
+    out <- list(
+    x = rvar(apply(draws_diags, margins, function(x) x[[1]]$x), nchains = nchains(x)),
+    diagnostics = diags
+    )
+  } else {
+    out <- rvar(apply(draws_diags, margins, function(x) x[[1]]), nchains = nchains(x))
+  }
+  out
+}
+
+#' @rdname pareto_smooth
+#' @export
+pareto_smooth.default <- function(x,
+                                  tail = c("both", "right", "left"),
+                                  r_eff = NULL,
+                                  ndraws_tail = NULL,
+                                  return_k = TRUE,
+                                  extra_diags = FALSE,
+                                  verbose = FALSE,
+                                  ...) {
+
+  checkmate::assert_number(ndraws_tail, null.ok = TRUE)
+  checkmate::assert_number(r_eff, null.ok = TRUE)
+  checkmate::assert_logical(extra_diags)
+  checkmate::assert_logical(return_k)
+  checkmate::assert_logical(verbose)
+
+  # check for infinite or na values
+  if (should_return_NA(x)) {
+    warning_no_call("Input contains infinite or NA values, Pareto smoothing not performed.")
+    return(list(x = x, diagnostics = NA_real_))
+  }
+
+  tail <- match.arg(tail)
+  S <- length(x)
+
+  # automatically calculate relative efficiency
+  if (is.null(r_eff)) {
+    r_eff <- ess_tail(x) / S
+  }
+
+  # automatically calculate tail length
+  if (is.null(ndraws_tail)) {
+    ndraws_tail <- ps_tail_length(S, r_eff)
+  }
+
+  if (tail == "both") {
+
+    if (ndraws_tail > S / 2) {
+      warning("Number of tail draws cannot be more than half ",
+              "the total number of draws if both tails are fit, ",
+              "changing to ", S / 2, ".")
+      ndraws_tail <- S / 2
+    }
+
+    if (ndraws_tail < 5) {
+      warning("Number of tail draws cannot be less than 5. ",
+              "Changing to ", 5, ".")
+      ndraws_tail <- 5
+    }
+
+    # left tail
+    smoothed <- .pareto_smooth_tail(
+      x,
+      ndraws_tail = ndraws_tail,
+      tail = "left",
+      ...
+    )
+    left_k <- smoothed$k
+
+    # right tail
+    smoothed <-.pareto_smooth_tail(
+      x = smoothed$x,
+      ndraws_tail = ndraws_tail,
+      tail = "right",
+      ...
+    )
+    right_k <- smoothed$k
+
+    k <- max(left_k, right_k)
+    x <- smoothed$x
+  } else {
+
+    smoothed <- .pareto_smooth_tail(
+      x,
+      ndraws_tail = ndraws_tail,
+      tail = tail,
+      ...
+    )
+    k <- smoothed$k
+    x <- smoothed$x
+  }
+
+  diags_list <- list(khat = k)
+
+  if (extra_diags) {
+    ext_diags <- .pareto_smooth_extra_diags(k, S)
+    diags_list <- c(diags_list, ext_diags)
+  }
+
+  if (verbose) {
+    if (!extra_diags) {
+      diags_list <- .pareto_smooth_extra_diags(diags_list$khat, length(x))
+    }
+    pareto_k_diagmsg(
+      diags = diags_list
+    )
+  }
+
+  if (return_k) {
+    out <- list(x = x, diagnostics = diags_list)
+  } else {
+    out <- x
+  }
+
+  return(out)
+}
+
+#' Pareto smooth tail
+#' internal function to pareto smooth the tail of a vector
+#' @noRd
+.pareto_smooth_tail <- function(x,
+                                ndraws_tail,
+                                smooth_draws = TRUE,
+                                tail = c("right", "left"),
+                                ...
+                                ) {
+
+  tail <- match.arg(tail)
+
+  S <- length(x)
+  tail_ids <- seq(S - ndraws_tail + 1, S)
+
+
+  if (tail == "left") {
+    x <- -x
+  }
+
+  ord <- sort.int(x, index.return = TRUE)
+  draws_tail <- ord$x[tail_ids]
+  cutoff <- ord$x[min(tail_ids) - 1] # largest value smaller than tail values
+
+  max_tail <- max(draws_tail)
+  min_tail <- min(draws_tail)
+
+  if (ndraws_tail >= 5) {
+    ord <- sort.int(x, index.return = TRUE)
+    if (abs(max_tail - min_tail) < .Machine$double.eps / 100) {
+      warning_no_call(
+        "Can't fit generalized Pareto distribution ",
+        "because all tail values are the same."
+      )
+      smoothed <- NULL
+      k <- NA
+    } else {
+      # save time not sorting since x already sorted
+      fit <- gpdfit(draws_tail - cutoff, sort_x = FALSE)
+      k <- fit$k
+      sigma <- fit$sigma
+      if (is.finite(k) && smooth_draws) {
+        p <- (seq_len(ndraws_tail) - 0.5) / ndraws_tail
+        smoothed <- qgeneralized_pareto(p = p, mu = cutoff, k = k, sigma = sigma)
+      } else {
+        smoothed <- NULL
+      }
+    }
+  } else {
+    warning_no_call(
+      "Can't fit generalized Pareto distribution ",
+      "because ndraws_tail is less than 5."
+    )
+    smoothed <- NULL
+    k <- NA
+  }
+
+  # truncate at max of raw draws
+  if (!is.null(smoothed)) {
+    smoothed[smoothed > max_tail] <- max_tail
+    x[ord$ix[tail_ids]] <- smoothed
+  }
+
+  if (tail == "left") {
+    x <- -x
+  }
+
+  out <- list(x = x, k = k)
+
+  return(out)
+}
+
+#' Extra pareto-k diagnostics
+#'
+#' internal function to calculate the extra diagnostics for a given
+#' pareto k and sample size S
+#' @noRd
+.pareto_smooth_extra_diags <- function(k, S, ...) {
+
+  min_ss <- ps_min_ss(k)
+
+  khat_threshold <- ps_khat_threshold(S)
+
+  convergence_rate <- ps_convergence_rate(k, S)
+
+  other_diags <- list(
+    min_ss = min_ss,
+    khat_threshold = khat_threshold,
+    convergence_rate = convergence_rate
+  )
+}
+
+#' Pareto-smoothing minimum sample-size
+#'
+#' Given Pareto-k computes the minimum sample size for reliable Pareto
+#' smoothed estimate (to have small probability of large error)
+#' Equation (11) in PSIS paper
+#' @param k pareto k value
+#' @param ... unused
+#' @return minimum sample size
+#' @noRd
+ps_min_ss <- function(k, ...) {
+  if (k < 1) {
+   out <- 10^(1 / (1 - max(0, k)))
+  } else {
+   out <- Inf
+  }
+   out
+}
+
+
+#' Pareto-smoothing k-hat threshold
+#'
+#' Given sample size S computes khat threshold for reliable Pareto
+#' smoothed estimate (to have small probability of large error). See
+#' section 3.2.4, equation (13).
+#' @param S sample size
+#' @param ... unused
+#' @return threshold
+#' @noRd
+ps_khat_threshold <- function(S, ...) {
+  1 - 1 / log10(S)
+}
+
+#' Pareto-smoothing convergence rate
+#'
+#' Given S and scalar or array of k's, compute the relative
+#' convergence rate of PSIS estimate RMSE
+#' @param k pareto-k values
+#' @param S sample size
+#' @param ... unused
+#' @return convergence rate
+#' @noRd
+ps_convergence_rate <- function(k, S, ...) {
+  # allow array of k's
+  rate <- numeric(length(k))
+  # k<0 bounded distribution
+  rate[k < 0] <- 1
+  # k>0 non-finite mean
+  rate[k > 1] <- 0
+  # limit value at k=1/2
+  rate[k == 0.5] <- 1 - 1 / log(S)
+  # smooth approximation for the rest (see Appendix of PSIS paper)
+  ki <- (k > 0 & k < 1 & k != 0.5)
+  kk <- k[ki]
+  rate[ki] <- pmax(
+    0,
+    (2 * (kk - 1) * S^(2 * kk + 1) + (1 - 2 * kk) * S^(2 * kk) + S^2) /
+      ((S - 1) * (S - S^(2 * kk)))
+  )
+  rate
+}
+
+#' Calculate the tail length from S and r_eff
+#' Appendix H in PSIS paper
+#' @noRd
+ps_tail_length <- function(S, r_eff, ...) {
+  ifelse(S > 225, ceiling(3 * sqrt(S / r_eff)), S / 5)
+}
+
+#' Pareto-k diagnostic message
+#'
+#' Given S and scalar and k, form a diagnostic message string
+#' @param diags (numeric) named vector of diagnostic values
+#' @param ... unused
+#' @return diagnostic message
+#' @noRd
+pareto_k_diagmsg <- function(diags, ...) {
+  khat <- diags$khat
+  min_ss <- diags$min_ss
+  khat_threshold <- diags$khat_threshold
+  convergence_rate <- diags$convergence_rate
+  msg <- NULL
+  if (khat > 1) {
+    msg <- paste0(msg,'All estimates are unreliable. If the distribution of ratios is bounded,\n',
+                  'further draws may improve the estimates, but it is not possible to predict\n',
+                  'whether any feasible sample size is sufficient.')
+  } else {
+    if (khat > khat_threshold) {
+      msg <- paste0(msg, 'S is too small, and sample size larger than ', round(min_ss, 0), ' is needed for reliable results.\n')
+    } else {
+      msg <- paste0(msg, 'To halve the RMSE, approximately ', round(2^(2/convergence_rate),1), ' times bigger S is needed.\n')
+    }
+    if (khat > 0.7) {
+      msg <- paste0(msg, 'Bias dominates RMSE, and the variance based MCSE is underestimated.\n')
+    }
+  }
+  message(msg)
+  invisible(diags)
+}
diff --git a/man-roxygen/args-pareto.R b/man-roxygen/args-pareto.R
new file mode 100644
index 00000000..a406dbde
--- /dev/null
+++ b/man-roxygen/args-pareto.R
@@ -0,0 +1,20 @@
+#' @param x (multiple options) One of:
+#'  - A matrix of draws for a single variable (iterations x chains). See
+#'    [extract_variable_matrix()].
+#'  - An [`rvar`].
+#' @param tail (string) The tail to diagnose/smooth:
+#'   * `"right"`: diagnose/smooth only the right (upper) tail
+#'   * `"left"`: diagnose/smooth only the left (lower) tail
+#'   * `"both"`: diagnose/smooth both tails and return the maximum k-hat value
+#'
+#' The default is `"both"`.
+#' @param ndraws_tail (numeric) number of draws for the tail. If
+#'   `ndraws_tail` is not specified, it will be calculated as
+#'   ceiling(3 * sqrt(length(x) / r_eff)) if length(x) > 225 and
+#'   length(x) / 5 otherwise (see Appendix H in Vehtari et al. (2022)).
+#' @param r_eff (numeric) relative effective sample size estimate. If
+#'   `r_eff` is omitted, it will be calculated assuming the draws are
+#'   from MCMC.
+#' @param verbose (logical) Should diagnostic messages be printed? If
+#'   `TRUE`, messages related to Pareto diagnostics will be
+#'   printed. Default is `FALSE`.
diff --git a/man-roxygen/ref-vehtari-paretosmooth-2022.R b/man-roxygen/ref-vehtari-paretosmooth-2022.R
new file mode 100644
index 00000000..267ec29a
--- /dev/null
+++ b/man-roxygen/ref-vehtari-paretosmooth-2022.R
@@ -0,0 +1,4 @@
+#' @references
+#' Aki Vehtari, Daniel Simpson, Andrew Gelman, Yuling Yao and
+#' Jonah Gabry (2022). Pareto Smoothed Importance Sampling.
+#' arxiv:arXiv:1507.02646
diff --git a/man/pareto_diags.Rd b/man/pareto_diags.Rd
new file mode 100644
index 00000000..c2558a9a
--- /dev/null
+++ b/man/pareto_diags.Rd
@@ -0,0 +1,105 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pareto_smooth.R
+\name{pareto_diags}
+\alias{pareto_diags}
+\alias{pareto_diags.default}
+\alias{pareto_diags.rvar}
+\title{Pareto smoothing diagnostics}
+\usage{
+pareto_diags(x, ...)
+
+\method{pareto_diags}{default}(
+  x,
+  tail = c("both", "right", "left"),
+  r_eff = NULL,
+  ndraws_tail = NULL,
+  verbose = FALSE,
+  ...
+)
+
+\method{pareto_diags}{rvar}(x, ...)
+}
+\arguments{
+\item{x}{(multiple options) One of:
+\itemize{
+\item A matrix of draws for a single variable (iterations x chains). See
+\code{\link[=extract_variable_matrix]{extract_variable_matrix()}}.
+\item An \code{\link{rvar}}.
+}}
+
+\item{...}{Arguments passed to individual methods (if applicable).}
+
+\item{tail}{(string) The tail to diagnose/smooth:
+\itemize{
+\item \code{"right"}: diagnose/smooth only the right (upper) tail
+\item \code{"left"}: diagnose/smooth only the left (lower) tail
+\item \code{"both"}: diagnose/smooth both tails and return the maximum k-hat value
+}
+
+The default is \code{"both"}.}
+
+\item{r_eff}{(numeric) relative effective sample size estimate. If
+\code{r_eff} is omitted, it will be calculated assuming the draws are
+from MCMC.}
+
+\item{ndraws_tail}{(numeric) number of draws for the tail. If
+\code{ndraws_tail} is not specified, it will be calculated as
+ceiling(3 * sqrt(length(x) / r_eff)) if length(x) > 225 and
+length(x) / 5 otherwise (see Appendix H in Vehtari et al. (2022)).}
+
+\item{verbose}{(logical) Should diagnostic messages be printed? If
+\code{TRUE}, messages related to Pareto diagnostics will be
+printed. Default is \code{FALSE}.}
+}
+\value{
+List of Pareto smoothing diagnostics:
+\itemize{
+\item \code{khat}: estimated Pareto k shape parameter,
+\item \code{min_ss}: minimum sample size for reliable Pareto smoothed estimate,
+\item \code{khat_threshold}: khat-threshold for reliable Pareto smoothed estimate,
+\item \code{convergence_rate}: Pareto smoothed estimate RMSE convergence rate.
+}
+}
+\description{
+Compute diagnostics for Pareto smoothing the tail draws of x by
+replacing tail draws by order statistics of a generalized Pareto
+distribution fit to the tail(s).
+}
+\details{
+When the fitted Generalized Pareto Distribution is used to
+smooth the tail values and these smoothed values are used to
+compute expectations, the following diagnostics can give further
+information about the reliability of these estimates.
+\itemize{
+\item \code{min_ss}: Minimum sample size for reliable Pareto smoothed
+estimate. If the actual sample size is greater than \code{min_ss}, then
+Pareto smoothed estimates can be considered reliable. If the actual
+sample size is lower than \code{min_ss}, increasing the sample size
+might result in more reliable estimates. For further details, see
+Section 3.2.3, Equation 11 in Vehtari et al. (2022).
+\item \code{khat_threshold}: Threshold below which k-hat values result in
+reliable Pareto smoothed estimates. The threshold is lower for
+smaller effective sample sizes. If k-hat is larger than the
+threshold, increasing the total sample size may improve reliability
+of estimates. For further details, see Section 3.2.4, Equation 13
+in Vehtari et al. (2022).
+\item \code{convergence_rate}: Relative convergence rate compared to the
+central limit theorem. Applicable only if the actual sample size
+is sufficiently large (greater than \code{min_ss}). The convergence
+rate tells the rate at which the variance of an estimate reduces
+when the sample size is increased, compared to the central limit
+theorem convergence rate. See Appendix B in Vehtari et al. (2022).
+}
+}
+\examples{
+mu <- extract_variable_matrix(example_draws(), "mu")
+pareto_diags(mu)
+
+d <- as_draws_rvars(example_draws("multi_normal"))
+pareto_diags(d$Sigma)
+}
+\references{
+Aki Vehtari, Daniel Simpson, Andrew Gelman, Yuling Yao and
+Jonah Gabry (2022). Pareto Smoothed Importance Sampling.
+arxiv:arXiv:1507.02646
+}
diff --git a/man/pareto_khat.Rd b/man/pareto_khat.Rd
new file mode 100644
index 00000000..c3383eb4
--- /dev/null
+++ b/man/pareto_khat.Rd
@@ -0,0 +1,74 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pareto_smooth.R
+\name{pareto_khat}
+\alias{pareto_khat}
+\alias{pareto_khat.default}
+\alias{pareto_khat.rvar}
+\title{Pareto khat diagnostic}
+\usage{
+pareto_khat(x, ...)
+
+\method{pareto_khat}{default}(
+  x,
+  tail = c("both", "right", "left"),
+  r_eff = NULL,
+  ndraws_tail = NULL,
+  verbose = FALSE,
+  ...
+)
+
+\method{pareto_khat}{rvar}(x, ...)
+}
+\arguments{
+\item{x}{(multiple options) One of:
+\itemize{
+\item A matrix of draws for a single variable (iterations x chains). See
+\code{\link[=extract_variable_matrix]{extract_variable_matrix()}}.
+\item An \code{\link{rvar}}.
+}}
+
+\item{...}{Arguments passed to individual methods (if applicable).}
+
+\item{tail}{(string) The tail to diagnose/smooth:
+\itemize{
+\item \code{"right"}: diagnose/smooth only the right (upper) tail
+\item \code{"left"}: diagnose/smooth only the left (lower) tail
+\item \code{"both"}: diagnose/smooth both tails and return the maximum k-hat value
+}
+
+The default is \code{"both"}.}
+
+\item{r_eff}{(numeric) relative effective sample size estimate. If
+\code{r_eff} is omitted, it will be calculated assuming the draws are
+from MCMC.}
+
+\item{ndraws_tail}{(numeric) number of draws for the tail. If
+\code{ndraws_tail} is not specified, it will be calculated as
+ceiling(3 * sqrt(length(x) / r_eff)) if length(x) > 225 and
+length(x) / 5 otherwise (see Appendix H in Vehtari et al. (2022)).}
+
+\item{verbose}{(logical) Should diagnostic messages be printed? If
+\code{TRUE}, messages related to Pareto diagnostics will be
+printed. Default is \code{FALSE}.}
+}
+\value{
+\code{khat} estimated Generalized Pareto Distribution shape parameter k
+}
+\description{
+Estimate Pareto k value by fitting a Generalized Pareto
+Distribution to one or two tails of x. This can be used to estimate
+the number of fractional moments that is useful for convergence
+diagnostics. For further details see Vehtari et al. (2022).
+}
+\examples{
+mu <- extract_variable_matrix(example_draws(), "mu")
+pareto_khat(mu)
+
+d <- as_draws_rvars(example_draws("multi_normal"))
+pareto_khat(d$Sigma)
+}
+\references{
+Aki Vehtari, Daniel Simpson, Andrew Gelman, Yuling Yao and
+Jonah Gabry (2022). Pareto Smoothed Importance Sampling.
+arxiv:arXiv:1507.02646
+}
diff --git a/man/pareto_smooth.Rd b/man/pareto_smooth.Rd
new file mode 100644
index 00000000..259421de
--- /dev/null
+++ b/man/pareto_smooth.Rd
@@ -0,0 +1,95 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pareto_smooth.R
+\name{pareto_smooth}
+\alias{pareto_smooth}
+\alias{pareto_smooth.rvar}
+\alias{pareto_smooth.default}
+\title{Pareto smoothing}
+\usage{
+pareto_smooth(x, ...)
+
+\method{pareto_smooth}{rvar}(x, return_k = TRUE, extra_diags = FALSE, ...)
+
+\method{pareto_smooth}{default}(
+  x,
+  tail = c("both", "right", "left"),
+  r_eff = NULL,
+  ndraws_tail = NULL,
+  return_k = TRUE,
+  extra_diags = FALSE,
+  verbose = FALSE,
+  ...
+)
+}
+\arguments{
+\item{x}{(multiple options) One of:
+\itemize{
+\item A matrix of draws for a single variable (iterations x chains). See
+\code{\link[=extract_variable_matrix]{extract_variable_matrix()}}.
+\item An \code{\link{rvar}}.
+}}
+
+\item{...}{Arguments passed to individual methods (if applicable).}
+
+\item{return_k}{(logical) Should the Pareto khat be included in
+output? If \code{TRUE}, output will be a list containing of smoothed
+draws and diagnostics. Default is \code{TRUE}.}
+
+\item{extra_diags}{(logical) Should extra Pareto khat diagnostics
+be included in output? If \code{TRUE}, \code{min_ss}, \code{khat_threshold} and
+\code{convergence_rate} for the estimated k value will be
+returned. Default is \code{FALSE}.}
+
+\item{tail}{(string) The tail to diagnose/smooth:
+\itemize{
+\item \code{"right"}: diagnose/smooth only the right (upper) tail
+\item \code{"left"}: diagnose/smooth only the left (lower) tail
+\item \code{"both"}: diagnose/smooth both tails and return the maximum k-hat value
+}
+
+The default is \code{"both"}.}
+
+\item{r_eff}{(numeric) relative effective sample size estimate. If
+\code{r_eff} is omitted, it will be calculated assuming the draws are
+from MCMC.}
+
+\item{ndraws_tail}{(numeric) number of draws for the tail. If
+\code{ndraws_tail} is not specified, it will be calculated as
+ceiling(3 * sqrt(length(x) / r_eff)) if length(x) > 225 and
+length(x) / 5 otherwise (see Appendix H in Vehtari et al. (2022)).}
+
+\item{verbose}{(logical) Should diagnostic messages be printed? If
+\code{TRUE}, messages related to Pareto diagnostics will be
+printed. Default is \code{FALSE}.}
+}
+\value{
+Either a vector \code{x} of smoothed values or a named list
+containing the vector \code{x} and a named list \code{diagnostics} containing Pareto smoothing
+diagnostics:
+\itemize{
+\item \code{khat}: estimated Pareto k shape parameter, and
+optionally
+\item \code{min_ss}: minimum sample size for reliable Pareto
+smoothed estimate
+\item \code{khat_threshold}: khat-threshold for reliable
+Pareto smoothed estimates
+\item \code{convergence_rate}: Relative convergence rate for Pareto smoothed estimates
+}
+}
+\description{
+Smooth the tail draws of x by replacing tail draws by order
+statistics of a generalized Pareto distribution fit to the
+tail(s). For further details see Vehtari et al. (2022).
+}
+\examples{
+mu <- extract_variable_matrix(example_draws(), "mu")
+pareto_smooth(mu)
+
+d <- as_draws_rvars(example_draws("multi_normal"))
+pareto_smooth(d$Sigma)
+}
+\references{
+Aki Vehtari, Daniel Simpson, Andrew Gelman, Yuling Yao and
+Jonah Gabry (2022). Pareto Smoothed Importance Sampling.
+arxiv:arXiv:1507.02646
+}
diff --git a/man/set_variables.Rd b/man/set_variables.Rd
index fbbccc78..ba67f440 100644
--- a/man/set_variables.Rd
+++ b/man/set_variables.Rd
@@ -25,7 +25,7 @@ when using pipe operators.
 x <- as_draws(matrix(rnorm(100), ncol = 2))
 variables(x)
 
-set_variables(x, c("theta[1]", "theta[2]"))
+x <- set_variables(x, c("theta[1]", "theta[2]"))
 variables(x)
 
 # this is equivalent to
diff --git a/tests/testthat/test-pareto_smooth.R b/tests/testthat/test-pareto_smooth.R
new file mode 100644
index 00000000..dff22ee1
--- /dev/null
+++ b/tests/testthat/test-pareto_smooth.R
@@ -0,0 +1,194 @@
+test_that("pareto_khat returns expected reasonable values", {
+  tau <- extract_variable_matrix(example_draws(), "tau")
+
+  pk <- pareto_khat(tau)
+  expect_true(names(pk) == "khat")
+
+})
+
+test_that("pareto_khat handles tail argument", {
+
+  # as tau is bounded (0, Inf) the left pareto k should be lower than
+  # right
+  tau <- extract_variable_matrix(example_draws(), "tau")
+  pkl <- pareto_khat(tau, tail = "left")
+  pkr <- pareto_khat(tau, tail = "right")
+  pkb <- pareto_khat(tau)
+  expect_true(pkl$khat < pkr$khat)
+  expect_equal(pkr$khat, pkb$khat)
+})
+
+test_that("pareto_khat handles ndraws_tail argument", {
+
+  tau <- extract_variable_matrix(example_draws(), "tau")
+  pk10 <- pareto_khat(tau, tail = "right", ndraws_tail = 10)
+  pk25 <- pareto_khat(tau, tail = "right", ndraws_tail = 25)
+  expect_true(pk10$khat > pk25$khat)
+
+  expect_warning(pareto_khat(tau, tail = "both", ndraws_tail = 201),
+                 "Number of tail draws cannot be more than half ",
+                 "the total number of draws if both tails are fit, ",
+                 "changing to 200.")
+
+  expect_warning(pareto_khat(tau, tail = "both", ndraws_tail = 4),
+                 "Number of tail draws cannot be less than 5. Changing to 5.")
+
+})
+
+
+test_that("pareto_khat handles r_eff argument", {
+
+  tau <- extract_variable_matrix(example_draws(), "tau")
+  pk1 <- pareto_khat(tau, r_eff = 1)
+  pk0.6 <- pareto_khat(tau, r_eff = 0.6)
+  expect_true(pk1$khat < pk0.6$khat)
+
+})
+
+
+test_that("pareto_khat diagnostics messages are as expected", {
+
+  diags <- list(
+    khat = 0.5,
+    min_ss = 10,
+    khat_threshold = 0.55,
+    convergence_rate = 0.99
+  )
+
+  expect_message(pareto_k_diagmsg(diags),
+                   paste0('To halve the RMSE, approximately 4.1 times bigger S is needed.'))
+
+  diags$khat <- 0.6
+
+    expect_message(pareto_k_diagmsg(diags),
+                   paste0('S is too small, and sample size larger than 10 is needed for reliable results.\n'))
+
+    diags$khat <- 0.71
+    diags$khat_threshold <- 0.8
+
+    expect_message(pareto_k_diagmsg(diags),
+                   paste0('To halve the RMSE, approximately 4.1 times bigger S is needed.\n', 'Bias dominates RMSE, and the variance based MCSE is underestimated.\n'))
+
+
+    diags$khat <- 1.1
+
+    expect_message(pareto_k_diagmsg(diags),
+                   paste0('All estimates are unreliable. If the distribution of ratios is bounded,\n',
+                  'further draws may improve the estimates, but it is not possible to predict\n',
+                  'whether any feasible sample size is sufficient.'))
+
+})
+
+test_that("pareto_diags returns expected diagnostics", {
+
+  tau <- extract_variable_matrix(example_draws(), "tau")
+
+  pk <- pareto_diags(tau)
+  expect_true(all(names(pk) == c("khat", "min_ss", "khat_threshold", "convergence_rate")))
+})
+
+
+test_that("pareto_khat diagnostics handles verbose argument", {
+  tau <- extract_variable_matrix(example_draws(), "tau")
+
+  expect_message(pareto_khat(tau, extra_diags = TRUE, verbose = TRUE))
+  expect_message(pareto_khat(tau, extra_diags = FALSE, verbose = TRUE))
+  expect_no_message(pareto_khat(tau, extra_diags = TRUE, verbose = FALSE))
+
+})
+
+test_that("pareto_khat diagnostics accept vectors as input", {
+  set.seed(1234)
+  x <- rnorm(1000)
+
+  pk <- pareto_khat(x)
+  expect_true(pk < 1)
+})
+
+test_that("pareto_khat diagnostics handle special cases correctly", {
+  set.seed(1234)
+
+  x <- c(rnorm(50), NA)
+  expect_warning(pareto_khat(x))
+  expect_true(is.na(suppressWarnings(pareto_khat(x))))
+
+  x <- c(rnorm(50), Inf)
+  expect_warning(pareto_khat(x))
+  expect_true(is.na(suppressWarnings(pareto_khat(x))))
+
+  x <- rep(1, 50)
+  expect_warning(pareto_khat(x))
+  expect_true(is.na(suppressWarnings(pareto_khat(x))))
+
+})
+
+
+test_that("pareto_khat functions work with matrix with chains", {
+
+  tau_chains <- extract_variable_matrix(example_draws(), "tau")
+  tau_nochains <- extract_variable(example_draws(), "tau")
+
+  expect_equal(pareto_khat(tau_chains, ndraws_tail = 20),
+               pareto_khat(tau_nochains, ndraws_tail = 20))
+
+  ps_chains <- pareto_smooth(tau_chains, ndraws_tail = 20)
+  ps_nochains <- pareto_smooth(tau_nochains, ndraws_tail = 20)
+
+  expect_equal(as.numeric(ps_chains$x), as.numeric(ps_nochains$x))
+
+})
+
+test_that("pareto_khat functions work with rvars with and without chains", {
+
+  tau_chains <- extract_variable_matrix(example_draws(), "tau")
+  tau_nochains <- extract_variable(example_draws(), "tau")
+
+  tau_rvar_chains <- rvar(tau_chains, with_chains = TRUE)
+  tau_rvar_nochains <- rvar(tau_nochains)
+
+  expect_equal(pareto_khat(tau_chains, ndraws_tail = 20),
+               pareto_khat(tau_rvar_chains, ndraws_tail = 20))
+
+  expect_equal(pareto_khat(tau_rvar_chains, ndraws_tail = 20),
+               pareto_khat(tau_rvar_nochains, ndraws_tail = 20))
+
+
+  expect_equal(pareto_diags(tau_chains, ndraws_tail = 20),
+               pareto_diags(tau_rvar_chains, ndraws_tail = 20))
+
+  expect_equal(pareto_diags(tau_rvar_chains, ndraws_tail = 20),
+               pareto_diags(tau_rvar_nochains, ndraws_tail = 20))
+
+  ps_chains <- pareto_smooth(tau_chains, ndraws_tail = 20)
+  ps_rvar_chains <- pareto_smooth(tau_rvar_chains, ndraws_tail = 20)
+
+  ps_nochains <- pareto_smooth(tau_nochains, ndraws_tail = 20)
+  ps_rvar_nochains <- pareto_smooth(tau_rvar_nochains, ndraws_tail = 20)
+
+  expect_equal(ps_rvar_chains$x, rvar(ps_chains$x, with_chains = TRUE))
+
+  expect_equal(ps_rvar_nochains$x, rvar(ps_nochains$x))
+
+
+  ps_chains <- pareto_smooth(tau_chains, ndraws_tail = 20, extra_diags = TRUE)
+  ps_rvar_chains <- pareto_smooth(tau_rvar_chains, ndraws_tail = 20, extra_diags = TRUE)
+
+  ps_nochains <- pareto_smooth(tau_nochains, ndraws_tail = 20, extra_diags = TRUE)
+  ps_rvar_nochains <- pareto_smooth(tau_rvar_nochains, ndraws_tail = 20, extra_diags = TRUE)
+
+  expect_equal(ps_rvar_chains$x, rvar(ps_chains$x, with_chains = TRUE))
+
+  expect_equal(ps_rvar_nochains$x, rvar(ps_nochains$x))
+
+})
+
+test_that("pareto_smooth returns x with smoothed tail", {
+  tau <- extract_variable_matrix(example_draws(), "tau")
+
+  tau_smoothed <- pareto_smooth(tau, ndraws_tail = 10, tail = "right")$x
+
+  expect_equal(sort(tau)[1:390], sort(tau_smoothed)[1:390])
+
+  expect_false(isTRUE(all.equal(sort(tau), sort(tau_smoothed))))
+
+})