From b69b8f519e9eb958443bcea57a2092ccae160a0e Mon Sep 17 00:00:00 2001
From: apeterson91 <adampeterso@gmail.com>
Date: Sun, 14 Jul 2024 11:56:12 -0600
Subject: [PATCH] Add documentation clarifying appropriate use of weights in
 dplyr's `slice_sample()`.

---
 R/slice.R | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/R/slice.R b/R/slice.R
index 36118d5511..679d0a67db 100644
--- a/R/slice.R
+++ b/R/slice.R
@@ -95,7 +95,9 @@
 #'
 #' # you can optionally weight by a variable - this code weights by the
 #' # physical weight of the cars, so heavy cars are more likely to get
-#' # selected
+#' # selected. Note that these sampling weights cannot be used afterwards to
+#' # estimate summary statistics of the underlying population. See
+#' # https://stats.stackexchange.com/q/639211/ for more details.
 #' mtcars %>% slice_sample(weight_by = wt, n = 5)
 #'
 #' # Group wise operation ----------------------------------------
@@ -293,6 +295,9 @@ slice_max.data.frame <- function(.data, order_by, ..., n, prop, by = NULL, with_
 #' @param weight_by <[`data-masking`][rlang::args_data_masking]> Sampling
 #'   weights. This must evaluate to a vector of non-negative numbers the same
 #'   length as the input. Weights are automatically standardised to sum to 1.
+#'   Note that these weights cannot be used to estimate population summary
+#'   statistics via, for example, Horvitz-Thompson estimators. See
+#'   https://stats.stackexchange.com/q/639211/ for more details.
 slice_sample <- function(.data, ..., n, prop, by = NULL, weight_by = NULL, replace = FALSE) {
   check_dot_by_typo(...)
   check_slice_unnamed_n_prop(..., n = n, prop = prop)