From b69b8f519e9eb958443bcea57a2092ccae160a0e Mon Sep 17 00:00:00 2001 From: apeterson91 Date: Sun, 14 Jul 2024 11:56:12 -0600 Subject: [PATCH] Add documentation clarifying appropriate use of weights in dplyr's `slice_sample()`. --- R/slice.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/slice.R b/R/slice.R index 36118d5511..679d0a67db 100644 --- a/R/slice.R +++ b/R/slice.R @@ -95,7 +95,9 @@ #' #' # you can optionally weight by a variable - this code weights by the #' # physical weight of the cars, so heavy cars are more likely to get -#' # selected +#' # selected. Note that the weights cannot then be used to reconstruct +#' # summary statistics of the underlying population. See +#' # https://stats.stackexchange.com/q/639211/ for more details. #' mtcars %>% slice_sample(weight_by = wt, n = 5) #' #' # Group wise operation ---------------------------------------- @@ -293,6 +295,9 @@ slice_max.data.frame <- function(.data, order_by, ..., n, prop, by = NULL, with_ #' @param weight_by <[`data-masking`][rlang::args_data_masking]> Sampling #' weights. This must evaluate to a vector of non-negative numbers the same #' length as the input. Weights are automatically standardised to sum to 1. +#' Note that these weights cannot be used to reconstruct summary statistics +#' via, for example, Horvitz-Thompson estimators. See +#' <https://stats.stackexchange.com/q/639211/> for more details. slice_sample <- function(.data, ..., n, prop, by = NULL, weight_by = NULL, replace = FALSE) { check_dot_by_typo(...) check_slice_unnamed_n_prop(..., n = n, prop = prop)