diff --git a/r/R/dplyr-funcs-augmented.R b/r/R/dplyr-funcs-augmented.R index 1067f15573b95..116248d2dd92a 100644 --- a/r/R/dplyr-funcs-augmented.R +++ b/r/R/dplyr-funcs-augmented.R @@ -20,13 +20,27 @@ #' This function only exists inside `arrow` `dplyr` queries, and it only is #' valid when quering on a `FileSystemDataset`. #' -#' @return A `FieldRef` `Expression` that refers to the filename augmented -#' column. -#' @examples -#' \dontrun{ +#' To use filenames generated by this function in subsequent pipeline steps, you +#' must either call \code{\link[dplyr:compute]{compute()}} or +#' \code{\link[dplyr:collect]{collect()}} first. See Examples. +#' +#' @return A `FieldRef` \code{\link{Expression}} that refers to the filename +#' augmented column. +#' +#' @examples \dontrun{ +#' open_dataset("nyc-taxi") %>% mutate( +#' file = +#' add_filename() +#' ) +#' +#' # To reference the new file column in a later verb such as mutate(), first +#' # call compute() #' open_dataset("nyc-taxi") %>% -#' mutate(file = add_filename()) +#' mutate(file = add_filename()) %>% +#' compute() %>% +#' mutate(filename_length = nchar(file)) #' } +#' #' @keywords internal add_filename <- function() Expression$field_ref("__filename") diff --git a/r/R/util.R b/r/R/util.R index 46ce92d0c1845..a7cb5b3792d29 100644 --- a/r/R/util.R +++ b/r/R/util.R @@ -223,8 +223,9 @@ handle_augmented_field_misuse <- function(msg, call) { msg, i = paste( "`add_filename()` or use of the `__filename` augmented field can only", - "be used with with Dataset objects, and can only be added before doing", - "an aggregation or a join." + "be used with Dataset objects, can only be added before doing", + "an aggregation or a join, and cannot be referenced in subsequent", + "pipeline steps until either compute() or collect() is called." 
) ) abort(msg, call = call) diff --git a/r/man/add_filename.Rd b/r/man/add_filename.Rd index ca7ed0e4b1760..93718435a2042 100644 --- a/r/man/add_filename.Rd +++ b/r/man/add_filename.Rd @@ -7,17 +7,32 @@ add_filename() } \value{ -A \code{FieldRef} \code{Expression} that refers to the filename augmented -column. +A \code{FieldRef} \code{\link{Expression}} that refers to the filename +augmented column. } \description{ This function only exists inside \code{arrow} \code{dplyr} queries, and it only is valid when quering on a \code{FileSystemDataset}. } +\details{ +To use filenames generated by this function in subsequent pipeline steps, you +must either call \code{\link[dplyr:compute]{compute()}} or +\code{\link[dplyr:collect]{collect()}} first. See Examples. +} \examples{ \dontrun{ +open_dataset("nyc-taxi") \%>\% mutate( + file = + add_filename() +) + +# To reference the new file column in a later verb such as mutate(), first +# call compute() open_dataset("nyc-taxi") \%>\% - mutate(file = add_filename()) + mutate(file = add_filename()) \%>\% + compute() \%>\% + mutate(filename_length = nchar(file)) } + } \keyword{internal} diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R index cbeb081d0bae6..b7632084e4622 100644 --- a/r/tests/testthat/test-dataset.R +++ b/r/tests/testthat/test-dataset.R @@ -1440,8 +1440,9 @@ test_that("can add in augmented fields", { error_regex <- paste( "`add_filename()` or use of the `__filename` augmented field can only", - "be used with with Dataset objects, and can only be added before doing", - "an aggregation or a join." + "be used with Dataset objects, can only be added before doing", + "an aggregation or a join, and cannot be referenced in subsequent", + "pipeline steps until either compute() or collect() is called." ) # errors appropriately with ArrowTabular objects