diff --git a/NEWS.md b/NEWS.md index abb7240cc9..9df8a4c1e0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,10 @@ * R >=3.6.0 is now explicitly required (#7026). +* `if_any()` and `if_all()` are now fully consistent with `any()` and `all()`. + In particular, when called with empty inputs `if_any()` returns `FALSE` and + `if_all()` returns `TRUE` (#7059, @jrwinget). + # dplyr 1.1.4 * `join_by()` now allows its helper functions to be namespaced with `dplyr::`, diff --git a/R/across.R b/R/across.R index 8262ef6b1e..e42fe28ed5 100644 --- a/R/across.R +++ b/R/across.R @@ -4,7 +4,7 @@ #' `across()` makes it easy to apply the same transformation to multiple #' columns, allowing you to use [select()] semantics inside in "data-masking" #' functions like [summarise()] and [mutate()]. See `vignette("colwise")` for -#' more details. +#' more details. #' #' `if_any()` and `if_all()` apply the same #' predicate function to a selection of columns and combine the @@ -18,6 +18,14 @@ #' `across()` supersedes the family of "scoped variants" like #' `summarise_at()`, `summarise_if()`, and `summarise_all()`. #' +#' @details +#' When there are no selected columns: +#' +#' - `if_any()` will return `FALSE`, consistent with the behavior of +#' `any()` when called without inputs. +#' - `if_all()` will return `TRUE`, consistent with the behavior of +#' `all()` when called without inputs. +#' #' @param .cols <[`tidy-select`][dplyr_tidy_select]> Columns to transform. #' You can't select grouping columns because they are already automatically #' handled by the verb (i.e. [summarise()] or [mutate()]). @@ -133,9 +141,16 @@ #' iris %>% #' group_by(Species) %>% #' summarise(across(starts_with("Sepal"), mean, .names = "mean_{.col}")) +#' #' iris %>% #' group_by(Species) %>% -#' summarise(across(starts_with("Sepal"), list(mean = mean, sd = sd), .names = "{.col}.{.fn}")) +#' summarise( +#' across( +#' starts_with("Sepal"), +#' list(mean = mean, sd = sd), +#' .names = "{.col}.{.fn}" +#' ) +#' ) #' #' # If a named external vector is used for column selection, .names will use #' # those names when constructing the output names @@ -146,7 +161,9 @@ #' # When the list is not named, .fn is replaced by the function's position #' iris %>% #' group_by(Species) %>% -#' summarise(across(starts_with("Sepal"), list(mean, sd), .names = "{.col}.fn{.fn}")) +#' summarise( +#' across(starts_with("Sepal"), list(mean, sd), .names = "{.col}.fn{.fn}") +#' ) #' #' # When the functions in .fns return a data frame, you typically get a #' # "packed" data frame back @@ -164,7 +181,9 @@ #' #' # .unpack can utilize a glue specification if you don't like the defaults #' iris %>% -#' reframe(across(starts_with("Sepal"), quantile_df, .unpack = "{outer}.{inner}")) +#' reframe( +#' across(starts_with("Sepal"), quantile_df, .unpack = "{outer}.{inner}") +#' ) #' #' # This is also useful inside mutate(), for example, with a multi-lag helper #' multilag <- function(x, lags = 1:3) { @@ -618,9 +637,11 @@ expand_if_across <- function(quo) { if (is_call(call, "if_any")) { op <- "|" if_fn <- "if_any" + empty <- FALSE } else { op <- "&" if_fn <- "if_all" + empty <- TRUE } context_local("across_if_fn", if_fn) @@ -634,9 +655,10 @@ expand_if_across <- function(quo) { call[[1]] <- quote(across) quos <- expand_across(quo_set_expr(quo, call)) - # Select all rows if there are no inputs + # Select all rows if there are no inputs for if_all(), + # but select no rows if there are no inputs for if_any(). if (!length(quos)) { - return(list(quo(TRUE))) + return(list(quo(!!empty))) } combine <- function(x, y) { diff --git a/man/across.Rd b/man/across.Rd index 7907d0a9dd..51afbbfc90 100644 --- a/man/across.Rd +++ b/man/across.Rd @@ -86,6 +86,15 @@ of them, then you probably want to use \code{\link[=pick]{pick()}} instead. \code{across()} supersedes the family of "scoped variants" like \code{summarise_at()}, \code{summarise_if()}, and \code{summarise_all()}. } +\details{ +When there are no selected columns: +\itemize{ +\item \code{if_any()} will return \code{FALSE}, consistent with the behavior of +\code{any()} when called without inputs. +\item \code{if_all()} will return \code{TRUE}, consistent with the behavior of +\code{all()} when called without inputs. +} +} \section{Timing of evaluation}{ R code in dplyr verbs is generally evaluated once per group. @@ -177,9 +186,16 @@ iris \%>\% iris \%>\% group_by(Species) \%>\% summarise(across(starts_with("Sepal"), mean, .names = "mean_{.col}")) + iris \%>\% group_by(Species) \%>\% - summarise(across(starts_with("Sepal"), list(mean = mean, sd = sd), .names = "{.col}.{.fn}")) + summarise( + across( + starts_with("Sepal"), + list(mean = mean, sd = sd), + .names = "{.col}.{.fn}" + ) + ) # If a named external vector is used for column selection, .names will use # those names when constructing the output names @@ -190,7 +206,9 @@ iris \%>\% # When the list is not named, .fn is replaced by the function's position iris \%>\% group_by(Species) \%>\% - summarise(across(starts_with("Sepal"), list(mean, sd), .names = "{.col}.fn{.fn}")) + summarise( + across(starts_with("Sepal"), list(mean, sd), .names = "{.col}.fn{.fn}") + ) # When the functions in .fns return a data frame, you typically get a # "packed" data frame back @@ -208,7 +226,9 @@ iris \%>\% # .unpack can utilize a glue specification if you don't like the defaults iris \%>\% - reframe(across(starts_with("Sepal"), quantile_df, .unpack = "{outer}.{inner}")) + reframe( + across(starts_with("Sepal"), quantile_df, .unpack = "{outer}.{inner}") + ) # This is also useful inside mutate(), for example, with a multi-lag helper multilag <- function(x, lags = 1:3) { diff --git a/tests/testthat/test-across.R b/tests/testthat/test-across.R index d2116c2c22..a23dde57ac 100644 --- a/tests/testthat/test-across.R +++ b/tests/testthat/test-across.R @@ -870,7 +870,7 @@ test_that("if_any() and if_all() expansions deal with no inputs or single inputs # No inputs expect_equal( filter(d, if_any(starts_with("c"), ~ FALSE)), - filter(d) + filter(d, FALSE) ) expect_equal( filter(d, if_all(starts_with("c"), ~ FALSE)), @@ -888,6 +888,32 @@ test_that("if_any() and if_all() expansions deal with no inputs or single inputs ) }) +test_that("if_any() on zero-column selection behaves like any() (#7059)", { + tbl <- tibble( + x1 = 1:5, + x2 = c(-1, 4, 5, 4, 1), + y = c(1, 4, 2, 4, 9), + ) + + expect_equal( + filter(tbl, if_any(c(), ~ is.na(.x))), + tbl[0, ] + ) +}) + +test_that("if_all() on zero-column selection behaves like all() (#7059)", { + tbl <- tibble( + x1 = 1:5, + x2 = c(-1, 4, 5, 4, 1), + y = c(1, 4, 2, 4, 9), + ) + + expect_equal( + filter(tbl, if_all(c(), ~ is.na(.x))), + tbl + ) +}) + test_that("if_any() and if_all() wrapped deal with no inputs or single inputs", { d <- data.frame(x = 1)