Merge branch 'main' into b-tally-quosure

tidyverse · Oct 6, 2024 · cb870a9 · cb870a9
2 parents 7d19060 + e4e9a29
commit cb870a9
Show file tree

Hide file tree

Showing 43 changed files with 374 additions and 93 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -41,3 +41,6 @@
 ^doc$
 ^Meta$
 ^CRAN-SUBMISSION$
+^compile_commands\.json$
+^\.cache$
+^\.vscode$
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -10,7 +10,7 @@ on:
   pull_request:
     branches: [main, master]
 
-name: R-CMD-check
+name: R-CMD-check.yaml
 
 permissions: read-all
 
@@ -27,8 +27,8 @@ jobs:
           - {os: macos-latest,   r: 'release'}
 
           - {os: windows-latest, r: 'release'}
-          # use 4.1 to check with rtools40's older compiler
-          - {os: windows-latest, r: '4.1'}
+          # use 4.0 or 4.1 to check with rtools40's older compiler
+          - {os: windows-latest, r: 'oldrel-4'}
 
           - {os: ubuntu-latest,  r: 'devel', http-user-agent: 'release'}
           - {os: ubuntu-latest,  r: 'release'}

diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
@@ -9,7 +9,7 @@ on:
     types: [published]
   workflow_dispatch:
 
-name: pkgdown
+name: pkgdown.yaml
 
 permissions: read-all
 

diff --git a/.github/workflows/pr-commands.yaml b/.github/workflows/pr-commands.yaml
@@ -4,7 +4,7 @@ on:
   issue_comment:
     types: [created]
 
-name: Commands
+name: pr-commands.yaml
 
 permissions: read-all
 
@@ -15,6 +15,8 @@ jobs:
     runs-on: ubuntu-latest
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    permissions:
+      contents: write
     steps:
       - uses: actions/checkout@v4
 
@@ -52,6 +54,8 @@ jobs:
     runs-on: ubuntu-latest
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    permissions:
+      contents: write
     steps:
       - uses: actions/checkout@v4
 

diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
@@ -6,7 +6,7 @@ on:
   pull_request:
     branches: [main, master]
 
-name: test-coverage
+name: test-coverage.yaml
 
 permissions: read-all
 

diff --git a/.gitignore b/.gitignore
@@ -23,3 +23,5 @@ vignettes/*.R
 bench-libs
 doc
 Meta
+compile_commands.json
+.cache
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -0,0 +1,12 @@
+{
+    "version": "0.2.0",
+    "configurations": [
+      {
+        "name": "(lldb) Attach to R",
+        "type": "lldb",
+        "request": "attach",
+        "pid": "${command:pickMyProcess}",
+        "stopOnEntry": false
+      }
+    ]
+  }
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -61,7 +61,8 @@ VignetteBuilder:
     knitr
 Config/Needs/website: tidyverse, shiny, pkgdown, tidyverse/tidytemplate
 Config/testthat/edition: 3
+Config/build/compilation-database: true
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
diff --git a/NEWS.md b/NEWS.md
@@ -1,7 +1,24 @@
 # dplyr (development version)
 
+* `case_when()` now throws a better error if one of the conditions is an array
+  (#6862, @ilovemane).
+
+* `between()` gains a new `ptype` argument, allowing users to specify the
+  type that the inputs are cast to before the comparison. This is particularly
+  useful for ordered factors and other complex types where the default common
+  type behavior might not be ideal (#6906, @JamesHWade).
+
+* Fixed an edge case when coercing data frames to matrices (#7004).
+
+* Fixed an issue where duckplyr's ALTREP data frames were being materialized
+  early due to internal usage of `ncol()` (#7049).
+
 * R >=3.6.0 is now explicitly required (#7026).
 
+* `if_any()` and `if_all()` are now fully consistent with `any()` and `all()`.
+  In particular, when called with empty inputs `if_any()` returns `FALSE` and
+  `if_all()` returns `TRUE` (#7059, @jrwinget).
+
 # dplyr 1.1.4
 
 * `join_by()` now allows its helper functions to be namespaced with `dplyr::`,

diff --git a/R/across.R b/R/across.R
@@ -4,7 +4,7 @@
 #' `across()` makes it easy to apply the same transformation to multiple
 #' columns, allowing you to use [select()] semantics inside "data-masking"
 #' functions like [summarise()] and [mutate()]. See `vignette("colwise")` for
-#'  more details.
+#' more details.
 #'
 #' `if_any()` and `if_all()` apply the same
 #' predicate function to a selection of columns and combine the
@@ -18,6 +18,14 @@
 #' `across()` supersedes the family of "scoped variants" like
 #' `summarise_at()`, `summarise_if()`, and `summarise_all()`.
 #'
+#' @details
+#' When there are no selected columns:
+#'
+#' - `if_any()` will return `FALSE`, consistent with the behavior of
+#'   `any()` when called without inputs.
+#' - `if_all()` will return `TRUE`, consistent with the behavior of
+#'   `all()` when called without inputs.
+#'
 #' @param .cols <[`tidy-select`][dplyr_tidy_select]> Columns to transform.
 #'   You can't select grouping columns because they are already automatically
 #'   handled by the verb (i.e. [summarise()] or [mutate()]).
@@ -133,9 +141,16 @@
 #' iris %>%
 #'   group_by(Species) %>%
 #'   summarise(across(starts_with("Sepal"), mean, .names = "mean_{.col}"))
+#'
 #' iris %>%
 #'   group_by(Species) %>%
-#'   summarise(across(starts_with("Sepal"), list(mean = mean, sd = sd), .names = "{.col}.{.fn}"))
+#'   summarise(
+#'     across(
+#'       starts_with("Sepal"),
+#'       list(mean = mean, sd = sd),
+#'       .names = "{.col}.{.fn}"
+#'     )
+#'   )
 #'
 #' # If a named external vector is used for column selection, .names will use
 #' # those names when constructing the output names
@@ -146,7 +161,9 @@
 #' # When the list is not named, .fn is replaced by the function's position
 #' iris %>%
 #'   group_by(Species) %>%
-#'   summarise(across(starts_with("Sepal"), list(mean, sd), .names = "{.col}.fn{.fn}"))
+#'   summarise(
+#'     across(starts_with("Sepal"), list(mean, sd), .names = "{.col}.fn{.fn}")
+#'   )
 #'
 #' # When the functions in .fns return a data frame, you typically get a
 #' # "packed" data frame back
@@ -164,7 +181,9 @@
 #'
 #' # .unpack can utilize a glue specification if you don't like the defaults
 #' iris %>%
-#'   reframe(across(starts_with("Sepal"), quantile_df, .unpack = "{outer}.{inner}"))
+#'   reframe(
+#'     across(starts_with("Sepal"), quantile_df, .unpack = "{outer}.{inner}")
+#'   )
 #'
 #' # This is also useful inside mutate(), for example, with a multi-lag helper
 #' multilag <- function(x, lags = 1:3) {
@@ -618,9 +637,11 @@ expand_if_across <- function(quo) {
   if (is_call(call, "if_any")) {
     op <- "|"
     if_fn <- "if_any"
+    empty <- FALSE
   } else {
     op <- "&"
     if_fn <- "if_all"
+    empty <- TRUE
   }
 
   context_local("across_if_fn", if_fn)
@@ -634,9 +655,10 @@ expand_if_across <- function(quo) {
   call[[1]] <- quote(across)
   quos <- expand_across(quo_set_expr(quo, call))
 
-  # Select all rows if there are no inputs
+  # Select all rows if there are no inputs for if_all(),
+  # but select no rows if there are no inputs for if_any().
   if (!length(quos)) {
-    return(list(quo(TRUE)))
+    return(list(quo(!!empty)))
   }
 
   combine <- function(x, y) {
@@ -736,7 +758,7 @@ expand_across <- function(quo) {
 
   # Empty expansion
   if (length(vars) == 0L) {
-    return(new_expanded_quosures(list()))
+    return(list())
   }
 
   fns <- setup$fns
@@ -745,7 +767,7 @@ expand_across <- function(quo) {
   # No functions, so just return a list of symbols
   if (is.null(fns)) {
     # TODO: Deprecate and remove the `.fns = NULL` path in favor of `pick()`
-    expressions <- pmap(list(vars, names, seq_along(vars)), function(var, name, k) {
+    exprs <- pmap(list(vars, names, seq_along(vars)), function(var, name, k) {
       quo <- new_quosure(sym(var), empty_env())
       quo <- new_dplyr_quosure(
         quo,
@@ -755,9 +777,8 @@ expand_across <- function(quo) {
         column = var
       )
     })
-    names(expressions) <- names
-    expressions <- new_expanded_quosures(expressions)
-    return(expressions)
+    names(exprs) <- names
+    return(exprs)
   }
 
   n_vars <- length(vars)
@@ -766,8 +787,7 @@ expand_across <- function(quo) {
   seq_vars <- seq_len(n_vars)
   seq_fns  <- seq_len(n_fns)
 
-  expressions <- vector(mode = "list", n_vars * n_fns)
-  columns <- character(n_vars * n_fns)
+  exprs <- new_list(n_vars * n_fns, names = names)
 
   k <- 1L
   for (i in seq_vars) {
@@ -777,7 +797,7 @@ expand_across <- function(quo) {
       fn_call <- as_across_fn_call(fns[[j]], var, env, mask)
 
       name <- names[[k]]
-      expressions[[k]] <- new_dplyr_quosure(
+      exprs[[k]] <- new_dplyr_quosure(
         fn_call,
         name = name,
         is_named = TRUE,
@@ -789,12 +809,7 @@ expand_across <- function(quo) {
     }
   }
 
-  names(expressions) <- names
-  new_expanded_quosures(expressions)
-}
-
-new_expanded_quosures <- function(x) {
-  structure(x, class = "dplyr_expanded_quosures")
+  exprs
 }
 
 as_across_fn_call <- function(fn, var, env, mask) {

diff --git a/R/all-equal.R b/R/all-equal.R
@@ -60,7 +60,7 @@ equal_data_frame <- function(x, y, ignore_col_order = TRUE, ignore_row_order = T
     return("Different number of rows.")
   }
 
-  if (ncol(x) == 0L) {
+  if (df_n_col(x) == 0L) {
     return(TRUE)
   }
 

diff --git a/R/arrange.R b/R/arrange.R
@@ -225,7 +225,7 @@ sort_key_generator <- function(locale) {
 # ------------------------------------------------------------------------------
 
 dplyr_order_legacy <- function(data, direction = "asc") {
-  if (ncol(data) == 0L) {
+  if (df_n_col(data) == 0L) {
     # Work around `order(!!!list())` returning `NULL`
     return(seq_len(nrow(data)))
   }

diff --git a/R/data-mask.R b/R/data-mask.R
@@ -11,7 +11,22 @@ DataMask <- R6Class("DataMask",
       frame <- caller_env(n = 2)
       local_mask(self, frame)
 
-      names_bindings <- chr_unserialise_unicode(names2(data))
+      names <- names(data)
+
+      if (is.null(names)) {
+        cli::cli_abort(
+          "Can't transform a data frame with `NULL` names.",
+          call = error_call
+        )
+      }
+      if (vec_any_missing(names)) {
+        cli::cli_abort(
+          "Can't transform a data frame with missing names.",
+          call = error_call
+        )
+      }
+
+      names_bindings <- chr_unserialise_unicode(names)
       if (any(names_bindings == "")) {
         # `NULL` and `NA` names were rejected above; this catches `""` names
         abort("Can't transform a data frame with `NA` or `\"\"` names.", call = error_call)

diff --git a/R/data-storms.R b/R/data-storms.R
@@ -10,7 +10,7 @@
 #'   <https://github.com/tidyverse/dplyr/blob/main/data-raw/storms.R>
 #'
 #' @format A tibble with `r format(nrow(storms), big.mark = ",")` observations
-#'   and `r ncol(storms)` variables:
+#'   and `r df_n_col(storms)` variables:
 #' \describe{
 #' \item{name}{Storm Name}
 #' \item{year,month,day}{Date of report}

diff --git a/R/funs.R b/R/funs.R
@@ -5,14 +5,19 @@
 #'
 #' @details
 #' `x`, `left`, and `right` are all cast to their common type before the
-#' comparison is made.
+#' comparison is made. Use the `ptype` argument to specify the type manually.
+#'
+#' @inheritParams rlang::args_dots_empty
 #'
 #' @param x A vector
 #' @param left,right Boundary values. Both `left` and `right` are recycled to
 #'   the size of `x`.
+#' @param ptype An optional prototype giving the type that `x`, `left`, and
+#'   `right` are cast to before the comparison. The default is to compute their
+#'   common type using [vctrs::vec_cast_common()].
 #'
 #' @returns
-#' A logical vector the same size as `x`.
+#' A logical vector the same size as `x`; `ptype` only sets the comparison type.
 #'
 #' @seealso
 #' [join_by()] if you are looking for documentation for the `between()` overlap
@@ -27,15 +32,26 @@
 #'
 #' # On a tibble using `filter()`
 #' filter(starwars, between(height, 100, 150))
-between <- function(x, left, right) {
+#'
+#' # Using the `ptype` argument with ordered factors, where otherwise everything
+#' # is cast to the common type of character before the comparison
+#' x <- ordered(
+#'   c("low", "medium", "high", "medium"),
+#'   levels = c("low", "medium", "high")
+#' )
+#' between(x, "medium", "high")
+#' between(x, "medium", "high", ptype = x)
+between <- function(x, left, right, ..., ptype = NULL) {
+  check_dots_empty0(...)
+
   args <- list(x = x, left = left, right = right)
 
   # Common type of all inputs
-  args <- vec_cast_common(!!!args)
+  args <- vec_cast_common(!!!args, .to = ptype)
   x <- args$x
   args$x <- NULL
 
-  # But recycle to size of `x`
+  # Recycle to size of `x`
   args <- vec_recycle_common(!!!args, .size = vec_size(x))
   left <- args$left
   right <- args$right
@@ -49,7 +65,7 @@ between <- function(x, left, right) {
   left & right
 }
 
-#' Cumulativate versions of any, all, and mean
+#' Cumulative versions of any, all, and mean
 #'
 #' dplyr provides `cumall()`, `cumany()`, and `cummean()` to complete R's set
 #' of cumulative functions.

diff --git a/R/generics.R b/R/generics.R
@@ -221,7 +221,7 @@ dplyr_reconstruct.rowwise_df <- function(data, template) {
 }
 
 dplyr_col_select <- function(.data, loc, error_call = caller_env()) {
-  loc <- vec_as_location(loc, n = ncol(.data), names = names(.data))
+  loc <- vec_as_location(loc, n = df_n_col(.data), names = names(.data))
 
   out <- .data[loc]
   if (!inherits(out, "data.frame")) {