From 5867214ee9ee66b9c5dcb83c7429b2e717f6e2a5 Mon Sep 17 00:00:00 2001 From: Eric Neer Date: Mon, 5 Feb 2024 05:57:29 -0800 Subject: [PATCH] refine `select` inline criteria to keep `arrange`d computed columns --- NEWS.md | 4 ++++ R/verb-select.R | 24 +++++++++++++++++++++++- tests/testthat/_snaps/verb-select.md | 16 ++++++++++++++++ tests/testthat/test-verb-select.R | 8 ++++++++ 4 files changed, 51 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 54b4b0347..c0060ae3b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # dbplyr (development version) +* Refined the `select()` inlining criteria to keep computed columns used to + `arrange()` subqueries that are eliminated by a subsequent select (@ejneer, + #1437). + * `rows_patch(in_place = FALSE)` now works when more than one column should be patched (@gorcha, #1443). diff --git a/R/verb-select.R b/R/verb-select.R index c1ee020a2..2a0cf140f 100644 --- a/R/verb-select.R +++ b/R/verb-select.R @@ -114,7 +114,7 @@ add_select <- function(lazy_query, vars) { } is_select <- is_lazy_select_query(lazy_query) - select_can_be_inlined <- is_bijective_projection(vars, vars_data) || !is_true(lazy_query$distinct) + select_can_be_inlined <- select_can_be_inlined(lazy_query, vars) if (is_select && select_can_be_inlined) { idx <- vctrs::vec_match(vars, vars_data) @@ -131,6 +131,28 @@ add_select <- function(lazy_query, vars) { ) } +select_can_be_inlined <- function(lazy_query, vars) { + vars_data <- op_vars(lazy_query) + + bijective_or_not_distinct <- is_bijective_projection(vars, vars_data) || + !is_true(lazy_query$distinct) + + computed_columns <- purrr::pmap( + lazy_query$select, + function(name, expr, ...) if (is_quosure(expr)) name + ) %>% + purrr::compact() + + order_vars <- purrr::map_chr(lazy_query$order_by, as_label) + + all( + # computed columns used for ordering (if any) must also be + # selected to inline the query + intersect(computed_columns, order_vars) %in% vars, + bijective_or_not_distinct + ) +} + is_bijective_projection <- function(vars, names_prev) { vars <- unname(vars) identical(sort(vars), names_prev) diff --git a/tests/testthat/_snaps/verb-select.md b/tests/testthat/_snaps/verb-select.md index 0140cc721..8046580bd 100644 --- a/tests/testthat/_snaps/verb-select.md +++ b/tests/testthat/_snaps/verb-select.md @@ -181,6 +181,22 @@ Error in `select()`: ! This tidyselect interface doesn't support predicates. +# computed columns are not inlined away + + Code + lf %>% mutate(z = 1) %>% arrange(x, z) %>% select(x) + Condition + Warning: + ORDER BY is ignored in subqueries without LIMIT + i Do you need to move arrange() later in the pipeline or use window_order() instead? + Output + + SELECT `x` + FROM ( + SELECT `df`.*, 1.0 AS `z` + FROM `df` + ) AS `q01` + # multiple selects are collapsed Code diff --git a/tests/testthat/test-verb-select.R b/tests/testthat/test-verb-select.R index 62e567769..f0fb8d48b 100644 --- a/tests/testthat/test-verb-select.R +++ b/tests/testthat/test-verb-select.R @@ -284,6 +284,14 @@ test_that("where() isn't suppored", { }) }) +test_that("computed columns are not inlined away", { + lf <- lazy_frame(x = 1, y = 2) + + expect_snapshot({ + lf %>% mutate(z = 1) %>% arrange(x, z) %>% select(x) + }) +}) + # sql_render -------------------------------------------------------------- test_that("multiple selects are collapsed", {