Skip to content

Commit

Permalink
Order by multiple variables in slice_min/max() (#1323)
Browse files Browse the repository at this point in the history
  • Loading branch information
mgirlich authored Jul 3, 2023
1 parent f36cf0b commit 7b556ee
Show file tree
Hide file tree
Showing 9 changed files with 165 additions and 42 deletions.
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# dbplyr (development version)

* `slice_min/max()` can now order by multiple variables like dplyr, e.g. use
`slice_min(lf, tibble(x, y))` (@mgirlich, #1167).

* The functions `simulate_vars()` and `simulate_vars_is_typed()` were removed
as they weren't used anymore and tidyselect now offers `tidyselect_data_proxy()`
and `tidyselect_data_has_predicates()` (@mgirllich, #1199).
Expand Down
10 changes: 9 additions & 1 deletion R/tidyeval.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,22 @@ capture_dot <- function(.data, x) {
partial_eval(enquo(x), data = .data)
}

partial_eval_dots <- function(.data, ..., .named = TRUE, error_call = caller_env()) {
partial_eval_dots <- function(.data,
...,
# .env = NULL,
.named = TRUE,
error_call = caller_env()) {
# corresponds to `capture_dots()`
# browser()
dots <- as.list(enquos(..., .named = .named))
dot_names <- names2(exprs(...))
was_named <- have_name(exprs(...))

for (i in seq_along(dots)) {
dot <- dots[[i]]
# if (!is_null(.env)) {
# dot <- quo_set_env(dot, .env)
# }
dot_name <- dot_names[[i]]
dots[[i]] <- partial_eval_quo(dot, .data, error_call, dot_name, was_named[[i]])
}
Expand Down
26 changes: 2 additions & 24 deletions R/translate-sql-window.R
Original file line number Diff line number Diff line change
Expand Up @@ -129,13 +129,13 @@ win_rank <- function(f) {
force(f)
function(order = NULL) {
group <- win_current_group()
order <- prepare_win_rank_over(enexpr(order), f = f)
order <- unwrap_order_expr({{ order }}, f = f)
con <- sql_current_con()

if (!is_null(order)) {
order_over <- translate_sql_(order, con = con)

order_symbols <- purrr::map_if(order, ~ is_call(.x, "desc", n = 1L), ~ call_args(.x)[[1L]])
order_symbols <- purrr::map_if(order, ~ quo_is_call(.x, "desc", n = 1L), ~ call_args(.x)[[1L]])

is_na_exprs <- purrr::map(order_symbols, ~ expr(is.na(!!.x)))
any_na_expr <- purrr::reduce(is_na_exprs, ~ call2("|", .x, .y))
Expand Down Expand Up @@ -164,28 +164,6 @@ win_rank <- function(f) {
}
}

prepare_win_rank_over <- function(order, f, error_call = caller_env()) {
if (is.null(order)) {
return()
}

if (is_call(order, "c")) {
args <- call_args(order)
tibble_expr <- expr_text(expr(tibble(!!!args)))
cli_abort(c(
"Can't use `c()` in {.fun {f}}",
i = "Did you mean to use `{tibble_expr}` instead?"
))
}

if (is_call(order, "tibble")) {
tibble_args <- call_args(order)
return(tibble_args)
}

list(order)
}

#' @rdname win_over
#' @export
win_aggregate <- function(f) {
Expand Down
48 changes: 48 additions & 0 deletions R/verb-arrange.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,51 @@ add_arrange <- function(.data, dots, .by_group) {
lazy_query$order_by <- dots
lazy_query
}

unwrap_order_expr <- function(order_by, f, error_call = caller_env()) {
order_by_quo <- quo({{ order_by }})
order_by_env <- quo_get_env(order_by_quo)
order_by_expr <- quo_get_expr(order_by_quo)

if (is.null(order_by_expr)) {
return()
}

if (is_call(order_by_expr, "c")) {
args <- call_args(order_by_expr)
tibble_expr <- expr_text(expr(tibble(!!!args)))
cli_abort(c(
"Can't use `c()` in {.fun {f}}",
i = "Did you mean to use `{tibble_expr}` instead?"
), call = error_call)
}

if (is_call(order_by_expr, c("tibble", "data.frame"))) {
tibble_args <- call_args(order_by_expr)
# browser()
out <- as_quosures(tibble_args, env = order_by_env)
return(out)
}

list(order_by_quo)
}

swap_order_direction <- function(x) {
is_quo <- is_quosure(x)
if (is_quo) {
env <- quo_get_env(x)
x <- quo_get_expr(x)
}

if (is_call(x, "desc", n = 1)) {
out <- call_args(x)[[1]]
} else {
out <- expr(desc(!!x))
}

if (is_quo) {
out <- as_quosure(out, env)
}

out
}
12 changes: 0 additions & 12 deletions R/verb-fill.R
Original file line number Diff line number Diff line change
Expand Up @@ -70,18 +70,6 @@ fill.tbl_lazy <- function(.data, ..., .direction = c("down", "up", "updown", "do
)
}

swap_order_direction <- function(x) {
if (is_quosure(x)) {
x <- quo_get_expr(x)
}

if (is_call(x, "desc", n = 1)) {
call_args(x)[[1]]
} else {
expr(desc(!!x))
}
}

dbplyr_fill0 <- function(.con, .data, cols_to_fill, order_by_cols, .direction) {
UseMethod("dbplyr_fill0")
}
Expand Down
12 changes: 8 additions & 4 deletions R/verb-slice.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,10 @@ slice_min.tbl_lazy <- function(.data,
na_rm = TRUE) {
size <- check_slice_size(n, prop)
check_unsupported_arg(na_rm, allowed = TRUE)
order_by <- unwrap_order_expr({{ order_by }}, f = "slice_min")
slice_by(
.data,
{{order_by}},
order_by,
size,
{{ by }},
with_ties = with_ties
Expand All @@ -100,9 +101,11 @@ slice_max.tbl_lazy <- function(.data,
na_rm = TRUE) {
size <- check_slice_size(n, prop)
check_unsupported_arg(na_rm, allowed = TRUE)
order_by <- unwrap_order_expr({{ order_by }}, f = "slice_max")
order_by <- purrr::map(order_by, swap_order_direction)
slice_by(
.data,
desc({{order_by}}),
order_by,
size,
{{ by }},
with_ties = with_ties
Expand Down Expand Up @@ -132,7 +135,8 @@ slice_sample.tbl_lazy <- function(.data,
cli_abort("Sampling with replacement is not supported on database backends")
}

slice_by(.data, runif(n()), size, {{ by }}, with_ties = FALSE)
order_by <- exprs(runif(n()))
slice_by(.data, order_by, size, {{ by }}, with_ties = FALSE)
}

slice_by <- function(.data, order_by, size, .by, with_ties = FALSE) {
Expand Down Expand Up @@ -163,7 +167,7 @@ slice_by <- function(.data, order_by, size, .by, with_ties = FALSE) {

# must use `add_order()` as `window_order()` only allows variables
# this is only okay to do because the previous, legal window order is restored
dots <- partial_eval_dots(.data, !!!quos({{order_by}}), .named = FALSE)
dots <- partial_eval_dots(.data, !!!order_by, .named = FALSE)
.data$lazy_query <- add_order(.data, dots)

out <- filter(.data, !!window_fun) %>%
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/_snaps/translate-sql-window.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
Code
test_translate_sql(row_number(c(x)))
Condition
Error in `prepare_win_rank_over()`:
Error in `row_number()`:
! Can't use `c()` in `ROW_NUMBER()`
i Did you mean to use `tibble(x)` instead?

Expand Down
71 changes: 71 additions & 0 deletions tests/testthat/_snaps/verb-slice.md
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,74 @@
Error in `slice_sample()`:
! Can't supply `by` when `data` is a grouped data frame.

# slice_min/max can order by multiple columns

Code
lf %>% slice_min(tibble(x))
Output
<SQL>
SELECT `x`, `y`
FROM (
SELECT `df`.*, RANK() OVER (ORDER BY `x`) AS `col01`
FROM `df`
) AS `q01`
WHERE (`col01` <= 1)
Code
lf %>% slice_min(tibble::tibble(x, y))
Output
<SQL>
SELECT `x`, `y`
FROM (
SELECT `df`.*, RANK() OVER (ORDER BY `x`, `y`) AS `col01`
FROM `df`
) AS `q01`
WHERE (`col01` <= 1)
Code
lf %>% slice_min(data.frame(y, x))
Output
<SQL>
SELECT `x`, `y`
FROM (
SELECT `df`.*, RANK() OVER (ORDER BY `y`, `x`) AS `col01`
FROM `df`
) AS `q01`
WHERE (`col01` <= 1)

---

Code
lf %>% slice_max(tibble(x))
Output
<SQL>
SELECT `x`, `y`
FROM (
SELECT `df`.*, RANK() OVER (ORDER BY `x` DESC) AS `col01`
FROM `df`
) AS `q01`
WHERE (`col01` <= 1)
Code
lf %>% slice_max(tibble::tibble(x, y))
Output
<SQL>
SELECT `x`, `y`
FROM (
SELECT `df`.*, RANK() OVER (ORDER BY `x` DESC, `y` DESC) AS `col01`
FROM `df`
) AS `q01`
WHERE (`col01` <= 1)
Code
lf %>% slice_max(data.frame(y, x))
Output
<SQL>
SELECT `x`, `y`
FROM (
SELECT `df`.*, RANK() OVER (ORDER BY `y` DESC, `x` DESC) AS `col01`
FROM `df`
) AS `q01`
WHERE (`col01` <= 1)

# slice_min/max informs if order_by uses c()

Can't use `c()` in `slice_min()`
i Did you mean to use `tibble(x, y)` instead?

23 changes: 23 additions & 0 deletions tests/testthat/test-verb-slice.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,26 @@ test_that("slice_sample() works with `by`", {
c(1, 2, 2)
)
})

test_that("slice_min/max can order by multiple columns", {
lf <- lazy_frame(x = 1, y = 1)

expect_snapshot({
lf %>% slice_min(tibble(x))
lf %>% slice_min(tibble::tibble(x, y))
lf %>% slice_min(data.frame(y, x))
})
expect_snapshot({
lf %>% slice_max(tibble(x))
lf %>% slice_max(tibble::tibble(x, y))
lf %>% slice_max(data.frame(y, x))
})
})

test_that("slice_min/max informs if order_by uses c()", {
lf <- lazy_frame(x = 1, y = 1)

expect_snapshot_error(
lf %>% slice_min(c(x, y))
)
})

0 comments on commit 7b556ee

Please sign in to comment.