diff --git a/NEWS.md b/NEWS.md index 45c2e46f0..9abe7a689 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,12 @@ # dbplyr (development version) +* Snowflake (@nathanhaigh, #1406) + * Added support for `str_starts()` and `str_ends()` via `REGEXP_INSTR()` + * Refactored `str_detect()` to use `REGEXP_INSTR()` so now supports + regular expressions. + * Refactored `grepl()` to use `REGEXP_INSTR()` so now supports + case-insensitive matching through `grepl(..., ignore.case = TRUE)` + * Functions qualified with the base namespace are now also translated, e.g. `base::paste0(x, "_1")` is now translated (@mgirlich, #1022). diff --git a/R/backend-snowflake.R b/R/backend-snowflake.R index 5774be3cf..f89998047 100644 --- a/R/backend-snowflake.R +++ b/R/backend-snowflake.R @@ -38,23 +38,38 @@ sql_translation.Snowflake <- function(con) { str_locate = function(string, pattern) { sql_expr(POSITION(!!pattern, !!string)) }, - # REGEXP on Snowflaake "implicitly anchors a pattern at both ends", which - # str_detect does not. Left- and right-pad `pattern` with .* to get - # str_detect-like behavior str_detect = function(string, pattern, negate = FALSE) { - sql_str_pattern_switch( - string = string, - pattern = {{ pattern }}, - negate = negate, - f_fixed = sql_str_detect_fixed_instr("detect"), - f_regex = function(string, pattern, negate = FALSE) { - if (isTRUE(negate)) { - sql_expr(!(((!!string)) %REGEXP% (".*" || (!!pattern) || ".*"))) - } else { - sql_expr(((!!string)) %REGEXP% (".*" || (!!pattern) || ".*")) - } - } - ) + con <- sql_current_con() + + # Snowflake needs backslashes escaped, so we must increase the level of escaping + pattern <- gsub("\\", "\\\\", pattern, fixed = TRUE) + if (negate) { + translate_sql(REGEXP_INSTR(!!string, !!pattern) == 0L, con = con) + } else { + translate_sql(REGEXP_INSTR(!!string, !!pattern) != 0L, con = con) + } + }, + str_starts = function(string, pattern, negate = FALSE) { + con <- sql_current_con() + + # Snowflake needs backslashes escaped, so we must increase the level of escaping + pattern <- gsub("\\", "\\\\", pattern, fixed = TRUE) + if (negate) { + translate_sql(REGEXP_INSTR(!!string, !!pattern) != 1L, con = con) + } else { + translate_sql(REGEXP_INSTR(!!string, !!pattern) == 1L, con = con) + } + }, + str_ends = function(string, pattern, negate = FALSE) { + con <- sql_current_con() + + # Snowflake needs backslashes escaped, so we must increase the level of escaping + pattern <- gsub("\\", "\\\\", pattern, fixed = TRUE) + if (negate) { + translate_sql(REGEXP_INSTR(!!string, !!pattern, 1L, 1L, 1L) != LENGTH(!!string) + 1L, con = con) + } else { + translate_sql(REGEXP_INSTR(!!string, !!pattern, 1L, 1L, 1L) == LENGTH(!!string) + 1L, con = con) + } }, # On Snowflake, REGEXP_REPLACE is used like this: # REGEXP_REPLACE( , [ , , @@ -261,15 +276,19 @@ snowflake_grepl <- function(pattern, perl = FALSE, fixed = FALSE, useBytes = FALSE) { - # https://docs.snowflake.com/en/sql-reference/functions/regexp.html - check_unsupported_arg(ignore.case, FALSE, backend = "Snowflake") + con <- sql_current_con() + check_unsupported_arg(perl, FALSE, backend = "Snowflake") check_unsupported_arg(fixed, FALSE, backend = "Snowflake") check_unsupported_arg(useBytes, FALSE, backend = "Snowflake") - # REGEXP on Snowflaake "implicitly anchors a pattern at both ends", which - # grepl does not. Left- and right-pad `pattern` with .* to get grepl-like - # behavior - sql_expr(((!!x)) %REGEXP% (".*" || !!paste0("(", pattern, ")") || ".*")) + + # https://docs.snowflake.com/en/sql-reference/functions/regexp_instr.html + # REGEXP_INSTR optional parameters: position, occurrance, option, regex_parameters + regexp_parameters <- "c" + if(ignore.case) { regexp_parameters <- "i" } + # Snowflake needs backslashes escaped, so we must increase the level of escaping + pattern <- gsub("\\", "\\\\", pattern, fixed = TRUE) + translate_sql(REGEXP_INSTR(!!x, !!pattern, 1L, 1L, 0L, !!regexp_parameters) != 0L, con = con) } snowflake_round <- function(x, digits = 0L) { diff --git a/tests/testthat/_snaps/backend-snowflake.md b/tests/testthat/_snaps/backend-snowflake.md index e43d3042e..2eef02c36 100644 --- a/tests/testthat/_snaps/backend-snowflake.md +++ b/tests/testthat/_snaps/backend-snowflake.md @@ -1,13 +1,3 @@ -# custom scalar translated correctly - - Code - (expect_error(test_translate_sql(grepl("exp", x, ignore.case = TRUE)))) - Output - - Error in `grepl()`: - ! `ignore.case = TRUE` isn't supported in Snowflake translation. - i It must be FALSE instead. - # pmin() and pmax() respect na.rm Code diff --git a/tests/testthat/test-backend-snowflake.R b/tests/testthat/test-backend-snowflake.R index b1f37b14a..0a6e58157 100644 --- a/tests/testthat/test-backend-snowflake.R +++ b/tests/testthat/test-backend-snowflake.R @@ -2,8 +2,8 @@ test_that("custom scalar translated correctly", { local_con(simulate_snowflake()) expect_equal(test_translate_sql(log10(x)), sql("LOG(10.0, `x`)")) expect_equal(test_translate_sql(round(x, digits = 1.1)), sql("ROUND((`x`) :: FLOAT, 1)")) - expect_equal(test_translate_sql(grepl("exp", x)), sql("(`x`) REGEXP ('.*' || '(exp)' || '.*')")) - expect_snapshot((expect_error(test_translate_sql(grepl("exp", x, ignore.case = TRUE))))) + expect_equal(test_translate_sql(grepl("exp", x)), sql("REGEXP_INSTR(`x`, 'exp', 1, 1, 0, 'c') != 0")) + expect_equal(test_translate_sql(grepl("exp", x, ignore.case = TRUE)), sql("REGEXP_INSTR(`x`, 'exp', 1, 1, 0, 'i') != 0")) }) test_that("pasting translated correctly", { @@ -25,8 +25,8 @@ test_that("custom stringr functions translated correctly", { local_con(simulate_snowflake()) expect_equal(test_translate_sql(str_locate(x, y)), sql("POSITION(`y`, `x`)")) - expect_equal(test_translate_sql(str_detect(x, y)), sql("(`x`) REGEXP ('.*' || `y` || '.*')")) - expect_equal(test_translate_sql(str_detect(x, y, negate = TRUE)), sql("!((`x`) REGEXP ('.*' || `y` || '.*'))")) + expect_equal(test_translate_sql(str_detect(x, y)), sql("REGEXP_INSTR(`x`, `y`) != 0")) + expect_equal(test_translate_sql(str_detect(x, y, negate = TRUE)), sql("REGEXP_INSTR(`x`, `y`) = 0")) expect_equal(test_translate_sql(str_replace(x, y, z)), sql("REGEXP_REPLACE(`x`, `y`, `z`, 1.0, 1.0)")) expect_equal(test_translate_sql(str_replace(x, "\\d", z)), sql("REGEXP_REPLACE(`x`, '\\\\d', `z`, 1.0, 1.0)")) expect_equal(test_translate_sql(str_replace_all(x, y, z)), sql("REGEXP_REPLACE(`x`, `y`, `z`)")) @@ -34,6 +34,10 @@ test_that("custom stringr functions translated correctly", { expect_equal(test_translate_sql(str_remove(x, y)), sql("REGEXP_REPLACE(`x`, `y`, '', 1.0, 1.0)")) expect_equal(test_translate_sql(str_remove_all(x, y)), sql("REGEXP_REPLACE(`x`, `y`)")) expect_equal(test_translate_sql(str_trim(x)), sql("TRIM(`x`)")) + expect_equal(test_translate_sql(str_starts(x, y)), sql("REGEXP_INSTR(`x`, `y`) = 1")) + expect_equal(test_translate_sql(str_starts(x, y, negate = TRUE)), sql("REGEXP_INSTR(`x`, `y`) != 1")) + expect_equal(test_translate_sql(str_ends(x, y)), sql("REGEXP_INSTR(`x`, `y`, 1, 1, 1) = (LENGTH(`x`) + 1)")) + expect_equal(test_translate_sql(str_ends(x, y, negate = TRUE)), sql("REGEXP_INSTR(`x`, `y`, 1, 1, 1) != (LENGTH(`x`) + 1)")) }) test_that("aggregates are translated correctly", {