Skip to content

Commit

Permalink
Better support for stringr in Snowflake
Browse files Browse the repository at this point in the history
 * Added support for str_starts() and str_ends() by using Snowflake's
REGEXP_INSTR() function
 * Refactored str_detect() to use Snowflake's CONTAINS() function
instead of hacking with REGEXP() which anchors the start and end by
default
  • Loading branch information
nathanhaigh committed Nov 10, 2023
1 parent 388a6ee commit f67a525
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 18 deletions.
41 changes: 25 additions & 16 deletions R/backend-snowflake.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,23 +38,32 @@ sql_translation.Snowflake <- function(con) {
str_locate = function(string, pattern) {
  # Emit POSITION() with the pattern as the first argument and the searched
  # string as the second.
  position_sql <- sql_expr(POSITION(!!pattern, !!string))
  position_sql
},
# REGEXP on Snowflake "implicitly anchors a pattern at both ends", which
# str_detect does not. Left- and right-pad `pattern` with .* to get
# str_detect-like behavior
# Translation of stringr's str_detect() for the Snowflake backend.
#
# NOTE(review): this span reads like a rendered diff that interleaves two
# implementations. The sql_str_pattern_switch() call below is neither assigned
# nor returned, so as written the function's value comes only from the
# CONTAINS() branch at the end. Confirm which lines are live in the real file.
str_detect = function(string, pattern, negate = FALSE) {
# Variant 1: hand `string`/`pattern`/`negate` to sql_str_pattern_switch()
# together with one handler for fixed patterns and one for regex patterns.
sql_str_pattern_switch(
string = string,
pattern = {{ pattern }},
negate = negate,
f_fixed = sql_str_detect_fixed_instr("detect"),
f_regex = function(string, pattern, negate = FALSE) {
# The pattern is wrapped in ".*" on both sides before being passed to REGEXP,
# so that a match anywhere in the string counts.
if (isTRUE(negate)) {
sql_expr(!(((!!string)) %REGEXP% (".*" || (!!pattern) || ".*")))
} else {
sql_expr(((!!string)) %REGEXP% (".*" || (!!pattern) || ".*"))
}
}
)
# Variant 2: translate to CONTAINS(string, pattern), negated when `negate`
# is TRUE, using the connection currently in scope for translation.
# NOTE(review): Snowflake documents CONTAINS() as a substring test, so
# `pattern` would presumably be matched literally rather than as a regular
# expression -- confirm this is the intended str_detect() behaviour.
con <- sql_current_con()

if (negate) {
translate_sql(!CONTAINS(!!string, !!pattern), con = con)
} else {
translate_sql(CONTAINS(!!string, !!pattern), con = con)
}
},
str_starts = function(string, pattern, negate = FALSE) {
  # Translation of stringr's str_starts(): compare the position reported by
  # REGEXP_INSTR() for `pattern` in `string` against 1, i.e. a match that
  # begins at the first character. With `negate` the comparison is flipped.
  connection <- sql_current_con()

  if (negate) {
    return(
      translate_sql(
        REGEXP_INSTR(!!string, !!pattern) != 1L,
        con = connection
      )
    )
  }

  translate_sql(
    REGEXP_INSTR(!!string, !!pattern) == 1L,
    con = connection
  )
},
str_ends = function(string, pattern, negate = FALSE) {
  # Translation of stringr's str_ends(): REGEXP_INSTR() is called with the
  # extra arguments 1, 1, 1 and its result is compared with LENGTH(string) + 1,
  # i.e. the position just past the last character. With `negate` the
  # comparison is flipped.
  # NOTE(review): per the Snowflake docs the extra arguments appear to mean
  # "start at position 1, first occurrence, return the position after the
  # match". If so, only the first occurrence is inspected, and a string where
  # the pattern also occurs earlier (e.g. 'abab' with 'ab') would not be
  # reported as ending with it -- confirm.
  connection <- sql_current_con()

  if (negate) {
    return(
      translate_sql(
        REGEXP_INSTR(!!string, !!pattern, 1L, 1L, 1L) != LENGTH(!!string) + 1L,
        con = connection
      )
    )
  }

  translate_sql(
    REGEXP_INSTR(!!string, !!pattern, 1L, 1L, 1L) == LENGTH(!!string) + 1L,
    con = connection
  )
},
# On Snowflake, REGEXP_REPLACE is used like this:
# REGEXP_REPLACE( <subject> , <pattern> [ , <replacement> ,
Expand Down
8 changes: 6 additions & 2 deletions tests/testthat/test-backend-snowflake.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,19 @@ test_that("custom stringr functions translated correctly", {
# Run every translation below against a simulated Snowflake connection.
local_con(simulate_snowflake())

# str_locate() -> POSITION() with the pattern as first argument.
expect_equal(test_translate_sql(str_locate(x, y)), sql("POSITION(`y`, `x`)"))
# str_detect(): two pairs of expectations for the same calls follow.
# NOTE(review): the REGEXP pair and the CONTAINS pair cannot both hold; this
# looks like old and new lines of a diff shown together -- confirm which pair
# belongs in the real test file.
expect_equal(test_translate_sql(str_detect(x, y)), sql("(`x`) REGEXP ('.*' || `y` || '.*')"))
expect_equal(test_translate_sql(str_detect(x, y, negate = TRUE)), sql("!((`x`) REGEXP ('.*' || `y` || '.*'))"))
expect_equal(test_translate_sql(str_detect(x, y)), sql("CONTAINS(`x`, `y`)"))
expect_equal(test_translate_sql(str_detect(x, y, negate = TRUE)), sql("NOT(CONTAINS(`x`, `y`))"))
# str_replace() / str_remove() carry the trailing 1.0, 1.0 arguments; the
# *_all variants and str_squish() omit them.
expect_equal(test_translate_sql(str_replace(x, y, z)), sql("REGEXP_REPLACE(`x`, `y`, `z`, 1.0, 1.0)"))
expect_equal(test_translate_sql(str_replace(x, "\\d", z)), sql("REGEXP_REPLACE(`x`, '\\\\d', `z`, 1.0, 1.0)"))
expect_equal(test_translate_sql(str_replace_all(x, y, z)), sql("REGEXP_REPLACE(`x`, `y`, `z`)"))
expect_equal(test_translate_sql(str_squish(x)), sql("REGEXP_REPLACE(TRIM(`x`), '\\\\s+', ' ')"))
expect_equal(test_translate_sql(str_remove(x, y)), sql("REGEXP_REPLACE(`x`, `y`, '', 1.0, 1.0)"))
expect_equal(test_translate_sql(str_remove_all(x, y)), sql("REGEXP_REPLACE(`x`, `y`)"))
expect_equal(test_translate_sql(str_trim(x)), sql("TRIM(`x`)"))
# str_starts(): REGEXP_INSTR() result compared with 1 (negated form uses !=).
expect_equal(test_translate_sql(str_starts(x, y)), sql("REGEXP_INSTR(`x`, `y`) = 1"))
expect_equal(test_translate_sql(str_starts(x, y, negate = TRUE)), sql("REGEXP_INSTR(`x`, `y`) != 1"))
# str_ends(): REGEXP_INSTR(..., 1, 1, 1) compared with LENGTH(x) + 1.
expect_equal(test_translate_sql(str_ends(x, y)), sql("REGEXP_INSTR(`x`, `y`, 1, 1, 1) = (LENGTH(`x`) + 1)"))
expect_equal(test_translate_sql(str_ends(x, y, negate = TRUE)), sql("REGEXP_INSTR(`x`, `y`, 1, 1, 1) != (LENGTH(`x`) + 1)"))
})

test_that("aggregates are translated correctly", {
Expand Down

0 comments on commit f67a525

Please sign in to comment.