From 45117f65f3d90579a77bd525a75ac568a888ef0a Mon Sep 17 00:00:00 2001
From: Andrew Lamb
Date: Wed, 31 Jul 2024 15:41:30 -0400
Subject: [PATCH] Minor: Add tests for StringView / character functions

---
 .../sqllogictest/test_files/string_view.slt   | 364 ++++++++++++++++++
 1 file changed, 364 insertions(+)

diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt
index 3f9a4793f655..c3b8916014e4 100644
--- a/datafusion/sqllogictest/test_files/string_view.slt
+++ b/datafusion/sqllogictest/test_files/string_view.slt
@@ -322,6 +322,370 @@ logical_plan
 03)----TableScan: test projection=[column1_utf8, column2_utf8, column1_utf8view]
 
 
+# Ensure string functions use native StringView implementation
+# and do not fall back to Utf8 or LargeUtf8
+# Goal: no casts to Utf8 in the plans below; plans that still cast are marked TODO
+
+## Ensure no casts for LIKE/ILIKE
+query TT
+EXPLAIN SELECT
+  column1_utf8view like 'foo' as "like",
+  column1_utf8view ilike 'foo' as "ilike"
+FROM test;
+----
+logical_plan
+01)Projection: test.column1_utf8view LIKE Utf8View("foo") AS like, test.column1_utf8view ILIKE Utf8View("foo") AS ilike
+02)--TableScan: test projection=[column1_utf8view]
+
+
+
+## Ensure no casts for ASCII
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  ASCII(column1_utf8view) AS l
+FROM test;
+----
+logical_plan
+01)Projection: ascii(CAST(test.column1_utf8view AS Utf8)) AS l
+02)--TableScan: test projection=[column1_utf8view]
+
+
+## Ensure no casts for BTRIM
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  BTRIM(column1_utf8view, 'foo') AS l
+FROM test;
+----
+logical_plan
+01)Projection: btrim(CAST(test.column1_utf8view AS Utf8), Utf8("foo")) AS l
+02)--TableScan: test projection=[column1_utf8view]
+
+## Ensure no casts for CHARACTER_LENGTH
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  CHARACTER_LENGTH(column1_utf8view) AS l
+FROM test;
+----
+logical_plan
+01)Projection: character_length(CAST(test.column1_utf8view AS Utf8)) AS l
+02)--TableScan: test projection=[column1_utf8view]
+
+## Ensure no casts for CONCAT
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  concat(column1_utf8view, column2_utf8view) as c
+FROM test;
+----
+logical_plan
+01)Projection: concat(CAST(test.column1_utf8view AS Utf8), CAST(test.column2_utf8view AS Utf8)) AS c
+02)--TableScan: test projection=[column1_utf8view, column2_utf8view]
+
+## Ensure no casts for CONCAT_WS
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  concat_ws(', ', column1_utf8view, column2_utf8view) as c
+FROM test;
+----
+logical_plan
+01)Projection: concat_ws(Utf8(", "), CAST(test.column1_utf8view AS Utf8), CAST(test.column2_utf8view AS Utf8)) AS c
+02)--TableScan: test projection=[column1_utf8view, column2_utf8view]
+
+## Ensure no casts for CONTAINS
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  CONTAINS(column1_utf8view, 'foo') as c1,
+  CONTAINS(column2_utf8view, column2_utf8view) as c2
+FROM test;
+----
+logical_plan
+01)Projection: contains(CAST(test.column1_utf8view AS Utf8), Utf8("foo")) AS c1, contains(__common_expr_1, __common_expr_1) AS c2
+02)--Projection: CAST(test.column2_utf8view AS Utf8) AS __common_expr_1, test.column1_utf8view
+03)----TableScan: test projection=[column1_utf8view, column2_utf8view]
+
+## Ensure no casts for ENDS_WITH
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  ENDS_WITH(column1_utf8view, 'foo') as c1,
+  ENDS_WITH(column2_utf8view, column2_utf8view) as c2
+FROM test;
+----
+logical_plan
+01)Projection: ends_with(CAST(test.column1_utf8view AS Utf8), Utf8("foo")) AS c1, ends_with(__common_expr_1, __common_expr_1) AS c2
+02)--Projection: CAST(test.column2_utf8view AS Utf8) AS __common_expr_1, test.column1_utf8view
+03)----TableScan: test projection=[column1_utf8view, column2_utf8view]
+
+
+## Ensure no casts for INITCAP
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  INITCAP(column1_utf8view) as c
+FROM test;
+----
+logical_plan
+01)Projection: initcap(CAST(test.column1_utf8view AS Utf8)) AS c
+02)--TableScan: test projection=[column1_utf8view]
+
+## Ensure no casts for LEVENSHTEIN
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  levenshtein(column1_utf8view, 'foo') as c1,
+  levenshtein(column1_utf8view, column2_utf8view) as c2
+FROM test;
+----
+logical_plan
+01)Projection: levenshtein(__common_expr_1, Utf8("foo")) AS c1, levenshtein(__common_expr_1, CAST(test.column2_utf8view AS Utf8)) AS c2
+02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view
+03)----TableScan: test projection=[column1_utf8view, column2_utf8view]
+
+## Ensure no casts for LOWER
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  LOWER(column1_utf8view) as c1
+FROM test;
+----
+logical_plan
+01)Projection: lower(CAST(test.column1_utf8view AS Utf8)) AS c1
+02)--TableScan: test projection=[column1_utf8view]
+
+## Ensure no casts for LTRIM
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  LTRIM(column1_utf8view) as c1
+FROM test;
+----
+logical_plan
+01)Projection: ltrim(CAST(test.column1_utf8view AS Utf8)) AS c1
+02)--TableScan: test projection=[column1_utf8view]
+
+## Ensure no casts for LPAD
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  LPAD(column1_utf8view, 12, ' ') as c1
+FROM test;
+----
+logical_plan
+01)Projection: lpad(CAST(test.column1_utf8view AS Utf8), Int64(12), Utf8(" ")) AS c1
+02)--TableScan: test projection=[column1_utf8view]
+
+
+## Ensure no casts for OCTET_LENGTH
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  OCTET_LENGTH(column1_utf8view) as c1
+FROM test;
+----
+logical_plan
+01)Projection: octet_length(CAST(test.column1_utf8view AS Utf8)) AS c1
+02)--TableScan: test projection=[column1_utf8view]
+
+## Ensure no casts for OVERLAY
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  OVERLAY(column1_utf8view PLACING 'foo' FROM 2 ) as c1
+FROM test;
+----
+logical_plan
+01)Projection: overlay(CAST(test.column1_utf8view AS Utf8), Utf8("foo"), Int64(2)) AS c1
+02)--TableScan: test projection=[column1_utf8view]
+
+## REGEXP_LIKE currently rejects Utf8View input with a planning error
+query error DataFusion error: Error during planning: The regexp_like function can only accept strings\. Got Utf8View
+EXPLAIN SELECT
+  REGEXP_LIKE(column1_utf8view, '^https?://(?:www\.)?([^/]+)/.*$') AS k
+FROM test;
+
+## REGEXP_MATCH currently rejects Utf8View input with a planning error
+query error DataFusion error: Error during planning: The regexp_match function can only accept strings\. Got Utf8View
+EXPLAIN SELECT
+  REGEXP_MATCH(column1_utf8view, '^https?://(?:www\.)?([^/]+)/.*$') AS k
+FROM test;
+
+## Ensure no casts for REGEXP_REPLACE
+query TT
+EXPLAIN SELECT
+  REGEXP_REPLACE(column1_utf8view, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k
+FROM test;
+----
+logical_plan
+01)Projection: regexp_replace(test.column1_utf8view, Utf8("^https?://(?:www\.)?([^/]+)/.*$"), Utf8("\1")) AS k
+02)--TableScan: test projection=[column1_utf8view]
+
+
+## Ensure no casts for REPEAT
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  REPEAT(column1_utf8view, 2) as c1
+FROM test;
+----
+logical_plan
+01)Projection: repeat(CAST(test.column1_utf8view AS Utf8), Int64(2)) AS c1
+02)--TableScan: test projection=[column1_utf8view]
+
+## Ensure no casts for REPLACE
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  REPLACE(column1_utf8view, 'foo', 'bar') as c1,
+  REPLACE(column1_utf8view, column2_utf8view, 'bar') as c2
+FROM test;
+----
+logical_plan
+01)Projection: replace(__common_expr_1, Utf8("foo"), Utf8("bar")) AS c1, replace(__common_expr_1, CAST(test.column2_utf8view AS Utf8), Utf8("bar")) AS c2
+02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view
+03)----TableScan: test projection=[column1_utf8view, column2_utf8view]
+
+## Ensure no casts for REVERSE
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  REVERSE(column1_utf8view) as c1
+FROM test;
+----
+logical_plan
+01)Projection: reverse(CAST(test.column1_utf8view AS Utf8)) AS c1
+02)--TableScan: test projection=[column1_utf8view]
+
+## Ensure no casts for RTRIM (default and literal trim characters)
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  RTRIM(column1_utf8view) as c1,
+  RTRIM(column1_utf8view, 'foo') as c2
+FROM test;
+----
+logical_plan
+01)Projection: rtrim(__common_expr_1) AS c1, rtrim(__common_expr_1, Utf8("foo")) AS c2
+02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1
+03)----TableScan: test projection=[column1_utf8view]
+
+## Ensure no casts for RIGHT
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  RIGHT(column1_utf8view, 3) as c2
+FROM test;
+----
+logical_plan
+01)Projection: right(CAST(test.column1_utf8view AS Utf8), Int64(3)) AS c2
+02)--TableScan: test projection=[column1_utf8view]
+
+## Ensure no casts for RPAD
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  RPAD(column1_utf8view, 1) as c1,
+  RPAD(column1_utf8view, 2, column2_utf8view) as c2
+FROM test;
+----
+logical_plan
+01)Projection: rpad(__common_expr_1, Int64(1)) AS c1, rpad(__common_expr_1, Int64(2), CAST(test.column2_utf8view AS Utf8)) AS c2
+02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view
+03)----TableScan: test projection=[column1_utf8view, column2_utf8view]
+
+
+## Ensure no casts for RTRIM (trim characters taken from a Utf8View column)
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  RTRIM(column1_utf8view) as c,
+  RTRIM(column1_utf8view, column2_utf8view) as c1
+FROM test;
+----
+logical_plan
+01)Projection: rtrim(__common_expr_1) AS c, rtrim(__common_expr_1, CAST(test.column2_utf8view AS Utf8)) AS c1
+02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view
+03)----TableScan: test projection=[column1_utf8view, column2_utf8view]
+
+## Ensure no casts for SPLIT_PART
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  SPLIT_PART(column1_utf8view, 'f', 1) as c
+FROM test;
+----
+logical_plan
+01)Projection: split_part(CAST(test.column1_utf8view AS Utf8), Utf8("f"), Int64(1)) AS c
+02)--TableScan: test projection=[column1_utf8view]
+
+## Ensure no casts for STRPOS
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  STRPOS(column1_utf8view, 'f') as c,
+  STRPOS(column1_utf8view, column2_utf8view) as c2
+FROM test;
+----
+logical_plan
+01)Projection: strpos(__common_expr_1, Utf8("f")) AS c, strpos(__common_expr_1, CAST(test.column2_utf8view AS Utf8)) AS c2
+02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view
+03)----TableScan: test projection=[column1_utf8view, column2_utf8view]
+
+## Ensure no casts for SUBSTR
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  SUBSTR(column1_utf8view, 1) as c,
+  SUBSTR(column1_utf8view, 1 ,2) as c2
+FROM test;
+----
+logical_plan
+01)Projection: substr(__common_expr_1, Int64(1)) AS c, substr(__common_expr_1, Int64(1), Int64(2)) AS c2
+02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1
+03)----TableScan: test projection=[column1_utf8view]
+
+## Ensure no casts for STARTS_WITH
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  STARTS_WITH(column1_utf8view, 'foo') as c,
+  STARTS_WITH(column1_utf8view, column2_utf8view) as c2
+FROM test;
+----
+logical_plan
+01)Projection: starts_with(__common_expr_1, Utf8("foo")) AS c, starts_with(__common_expr_1, CAST(test.column2_utf8view AS Utf8)) AS c2
+02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view
+03)----TableScan: test projection=[column1_utf8view, column2_utf8view]
+
+## Ensure no casts for TRANSLATE
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  TRANSLATE(column1_utf8view, 'foo', 'bar') as c
+FROM test;
+----
+logical_plan
+01)Projection: translate(CAST(test.column1_utf8view AS Utf8), Utf8("foo"), Utf8("bar")) AS c
+02)--TableScan: test projection=[column1_utf8view]
+
+## Ensure no casts for FIND_IN_SET
+## TODO file ticket
+query TT
+EXPLAIN SELECT
+  FIND_IN_SET(column1_utf8view, 'a,b,c,d') as c
+FROM test;
+----
+logical_plan
+01)Projection: find_in_set(CAST(test.column1_utf8view AS Utf8), Utf8("a,b,c,d")) AS c
+02)--TableScan: test projection=[column1_utf8view]
+
+
+
+
 statement ok
 drop table test;
 