From f5edfecfbe0b6ff542fe7c1fc15f4e09bf9213cb Mon Sep 17 00:00:00 2001 From: "Schierbeck, Cody" Date: Wed, 6 Mar 2024 09:44:22 -0800 Subject: [PATCH] Documentation and exception update for ascii character classes --- velox/docs/functions/spark/regexp.rst | 4 ++-- velox/functions/sparksql/RegexFunctions.cpp | 2 +- velox/functions/sparksql/tests/RegexFunctionsTest.cpp | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/velox/docs/functions/spark/regexp.rst b/velox/docs/functions/spark/regexp.rst index 93389c6c42bce..d7fdbfee23a8e 100644 --- a/velox/docs/functions/spark/regexp.rst +++ b/velox/docs/functions/spark/regexp.rst @@ -42,7 +42,7 @@ See https://github.com/google/re2/wiki/Syntax for more information. regexp_replace will throw an exception if ``string`` contains an invalid UTF-8 character, or if ``pattern`` does not conform to RE2 syntax: https://github.com/google/re2/wiki/Syntax. - regexp_replace does not support character class union, intersection, or difference and will throw an exception if they are detected within the provided ``pattern``. + regexp_replace does not support named ASCII character classes or character class union, intersection, or difference and will throw an exception if they are detected within the provided ``pattern``. Parameters: @@ -67,7 +67,7 @@ See https://github.com/google/re2/wiki/Syntax for more information. regexp_replace will throw an exception if ``string`` contains an invalid UTF-8 character, if ``position`` is less than 1, or if ``pattern`` does not conform to RE2 syntax: https://github.com/google/re2/wiki/Syntax. - regexp_replace does not support character class union, intersection, or difference and will throw an exception if they are detected within the provided ``pattern``. + regexp_replace does not support named ASCII character classes or character class union, intersection, or difference and will throw an exception if they are detected within the provided ``pattern``. 
This function is 1-indexed, meaning the position of the first character is 1. diff --git a/velox/functions/sparksql/RegexFunctions.cpp b/velox/functions/sparksql/RegexFunctions.cpp index 4222ffdfbb091..5a7b957c0ced6 100644 --- a/velox/functions/sparksql/RegexFunctions.cpp +++ b/velox/functions/sparksql/RegexFunctions.cpp @@ -66,7 +66,7 @@ void checkForCompatiblePattern( } else if (*c == '[') { if (charClassStart) { VELOX_USER_FAIL( - "{} does not support character class union, intersection, " + "{} does not support named ASCII character classes or class union, intersection, " "or difference ([a[b]], [a&&[b]], [a&&[^b]])", functionName); } diff --git a/velox/functions/sparksql/tests/RegexFunctionsTest.cpp b/velox/functions/sparksql/tests/RegexFunctionsTest.cpp index be53249489efa..33c92d64bfa3e 100644 --- a/velox/functions/sparksql/tests/RegexFunctionsTest.cpp +++ b/velox/functions/sparksql/tests/RegexFunctionsTest.cpp @@ -320,13 +320,13 @@ TEST_F(RegexFunctionsTest, regexpReplaceWithEmptyString) { TEST_F(RegexFunctionsTest, regexBadJavaPattern) { VELOX_ASSERT_THROW( testRegexpReplace("[]", "[a[b]]", ""), - "regexp_replace does not support character class union, intersection, or difference ([a[b]], [a&&[b]], [a&&[^b]])"); + "regexp_replace does not support named ASCII character classes or class union, intersection, or difference ([a[b]], [a&&[b]], [a&&[^b]])"); VELOX_ASSERT_THROW( testRegexpReplace("[]", "[a&&[b]]", ""), - "regexp_replace does not support character class union, intersection, or difference ([a[b]], [a&&[b]], [a&&[^b]])"); + "regexp_replace does not support named ASCII character classes or class union, intersection, or difference ([a[b]], [a&&[b]], [a&&[^b]])"); VELOX_ASSERT_THROW( testRegexpReplace("[]", "[a&&[^b]]", ""), - "regexp_replace does not support character class union, intersection, or difference ([a[b]], [a&&[b]], [a&&[^b]])"); + "regexp_replace does not support named ASCII character classes or class union, intersection, or difference 
([a[b]], [a&&[b]], [a&&[^b]])"); }