From 219d4c7a85232e4c8ae8027e0420991ece365a16 Mon Sep 17 00:00:00 2001 From: jiang1997 Date: Thu, 28 Jul 2022 13:14:03 +0800 Subject: [PATCH] feat: add lexical and syntax support for rlike (#41) * add lexical and syntax support for rlike * add standalone_expression test and modify the related test output * fix typo and sign commit --- zetasql/parser/bison_parser.y | 35 ++++++++++++++++++- zetasql/parser/flex_tokenizer.l | 5 +-- zetasql/parser/keywords.cc | 1 + zetasql/parser/keywords_test.cc | 2 +- zetasql/parser/parse_tree.cc | 2 ++ zetasql/parser/parse_tree_manual.h | 2 ++ zetasql/parser/testdata/is_distinct.test | 2 +- .../testdata/standalone_expression.test | 10 ++++++ 8 files changed, 54 insertions(+), 5 deletions(-) diff --git a/zetasql/parser/bison_parser.y b/zetasql/parser/bison_parser.y index 2983c5559..a2026cde7 100644 --- a/zetasql/parser/bison_parser.y +++ b/zetasql/parser/bison_parser.y @@ -556,7 +556,7 @@ class DashedIdentifierTmpNode final : public zetasql::ASTNode { %left "AND" %left "XOR" %left UNARY_NOT_PRECEDENCE -%nonassoc "=" "==" "<>" ">" "<" ">=" "<=" "!=" "LIKE" "ILIKE" "IN" "DISTINCT" "BETWEEN" "IS" "NOT_SPECIAL" +%nonassoc "=" "==" "<>" ">" "<" ">=" "<=" "!=" "LIKE" "ILIKE" "RLIKE" "IN" "DISTINCT" "BETWEEN" "IS" "NOT_SPECIAL" %nonassoc "ESCAPE" %left "|" %left "^" @@ -714,6 +714,7 @@ using zetasql::ASTDropStatement; %token KW_LEFT "LEFT" %token KW_LIKE "LIKE" %token KW_ILIKE "ILIKE" +%token KW_RLIKE "RLIKE" %token KW_LIMIT "LIMIT" %token KW_LOOKUP "LOOKUP" %token KW_MERGE "MERGE" @@ -1385,6 +1386,7 @@ using zetasql::ASTDropStatement; %type is_operator %type like_operator %type ilike_operator +%type rlike_operator %type distinct_operator %type preceding_or_following @@ -5405,6 +5407,15 @@ ilike_operator: } %prec "ILIKE" ; +rlike_operator: + "RLIKE" { $$ = NotKeywordPresence::kAbsent; } %prec "RLIKE" + | "NOT_SPECIAL" "RLIKE" + { + @$ = @2; // Error messages should point at the "RLIKE". 
+ $$ = NotKeywordPresence::kPresent; + } %prec "RLIKE" + ; + // Returns NotKeywordPresence to indicate whether NOT was present. between_operator: "BETWEEN" @@ -5630,6 +5641,27 @@ expression: { $$ = MAKE_NODE(ASTEscapedExpression, @$, {$1, $3}) } + | expression rlike_operator expression %prec "RLIKE" + { + // NOT has lower precedence but can be parsed unparenthesized in the + // rhs because it is not ambiguous. This is not allowed. + if (IsUnparenthesizedNotExpression($3)) { + YYERROR_UNEXPECTED_AND_ABORT_AT(@3); + } + // Bison allows some cases like IN on the left hand side because it's + // not ambiguous. The language doesn't allow this. + if (!$1->IsAllowedInComparison()) { + YYERROR_AND_ABORT_AT( + @2, + "Syntax error: " + "Expression to the left of RLIKE must be parenthesized"); + } + auto* binary_expression = + MAKE_NODE(ASTBinaryExpression, @1, @3, {$1, $3}); + binary_expression->set_is_not($2 == NotKeywordPresence::kPresent); + binary_expression->set_op(zetasql::ASTBinaryExpression::RLIKE); + $$ = binary_expression; + } | expression distinct_operator expression %prec "DISTINCT" { if (parser->language_options() == nullptr @@ -7353,6 +7385,7 @@ reserved_keyword_rule: | "RECURSIVE" | "RESPECT" | "RIGHT" + | "RLIKE" | "ROLLUP" | "ROWS" | "ROWS_RANGE" diff --git a/zetasql/parser/flex_tokenizer.l b/zetasql/parser/flex_tokenizer.l index 1196527f9..b9eb57c25 100644 --- a/zetasql/parser/flex_tokenizer.l +++ b/zetasql/parser/flex_tokenizer.l @@ -512,6 +512,7 @@ left { return BisonParserImpl::token::KW_LEFT; } level { return BisonParserImpl::token::KW_LEVEL; } like { return BisonParserImpl::token::KW_LIKE; } ilike { return BisonParserImpl::token::KW_ILIKE; } +rlike { return BisonParserImpl::token::KW_RLIKE; } limit { return BisonParserImpl::token::KW_LIMIT; } load { return BisonParserImpl::token::KW_LOAD; } lookup { return BisonParserImpl::token::KW_LOOKUP; } @@ -539,14 +540,14 @@ no { return BisonParserImpl::token::KW_NO; } file will cause bad error messages. 
There is no situation where that is valid syntax, so there will never be any rejections as a result. */ -not{whitespace}(between|in|like|ilike|distinct)[^A-Z_0-9] { +not{whitespace}(between|in|like|ilike|rlike|distinct)[^A-Z_0-9] { SET_RETURN_PREFIX_LENGTH(3); if (mode_ == BisonParserMode::kTokenizer) { return BisonParserImpl::token::KW_NOT; } return BisonParserImpl::token::KW_NOT_SPECIAL; } -"!"{whitespace}(between|in|like|ilike|distinct)[^A-Z_0-9] { +"!"{whitespace}(between|in|like|ilike|rlike|distinct)[^A-Z_0-9] { SET_RETURN_PREFIX_LENGTH(1); if (mode_ == BisonParserMode::kTokenizer) { return BisonParserImpl::token::KW_NOT; diff --git a/zetasql/parser/keywords.cc b/zetasql/parser/keywords.cc index fb29999fa..e0eb3ae9c 100644 --- a/zetasql/parser/keywords.cc +++ b/zetasql/parser/keywords.cc @@ -189,6 +189,7 @@ constexpr KeywordInfoPOD kAllKeywords[] = { {"level", KW_LEVEL}, {"like", KW_LIKE, KeywordInfo::kReserved}, {"ilike", KW_ILIKE, KeywordInfo::kReserved}, + {"rlike", KW_RLIKE, KeywordInfo::kReserved}, {"limit", KW_LIMIT, KeywordInfo::kReserved}, {"load", KW_LOAD}, {"lookup", KW_LOOKUP, KeywordInfo::kReserved}, diff --git a/zetasql/parser/keywords_test.cc b/zetasql/parser/keywords_test.cc index b196b879d..05fe83d11 100644 --- a/zetasql/parser/keywords_test.cc +++ b/zetasql/parser/keywords_test.cc @@ -237,7 +237,7 @@ TEST(ParserTest, DontAddNewReservedKeywords) { // allows new queries to work that will not work on older code. // Before changing this, co-ordinate with all engines to make sure the change // is done safely. - EXPECT_EQ(105 /* CAUTION */, num_reserved); + EXPECT_EQ(106 /* CAUTION */, num_reserved); } } // namespace diff --git a/zetasql/parser/parse_tree.cc b/zetasql/parser/parse_tree.cc index a33c9ecc9..722f4f8bb 100644 --- a/zetasql/parser/parse_tree.cc +++ b/zetasql/parser/parse_tree.cc @@ -770,6 +770,8 @@ std::string ASTBinaryExpression::GetSQLForOperator() const { return is_not_ ? 
"IS NOT DISTINCT FROM" : "IS DISTINCT FROM"; case ILIKE: return is_not_ ? "NOT ILIKE" : "ILIKE"; + case RLIKE: + return is_not_ ? "NOT RLIKE" : "RLIKE"; } } diff --git a/zetasql/parser/parse_tree_manual.h b/zetasql/parser/parse_tree_manual.h index 71a9af275..fe2d02f24 100644 --- a/zetasql/parser/parse_tree_manual.h +++ b/zetasql/parser/parse_tree_manual.h @@ -2030,6 +2030,7 @@ class ASTBinaryExpression final : public ASTExpression { XOR, // "XOR" DISTINCT, // "IS DISTINCT FROM" ILIKE, // "ILIKE" + RLIKE, // "RLIKE" }; void set_op(Op op) { op_ = op; } @@ -2048,6 +2049,7 @@ class ASTBinaryExpression final : public ASTExpression { switch (op()) { case LIKE: case ILIKE: + case RLIKE: case IS: case EQ: case NE: diff --git a/zetasql/parser/testdata/is_distinct.test b/zetasql/parser/testdata/is_distinct.test index 4e75f4b68..caef529a4 100644 --- a/zetasql/parser/testdata/is_distinct.test +++ b/zetasql/parser/testdata/is_distinct.test @@ -154,7 +154,7 @@ SELECT # Syntax error SELECT 1 NOT DISTINCT FROM 2; -- -ERROR: Syntax error: Expected keyword BETWEEN or keyword ILIKE or keyword IN or keyword LIKE but got keyword DISTINCT [at 1:14] +ERROR: Syntax error: Unexpected keyword DISTINCT [at 1:14] SELECT 1 NOT DISTINCT FROM 2; ^ == diff --git a/zetasql/parser/testdata/standalone_expression.test b/zetasql/parser/testdata/standalone_expression.test index c874f5a79..912158048 100644 --- a/zetasql/parser/testdata/standalone_expression.test +++ b/zetasql/parser/testdata/standalone_expression.test @@ -48,6 +48,16 @@ BinaryExpression(NOT ILIKE) [0-30] col1 NOT ILIKE '&_' ESCAPE '&' == +# rlike +col1 NOT RLIKE '.a' +-- +BinaryExpression(NOT RLIKE) [0-19] + PathExpression [0-4] + Identifier(col1) [0-4] + StringLiteral('.a') [15-19] +-- +col1 NOT RLIKE '.a' +== abc + @param + @@sysvar + count(*) + (select 1) + (select x from y) --