Skip to content

Commit

Permalink
feat: add lexical and syntax support for rlike (#41)
Browse files Browse the repository at this point in the history
* add lexical and syntax support for rlike

* add standalone_expression test and update the expected output of the related test

* fix typo and sign commit
  • Loading branch information
jiang1997 authored Jul 28, 2022
1 parent 3bd67ed commit 219d4c7
Show file tree
Hide file tree
Showing 8 changed files with 54 additions and 5 deletions.
35 changes: 34 additions & 1 deletion zetasql/parser/bison_parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,7 @@ class DashedIdentifierTmpNode final : public zetasql::ASTNode {
%left "AND"
%left "XOR"
%left UNARY_NOT_PRECEDENCE
%nonassoc "=" "==" "<>" ">" "<" ">=" "<=" "!=" "LIKE" "ILIKE" "IN" "DISTINCT" "BETWEEN" "IS" "NOT_SPECIAL"
%nonassoc "=" "==" "<>" ">" "<" ">=" "<=" "!=" "LIKE" "ILIKE" "RLIKE" "IN" "DISTINCT" "BETWEEN" "IS" "NOT_SPECIAL"
%nonassoc "ESCAPE"
%left "|"
%left "^"
Expand Down Expand Up @@ -714,6 +714,7 @@ using zetasql::ASTDropStatement;
%token KW_LEFT "LEFT"
%token KW_LIKE "LIKE"
%token KW_ILIKE "ILIKE"
%token KW_RLIKE "RLIKE"
%token KW_LIMIT "LIMIT"
%token KW_LOOKUP "LOOKUP"
%token KW_MERGE "MERGE"
Expand Down Expand Up @@ -1385,6 +1386,7 @@ using zetasql::ASTDropStatement;
%type <not_keyword_presence> is_operator
%type <not_keyword_presence> like_operator
%type <not_keyword_presence> ilike_operator
%type <not_keyword_presence> rlike_operator
%type <not_keyword_presence> distinct_operator

%type <preceding_or_following_keyword> preceding_or_following
Expand Down Expand Up @@ -5405,6 +5407,15 @@ ilike_operator:
} %prec "ILIKE"
;

// Returns NotKeywordPresence to indicate whether NOT was present.
rlike_operator:
    "RLIKE"
      {
        $$ = NotKeywordPresence::kAbsent;
      } %prec "RLIKE"
    | "NOT_SPECIAL" "RLIKE"
      {
        $$ = NotKeywordPresence::kPresent;
        // Report this rule's location as that of the "RLIKE" token so that
        // error messages point at "RLIKE" rather than at the preceding NOT.
        @$ = @2;
      } %prec "RLIKE"
    ;

// Returns NotKeywordPresence to indicate whether NOT was present.
between_operator:
"BETWEEN"
Expand Down Expand Up @@ -5630,6 +5641,27 @@ expression:
{
$$ = MAKE_NODE(ASTEscapedExpression, @$, {$1, $3})
}
| expression rlike_operator expression %prec "RLIKE"
      {
        // Parses `lhs [NOT] RLIKE rhs` into an ASTBinaryExpression whose op
        // is RLIKE; $2 tells us whether the NOT keyword was present.
        //
        // NOT has lower precedence but can be parsed unparenthesized in the
        // rhs because it is not ambiguous. This is not allowed.
        if (IsUnparenthesizedNotExpression($3)) {
          YYERROR_UNEXPECTED_AND_ABORT_AT(@3);
        }
        // Bison allows some cases like IN on the left hand side because it's
        // not ambiguous. The language doesn't allow this.
        if (!$1->IsAllowedInComparison()) {
          YYERROR_AND_ABORT_AT(
              @2,
              "Syntax error: "
              "Expression to the left of RLIKE must be parenthesized");
        }
        auto* binary_expression =
            MAKE_NODE(ASTBinaryExpression, @1, @3, {$1, $3});
        binary_expression->set_is_not($2 == NotKeywordPresence::kPresent);
        binary_expression->set_op(zetasql::ASTBinaryExpression::RLIKE);
        $$ = binary_expression;
      }
| expression distinct_operator expression %prec "DISTINCT"
{
if (parser->language_options() == nullptr
Expand Down Expand Up @@ -7353,6 +7385,7 @@ reserved_keyword_rule:
| "RECURSIVE"
| "RESPECT"
| "RIGHT"
| "RLIKE"
| "ROLLUP"
| "ROWS"
| "ROWS_RANGE"
Expand Down
5 changes: 3 additions & 2 deletions zetasql/parser/flex_tokenizer.l
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,7 @@ left { return BisonParserImpl::token::KW_LEFT; }
level { return BisonParserImpl::token::KW_LEVEL; }
like { return BisonParserImpl::token::KW_LIKE; }
ilike { return BisonParserImpl::token::KW_ILIKE; }
rlike { return BisonParserImpl::token::KW_RLIKE; }
limit { return BisonParserImpl::token::KW_LIMIT; }
load { return BisonParserImpl::token::KW_LOAD; }
lookup { return BisonParserImpl::token::KW_LOOKUP; }
Expand Down Expand Up @@ -539,14 +540,14 @@ no { return BisonParserImpl::token::KW_NO; }
file will cause bad error messages. There is no situation where that is
valid syntax, so there will never be any rejections as a result.
*/
not{whitespace}(between|in|like|ilike|distinct)[^A-Z_0-9] {
not{whitespace}(between|in|like|ilike|rlike|distinct)[^A-Z_0-9] {
SET_RETURN_PREFIX_LENGTH(3);
if (mode_ == BisonParserMode::kTokenizer) {
return BisonParserImpl::token::KW_NOT;
}
return BisonParserImpl::token::KW_NOT_SPECIAL;
}
"!"{whitespace}(between|in|like|ilike|distinct)[^A-Z_0-9] {
"!"{whitespace}(between|in|like|ilike|rlike|distinct)[^A-Z_0-9] {
SET_RETURN_PREFIX_LENGTH(1);
if (mode_ == BisonParserMode::kTokenizer) {
return BisonParserImpl::token::KW_NOT;
Expand Down
1 change: 1 addition & 0 deletions zetasql/parser/keywords.cc
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ constexpr KeywordInfoPOD kAllKeywords[] = {
{"level", KW_LEVEL},
{"like", KW_LIKE, KeywordInfo::kReserved},
{"ilike", KW_ILIKE, KeywordInfo::kReserved},
{"rlike", KW_RLIKE, KeywordInfo::kReserved},
{"limit", KW_LIMIT, KeywordInfo::kReserved},
{"load", KW_LOAD},
{"lookup", KW_LOOKUP, KeywordInfo::kReserved},
Expand Down
2 changes: 1 addition & 1 deletion zetasql/parser/keywords_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ TEST(ParserTest, DontAddNewReservedKeywords) {
// allows new queries to work that will not work on older code.
// Before changing this, co-ordinate with all engines to make sure the change
// is done safely.
EXPECT_EQ(105 /* CAUTION */, num_reserved);
EXPECT_EQ(106 /* CAUTION */, num_reserved);
}

} // namespace
Expand Down
2 changes: 2 additions & 0 deletions zetasql/parser/parse_tree.cc
Original file line number Diff line number Diff line change
Expand Up @@ -770,6 +770,8 @@ std::string ASTBinaryExpression::GetSQLForOperator() const {
return is_not_ ? "IS NOT DISTINCT FROM" : "IS DISTINCT FROM";
case ILIKE:
return is_not_ ? "NOT ILIKE" : "ILIKE";
case RLIKE:
return is_not_ ? "NOT RLIKE" : "RLIKE";
}
}

Expand Down
2 changes: 2 additions & 0 deletions zetasql/parser/parse_tree_manual.h
Original file line number Diff line number Diff line change
Expand Up @@ -2030,6 +2030,7 @@ class ASTBinaryExpression final : public ASTExpression {
XOR, // "XOR"
DISTINCT, // "IS DISTINCT FROM"
ILIKE, // "ILIKE"
RLIKE, // "RLIKE"
};

void set_op(Op op) { op_ = op; }
Expand All @@ -2048,6 +2049,7 @@ class ASTBinaryExpression final : public ASTExpression {
switch (op()) {
case LIKE:
case ILIKE:
case RLIKE:
case IS:
case EQ:
case NE:
Expand Down
2 changes: 1 addition & 1 deletion zetasql/parser/testdata/is_distinct.test
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ SELECT
# Syntax error
SELECT 1 NOT DISTINCT FROM 2;
--
ERROR: Syntax error: Expected keyword BETWEEN or keyword ILIKE or keyword IN or keyword LIKE but got keyword DISTINCT [at 1:14]
ERROR: Syntax error: Unexpected keyword DISTINCT [at 1:14]
SELECT 1 NOT DISTINCT FROM 2;
^
==
Expand Down
10 changes: 10 additions & 0 deletions zetasql/parser/testdata/standalone_expression.test
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,16 @@ BinaryExpression(NOT ILIKE) [0-30]
col1 NOT ILIKE '&_' ESCAPE '&'
==

# rlike
col1 NOT RLIKE '.a'
--
BinaryExpression(NOT RLIKE) [0-19]
PathExpression [0-4]
Identifier(col1) [0-4]
StringLiteral('.a') [15-19]
--
col1 NOT RLIKE '.a'
==

abc + @param + @@sysvar + count(*) + (select 1) + (select x from y)
--
Expand Down

0 comments on commit 219d4c7

Please sign in to comment.