diff --git a/zetasql/parser/ast_node_kind.h b/zetasql/parser/ast_node_kind.h
index c05f62d54..063dd0e32 100755
--- a/zetasql/parser/ast_node_kind.h
+++ b/zetasql/parser/ast_node_kind.h
@@ -289,6 +289,7 @@ enum ASTNodeKind {
   AST_TVF_SCHEMA_COLUMN,
   AST_TYPE_PARAMETER_LIST,
   AST_UNARY_EXPRESSION,
+  AST_UNION_TABLE_REFERENCE_LIST,
   AST_UNNEST_EXPRESSION,
   AST_UNNEST_EXPRESSION_WITH_OPT_ALIAS_AND_OFFSET,
   AST_UNTIL_CLAUSE,
diff --git a/zetasql/parser/bison_parser.y b/zetasql/parser/bison_parser.y
index 84203b1e6..88ab0f63b 100644
--- a/zetasql/parser/bison_parser.y
+++ b/zetasql/parser/bison_parser.y
@@ -1291,6 +1291,9 @@ using zetasql::ASTDropStatement;
 %type table_path_expression
 %type table_path_expression_base
 %type table_primary
+%type union_table_reference
+%type union_table_reference_list
+%type opt_union_table_reference_list
 %type table_subquery
 %type templated_parameter_type
 %type terminated_statement
@@ -6668,17 +6671,18 @@ opt_exclude_current_time:
       };
     | /* Nothing */ { $$ = false; }
     ;
+
 window_specification:
     identifier
       {
         $$ = MAKE_NODE(ASTWindowSpecification, @$, {$1});
       }
-    | "(" opt_identifier opt_partition_by_clause opt_order_by_clause
+    | "(" opt_identifier opt_union_table_reference_list opt_partition_by_clause opt_order_by_clause
       opt_window_frame_clause opt_exclude_current_time opt_instance_not_in_window ")"
       {
-        auto *window_spec = MAKE_NODE(ASTWindowSpecification, @$, {$2, $3, $4, $5});
-        window_spec->set_is_exclude_current_time($6);
-        window_spec->set_is_instance_not_in_window($7);
+        auto *window_spec = MAKE_NODE(ASTWindowSpecification, @$, {$2, $3, $4, $5, $6});
+        window_spec->set_is_exclude_current_time($7);
+        window_spec->set_is_instance_not_in_window($8);
         $$ = window_spec;
       }
     ;
@@ -8219,6 +8223,65 @@ execute_using_argument_list:
       }
     ;
 
+// One table source inside a window UNION clause: either a (possibly dashed)
+// table path, or a parenthesized query with an optional alias ("AS" may be
+// omitted).
+// NOTE(review): both alias nodes are created with @$, i.e. the location of
+// the whole alternative starting at "(", not just the alias text -- confirm
+// that this is intended.
+union_table_reference:
+    maybe_dashed_path_expression
+    | "(" query ")"
+      {
+        zetasql::ASTQuery* query = $2;
+        query->set_is_nested(true);
+        $$ = MAKE_NODE(ASTTableSubquery, @$, {
+            $2, nullptr, nullptr, nullptr, nullptr});
+      }
+    | "(" query ")" "AS" identifier
+      {
+        auto* alias = MAKE_NODE(ASTAlias, @$, {$5});
+        zetasql::ASTQuery* query = $2;
+        query->set_is_nested(true);
+        $$ = MAKE_NODE(ASTTableSubquery, @$, {
+            $2, alias, nullptr, nullptr, nullptr});
+      }
+    | "(" query ")" identifier
+      {
+        auto* alias = MAKE_NODE(ASTAlias, @$, {$4});
+        zetasql::ASTQuery* query = $2;
+        query->set_is_nested(true);
+        $$ = MAKE_NODE(ASTTableSubquery, @$, {
+            $2, alias, nullptr, nullptr, nullptr});
+      }
+    ;
+
+// Non-empty, comma-separated list of union_table_reference entries; each
+// entry becomes a child of a single ASTUnionTableReferenceList node.
+union_table_reference_list:
+    union_table_reference
+      {
+        $$ = MAKE_NODE(ASTUnionTableReferenceList, @$, {$1});
+      }
+    | union_table_reference_list "," union_table_reference
+      {
+        $$ = WithEndLocation(WithExtraChildren($1, {$3}), @$);
+      }
+    ;
+
+// Optional "UNION <list>" part of a parenthesized window specification;
+// yields nullptr when the clause is absent.
+opt_union_table_reference_list:
+    KW_UNION union_table_reference_list
+      {
+        $$ = $2;
+      }
+    | /* Nothing */
+      {
+        $$ = nullptr;
+      }
+    ;
+
 opt_execute_using_clause:
     KW_USING execute_using_argument_list
       {
diff --git a/zetasql/parser/parse_tree.cc b/zetasql/parser/parse_tree.cc
index 192a048f9..b96536fbb 100644
--- a/zetasql/parser/parse_tree.cc
+++ b/zetasql/parser/parse_tree.cc
@@ -323,6 +323,7 @@ static absl::flat_hash_map CreateNodeNamesMap() {
   map[AST_TVF_SCHEMA] = "TVFSchema";
   map[AST_TVF] = "TVF";
   map[AST_TYPE_PARAMETER_LIST] = "TypeParameterList";
   map[AST_UNARY_EXPRESSION] = "UnaryExpression";
+  map[AST_UNION_TABLE_REFERENCE_LIST] = "UnionTableReferenceList";
   map[AST_UNNEST_EXPRESSION] = "UnnestExpression";
   map[AST_UNNEST_EXPRESSION_WITH_OPT_ALIAS_AND_OFFSET] =
diff --git a/zetasql/parser/parse_tree_manual.h b/zetasql/parser/parse_tree_manual.h
index 4664aa4b1..95f2e4455 100644
--- a/zetasql/parser/parse_tree_manual.h
+++ b/zetasql/parser/parse_tree_manual.h
@@ -2820,6 +2820,7 @@ class ASTWindowSpecification final : public ASTNode {
   zetasql_base::StatusOr Accept(
       NonRecursiveParseTreeVisitor* visitor) const override;
 
+  const ASTUnionTableReferenceList* union_table_references() const { return union_table_references_; }
   const ASTPartitionBy* partition_by()
const { return partition_by_; }
   const ASTOrderBy* order_by() const { return order_by_; }
   const ASTWindowFrame* window_frame() const { return window_frame_; }
@@ -2836,6 +2837,7 @@ class ASTWindowSpecification final : public ASTNode {
   void InitFields() final {
     FieldLoader fl(this);
     fl.AddOptional(&base_window_name_, AST_IDENTIFIER);
+    fl.AddOptional(&union_table_references_, AST_UNION_TABLE_REFERENCE_LIST);
     fl.AddOptional(&partition_by_, AST_PARTITION_BY);
     fl.AddOptional(&order_by_, AST_ORDER_BY);
     fl.AddOptional(&window_frame_, AST_WINDOW_FRAME);
@@ -2843,6 +2845,7 @@ class ASTWindowSpecification final : public ASTNode {
 
   // All are optional, can be NULL.
   const ASTIdentifier* base_window_name_ = nullptr;
+  const ASTUnionTableReferenceList* union_table_references_ = nullptr;
   const ASTPartitionBy* partition_by_ = nullptr;
   const ASTOrderBy* order_by_ = nullptr;
   const ASTWindowFrame* window_frame_ = nullptr;
@@ -3792,6 +3795,32 @@ class ASTTableClause final : public ASTNode {
   const ASTTVF* tvf_ = nullptr;
 };
 
+// The table list of a window specification's UNION clause, e.g. the
+// "T2, T3" in "OVER (UNION T2, T3 ...)". Every child node is collected into
+// table_references().
+class ASTUnionTableReferenceList final : public ASTNode {
+ public:
+  static constexpr ASTNodeKind kConcreteNodeKind =
+      AST_UNION_TABLE_REFERENCE_LIST;
+
+  ASTUnionTableReferenceList() : ASTNode(kConcreteNodeKind) {}
+  void Accept(ParseTreeVisitor* visitor, void* data) const override;
+  zetasql_base::StatusOr Accept(
+      NonRecursiveParseTreeVisitor* visitor) const override;
+
+  const absl::Span& table_references() const {
+    return table_references_;
+  }
+
+ private:
+  void InitFields() final {
+    FieldLoader fl(this);
+    fl.AddRestAsRepeated(&table_references_);
+  }
+
+  absl::Span table_references_;
+};
+
 // This represents a clause of form "MODEL ", where is a model
 // name.
class ASTModelClause final : public ASTNode { diff --git a/zetasql/parser/testdata/analytic_functions.test b/zetasql/parser/testdata/analytic_functions.test index cc4bc7df5..1c95e4c11 100644 --- a/zetasql/parser/testdata/analytic_functions.test +++ b/zetasql/parser/testdata/analytic_functions.test @@ -717,6 +717,272 @@ FROM T == +# window with instance_not_in_window and exclude current_time +select f() over (rows_range between 5s preceding and current row maxsize 5 exclude current_time instance_not_in_window) +from T +-- +QueryStatement [0-126] + Query [0-126] + Select [0-126] + SelectList [7-119] + SelectColumn [7-119] + AnalyticFunctionCall [7-119] + FunctionCall [7-10] + PathExpression [7-8] + Identifier(f) [7-8] + WindowSpecification(is_exclude_current_time, is_instance_not_in_window) [16-119] + WindowFrame(ROWS_RANGE) [17-74] + WindowFrameExpr(OFFSET PRECEDING) [36-48] + IntervalLiteral(5s) [36-38] + WindowFrameExpr(CURRENT ROW) [53-64] + MaxSize [65-74] + IntLiteral(5) [73-74] + FromClause [120-126] + TablePathExpression [125-126] + PathExpression [125-126] + Identifier(T) [125-126] +-- +SELECT + f() OVER (ROWS_RANGE BETWEEN 5s PRECEDING AND CURRENT ROW MAXSIZE 5 EXCLUDE CURRENT_TIME INSTANCE_NOT_IN_WINDOW) +FROM + T +== + +# window with union single table +select f() over (union T2 rows_range between 5s preceding and current row maxsize 5 exclude current_time instance_not_in_window) +from T +-- +QueryStatement [0-135] + Query [0-135] + Select [0-135] + SelectList [7-128] + SelectColumn [7-128] + AnalyticFunctionCall [7-128] + FunctionCall [7-10] + PathExpression [7-8] + Identifier(f) [7-8] + WindowSpecification(is_exclude_current_time, is_instance_not_in_window) [16-128] + UnionTableReferenceList [23-25] + PathExpression [23-25] + Identifier(T2) [23-25] + WindowFrame(ROWS_RANGE) [26-83] + WindowFrameExpr(OFFSET PRECEDING) [45-57] + IntervalLiteral(5s) [45-47] + WindowFrameExpr(CURRENT ROW) [62-73] + MaxSize [74-83] + IntLiteral(5) [82-83] + FromClause 
[129-135] + TablePathExpression [134-135] + PathExpression [134-135] + Identifier(T) [134-135] +-- +SELECT + f() OVER (UNION T2 ROWS_RANGE BETWEEN 5s PRECEDING AND CURRENT ROW MAXSIZE 5 EXCLUDE CURRENT_TIME INSTANCE_NOT_IN_WINDOW) +FROM + T +== + +# window with union multi tables +select f() over (union T2, T3 rows_range between 5s preceding and current row maxsize 5 exclude current_time instance_not_in_window) +from T +-- +QueryStatement [0-139] + Query [0-139] + Select [0-139] + SelectList [7-132] + SelectColumn [7-132] + AnalyticFunctionCall [7-132] + FunctionCall [7-10] + PathExpression [7-8] + Identifier(f) [7-8] + WindowSpecification(is_exclude_current_time, is_instance_not_in_window) [16-132] + UnionTableReferenceList [23-29] + PathExpression [23-25] + Identifier(T2) [23-25] + PathExpression [27-29] + Identifier(T3) [27-29] + WindowFrame(ROWS_RANGE) [30-87] + WindowFrameExpr(OFFSET PRECEDING) [49-61] + IntervalLiteral(5s) [49-51] + WindowFrameExpr(CURRENT ROW) [66-77] + MaxSize [78-87] + IntLiteral(5) [86-87] + FromClause [133-139] + TablePathExpression [138-139] + PathExpression [138-139] + Identifier(T) [138-139] +-- +SELECT + f() OVER (UNION T2, T3 ROWS_RANGE BETWEEN 5s PRECEDING AND CURRENT ROW MAXSIZE 5 EXCLUDE CURRENT_TIME INSTANCE_NOT_IN_WINDOW) +FROM + T +== + +# window with union subquery +select f() over (union (select col1 as c1, col2 as c2, col3 as c3 from T2) rows_range between 5s preceding and current row maxsize 5 exclude current_time instance_not_in_window) +from T +-- +QueryStatement [0-184] + Query [0-184] + Select [0-184] + SelectList [7-177] + SelectColumn [7-177] + AnalyticFunctionCall [7-177] + FunctionCall [7-10] + PathExpression [7-8] + Identifier(f) [7-8] + WindowSpecification(is_exclude_current_time, is_instance_not_in_window) [16-177] + UnionTableReferenceList [23-74] + TableSubquery [23-74] + Query [24-73] + Select [24-73] + SelectList [31-65] + SelectColumn [31-41] + PathExpression [31-35] + Identifier(col1) [31-35] + Alias 
[36-41] + Identifier(c1) [39-41] + SelectColumn [43-53] + PathExpression [43-47] + Identifier(col2) [43-47] + Alias [48-53] + Identifier(c2) [51-53] + SelectColumn [55-65] + PathExpression [55-59] + Identifier(col3) [55-59] + Alias [60-65] + Identifier(c3) [63-65] + FromClause [66-73] + TablePathExpression [71-73] + PathExpression [71-73] + Identifier(T2) [71-73] + WindowFrame(ROWS_RANGE) [75-132] + WindowFrameExpr(OFFSET PRECEDING) [94-106] + IntervalLiteral(5s) [94-96] + WindowFrameExpr(CURRENT ROW) [111-122] + MaxSize [123-132] + IntLiteral(5) [131-132] + FromClause [178-184] + TablePathExpression [183-184] + PathExpression [183-184] + Identifier(T) [183-184] +-- +SELECT + f() OVER (UNION + ( + SELECT + col1 AS c1, + col2 AS c2, + col3 AS c3 + FROM + T2 + ) ROWS_RANGE BETWEEN 5s PRECEDING AND CURRENT ROW MAXSIZE 5 EXCLUDE CURRENT_TIME INSTANCE_NOT_IN_WINDOW) +FROM + T +== + +# window with union multiple subquery and table +select f() over (union + (select col1 as c1, col2 as c2, col3 as c3 from T2) as T3, + (select col1 as c1, col2 as c2, col3 as c3 from T4) as T5, T6 rows_range between 5s preceding and current row maxsize 5 exclude current_time instance_not_in_window) +from T +-- +QueryStatement [0-258] + Query [0-258] + Select [0-258] + SelectList [7-251] + SelectColumn [7-251] + AnalyticFunctionCall [7-251] + FunctionCall [7-10] + PathExpression [7-8] + Identifier(f) [7-8] + WindowSpecification(is_exclude_current_time, is_instance_not_in_window) [16-251] + UnionTableReferenceList [26-148] + TableSubquery [26-83] + Query [27-76] + Select [27-76] + SelectList [34-68] + SelectColumn [34-44] + PathExpression [34-38] + Identifier(col1) [34-38] + Alias [39-44] + Identifier(c1) [42-44] + SelectColumn [46-56] + PathExpression [46-50] + Identifier(col2) [46-50] + Alias [51-56] + Identifier(c2) [54-56] + SelectColumn [58-68] + PathExpression [58-62] + Identifier(col3) [58-62] + Alias [63-68] + Identifier(c3) [66-68] + FromClause [69-76] + TablePathExpression [74-76] + 
PathExpression [74-76] + Identifier(T2) [74-76] + Alias [26-83] + Identifier(T3) [81-83] + TableSubquery [87-144] + Query [88-137] + Select [88-137] + SelectList [95-129] + SelectColumn [95-105] + PathExpression [95-99] + Identifier(col1) [95-99] + Alias [100-105] + Identifier(c1) [103-105] + SelectColumn [107-117] + PathExpression [107-111] + Identifier(col2) [107-111] + Alias [112-117] + Identifier(c2) [115-117] + SelectColumn [119-129] + PathExpression [119-123] + Identifier(col3) [119-123] + Alias [124-129] + Identifier(c3) [127-129] + FromClause [130-137] + TablePathExpression [135-137] + PathExpression [135-137] + Identifier(T4) [135-137] + Alias [87-144] + Identifier(T5) [142-144] + PathExpression [146-148] + Identifier(T6) [146-148] + WindowFrame(ROWS_RANGE) [149-206] + WindowFrameExpr(OFFSET PRECEDING) [168-180] + IntervalLiteral(5s) [168-170] + WindowFrameExpr(CURRENT ROW) [185-196] + MaxSize [197-206] + IntLiteral(5) [205-206] + FromClause [252-258] + TablePathExpression [257-258] + PathExpression [257-258] + Identifier(T) [257-258] +-- +SELECT + f() OVER (UNION + ( + SELECT + col1 AS c1, + col2 AS c2, + col3 AS c3 + FROM + T2 + ) AS T3, + ( + SELECT + col1 AS c1, + col2 AS c2, + col3 AS c3 + FROM + T4 + ) AS T5, T6 ROWS_RANGE BETWEEN 5s PRECEDING AND CURRENT ROW MAXSIZE 5 EXCLUDE CURRENT_TIME INSTANCE_NOT_IN_WINDOW) +FROM + T +== + select f() over (range between 5+5 preceding and current {{rows|blah}}) from T --
diff --git a/zetasql/parser/unparser.cc b/zetasql/parser/unparser.cc
index 0d46a2e0c..72515a928 100644
--- a/zetasql/parser/unparser.cc
+++ b/zetasql/parser/unparser.cc
@@ -1213,6 +1213,14 @@ void Unparser::visitASTWithOffset(const ASTWithOffset* node, void* data) {
   visitASTChildren(node, data);
 }
 
+// Unparses a window UNION clause: prints the UNION keyword, then the table
+// references with ", " passed as the separator.
+void Unparser::visitASTUnionTableReferenceList(
+    const ASTUnionTableReferenceList* node, void* data) {
+  print("UNION");
+  UnparseVectorWithSeparator(node->table_references(), data, ", ");
+}
+
 void Unparser::visitASTUnnestExpression(const
ASTUnnestExpression* node,
                                         void* data) {
   print("UNNEST(");
diff --git a/zetasql/parser/unparser.h b/zetasql/parser/unparser.h
index 1e5a55742..c3c114001 100644
--- a/zetasql/parser/unparser.h
+++ b/zetasql/parser/unparser.h
@@ -255,6 +255,8 @@ class Unparser : public ParseTreeVisitor {
   void visitASTTransformClause(const ASTTransformClause* node,
                                void* data) override;
   void visitASTWithOffset(const ASTWithOffset* node, void* data) override;
+  void visitASTUnionTableReferenceList(const ASTUnionTableReferenceList* node,
+                                       void* data) override;
   void visitASTUnnestExpression(const ASTUnnestExpression* node,
                                 void* data) override;
   void visitASTUnnestExpressionWithOptAliasAndOffset(