Skip to content

Commit

Permalink
feat: support map data type (#53)
Browse files Browse the repository at this point in the history
1. map type available in `type` and `table_column_schema`.
2. construct a map value with the `map(key1, value1, ...)` function.
   Constructing a map from a literal such as `{key1: value1, ...}` is not
   supported, because the usage of `{` and `:` is still undetermined and
   may cause compatibility issues with JavaCC.
3. access value in map with `[]` or `.`(dot) operator
  • Loading branch information
aceforeverd authored Dec 20, 2023
1 parent 3c9cf36 commit 55be108
Show file tree
Hide file tree
Showing 11 changed files with 233 additions and 9 deletions.
10 changes: 6 additions & 4 deletions zetasql/parser/ast_node_kind.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ enum ASTNodeKind {
AST_ALTER_SCHEMA_STATEMENT,
AST_ALTER_TABLE_STATEMENT,
AST_ALTER_VIEW_STATEMENT,
AST_ALTER_USER_STATEMENT,
AST_ANALYTIC_FUNCTION_CALL,
AST_ANALYZE_STATEMENT,
AST_AND_EXPR,
Expand Down Expand Up @@ -88,7 +87,6 @@ enum ASTNodeKind {
AST_CREATE_EXTERNAL_TABLE_STATEMENT,
AST_CREATE_FUNCTION_STATEMENT,
AST_CREATE_INDEX_STATEMENT,
AST_CREATE_USER_STATEMENT,
AST_CREATE_MODEL_STATEMENT,
AST_CREATE_PROCEDURE_STATEMENT,
AST_CREATE_ROW_ACCESS_POLICY_STATEMENT,
Expand Down Expand Up @@ -119,7 +117,6 @@ enum ASTNodeKind {
AST_DROP_STATEMENT,
AST_DROP_TABLE_FUNCTION_STATEMENT,
AST_DROP_MATERIALIZED_VIEW_STATEMENT,
AST_DROP_USER_STATEMENT,
AST_ELSEIF_CLAUSE,
AST_ELSEIF_CLAUSE_LIST,
AST_EXCEPTION_HANDLER,
Expand Down Expand Up @@ -334,7 +331,12 @@ enum ASTNodeKind {
AST_EXIT_STATEMENT,
AST_ADD_PATH_ACTION,
AST_DROP_PATH_ACTION,
kLastASTNodeKind = AST_DROP_PATH_ACTION,
AST_CREATE_USER_STATEMENT,
AST_ALTER_USER_STATEMENT,
AST_DROP_USER_STATEMENT,
AST_MAP_COLUMN_SCHEMA,
AST_MAP_TYPE,
kLastASTNodeKind = AST_MAP_TYPE,
};

} // namespace zetasql
Expand Down
29 changes: 28 additions & 1 deletion zetasql/parser/bison_parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -718,6 +718,7 @@ using zetasql::ASTDropStatement;
%token KW_RLIKE "RLIKE"
%token KW_LIMIT "LIMIT"
%token KW_LOOKUP "LOOKUP"
%token KW_MAP "MAP"
%token KW_MERGE "MERGE"
%token KW_MOD "MOD"
%token KW_NATURAL "NATURAL"
Expand Down Expand Up @@ -1130,6 +1131,8 @@ using zetasql::ASTDropStatement;
%type <expression> json_literal
%type <expression> lambda_argument
%type <node> lambda_argument_list
%type <node> map_type
%type <node> map_column_schema_inner
%type <node> merge_action
%type <node> merge_insert_value_list_or_source_row
%type <node> merge_source
Expand Down Expand Up @@ -2836,10 +2839,18 @@ struct_column_schema_inner:
| struct_column_schema_prefix ">"
;

// Column schema for a map column, written MAP<key, value>. Both the key and
// the value are parsed with the field_schema rule. The action builds an
// ASTMapColumnSchema node whose children are the key schema ($3) followed by
// the value schema ($5).
map_column_schema_inner:
"MAP" "<" field_schema "," field_schema ">"
{
$$ = MAKE_NODE(ASTMapColumnSchema, @$, {$3, $5});
}
;

raw_column_schema_inner:
simple_column_schema_inner
| array_column_schema_inner
| struct_column_schema_inner
| map_column_schema_inner
;

column_schema_inner:
Expand Down Expand Up @@ -6320,8 +6331,15 @@ struct_type:
}
;

// Map type, written MAP<key_type, value_type>. Both operands are parsed with
// the type rule. The action builds an ASTMapType node whose children are the
// key type ($3) followed by the value type ($5).
map_type:
"MAP" "<" type "," type ">"
{
$$ = MAKE_NODE(ASTMapType, @$, {$3, $5});
}
;

raw_type:
array_type | struct_type | type_name ;
array_type | struct_type | map_type | type_name ;

type_parameter:
integer_literal
Expand Down Expand Up @@ -6382,6 +6400,10 @@ templated_parameter_kind:
{
$$ = zetasql::ASTTemplatedParameterType::ANY_ARRAY;
}
| "MAP"
{
$$ = zetasql::ASTTemplatedParameterType::ANY_MAP;
}
| identifier
{
const absl::string_view templated_type_string = $1->GetAsStringView();
Expand Down Expand Up @@ -6657,6 +6679,10 @@ function_name_from_keyword:
{
$$ = parser->MakeIdentifier(@1, parser->GetInputText(@1));
}
| "MAP"
{
$$ = parser->MakeIdentifier(@1, parser->GetInputText(@1));
}
;

// These rules have "expression" as their first part rather than
Expand Down Expand Up @@ -7435,6 +7461,7 @@ reserved_keyword_rule:
| "LIKE"
| "LIMIT"
| "LOOKUP"
| "MAP"
| "MERGE"
| "MOD"
| "NATURAL"
Expand Down
5 changes: 4 additions & 1 deletion zetasql/parser/flex_tokenizer.l
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
the normal rules. */
%x DOT_IDENTIFIER

/* This inclusive state is for in ARRAY<...> and STRUCT<...>. It turns off the
/* This inclusive state is for in ARRAY<...>, MAP<...> and STRUCT<...>. It turns off the
parsing of <<, >>, and <>, but leaves everything else the same. Doing this in
the tokenizer avoids complicated rules and duplication at the parser level.
Expand Down Expand Up @@ -519,6 +519,7 @@ limit { return BisonParserImpl::token::KW_LIMIT; }
load { return BisonParserImpl::token::KW_LOAD; }
lookup { return BisonParserImpl::token::KW_LOOKUP; }
loop { return BisonParserImpl::token::KW_LOOP; }
map { return BisonParserImpl::token::KW_MAP; }
match { return BisonParserImpl::token::KW_MATCH; }
matched { return BisonParserImpl::token::KW_MATCHED; }
materialized { return BisonParserImpl::token::KW_MATERIALIZED; }
Expand Down Expand Up @@ -871,6 +872,7 @@ zone { return BisonParserImpl::token::KW_ZONE; }
/* Don't recognize these in ARRAY<>, MAP<> or STRUCT<> context. */
"<>" {
if (prev_token_ == BisonParserImpl::token::KW_ARRAY ||
prev_token_ == BisonParserImpl::token::KW_MAP ||
prev_token_ == BisonParserImpl::token::KW_STRUCT) {
// Match only the '<', and move to the same state that that production would
// have moved to.
Expand All @@ -890,6 +892,7 @@ zone { return BisonParserImpl::token::KW_ZONE; }
}
"<" {
if (prev_token_ == BisonParserImpl::token::KW_ARRAY ||
prev_token_ == BisonParserImpl::token::KW_MAP ||
prev_token_ == BisonParserImpl::token::KW_STRUCT) {
// Switch to a mode that does not recognize >>. This only works as long as
// there are no legal "independent" < and > inside array or struct types
Expand Down
1 change: 1 addition & 0 deletions zetasql/parser/keywords.cc
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ constexpr KeywordInfoPOD kAllKeywords[] = {
{"load", KW_LOAD},
{"lookup", KW_LOOKUP, KeywordInfo::kReserved},
{"loop", KW_LOOP},
{"map", KW_MAP, KeywordInfo::kReserved},
{"match", KW_MATCH},
{"matched", KW_MATCHED},
{"materialized", KW_MATERIALIZED},
Expand Down
2 changes: 1 addition & 1 deletion zetasql/parser/keywords_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ TEST(ParserTest, DontAddNewReservedKeywords) {
// allows new queries to work that will not work on older code.
// Before changing this, co-ordinate with all engines to make sure the change
// is done safely.
EXPECT_EQ(106 /* CAUTION */, num_reserved);
EXPECT_EQ(107 /* CAUTION */, num_reserved);
}

} // namespace
Expand Down
2 changes: 2 additions & 0 deletions zetasql/parser/parse_tree.cc
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,8 @@ static absl::flat_hash_map<ASTNodeKind, std::string> CreateNodeNamesMap() {
map[AST_LIKE_TABLE_CLAUSE] = "LikeTableClause";
map[AST_ADD_PATH_ACTION] = "AddOfflinePathAction";
map[AST_DROP_PATH_ACTION] = "DropOfflinePathAction";
map[AST_MAP_COLUMN_SCHEMA] = "MapColumnSchema";
map[AST_MAP_TYPE] = "MapType";
for (int kind = kFirstASTNodeKind; kind <= kLastASTNodeKind;
++kind) {
ZETASQL_DCHECK(zetasql_base::ContainsKey(map, static_cast<ASTNodeKind>(kind)))
Expand Down
54 changes: 54 additions & 0 deletions zetasql/parser/parse_tree_manual.h
Original file line number Diff line number Diff line change
Expand Up @@ -5380,6 +5380,30 @@ class ASTStructField final : public ASTNode {
const ASTType* type_ = nullptr;
};

class ASTMapType final : public ASTType {
public:
static constexpr ASTNodeKind kConcreteNodeKind = AST_MAP_TYPE;

ASTMapType() : ASTType(kConcreteNodeKind) {}
void Accept(ParseTreeVisitor* visitor, void* data) const override;
zetasql_base::StatusOr<VisitResult> Accept(
NonRecursiveParseTreeVisitor* visitor) const override;

const ASTType* key_type() const { return key_type_; }
const ASTType* value_type() const { return value_type_; }

private:
void InitFields() final {
FieldLoader fl(this);
fl.AddRequired(&key_type_);
fl.AddRequired(&value_type_);
fl.AddOptional(mutable_type_parameters_ptr(), AST_TYPE_PARAMETER_LIST);
}

const ASTType* key_type_ = nullptr;
const ASTType* value_type_ = nullptr;
};

class ASTTemplatedParameterType final : public ASTNode {
public:
static constexpr ASTNodeKind kConcreteNodeKind =
Expand All @@ -5392,6 +5416,7 @@ class ASTTemplatedParameterType final : public ASTNode {
ANY_STRUCT,
ANY_ARRAY,
ANY_TABLE,
ANY_MAP,
};

ASTTemplatedParameterType() : ASTNode(kConcreteNodeKind) {}
Expand Down Expand Up @@ -5907,6 +5932,35 @@ class ASTArrayColumnSchema final : public ASTColumnSchema {
const ASTColumnSchema* element_schema_ = nullptr;
};

class ASTMapColumnSchema final : public ASTColumnSchema {
public:
static constexpr ASTNodeKind kConcreteNodeKind = AST_MAP_COLUMN_SCHEMA;

ASTMapColumnSchema() : ASTColumnSchema(kConcreteNodeKind) {}
void Accept(ParseTreeVisitor* visitor, void* data) const override;
zetasql_base::StatusOr<VisitResult> Accept(
NonRecursiveParseTreeVisitor* visitor) const override;

const ASTColumnSchema *key_schema() const { return key_schema_; }
const ASTColumnSchema *value_schema() const { return value_schema_; }

private:
void InitFields() final {
FieldLoader fl(this);
fl.AddRequired(&key_schema_);
fl.AddRequired(&value_schema_);
fl.AddOptional(mutable_type_parameters_ptr(), AST_TYPE_PARAMETER_LIST);
fl.AddOptional(mutable_generated_column_info_ptr(),
AST_GENERATED_COLUMN_INFO);
fl.AddOptionalExpression(mutable_default_expression_ptr());
fl.AddOptional(mutable_attributes_ptr(), AST_COLUMN_ATTRIBUTE_LIST);
fl.AddOptional(mutable_options_list_ptr(), AST_OPTIONS_LIST);
}

const ASTColumnSchema* key_schema_ = nullptr;
const ASTColumnSchema* value_schema_ = nullptr;
};

class ASTStructColumnSchema final : public ASTColumnSchema {
public:
static constexpr ASTNodeKind kConcreteNodeKind = AST_STRUCT_COLUMN_SCHEMA;
Expand Down
38 changes: 38 additions & 0 deletions zetasql/parser/testdata/create_table.test
Original file line number Diff line number Diff line change
Expand Up @@ -4766,3 +4766,41 @@ CREATE TABLE t CLONE
t1
OPTIONS(expiration_timestamp = TIMESTAMP "2019-05-22 00:00:00 UTC", description = "mydataset.source_table clone",
label = ARRAY["experiments"])
==

# create table with map type
# both map type and key/value type inside map accept optional column attribute and option list
create table t (
c1 STRING,
c2 map<STRING, TIMESTAMP NOT NULL> NOT NULL
);
--
CreateTableStatement [0-77]
PathExpression [13-14]
Identifier(t) [13-14]
TableElementList [15-77]
ColumnDefinition [19-28]
Identifier(c1) [19-21]
SimpleColumnSchema [22-28]
PathExpression [22-28]
Identifier(STRING) [22-28]
ColumnDefinition [32-75]
Identifier(c2) [32-34]
MapColumnSchema [35-75]
SimpleColumnSchema [39-45]
PathExpression [39-45]
Identifier(STRING) [39-45]
SimpleColumnSchema [47-65]
PathExpression [47-56]
Identifier(TIMESTAMP) [47-56]
ColumnAttributeList [57-65]
NotNullColumnAttribute [57-65]
ColumnAttributeList [67-75]
NotNullColumnAttribute [67-75]
--
CREATE TABLE t
(
c1 STRING,
c2 MAP< STRING, TIMESTAMP NOT NULL > NOT NULL
)
==
72 changes: 72 additions & 0 deletions zetasql/parser/testdata/map.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# construct map data type from map function
# access map value by []operator
#
# we represent '[]' in syntax tree with ArrayElement, but it can
# also apply to map values
select map("k", "v")["k"]
--
QueryStatement [0-25]
Query [0-25]
Select [0-25]
SelectList [7-25]
SelectColumn [7-25]
ArrayElement [20-25]
FunctionCall [7-20]
PathExpression [7-10]
Identifier(`map`) [7-10]
StringLiteral("k") [11-14]
StringLiteral("v") [16-19]
StringLiteral("k") [21-24]
--
SELECT
`map`("k", "v")["k"]
==

select map("k", "v").k
--
QueryStatement [0-22]
Query [0-22]
Select [0-22]
SelectList [7-22]
SelectColumn [7-22]
DotIdentifier [20-22]
FunctionCall [7-20]
PathExpression [7-10]
Identifier(`map`) [7-10]
StringLiteral("k") [11-14]
StringLiteral("v") [16-19]
Identifier(k) [21-22]
--
SELECT
`map`("k", "v").k
==

# casting to map type
select cast(col as MAP<STRING, TIMESTAMP>)
--
QueryStatement [0-42]
Query [0-42]
Select [0-42]
SelectList [7-42]
SelectColumn [7-42]
CastExpression [7-42]
PathExpression [12-15]
Identifier(col) [12-15]
MapType [19-41]
SimpleType [23-29]
PathExpression [23-29]
Identifier(STRING) [23-29]
SimpleType [31-40]
PathExpression [31-40]
Identifier(TIMESTAMP) [31-40]
--
SELECT
CAST(col AS MAP< STRING, TIMESTAMP >)
==

select cast(col as MAP<>)
--
ERROR: Syntax error: Unexpected ">" [at 1:24]
select cast(col as MAP<>)
^
==
22 changes: 22 additions & 0 deletions zetasql/parser/unparser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3535,5 +3535,27 @@ void Unparser::visitASTDropOfflinePathAction(const ASTDropOfflinePathAction *nod
node->path()->Accept(this, data);
}

// Unparses a map type as "MAP<", the key type, ",", the value type and ">",
// then the type parameters when the node has any.
void Unparser::visitASTMapType(const ASTMapType* node, void* data) {
  const auto* key = node->key_type();
  const auto* value = node->value_type();

  print("MAP<");
  key->Accept(this, data);
  print(",");
  value->Accept(this, data);
  print(">");

  const auto* type_params = node->type_parameters();
  if (type_params == nullptr) {
    return;  // No type parameters to emit.
  }
  type_params->Accept(this, data);
}

// Unparses a map column schema as "MAP<", the key schema, ",", the value
// schema and ">", then delegates the rest of the column schema to
// UnparseColumnSchema().
void Unparser::visitASTMapColumnSchema(const ASTMapColumnSchema* node,
                                       void* data) {
  const auto* key = node->key_schema();
  const auto* value = node->value_schema();

  print("MAP<");
  key->Accept(this, data);
  print(",");
  value->Accept(this, data);
  print(">");

  UnparseColumnSchema(node, data);
}

} // namespace parser
} // namespace zetasql
Loading

0 comments on commit 55be108

Please sign in to comment.