@@ -0,0 +1,17 @@
CREATE TABLE t_complex (id INT) STORED BY ICEBERG;

INSERT INTO t_complex (id) VALUES (1);

ALTER TABLE t_complex ADD COLUMNS (col1 STRUCT<x:INT, y:INT>);

INSERT INTO t_complex VALUES (2, named_struct("x", 10, "y", 20));

ALTER TABLE t_complex ADD COLUMNS (col2 map<string,string>);

INSERT INTO t_complex VALUES (3, named_struct("x", 11, "y", 22), map("k1", "v1", "k2", "v2"));

ALTER TABLE t_complex ADD COLUMNS (col3 array<int>);

INSERT INTO t_complex VALUES (4, named_struct("x", 5, "y", 18), map("k22", "v22", "k33", "v44"), array(1, 2, 3));

SELECT * FROM t_complex ORDER BY id;
@@ -0,0 +1,76 @@
PREHOOK: query: CREATE TABLE t_complex (id INT) STORED BY ICEBERG
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@t_complex
POSTHOOK: query: CREATE TABLE t_complex (id INT) STORED BY ICEBERG
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@t_complex
PREHOOK: query: INSERT INTO t_complex (id) VALUES (1)
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@t_complex
POSTHOOK: query: INSERT INTO t_complex (id) VALUES (1)
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@t_complex
PREHOOK: query: ALTER TABLE t_complex ADD COLUMNS (col1 STRUCT<x:INT, y:INT>)
PREHOOK: type: ALTERTABLE_ADDCOLS
PREHOOK: Input: default@t_complex
PREHOOK: Output: default@t_complex
POSTHOOK: query: ALTER TABLE t_complex ADD COLUMNS (col1 STRUCT<x:INT, y:INT>)
POSTHOOK: type: ALTERTABLE_ADDCOLS
POSTHOOK: Input: default@t_complex
POSTHOOK: Output: default@t_complex
PREHOOK: query: INSERT INTO t_complex VALUES (2, named_struct("x", 10, "y", 20))
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@t_complex
POSTHOOK: query: INSERT INTO t_complex VALUES (2, named_struct("x", 10, "y", 20))
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@t_complex
PREHOOK: query: ALTER TABLE t_complex ADD COLUMNS (col2 map<string,string>)
PREHOOK: type: ALTERTABLE_ADDCOLS
PREHOOK: Input: default@t_complex
PREHOOK: Output: default@t_complex
POSTHOOK: query: ALTER TABLE t_complex ADD COLUMNS (col2 map<string,string>)
POSTHOOK: type: ALTERTABLE_ADDCOLS
POSTHOOK: Input: default@t_complex
POSTHOOK: Output: default@t_complex
PREHOOK: query: INSERT INTO t_complex VALUES (3, named_struct("x", 11, "y", 22), map("k1", "v1", "k2", "v2"))
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@t_complex
POSTHOOK: query: INSERT INTO t_complex VALUES (3, named_struct("x", 11, "y", 22), map("k1", "v1", "k2", "v2"))
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@t_complex
PREHOOK: query: ALTER TABLE t_complex ADD COLUMNS (col3 array<int>)
PREHOOK: type: ALTERTABLE_ADDCOLS
PREHOOK: Input: default@t_complex
PREHOOK: Output: default@t_complex
POSTHOOK: query: ALTER TABLE t_complex ADD COLUMNS (col3 array<int>)
POSTHOOK: type: ALTERTABLE_ADDCOLS
POSTHOOK: Input: default@t_complex
POSTHOOK: Output: default@t_complex
PREHOOK: query: INSERT INTO t_complex VALUES (4, named_struct("x", 5, "y", 18), map("k22", "v22", "k33", "v44"), array(1, 2, 3))
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@t_complex
POSTHOOK: query: INSERT INTO t_complex VALUES (4, named_struct("x", 5, "y", 18), map("k22", "v22", "k33", "v44"), array(1, 2, 3))
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@t_complex
PREHOOK: query: SELECT * FROM t_complex ORDER BY id
PREHOOK: type: QUERY
PREHOOK: Input: default@t_complex
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: SELECT * FROM t_complex ORDER BY id
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t_complex
POSTHOOK: Output: hdfs://### HDFS PATH ###
1 NULL NULL NULL
2 {"x":10,"y":20} NULL NULL
3 {"x":11,"y":22} {"k1":"v1","k2":"v2"} NULL
4 {"x":5,"y":18} {"k22":"v22","k33":"v44"} [1,2,3]
@@ -519,20 +519,22 @@ private VectorizedColumnReader buildVectorizedParquetReader(
int depth) throws IOException {
List<ColumnDescriptor> descriptors =
getAllColumnDescriptorByType(depth, type, columnDescriptors);
// Support for schema evolution: if the column from the current
// query schema is not present in the file schema, return a dummy
// reader that produces nulls. This allows queries to proceed even
// when new columns have been added after the file was written.
if (!fileSchema.getColumns().contains(descriptors.get(0))) {
return new VectorizedDummyColumnReader();
}
switch (typeInfo.getCategory()) {
case PRIMITIVE:
if (columnDescriptors == null || columnDescriptors.isEmpty()) {
throw new RuntimeException(
"Failed to find related Parquet column descriptor with type " + type);
}
if (fileSchema.getColumns().contains(descriptors.get(0))) {
return new VectorizedPrimitiveColumnReader(descriptors.get(0),
pages.getPageReader(descriptors.get(0)), skipTimestampConversion, writerTimezone, skipProlepticConversion,
legacyConversionEnabled, type, typeInfo);
} else {
// Support for schema evolution
return new VectorizedDummyColumnReader();
}
case STRUCT:
StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
List<VectorizedColumnReader> fieldReaders = new ArrayList<>();
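For context on the early return added above: a reader that backs a column absent from the data file only needs to hand back all-null vectors for each batch. Below is a minimal sketch of that idea using Hive's repeating-null ColumnVector convention; the class and method names are invented for illustration and this is not the actual VectorizedDummyColumnReader implementation.

// Illustrative sketch only: not the real VectorizedDummyColumnReader; the class
// and method names here are hypothetical. It shows how a whole batch can be
// marked NULL via the repeating-null convention of Hive's ColumnVector.
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;

public class NullColumnReaderSketch {
  // Mark every row of the output vector as NULL. The batch size is not needed,
  // because a repeating vector is described entirely by its first entry.
  public void readBatch(int total, ColumnVector column) {
    column.isRepeating = true;   // one entry describes all rows in the batch
    column.noNulls = false;      // the vector does contain nulls
    column.isNull[0] = true;     // and that single repeated entry is NULL
  }
}

With this behavior wired into buildVectorizedParquetReader for any missing descriptor, columns added after a Parquet file was written (including the STRUCT, MAP, and ARRAY columns exercised by the new q-test) simply surface as NULL for the older rows, which matches the expected output above.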