diff --git a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_add_complex_column.q b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_add_complex_column.q new file mode 100644 index 000000000000..d206fd8e76cf --- /dev/null +++ b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_add_complex_column.q @@ -0,0 +1,17 @@ +CREATE TABLE t_complex (id INT) STORED BY ICEBERG; + +INSERT INTO t_complex (id) VALUES (1); + +ALTER TABLE t_complex ADD COLUMNS (col1 STRUCT<x:int,y:int>); + +INSERT INTO t_complex VALUES (2, named_struct("x", 10, "y", 20)); + +ALTER TABLE t_complex ADD COLUMNS (col2 map<string,string>); + +INSERT INTO t_complex VALUES (3, named_struct("x", 11, "y", 22), map("k1", "v1", "k2", "v2")); + +ALTER TABLE t_complex ADD COLUMNS (col3 array<int>); + +INSERT INTO t_complex VALUES (4, named_struct("x", 5, "y", 18), map("k22", "v22", "k33", "v44"), array(1, 2, 3)); + +SELECT * FROM t_complex ORDER BY id; \ No newline at end of file diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_add_complex_column.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_add_complex_column.q.out new file mode 100644 index 000000000000..80ef8542a893 --- /dev/null +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_add_complex_column.q.out @@ -0,0 +1,76 @@ +PREHOOK: query: CREATE TABLE t_complex (id INT) STORED BY ICEBERG +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t_complex +POSTHOOK: query: CREATE TABLE t_complex (id INT) STORED BY ICEBERG +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t_complex +PREHOOK: query: INSERT INTO t_complex (id) VALUES (1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t_complex +POSTHOOK: query: INSERT INTO t_complex (id) VALUES (1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t_complex +PREHOOK: query: ALTER TABLE t_complex ADD 
COLUMNS (col1 STRUCT<x:int,y:int>) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@t_complex +PREHOOK: Output: default@t_complex +POSTHOOK: query: ALTER TABLE t_complex ADD COLUMNS (col1 STRUCT<x:int,y:int>) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@t_complex +POSTHOOK: Output: default@t_complex +PREHOOK: query: INSERT INTO t_complex VALUES (2, named_struct("x", 10, "y", 20)) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t_complex +POSTHOOK: query: INSERT INTO t_complex VALUES (2, named_struct("x", 10, "y", 20)) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t_complex +PREHOOK: query: ALTER TABLE t_complex ADD COLUMNS (col2 map<string,string>) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@t_complex +PREHOOK: Output: default@t_complex +POSTHOOK: query: ALTER TABLE t_complex ADD COLUMNS (col2 map<string,string>) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@t_complex +POSTHOOK: Output: default@t_complex +PREHOOK: query: INSERT INTO t_complex VALUES (3, named_struct("x", 11, "y", 22), map("k1", "v1", "k2", "v2")) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t_complex +POSTHOOK: query: INSERT INTO t_complex VALUES (3, named_struct("x", 11, "y", 22), map("k1", "v1", "k2", "v2")) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t_complex +PREHOOK: query: ALTER TABLE t_complex ADD COLUMNS (col3 array<int>) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@t_complex +PREHOOK: Output: default@t_complex +POSTHOOK: query: ALTER TABLE t_complex ADD COLUMNS (col3 array<int>) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@t_complex +POSTHOOK: Output: default@t_complex +PREHOOK: query: INSERT INTO t_complex VALUES (4, named_struct("x", 5, "y", 18), map("k22", "v22", "k33", "v44"), array(1, 2, 3)) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table 
+PREHOOK: Output: default@t_complex +POSTHOOK: query: INSERT INTO t_complex VALUES (4, named_struct("x", 5, "y", 18), map("k22", "v22", "k33", "v44"), array(1, 2, 3)) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t_complex +PREHOOK: query: SELECT * FROM t_complex ORDER BY id +PREHOOK: type: QUERY +PREHOOK: Input: default@t_complex +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT * FROM t_complex ORDER BY id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_complex +POSTHOOK: Output: hdfs://### HDFS PATH ### +1 NULL NULL NULL +2 {"x":10,"y":20} NULL NULL +3 {"x":11,"y":22} {"k1":"v1","k2":"v2"} NULL +4 {"x":5,"y":18} {"k22":"v22","k33":"v44"} [1,2,3] diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java index a7ab49c43dfd..8ce85db5e33b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java @@ -519,20 +519,22 @@ private VectorizedColumnReader buildVectorizedParquetReader( int depth) throws IOException { List<ColumnDescriptor> descriptors = getAllColumnDescriptorByType(depth, type, columnDescriptors); + // Support for schema evolution: if the column from the current + // query schema is not present in the file schema, return a dummy + // reader that produces nulls. This allows queries to proceed even + // when new columns have been added after the file was written. 
+ if (!fileSchema.getColumns().contains(descriptors.get(0))) { + return new VectorizedDummyColumnReader(); + } switch (typeInfo.getCategory()) { case PRIMITIVE: if (columnDescriptors == null || columnDescriptors.isEmpty()) { throw new RuntimeException( "Failed to find related Parquet column descriptor with type " + type); } - if (fileSchema.getColumns().contains(descriptors.get(0))) { return new VectorizedPrimitiveColumnReader(descriptors.get(0), pages.getPageReader(descriptors.get(0)), skipTimestampConversion, writerTimezone, skipProlepticConversion, legacyConversionEnabled, type, typeInfo); - } else { - // Support for schema evolution - return new VectorizedDummyColumnReader(); - } case STRUCT: StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; List<VectorizedColumnReader> fieldReaders = new ArrayList<>();