diff --git a/velox/dwio/parquet/reader/ParquetReader.cpp b/velox/dwio/parquet/reader/ParquetReader.cpp index 6200498a7137..ceeb4ee344ae 100644 --- a/velox/dwio/parquet/reader/ParquetReader.cpp +++ b/velox/dwio/parquet/reader/ParquetReader.cpp @@ -528,10 +528,16 @@ TypePtr ReaderBase::convertType( VELOX_FAIL( "UTF8 converted type can only be set for thrift::Type::(FIXED_LEN_)BYTE_ARRAY"); } + case thrift::ConvertedType::ENUM: { + VELOX_CHECK_EQ( + schemaElement.type, + thrift::Type::BYTE_ARRAY, + "ENUM converted type can only be set for value of thrift::Type::BYTE_ARRAY"); + return VARCHAR(); + } case thrift::ConvertedType::MAP: case thrift::ConvertedType::MAP_KEY_VALUE: case thrift::ConvertedType::LIST: - case thrift::ConvertedType::ENUM: case thrift::ConvertedType::TIME_MILLIS: case thrift::ConvertedType::TIME_MICROS: case thrift::ConvertedType::JSON: @@ -539,7 +545,7 @@ TypePtr ReaderBase::convertType( case thrift::ConvertedType::INTERVAL: default: VELOX_FAIL( - "Unsupported Parquet SchemaElement converted type: ", + "Unsupported Parquet SchemaElement converted type: {}", schemaElement.converted_type); } } else { @@ -565,7 +571,8 @@ TypePtr ReaderBase::convertType( } default: - VELOX_FAIL("Unknown Parquet SchemaElement type: ", schemaElement.type); + VELOX_FAIL( + "Unknown Parquet SchemaElement type: {}", schemaElement.type); } } } diff --git a/velox/dwio/parquet/tests/examples/enum_type.parquet b/velox/dwio/parquet/tests/examples/enum_type.parquet new file mode 100644 index 000000000000..90b53745ce92 Binary files /dev/null and b/velox/dwio/parquet/tests/examples/enum_type.parquet differ diff --git a/velox/dwio/parquet/tests/reader/ParquetReaderTest.cpp b/velox/dwio/parquet/tests/reader/ParquetReaderTest.cpp index a64df210a48a..e3b176ca7359 100644 --- a/velox/dwio/parquet/tests/reader/ParquetReaderTest.cpp +++ b/velox/dwio/parquet/tests/reader/ParquetReaderTest.cpp @@ -944,3 +944,28 @@ TEST_F(ParquetReaderTest, testEmptyRowGroups) { 
assertReadWithReaderAndExpected(fileSchema, *rowReader, expected, *leafPool_); } + +TEST_F(ParquetReaderTest, testEnumType) { + // enum_type.parquet has 1 ENUM column ("test"), 3 rows; it reads as VARCHAR. + const std::string sample(getExampleFilePath("enum_type.parquet")); + + facebook::velox::dwio::common::ReaderOptions readerOptions{leafPool_.get()}; + auto reader = createReader(sample, readerOptions); + EXPECT_EQ(reader->numberOfRows(), 3ULL); + + auto rowType = reader->typeWithId(); + EXPECT_EQ(rowType->type()->kind(), TypeKind::ROW); + EXPECT_EQ(rowType->size(), 1ULL); + + EXPECT_EQ(rowType->childAt(0)->type()->kind(), TypeKind::VARCHAR); + + auto fileSchema = ROW({"test"}, {VARCHAR()}); + auto rowReaderOpts = getReaderOpts(fileSchema); + rowReaderOpts.setScanSpec(makeScanSpec(fileSchema)); + auto rowReader = reader->createRowReader(rowReaderOpts); + + auto expected = + makeRowVector({makeFlatVector<StringView>({"FOO", "BAR", "FOO"})}); + + assertReadWithReaderAndExpected(fileSchema, *rowReader, expected, *leafPool_); +}