// Patch summary (unified git diff touching three paths):
//
// 1. velox/dwio/parquet/reader/PageReader.cpp, PageReader::makeDecoder():
//    in the thrift::Type::FIXED_LEN_BYTE_ARRAY case, the condition that
//    selects stringDecoder_ is widened from isVarbinary() to
//    isVarbinary() || isVarchar(). The decoder is still constructed from
//    pageData_, pageData_ + encodedDataSize_ and type_->typeLength_.
//
// 2. velox/dwio/parquet/tests/examples/uuid.parquet: new binary fixture.
//
// 3. velox/dwio/parquet/tests/reader/ParquetReaderTest.cpp: adds
//    TEST_F(ParquetReaderTest, readFixedLenBinaryAsStringFromUuid). The test
//    opens uuid.parquet with the file schema ROW({"uuid_field"}, {VARCHAR()}),
//    expects numberOfRows() == 3 and a single VARCHAR child, reads with
//    next(1, result), and compares the value at index 0 of child 0 against
//    "5468454a-363f-ccc8-7d0b-76072a75dfaa".
//
// NOTE(review): the patch text below is collapsed onto two physical lines, so
// hunk boundaries and +/- markers are inline; it presumably needs its line
// breaks restored before `git apply` / `patch` can consume it -- confirm.
// NOTE(review): `std::make_unique(`, `as()` and `asFlatVector()` appear
// without template argument lists; this looks like angle-bracket content lost
// during extraction -- verify against the upstream change before relying on it.
// NOTE(review): the new test asserts only the first of the 3 rows and does not
// check the return value of rowReader->next(); consider whether that coverage
// is sufficient.
// NOTE(review): the trailing context for testV2PageWithZeroMaxDefRep carries a
// comment mentioning enum_type.parquet while the code loads v2_page.parquet;
// looks stale -- confirm (pre-existing, not introduced by this patch).
diff --git a/velox/dwio/parquet/reader/PageReader.cpp b/velox/dwio/parquet/reader/PageReader.cpp index 004b0f6b801c..cf46fdb58184 100644 --- a/velox/dwio/parquet/reader/PageReader.cpp +++ b/velox/dwio/parquet/reader/PageReader.cpp @@ -666,7 +666,7 @@ void PageReader::makeDecoder() { pageData_, pageData_ + encodedDataSize_); break; case thrift::Type::FIXED_LEN_BYTE_ARRAY: - if (type_->type()->isVarbinary()) { + if (type_->type()->isVarbinary() || type_->type()->isVarchar()) { stringDecoder_ = std::make_unique( pageData_, pageData_ + encodedDataSize_, type_->typeLength_); } else { diff --git a/velox/dwio/parquet/tests/examples/uuid.parquet b/velox/dwio/parquet/tests/examples/uuid.parquet new file mode 100644 index 000000000000..91ca9d2061fd Binary files /dev/null and b/velox/dwio/parquet/tests/examples/uuid.parquet differ diff --git a/velox/dwio/parquet/tests/reader/ParquetReaderTest.cpp b/velox/dwio/parquet/tests/reader/ParquetReaderTest.cpp index 1a4ef241bd3c..743f14682627 100644 --- a/velox/dwio/parquet/tests/reader/ParquetReaderTest.cpp +++ b/velox/dwio/parquet/tests/reader/ParquetReaderTest.cpp @@ -1180,6 +1180,34 @@ TEST_F(ParquetReaderTest, readBinaryAsStringFromNation) { 0)); } +TEST_F(ParquetReaderTest, readFixedLenBinaryAsStringFromUuid) { + const std::string filename("uuid.parquet"); + const std::string sample(getExampleFilePath(filename)); + + dwio::common::ReaderOptions readerOptions{leafPool_.get()}; + auto outputRowType = ROW({"uuid_field"}, {VARCHAR()}); + + readerOptions.setFileSchema(outputRowType); + auto reader = createReader(sample, readerOptions); + EXPECT_EQ(reader->numberOfRows(), 3ULL); + auto rowType = reader->typeWithId(); + EXPECT_EQ(rowType->type()->kind(), TypeKind::ROW); + EXPECT_EQ(rowType->size(), 1ULL); + EXPECT_EQ(rowType->childAt(0)->type()->kind(), TypeKind::VARCHAR); + + auto rowReaderOpts = getReaderOpts(outputRowType); + rowReaderOpts.setScanSpec(makeScanSpec(outputRowType)); + auto rowReader = 
reader->createRowReader(rowReaderOpts); + + auto expected = std::string("5468454a-363f-ccc8-7d0b-76072a75dfaa"); + VectorPtr result = BaseVector::create(outputRowType, 0, &(*leafPool_)); + rowReader->next(1, result); + EXPECT_EQ( + expected, + result->as()->childAt(0)->asFlatVector()->valueAt( + 0)); +} + TEST_F(ParquetReaderTest, testV2PageWithZeroMaxDefRep) { // enum_type.parquet contains 1 column (ENUM) with 3 rows. const std::string sample(getExampleFilePath("v2_page.parquet"));