Skip to content

Commit

Permalink
support to read fixed length binary as string. (#10621)
Browse files Browse the repository at this point in the history
Summary:
This is a follow-up to #10399:
it adds support for reading fixed-length binary (FIXED_LEN_BYTE_ARRAY) columns as strings (VARCHAR).

Pull Request resolved: #10621

Reviewed By: Yuhta

Differential Revision: D60527079

Pulled By: kagamiori

fbshipit-source-id: a13f3bb963c57e98d494fb9fc194f2a988ba2bee
  • Loading branch information
kevincmchen authored and facebook-github-bot committed Aug 1, 2024
1 parent b4ea92c commit ede6a69
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 1 deletion.
2 changes: 1 addition & 1 deletion velox/dwio/parquet/reader/PageReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -666,7 +666,7 @@ void PageReader::makeDecoder() {
pageData_, pageData_ + encodedDataSize_);
break;
case thrift::Type::FIXED_LEN_BYTE_ARRAY:
if (type_->type()->isVarbinary()) {
if (type_->type()->isVarbinary() || type_->type()->isVarchar()) {
stringDecoder_ = std::make_unique<StringDecoder>(
pageData_, pageData_ + encodedDataSize_, type_->typeLength_);
} else {
Expand Down
Binary file added velox/dwio/parquet/tests/examples/uuid.parquet
Binary file not shown.
28 changes: 28 additions & 0 deletions velox/dwio/parquet/tests/reader/ParquetReaderTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1180,6 +1180,34 @@ TEST_F(ParquetReaderTest, readBinaryAsStringFromNation) {
0));
}

// Reads the example file uuid.parquet while requesting its single column,
// "uuid_field", as VARCHAR, and checks that the first row decodes to the
// expected UUID text.
//
// Preconditions that later statements dereference (schema shape, number of
// rows actually read, vector casts) use fatal ASSERT_* checks so that a
// mismatch fails the test cleanly instead of crashing on a bad access.
TEST_F(ParquetReaderTest, readFixedLenBinaryAsStringFromUuid) {
  const std::string filename("uuid.parquet");
  const std::string sample(getExampleFilePath(filename));

  dwio::common::ReaderOptions readerOptions{leafPool_.get()};
  // Ask the reader to expose the column as VARCHAR.
  auto outputRowType = ROW({"uuid_field"}, {VARCHAR()});

  readerOptions.setFileSchema(outputRowType);
  auto reader = createReader(sample, readerOptions);
  EXPECT_EQ(reader->numberOfRows(), 3ULL);

  auto rowType = reader->typeWithId();
  ASSERT_EQ(rowType->type()->kind(), TypeKind::ROW);
  // Fatal: childAt(0) below is only valid when exactly this child exists.
  ASSERT_EQ(rowType->size(), 1ULL);
  EXPECT_EQ(rowType->childAt(0)->type()->kind(), TypeKind::VARCHAR);

  auto rowReaderOpts = getReaderOpts(outputRowType);
  rowReaderOpts.setScanSpec(makeScanSpec(outputRowType));
  auto rowReader = reader->createRowReader(rowReaderOpts);

  const std::string expected("5468454a-363f-ccc8-7d0b-76072a75dfaa");
  VectorPtr result = BaseVector::create(outputRowType, 0, leafPool_.get());

  // Fatal: valueAt(0) below requires that one row was actually produced.
  ASSERT_EQ(rowReader->next(1, result), 1ULL);
  ASSERT_NE(result, nullptr);

  auto* rowVector = result->as<RowVector>();
  ASSERT_NE(rowVector, nullptr);

  auto* uuidColumn = rowVector->childAt(0)->asFlatVector<StringView>();
  ASSERT_NE(uuidColumn, nullptr);

  EXPECT_EQ(expected, uuidColumn->valueAt(0));
}

TEST_F(ParquetReaderTest, testV2PageWithZeroMaxDefRep) {
// enum_type.parquet contains 1 column (ENUM) with 3 rows.
const std::string sample(getExampleFilePath("v2_page.parquet"));
Expand Down

0 comments on commit ede6a69

Please sign in to comment.