Skip to content

Commit

Permalink
Added RLE decoder for boolean
Browse files Browse the repository at this point in the history
  • Loading branch information
jkhaliqi committed Oct 29, 2024
1 parent 48f6b8d commit f638683
Show file tree
Hide file tree
Showing 17 changed files with 684 additions and 3 deletions.
12 changes: 12 additions & 0 deletions velox/dwio/parquet/reader/PageReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ void PageReader::prepareDataPageV2(const PageHeader& pageHeader, int64_t row) {
}

encodedDataSize_ = pageHeader.uncompressed_page_size - levelsSize;
numNulls_ = pageHeader.data_page_header_v2.num_nulls;
encoding_ = pageHeader.data_page_header_v2.encoding;
if (numRowsInPage_ == kRowsUnknown) {
readPageDefLevels();
Expand Down Expand Up @@ -706,6 +707,15 @@ void PageReader::makeDecoder() {
break;
}
FMT_FALLTHROUGH;
case Encoding::RLE:
switch (parquetType) {
case thrift::Type::BOOLEAN:
rleBooleanDecoder_ = std::make_unique<RleBooleanDecoder>(pageData_, pageData_ + encodedDataSize_, decompressedData_, repetitionLevels_.data(), encodedDataSize_, numNulls_);
break;
default:
VELOX_UNSUPPORTED("RLE decoder only supports boolean");
}
break;
default:
VELOX_UNSUPPORTED("Encoding not supported yet: {}", encoding_);
}
Expand Down Expand Up @@ -748,6 +758,8 @@ void PageReader::skip(int64_t numRows) {
deltaBpDecoder_->skip(toSkip);
} else if (deltaByteArrDecoder_) {
deltaByteArrDecoder_->skip(toSkip);
} else if (rleBooleanDecoder_) {
rleBooleanDecoder_->skip(toSkip);
} else {
VELOX_FAIL("No decoder to skip");
}
Expand Down
21 changes: 19 additions & 2 deletions velox/dwio/parquet/reader/PageReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "velox/dwio/parquet/reader/DeltaBpDecoder.h"
#include "velox/dwio/parquet/reader/DeltaByteArrayDecoder.h"
#include "velox/dwio/parquet/reader/ParquetTypeWithId.h"
#include "velox/dwio/parquet/reader/RleBooleanDecoder.h"
#include "velox/dwio/parquet/reader/RleBpDataDecoder.h"
#include "velox/dwio/parquet/reader/StringDecoder.h"

Expand Down Expand Up @@ -339,9 +340,23 @@ class PageReader {
VELOX_CHECK(!isDictionary(), "BOOLEAN types are never dictionary-encoded");
if (nulls) {
nullsFromFastPath = false;
booleanDecoder_->readWithVisitor<true>(nulls, visitor);
switch (encoding_)
{
case thrift::Encoding::RLE:
rleBooleanDecoder_->readWithVisitor<true>(nulls, visitor);
break;
default:
booleanDecoder_->readWithVisitor<true>(nulls, visitor);
}
} else {
booleanDecoder_->readWithVisitor<false>(nulls, visitor);
switch (encoding_)
{
case thrift::Encoding::RLE:
rleBooleanDecoder_->readWithVisitor<false>(nulls, visitor);
break;
default:
booleanDecoder_->readWithVisitor<false>(nulls, visitor);
}
}
}

Expand Down Expand Up @@ -449,6 +464,7 @@ class PageReader {
// Number of bytes starting at pageData_ for current encoded data.
int32_t encodedDataSize_{0};

int32_t numNulls_{0};
// Below members Keep state between calls to readWithVisitor().

// Original rows in Visitor.
Expand Down Expand Up @@ -500,6 +516,7 @@ class PageReader {
std::unique_ptr<BooleanDecoder> booleanDecoder_;
std::unique_ptr<DeltaBpDecoder> deltaBpDecoder_;
std::unique_ptr<DeltaByteArrayDecoder> deltaByteArrDecoder_;
std::unique_ptr<RleBooleanDecoder> rleBooleanDecoder_;
// Add decoders for other encodings here.
};

Expand Down
Loading

0 comments on commit f638683

Please sign in to comment.