Skip to content

Commit

Permalink
Optimize readWithVisitor for TrivialEncoding and MainlyConstantEncoding
Browse files Browse the repository at this point in the history
Summary:
- Fast path for `TrivialEncoding::readWithVisitor`
- Fast path for `MainlyConstantEncoding::readWithVisitor`
- Store `encodingType`, `dataType`, `rowCount` in `Encoding` object memory to reduce memory fetches from `data_`
- Use skip functor only in `readWithVisitorSlow` to avoid virtual call cost

Differential Revision: D58085138
  • Loading branch information
Yuhta authored and facebook-github-bot committed Jun 3, 2024
1 parent 277d5c5 commit 2e541f7
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 7 deletions.
13 changes: 7 additions & 6 deletions velox/dwio/common/SelectiveColumnReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,13 @@ class SelectiveColumnReader {

// Returns a pointer to output rows with at least 'size' elements available.
// 'outputRows_' is grown by 'size' elements and the returned pointer
// addresses the first of the newly added elements; entries that were already
// present are left in front of it.
vector_size_t* mutableOutputRows(int32_t size) {
  // Number of rows already present before growing. Kept as a local: it is
  // only needed to compute the offset of the first new element.
  const auto numOutConfirmed = outputRows_.size();
  outputRows_.resize(numOutConfirmed + size);
  // data() is read after resize() so the pointer refers to the grown buffer.
  return outputRows_.data() + numOutConfirmed;
}

// Accessor for 'rawValues_'; hands the stored pointer back untyped.
void* rawValues() { return rawValues_; }

template <typename T>
Expand Down Expand Up @@ -616,9 +620,6 @@ class SelectiveColumnReader {
// Rows passing the filter in readWithVisitor. Must stay
// constant between consecutive calls to read().
raw_vector<vector_size_t> outputRows_;
// Index of last set value in outputRows. Values between this and
// size() can be used as scratchpad inside read().
vector_size_t numOutConfirmed_;
// The row number
// corresponding to each element in 'values_'
raw_vector<vector_size_t> valueRows_;
Expand Down
1 change: 0 additions & 1 deletion velox/dwio/common/SelectiveColumnReaderInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@ void SelectiveColumnReader::prepareRead(
outputRows_.clear();
// is part of read() and after read returns getValues may be called.
mayGetValues_ = true;
numOutConfirmed_ = 0;
numValues_ = 0;
valueSize_ = sizeof(T);
inputRows_ = rows;
Expand Down
22 changes: 22 additions & 0 deletions velox/dwio/common/tests/utils/DataSetBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,28 @@ class DataSetBuilder {
return *this;
}

/// Rewrites the values of 'field' in every batch so that most non-null rows
/// share a single value.
///
/// For each batch independently, every non-null row is selected with
/// probability 0.95. The first selected row supplies the batch's constant
/// (its value is left as is); every later selected row is overwritten with
/// that constant. Null rows and unselected rows are not modified.
///
/// The child vector is accessed through as<FlatVector<T>>() and the result is
/// used without a null check, so 'field' is assumed to resolve to a flat
/// vector of T.
///
/// @param field Subfield identifying the column to rewrite.
/// @return *this, to allow chaining.
template <typename T>
DataSetBuilder& withIntMainlyConstantForField(const common::Subfield& field) {
  // Probability that a non-null row takes part in the constant run.
  constexpr double kConstantProbability = 0.95;

  for (auto& batch : *batches_) {
    // Constant for this batch; unset until the first selected row is seen.
    std::optional<T> constant;
    auto* column = dwio::common::getChildBySubfield(batch.get(), field)
                       ->as<FlatVector<T>>();

    for (int row = 0; row < column->size(); ++row) {
      if (column->isNullAt(row)) {
        continue;
      }
      // Exactly one random draw per non-null row.
      const bool selected =
          folly::Random::randDouble01(rng_) < kConstantProbability;
      if (!selected) {
        continue;
      }
      if (constant.has_value()) {
        column->set(row, *constant);
      } else {
        constant = column->valueAt(row);
      }
    }
  }
  return *this;
}

template <typename T>
DataSetBuilder& withQuantizedFloatForField(
const common::Subfield& field,
Expand Down
5 changes: 5 additions & 0 deletions velox/dwio/common/tests/utils/E2EFilterTestBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,11 @@ class E2EFilterTestBase : public testing::Test {
dataSetBuilder_->withIntRleForField<T>(Subfield(fieldName));
}

template <typename T>
void makeIntMainlyConstant(const std::string& fieldName) {
dataSetBuilder_->withIntMainlyConstantForField<T>(Subfield(fieldName));
}

template <typename T>
void makeQuantizedFloat(
const std::string& fieldName,
Expand Down

0 comments on commit 2e541f7

Please sign in to comment.