Skip to content

Commit

Permalink
Optimize and rename unwrapRows. (#6664)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #6664

Move unwrapRows to LazyVector and rename it to selectBaseRowsToLoad.
Also apply several optimizations to minimize both the number of rows selected for loading and the
size of the allocated selectivity vector.

Reviewed By: kevinwilfong

Differential Revision: D49461831

fbshipit-source-id: e524587847c9ab92b9ace2a8d607829763670f51
  • Loading branch information
laithsakka authored and facebook-github-bot committed Oct 10, 2023
1 parent 9043928 commit fefcd9f
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 34 deletions.
25 changes: 0 additions & 25 deletions velox/vector/DecodedVector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -413,31 +413,6 @@ VectorPtr DecodedVector::wrap(
std::move(data));
}

// Translates 'rows', expressed over the encoded vector, into the matching set
// of rows of the base vector. 'unwrapped' is always resized to the size of the
// base vector before the matching rows are selected.
void DecodedVector::unwrapRows(
    SelectivityVector& unwrapped,
    const SelectivityVector& rows) const {
  const auto baseSize = baseVector_->size();

  if (isIdentityMapping_) {
    // No indirection: base rows are the requested rows themselves.
    if (rows.isAllSelected()) {
      // Every row is requested, so select the whole base vector.
      unwrapped.resizeFill(baseSize, true);
      return;
    }
    unwrapped.resizeFill(baseSize, false);
    unwrapped.select(rows);
    unwrapped.updateBounds();
    return;
  }

  // Start from an empty selection and mark only the referenced base rows.
  unwrapped.resizeFill(baseSize, false);
  if (isConstantMapping_) {
    // All rows map to the same base row.
    unwrapped.setValid(constantIndex_, true);
  } else {
    rows.applyToSelected([this, &unwrapped](vector_size_t row) {
      // Null rows do not reference any base row.
      if (isNullAt(row)) {
        return;
      }
      unwrapped.setValid(index(row), true);
    });
  }
  unwrapped.updateBounds();
}

const uint64_t* DecodedVector::nulls() {
if (allNulls_.has_value()) {
return allNulls_.value();
Expand Down
6 changes: 0 additions & 6 deletions velox/vector/DecodedVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -264,12 +264,6 @@ class DecodedVector {
return wrap(std::move(data), wrapper, rows.end());
}

// Given a SelectivityVector 'rows', updates 'unwrapped' resizing it to match
// the base Vector and selecting the rows in the base Vector that correspond
// to those selected by 'rows' in the original encoded Vector.
//
// 'unwrapped' is reset with resizeFill() before any row is selected. For an
// identity mapping the selection of 'rows' is applied directly. For a constant
// mapping only the constant index is selected. Otherwise each selected row
// that is not null selects the base row it maps to through index(row); null
// rows select nothing.
void unwrapRows(SelectivityVector& unwrapped, const SelectivityVector& rows)
const;

struct DictionaryWrapping {
BufferPtr indices;
BufferPtr nulls;
Expand Down
51 changes: 48 additions & 3 deletions velox/vector/LazyVector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,50 @@ void LazyVector::ensureLoadedRows(
}
}

namespace {
// Fills 'baseRows' with the rows of decoded.base() that have to be loaded so
// that the rows selected in 'rows' (expressed over the decoded vector) are
// covered. 'baseRows' is resized by this call: to rows.end() for an identity
// mapping, to the constant index + 1 for a constant mapping, and to the size
// of the base vector otherwise.
void selectBaseRowsToLoad(
    const DecodedVector& decoded,
    SelectivityVector& baseRows,
    const SelectivityVector& rows) {
  VELOX_DCHECK(
      decoded.base()->encoding() == VectorEncoding::Simple::ROW ||
      decoded.base()->encoding() == VectorEncoding::Simple::LAZY);

  if (decoded.isIdentityMapping()) {
    // No indirection: base rows are the requested rows themselves.
    const bool loadAll = rows.isAllSelected();
    baseRows.resizeFill(rows.end(), loadAll);
    if (!loadAll) {
      baseRows.select(rows);
    }

    // Rows that are null in the base vector are dropped from the selection.
    if (const auto* baseNulls = decoded.base()->rawNulls()) {
      baseRows.deselectNulls(baseNulls, rows.begin(), rows.end());
    }

    if (!loadAll) {
      baseRows.updateBounds();
    }
    return;
  }

  if (decoded.isConstantMapping()) {
    // All rows map to one base row; size the selection just large enough to
    // hold that index.
    const auto constantIndex = decoded.index(0);
    baseRows.resizeFill(constantIndex + 1, false);
    baseRows.setValid(constantIndex, true);
  } else {
    baseRows.resizeFill(decoded.base()->size(), false);
    rows.applyToSelected([&decoded, &baseRows](vector_size_t row) {
      // Null rows do not reference any base row.
      if (decoded.isNullAt(row)) {
        return;
      }
      baseRows.setValid(decoded.index(row), true);
    });
  }

  baseRows.updateBounds();
}
} // namespace

// static
void LazyVector::ensureLoadedRowsImpl(
const VectorPtr& vector,
Expand All @@ -126,8 +170,8 @@ void LazyVector::ensureLoadedRowsImpl(
if (decoded.base()->encoding() != VectorEncoding::Simple::LAZY) {
if (decoded.base()->encoding() == VectorEncoding::Simple::ROW &&
isLazyNotLoaded(*decoded.base())) {
decoded.unwrapRows(baseRows, rows);
auto* rowVector = decoded.base()->asUnchecked<RowVector>();
selectBaseRowsToLoad(decoded, baseRows, rows);
DecodedVector decodedChild;
SelectivityVector childRows;
for (auto child : rowVector->children()) {
Expand Down Expand Up @@ -160,7 +204,7 @@ void LazyVector::ensureLoadedRowsImpl(
rowSet = RowSet(rowNumbers);
}
} else {
decoded.unwrapRows(baseRows, rows);
selectBaseRowsToLoad(decoded, baseRows, rows);
rowNumbers.resize(baseRows.end());
rowNumbers.resize(simd::indicesOfSetBits(
baseRows.asRange().bits(), 0, baseRows.end(), rowNumbers.data()));
Expand All @@ -173,7 +217,8 @@ void LazyVector::ensureLoadedRowsImpl(

// The loaded vector can itself also be lazy, so we load recursively.
if (isLazyNotLoaded(*baseLazyVector->vector_)) {
decoded.unwrapRows(baseRows, rows);
// We do not need to decode all rows.
selectBaseRowsToLoad(decoded, baseRows, rows);
decoded.decode(*baseLazyVector->vector_, baseRows, false);
SelectivityVector nestedRows;
ensureLoadedRowsImpl(
Expand Down

0 comments on commit fefcd9f

Please sign in to comment.