Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Replace columnHasNulls with row stats #11841

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions velox/exec/RowContainer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,6 @@ RowContainer::RowContainer(
if (nullableKeys_) {
++nullOffset;
}
columnHasNulls_.push_back(false);
}
// Make offset at least sizeof pointer so that there is space for a
// free list next pointer below the bit at 'freeFlagOffset_'.
Expand Down Expand Up @@ -217,7 +216,6 @@ RowContainer::RowContainer(
nullOffsets_.push_back(nullOffset);
++nullOffset;
isVariableWidth |= !type->isFixedWidth();
columnHasNulls_.push_back(false);
}
if (hasProbedFlag) {
nullOffsets_.push_back(nullOffset);
Expand Down Expand Up @@ -467,9 +465,6 @@ void RowContainer::freeRowsExtraMemory(
}

// Discards all collected per-column row statistics held in
// 'rowColumnsStats_'. Does nothing when no statistics are present.
void RowContainer::invalidateColumnStats() {
  if (!rowColumnsStats_.empty()) {
    rowColumnsStats_.clear();
  }
}

Expand Down Expand Up @@ -567,7 +562,8 @@ void RowContainer::store(
decoded,
rows,
isKey,
offsets_[column]);
offsets_[column],
column);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is also changed in #11527, FYI.
Thanks.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the information. I see it's already merged. Let me rebase.

} else {
const auto rowColumn = rowColumns_[column];
VELOX_DYNAMIC_TYPE_DISPATCH_ALL(
Expand Down Expand Up @@ -815,7 +811,6 @@ void RowContainer::storeComplexType(
if (decoded.isNullAt(index)) {
VELOX_DCHECK(nullMask);
row[nullByte] |= nullMask;
updateColumnHasNulls(column, true);
return;
}
RowSizeTracker tracker(row[rowSizeOffset_], *stringAllocator_);
Expand Down
18 changes: 6 additions & 12 deletions velox/exec/RowContainer.h
Original file line number Diff line number Diff line change
Expand Up @@ -823,7 +823,8 @@ class RowContainer {

/// Returns true if the specified column may have nulls, false otherwise.
inline bool columnHasNulls(int32_t columnIndex) const {
return columnHasNulls_[columnIndex];
return columnStats(columnIndex).has_value() &&
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/columnHasNulls/columnMayHaveNulls/
!columnStats(columnIndex).has_value() || columnStats(columnIndex)->nullCount() > 0?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As @tanjialiang suggested, I'll separate the nullCount when invalidating columnStats, so that we don't need to check it here.

columnStats(columnIndex)->nullCount() > 0;
}

const std::vector<Accumulator>& accumulators() const {
Expand Down Expand Up @@ -1015,7 +1016,6 @@ class RowContainer {
// Do not leave an uninitialized value in the case of a
// null. This is an error with valgrind/asan.
*reinterpret_cast<T*>(row + offset) = T();
updateColumnHasNulls(columnIndex, true);
return;
}
if constexpr (std::is_same_v<T, StringView>) {
Expand Down Expand Up @@ -1056,6 +1056,7 @@ class RowContainer {
for (int32_t i = 0; i < rows.size(); ++i) {
storeWithNulls<Kind>(
decoded, i, isKey, rows[i], offset, nullByte, nullMask, column);
updateColumnStats(decoded, i, rows[i], column);
}
}

Expand All @@ -1064,9 +1065,11 @@ class RowContainer {
const DecodedVector& decoded,
folly::Range<char**> rows,
bool isKey,
int32_t offset) {
int32_t offset,
int32_t column) {
for (int32_t i = 0; i < rows.size(); ++i) {
storeNoNulls<Kind>(decoded, i, isKey, rows[i], offset);
updateColumnStats(decoded, i, rows[i], column);
}
}

Expand Down Expand Up @@ -1467,12 +1470,6 @@ class RowContainer {
// method is called whenever a row is erased.
void invalidateColumnStats();

// Marks the given column's entry in columnHasNulls_ as true when 'hasNulls'
// is true. An entry that is already true is never reset; entries are false
// by default.
inline void updateColumnHasNulls(int32_t columnIndex, bool hasNulls) {
  if (hasNulls) {
    columnHasNulls_[columnIndex] = true;
  }
}

const std::vector<TypePtr> keyTypes_;
const bool nullableKeys_;
const bool isJoinBuild_;
Expand All @@ -1481,8 +1478,6 @@ class RowContainer {

const std::unique_ptr<HashStringAllocator> stringAllocator_;

std::vector<bool> columnHasNulls_;

// Indicates if we can add new row to this row container. It is set to false
// after user calls 'getRowPartitions()' to create 'rowPartitions' object for
// parallel join build.
Expand Down Expand Up @@ -1637,7 +1632,6 @@ inline void RowContainer::storeWithNulls<TypeKind::HUGEINT>(
if (decoded.isNullAt(rowIndex)) {
row[nullByte] |= nullMask;
memset(row + offset, 0, sizeof(int128_t));
updateColumnHasNulls(columnIndex, true);
return;
}
HugeInt::serialize(decoded.valueAt<int128_t>(rowIndex), row + offset);
Expand Down
Loading