Skip to content

Commit

Permalink
Replace columnHasNulls with row stats
Browse files Browse the repository at this point in the history
  • Loading branch information
zation99 committed Dec 12, 2024
1 parent 7205403 commit 08006fc
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 19 deletions.
9 changes: 2 additions & 7 deletions velox/exec/RowContainer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,6 @@ RowContainer::RowContainer(
if (nullableKeys_) {
++nullOffset;
}
columnHasNulls_.push_back(false);
}
// Make offset at least sizeof pointer so that there is space for a
// free list next pointer below the bit at 'freeFlagOffset_'.
Expand Down Expand Up @@ -217,7 +216,6 @@ RowContainer::RowContainer(
nullOffsets_.push_back(nullOffset);
++nullOffset;
isVariableWidth |= !type->isFixedWidth();
columnHasNulls_.push_back(false);
}
if (hasProbedFlag) {
nullOffsets_.push_back(nullOffset);
Expand Down Expand Up @@ -467,9 +465,6 @@ void RowContainer::freeRowsExtraMemory(
}

void RowContainer::invalidateColumnStats() {
if (rowColumnsStats_.empty()) {
return;
}
rowColumnsStats_.clear();
}

Expand Down Expand Up @@ -567,7 +562,8 @@ void RowContainer::store(
decoded,
rows,
isKey,
offsets_[column]);
offsets_[column],
column);
} else {
const auto rowColumn = rowColumns_[column];
VELOX_DYNAMIC_TYPE_DISPATCH_ALL(
Expand Down Expand Up @@ -815,7 +811,6 @@ void RowContainer::storeComplexType(
if (decoded.isNullAt(index)) {
VELOX_DCHECK(nullMask);
row[nullByte] |= nullMask;
updateColumnHasNulls(column, true);
return;
}
RowSizeTracker tracker(row[rowSizeOffset_], *stringAllocator_);
Expand Down
18 changes: 6 additions & 12 deletions velox/exec/RowContainer.h
Original file line number Diff line number Diff line change
Expand Up @@ -823,7 +823,8 @@ class RowContainer {

/// Returns true if specified column may have nulls, false otherwise.
inline bool columnHasNulls(int32_t columnIndex) const {
return columnHasNulls_[columnIndex];
return columnStats(columnIndex).has_value() &&
columnStats(columnIndex)->nullCount() > 0;
}

const std::vector<Accumulator>& accumulators() const {
Expand Down Expand Up @@ -1015,7 +1016,6 @@ class RowContainer {
// Do not leave an uninitialized value in the case of a
// null. This is an error with valgrind/asan.
*reinterpret_cast<T*>(row + offset) = T();
updateColumnHasNulls(columnIndex, true);
return;
}
if constexpr (std::is_same_v<T, StringView>) {
Expand Down Expand Up @@ -1056,6 +1056,7 @@ class RowContainer {
for (int32_t i = 0; i < rows.size(); ++i) {
storeWithNulls<Kind>(
decoded, i, isKey, rows[i], offset, nullByte, nullMask, column);
updateColumnStats(decoded, i, rows[i], column);
}
}

Expand All @@ -1064,9 +1065,11 @@ class RowContainer {
const DecodedVector& decoded,
folly::Range<char**> rows,
bool isKey,
int32_t offset) {
int32_t offset,
int32_t column) {
for (int32_t i = 0; i < rows.size(); ++i) {
storeNoNulls<Kind>(decoded, i, isKey, rows[i], offset);
updateColumnStats(decoded, i, rows[i], column);
}
}

Expand Down Expand Up @@ -1467,12 +1470,6 @@ class RowContainer {
// method is called whenever a row is erased.
void invalidateColumnStats();

// Updates the specific column's columnHasNulls_ flag, if 'hasNulls' is true.
// columnHasNulls_ flag is false by default.
inline void updateColumnHasNulls(int32_t columnIndex, bool hasNulls) {
columnHasNulls_[columnIndex] = columnHasNulls_[columnIndex] || hasNulls;
}

const std::vector<TypePtr> keyTypes_;
const bool nullableKeys_;
const bool isJoinBuild_;
Expand All @@ -1481,8 +1478,6 @@ class RowContainer {

const std::unique_ptr<HashStringAllocator> stringAllocator_;

std::vector<bool> columnHasNulls_;

// Indicates if we can add new row to this row container. It is set to false
// after user calls 'getRowPartitions()' to create 'rowPartitions' object for
// parallel join build.
Expand Down Expand Up @@ -1637,7 +1632,6 @@ inline void RowContainer::storeWithNulls<TypeKind::HUGEINT>(
if (decoded.isNullAt(rowIndex)) {
row[nullByte] |= nullMask;
memset(row + offset, 0, sizeof(int128_t));
updateColumnHasNulls(columnIndex, true);
return;
}
HugeInt::serialize(decoded.valueAt<int128_t>(rowIndex), row + offset);
Expand Down

0 comments on commit 08006fc

Please sign in to comment.