Optimize VectorHasher::makeValueIdsDecoded to not cache hash values when it is not beneficial (facebookincubator#10084)

Summary:
Pull Request resolved: facebookincubator#10084

Similar to facebookincubator#7150, when we need to make IDs for only a small number of rows, fewer than there are dictionary values, using the cache is slower and we should just compute the IDs directly (a minimal sketch of the heuristic follows this commit message).

Related issue: facebookincubator#10057

Reviewed By: mbasmanova

Differential Revision: D58215380

fbshipit-source-id: 50c904f06f4614525d289d36c792bfbf04ed8f6e
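
For illustration, here is a minimal, self-contained C++ sketch of the heuristic this change applies: when the number of selected rows is no larger than the dictionary size, resolve an ID per row directly; otherwise build a per-dictionary-entry cache and reuse it. The types and names here (DictionaryInput, assignIds, idOf, kUnassigned) are hypothetical and not part of Velox; the actual change is in the VectorHasher.cpp diff below.

#include <cassert>
#include <cstdint>
#include <unordered_map>
#include <vector>

// Dictionary-encoded input: each row stores an index into a shared alphabet.
struct DictionaryInput {
  std::vector<int64_t> baseValues; // the dictionary alphabet
  std::vector<int32_t> indices;    // one index into baseValues per row
};

constexpr uint64_t kUnassigned = 0;

// Assigns a dense ID (starting at 1) to the value of each selected row.
void assignIds(
    const DictionaryInput& input,
    const std::vector<int32_t>& selectedRows,
    std::unordered_map<int64_t, uint64_t>& idMap,
    std::vector<uint64_t>& result) {
  auto idOf = [&](int64_t value) {
    // Reuses the existing ID if the value was seen before, else assigns a new one.
    return idMap.emplace(value, idMap.size() + 1).first->second;
  };

  if (selectedRows.size() <= input.baseValues.size()) {
    // Few rows relative to the dictionary: look up each row directly.
    // Most entries of a per-dictionary-value cache would go unused.
    for (auto row : selectedRows) {
      result[row] = idOf(input.baseValues[input.indices[row]]);
    }
    return;
  }

  // Many rows: compute one ID per distinct dictionary entry lazily and
  // reuse it for every row that points at the same entry.
  std::vector<uint64_t> cached(input.baseValues.size(), kUnassigned);
  for (auto row : selectedRows) {
    auto baseRow = input.indices[row];
    if (cached[baseRow] == kUnassigned) {
      cached[baseRow] = idOf(input.baseValues[baseRow]);
    }
    result[row] = cached[baseRow];
  }
}

int main() {
  DictionaryInput input;
  input.baseValues = {100, 200, 300};
  input.indices = {2, 0, 2, 1};
  std::vector<uint64_t> result(input.indices.size(), kUnassigned);
  std::unordered_map<int64_t, uint64_t> idMap;

  // Only two rows selected, fewer than the three dictionary values:
  // takes the direct, cache-free path.
  assignIds(input, {0, 2}, idMap, result);
  assert(result[0] == result[2]); // both rows point at baseValues[2]
  return 0;
}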
Yuhta authored and facebook-github-bot committed Jun 6, 2024
1 parent 79943a7 commit 7164f92
Showing 4 changed files with 103 additions and 38 deletions.
84 changes: 46 additions & 38 deletions velox/exec/VectorHasher.cpp
@@ -181,6 +181,36 @@ bool VectorHasher::makeValueIdsFlatWithNulls<bool>(
return true;
}

template <typename T, bool mayHaveNulls>
void VectorHasher::makeValueIdForOneRow(
const uint64_t* nulls,
vector_size_t row,
const T* values,
vector_size_t valueRow,
uint64_t* result,
bool& success) {
if constexpr (mayHaveNulls) {
if (bits::isBitNull(nulls, row)) {
if (multiplier_ == 1) {
result[row] = 0;
}
return;
}
}
T value = values[valueRow];
if (!success) {
analyzeValue(value);
return;
}
auto id = valueId(value);
if (id == kUnmappable) {
success = false;
analyzeValue(value);
} else {
result[row] = multiplier_ == 1 ? id : result[row] + multiplier_ * id;
}
}

template <typename T>
bool VectorHasher::makeValueIdsFlatNoNulls(
const SelectivityVector& rows,
@@ -192,20 +222,7 @@ bool VectorHasher::makeValueIdsFlatNoNulls(

bool success = true;
rows.applyToSelected([&](vector_size_t row) INLINE_LAMBDA {
T value = values[row];
if (!success) {
// If all were not mappable and we do not remove unmappable,
// we just analyze the remaining so we can decide the hash mode.
analyzeValue(value);
return;
}
uint64_t id = valueId(value);
if (id == kUnmappable) {
success = false;
analyzeValue(value);
return;
}
result[row] = multiplier_ == 1 ? id : result[row] + multiplier_ * id;
makeValueIdForOneRow<T, false>(nullptr, row, values, row, result, success);
});

return success;
@@ -220,26 +237,7 @@ bool VectorHasher::makeValueIdsFlatWithNulls(

bool success = true;
rows.applyToSelected([&](vector_size_t row) INLINE_LAMBDA {
if (bits::isBitNull(nulls, row)) {
if (multiplier_ == 1) {
result[row] = 0;
}
return;
}
T value = values[row];
if (!success) {
// If all were not mappable we just analyze the remaining so we can decide
// the hash mode.
analyzeValue(value);
return;
}
uint64_t id = valueId(value);
if (id == kUnmappable) {
success = false;
analyzeValue(value);
return;
}
result[row] = multiplier_ == 1 ? id : result[row] + multiplier_ * id;
makeValueIdForOneRow<T, true>(nulls, row, values, row, result, success);
});
return success;
}
@@ -248,13 +246,23 @@ template <typename T, bool mayHaveNulls>
bool VectorHasher::makeValueIdsDecoded(
const SelectivityVector& rows,
uint64_t* result) {
cachedHashes_.resize(decoded_.base()->size());
std::fill(cachedHashes_.begin(), cachedHashes_.end(), 0);

auto indices = decoded_.indices();
auto values = decoded_.data<T>();

bool success = true;

if (rows.countSelected() <= decoded_.base()->size()) {
// Caching hashes per dictionary value is not beneficial in this case, so we don't use it.
auto* nulls = decoded_.nulls(&rows);
rows.applyToSelected([&](vector_size_t row) INLINE_LAMBDA {
makeValueIdForOneRow<T, mayHaveNulls>(
nulls, row, values, indices[row], result, success);
});
return success;
}

cachedHashes_.resize(decoded_.base()->size());
std::fill(cachedHashes_.begin(), cachedHashes_.end(), 0);

int numCachedHashes = 0;
rows.testSelected([&](vector_size_t row) INLINE_LAMBDA {
if constexpr (mayHaveNulls) {
9 changes: 9 additions & 0 deletions velox/exec/VectorHasher.h
@@ -338,6 +338,15 @@ class VectorHasher {
template <TypeKind Kind>
bool makeValueIds(const SelectivityVector& rows, uint64_t* result);

template <typename T, bool mayHaveNulls>
FOLLY_ALWAYS_INLINE void makeValueIdForOneRow(
const uint64_t* nulls,
vector_size_t row,
const T* values,
vector_size_t valueRow,
uint64_t* result,
bool& success);

template <typename T>
bool makeValueIdsFlatNoNulls(const SelectivityVector& rows, uint64_t* result);

30 changes: 30 additions & 0 deletions velox/exec/benchmarks/VectorHasherBenchmark.cpp
@@ -239,6 +239,36 @@ BENCHMARK(computeValueIdsLowCardinalityNotAllUsed) {
}
}

BENCHMARK(computeValueIdsDictionaryForFiltering) {
folly::BenchmarkSuspender suspender;

vector_size_t cardinality = 30'000'000;
vector_size_t batchSize = 300;
BenchmarkBase base;

auto data = base.vectorMaker().flatVector<int64_t>(
cardinality, [](vector_size_t row) { return row; });
BufferPtr indices = allocateIndices(batchSize, base.pool());
auto rawIndices = indices->asMutable<vector_size_t>();
// Pick every 1,000th value so only a small fraction of the large dictionary is referenced.
for (size_t i = 0; i < batchSize; ++i) {
rawIndices[i] = i * 1000;
}
auto values = BaseVector::wrapInDictionary(nullptr, indices, batchSize, data);

for (int i = 0; i < 10; i++) {
raw_vector<uint64_t> hashes(batchSize);
SelectivityVector rows(batchSize);
VectorHasher hasher(BIGINT(), 0);
hasher.decode(*values, rows);
suspender.dismiss();

bool ok = hasher.computeValueIds(rows, hashes);
folly::doNotOptimizeAway(ok);
suspender.rehire();
}
}

int main(int argc, char** argv) {
folly::Init init{&argc, &argv};
memory::MemoryManager::initialize({});
18 changes: 18 additions & 0 deletions velox/exec/tests/VectorHasherTest.cpp
@@ -827,6 +827,24 @@ TEST_F(VectorHasherTest, computeValueIdsStrings) {
}

ASSERT_LE(uniqueValues.size(), multiplier);

{
result[42] = 0xAAAAAAAAAAAAAAAA;
VectorHasher hasher(dictionaryVectors[0]->type(), 0);
SelectivityVector oneRow(size, false);
oneRow.setValid(42, true);
oneRow.updateBounds();
hasher.decode(*dictionaryVectors[0], oneRow);
ASSERT_FALSE(hasher.computeValueIds(oneRow, result));
uint64_t asRange;
uint64_t asDistinct;
hasher.cardinality(0, asRange, asDistinct);
ASSERT_EQ(asDistinct, 2);
hasher.enableValueIds(1, 0);
hasher.decode(*dictionaryVectors[0], oneRow);
ASSERT_TRUE(hasher.computeValueIds(oneRow, result));
ASSERT_EQ(result[42], 1);
}
}

namespace {
