Skip to content

Commit

Permalink
Fix checksum Presto aggregate function (facebookincubator#6910)
Browse files Browse the repository at this point in the history
Summary:
PrestoHasher used to ignore null flags for structs producing incorrect hashes.

Fixes facebookincubator#6715 and facebookincubator#6449

Pull Request resolved: facebookincubator#6910

Reviewed By: kgpai

Differential Revision: D49953433

Pulled By: mbasmanova

fbshipit-source-id: 828476371bc649af7b1bd29d80a072cd4bab739a
  • Loading branch information
mbasmanova authored and ericyuliu committed Oct 12, 2023
1 parent 4570624 commit b7dbc11
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 18 deletions.
34 changes: 26 additions & 8 deletions velox/functions/prestosql/aggregates/PrestoHasher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,45 +231,63 @@ void PrestoHasher::hash<TypeKind::ROW>(
BufferPtr& hashes) {
auto baseRow = vector_->base()->as<RowVector>();
auto indices = vector_->indices();
SelectivityVector elementRows;

if (vector_->isIdentityMapping()) {
SelectivityVector elementRows;
if (vector_->isIdentityMapping() && !vector_->mayHaveNulls()) {
elementRows = rows;
} else {
elementRows = SelectivityVector(baseRow->size(), false);
rows.applyToSelected(
[&](auto row) { elementRows.setValid(indices[row], true); });
rows.applyToSelected([&](auto row) {
if (!vector_->isNullAt(row)) {
elementRows.setValid(indices[row], true);
}
});
elementRows.updateBounds();
}

BufferPtr childHashes =
AlignedBuffer::allocate<int64_t>(elementRows.end(), baseRow->pool());

auto rawHashes = hashes->asMutable<int64_t>();
auto rowChildHashes = childHashes->as<int64_t>();

if (isTimestampWithTimeZoneType(vector_->base()->type())) {
// Hash only timestamp value.
children_[0]->hash(baseRow->childAt(0), elementRows, childHashes);
auto rawChildHashes = childHashes->as<int64_t>();
rows.applyToSelected([&](auto row) {
if (!baseRow->isNullAt(indices[row])) {
rawHashes[row] = rowChildHashes[indices[row]];
rawHashes[row] = rawChildHashes[indices[row]];
} else {
rawHashes[row] = 0;
}
});
return;
}

BufferPtr combinedChildHashes =
AlignedBuffer::allocate<int64_t>(elementRows.end(), baseRow->pool());
auto* rawCombinedChildHashes = combinedChildHashes->asMutable<int64_t>();
std::fill_n(rawCombinedChildHashes, rows.end(), 1);

std::fill_n(rawHashes, rows.end(), 1);

for (int i = 0; i < baseRow->childrenSize(); i++) {
children_[i]->hash(baseRow->childAt(i), elementRows, childHashes);

rows.applyToSelected([&](auto row) {
rawHashes[row] = safeHash(rawHashes[row], rowChildHashes[indices[row]]);
auto rawChildHashes = childHashes->as<int64_t>();
elementRows.applyToSelected([&](auto row) {
rawCombinedChildHashes[row] =
safeHash(rawCombinedChildHashes[row], rawChildHashes[row]);
});
}

rows.applyToSelected([&](auto row) {
if (!vector_->isNullAt(row)) {
rawHashes[row] = rawCombinedChildHashes[indices[row]];
} else {
rawHashes[row] = 0;
}
});
}

void PrestoHasher::hash(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -205,16 +205,45 @@ TEST_F(ChecksumAggregateTest, maps) {
}

TEST_F(ChecksumAggregateTest, rows) {
auto row = makeRowVector(
{makeFlatVector<int64_t>({1, 3}), makeFlatVector<int64_t>({2, 4})});
auto row = makeRowVector({
makeFlatVector<int64_t>({1, 3}),
makeFlatVector<int64_t>({2, 4}),
});

assertChecksum(row, "jMIvLQ5YEVg=");

row = makeRowVector(
{makeNullableFlatVector<int64_t>({1, std::nullopt}),
makeNullableFlatVector<int64_t>({std::nullopt, 4})});
row->setNull(0, true);
assertChecksum(row, "nbYF0I9UTeU=");

row->setNull(1, true);
assertChecksum(row, "DpXXC2Pzbjw=");

row = makeRowVector({
makeNullableFlatVector<int64_t>({1, std::nullopt}),
makeNullableFlatVector<int64_t>({std::nullopt, 4}),
});

assertChecksum(row, "6jtxEIUj7Hg=");

row = makeRowVector({
makeRowVector({
makeNullableFlatVector<std::string>({"Hello", "world!"}),
makeNullableFlatVector<bool>({true, false}),
}),
makeNullableFlatVector<int64_t>({17, 4}),
});

assertChecksum(row, "21pwcVg31Kc=");

row = makeRowVector({
makeRowVector({
makeNullableFlatVector<std::string>({"Hello", std::nullopt}),
makeNullableFlatVector<bool>({std::nullopt, false}),
}),
makeNullableFlatVector<int64_t>({std::nullopt, 4}),
});

assertChecksum(row, "Aw9tzUPOiUc=");
}

TEST_F(ChecksumAggregateTest, globalAggregationNoData) {
Expand Down
19 changes: 14 additions & 5 deletions velox/functions/prestosql/aggregates/tests/PrestoHasherTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,16 +310,25 @@ TEST_F(PrestoHasherTest, maps) {
}

TEST_F(PrestoHasherTest, rows) {
auto row = makeRowVector(
{makeFlatVector<int64_t>({1, 3}), makeFlatVector<int64_t>({2, 4})});
auto row = makeRowVector({
makeFlatVector<int64_t>({1, 3}),
makeFlatVector<int64_t>({2, 4}),
});

assertHash(row, {4329740752828761434, 655643799837772474});

row = makeRowVector(
{makeNullableFlatVector<int64_t>({1, std::nullopt}),
makeNullableFlatVector<int64_t>({std::nullopt, 4})});
row = makeRowVector({
makeNullableFlatVector<int64_t>({1, std::nullopt}),
makeNullableFlatVector<int64_t>({std::nullopt, 4}),
});

assertHash(row, {7113531408683827503, -1169223928725763049});

row->setNull(0, true);
assertHash(row, {0, -1169223928725763049});

row->setNull(1, true);
assertHash(row, {0, 0});
}

TEST_F(PrestoHasherTest, wrongVectorType) {
Expand Down

0 comments on commit b7dbc11

Please sign in to comment.