Skip to content

Commit

Permalink
fix(HashJoin): Stop HashProbe filter evaluation from reusing filterRe…
Browse files Browse the repository at this point in the history
…sult_ across vectors

Summary:
This change stops the `filterResult_` vector<VectorPtr> from being reused for every subsequent call to evalFilter. This reuse causes index out of range errors:

```
Exception: VeloxRuntimeError
Error Source: RUNTIME
Error Code: INVALID_STATE
Reason: (128 vs. 128) Dictionary index must be less than base vector's size. Index: 128.
Retriable: False
Expression: rawIndices_[i] < dictionaryValues_->size()
Context: Top-level Expression: gt(cast((t_k1) as BIGINT), 1000:BIGINT)
Additional Context: Operator: HashProbe[2] 1
Function: validate
File: buck-out/v2/gen/fbcode/16b07710e10499f0/velox/vector/__velox_vector__/buck-headers/velox/vector/DictionaryVector-inl.h
Line: 215
```

Differential Revision: D67420798
  • Loading branch information
Daniel Hunte authored and facebook-github-bot committed Dec 18, 2024
1 parent e3f7c5f commit 588fffd
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 0 deletions.
1 change: 1 addition & 0 deletions velox/exec/HashProbe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1572,6 +1572,7 @@ int32_t HashProbe::evalFilter(int32_t numRows) {
}
}
}
filterResult_.clear();
VELOX_CHECK_LE(numPassed, outputTableRowsCapacity_);
return numPassed;
}
Expand Down
30 changes: 30 additions & 0 deletions velox/exec/tests/HashJoinTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7827,6 +7827,36 @@ DEBUG_ONLY_TEST_F(HashJoinTest, hashProbeSpill) {
}
}

// Runs a left-semi-project hash join with a join filter while a test-value
// hook invokes testingRunArbitration() on hash-probe memory pools, then
// compares the output against the reference query.
DEBUG_ONLY_TEST_F(HashJoinTest, hashProbeSpillWithFilter) {
  // Hook into the driver's getOutput test point; only operators whose pool is
  // a hash-probe pool trigger arbitration (presumably forcing the probe side
  // to spill -- confirm against testingRunArbitration).
  SCOPED_TESTVALUE_SET(
      "facebook::velox::exec::Driver::runInternal::getOutput",
      std::function<void(Operator*)>([&](Operator* op) {
        if (isHashProbeMemoryPool(*op->pool())) {
          testingRunArbitration(op->pool());
        }
      }));

  // Input batches are generated with 128 rows per vector.
  fuzzerOpts_.vectorSize = 128;
  auto probeInput = createVectors(6, probeType_, fuzzerOpts_);
  auto buildInput = createVectors(10, buildType_, fuzzerOpts_);

  const auto tempSpillDir = exec::test::TempDirectoryPath::create();

  HashJoinBuilder(*pool_, duckDbQueryRunner_, driverExecutor_.get())
      .numDrivers(1)
      .spillDirectory(tempSpillDir->getPath())
      .probeKeys({"t_k1"})
      .probeVectors(std::move(probeInput))
      .buildKeys({"u_k1"})
      .buildVectors(std::move(buildInput))
      .config(core::QueryConfig::kJoinSpillEnabled, "true")
      .joinType(core::JoinType::kLeftSemiProject)
      .joinFilter("t_k1 > 1000")
      .joinOutputLayout({"t_k1", "match"})
      .referenceQuery(
          "SELECT t.t_k1, EXISTS (SELECT * FROM u WHERE t.t_k1 = u.u_k1 AND t.t_k1 > 1000) FROM t")
      .run();
}

DEBUG_ONLY_TEST_F(HashJoinTest, hashProbeSpillInMiddeOfLastOutputProcessing) {
std::atomic_int outputCountAfterNoMoreInout{0};
std::atomic_bool injectOnce{true};
Expand Down

0 comments on commit 588fffd

Please sign in to comment.