Skip to content

Commit

Permalink
Add query configs to turn off expression evaluation optimizations (fa…
Browse files Browse the repository at this point in the history
…cebookincubator#10902)

Summary:
Pull Request resolved: facebookincubator#10902

This change adds query configs to individually turn off expression
evaluation optimizations like dictionary peeling, dictionary
memoization, reusing shared subexpression results and deferring
lazy vector loading.
The goal is to streamline debugging in production and enable prompt
mitigation of bugs or regressions caused by the optimization or
surfaced due to it.

Note: When peeling is turned off, we still ensure that single-argument
functions receive a flat input.

Differential Revision: D61943875
  • Loading branch information
Bikramjeet Vig authored and facebook-github-bot committed Sep 3, 2024
1 parent fd06bd9 commit 1fb7610
Show file tree
Hide file tree
Showing 8 changed files with 356 additions and 38 deletions.
37 changes: 37 additions & 0 deletions velox/core/QueryConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,43 @@ class QueryConfig {
/// Empty string if only want to trace the query metadata.
static constexpr const char* kQueryTraceNodeIds = "query_trace_node_ids";

/// Disable optimization in expression evaluation to peel common dictionary
/// layer from inputs. Should only be used for debugging.
static constexpr const char* kDebugDisableExpressionWithPeeling =
"debug_disable_expression_with_peeling";

/// Disable optimization in expression evaluation to re-use cached results for
/// common sub-expressions. Should only be used for debugging.
static constexpr const char* kDebugDisableCommonSubExpressions =
"debug_disable_common_sub_expressions";

/// Disable optimization in expression evaluation to re-use cached results
/// between subsequent input batches that are dictionary encoded and have the
/// same alphabet (underlying flat vector). Should only be used for debugging.
static constexpr const char* kDebugDisableExpressionWithMemoization =
"debug_disable_expression_with_memoization";

/// Disable optimization in expression evaluation to delay loading of lazy
/// inputs unless required. Should only be used for debugging.
static constexpr const char* kDebugDisableExpressionWithLazyInputs =
"debug_disable_expression_with_lazy_inputs";

/// Returns the value of 'debug_disable_expression_with_peeling', i.e. whether
/// peeling of common dictionary layers from expression inputs has been turned
/// off. Evaluates to false when the property is not set.
bool debugDisableExpressionsWithPeeling() const {
  const bool peelingDisabled =
      get<bool>(kDebugDisableExpressionWithPeeling, false);
  return peelingDisabled;
}

/// Returns the value of 'debug_disable_common_sub_expressions', i.e. whether
/// re-use of cached results for common sub-expressions has been turned off.
/// Evaluates to false when the property is not set.
bool debugDisableCommonSubExpressions() const {
  const bool reuseDisabled =
      get<bool>(kDebugDisableCommonSubExpressions, false);
  return reuseDisabled;
}

/// Returns the value of 'debug_disable_expression_with_memoization', i.e.
/// whether re-use of cached results across dictionary-encoded input batches
/// has been turned off. Evaluates to false when the property is not set.
bool debugDisableExpressionsWithMemoization() const {
  const bool memoizationDisabled =
      get<bool>(kDebugDisableExpressionWithMemoization, false);
  return memoizationDisabled;
}

/// Returns the value of 'debug_disable_expression_with_lazy_inputs', i.e.
/// whether delayed loading of lazy expression inputs has been turned off.
/// Evaluates to false when the property is not set.
bool debugDisableExpressionsWithLazyInputs() const {
  const bool lazyDeferralDisabled =
      get<bool>(kDebugDisableExpressionWithLazyInputs, false);
  return lazyDeferralDisabled;
}

uint64_t queryMaxMemoryPerNode() const {
return config::toCapacity(
get<std::string>(kQueryMaxMemoryPerNode, "0B"),
Expand Down
58 changes: 57 additions & 1 deletion velox/core/tests/QueryConfigTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,9 @@ TEST_F(QueryConfigTest, enableExpressionEvaluationCacheConfig) {
execCtx->vectorPool() != nullptr, enableExpressionEvaluationCache);

auto evalCtx = std::make_shared<exec::EvalCtx>(execCtx.get());
ASSERT_EQ(evalCtx->cacheEnabled(), enableExpressionEvaluationCache);
ASSERT_EQ(
evalCtx->dictionaryMemoizationEnabled(),
enableExpressionEvaluationCache);

// Test ExecCtx::selectivityVectorPool_.
auto rows = execCtx->getSelectivityVector(100);
Expand All @@ -144,4 +146,58 @@ TEST_F(QueryConfigTest, enableExpressionEvaluationCacheConfig) {
testConfig(false);
}

TEST_F(QueryConfigTest, expressionEvaluationRelatedConfigs) {
  // Checks that the debug switches for expression evaluation optimizations are
  // propagated from QueryConfig to the EvalCtx used during expression
  // evaluation. The default config is checked first, then every switch is
  // turned on by itself.
  std::shared_ptr<memory::MemoryPool> rootPool{
      memory::memoryManager()->addRootPool()};
  std::shared_ptr<memory::MemoryPool> pool{rootPool->addLeafChild("leaf")};

  // Builds QueryCtx -> ExecCtx -> EvalCtx from 'settings' and compares every
  // EvalCtx "enabled" accessor with the negation of the matching QueryConfig
  // "disable" accessor.
  auto verify = [&](std::unordered_map<std::string, std::string> settings) {
    auto queryCtx =
        core::QueryCtx::create(nullptr, QueryConfig{std::move(settings)});
    const auto& config = queryCtx->queryConfig();
    auto execCtx = std::make_shared<core::ExecCtx>(pool.get(), queryCtx.get());
    auto evalCtx = std::make_shared<exec::EvalCtx>(execCtx.get());

    const bool expectPeeling = !config.debugDisableExpressionsWithPeeling();
    ASSERT_EQ(evalCtx->peelingEnabled(), expectPeeling);

    const bool expectSubExprReuse = !config.debugDisableCommonSubExpressions();
    ASSERT_EQ(evalCtx->sharedSubExpressionReuseEnabled(), expectSubExprReuse);

    const bool expectMemoization =
        !config.debugDisableExpressionsWithMemoization();
    ASSERT_EQ(evalCtx->dictionaryMemoizationEnabled(), expectMemoization);

    const bool expectDeferredLazy =
        !config.debugDisableExpressionsWithLazyInputs();
    ASSERT_EQ(evalCtx->deferredLazyLoadingEnabled(), expectDeferredLazy);
  };

  // Produces the raw key/value settings for the four switches. Booleans are
  // serialized with std::to_string, i.e. as "1" or "0".
  auto makeSettings = [](bool disablePeeling,
                         bool disableCommonSubExpressions,
                         bool disableMemoization,
                         bool disableLazyInputs) {
    std::unordered_map<std::string, std::string> settings;
    settings[core::QueryConfig::kDebugDisableExpressionWithPeeling] =
        std::to_string(disablePeeling);
    settings[core::QueryConfig::kDebugDisableCommonSubExpressions] =
        std::to_string(disableCommonSubExpressions);
    settings[core::QueryConfig::kDebugDisableExpressionWithMemoization] =
        std::to_string(disableMemoization);
    settings[core::QueryConfig::kDebugDisableExpressionWithLazyInputs] =
        std::to_string(disableLazyInputs);
    return settings;
  };

  verify({}); // Verify default config.

  // Turn on exactly one switch per iteration, in declaration order.
  constexpr int kNumSwitches = 4;
  for (int active = 0; active < kNumSwitches; ++active) {
    verify(makeSettings(active == 0, active == 1, active == 2, active == 3));
  }
}

} // namespace facebook::velox::core::test
16 changes: 16 additions & 0 deletions velox/docs/configs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,22 @@ Expression Evaluation Configuration
- bool
- false
- This flag makes the Row conversion be applied in a way that the casting row fields are matched by name instead of position.
* - debug_disable_expression_with_peeling
- bool
- false
- Disable optimization in expression evaluation to peel common dictionary layer from inputs. Should only be used for debugging.
* - debug_disable_common_sub_expressions
- bool
- false
- Disable optimization in expression evaluation to re-use cached results for common sub-expressions. Should only be used for debugging.
* - debug_disable_expression_with_memoization
- bool
- false
- Disable optimization in expression evaluation to re-use cached results between subsequent input batches that are dictionary encoded and have the same alphabet (underlying flat vector). Should only be used for debugging.
* - debug_disable_expression_with_lazy_inputs
- bool
- false
- Disable optimization in expression evaluation to delay loading of lazy inputs unless required. Should only be used for debugging.

Memory Management
-----------------
Expand Down
16 changes: 2 additions & 14 deletions velox/expression/EvalCtx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,7 @@ EvalCtx::EvalCtx(core::ExecCtx* execCtx, ExprSet* exprSet, const RowVector* row)
: execCtx_(execCtx),
exprSet_(exprSet),
row_(row),
cacheEnabled_(execCtx->exprEvalCacheEnabled()),
maxSharedSubexprResultsCached_(
execCtx->queryCtx()
? execCtx->queryCtx()
->queryConfig()
.maxSharedSubexprResultsCached()
: core::QueryConfig({}).maxSharedSubexprResultsCached()) {
optimizationParams_(execCtx_) {
// TODO Change the API to replace raw pointers with non-const references.
// Sanity check inputs to prevent crashes.
VELOX_CHECK_NOT_NULL(execCtx);
Expand All @@ -56,13 +50,7 @@ EvalCtx::EvalCtx(core::ExecCtx* execCtx)
: execCtx_(execCtx),
exprSet_(nullptr),
row_(nullptr),
cacheEnabled_(execCtx->exprEvalCacheEnabled()),
maxSharedSubexprResultsCached_(
execCtx->queryCtx()
? execCtx->queryCtx()
->queryConfig()
.maxSharedSubexprResultsCached()
: core::QueryConfig({}).maxSharedSubexprResultsCached()) {
optimizationParams_(execCtx_) {
VELOX_CHECK_NOT_NULL(execCtx);
}

Expand Down
58 changes: 50 additions & 8 deletions velox/expression/EvalCtx.h
Original file line number Diff line number Diff line change
Expand Up @@ -519,16 +519,33 @@ class EvalCtx {
return peeledEncoding_.get();
}

/// Returns true if caching in expression evaluation is enabled, such as
/// Expr::evalWithMemo.
bool cacheEnabled() const {
return cacheEnabled_;
/// Returns true if dictionary memoization optimization is enabled, which
/// allows the reuse of results between consecutive input batches if they are
/// dictionary encoded and have the same alphabet (underlying flat vector).
bool dictionaryMemoizationEnabled() const {
return optimizationParams_.dictionaryMemoizationEnabled_;
}

/// Returns the maximum number of distinct inputs to cache results for in a
/// Returns the maximum number of distinct inputs to cache results in a
/// given shared subexpression.
uint32_t maxSharedSubexprResultsCached() const {
return maxSharedSubexprResultsCached_;
return optimizationParams_.maxSharedSubexprResultsCached_;
}

/// Returns true if peeling is enabled.
bool peelingEnabled() const {
return optimizationParams_.peelingEnabled_;
}

/// Returns true if shared subexpression reuse is enabled.
bool sharedSubExpressionReuseEnabled() const {
return optimizationParams_.sharedSubExpressionReuseEnabled_;
}

/// Returns true if loading of lazy inputs is deferred until they need to be
/// accessed.
bool deferredLazyLoadingEnabled() const {
return optimizationParams_.deferredLazyLoadingEnabled_;
}

private:
Expand All @@ -550,8 +567,33 @@ class EvalCtx {
core::ExecCtx* const execCtx_;
ExprSet* const exprSet_;
const RowVector* row_;
const bool cacheEnabled_;
const uint32_t maxSharedSubexprResultsCached_;

/// Switches and limits for expression evaluation optimizations, read once from
/// the query config at construction time.
struct OptimizationParams {
  /// Reads the settings from the QueryConfig reachable through 'execCtx'. A
  /// default-constructed QueryConfig is used when 'execCtx' or its QueryCtx is
  /// null.
  explicit OptimizationParams(core::ExecCtx* execCtx) {
    const core::QueryConfig defaultQueryConfig = core::QueryConfig({});

    const core::QueryConfig& queryConfig = (execCtx && execCtx->queryCtx())
        ? execCtx->queryCtx()->queryConfig()
        : defaultQueryConfig;

    // 'execCtx' is treated as nullable above, so it must not be dereferenced
    // unconditionally here. This constructor runs from the EvalCtx member
    // initializer list, i.e. before the EvalCtx constructor body gets a chance
    // to reject a null ExecCtx. With a null 'execCtx' memoization is simply
    // reported as disabled.
    dictionaryMemoizationEnabled_ =
        !queryConfig.debugDisableExpressionsWithMemoization() &&
        execCtx != nullptr && execCtx->exprEvalCacheEnabled();
    peelingEnabled_ = !queryConfig.debugDisableExpressionsWithPeeling();
    sharedSubExpressionReuseEnabled_ =
        !queryConfig.debugDisableCommonSubExpressions();
    deferredLazyLoadingEnabled_ =
        !queryConfig.debugDisableExpressionsWithLazyInputs();
    maxSharedSubexprResultsCached_ =
        queryConfig.maxSharedSubexprResultsCached();
  }

  /// True if results may be reused between consecutive dictionary-encoded
  /// input batches. Requires both the debug switch to be off and the
  /// expression evaluation cache of the ExecCtx to be enabled.
  bool dictionaryMemoizationEnabled_;
  /// True unless 'debug_disable_expression_with_peeling' is set.
  bool peelingEnabled_;
  /// True unless 'debug_disable_common_sub_expressions' is set.
  bool sharedSubExpressionReuseEnabled_;
  /// True unless 'debug_disable_expression_with_lazy_inputs' is set.
  bool deferredLazyLoadingEnabled_;
  /// Value of QueryConfig::maxSharedSubexprResultsCached().
  uint32_t maxSharedSubexprResultsCached_;
};
const OptimizationParams optimizationParams_;
bool inputFlatNoNulls_;

// Corresponds 1:1 to children of 'row_'. Set to an inner vector
Expand Down
28 changes: 19 additions & 9 deletions velox/expression/Expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -708,7 +708,7 @@ void Expr::evalFlatNoNulls(
EvalCtx& context,
VectorPtr& result,
const ExprSet* parentExprSet) {
if (shouldEvaluateSharedSubexp()) {
if (shouldEvaluateSharedSubexp(context)) {
evaluateSharedSubexpr(
rows,
context,
Expand Down Expand Up @@ -819,7 +819,8 @@ void Expr::eval(
//
// TODO: Re-work the logic of deciding when to load which field.
if (!hasConditionals_ || distinctFields_.size() == 1 ||
shouldEvaluateSharedSubexp()) {
shouldEvaluateSharedSubexp(context) ||
!context.deferredLazyLoadingEnabled()) {
// Load lazy vectors if any.
for (auto* field : distinctFields_) {
context.ensureFieldLoaded(field->index(context), rows);
Expand Down Expand Up @@ -874,10 +875,8 @@ void Expr::evaluateSharedSubexpr(
}

if (sharedSubexprResultsIter == sharedSubexprResults_.end()) {
auto maxSharedSubexprResultsCached = context.execCtx()
->queryCtx()
->queryConfig()
.maxSharedSubexprResultsCached();
auto maxSharedSubexprResultsCached =
context.maxSharedSubexprResultsCached();
if (sharedSubexprResults_.size() < maxSharedSubexprResultsCached) {
// If we have room left in the cache, add it.
sharedSubexprResultsIter =
Expand Down Expand Up @@ -1039,7 +1038,7 @@ Expr::PeelEncodingsResult Expr::peelEncodings(

// If the expression depends on one dictionary, results are cacheable.
bool mayCache = false;
if (context.cacheEnabled()) {
if (context.dictionaryMemoizationEnabled()) {
mayCache = distinctFields_.size() == 1 &&
VectorEncoding::isDictionary(context.wrapEncoding()) &&
!peeledVectors[0]->memoDisabled();
Expand All @@ -1054,7 +1053,8 @@ void Expr::evalEncodings(
const SelectivityVector& rows,
EvalCtx& context,
VectorPtr& result) {
if (deterministic_ && !skipFieldDependentOptimizations()) {
if (deterministic_ && !skipFieldDependentOptimizations() &&
context.peelingEnabled()) {
bool hasFlat = false;
for (auto* field : distinctFields_) {
if (isFlat(*context.getField(field->index(context)))) {
Expand Down Expand Up @@ -1381,7 +1381,7 @@ void Expr::evalAll(
return;
}

if (shouldEvaluateSharedSubexp()) {
if (shouldEvaluateSharedSubexp(context)) {
evaluateSharedSubexpr(
rows,
context,
Expand Down Expand Up @@ -1462,6 +1462,16 @@ bool Expr::applyFunctionWithPeeling(
VectorPtr& result) {
LocalDecodedVector localDecoded(context);
LocalSelectivityVector newRowsHolder(context);
if (!context.peelingEnabled()) {
if (inputValues_.size() == 1) {
// If we have a single input, velox needs to ensure that the
// vectorFunction would receive a flat input.
BaseVector::flattenVector(inputValues_[0]);
applyFunction(applyRows, context, result);
return true;
}
return false;
}
// Attempt peeling.
std::vector<VectorPtr> peeledVectors;
auto peeledEncoding = PeeledEncoding::peel(
Expand Down
5 changes: 3 additions & 2 deletions velox/expression/Expr.h
Original file line number Diff line number Diff line change
Expand Up @@ -486,8 +486,9 @@ class Expr {
/// Evaluation of such expression is optimized by memoizing and reusing
/// the results of prior evaluations. That logic is implemented in
/// 'evaluateSharedSubexpr'.
bool shouldEvaluateSharedSubexp() const {
return deterministic_ && isMultiplyReferenced_ && !inputs_.empty();
bool shouldEvaluateSharedSubexp(EvalCtx& context) const {
return deterministic_ && isMultiplyReferenced_ && !inputs_.empty() &&
context.sharedSubExpressionReuseEnabled();
}

/// Evaluate common sub-expression. Check if sharedSubexprValues_ already has
Expand Down
Loading

0 comments on commit 1fb7610

Please sign in to comment.