diff --git a/velox/core/QueryConfig.h b/velox/core/QueryConfig.h index 814bdef03a26..def08a9de314 100644 --- a/velox/core/QueryConfig.h +++ b/velox/core/QueryConfig.h @@ -357,6 +357,43 @@ class QueryConfig { /// Empty string if only want to trace the query metadata. static constexpr const char* kQueryTraceNodeIds = "query_trace_node_ids"; + /// Disable optimization in expression evaluation to peel common dictionary + /// layer from inputs. + static constexpr const char* kDebugDisableExpressionWithPeeling = + "debug_disable_expression_with_peeling"; + + /// Disable optimization in expression evaluation to re-use cached results for + /// common sub-expressions. + static constexpr const char* kDebugDisableCommonSubExpressions = + "debug_disable_common_sub_expressions"; + + /// Disable optimization in expression evaluation to re-use cached results + /// between subsequent input batches that are dictionary encoded and have the + /// same alphabet(underlying flat vector). + static constexpr const char* kDebugDisableExpressionWithMemoization = + "debug_disable_expression_with_memoization"; + + /// Disable optimization in expression evaluation to delay loading of lazy + /// inputs unless required. 
+ static constexpr const char* kDebugDisableExpressionWithLazyInputs = + "debug_disable_expression_with_lazy_inputs"; + + bool debugDisableExpressionsWithPeeling() const { + return get(kDebugDisableExpressionWithPeeling, false); + } + + bool debugDisableCommonSubExpressions() const { + return get(kDebugDisableCommonSubExpressions, false); + } + + bool debugDisableExpressionsWithMemoization() const { + return get(kDebugDisableExpressionWithMemoization, false); + } + + bool debugDisableExpressionsWithLazyInputs() const { + return get(kDebugDisableExpressionWithLazyInputs, false); + } + uint64_t queryMaxMemoryPerNode() const { return config::toCapacity( get(kQueryMaxMemoryPerNode, "0B"), diff --git a/velox/core/QueryCtx.h b/velox/core/QueryCtx.h index c2274cf4af44..89ff229e3a48 100644 --- a/velox/core/QueryCtx.h +++ b/velox/core/QueryCtx.h @@ -229,12 +229,53 @@ class ExecCtx { ExecCtx(memory::MemoryPool* pool, QueryCtx* queryCtx) : pool_(pool), queryCtx_(queryCtx), - exprEvalCacheEnabled_( - !queryCtx || - queryCtx->queryConfig().isExpressionEvaluationCacheEnabled()), + optimizationParams_(queryCtx), vectorPool_( - exprEvalCacheEnabled_ ? std::make_unique(pool) - : nullptr) {} + optimizationParams_.exprEvalCacheEnabled + ? std::make_unique(pool) + : nullptr) {} + + struct OptimizationParams { + explicit OptimizationParams(QueryCtx* queryCtx) { + const core::QueryConfig defaultQueryConfig = core::QueryConfig({}); + + const core::QueryConfig& queryConfig = + queryCtx ? 
queryCtx->queryConfig() : defaultQueryConfig; + + exprEvalCacheEnabled = queryConfig.isExpressionEvaluationCacheEnabled(); + dictionaryMemoizationEnabled = + !queryConfig.debugDisableExpressionsWithMemoization() && + exprEvalCacheEnabled; + peelingEnabled = !queryConfig.debugDisableExpressionsWithPeeling(); + sharedSubExpressionReuseEnabled = + !queryConfig.debugDisableCommonSubExpressions(); + deferredLazyLoadingEnabled = + !queryConfig.debugDisableExpressionsWithLazyInputs(); + maxSharedSubexprResultsCached = + queryConfig.maxSharedSubexprResultsCached(); + } + + /// True if caches in expression evaluation used for performance are + /// enabled, including VectorPool, DecodedVectorPool, SelectivityVectorPool + /// and dictionary memoization. + bool exprEvalCacheEnabled; + /// True if dictionary memoization optimization is enabled during expression + /// evaluation, which allows the reuse of results between consecutive input + /// batches if they are dictionary encoded and have the same + /// alphabet (underlying flat vector). + bool dictionaryMemoizationEnabled; + /// True if peeling is enabled during expression evaluation. + bool peelingEnabled; + /// True if shared subexpression reuse is enabled during expression + /// evaluation. + bool sharedSubExpressionReuseEnabled; + /// True if loading of lazy inputs is deferred till they need to be + /// accessed during expression evaluation. + bool deferredLazyLoadingEnabled; + /// The maximum number of distinct inputs to cache results in a + /// given shared subexpression during expression evaluation. + uint32_t maxSharedSubexprResultsCached; + }; velox::memory::MemoryPool* pool() const { return pool_; @@ -251,7 +292,9 @@ class ExecCtx { /// Prefer using LocalSelectivityVector which takes care of returning the /// vector to the pool on destruction. 
std::unique_ptr getSelectivityVector(int32_t size) { - VELOX_CHECK(exprEvalCacheEnabled_ || selectivityVectorPool_.empty()); + VELOX_CHECK( + optimizationParams_.exprEvalCacheEnabled || + selectivityVectorPool_.empty()); if (selectivityVectorPool_.empty()) { return std::make_unique(size); } @@ -265,7 +308,9 @@ class ExecCtx { // content. The caller is responsible for setting the size and // assigning the contents. std::unique_ptr getSelectivityVector() { - VELOX_CHECK(exprEvalCacheEnabled_ || selectivityVectorPool_.empty()); + VELOX_CHECK( + optimizationParams_.exprEvalCacheEnabled || + selectivityVectorPool_.empty()); if (selectivityVectorPool_.empty()) { return std::make_unique(); } @@ -276,7 +321,7 @@ class ExecCtx { // Returns true if the vector was moved into the pool. bool releaseSelectivityVector(std::unique_ptr&& vector) { - if (exprEvalCacheEnabled_) { + if (optimizationParams_.exprEvalCacheEnabled) { selectivityVectorPool_.push_back(std::move(vector)); return true; } @@ -284,7 +329,8 @@ class ExecCtx { } std::unique_ptr getDecodedVector() { - VELOX_CHECK(exprEvalCacheEnabled_ || decodedVectorPool_.empty()); + VELOX_CHECK( + optimizationParams_.exprEvalCacheEnabled || decodedVectorPool_.empty()); if (decodedVectorPool_.empty()) { return std::make_unique(); } @@ -295,7 +341,7 @@ class ExecCtx { // Returns true if the vector was moved into the pool. 
bool releaseDecodedVector(std::unique_ptr&& vector) { - if (exprEvalCacheEnabled_) { + if (optimizationParams_.exprEvalCacheEnabled) { decodedVectorPool_.push_back(std::move(vector)); return true; } @@ -334,8 +380,8 @@ class ExecCtx { return 0; } - bool exprEvalCacheEnabled() const { - return exprEvalCacheEnabled_; + const OptimizationParams& optimizationParams() const { + return optimizationParams_; } private: @@ -343,8 +389,9 @@ class ExecCtx { memory::MemoryPool* const pool_; QueryCtx* const queryCtx_; - const bool exprEvalCacheEnabled_; - // A pool of preallocated DecodedVectors for use by expressions and operators. + const OptimizationParams optimizationParams_; + // A pool of preallocated DecodedVectors for use by expressions and + // operators. std::vector> decodedVectorPool_; // A pool of preallocated SelectivityVectors for use by expressions // and operators. diff --git a/velox/core/tests/QueryConfigTest.cpp b/velox/core/tests/QueryConfigTest.cpp index 81a199685bcc..c89d44d2fdfa 100644 --- a/velox/core/tests/QueryConfigTest.cpp +++ b/velox/core/tests/QueryConfigTest.cpp @@ -118,12 +118,16 @@ TEST_F(QueryConfigTest, enableExpressionEvaluationCacheConfig) { enableExpressionEvaluationCache); auto execCtx = std::make_shared(pool.get(), queryCtx.get()); - ASSERT_EQ(execCtx->exprEvalCacheEnabled(), enableExpressionEvaluationCache); + ASSERT_EQ( + execCtx->optimizationParams().exprEvalCacheEnabled, + enableExpressionEvaluationCache); ASSERT_EQ( execCtx->vectorPool() != nullptr, enableExpressionEvaluationCache); auto evalCtx = std::make_shared(execCtx.get()); - ASSERT_EQ(evalCtx->cacheEnabled(), enableExpressionEvaluationCache); + ASSERT_EQ( + evalCtx->dictionaryMemoizationEnabled(), + enableExpressionEvaluationCache); // Test ExecCtx::selectivityVectorPool_. 
auto rows = execCtx->getSelectivityVector(100); @@ -144,4 +148,58 @@ testConfig(false); } +TEST_F(QueryConfigTest, expressionEvaluationRelatedConfigs) { + // Verify that the expression evaluation related configs are propagated + // correctly to EvalCtx which is used during expression evaluation. Each + // config is individually set and verified. + std::shared_ptr rootPool{ + memory::memoryManager()->addRootPool()}; + std::shared_ptr pool{rootPool->addLeafChild("leaf")}; + + auto testConfig = + [&](std::unordered_map configData) { + auto queryCtx = + core::QueryCtx::create(nullptr, QueryConfig{std::move(configData)}); + const auto& queryConfig = queryCtx->queryConfig(); + auto execCtx = + std::make_shared(pool.get(), queryCtx.get()); + auto evalCtx = std::make_shared(execCtx.get()); + + ASSERT_EQ( + evalCtx->peelingEnabled(), + !queryConfig.debugDisableExpressionsWithPeeling()); + ASSERT_EQ( + evalCtx->sharedSubExpressionReuseEnabled(), + !queryConfig.debugDisableCommonSubExpressions()); + ASSERT_EQ( + evalCtx->dictionaryMemoizationEnabled(), + !queryConfig.debugDisableExpressionsWithMemoization()); + ASSERT_EQ( + evalCtx->deferredLazyLoadingEnabled(), + !queryConfig.debugDisableExpressionsWithLazyInputs()); + }; + + auto createConfig = [&](bool debugDisableExpressionsWithPeeling, + bool debugDisableCommonSubExpressions, + bool debugDisableExpressionsWithMemoization, + bool debugDisableExpressionsWithLazyInputs) -> auto { + std::unordered_map configData( + {{core::QueryConfig::kDebugDisableExpressionWithPeeling, + std::to_string(debugDisableExpressionsWithPeeling)}, + {core::QueryConfig::kDebugDisableCommonSubExpressions, + std::to_string(debugDisableCommonSubExpressions)}, + {core::QueryConfig::kDebugDisableExpressionWithMemoization, + std::to_string(debugDisableExpressionsWithMemoization)}, + {core::QueryConfig::kDebugDisableExpressionWithLazyInputs, + 
std::to_string(debugDisableExpressionsWithLazyInputs)}}); + return configData; + }; + + testConfig({}); // Verify default config. + testConfig(createConfig(true, false, false, false)); + testConfig(createConfig(false, true, false, false)); + testConfig(createConfig(false, false, true, false)); + testConfig(createConfig(false, false, false, true)); +} + } // namespace facebook::velox::core::test diff --git a/velox/docs/configs.rst b/velox/docs/configs.rst index 746ad0d8cd44..bdcd28d054a8 100644 --- a/velox/docs/configs.rst +++ b/velox/docs/configs.rst @@ -168,6 +168,22 @@ Expression Evaluation Configuration - bool - false - This flag makes the Row conversion to by applied in a way that the casting row field are matched by name instead of position. + * - debug_disable_expression_with_peeling + - bool + - false + - Disable optimization in expression evaluation to peel common dictionary layer from inputs. Should only be used for debugging. + * - debug_disable_common_sub_expressions + - bool + - false + - Disable optimization in expression evaluation to re-use cached results for common sub-expressions. Should only be used for debugging. + * - debug_disable_expression_with_memoization + - bool + - false + - Disable optimization in expression evaluation to re-use cached results between subsequent input batches that are dictionary encoded and have the same alphabet(underlying flat vector). Should only be used for debugging. + * - debug_disable_expression_with_lazy_inputs + - bool + - false + - Disable optimization in expression evaluation to delay loading of lazy inputs unless required. Should only be used for debugging. 
Memory Management ----------------- diff --git a/velox/expression/EvalCtx.cpp b/velox/expression/EvalCtx.cpp index e43f0aae3437..ae87fb28063f 100644 --- a/velox/expression/EvalCtx.cpp +++ b/velox/expression/EvalCtx.cpp @@ -26,16 +26,7 @@ using facebook::velox::common::testutil::TestValue; namespace facebook::velox::exec { EvalCtx::EvalCtx(core::ExecCtx* execCtx, ExprSet* exprSet, const RowVector* row) - : execCtx_(execCtx), - exprSet_(exprSet), - row_(row), - cacheEnabled_(execCtx->exprEvalCacheEnabled()), - maxSharedSubexprResultsCached_( - execCtx->queryCtx() - ? execCtx->queryCtx() - ->queryConfig() - .maxSharedSubexprResultsCached() - : core::QueryConfig({}).maxSharedSubexprResultsCached()) { + : execCtx_(execCtx), exprSet_(exprSet), row_(row) { // TODO Change the API to replace raw pointers with non-const references. // Sanity check inputs to prevent crashes. VELOX_CHECK_NOT_NULL(execCtx); @@ -53,16 +44,7 @@ EvalCtx::EvalCtx(core::ExecCtx* execCtx, ExprSet* exprSet, const RowVector* row) } EvalCtx::EvalCtx(core::ExecCtx* execCtx) - : execCtx_(execCtx), - exprSet_(nullptr), - row_(nullptr), - cacheEnabled_(execCtx->exprEvalCacheEnabled()), - maxSharedSubexprResultsCached_( - execCtx->queryCtx() - ? execCtx->queryCtx() - ->queryConfig() - .maxSharedSubexprResultsCached() - : core::QueryConfig({}).maxSharedSubexprResultsCached()) { + : execCtx_(execCtx), exprSet_(nullptr), row_(nullptr) { VELOX_CHECK_NOT_NULL(execCtx); } diff --git a/velox/expression/EvalCtx.h b/velox/expression/EvalCtx.h index 0d4ba9c655f3..22fb13e478b6 100644 --- a/velox/expression/EvalCtx.h +++ b/velox/expression/EvalCtx.h @@ -519,16 +519,33 @@ class EvalCtx { return peeledEncoding_.get(); } - /// Returns true if caching in expression evaluation is enabled, such as - /// Expr::evalWithMemo. 
- bool cacheEnabled() const { - return cacheEnabled_; + /// Returns true if dictionary memoization optimization is enabled, which + /// allows the reuse of results between consecutive input batches if they are + /// dictionary encoded and have the same alphabet (underlying flat vector). + bool dictionaryMemoizationEnabled() const { + return execCtx_->optimizationParams().dictionaryMemoizationEnabled; } /// Returns the maximum number of distinct inputs to cache results for in a /// given shared subexpression. uint32_t maxSharedSubexprResultsCached() const { - return maxSharedSubexprResultsCached_; + return execCtx_->optimizationParams().maxSharedSubexprResultsCached; + } + + /// Returns true if peeling is enabled. + bool peelingEnabled() const { + return execCtx_->optimizationParams().peelingEnabled; + } + + /// Returns true if shared subexpression reuse is enabled. + bool sharedSubExpressionReuseEnabled() const { + return execCtx_->optimizationParams().sharedSubExpressionReuseEnabled; + } + + /// Returns true if loading of lazy inputs is deferred till they need to be + /// accessed. + bool deferredLazyLoadingEnabled() const { + return execCtx_->optimizationParams().deferredLazyLoadingEnabled; } private: @@ -550,8 +567,6 @@ class EvalCtx { core::ExecCtx* const execCtx_; ExprSet* const exprSet_; const RowVector* row_; - const bool cacheEnabled_; - const uint32_t maxSharedSubexprResultsCached_; bool inputFlatNoNulls_; // Corresponds 1:1 to children of 'row_'. 
Set to an inner vector diff --git a/velox/expression/Expr.cpp b/velox/expression/Expr.cpp index 8be2572bd7a5..ee06286dfa4f 100644 --- a/velox/expression/Expr.cpp +++ b/velox/expression/Expr.cpp @@ -708,7 +708,7 @@ void Expr::evalFlatNoNulls( EvalCtx& context, VectorPtr& result, const ExprSet* parentExprSet) { - if (shouldEvaluateSharedSubexp()) { + if (shouldEvaluateSharedSubexp(context)) { evaluateSharedSubexpr( rows, context, @@ -819,7 +819,8 @@ void Expr::eval( // // TODO: Re-work the logic of deciding when to load which field. if (!hasConditionals_ || distinctFields_.size() == 1 || - shouldEvaluateSharedSubexp()) { + shouldEvaluateSharedSubexp(context) || + !context.deferredLazyLoadingEnabled()) { // Load lazy vectors if any. for (auto* field : distinctFields_) { context.ensureFieldLoaded(field->index(context), rows); @@ -874,10 +875,8 @@ void Expr::evaluateSharedSubexpr( } if (sharedSubexprResultsIter == sharedSubexprResults_.end()) { - auto maxSharedSubexprResultsCached = context.execCtx() - ->queryCtx() - ->queryConfig() - .maxSharedSubexprResultsCached(); + auto maxSharedSubexprResultsCached = + context.maxSharedSubexprResultsCached(); if (sharedSubexprResults_.size() < maxSharedSubexprResultsCached) { // If we have room left in the cache, add it. sharedSubexprResultsIter = @@ -1039,7 +1038,7 @@ Expr::PeelEncodingsResult Expr::peelEncodings( // If the expression depends on one dictionary, results are cacheable. 
bool mayCache = false; - if (context.cacheEnabled()) { + if (context.dictionaryMemoizationEnabled()) { mayCache = distinctFields_.size() == 1 && VectorEncoding::isDictionary(context.wrapEncoding()) && !peeledVectors[0]->memoDisabled(); @@ -1054,7 +1053,8 @@ void Expr::evalEncodings( const SelectivityVector& rows, EvalCtx& context, VectorPtr& result) { - if (deterministic_ && !skipFieldDependentOptimizations()) { + if (deterministic_ && !skipFieldDependentOptimizations() && + context.peelingEnabled()) { bool hasFlat = false; for (auto* field : distinctFields_) { if (isFlat(*context.getField(field->index(context)))) { @@ -1381,7 +1381,7 @@ void Expr::evalAll( return; } - if (shouldEvaluateSharedSubexp()) { + if (shouldEvaluateSharedSubexp(context)) { evaluateSharedSubexpr( rows, context, @@ -1462,6 +1462,16 @@ bool Expr::applyFunctionWithPeeling( VectorPtr& result) { LocalDecodedVector localDecoded(context); LocalSelectivityVector newRowsHolder(context); + if (!context.peelingEnabled()) { + if (inputValues_.size() == 1) { + // If we have a single input, velox needs to ensure that the + // vectorFunction would receive a flat input. + BaseVector::flattenVector(inputValues_[0]); + applyFunction(applyRows, context, result); + return true; + } + return false; + } // Attempt peeling. std::vector peeledVectors; auto peeledEncoding = PeeledEncoding::peel( diff --git a/velox/expression/Expr.h b/velox/expression/Expr.h index b425fd392bc9..ade47d61f8b0 100644 --- a/velox/expression/Expr.h +++ b/velox/expression/Expr.h @@ -486,8 +486,9 @@ class Expr { /// Evaluation of such expression is optimized by memoizing and reusing /// the results of prior evaluations. That logic is implemented in /// 'evaluateSharedSubexpr'. 
- bool shouldEvaluateSharedSubexp() const { - return deterministic_ && isMultiplyReferenced_ && !inputs_.empty(); + bool shouldEvaluateSharedSubexp(EvalCtx& context) const { + return deterministic_ && isMultiplyReferenced_ && !inputs_.empty() && + context.sharedSubExpressionReuseEnabled(); } /// Evaluate common sub-expression. Check if sharedSubexprValues_ already has diff --git a/velox/expression/tests/ExprTest.cpp b/velox/expression/tests/ExprTest.cpp index d3af7b405d01..100d0acb71b1 100644 --- a/velox/expression/tests/ExprTest.cpp +++ b/velox/expression/tests/ExprTest.cpp @@ -166,10 +166,12 @@ class ExprTest : public testing::Test, public VectorTestBase { evaluateMultipleWithStats( const std::vector& texts, const RowVectorPtr& input, - std::vector resultToReuse = {}) { + std::vector resultToReuse = {}, + core::ExecCtx* execCtx = nullptr) { auto exprSet = compileMultiple(texts, asRowType(input->type())); - exec::EvalCtx context(execCtx_.get(), exprSet.get(), input.get()); + exec::EvalCtx context( + execCtx ? execCtx : execCtx_.get(), exprSet.get(), input.get()); SelectivityVector rows(input->size()); if (resultToReuse.empty()) { @@ -190,11 +192,15 @@ class ExprTest : public testing::Test, public VectorTestBase { } std::pair> - evaluateWithStats(exec::ExprSet* exprSetPtr, const RowVectorPtr& input) { + evaluateWithStats( + exec::ExprSet* exprSetPtr, + const RowVectorPtr& input, + core::ExecCtx* execCtx = nullptr) { SelectivityVector rows(input->size()); std::vector results(1); - exec::EvalCtx context(execCtx_.get(), exprSetPtr, input.get()); + exec::EvalCtx context( + execCtx ? 
execCtx : execCtx_.get(), exprSetPtr, input.get()); exprSetPtr->eval(rows, context, results); return {results[0], exprSetPtr->stats()}; @@ -4795,5 +4801,167 @@ VELOX_INSTANTIATE_TEST_SUITE_P( ParameterizedExprTest, testing::ValuesIn({false, true})); +TEST_F(ExprTest, disablePeeling) { + // Verify that peeling is disabled when the config is set by checking whether + // the number of rows processed is equal to the alphabet size (when enabled) + // or the dictionary size (when disabled). + // Also, ensure that single arg function receives a flat vector even when + // peeling is disabled. + + // This throws if input is not flat or constant. + VELOX_REGISTER_VECTOR_FUNCTION( + udf_testing_single_arg_deterministic, "testing_single_arg_deterministic"); + // This wraps the input in a dictionary. + exec::registerVectorFunction( + "dict_wrap", + WrapInDictionaryFunc::signatures(), + std::make_unique(), + exec::VectorFunctionMetadataBuilder().defaultNullBehavior(false).build()); + const std::vector expressions = {"c0 + 1"}; + + auto flatInput = makeFlatVector({1, 2, 3}); + auto flatSize = flatInput->size(); + auto dictInput = wrapInDictionary( + makeIndices(2 * flatSize, [&](auto row) { return row % flatSize; }), + 2 * flatSize, + flatInput); + auto dictSize = dictInput->size(); + + // Peeling Enabled (by default) + auto [result, stats] = evaluateMultipleWithStats( + expressions, makeRowVector({dictInput}), {}, execCtx_.get()); + + ASSERT_TRUE(stats.find("plus") != stats.end()); + ASSERT_EQ(stats["plus"].numProcessedRows, flatSize); + + // Peeling Disabled + std::unordered_map configData( + {{core::QueryConfig::kDebugDisableExpressionWithPeeling, "true"}}); + auto queryCtx = velox::core::QueryCtx::create( + nullptr, core::QueryConfig(std::move(configData))); + auto execCtx = std::make_unique(pool_.get(), queryCtx.get()); + + std::tie(result, stats) = evaluateMultipleWithStats( + expressions, makeRowVector({dictInput}), {}, execCtx.get()); + + ASSERT_TRUE(stats.find("plus") != 
stats.end()); + ASSERT_EQ(stats["plus"].numProcessedRows, dictSize); + + // Ensure single arg function receives a flat vector. + // When top level column is dictionary wrapped. + ASSERT_NO_THROW(evaluateMultiple( + {"testing_single_arg_deterministic((c0))"}, + makeRowVector({dictInput}), + {}, + execCtx.get())); + // When intermediate column is dictionary wrapped. + // dict_wrap helps generate an intermediate dictionary vector. + ASSERT_NO_THROW(evaluateMultiple( + {"testing_single_arg_deterministic(dict_wrap(c0))"}, + makeRowVector({flatInput}), + {}, + execCtx.get())); +} + +TEST_F(ExprTest, disableSharedSubExpressionReuse) { + // Verify that shared subexpression reuse is disabled when the config is set + // by confirming that the same rows are processed twice by the shared + // expression when it is disabled. + const std::vector expressions = {"c0 + 1", "(c0 + 1) = 0"}; + + auto flatInput = makeFlatVector({1, 2, 3}); + auto flatSize = flatInput->size(); + + // SharedSubExpressionReuse Enabled (by default) + auto [result, stats] = evaluateMultipleWithStats( + expressions, makeRowVector({flatInput}), {}, execCtx_.get()); + + ASSERT_TRUE(stats.find("plus") != stats.end()); + ASSERT_EQ(stats["plus"].numProcessedRows, flatSize); + + // SharedSubExpressionReuse Disabled + std::unordered_map configData( + {{core::QueryConfig::kDebugDisableCommonSubExpressions, "true"}}); + auto queryCtx = velox::core::QueryCtx::create( + nullptr, core::QueryConfig(std::move(configData))); + auto execCtx = std::make_unique(pool_.get(), queryCtx.get()); + + std::tie(result, stats) = evaluateMultipleWithStats( + expressions, makeRowVector({flatInput}), {}, execCtx.get()); + + ASSERT_TRUE(stats.find("plus") != stats.end()); + ASSERT_EQ(stats["plus"].numProcessedRows, 2 * flatSize); +} + +TEST_F(ExprTest, disableMemoization) { + // Verify that memoization is disabled when the config is set by confirming + // that the third invocation does not reuse results from prior invocations. 
+ auto flatInput = makeFlatVector({1, 2, 3}); + auto flatSize = flatInput->size(); + auto dictInput = wrapInDictionary( + makeIndices(2 * flatSize, [&](auto row) { return row % flatSize; }), + 2 * flatSize, + flatInput); + auto dictSize = dictInput->size(); + auto inputRow = makeRowVector({dictInput}); + + auto exprSet = compileExpression("c0 + 1", asRowType(inputRow->type())); + // Memoization Enabled (by default). We need to evaluate the expression + // at least twice to enable memoization. The third invocation will use the + // memoized result. + evaluateWithStats(exprSet.get(), inputRow, execCtx_.get()); + evaluateWithStats(exprSet.get(), inputRow, execCtx_.get()); + auto [result, stats] = + evaluateWithStats(exprSet.get(), inputRow, execCtx_.get()); + + ASSERT_TRUE(stats.find("plus") != stats.end()); + ASSERT_EQ(stats["plus"].numProcessedRows, 2 * flatSize); + + // Memoization Disabled + std::unordered_map configData( + {{core::QueryConfig::kDebugDisableExpressionWithMemoization, "true"}}); + auto queryCtx = velox::core::QueryCtx::create( + nullptr, core::QueryConfig(std::move(configData))); + auto execCtx = std::make_unique(pool_.get(), queryCtx.get()); + + exprSet = compileExpression("c0 + 1", asRowType(inputRow->type())); + evaluateWithStats(exprSet.get(), inputRow, execCtx.get()); + evaluateWithStats(exprSet.get(), inputRow, execCtx.get()); + std::tie(result, stats) = + evaluateWithStats(exprSet.get(), inputRow, execCtx.get()); + + ASSERT_TRUE(stats.find("plus") != stats.end()); + ASSERT_EQ(stats["plus"].numProcessedRows, 3 * flatSize); +} + +TEST_F(ExprTest, disabledeferredLazyLoading) { + // Verify that deferred lazy loading is disabled when the config is set by + // confirming that all rows are loaded even when only a subset is required. + + // The following expression only requires 1 row to be loaded on c1. 
+ const std::vector expressions = {"(c0 < 2) AND (c1 > 0)"}; + auto c0 = makeFlatVector({1, 2, 3}); + auto valueAt = [](auto row) { return row; }; + // Confirm only 1 row is loaded. + auto c1 = makeLazyFlatVector(3, valueAt, nullptr, 1); + + // Deferred lazy loading enabled (by default). Confirm that only required rows + // are loaded. + auto [result, stats] = evaluateMultipleWithStats( + expressions, makeRowVector({c0, c1}), {}, execCtx_.get()); + + // Deferred lazy loading disabled. Confirm all rows will be loaded. + std::unordered_map configData( + {{core::QueryConfig::kDebugDisableExpressionWithLazyInputs, "true"}}); + auto queryCtx = velox::core::QueryCtx::create( + nullptr, core::QueryConfig(std::move(configData))); + auto execCtx = std::make_unique(pool_.get(), queryCtx.get()); + + // Confirm that all rows are loaded. + c1 = makeLazyFlatVector(3, valueAt, nullptr, 3); + std::tie(result, stats) = evaluateMultipleWithStats( + expressions, makeRowVector({c0, c1}), {}, execCtx.get()); +} + } // namespace } // namespace facebook::velox::test