diff --git a/velox/core/QueryConfig.h b/velox/core/QueryConfig.h index 814bdef03a26..def08a9de314 100644 --- a/velox/core/QueryConfig.h +++ b/velox/core/QueryConfig.h @@ -357,6 +357,43 @@ class QueryConfig { /// Empty string if only want to trace the query metadata. static constexpr const char* kQueryTraceNodeIds = "query_trace_node_ids"; + /// Disable optimization in expression evaluation to peel common dictionary + /// layer from inputs. + static constexpr const char* kDebugDisableExpressionWithPeeling = + "debug_disable_expression_with_peeling"; + + /// Disable optimization in expression evaluation to re-use cached results for + /// common sub-expressions. + static constexpr const char* kDebugDisableCommonSubExpressions = + "debug_disable_common_sub_expressions"; + + /// Disable optimization in expression evaluation to re-use cached results + /// between subsequent input batches that are dictionary encoded and have the + /// same alphabet(underlying flat vector). + static constexpr const char* kDebugDisableExpressionWithMemoization = + "debug_disable_expression_with_memoization"; + + /// Disable optimization in expression evaluation to delay loading of lazy + /// inputs unless required. 
+ static constexpr const char* kDebugDisableExpressionWithLazyInputs = + "debug_disable_expression_with_lazy_inputs"; + + bool debugDisableExpressionsWithPeeling() const { + return get(kDebugDisableExpressionWithPeeling, false); + } + + bool debugDisableCommonSubExpressions() const { + return get(kDebugDisableCommonSubExpressions, false); + } + + bool debugDisableExpressionsWithMemoization() const { + return get(kDebugDisableExpressionWithMemoization, false); + } + + bool debugDisableExpressionsWithLazyInputs() const { + return get(kDebugDisableExpressionWithLazyInputs, false); + } + uint64_t queryMaxMemoryPerNode() const { return config::toCapacity( get(kQueryMaxMemoryPerNode, "0B"), diff --git a/velox/core/QueryCtx.h b/velox/core/QueryCtx.h index c2274cf4af44..89ff229e3a48 100644 --- a/velox/core/QueryCtx.h +++ b/velox/core/QueryCtx.h @@ -229,12 +229,53 @@ class ExecCtx { ExecCtx(memory::MemoryPool* pool, QueryCtx* queryCtx) : pool_(pool), queryCtx_(queryCtx), - exprEvalCacheEnabled_( - !queryCtx || - queryCtx->queryConfig().isExpressionEvaluationCacheEnabled()), + optimizationParams_(queryCtx), vectorPool_( - exprEvalCacheEnabled_ ? std::make_unique(pool) - : nullptr) {} + optimizationParams_.exprEvalCacheEnabled + ? std::make_unique(pool) + : nullptr) {} + + struct OptimizationParams { + explicit OptimizationParams(QueryCtx* queryCtx) { + const core::QueryConfig defaultQueryConfig = core::QueryConfig({}); + + const core::QueryConfig& queryConfig = + queryCtx ? 
queryCtx->queryConfig() : defaultQueryConfig; + + exprEvalCacheEnabled = queryConfig.isExpressionEvaluationCacheEnabled(); + dictionaryMemoizationEnabled = + !queryConfig.debugDisableExpressionsWithMemoization() && + exprEvalCacheEnabled; + peelingEnabled = !queryConfig.debugDisableExpressionsWithPeeling(); + sharedSubExpressionReuseEnabled = + !queryConfig.debugDisableCommonSubExpressions(); + deferredLazyLoadingEnabled = + !queryConfig.debugDisableExpressionsWithLazyInputs(); + maxSharedSubexprResultsCached = + queryConfig.maxSharedSubexprResultsCached(); + } + + /// True if caches in expression evaluation used for performance are + /// enabled, including VectorPool, DecodedVectorPool, SelectivityVectorPool + /// and dictionary memoization. + bool exprEvalCacheEnabled; + /// True if dictionary memoization optimization is enabled during expression + /// evaluation, which allows the reuse of results between consecutive input + /// batches if they are dictionary encoded and have the same + /// alphabet (underlying flat vector). + bool dictionaryMemoizationEnabled; + /// True if peeling is enabled during expression evaluation. + bool peelingEnabled; + /// True if shared subexpression reuse is enabled during expression + /// evaluation. + bool sharedSubExpressionReuseEnabled; + /// True if loading of lazy inputs is deferred till they need to be + /// accessed during expression evaluation. + bool deferredLazyLoadingEnabled; + /// The maximum number of distinct inputs to cache results in a + /// given shared subexpression during expression evaluation. + uint32_t maxSharedSubexprResultsCached; + }; velox::memory::MemoryPool* pool() const { return pool_; @@ -251,7 +292,9 @@ class ExecCtx { /// Prefer using LocalSelectivityVector which takes care of returning the /// vector to the pool on destruction. 
std::unique_ptr getSelectivityVector(int32_t size) { - VELOX_CHECK(exprEvalCacheEnabled_ || selectivityVectorPool_.empty()); + VELOX_CHECK( + optimizationParams_.exprEvalCacheEnabled || + selectivityVectorPool_.empty()); if (selectivityVectorPool_.empty()) { return std::make_unique(size); } @@ -265,7 +308,9 @@ class ExecCtx { // content. The caller is responsible for setting the size and // assigning the contents. std::unique_ptr getSelectivityVector() { - VELOX_CHECK(exprEvalCacheEnabled_ || selectivityVectorPool_.empty()); + VELOX_CHECK( + optimizationParams_.exprEvalCacheEnabled || + selectivityVectorPool_.empty()); if (selectivityVectorPool_.empty()) { return std::make_unique(); } @@ -276,7 +321,7 @@ class ExecCtx { // Returns true if the vector was moved into the pool. bool releaseSelectivityVector(std::unique_ptr&& vector) { - if (exprEvalCacheEnabled_) { + if (optimizationParams_.exprEvalCacheEnabled) { selectivityVectorPool_.push_back(std::move(vector)); return true; } @@ -284,7 +329,8 @@ class ExecCtx { } std::unique_ptr getDecodedVector() { - VELOX_CHECK(exprEvalCacheEnabled_ || decodedVectorPool_.empty()); + VELOX_CHECK( + optimizationParams_.exprEvalCacheEnabled || decodedVectorPool_.empty()); if (decodedVectorPool_.empty()) { return std::make_unique(); } @@ -295,7 +341,7 @@ class ExecCtx { // Returns true if the vector was moved into the pool. 
bool releaseDecodedVector(std::unique_ptr&& vector) { - if (exprEvalCacheEnabled_) { + if (optimizationParams_.exprEvalCacheEnabled) { decodedVectorPool_.push_back(std::move(vector)); return true; } @@ -334,8 +380,8 @@ class ExecCtx { return 0; } - bool exprEvalCacheEnabled() const { - return exprEvalCacheEnabled_; + const OptimizationParams& optimizationParams() const { + return optimizationParams_; } private: @@ -343,8 +389,9 @@ class ExecCtx { memory::MemoryPool* const pool_; QueryCtx* const queryCtx_; - const bool exprEvalCacheEnabled_; - // A pool of preallocated DecodedVectors for use by expressions and operators. + const OptimizationParams optimizationParams_; + // A pool of preallocated DecodedVectors for use by expressions and + // operators. std::vector> decodedVectorPool_; // A pool of preallocated SelectivityVectors for use by expressions // and operators. diff --git a/velox/core/tests/QueryConfigTest.cpp b/velox/core/tests/QueryConfigTest.cpp index 81a199685bcc..c89d44d2fdfa 100644 --- a/velox/core/tests/QueryConfigTest.cpp +++ b/velox/core/tests/QueryConfigTest.cpp @@ -118,12 +118,16 @@ TEST_F(QueryConfigTest, enableExpressionEvaluationCacheConfig) { enableExpressionEvaluationCache); auto execCtx = std::make_shared(pool.get(), queryCtx.get()); - ASSERT_EQ(execCtx->exprEvalCacheEnabled(), enableExpressionEvaluationCache); + ASSERT_EQ( + execCtx->optimizationParams().exprEvalCacheEnabled, + enableExpressionEvaluationCache); ASSERT_EQ( execCtx->vectorPool() != nullptr, enableExpressionEvaluationCache); auto evalCtx = std::make_shared(execCtx.get()); - ASSERT_EQ(evalCtx->cacheEnabled(), enableExpressionEvaluationCache); + ASSERT_EQ( + evalCtx->dictionaryMemoizationEnabled(), + enableExpressionEvaluationCache); // Test ExecCtx::selectivityVectorPool_. 
auto rows = execCtx->getSelectivityVector(100); @@ -144,4 +148,58 @@ testConfig(false); } +TEST_F(QueryConfigTest, expressionEvaluationRelatedConfigs) { + // Verify that the expression evaluation related configs are propagated + // correctly to EvalCtx which is used during expression evaluation. Each + // config is individually set and verified. + std::shared_ptr rootPool{ + memory::memoryManager()->addRootPool()}; + std::shared_ptr pool{rootPool->addLeafChild("leaf")}; + + auto testConfig = + [&](std::unordered_map configData) { + auto queryCtx = + core::QueryCtx::create(nullptr, QueryConfig{std::move(configData)}); + const auto& queryConfig = queryCtx->queryConfig(); + auto execCtx = + std::make_shared(pool.get(), queryCtx.get()); + auto evalCtx = std::make_shared(execCtx.get()); + + ASSERT_EQ( + evalCtx->peelingEnabled(), + !queryConfig.debugDisableExpressionsWithPeeling()); + ASSERT_EQ( + evalCtx->sharedSubExpressionReuseEnabled(), + !queryConfig.debugDisableCommonSubExpressions()); + ASSERT_EQ( + evalCtx->dictionaryMemoizationEnabled(), + !queryConfig.debugDisableExpressionsWithMemoization()); + ASSERT_EQ( + evalCtx->deferredLazyLoadingEnabled(), + !queryConfig.debugDisableExpressionsWithLazyInputs()); + }; + + auto createConfig = [&](bool debugDisableExpressionsWithPeeling, + bool debugDisableCommonSubExpressions, + bool debugDisableExpressionsWithMemoization, + bool debugDisableExpressionsWithLazyInputs) -> auto { + std::unordered_map configData( + {{core::QueryConfig::kDebugDisableExpressionWithPeeling, + std::to_string(debugDisableExpressionsWithPeeling)}, + {core::QueryConfig::kDebugDisableCommonSubExpressions, + std::to_string(debugDisableCommonSubExpressions)}, + {core::QueryConfig::kDebugDisableExpressionWithMemoization, + std::to_string(debugDisableExpressionsWithMemoization)}, + {core::QueryConfig::kDebugDisableExpressionWithLazyInputs, + 
std::to_string(debugDisableExpressionsWithLazyInputs)}}); + return configData; + }; + + testConfig({}); // Verify default config. + testConfig(createConfig(true, false, false, false)); + testConfig(createConfig(false, true, false, false)); + testConfig(createConfig(false, false, true, false)); + testConfig(createConfig(false, false, false, true)); +} + } // namespace facebook::velox::core::test diff --git a/velox/docs/configs.rst b/velox/docs/configs.rst index 746ad0d8cd44..bdcd28d054a8 100644 --- a/velox/docs/configs.rst +++ b/velox/docs/configs.rst @@ -168,6 +168,22 @@ Expression Evaluation Configuration - bool - false - This flag makes the Row conversion to by applied in a way that the casting row field are matched by name instead of position. + * - debug_disable_expression_with_peeling + - bool + - false + - Disable optimization in expression evaluation to peel common dictionary layer from inputs. Should only be used for debugging. + * - debug_disable_common_sub_expressions + - bool + - false + - Disable optimization in expression evaluation to re-use cached results for common sub-expressions. Should only be used for debugging. + * - debug_disable_expression_with_memoization + - bool + - false + - Disable optimization in expression evaluation to re-use cached results between subsequent input batches that are dictionary encoded and have the same alphabet(underlying flat vector). Should only be used for debugging. + * - debug_disable_expression_with_lazy_inputs + - bool + - false + - Disable optimization in expression evaluation to delay loading of lazy inputs unless required. Should only be used for debugging. 
Memory Management ----------------- diff --git a/velox/expression/EvalCtx.cpp b/velox/expression/EvalCtx.cpp index e43f0aae3437..ae87fb28063f 100644 --- a/velox/expression/EvalCtx.cpp +++ b/velox/expression/EvalCtx.cpp @@ -26,16 +26,7 @@ using facebook::velox::common::testutil::TestValue; namespace facebook::velox::exec { EvalCtx::EvalCtx(core::ExecCtx* execCtx, ExprSet* exprSet, const RowVector* row) - : execCtx_(execCtx), - exprSet_(exprSet), - row_(row), - cacheEnabled_(execCtx->exprEvalCacheEnabled()), - maxSharedSubexprResultsCached_( - execCtx->queryCtx() - ? execCtx->queryCtx() - ->queryConfig() - .maxSharedSubexprResultsCached() - : core::QueryConfig({}).maxSharedSubexprResultsCached()) { + : execCtx_(execCtx), exprSet_(exprSet), row_(row) { // TODO Change the API to replace raw pointers with non-const references. // Sanity check inputs to prevent crashes. VELOX_CHECK_NOT_NULL(execCtx); @@ -53,16 +44,7 @@ EvalCtx::EvalCtx(core::ExecCtx* execCtx, ExprSet* exprSet, const RowVector* row) } EvalCtx::EvalCtx(core::ExecCtx* execCtx) - : execCtx_(execCtx), - exprSet_(nullptr), - row_(nullptr), - cacheEnabled_(execCtx->exprEvalCacheEnabled()), - maxSharedSubexprResultsCached_( - execCtx->queryCtx() - ? execCtx->queryCtx() - ->queryConfig() - .maxSharedSubexprResultsCached() - : core::QueryConfig({}).maxSharedSubexprResultsCached()) { + : execCtx_(execCtx), exprSet_(nullptr), row_(nullptr) { VELOX_CHECK_NOT_NULL(execCtx); } diff --git a/velox/expression/EvalCtx.h b/velox/expression/EvalCtx.h index 0d4ba9c655f3..22fb13e478b6 100644 --- a/velox/expression/EvalCtx.h +++ b/velox/expression/EvalCtx.h @@ -519,16 +519,33 @@ class EvalCtx { return peeledEncoding_.get(); } - /// Returns true if caching in expression evaluation is enabled, such as - /// Expr::evalWithMemo. 
- bool cacheEnabled() const { - return cacheEnabled_; + /// Returns true if dictionary memoization optimization is enabled, which + /// allows the reuse of results between consecutive input batches if they are + /// dictionary encoded and have the same alphabet (underlying flat vector). + bool dictionaryMemoizationEnabled() const { + return execCtx_->optimizationParams().dictionaryMemoizationEnabled; } /// Returns the maximum number of distinct inputs to cache results for in a /// given shared subexpression. uint32_t maxSharedSubexprResultsCached() const { - return maxSharedSubexprResultsCached_; + return execCtx_->optimizationParams().maxSharedSubexprResultsCached; + } + + /// Returns true if peeling is enabled. + bool peelingEnabled() const { + return execCtx_->optimizationParams().peelingEnabled; + } + + /// Returns true if shared subexpression reuse is enabled. + bool sharedSubExpressionReuseEnabled() const { + return execCtx_->optimizationParams().sharedSubExpressionReuseEnabled; + } + + /// Returns true if loading of lazy inputs is deferred till they need to be + /// accessed. + bool deferredLazyLoadingEnabled() const { + return execCtx_->optimizationParams().deferredLazyLoadingEnabled; } private: @@ -550,8 +567,6 @@ class EvalCtx { core::ExecCtx* const execCtx_; ExprSet* const exprSet_; const RowVector* row_; - const bool cacheEnabled_; - const uint32_t maxSharedSubexprResultsCached_; bool inputFlatNoNulls_; // Corresponds 1:1 to children of 'row_'. 
Set to an inner vector diff --git a/velox/expression/Expr.cpp b/velox/expression/Expr.cpp index 8be2572bd7a5..ee06286dfa4f 100644 --- a/velox/expression/Expr.cpp +++ b/velox/expression/Expr.cpp @@ -708,7 +708,7 @@ void Expr::evalFlatNoNulls( EvalCtx& context, VectorPtr& result, const ExprSet* parentExprSet) { - if (shouldEvaluateSharedSubexp()) { + if (shouldEvaluateSharedSubexp(context)) { evaluateSharedSubexpr( rows, context, @@ -819,7 +819,8 @@ void Expr::eval( // // TODO: Re-work the logic of deciding when to load which field. if (!hasConditionals_ || distinctFields_.size() == 1 || - shouldEvaluateSharedSubexp()) { + shouldEvaluateSharedSubexp(context) || + !context.deferredLazyLoadingEnabled()) { // Load lazy vectors if any. for (auto* field : distinctFields_) { context.ensureFieldLoaded(field->index(context), rows); @@ -874,10 +875,8 @@ void Expr::evaluateSharedSubexpr( } if (sharedSubexprResultsIter == sharedSubexprResults_.end()) { - auto maxSharedSubexprResultsCached = context.execCtx() - ->queryCtx() - ->queryConfig() - .maxSharedSubexprResultsCached(); + auto maxSharedSubexprResultsCached = + context.maxSharedSubexprResultsCached(); if (sharedSubexprResults_.size() < maxSharedSubexprResultsCached) { // If we have room left in the cache, add it. sharedSubexprResultsIter = @@ -1039,7 +1038,7 @@ Expr::PeelEncodingsResult Expr::peelEncodings( // If the expression depends on one dictionary, results are cacheable. 
bool mayCache = false; - if (context.cacheEnabled()) { + if (context.dictionaryMemoizationEnabled()) { mayCache = distinctFields_.size() == 1 && VectorEncoding::isDictionary(context.wrapEncoding()) && !peeledVectors[0]->memoDisabled(); @@ -1054,7 +1053,8 @@ void Expr::evalEncodings( const SelectivityVector& rows, EvalCtx& context, VectorPtr& result) { - if (deterministic_ && !skipFieldDependentOptimizations()) { + if (deterministic_ && !skipFieldDependentOptimizations() && + context.peelingEnabled()) { bool hasFlat = false; for (auto* field : distinctFields_) { if (isFlat(*context.getField(field->index(context)))) { @@ -1381,7 +1381,7 @@ void Expr::evalAll( return; } - if (shouldEvaluateSharedSubexp()) { + if (shouldEvaluateSharedSubexp(context)) { evaluateSharedSubexpr( rows, context, @@ -1462,6 +1462,16 @@ bool Expr::applyFunctionWithPeeling( VectorPtr& result) { LocalDecodedVector localDecoded(context); LocalSelectivityVector newRowsHolder(context); + if (!context.peelingEnabled()) { + if (inputValues_.size() == 1) { + // If we have a single input, velox needs to ensure that the + // vectorFunction would receive a flat input. + BaseVector::flattenVector(inputValues_[0]); + applyFunction(applyRows, context, result); + return true; + } + return false; + } // Attempt peeling. std::vector peeledVectors; auto peeledEncoding = PeeledEncoding::peel( diff --git a/velox/expression/Expr.h b/velox/expression/Expr.h index b425fd392bc9..ade47d61f8b0 100644 --- a/velox/expression/Expr.h +++ b/velox/expression/Expr.h @@ -486,8 +486,9 @@ class Expr { /// Evaluation of such expression is optimized by memoizing and reusing /// the results of prior evaluations. That logic is implemented in /// 'evaluateSharedSubexpr'. 
- bool shouldEvaluateSharedSubexp() const { - return deterministic_ && isMultiplyReferenced_ && !inputs_.empty(); + bool shouldEvaluateSharedSubexp(EvalCtx& context) const { + return deterministic_ && isMultiplyReferenced_ && !inputs_.empty() && + context.sharedSubExpressionReuseEnabled(); } /// Evaluate common sub-expression. Check if sharedSubexprValues_ already has diff --git a/velox/expression/tests/ExprTest.cpp b/velox/expression/tests/ExprTest.cpp index d3af7b405d01..100d0acb71b1 100644 --- a/velox/expression/tests/ExprTest.cpp +++ b/velox/expression/tests/ExprTest.cpp @@ -166,10 +166,12 @@ class ExprTest : public testing::Test, public VectorTestBase { evaluateMultipleWithStats( const std::vector& texts, const RowVectorPtr& input, - std::vector resultToReuse = {}) { + std::vector resultToReuse = {}, + core::ExecCtx* execCtx = nullptr) { auto exprSet = compileMultiple(texts, asRowType(input->type())); - exec::EvalCtx context(execCtx_.get(), exprSet.get(), input.get()); + exec::EvalCtx context( + execCtx ? execCtx : execCtx_.get(), exprSet.get(), input.get()); SelectivityVector rows(input->size()); if (resultToReuse.empty()) { @@ -190,11 +192,15 @@ class ExprTest : public testing::Test, public VectorTestBase { } std::pair> - evaluateWithStats(exec::ExprSet* exprSetPtr, const RowVectorPtr& input) { + evaluateWithStats( + exec::ExprSet* exprSetPtr, + const RowVectorPtr& input, + core::ExecCtx* execCtx = nullptr) { SelectivityVector rows(input->size()); std::vector results(1); - exec::EvalCtx context(execCtx_.get(), exprSetPtr, input.get()); + exec::EvalCtx context( + execCtx ? 
execCtx : execCtx_.get(), exprSetPtr, input.get()); exprSetPtr->eval(rows, context, results); return {results[0], exprSetPtr->stats()}; @@ -4795,5 +4801,167 @@ VELOX_INSTANTIATE_TEST_SUITE_P( ParameterizedExprTest, testing::ValuesIn({false, true})); +TEST_F(ExprTest, disablePeeling) { + // Verify that peeling is disabled when the config is set by checking whether + // the number of rows processed is equal to the alphabet size (when enabled) + // or the dictionary size (when disabled). + // Also, ensure that single arg function receives a flat vector even when + // peeling is disabled. + + // This throws if input is not flat or constant. + VELOX_REGISTER_VECTOR_FUNCTION( + udf_testing_single_arg_deterministic, "testing_single_arg_deterministic"); + // This wraps the input in a dictionary. + exec::registerVectorFunction( + "dict_wrap", + WrapInDictionaryFunc::signatures(), + std::make_unique(), + exec::VectorFunctionMetadataBuilder().defaultNullBehavior(false).build()); + const std::vector expressions = {"c0 + 1"}; + + auto flatInput = makeFlatVector({1, 2, 3}); + auto flatSize = flatInput->size(); + auto dictInput = wrapInDictionary( + makeIndices(2 * flatSize, [&](auto row) { return row % flatSize; }), + 2 * flatSize, + flatInput); + auto dictSize = dictInput->size(); + + // Peeling Enabled (by default) + auto [result, stats] = evaluateMultipleWithStats( + expressions, makeRowVector({dictInput}), {}, execCtx_.get()); + + ASSERT_TRUE(stats.find("plus") != stats.end()); + ASSERT_EQ(stats["plus"].numProcessedRows, flatSize); + + // Peeling Disabled + std::unordered_map configData( + {{core::QueryConfig::kDebugDisableExpressionWithPeeling, "true"}}); + auto queryCtx = velox::core::QueryCtx::create( + nullptr, core::QueryConfig(std::move(configData))); + auto execCtx = std::make_unique(pool_.get(), queryCtx.get()); + + std::tie(result, stats) = evaluateMultipleWithStats( + expressions, makeRowVector({dictInput}), {}, execCtx.get()); + + ASSERT_TRUE(stats.find("plus") != 
stats.end()); + ASSERT_EQ(stats["plus"].numProcessedRows, dictSize); + + // Ensure single arg function receives a flat vector. + // When top level column is dictionary wrapped. + ASSERT_NO_THROW(evaluateMultiple( + {"testing_single_arg_deterministic((c0))"}, + makeRowVector({dictInput}), + {}, + execCtx.get())); + // When intermediate column is dictionary wrapped. + // dict_wrap helps generate an intermediate dictionary vector. + ASSERT_NO_THROW(evaluateMultiple( + {"testing_single_arg_deterministic(dict_wrap(c0))"}, + makeRowVector({flatInput}), + {}, + execCtx.get())); +} + +TEST_F(ExprTest, disableSharedSubExpressionReuse) { + // Verify that shared subexpression reuse is disabled when the config is set + // by confirming that the same rows are processed twice by the shared + // expression when it is disabled. + const std::vector expressions = {"c0 + 1", "(c0 + 1) = 0"}; + + auto flatInput = makeFlatVector({1, 2, 3}); + auto flatSize = flatInput->size(); + + // SharedSubExpressionReuse Enabled (by default) + auto [result, stats] = evaluateMultipleWithStats( + expressions, makeRowVector({flatInput}), {}, execCtx_.get()); + + ASSERT_TRUE(stats.find("plus") != stats.end()); + ASSERT_EQ(stats["plus"].numProcessedRows, flatSize); + + // SharedSubExpressionReuse Disabled + std::unordered_map configData( + {{core::QueryConfig::kDebugDisableCommonSubExpressions, "true"}}); + auto queryCtx = velox::core::QueryCtx::create( + nullptr, core::QueryConfig(std::move(configData))); + auto execCtx = std::make_unique(pool_.get(), queryCtx.get()); + + std::tie(result, stats) = evaluateMultipleWithStats( + expressions, makeRowVector({flatInput}), {}, execCtx.get()); + + ASSERT_TRUE(stats.find("plus") != stats.end()); + ASSERT_EQ(stats["plus"].numProcessedRows, 2 * flatSize); +} + +TEST_F(ExprTest, disableMemoization) { + // Verify that memoization is disabled when the config is set by confirming + // that the third invocation does not reuse results from prior invocations. 
+ auto flatInput = makeFlatVector({1, 2, 3}); + auto flatSize = flatInput->size(); + auto dictInput = wrapInDictionary( + makeIndices(2 * flatSize, [&](auto row) { return row % flatSize; }), + 2 * flatSize, + flatInput); + auto dictSize = dictInput->size(); + auto inputRow = makeRowVector({dictInput}); + + auto exprSet = compileExpression("c0 + 1", asRowType(inputRow->type())); + // Memoization Enabled (by default). We need to evaluate the expression + // at least twice to enable memoization. The third invocation will use the + // memoized result. + evaluateWithStats(exprSet.get(), inputRow, execCtx_.get()); + evaluateWithStats(exprSet.get(), inputRow, execCtx_.get()); + auto [result, stats] = + evaluateWithStats(exprSet.get(), inputRow, execCtx_.get()); + + ASSERT_TRUE(stats.find("plus") != stats.end()); + ASSERT_EQ(stats["plus"].numProcessedRows, 2 * flatSize); + + // Memoization Disabled + std::unordered_map configData( + {{core::QueryConfig::kDebugDisableExpressionWithMemoization, "true"}}); + auto queryCtx = velox::core::QueryCtx::create( + nullptr, core::QueryConfig(std::move(configData))); + auto execCtx = std::make_unique(pool_.get(), queryCtx.get()); + + exprSet = compileExpression("c0 + 1", asRowType(inputRow->type())); + evaluateWithStats(exprSet.get(), inputRow, execCtx.get()); + evaluateWithStats(exprSet.get(), inputRow, execCtx.get()); + std::tie(result, stats) = + evaluateWithStats(exprSet.get(), inputRow, execCtx.get()); + + ASSERT_TRUE(stats.find("plus") != stats.end()); + ASSERT_EQ(stats["plus"].numProcessedRows, 3 * flatSize); +} + +TEST_F(ExprTest, disabledeferredLazyLoading) { + // Verify that deferred lazy loading is disabled when the config is set by + // confirming that all rows are loaded even when only a subset is required. + + // The following expression only requires 1 row to be loaded on c1. 
+ const std::vector expressions = {"(c0 < 2) AND (c1 > 0)"}; + auto c0 = makeFlatVector({1, 2, 3}); + auto valueAt = [](auto row) { return row; }; + // Confirm only 1 row is loaded. + auto c1 = makeLazyFlatVector(3, valueAt, nullptr, 1); + + // Deferred lazy loading enabled (by default). Confirm that only required rows + // are loaded. + auto [result, stats] = evaluateMultipleWithStats( + expressions, makeRowVector({c0, c1}), {}, execCtx_.get()); + + // Deferred lazy loading disabled. Confirm all rows will be loaded. + std::unordered_map configData( + {{core::QueryConfig::kDebugDisableExpressionWithLazyInputs, "true"}}); + auto queryCtx = velox::core::QueryCtx::create( + nullptr, core::QueryConfig(std::move(configData))); + auto execCtx = std::make_unique(pool_.get(), queryCtx.get()); + + // Confirm that all rows are loaded. + c1 = makeLazyFlatVector(3, valueAt, nullptr, 3); + std::tie(result, stats) = evaluateMultipleWithStats( + expressions, makeRowVector({c0, c1}), {}, execCtx.get()); +} + } // namespace } // namespace facebook::velox::test