Back out "Optimize evaluation by reducing peeled vectors size"

Summary:
Original commit changeset: 23ef4c590eba

Original Phabricator Diff: D60064934

Differential Revision: D61240010
Bikramjeet Vig authored and facebook-github-bot committed Aug 13, 2024
1 parent 13899de commit 7ea72fa
Showing 4 changed files with 62 additions and 214 deletions.
69 changes: 2 additions & 67 deletions velox/expression/PeeledEncoding.cpp
@@ -55,9 +55,7 @@ namespace facebook::velox::exec {
 SelectivityVector* PeeledEncoding::translateToInnerRows(
     const SelectivityVector& outerRows,
     LocalSelectivityVector& innerRowsHolder) const {
-  if (wrapEncoding_ == VectorEncoding::Simple::FLAT) {
-    return innerRowsHolder.get(outerRows);
-  }
+  VELOX_CHECK(wrapEncoding_ != VectorEncoding::Simple::FLAT);
   if (wrapEncoding_ == VectorEncoding::Simple::CONSTANT) {
     auto newRows = innerRowsHolder.get(constantWrapIndex_ + 1, false);
     newRows->setValid(constantWrapIndex_, true);
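
The restored VELOX_CHECK encodes the post-backout invariant that translateToInnerRows() is never reached with a flat (unpeeled) wrapping. What the function computes is the set of base-vector rows that the selected outer rows touch. Below is a minimal standalone sketch of that mapping, assuming plain arrays and a std::set in place of Velox's SelectivityVector; the function and parameter names are hypothetical stand-ins, not Velox API.

```cpp
// Standalone sketch: map selected outer rows to the base (inner) rows they
// touch. A constant wrap maps every outer row to one index; a dictionary
// wrap maps each outer row through its indices array.
#include <cstdint>
#include <iostream>
#include <set>
#include <vector>

std::set<int32_t> translateToInnerRows(
    const std::vector<int32_t>& selectedOuterRows,
    const int32_t* indices,  // null when the wrap is constant
    int32_t constantWrapIndex) {
  std::set<int32_t> innerRows;
  for (int32_t row : selectedOuterRows) {
    innerRows.insert(indices ? indices[row] : constantWrapIndex);
  }
  return innerRows;
}

int main() {
  std::vector<int32_t> outer = {0, 1, 3};
  std::vector<int32_t> indices = {4, 4, 9, 2};
  for (int32_t r : translateToInnerRows(outer, indices.data(), 0)) {
    std::cout << r << " ";  // prints: 2 4 (outer rows 0 and 1 share base row 4)
  }
  std::cout << "\n";
  for (int32_t r : translateToInnerRows(outer, nullptr, 5)) {
    std::cout << r << " ";  // constant wrap: prints just 5
  }
  std::cout << "\n";
}
```
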
@@ -111,61 +109,6 @@ void PeeledEncoding::setDictionaryWrapping(
   wrapNulls_ = std::move(wrapping.nulls);
 }
 
-void PeeledEncoding::flattenPeeledVectors(
-    const SelectivityVector& rows,
-    const std::vector<bool>& constantFields,
-    std::vector<VectorPtr>& peeledVectors) {
-  VELOX_CHECK(flattenPeeled_.empty());
-  for (int i = 0; i < peeledVectors.size(); ++i) {
-    if (!constantFields.empty() && constantFields[i]) {
-      continue;
-    }
-    if (peeledVectors[i]->isConstantEncoding() && !wrapNulls_) {
-      continue;
-    }
-    switch (peeledVectors[i]->typeKind()) {
-      case TypeKind::ARRAY:
-      case TypeKind::MAP:
-      case TypeKind::ROW:
-        return;
-      default:
-        if (isLazyNotLoaded(*peeledVectors[i])) {
-          return;
-        }
-    }
-  }
-  auto nonNullRows = rows;
-  if (wrapNulls_) {
-    nonNullRows.deselectNulls(
-        wrapNulls_->as<uint64_t>(), rows.begin(), rows.end());
-  }
-  for (int i = 0; i < peeledVectors.size(); ++i) {
-    if (!constantFields.empty() && constantFields[i]) {
-      continue;
-    }
-    if (peeledVectors[i]->isConstantEncoding() && !wrapNulls_) {
-      if (peeledVectors[i]->size() < rows.end()) {
-        peeledVectors[i] =
-            BaseVector::wrapInConstant(rows.end(), 0, peeledVectors[i]);
-      }
-    } else {
-      auto flat = BaseVector::create(
-          peeledVectors[i]->type(), rows.end(), peeledVectors[i]->pool());
-      flat->copy(
-          peeledVectors[i].get(), nonNullRows, wrap_->as<vector_size_t>());
-      if (!nonNullRows.isAllSelected()) {
-        bits::andBits(
-            flat->mutableRawNulls(), nonNullRows.allBits(), 0, rows.end());
-      }
-      peeledVectors[i] = flat;
-      flattenPeeled_.push_back(std::move(flat));
-    }
-  }
-  wrapEncoding_ = VectorEncoding::Simple::FLAT;
-  baseSize_ = rows.end();
-  wrap_.reset();
-}
-
 bool PeeledEncoding::peelInternal(
     const std::vector<VectorPtr>& vectorsToPeel,
     const SelectivityVector& rows,
@@ -280,12 +223,6 @@ bool PeeledEncoding::peelInternal(
     constantWrapIndex_ = innerIdx;
   } else {
     setDictionaryWrapping(decodedVector, rows, *firstWrapper);
-    if (baseSize_ / 8 > rows.end()) {
-      // For large base vector, peeling would still need to allocate and
-      // initialize/copy the memory of unselected rows, resulting in large
-      // performance penalty, thus it's safer to do deep copy in this case.
-      flattenPeeledVectors(rows, constantFields, peeledVectors);
-    }
     // Make sure all the constant vectors have at least the same length as the
     // base vector after peeling. This will make sure any translated rows
     // point to valid rows in the constant vector.
@@ -311,9 +248,7 @@ VectorPtr PeeledEncoding::wrap(
     velox::memory::MemoryPool* pool,
     VectorPtr peeledResult,
     const SelectivityVector& rows) const {
-  if (wrapEncoding_ == VectorEncoding::Simple::FLAT) {
-    return peeledResult;
-  }
+  VELOX_CHECK(wrapEncoding_ != VectorEncoding::Simple::FLAT);
   VectorPtr wrappedResult;
   if (wrapEncoding_ == VectorEncoding::Simple::DICTIONARY) {
     if (!peeledResult) {
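
For context on what was backed out: the removed flattenPeeledVectors() implemented the optimization from the original changeset. When a dictionary wrapped a base vector more than 8x larger than the selected row range (the baseSize_ / 8 > rows.end() test above), it deep-copied just the selected rows into new flat vectors so that later evaluation would not allocate or initialize memory for unselected rows. A minimal standalone sketch of that gather-on-selection idea, assuming simple structs in place of Velox vectors; Dict and flattenIfLargeBase are hypothetical names, not Velox API:

```cpp
// Standalone sketch of the backed-out optimization: when a dictionary's base
// is far larger than the selected row range, gather just the selected rows
// through the indices into a small flat vector.
#include <cstdint>
#include <iostream>
#include <vector>

struct Dict {
  std::vector<int32_t> indices;  // outer row -> base row
  std::vector<int64_t> base;     // possibly much larger than indices
};

// Mirrors the removed heuristic: deep-copy only when the base is more than
// 8x the selected range, since peeling such a base would otherwise touch
// memory for many unselected rows.
std::vector<int64_t> flattenIfLargeBase(const Dict& d, int32_t numRows) {
  std::vector<int64_t> flat;
  if (d.base.size() / 8 > static_cast<size_t>(numRows)) {
    flat.reserve(numRows);
    for (int32_t row = 0; row < numRows; ++row) {
      flat.push_back(d.base[d.indices[row]]);  // gather selected rows only
    }
  }
  return flat;  // empty means "keep the dictionary wrapping"
}

int main() {
  Dict d;
  d.base.resize(10'000);
  for (size_t i = 0; i < d.base.size(); ++i) {
    d.base[i] = static_cast<int64_t>(i) * 10;
  }
  d.indices = {5, 9999, 42};  // only 3 outer rows selected
  for (auto v : flattenIfLargeBase(d, 3)) {
    std::cout << v << " ";  // prints: 50 99990 420
  }
  std::cout << "\n";
}
```

The backout removes this path, so after a successful peel wrapEncoding_ is again always CONSTANT or DICTIONARY, never FLAT.
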
69 changes: 20 additions & 49 deletions velox/expression/PeeledEncoding.h
@@ -163,38 +163,19 @@ class PeeledEncoding {
   /// Takes a set of outer rows (rows defined over the peel) and a functor that
   /// is executed for each outer row which does not map to a null row.
   /// The outer and its corresponding inner row is passed to the functor.
-  template <typename F>
-  void applyToNonNullInnerRows(const SelectivityVector& outerRows, F&& func)
-      const {
-    auto* wrapNulls = wrapNulls_ ? wrapNulls_->as<uint64_t>() : nullptr;
-    switch (wrapEncoding_) {
-      case VectorEncoding::Simple::FLAT:
-        outerRows.applyToSelected([&](auto outerRow) {
-          if (wrapNulls && bits::isBitNull(wrapNulls, outerRow)) {
-            return;
-          }
-          func(outerRow, outerRow);
-        });
-        break;
-      case VectorEncoding::Simple::CONSTANT:
-        VELOX_CHECK_NULL(wrapNulls);
-        outerRows.applyToSelected(
-            [&](auto outerRow) { func(outerRow, constantWrapIndex_); });
-        break;
-      case VectorEncoding::Simple::DICTIONARY: {
-        auto indices = wrap_->as<vector_size_t>();
-        outerRows.applyToSelected([&](auto outerRow) {
-          // A known null in the outer row masks an error.
-          if (wrapNulls && bits::isBitNull(wrapNulls, outerRow)) {
-            return;
-          }
-          func(outerRow, indices[outerRow]);
-        });
-        break;
-      }
-      default:
-        VELOX_UNREACHABLE();
-    }
-  }
+  void applyToNonNullInnerRows(
+      const SelectivityVector& outerRows,
+      std::function<void(vector_size_t, vector_size_t)> func) const {
+    auto indices = wrap_ ? wrap_->as<vector_size_t>() : nullptr;
+    auto wrapNulls = wrapNulls_ ? wrapNulls_->as<uint64_t>() : nullptr;
+    outerRows.applyToSelected([&](auto outerRow) {
+      // A known null in the outer row masks an error.
+      if (wrapNulls && bits::isBitNull(wrapNulls, outerRow)) {
+        return;
+      }
+      vector_size_t innerRow = indices ? indices[outerRow] : constantWrapIndex_;
+      func(outerRow, innerRow);
+    });
+  }
 
  private:
@@ -214,32 +195,22 @@ class PeeledEncoding {
       const SelectivityVector& rows,
       BaseVector& firstWrapper);
 
-  void flattenPeeledVectors(
-      const SelectivityVector& rows,
-      const std::vector<bool>& constantFields,
-      std::vector<VectorPtr>& peeledVectors);
-
-  // The encoding of the peel. Set after getPeeledVectors() is called. Is equal
-  // to Flat if getPeeledVectors() has not been called or peeling was not
-  // successful.
+  /// The encoding of the peel. Set after getPeeledVectors() is called. Is equal
+  /// to Flat if getPeeledVectors() has not been called or peeling was not
+  /// successful.
   VectorEncoding::Simple wrapEncoding_ = VectorEncoding::Simple::FLAT;
 
-  // The dictionary indices. Only valid if wrapEncoding_ = DICTIONARY.
+  /// The dictionary indices. Only valid if wrapEncoding_ = DICTIONARY.
   BufferPtr wrap_;
 
-  // The dictionary nulls. Only valid if wrapEncoding_ = DICTIONARY.
+  /// The dictionary nulls. Only valid if wrapEncoding_ = DICTIONARY.
   BufferPtr wrapNulls_;
 
-  // The size of one of the peeled vectors. Only valid if wrapEncoding_ =
-  // DICTIONARY.
+  /// The size of one of the peeled vectors. Only valid if wrapEncoding_ =
+  /// DICTIONARY.
   vector_size_t baseSize_ = 0;
 
-  // The constant index. Only valid if wrapEncoding_ = CONSTANT.
+  /// The constant index. Only valid if wrapEncoding_ = CONSTANT.
   vector_size_t constantWrapIndex_ = 0;
-
-  // Deep copies of peeled vectors with low selectivity to avoid cost on unused
-  // rows. Stored here to bring their lifecycles no shorter than PeeledEncoding
-  // object.
-  std::vector<VectorPtr> flattenPeeled_;
 };
 } // namespace facebook::velox::exec
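
With the FLAT case gone, the restored applyToNonNullInnerRows() needs no switch over encodings: it skips outer rows masked by wrap nulls, then maps through the dictionary indices when present, or to the single constant index otherwise. A minimal standalone sketch of that contract, assuming raw arrays in place of SelectivityVector and BufferPtr; the free-function form and names below are illustrative, not the Velox signature:

```cpp
// Standalone sketch of the applyToNonNullInnerRows() contract: visit each
// selected outer row that is not masked by a wrap null, handing the functor
// the outer row and its inner (base) row.
#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

void applyToNonNullInnerRows(
    const std::vector<int32_t>& selectedOuterRows,
    const int32_t* indices,              // null when the wrap is constant
    const std::vector<bool>* wrapNulls,  // null when there are no wrap nulls
    int32_t constantWrapIndex,
    std::function<void(int32_t, int32_t)> func) {
  for (int32_t outerRow : selectedOuterRows) {
    // A known null in the outer row masks an error: skip it.
    if (wrapNulls && (*wrapNulls)[outerRow]) {
      continue;
    }
    int32_t innerRow = indices ? indices[outerRow] : constantWrapIndex;
    func(outerRow, innerRow);
  }
}

int main() {
  std::vector<int32_t> selected = {0, 1, 2, 3};
  std::vector<int32_t> indices = {7, 3, 7, 0};            // dictionary wrap
  std::vector<bool> nulls = {false, true, false, false};  // row 1 is null
  applyToNonNullInnerRows(
      selected, indices.data(), &nulls, 0, [](int32_t outer, int32_t inner) {
        std::cout << "outer " << outer << " -> inner " << inner << "\n";
      });
  // Rows 0, 2, 3 are visited; row 1 is skipped as null.
}
```

The std::function form restored here predates the optimization; it type-erases the callback instead of instantiating a template per call site.
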
4 changes: 2 additions & 2 deletions velox/expression/tests/ExprTest.cpp
@@ -2094,7 +2094,7 @@ TEST_F(ExprTest, memo) {
   // 1. correctly evaluates the unevaluated rows on subsequent runs
   // 2. Only caches results if it encounters the same base twice
   auto base = makeArrayVector<int64_t>(
-      500,
+      1'000,
      [](auto row) { return row % 5 + 1; },
      [](auto row, auto index) { return (row % 3) + index; });
@@ -2142,7 +2142,7 @@ TEST_F(ExprTest, memo) {
 
   // Create a new base
   base = makeArrayVector<int64_t>(
-      500,
+      1'000,
      [](auto row) { return row % 5 + 1; },
      [](auto row, auto index) { return (row % 3) + index; });

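The memo test restores its pre-optimization base size of 1'000 rows. The behavior under test is expression memoization: a result is cached only once the same base vector is encountered twice, and later runs must still evaluate rows the cached result does not cover. A minimal sketch of the cache-on-second-encounter part, assuming the base's pointer identity as the cache key; MemoizingExpr is a hypothetical stand-in, not the Velox implementation:

```cpp
// Standalone sketch: cache a computed result keyed on the identity of the
// base vector, and start caching only on the second encounter of that base.
#include <cstdint>
#include <iostream>
#include <memory>
#include <vector>

using VectorPtr = std::shared_ptr<std::vector<int64_t>>;

struct MemoizingExpr {
  const void* lastBase_ = nullptr;  // identity of the base seen last time
  VectorPtr cached_;

  VectorPtr eval(const VectorPtr& base) {
    if (base.get() == lastBase_ && cached_) {
      return cached_;  // same base again: serve the memoized result
    }
    auto result = std::make_shared<std::vector<int64_t>>();
    for (auto v : *base) {
      result->push_back(v * 2);  // stand-in for real expression work
    }
    if (base.get() == lastBase_) {
      cached_ = result;  // second encounter of this base: start caching
    }
    lastBase_ = base.get();
    return result;
  }
};

int main() {
  MemoizingExpr expr;
  auto base =
      std::make_shared<std::vector<int64_t>>(std::vector<int64_t>{1, 2, 3});
  auto r1 = expr.eval(base);
  auto r2 = expr.eval(base);
  auto r3 = expr.eval(base);
  std::cout << (r1.get() == r2.get()) << "\n";  // 0: first repeat recomputes
  std::cout << (r2.get() == r3.get()) << "\n";  // 1: now served from cache
}
```

The real test additionally verifies that rows not covered by a cached result are still evaluated correctly on later runs.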