From 84428f9851ec0f0a04b5e75160b6400549366f8e Mon Sep 17 00:00:00 2001 From: "Huameng (Michael) Jiang" Date: Thu, 1 Aug 2024 20:42:51 -0700 Subject: [PATCH] Allow copying vector to a new memory pool (#10647) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/10647 Until we have a proper solution to transfer vector memory ownership from velox/koski to downstream components, we extend the vector api to currently copy the vector with a downstream memory pool. Reviewed By: Yuhta Differential Revision: D60616369 fbshipit-source-id: 13515ae32ef9beadc2500e2c050f40244d46dc8e --- velox/vector/BaseVector.h | 15 ++++--- velox/vector/BiasVector.h | 10 +++-- velox/vector/ComplexVector.h | 40 +++++++++++-------- velox/vector/ConstantVector.h | 10 +++-- velox/vector/DictionaryVector.h | 10 +++-- velox/vector/FlatVector.h | 5 ++- velox/vector/FunctionVector.h | 3 +- velox/vector/LazyVector.h | 3 +- velox/vector/SequenceVector.h | 8 ++-- .../tests/CopyPreserveEncodingsTest.cpp | 16 ++++++++ 10 files changed, 78 insertions(+), 42 deletions(-) diff --git a/velox/vector/BaseVector.h b/velox/vector/BaseVector.h index 2a441dfa6d1c..0e2901003a4f 100644 --- a/velox/vector/BaseVector.h +++ b/velox/vector/BaseVector.h @@ -464,9 +464,11 @@ class BaseVector { const vector_size_t* toSourceRow); /// Utility for making a deep copy of a whole vector. - static VectorPtr copy(const BaseVector& vector) { - auto result = - BaseVector::create(vector.type(), vector.size(), vector.pool()); + static VectorPtr copy( + const BaseVector& vector, + velox::memory::MemoryPool* pool = nullptr) { + auto result = BaseVector::create( + vector.type(), vector.size(), pool ? pool : vector.pool()); result->copy(&vector, 0, 0, vector.size()); return result; } @@ -497,10 +499,11 @@ class BaseVector { } /// This makes a deep copy of the Vector allocating new child Vectors and - // Buffers recursively. Unlike copy, this preserves encodings recursively. - virtual VectorPtr copyPreserveEncodings() const = 0; + /// Buffers recursively. Unlike copy, this preserves encodings recursively. + virtual VectorPtr copyPreserveEncodings( + velox::memory::MemoryPool* pool = nullptr) const = 0; - // Construct a zero-copy slice of the vector with the indicated offset and + /// Construct a zero-copy slice of the vector with the indicated offset and /// length. virtual VectorPtr slice(vector_size_t offset, vector_size_t length) const = 0; diff --git a/velox/vector/BiasVector.h b/velox/vector/BiasVector.h index 57d44f45cd58..2a2e43a6de9b 100644 --- a/velox/vector/BiasVector.h +++ b/velox/vector/BiasVector.h @@ -163,13 +163,15 @@ class BiasVector : public SimpleVector { VELOX_NYI(); } - VectorPtr copyPreserveEncodings() const override { + VectorPtr copyPreserveEncodings( + velox::memory::MemoryPool* pool = nullptr) const override { + auto selfPool = pool ? pool : BaseVector::pool_; return std::make_shared>( - BaseVector::pool_, - AlignedBuffer::copy(BaseVector::pool_, BaseVector::nulls_), + selfPool, + AlignedBuffer::copy(selfPool, BaseVector::nulls_), BaseVector::length_, valueType_, - AlignedBuffer::copy(BaseVector::pool_, values_), + AlignedBuffer::copy(selfPool, values_), bias_, SimpleVector::stats_, BaseVector::distinctValueCount_, diff --git a/velox/vector/ComplexVector.h b/velox/vector/ComplexVector.h index 9046a8b10b49..dbeec4298abf 100644 --- a/velox/vector/ComplexVector.h +++ b/velox/vector/ComplexVector.h @@ -168,17 +168,19 @@ class RowVector : public BaseVector { const BaseVector* source, const folly::Range& ranges) override; - VectorPtr copyPreserveEncodings() const override { + VectorPtr copyPreserveEncodings( + velox::memory::MemoryPool* pool = nullptr) const override { std::vector copiedChildren(children_.size()); for (auto i = 0; i < children_.size(); ++i) { - copiedChildren[i] = children_[i]->copyPreserveEncodings(); + copiedChildren[i] = children_[i]->copyPreserveEncodings(pool); } + auto selfPool = pool ? pool : pool_; return std::make_shared( - pool_, + selfPool, type_, - AlignedBuffer::copy(pool_, nulls_), + AlignedBuffer::copy(selfPool, nulls_), length_, copiedChildren, nullCount_); @@ -466,15 +468,17 @@ class ArrayVector : public ArrayVectorBase { const BaseVector* source, const folly::Range& ranges) override; - VectorPtr copyPreserveEncodings() const override { + VectorPtr copyPreserveEncodings( + velox::memory::MemoryPool* pool = nullptr) const override { + auto selfPool = pool ? pool : pool_; return std::make_shared( - pool_, + selfPool, type_, - AlignedBuffer::copy(pool_, nulls_), + AlignedBuffer::copy(selfPool, nulls_), length_, - AlignedBuffer::copy(pool_, offsets_), - AlignedBuffer::copy(pool_, sizes_), - elements_->copyPreserveEncodings(), + AlignedBuffer::copy(selfPool, offsets_), + AlignedBuffer::copy(selfPool, sizes_), + elements_->copyPreserveEncodings(pool), nullCount_); } @@ -607,16 +611,18 @@ class MapVector : public ArrayVectorBase { const BaseVector* source, const folly::Range& ranges) override; - VectorPtr copyPreserveEncodings() const override { + VectorPtr copyPreserveEncodings( + velox::memory::MemoryPool* pool = nullptr) const override { + auto selfPool = pool ? pool : pool_; return std::make_shared( - pool_, + selfPool, type_, - AlignedBuffer::copy(pool_, nulls_), + AlignedBuffer::copy(selfPool, nulls_), length_, - AlignedBuffer::copy(pool_, offsets_), - AlignedBuffer::copy(pool_, sizes_), - keys_->copyPreserveEncodings(), - values_->copyPreserveEncodings(), + AlignedBuffer::copy(selfPool, offsets_), + AlignedBuffer::copy(selfPool, sizes_), + keys_->copyPreserveEncodings(pool), + values_->copyPreserveEncodings(pool), nullCount_, sortedKeys_); } diff --git a/velox/vector/ConstantVector.h b/velox/vector/ConstantVector.h index 7e24e70865b6..839335ba5e49 100644 --- a/velox/vector/ConstantVector.h +++ b/velox/vector/ConstantVector.h @@ -356,18 +356,20 @@ class ConstantVector final : public SimpleVector { } } - VectorPtr copyPreserveEncodings() const override { + VectorPtr copyPreserveEncodings( + velox::memory::MemoryPool* pool = nullptr) const override { + auto selfPool = pool ? pool : BaseVector::pool_; if (valueVector_) { return std::make_shared>( - BaseVector::pool_, + selfPool, BaseVector::length_, index_, - valueVector_->copyPreserveEncodings(), + valueVector_->copyPreserveEncodings(pool), SimpleVector::stats_); } return std::make_shared>( - BaseVector::pool_, + selfPool, BaseVector::length_, isNull_, BaseVector::type_, diff --git a/velox/vector/DictionaryVector.h b/velox/vector/DictionaryVector.h index 97b4d346e4a9..87c210bf4a1f 100644 --- a/velox/vector/DictionaryVector.h +++ b/velox/vector/DictionaryVector.h @@ -227,13 +227,15 @@ class DictionaryVector : public SimpleVector { void validate(const VectorValidateOptions& options) const override; - VectorPtr copyPreserveEncodings() const override { + VectorPtr copyPreserveEncodings( + velox::memory::MemoryPool* pool = nullptr) const override { + auto selfPool = pool ? pool : BaseVector::pool_; return std::make_shared>( - BaseVector::pool_, - AlignedBuffer::copy(BaseVector::pool_, BaseVector::nulls_), + selfPool, + AlignedBuffer::copy(selfPool, BaseVector::nulls_), BaseVector::length_, dictionaryValues_->copyPreserveEncodings(), - AlignedBuffer::copy(BaseVector::pool_, indices_), + AlignedBuffer::copy(selfPool, indices_), SimpleVector::stats_, BaseVector::distinctValueCount_, BaseVector::nullCount_, diff --git a/velox/vector/FlatVector.h b/velox/vector/FlatVector.h index 9c5ef5157709..81286ff50814 100644 --- a/velox/vector/FlatVector.h +++ b/velox/vector/FlatVector.h @@ -272,9 +272,10 @@ class FlatVector final : public SimpleVector { const BaseVector* source, const folly::Range& ranges) override; - VectorPtr copyPreserveEncodings() const override { + VectorPtr copyPreserveEncodings( + velox::memory::MemoryPool* pool = nullptr) const override { return std::make_shared>( - BaseVector::pool_, + pool ? pool : BaseVector::pool_, BaseVector::type_, AlignedBuffer::copy(BaseVector::pool_, BaseVector::nulls_), BaseVector::length_, diff --git a/velox/vector/FunctionVector.h b/velox/vector/FunctionVector.h index cc1f6fc87d36..422505c9d9e5 100644 --- a/velox/vector/FunctionVector.h +++ b/velox/vector/FunctionVector.h @@ -196,7 +196,8 @@ class FunctionVector : public BaseVector { VELOX_NYI(); } - VectorPtr copyPreserveEncodings() const override { + VectorPtr copyPreserveEncodings( + velox::memory::MemoryPool* /* pool */ = nullptr) const override { VELOX_UNSUPPORTED("copyPreserveEncodings not defined for FunctionVector"); } diff --git a/velox/vector/LazyVector.h b/velox/vector/LazyVector.h index 15feae83eae9..36154e04317d 100644 --- a/velox/vector/LazyVector.h +++ b/velox/vector/LazyVector.h @@ -259,7 +259,8 @@ class LazyVector : public BaseVector { void validate(const VectorValidateOptions& options) const override; - VectorPtr copyPreserveEncodings() const override { + VectorPtr copyPreserveEncodings( + velox::memory::MemoryPool* /* pool */ = nullptr) const override { VELOX_UNSUPPORTED("copyPreserveEncodings not defined for LazyVector"); } diff --git a/velox/vector/SequenceVector.h b/velox/vector/SequenceVector.h index 75feeae947ae..bed33cf0428f 100644 --- a/velox/vector/SequenceVector.h +++ b/velox/vector/SequenceVector.h @@ -198,12 +198,14 @@ class SequenceVector : public SimpleVector { return false; } - VectorPtr copyPreserveEncodings() const override { + VectorPtr copyPreserveEncodings( + velox::memory::MemoryPool* pool = nullptr) const override { + auto selfPool = pool ? pool : BaseVector::pool_; return std::make_shared>( - BaseVector::pool_, + selfPool, BaseVector::length_, sequenceValues_->copyPreserveEncodings(), - AlignedBuffer::copy(BaseVector::pool_, sequenceLengths_), + AlignedBuffer::copy(selfPool, sequenceLengths_), SimpleVector::stats_, BaseVector::distinctValueCount_, BaseVector::nullCount_, diff --git a/velox/vector/tests/CopyPreserveEncodingsTest.cpp b/velox/vector/tests/CopyPreserveEncodingsTest.cpp index 5d079bf37498..83e71e76c74a 100644 --- a/velox/vector/tests/CopyPreserveEncodingsTest.cpp +++ b/velox/vector/tests/CopyPreserveEncodingsTest.cpp @@ -349,4 +349,20 @@ TEST_F(CopyPreserveEncodingsTest, sequenceNoNulls) { assertEqualVectors(sequenceVector, copy); validateCopyPreserveEncodings(sequenceVector, copy); } + +TEST_F(CopyPreserveEncodingsTest, newMemoryPool) { + auto dictionaryVector = vectorMaker_.dictionaryVector(generateIntInput()); + + auto sourcePool = dictionaryVector->pool(); + auto targetPool = memory::memoryManager()->addLeafPool(); + auto preCopySrcMemory = sourcePool->usedBytes(); + ASSERT_EQ(0, targetPool->usedBytes()); + + auto copy = dictionaryVector->copyPreserveEncodings(targetPool.get()); + assertEqualVectors(dictionaryVector, copy); + validateCopyPreserveEncodings(dictionaryVector, copy); + + EXPECT_EQ(preCopySrcMemory, sourcePool->usedBytes()); + EXPECT_EQ(preCopySrcMemory, targetPool->usedBytes()); +} } // namespace facebook::velox::test