Skip to content

Commit

Permalink
Add BaseVector::setType API (#7783)
Browse files Browse the repository at this point in the history
Summary:
Sometimes it is useful to be able to change the logical type of a vector without making a copy.

The new BaseVector::setType API allows changing a vector's type to a compatible logical type, e.g.,
changing ROW("a" BIGINT) to ROW("b" BIGINT).

For example, in #6074

Pull Request resolved: #7783

Reviewed By: xiaoxmeng

Differential Revision: D51847700

Pulled By: mbasmanova

fbshipit-source-id: bb5b30d3372239196b1f6f1dbb1dee9226bf306a
  • Loading branch information
JkSelf authored and facebook-github-bot committed Dec 5, 2023
1 parent 928f378 commit fec3ff0
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 1 deletion.
16 changes: 15 additions & 1 deletion velox/vector/BaseVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,20 @@ class BaseVector {
return type_;
}

/// Changes the type of this vector in place; the body only replaces the
/// stored type and does not touch any other member.
///
/// The new type can have a different logical representation (e.g. different
/// ROW field names) while maintaining the same physical type, i.e.
/// type_->kindEquals(type) must hold.
///
/// Additionally, note that the caller must ensure that this vector is not
/// shared, i.e. singly-referenced. This is not verified here.
///
/// Fails a VELOX_CHECK if 'type' is null or if the old and new types do not
/// have matching kinds. Vector classes that hold child vectors (RowVector,
/// ArrayVector, MapVector) override this method to update their children as
/// well.
///
/// @param type The new type. Must not be null.
virtual void setType(const TypePtr& type) {
VELOX_CHECK_NOT_NULL(type);
// Validate before assigning so that a failed check leaves type_ unchanged.
VELOX_CHECK(
type_->kindEquals(type),
"Cannot change vector type from {} to {}. The old and new types can be different logical types, but the underlying physical types must match.",
type_,
type);
type_ = type;
}

TypeKind typeKind() const {
return typeKind_;
}
Expand Down Expand Up @@ -868,7 +882,7 @@ class BaseVector {
return sliceBuffer(*BOOLEAN(), nulls_, offset, length, pool_);
}

const TypePtr type_;
TypePtr type_;
const TypeKind typeKind_;
const VectorEncoding::Simple encoding_;
BufferPtr nulls_;
Expand Down
19 changes: 19 additions & 0 deletions velox/vector/ComplexVector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,13 @@ void RowVector::copy(
}
}

// Switches this row vector to 'type' and hands each child vector the type of
// the matching field in the new row type.
void RowVector::setType(const TypePtr& type) {
  // The base implementation validates 'type' and stores it in type_.
  BaseVector::setType(type);

  const auto& rowType = type_->asRow();
  for (auto column = 0; column < childrenSize_; ++column) {
    children_[column]->setType(rowType.childAt(column));
  }
}

namespace {

// Runs quick checks to determine whether input vector has only null values.
Expand Down Expand Up @@ -844,6 +851,11 @@ std::optional<int32_t> ArrayVector::compare(
flags);
}

// Switches this array vector to 'type' and hands the elements vector the
// element type of the new array type.
void ArrayVector::setType(const TypePtr& type) {
  // The base implementation validates 'type' and stores it in type_.
  BaseVector::setType(type);

  const auto& arrayType = type_->asArray();
  elements_->setType(arrayType.elementType());
}

namespace {
uint64_t hashArray(
uint64_t hash,
Expand Down Expand Up @@ -1097,6 +1109,13 @@ bool MapVector::isSorted(vector_size_t index) const {
return true;
}

// Switches this map vector to 'type' and hands the keys and values vectors the
// key and value types of the new map type.
void MapVector::setType(const TypePtr& type) {
  // The base implementation validates 'type' and stores it in type_.
  BaseVector::setType(type);

  keys_->setType(type_->asMap().keyType());
  values_->setType(type_->asMap().valueType());
}

// static
void MapVector::canonicalize(
const std::shared_ptr<MapVector>& map,
Expand Down
6 changes: 6 additions & 0 deletions velox/vector/ComplexVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ class RowVector : public BaseVector {
return children_;
}

/// Changes the type of this vector via BaseVector::setType and then sets
/// the type of each child vector to the corresponding child of the new row
/// type.
void setType(const TypePtr& type) override;

void copy(
const BaseVector* source,
vector_size_t targetIndex,
Expand Down Expand Up @@ -434,6 +436,8 @@ class ArrayVector : public ArrayVectorBase {
std::move(elements), type()->childAt(0), pool_);
}

/// Changes the type of this vector via BaseVector::setType and then sets
/// the type of the elements vector to the element type of the new array
/// type.
void setType(const TypePtr& type) override;

void copyRanges(
const BaseVector* source,
const folly::Range<const CopyRange*>& ranges) override;
Expand Down Expand Up @@ -550,6 +554,8 @@ class MapVector : public ArrayVectorBase {
return values_;
}

/// Changes the type of this vector via BaseVector::setType and then sets
/// the types of the keys and values vectors to the key and value types of
/// the new map type.
void setType(const TypePtr& type) override;

bool hasSortedKeys() const {
return sortedKeys_;
}
Expand Down
57 changes: 57 additions & 0 deletions velox/vector/tests/VectorTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3535,5 +3535,62 @@ TEST_F(VectorTest, hashAll) {
}
}

// Exercises BaseVector::setType on ROW vectors whose fields are scalars,
// nested ROWs, ARRAYs of ROWs and MAPs of ROWs. Each case renames fields
// (allowed) and then tries to change a field from BIGINT to VARCHAR (rejected).
//
// NOTE(review): only the top-level vector's type is asserted after the
// change; the types of child vectors are not inspected directly.
TEST_F(VectorTest, setType) {
  // Creates a vector of 'original' type, switches it to 'renamed' and expects
  // the vector to report that type. Then expects a switch to 'incompatible'
  // to throw with the type-mismatch message.
  auto verify = [this](
                    const auto& original,
                    const auto& renamed,
                    const auto& incompatible) {
    auto vector = BaseVector::create(original, 1'000, pool());

    vector->setType(renamed);
    EXPECT_EQ(vector->type()->toString(), renamed->toString());

    const auto expectedError = fmt::format(
        "Cannot change vector type from {} to {}. The old and new types can be different logical types, but the underlying physical types must match.",
        renamed->toString(),
        incompatible->toString());
    VELOX_ASSERT_RUNTIME_THROW(vector->setType(incompatible), expectedError);
  };

  // ROW
  {
    const auto original = ROW({"aa"}, {BIGINT()});
    const auto renamed = ROW({"bb"}, {BIGINT()});
    const auto incompatible = ROW({"bb"}, {VARCHAR()});
    verify(original, renamed, incompatible);
  }

  // ROW(ROW)
  {
    const auto original = ROW(
        {"a", "b"}, {ROW({"c", "d"}, {BIGINT(), BIGINT()}), BIGINT()});
    const auto renamed = ROW(
        {"a", "b"}, {ROW({"cc", "dd"}, {BIGINT(), BIGINT()}), BIGINT()});
    const auto incompatible = ROW(
        {"a", "b"}, {ROW({"cc", "dd"}, {VARCHAR(), BIGINT()}), BIGINT()});
    verify(original, renamed, incompatible);
  }

  // ARRAY(ROW)
  {
    const auto original = ROW(
        {"a", "b"},
        {ARRAY(ROW({"c", "d"}, {BIGINT(), BIGINT()})), BIGINT()});
    const auto renamed = ROW(
        {"a", "b"},
        {ARRAY(ROW({"cc", "dd"}, {BIGINT(), BIGINT()})), BIGINT()});
    const auto incompatible = ROW(
        {"a", "b"},
        {ARRAY(ROW({"cc", "dd"}, {VARCHAR(), BIGINT()})), BIGINT()});
    verify(original, renamed, incompatible);
  }

  // MAP(ROW)
  {
    const auto original = ROW(
        {"a", "b"},
        {MAP(ROW({"c", "d"}, {BIGINT(), BIGINT()}),
             ROW({"e", "f"}, {BIGINT(), BIGINT()})),
         BIGINT()});
    const auto renamed = ROW(
        {"a", "b"},
        {MAP(ROW({"cc", "dd"}, {BIGINT(), BIGINT()}),
             ROW({"ee", "ff"}, {BIGINT(), BIGINT()})),
         BIGINT()});
    const auto incompatible = ROW(
        {"a", "b"},
        {MAP(ROW({"cc", "dd"}, {VARCHAR(), BIGINT()}),
             ROW({"ee", "ff"}, {VARCHAR(), BIGINT()})),
         BIGINT()});
    verify(original, renamed, incompatible);
  }
}

} // namespace
} // namespace facebook::velox

0 comments on commit fec3ff0

Please sign in to comment.