Skip to content

Commit

Permalink
Change TypeWithId to take exclusive ownership on children to avoid co…
Browse files Browse the repository at this point in the history
…nfusion (facebookincubator#9244)

Summary:

Because the constructor is reparenting the children, taking shared ownership on children is misleading, especially when they are `const` qualified.

Differential Revision: D55318609
  • Loading branch information
Yuhta authored and facebook-github-bot committed Mar 25, 2024
1 parent e955b4b commit bc00d8e
Show file tree
Hide file tree
Showing 9 changed files with 134 additions and 78 deletions.
34 changes: 19 additions & 15 deletions velox/dwio/common/TypeUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,49 +39,53 @@ void checkChildrenSelected(
}
}

std::shared_ptr<const TypeWithId> visit(
std::unique_ptr<TypeWithId> visit(
const std::shared_ptr<const TypeWithId>& typeWithId,
const std::function<bool(size_t)>& selector) {
if (typeWithId->type()->isPrimitiveType()) {
return typeWithId;
return std::make_unique<TypeWithId>(
typeWithId->type(),
std::vector<std::unique_ptr<TypeWithId>>(),
typeWithId->id(),
typeWithId->maxId(),
typeWithId->column());
}
if (typeWithId->type()->isRow()) {
std::vector<std::string> names;
std::vector<std::shared_ptr<const TypeWithId>> typesWithId;
std::vector<std::unique_ptr<TypeWithId>> children;
std::vector<std::shared_ptr<const Type>> types;
auto& row = typeWithId->type()->asRow();
for (auto i = 0; i < typeWithId->size(); ++i) {
auto& child = typeWithId->childAt(i);
if (selector(child->id())) {
names.push_back(row.nameOf(i));
std::shared_ptr<const TypeWithId> twid;
twid = visit(child, selector);
typesWithId.push_back(twid);
types.push_back(twid->type());
auto newChild = visit(child, selector);
types.push_back(newChild->type());
children.push_back(std::move(newChild));
}
}
VELOX_USER_CHECK(
!types.empty(), "selected nothing from row: " + row.toString());
return std::make_shared<TypeWithId>(
return std::make_unique<TypeWithId>(
ROW(std::move(names), std::move(types)),
std::move(typesWithId),
std::move(children),
typeWithId->id(),
typeWithId->maxId(),
typeWithId->column());
} else {
checkChildrenSelected(typeWithId, selector);
std::vector<std::shared_ptr<const TypeWithId>> typesWithId;
std::vector<std::unique_ptr<TypeWithId>> children;
std::vector<std::shared_ptr<const Type>> types;
for (auto i = 0; i < typeWithId->size(); ++i) {
auto& child = typeWithId->childAt(i);
std::shared_ptr<const TypeWithId> twid = visit(child, selector);
typesWithId.push_back(twid);
types.push_back(twid->type());
auto newChild = visit(child, selector);
types.push_back(newChild->type());
children.push_back(std::move(newChild));
}
auto type = createType(typeWithId->type()->kind(), std::move(types));
return std::make_shared<TypeWithId>(
return std::make_unique<TypeWithId>(
type,
std::move(typesWithId),
std::move(children),
typeWithId->id(),
typeWithId->maxId(),
typeWithId->column());
Expand Down
24 changes: 18 additions & 6 deletions velox/dwio/common/TypeWithId.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,21 @@ namespace facebook::velox::dwio::common {
using velox::Type;
using velox::TypeKind;

namespace {
std::vector<std::shared_ptr<const TypeWithId>> toShared(
std::vector<std::unique_ptr<TypeWithId>> nodes) {
std::vector<std::shared_ptr<const TypeWithId>> result;
result.reserve(nodes.size());
for (auto&& node : nodes) {
result.emplace_back(std::move(node));
}
return result;
}
} // namespace

TypeWithId::TypeWithId(
std::shared_ptr<const Type> type,
std::vector<std::shared_ptr<const TypeWithId>>&& children,
std::vector<std::unique_ptr<TypeWithId>>&& children,
uint32_t id,
uint32_t maxId,
uint32_t column)
Expand All @@ -33,13 +45,13 @@ TypeWithId::TypeWithId(
id_{id},
maxId_{maxId},
column_{column},
children_{std::move(children)} {
children_{toShared(std::move(children))} {
for (auto& child : children_) {
const_cast<const TypeWithId*&>(child->parent_) = this;
}
}

std::shared_ptr<const TypeWithId> TypeWithId::create(
std::unique_ptr<TypeWithId> TypeWithId::create(
const std::shared_ptr<const Type>& root,
uint32_t next) {
return create(root, next, 0);
Expand All @@ -54,13 +66,13 @@ const std::shared_ptr<const TypeWithId>& TypeWithId::childAt(
return children_.at(idx);
}

std::shared_ptr<const TypeWithId> TypeWithId::create(
std::unique_ptr<TypeWithId> TypeWithId::create(
const std::shared_ptr<const Type>& type,
uint32_t& next,
uint32_t column) {
DWIO_ENSURE_NOT_NULL(type);
const uint32_t myId = next++;
std::vector<std::shared_ptr<const TypeWithId>> children{};
std::vector<std::unique_ptr<TypeWithId>> children;
children.reserve(type->size());
auto offset = 0;
for (const auto& child : *type) {
Expand All @@ -70,7 +82,7 @@ std::shared_ptr<const TypeWithId> TypeWithId::create(
(myId == 0 && type->kind() == TypeKind::ROW) ? offset++ : column));
}
const uint32_t maxId = next - 1;
return std::make_shared<const TypeWithId>(
return std::make_unique<TypeWithId>(
type, std::move(children), myId, maxId, column);
}

Expand Down
10 changes: 7 additions & 3 deletions velox/dwio/common/TypeWithId.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,18 @@ namespace facebook::velox::dwio::common {

class TypeWithId : public velox::Tree<std::shared_ptr<const TypeWithId>> {
public:
/// NOTE: This constructor will re-parent the children.
TypeWithId(
std::shared_ptr<const velox::Type> type,
std::vector<std::shared_ptr<const TypeWithId>>&& children,
std::vector<std::unique_ptr<TypeWithId>>&& children,
uint32_t id,
uint32_t maxId,
uint32_t column);

static std::shared_ptr<const TypeWithId> create(
TypeWithId(const TypeWithId&) = delete;
TypeWithId& operator=(const TypeWithId&) = delete;

static std::unique_ptr<TypeWithId> create(
const std::shared_ptr<const velox::Type>& root,
uint32_t next = 0);

Expand Down Expand Up @@ -70,7 +74,7 @@ class TypeWithId : public velox::Tree<std::shared_ptr<const TypeWithId>> {
}

private:
static std::shared_ptr<const TypeWithId> create(
static std::unique_ptr<TypeWithId> create(
const std::shared_ptr<const velox::Type>& type,
uint32_t& next,
uint32_t column);
Expand Down
2 changes: 1 addition & 1 deletion velox/dwio/common/tests/TypeTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ TEST(TestType, selectedType) {
"col3:map<float,double>,col4:float,"
"col5:int,col6:bigint,col7:string>",
HiveTypeSerializer::serialize(type).c_str());
auto typeWithId = TypeWithId::create(type);
std::shared_ptr<const TypeWithId> typeWithId = TypeWithId::create(type);
EXPECT_EQ(0, typeWithId->id());
EXPECT_EQ(11, typeWithId->maxId());

Expand Down
2 changes: 1 addition & 1 deletion velox/dwio/dwrf/test/ColumnWriterTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4239,7 +4239,7 @@ TEST_F(ColumnWriterTest, ColumnIdInStream) {
const uint32_t kColumnId = 2;
auto typeWithId = std::make_shared<const TypeWithId>(
type,
std::vector<std::shared_ptr<const TypeWithId>>{},
std::vector<std::unique_ptr<TypeWithId>>{},
/* id */ kNodeId,
/* maxId */ kNodeId,
/* column */ kColumnId);
Expand Down
2 changes: 1 addition & 1 deletion velox/dwio/dwrf/test/TestColumnReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ class ColumnReaderTestBase {
EXPECT_CALL(streams_, getRowReaderOptionsProxy())
.WillRepeatedly(testing::Return(&options));

auto fileTypeWithId =
std::shared_ptr<const TypeWithId> fileTypeWithId =
TypeWithId::create(fileType ? fileType : requestedType);

if (useSelectiveReader()) {
Expand Down
97 changes: 48 additions & 49 deletions velox/dwio/parquet/reader/ParquetReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ class ReaderBase {

void initializeSchema();

std::shared_ptr<const ParquetTypeWithId> getParquetColumnInfo(
std::unique_ptr<ParquetTypeWithId> getParquetColumnInfo(
uint32_t maxSchemaElementIdx,
uint32_t maxRepeat,
uint32_t maxDefine,
Expand All @@ -100,9 +100,9 @@ class ReaderBase {

TypePtr convertType(const thrift::SchemaElement& schemaElement) const;

template <typename T>
static std::shared_ptr<const RowType> createRowType(
std::vector<std::shared_ptr<const ParquetTypeWithId::TypeWithId>>
children,
const std::vector<T>& children,
bool fileColumnNamesReadAsLowerCase);

memory::MemoryPool& pool_;
Expand Down Expand Up @@ -222,7 +222,7 @@ void ReaderBase::initializeSchema() {
schemaWithId_->getChildren(), isFileColumnNamesReadAsLowerCase());
}

std::shared_ptr<const ParquetTypeWithId> ReaderBase::getParquetColumnInfo(
std::unique_ptr<ParquetTypeWithId> ReaderBase::getParquetColumnInfo(
uint32_t maxSchemaElementIdx,
uint32_t maxRepeat,
uint32_t maxDefine,
Expand Down Expand Up @@ -256,7 +256,7 @@ std::shared_ptr<const ParquetTypeWithId> ReaderBase::getParquetColumnInfo(
schemaElement.__isset.num_children && schemaElement.num_children > 0,
"Node has no children but should");

std::vector<std::shared_ptr<const ParquetTypeWithId::TypeWithId>> children;
std::vector<std::unique_ptr<ParquetTypeWithId::TypeWithId>> children;

auto curSchemaIdx = schemaIdx;
for (int32_t i = 0; i < schemaElement.num_children; i++) {
Expand All @@ -267,7 +267,7 @@ std::shared_ptr<const ParquetTypeWithId> ReaderBase::getParquetColumnInfo(
curSchemaIdx,
++schemaIdx,
columnIdx);
children.push_back(child);
children.push_back(std::move(child));
}
VELOX_CHECK(!children.empty());

Expand All @@ -284,11 +284,11 @@ std::shared_ptr<const ParquetTypeWithId> ReaderBase::getParquetColumnInfo(
thrift::FieldRepetitionType::REPEATED);
VELOX_CHECK_EQ(children.size(), 2);

auto childrenCopy = children;
return std::make_shared<const ParquetTypeWithId>(
TypeFactory<TypeKind::MAP>::create(
children[0]->type(), children[1]->type()),
std::move(childrenCopy),
auto type = TypeFactory<TypeKind::MAP>::create(
children[0]->type(), children[1]->type());
return std::make_unique<ParquetTypeWithId>(
std::move(type),
std::move(children),
curSchemaIdx, // TODO: there are holes in the ids
maxSchemaElementIdx,
ParquetTypeWithId::kNonLeaf, // columnIdx,
Expand All @@ -308,10 +308,10 @@ std::shared_ptr<const ParquetTypeWithId> ReaderBase::getParquetColumnInfo(
case thrift::ConvertedType::MAP: {
VELOX_CHECK_EQ(children.size(), 1);
const auto& child = children[0];
auto grandChildren = child->getChildren();
return std::make_shared<const ParquetTypeWithId>(
child->type(),
std::move(grandChildren),
auto type = child->type();
return std::make_unique<ParquetTypeWithId>(
std::move(type),
std::move(*(ParquetTypeWithId*)child.get()).moveChildren(),
curSchemaIdx, // TODO: there are holes in the ids
maxSchemaElementIdx,
ParquetTypeWithId::kNonLeaf, // columnIdx,
Expand All @@ -335,10 +335,10 @@ std::shared_ptr<const ParquetTypeWithId> ReaderBase::getParquetColumnInfo(
children.size(), 2, "children size should not be larger than 2");
if (children.size() == 1) {
// child of LIST
auto childrenCopy = children;
return std::make_shared<ParquetTypeWithId>(
TypeFactory<TypeKind::ARRAY>::create(children[0]->type()),
std::move(childrenCopy),
auto type = TypeFactory<TypeKind::ARRAY>::create(children[0]->type());
return std::make_unique<ParquetTypeWithId>(
std::move(type),
std::move(children),
curSchemaIdx,
maxSchemaElementIdx,
ParquetTypeWithId::kNonLeaf, // columnIdx,
Expand All @@ -349,11 +349,11 @@ std::shared_ptr<const ParquetTypeWithId> ReaderBase::getParquetColumnInfo(
maxDefine);
} else if (children.size() == 2) {
// children of MAP
auto childrenCopy = children;
return std::make_shared<const ParquetTypeWithId>(
TypeFactory<TypeKind::MAP>::create(
children[0]->type(), children[1]->type()),
std::move(childrenCopy),
auto type = TypeFactory<TypeKind::MAP>::create(
children[0]->type(), children[1]->type());
return std::make_unique<ParquetTypeWithId>(
std::move(type),
std::move(children),
curSchemaIdx, // TODO: there are holes in the ids
maxSchemaElementIdx,
ParquetTypeWithId::kNonLeaf, // columnIdx,
Expand All @@ -365,10 +365,10 @@ std::shared_ptr<const ParquetTypeWithId> ReaderBase::getParquetColumnInfo(
}
} else {
// Row type
auto childrenCopy = children;
return std::make_shared<const ParquetTypeWithId>(
createRowType(children, isFileColumnNamesReadAsLowerCase()),
std::move(childrenCopy),
auto type = createRowType(children, isFileColumnNamesReadAsLowerCase());
return std::make_unique<ParquetTypeWithId>(
std::move(type),
std::move(children),
curSchemaIdx,
maxSchemaElementIdx,
ParquetTypeWithId::kNonLeaf, // columnIdx,
Expand All @@ -386,34 +386,33 @@ std::shared_ptr<const ParquetTypeWithId> ReaderBase::getParquetColumnInfo(
int32_t scale = schemaElement.__isset.scale ? schemaElement.scale : 0;
int32_t type_length =
schemaElement.__isset.type_length ? schemaElement.type_length : 0;
std::vector<std::shared_ptr<const dwio::common::TypeWithId>> children;
std::vector<std::unique_ptr<dwio::common::TypeWithId>> children;
const std::optional<thrift::LogicalType> logicalType_ =
schemaElement.__isset.logicalType
? std::optional<thrift::LogicalType>(schemaElement.logicalType)
: std::nullopt;
std::shared_ptr<const ParquetTypeWithId> leafTypePtr =
std::make_shared<const ParquetTypeWithId>(
veloxType,
std::move(children),
curSchemaIdx,
maxSchemaElementIdx,
columnIdx++,
name,
schemaElement.type,
logicalType_,
maxRepeat,
maxDefine,
precision,
scale,
type_length);
auto leafTypePtr = std::make_unique<ParquetTypeWithId>(
veloxType,
std::move(children),
curSchemaIdx,
maxSchemaElementIdx,
columnIdx++,
name,
schemaElement.type,
logicalType_,
maxRepeat,
maxDefine,
precision,
scale,
type_length);

if (schemaElement.repetition_type ==
thrift::FieldRepetitionType::REPEATED) {
// Array
children.clear();
children.reserve(1);
children.push_back(leafTypePtr);
return std::make_shared<const ParquetTypeWithId>(
children.push_back(std::move(leafTypePtr));
return std::make_unique<ParquetTypeWithId>(
TypeFactory<TypeKind::ARRAY>::create(veloxType),
std::move(children),
curSchemaIdx,
Expand Down Expand Up @@ -571,14 +570,14 @@ TypePtr ReaderBase::convertType(
}
}

template <typename T>
std::shared_ptr<const RowType> ReaderBase::createRowType(
std::vector<std::shared_ptr<const ParquetTypeWithId::TypeWithId>> children,
const std::vector<T>& children,
bool fileColumnNamesReadAsLowerCase) {
std::vector<std::string> childNames;
std::vector<TypePtr> childTypes;
for (auto& child : children) {
auto childName =
std::static_pointer_cast<const ParquetTypeWithId>(child)->name_;
auto childName = static_cast<const ParquetTypeWithId&>(*child).name_;
if (fileColumnNamesReadAsLowerCase) {
folly::toLowerAscii(childName);
}
Expand Down
Loading

0 comments on commit bc00d8e

Please sign in to comment.