Skip to content

Commit

Permalink
Clean up some spurious shared_ptr creations/deletions in DwrfReader (#…
Browse files Browse the repository at this point in the history
…10095)

Summary:
Pull Request resolved: #10095

I was looking at a query reading from a table with many columns, and it's spending a huge amount
of CPU time just creating and deleting shared_ptrs when setting up the SplitReader and the
RowReader.

There's still more that can be done but this change cleans up some easy ones.

Reviewed By: xiaoxmeng, Yuhta

Differential Revision: D58263307

fbshipit-source-id: 3cbedfa1c47d97ae63029bceb400b7b7dc72a47d
  • Loading branch information
Kevin Wilfong authored and facebook-github-bot committed Jun 7, 2024
1 parent c860058 commit b0c2f44
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 18 deletions.
2 changes: 1 addition & 1 deletion velox/dwio/common/ColumnSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ FilterTypePtr ColumnSelector::buildNode(
// column selector filter tree
nodes_.reserve(nodes_.size() + type->size());
if (node.node == 0) {
auto rowType = type->asRow();
auto& rowType = type->asRow();
for (size_t i = 0, size = type->size(); i < size; ++i) {
bool inData = contentType && i < contentType->size();
current->addChild(buildNode(
Expand Down
4 changes: 2 additions & 2 deletions velox/dwio/common/FilterNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,8 @@ class FilterType {
return node_.node == 0;
}

inline void addChild(const FilterTypePtr& child) {
children_.push_back(child);
inline void addChild(FilterTypePtr child) {
children_.push_back(std::move(child));
}

inline void setSequenceFilter(const SeqFilter& seqFilter) {
Expand Down
26 changes: 15 additions & 11 deletions velox/dwio/dwrf/reader/DwrfReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -791,21 +791,29 @@ TypePtr updateColumnNames(const TypePtr& fileType, const TypePtr& tableType) {
auto fileRowType = std::dynamic_pointer_cast<const T>(fileType);
auto tableRowType = std::dynamic_pointer_cast<const T>(tableType);

std::vector<std::string> newFileFieldNames{fileRowType->names()};
std::vector<TypePtr> newFileFieldTypes{fileRowType->children()};
std::vector<std::string> newFileFieldNames;
newFileFieldNames.reserve(fileRowType->size());
std::vector<TypePtr> newFileFieldTypes;
newFileFieldTypes.reserve(fileRowType->size());

for (auto childIdx = 0; childIdx < tableRowType->size(); ++childIdx) {
if (childIdx >= fileRowType->size()) {
break;
}

newFileFieldTypes[childIdx] = updateColumnNames(
newFileFieldTypes.push_back(updateColumnNames(
fileRowType->childAt(childIdx),
tableRowType->childAt(childIdx),
fileRowType->nameOf(childIdx),
tableRowType->nameOf(childIdx));
tableRowType->nameOf(childIdx)));

newFileFieldNames[childIdx] = tableRowType->nameOf(childIdx);
newFileFieldNames.push_back(tableRowType->nameOf(childIdx));
}

for (auto childIdx = tableRowType->size(); childIdx < fileRowType->size();
++childIdx) {
newFileFieldTypes.push_back(fileRowType->childAt(childIdx));
newFileFieldNames.push_back(fileRowType->nameOf(childIdx));
}

return std::make_shared<const T>(
Expand All @@ -830,9 +838,6 @@ TypePtr updateColumnNames(
return fileType;
}

std::vector<std::string> fileFieldNames{fileType->size()};
std::vector<TypePtr> fileFieldTypes{fileType->size()};

if (fileType->isRow()) {
return updateColumnNames<RowType>(fileType, tableType);
}
Expand All @@ -855,9 +860,8 @@ TypePtr updateColumnNames(
void DwrfReader::updateColumnNamesFromTableSchema() {
const auto& tableSchema = options_.fileSchema();
const auto& fileSchema = readerBase_->getSchema();
auto newSchema = std::dynamic_pointer_cast<const RowType>(
updateColumnNames(fileSchema, tableSchema, "", ""));
readerBase_->setSchema(newSchema);
readerBase_->setSchema(std::dynamic_pointer_cast<const RowType>(
updateColumnNames(fileSchema, tableSchema, "", "")));
}

std::unique_ptr<StripeInformation> DwrfReader::getStripe(
Expand Down
13 changes: 11 additions & 2 deletions velox/dwio/dwrf/reader/ReaderBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,23 +289,32 @@ std::shared_ptr<const Type> ReaderBase::convertType(
const auto type = footer.types(index);
switch (type.kind()) {
case TypeKind::BOOLEAN:
return BOOLEAN();
case TypeKind::TINYINT:
return TINYINT();
case TypeKind::SMALLINT:
return SMALLINT();
case TypeKind::INTEGER:
return INTEGER();
case TypeKind::BIGINT:
return BIGINT();
case TypeKind::HUGEINT:
if (type.format() == DwrfFormat::kOrc &&
type.getOrcPtr()->kind() == proto::orc::Type_Kind_DECIMAL) {
return DECIMAL(
type.getOrcPtr()->precision(), type.getOrcPtr()->scale());
}
[[fallthrough]];
return HUGEINT();
case TypeKind::REAL:
return REAL();
case TypeKind::DOUBLE:
return DOUBLE();
case TypeKind::VARCHAR:
return VARCHAR();
case TypeKind::VARBINARY:
return VARBINARY();
case TypeKind::TIMESTAMP:
return createScalarType(type.kind());
return TIMESTAMP();
case TypeKind::ARRAY:
return ARRAY(convertType(
footer, type.subtypes(0), fileColumnNamesReadAsLowerCase));
Expand Down
4 changes: 2 additions & 2 deletions velox/dwio/dwrf/reader/ReaderBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,8 @@ class ReaderBase {
return schema_;
}

void setSchema(const RowTypePtr& newSchema) {
schema_ = newSchema;
void setSchema(RowTypePtr newSchema) {
schema_ = std::move(newSchema);
}

const std::shared_ptr<const dwio::common::TypeWithId>& getSchemaWithId()
Expand Down

0 comments on commit b0c2f44

Please sign in to comment.