Skip to content

Commit

Permalink
Convert MAP subfield prunings to Alpha flatmap feature selector (face…
Browse files Browse the repository at this point in the history
…bookincubator#9088)

Summary: Pull Request resolved: facebookincubator#9088

Reviewed By: tanjialiang

Differential Revision: D54870453

fbshipit-source-id: 4466d53faa8c70e5a8e1bc8786460591bf23ea25
  • Loading branch information
Yuhta authored and Joe-Abraham committed Jun 7, 2024
1 parent 5c84bc1 commit bd95801
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 2 deletions.
17 changes: 15 additions & 2 deletions velox/connectors/hive/HiveConnectorUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ void addSubfields(
if (stringKey) {
deduplicate(stringSubscripts);
filter = std::make_unique<common::BytesValues>(stringSubscripts, false);
spec.setFlatMapFeatureSelection(std::move(stringSubscripts));
} else {
deduplicate(longSubscripts);
if (keyType->isReal()) {
Expand All @@ -189,6 +190,11 @@ void addSubfields(
} else {
filter = common::createBigintValues(longSubscripts, false);
}
std::vector<std::string> features;
for (auto num : longSubscripts) {
features.push_back(std::to_string(num));
}
spec.setFlatMapFeatureSelection(std::move(features));
}
keys->setFilter(std::move(filter));
break;
Expand Down Expand Up @@ -510,9 +516,16 @@ void configureRowReaderOptions(

std::vector<std::string> columnNames;
for (auto& spec : scanSpec->children()) {
if (!spec->isConstant()) {
columnNames.push_back(spec->fieldName());
if (spec->isConstant()) {
continue;
}
std::string name = spec->fieldName();
if (!spec->flatMapFeatureSelection().empty()) {
name += "#[";
name += folly::join(',', spec->flatMapFeatureSelection());
name += ']';
}
columnNames.push_back(std::move(name));
}
std::shared_ptr<dwio::common::ColumnSelector> cs;
if (columnNames.empty()) {
Expand Down
4 changes: 4 additions & 0 deletions velox/connectors/hive/tests/HiveConnectorTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ TEST_F(HiveConnectorTest, makeScanSpec_requiredSubfields_mergeArray) {
pool_.get());
auto* c0 = scanSpec->childByName("c0");
ASSERT_EQ(c0->maxArrayElementsCount(), 2);
ASSERT_TRUE(c0->flatMapFeatureSelection().empty());
auto* elements = c0->childByName(ScanSpec::kArrayElementsFieldName);
ASSERT_FALSE(elements->childByName("c0c0")->isConstant());
ASSERT_FALSE(elements->childByName("c0c2")->isConstant());
Expand Down Expand Up @@ -180,6 +181,8 @@ TEST_F(HiveConnectorTest, makeScanSpec_requiredSubfields_mergeMap) {
{},
pool_.get());
auto* c0 = scanSpec->childByName("c0");
ASSERT_EQ(
c0->flatMapFeatureSelection(), std::vector<std::string>({"10", "20"}));
auto* keysFilter = c0->childByName(ScanSpec::kMapKeysFieldName)->filter();
ASSERT_TRUE(keysFilter);
ASSERT_TRUE(applyFilter(*keysFilter, 10));
Expand All @@ -206,6 +209,7 @@ TEST_F(HiveConnectorTest, makeScanSpec_requiredSubfields_allSubscripts) {
{},
pool_.get());
auto* c0 = scanSpec->childByName("c0");
ASSERT_TRUE(c0->flatMapFeatureSelection().empty());
ASSERT_FALSE(c0->childByName(ScanSpec::kMapKeysFieldName)->filter());
auto* values = c0->childByName(ScanSpec::kMapValuesFieldName);
ASSERT_EQ(
Expand Down
17 changes: 17 additions & 0 deletions velox/connectors/hive/tests/HiveConnectorUtilTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,4 +205,21 @@ TEST_F(HiveConnectorUtilTest, configureReaderOptions) {
hiveConfig->filePreloadThreshold());
}

TEST_F(HiveConnectorUtilTest, configureRowReaderOptions) {
auto split =
std::make_shared<hive::HiveConnectorSplit>("", "", FileFormat::UNKNOWN);
auto rowType = ROW({{"float_features", MAP(INTEGER(), REAL())}});
auto spec = std::make_shared<common::ScanSpec>("<root>");
spec->addAllChildFields(*rowType);
auto* float_features = spec->childByName("float_features");
float_features->childByName(common::ScanSpec::kMapKeysFieldName)
->setFilter(common::createBigintValues({1, 3}, false));
float_features->setFlatMapFeatureSelection({"1", "3"});
RowReaderOptions options;
configureRowReaderOptions(options, {}, spec, nullptr, rowType, split);
auto& nodes = options.getSelector()->getProjection();
ASSERT_EQ(nodes.size(), 1);
ASSERT_EQ(nodes[0].expression, "[1,3]");
}

} // namespace facebook::velox::connector
11 changes: 11 additions & 0 deletions velox/dwio/common/ScanSpec.h
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,14 @@ class ScanSpec {
// projected out.
void addAllChildFields(const Type&);

const std::vector<std::string>& flatMapFeatureSelection() const {
return flatMapFeatureSelection_;
}

void setFlatMapFeatureSelection(std::vector<std::string> features) {
flatMapFeatureSelection_ = std::move(features);
}

private:
void reorder();

Expand Down Expand Up @@ -400,6 +408,9 @@ class ScanSpec {
// Only take the first maxArrayElementsCount_ elements from each array.
vector_size_t maxArrayElementsCount_ =
std::numeric_limits<vector_size_t>::max();

// Used only for bulk reader to project flat map features.
std::vector<std::string> flatMapFeatureSelection_;
};

// Returns false if no value from a range defined by stats can pass the
Expand Down

0 comments on commit bd95801

Please sign in to comment.