Skip to content

Commit

Permalink
add ReplaceNodeScanWithIndexSeek optimization rule (TuGraph-family#801)
Browse files Browse the repository at this point in the history
add ReplaceNodeScanWithIndexSeek
  • Loading branch information
ljcui authored Dec 3, 2024
1 parent 64ec55a commit a4a3697
Show file tree
Hide file tree
Showing 12 changed files with 724 additions and 274 deletions.
2 changes: 1 addition & 1 deletion src/bolt/connection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ void BoltConnection::WriteResponseDone(const boost::system::error_code& ec) {

void BoltConnection::ReadChunkSizeDone(const boost::system::error_code& ec) {
if (ec) {
LOG_WARN() << FMA_FMT("ReadChunkSizeDone error: {}", ec.message());
// LOG_WARN() << FMA_FMT("ReadChunkSizeDone error: {}", ec.message());
Close();
return;
}
Expand Down
2 changes: 2 additions & 0 deletions src/cypher/execution_plan/ops/op_node_by_label_scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@ class NodeByLabelScan : public OpBase {

const std::string& GetLabel() { return label_; }

// Returns the symbol-table pointer stored in this operator (sym_tab_).
// Const-qualified so it can also be called through a const operator.
const SymbolTable* GetSymtab() const { return sym_tab_; }

CYPHER_DEFINE_VISITABLE()

CYPHER_DEFINE_CONST_VISITABLE()
Expand Down
744 changes: 485 additions & 259 deletions src/cypher/execution_plan/optimization/optimization_filter_visitor_impl.h

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion src/cypher/execution_plan/optimization/pass_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "execution_plan/optimization/locate_node_by_indexed_prop_v2.h"
#include "execution_plan/optimization/locate_node_by_prop_range_filter.h"
#include "execution_plan/optimization/parallel_traversal_v2.h"
#include "execution_plan/optimization/rewrite_label_scan.h"

namespace cypher {

Expand All @@ -48,7 +49,8 @@ class PassManager {
all_passes_.emplace_back(new ParallelTraversal());
all_passes_.emplace_back(new ParallelTraversalV2());
all_passes_.emplace_back(new LocateNodeByVidV2());
all_passes_.emplace_back(new LocateNodeByIndexedPropV2());
// all_passes_.emplace_back(new LocateNodeByIndexedPropV2());
all_passes_.emplace_back(new ReplaceNodeScanWithIndexSeek(ctx));
all_passes_.emplace_back(new LocateNodeByPropRangeFilter());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class PropertyFilterDetector : public cypher::OptimizationFilterVisitorImpl {
public:
PropertyFilterDetector() : isValidDetector(false) {}

virtual ~PropertyFilterDetector() = default;
~PropertyFilterDetector() override = default;

bool Build(geax::frontend::AstNode* astNode) {
try {
Expand All @@ -49,26 +49,49 @@ class PropertyFilterDetector : public cypher::OptimizationFilterVisitorImpl {
std::string cur_symbol_;
std::string cur_field_;
std::set<lgraph::FieldData> cur_properties_;
bool has_and = false;
bool has_or = false;

// Visits a logical AND node.
// Returns GEAX_COMMON_NOT_SUPPORT if an OR node has already been visited
// (has_or is set), i.e. an expression mixing AND and OR is rejected.
// Otherwise records that an AND was seen and visits both operands, returning
// GEAX_OPTIMIZATION_PASS when the end of the function is reached.
std::any visit(geax::frontend::BAnd* node) override {
if (has_or) {
return geax::frontend::GEAXErrorCode::GEAX_COMMON_NOT_SUPPORT;
}
has_and = true;
// NOTE(review): ACCEPT_AND_CHECK_WITH_PASS_MSG is defined outside this view;
// presumably it returns early from this function when visiting the operand
// does not yield GEAX_OPTIMIZATION_PASS - confirm in geax_util.h.
ACCEPT_AND_CHECK_WITH_PASS_MSG(node->left());
ACCEPT_AND_CHECK_WITH_PASS_MSG(node->right());
return geax::frontend::GEAXErrorCode::GEAX_OPTIMIZATION_PASS;
}

// Visits a logical OR node.
// Returns GEAX_COMMON_NOT_SUPPORT if an AND node has already been visited
// (has_and is set), i.e. an expression mixing AND and OR is rejected.
// Otherwise records that an OR was seen and visits both operands, returning
// GEAX_OPTIMIZATION_PASS when the end of the function is reached.
std::any visit(geax::frontend::BOr* node) override {
if (has_and) {
return geax::frontend::GEAXErrorCode::GEAX_COMMON_NOT_SUPPORT;
}
has_or = true;
// NOTE(review): ACCEPT_AND_CHECK_WITH_PASS_MSG is defined outside this view;
// presumably it returns early from this function when visiting the operand
// does not yield GEAX_OPTIMIZATION_PASS - confirm in geax_util.h.
ACCEPT_AND_CHECK_WITH_PASS_MSG(node->left());
ACCEPT_AND_CHECK_WITH_PASS_MSG(node->right());
return geax::frontend::GEAXErrorCode::GEAX_OPTIMIZATION_PASS;
}

// Visits a property access (`<expr>.<field>`).
// While inside an OR expression, a field name different from the one already
// being tracked yields GEAX_COMMON_NOT_SUPPORT. Otherwise the detector is
// flagged as valid, the field name is remembered in cur_field_, and the
// accessed expression is visited.
std::any visit(geax::frontend::GetField* node) override {
    const auto& requested_field = node->fieldName();
    const bool tracks_other_field = !cur_field_.empty() && cur_field_ != requested_field;
    if (has_or && tracks_other_field) {
        return geax::frontend::GEAXErrorCode::GEAX_COMMON_NOT_SUPPORT;
    }

    cur_field_ = requested_field;
    isValidDetector = true;
    ACCEPT_AND_CHECK_WITH_PASS_MSG(node->expr());
    return geax::frontend::GEAXErrorCode::GEAX_OPTIMIZATION_PASS;
}

// Visits a variable reference.
// While inside an OR expression, a symbol name different from the one already
// being tracked yields GEAX_COMMON_NOT_SUPPORT. Otherwise the name is
// remembered in cur_symbol_.
std::any visit(geax::frontend::Ref* node) override {
    const auto& referenced = node->name();
    const bool tracks_other_symbol = !cur_symbol_.empty() && cur_symbol_ != referenced;
    if (has_or && tracks_other_symbol) {
        return geax::frontend::GEAXErrorCode::GEAX_COMMON_NOT_SUPPORT;
    }

    cur_symbol_ = referenced;
    return geax::frontend::GEAXErrorCode::GEAX_OPTIMIZATION_PASS;
}

std::any visit(geax::frontend::BEqual* node) override {
cur_properties_.clear();
ACCEPT_AND_CHECK_WITH_PASS_MSG(node->left());
ACCEPT_AND_CHECK_WITH_PASS_MSG(node->right());
if (!cur_properties_.empty()) {
Expand All @@ -87,6 +110,7 @@ class PropertyFilterDetector : public cypher::OptimizationFilterVisitorImpl {
}

std::any visit(geax::frontend::BIn* node) override {
cur_properties_.clear();
ACCEPT_AND_CHECK_WITH_PASS_MSG(node->left());
ACCEPT_AND_CHECK_WITH_PASS_MSG(node->right());
if (!cur_properties_.empty()) {
Expand Down
172 changes: 172 additions & 0 deletions src/cypher/execution_plan/optimization/rewrite_label_scan.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
/**
* Copyright 2022 AntGroup CO., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/

#pragma once

#include "tools/lgraph_log.h"
#include "core/data_type.h"
#include "cypher/execution_plan/ops/op_filter.h"
#include "cypher/execution_plan/ops/op_node_index_seek.h"
#include "cypher/execution_plan/ops/op_node_by_label_scan.h"
#include "cypher/execution_plan/optimization/opt_pass.h"
#include "cypher/execution_plan/optimization/property_filter_detector.h"

namespace cypher {

// Candidate property values gathered from a filter expression.
// The outer map is looked up by a node's alias, the inner map by a field name,
// and each entry holds the set of values that field is compared against.
using FilterCollections =
    std::unordered_map<std::string,
                       std::unordered_map<std::string, std::set<lgraph::FieldData>>>;

class ReplaceNodeScanWithIndexSeek : public OptPass {
private:
RTContext *ctx_ = nullptr;
const lgraph::SchemaInfo *si_ = nullptr;

void Impl(OpBase *root) {
OpBase *op_filter = nullptr;
FilterCollections filters;
if (FindNodePropFilter(root, op_filter, filters)) {
Replace(op_filter, filters);
}
}

bool FindNodePropFilter(OpBase *root, OpBase *&op_filter, FilterCollections &filters) {
auto op = root;
if (op->type == OpType::FILTER) {
auto filter = dynamic_cast<OpFilter *>(op);
if (_CheckPropFilter(filter, filters)) {
op_filter = op;
return true;
}
}

for (auto child : op->children) {
if (FindNodePropFilter(child, op_filter, filters)) return true;
}

return false;
}

void Replace(OpBase *root, FilterCollections &filter_collections) {
if (root->type == OpType::NODE_BY_LABEL_SCAN) {
auto scan = dynamic_cast<NodeByLabelScan *>(root);
auto label = scan->GetLabel();
auto node = scan->GetNode();
auto n = node->Alias();
if (!filter_collections.count(n)) {
return;
}
auto& filters = filter_collections.at(n);
auto schema = si_->v_schema_manager.GetSchema(label);
if (!schema) {
return;
}
auto pk = schema->GetPrimaryField();
if (filters.count(pk)) {
std::vector<lgraph::FieldData> values;
for (auto& val : filters.at(pk)) {
values.push_back(val);
}
auto parent = root->parent;
auto op_node_index_seek = new NodeIndexSeek(node, scan->GetSymtab(), pk, values);
op_node_index_seek->parent = parent;
parent->RemoveChild(root);
OpBase::FreeStream(root);
parent->AddChild(op_node_index_seek);
return;
}
for (auto& [k, set] : filters) {
if (k == pk) {
continue;
}
if (!schema->TryGetFieldExtractor(k)->GetVertexIndex()) {
continue;
}
std::vector<lgraph::FieldData> values;
for (auto& val : set) {
values.push_back(val);
}
auto parent = root->parent;
auto op_node_index_seek = new NodeIndexSeek(node, scan->GetSymtab(), k, values);
op_node_index_seek->parent = parent;
parent->RemoveChild(root);
OpBase::FreeStream(root);
parent->AddChild(op_node_index_seek);
return;
}
return;
} else if (root->type == OpType::ALL_NODE_SCAN) {
auto scan = dynamic_cast<AllNodeScan *>(root);
auto node = scan->GetNode();
auto n = node->Alias();
if (!filter_collections.count(n)) {
return;
}
const auto& filters = filter_collections.at(n);
if (filters.size() != 1) {
return;
}
std::vector<lgraph::FieldData> values;
std::string field;
for (auto& [k, v] : filters) {
field = k;
for (auto &item : v) {
values.push_back(item);
}
break;
}
auto parent = root->parent;
auto op_node_index_seek = new NodeIndexSeek(node, scan->SymTab(), field, values);
op_node_index_seek->parent = parent;
parent->RemoveChild(root);
OpBase::FreeStream(root);
parent->AddChild(op_node_index_seek);
return;
}
for (auto child : root->children) {
Replace(child, filter_collections);
}
}

bool _CheckPropFilter(OpFilter *&op_filter, FilterCollections &filters) {
auto filter = op_filter->Filter();
CYPHER_THROW_ASSERT(filter->Type() == lgraph::Filter::GEAX_EXPR_FILTER);
auto geax_filter = ((lgraph::GeaxExprFilter *)filter.get())->GetArithExpr();
geax::frontend::Expr *expr = geax_filter.expr_;
PropertyFilterDetector detector;
if (!detector.Build(expr)) return false;
filters = detector.GetProperties();
if (filters.empty()) return false;
return true;
}

public:
explicit ReplaceNodeScanWithIndexSeek(RTContext *ctx)
: OptPass(typeid(ReplaceNodeScanWithIndexSeek).name()), ctx_(ctx) {}
bool Gate() override { return true; }
int Execute(OpBase *root) override {
if (ctx_->graph_.empty()) {
return 0;
}
ctx_->ac_db_ = std::make_unique<lgraph::AccessControlledDB>(
ctx_->galaxy_->OpenGraph(ctx_->user_, ctx_->graph_));
lgraph_api::GraphDB db(ctx_->ac_db_.get(), true);
auto txn = db.CreateReadTxn();
si_ = &txn.GetTxn()->GetSchemaInfo();
Impl(root);
txn.Abort();
return 0;
}
};
} // namespace cypher
9 changes: 9 additions & 0 deletions src/cypher/parser/cypher_base_visitor_v2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,15 @@ void CypherBaseVisitorV2::PropertyExtractor(geax::frontend::ElementFiller *fille
}
}
for (auto &label : labels) {
if (isVertex) {
if (!node_property_.count(label)) {
node_property_[label] = {};
}
} else {
if (!rel_property_.count(label)) {
rel_property_[label] = {};
}
}
for (auto field : fields) {
if (isVertex) {
node_property_[label].emplace(field);
Expand Down
4 changes: 1 addition & 3 deletions src/cypher/utils/geax_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,7 @@
if (!ast) NOT_SUPPORT(); \
auto res = std::any_cast<geax::frontend::GEAXErrorCode>(ast->accept(*this)); \
if (res != geax::frontend::GEAXErrorCode::GEAX_OPTIMIZATION_PASS) { \
auto error_msg = fma_common::StringFormatter::Format( \
"visit({}) failed", std::string(#ast)); \
throw lgraph::CypherException(error_msg); \
return res; \
} \
} while (0)
#endif
Expand Down
23 changes: 20 additions & 3 deletions test/cypher_plan_validate.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@
},
{
"query": "MATCH (n) where n.name in [\"Rachel Kempson\",\"Dennis Quaid\",\"none\"] RETURN n",
"plan": "ReadOnly:1\nExecution Plan:\nProduce Results\n Project [n]\n Node Index Seek [n] name IN [Dennis Quaid,Rachel Kempson,none,]\n",
"plan": "ReadOnly:1\nExecution Plan:\nProduce Results\n Project [n]\n Filter [n.name In {\"Rachel Kempson\", \"Dennis Quaid\", \"none\"}]\n Node Index Seek [n] name IN [Dennis Quaid,Rachel Kempson,none,]\n",
"res": 2
},
{
"query": "MATCH (n) where n.name =\"Rachel Kempson\" or n.name= \"Dennis Quaid\" or n.name=\"none\" RETURN n",
"plan": "ReadOnly:1\nExecution Plan:\nProduce Results\n Project [n]\n Node Index Seek [n] name IN [Dennis Quaid,Rachel Kempson,none,]\n",
"plan": "ReadOnly:1\nExecution Plan:\nProduce Results\n Project [n]\n Filter [(((n.name=\"Rachel Kempson\") or (n.name=\"Dennis Quaid\")) or (n.name=\"none\"))]\n Node Index Seek [n] name IN [Dennis Quaid,Rachel Kempson,none,]\n",
"res": 2
},
{
Expand All @@ -50,7 +50,7 @@
},
{
"query": "MATCH (n) where n.name = \"Rachel Kempson\" or n.name = \"Dennis Quaid\" or n.name = \"Vanessa Redgrave\" RETURN n",
"plan": "ReadOnly:1\nExecution Plan:\nProduce Results\n Project [n]\n Node Index Seek [n] name IN [Dennis Quaid,Rachel Kempson,Vanessa Redgrave,]\n",
"plan": "ReadOnly:1\nExecution Plan:\nProduce Results\n Project [n]\n Filter [(((n.name=\"Rachel Kempson\") or (n.name=\"Dennis Quaid\")) or (n.name=\"Vanessa Redgrave\"))]\n Node Index Seek [n] name IN [Dennis Quaid,Rachel Kempson,Vanessa Redgrave,]\n",
"res": 3
}
],
Expand Down Expand Up @@ -163,6 +163,23 @@
"plan": "ReadOnly:1\nExecution Plan:\nProduce Results\n Project [p]\n Expand(All) [m --> n ]\n Expand(All) [m1 <-- m ]\n Node By Label Scan [m1:City]\n",
"res": 9
}
],
"index": [
{
"query": "match (n:Person)-[r:BORN_IN]->(m:City) where n.name in ['Vanessa Redgrave'] return m",
"plan": "ReadOnly:1\nExecution Plan:\nProduce Results\n Project [m]\n Filter [n.name In {\"Vanessa Redgrave\"}]\n Expand(All) [n --> m ]\n Node Index Seek [n] name IN [Vanessa Redgrave,]\n",
"res": 1
},
{
"query": "match (n:Person)-[r:BORN_IN]->(m:City) where n.name in ['Vanessa Redgrave'] and n.birthyear in [1937] return m",
"plan": "ReadOnly:1\nExecution Plan:\nProduce Results\n Project [m]\n Filter [(n.name In {\"Vanessa Redgrave\"} and n.birthyear In {1937})]\n Expand(All) [n --> m ]\n Node Index Seek [n] name IN [Vanessa Redgrave,]\n",
"res": 1
},
{
"query": "match (n:Person)-[r:BORN_IN]->(m:City) where n.birthyear in [1937] return m",
"plan": "ReadOnly:1\nExecution Plan:\nProduce Results\n Project [m]\n Filter [n.birthyear In {1937}]\n Expand(All) [n --> m ]\n Node Index Seek [n] birthyear IN [1937,]\n",
"res": 1
}
]
}
}
Expand Down
6 changes: 3 additions & 3 deletions test/graph_factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -345,9 +345,9 @@ class GraphFactory {
fma_common::OutputFmaStream stream;
for (auto& kv : data) {
const std::string& file_name = kv.first;
const std::string& data = kv.second;
const std::string& content = kv.second;
stream.Open(file_name);
stream.Write(data.data(), data.size());
stream.Write(content.data(), content.size());
stream.Close();
UT_LOG() << file_name << " created";
}
Expand All @@ -370,7 +370,7 @@ R"(
"primary" : "name",
"properties" : [
{"name" : "name", "type":"STRING"},
{"name" : "birthyear", "type":"INT16", "optional":true}
{"name" : "birthyear", "type":"INT16", "optional":true, "index":true, "unique":false}
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions test/resource/unit_test/query/cypher/query.result
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ MATCH (n:Person)-[b:BORN_IN]->(m) WHERE (b.weight + b.weight) < 38.4 OR b.weight
MATCH (a)-[e]->(b) WHERE a.name='Liam Neeson' and b.title<>'' and (e.charactername='Henri Ducard' or e.relation = '') RETURN a,e,b;
[{"a":{"identity":4,"label":"Person","properties":{"birthyear":1952,"name":"Liam Neeson"}},"b":{"identity":17,"label":"Film","properties":{"title":"Batman Begins"}},"e":{"dst":17,"forward":false,"identity":0,"label":"ACTED_IN","label_id":5,"properties":{"charactername":"Henri Ducard"},"src":4,"temporal_id":0}}]
MATCH (a) WHERE a.name IN ['Dennis Quaid', 'Christopher Nolan'] WITH a MATCH (b) WHERE b.name IN ['London'] RETURN a, b;
[{"a":{"identity":12,"label":"Person","properties":{"birthyear":1970,"name":"Christopher Nolan"}},"b":{"identity":14,"label":"City","properties":{"name":"London"}}},{"a":{"identity":7,"label":"Person","properties":{"birthyear":1954,"name":"Dennis Quaid"}},"b":{"identity":14,"label":"City","properties":{"name":"London"}}}]
[{"a":{"identity":7,"label":"Person","properties":{"birthyear":1954,"name":"Dennis Quaid"}},"b":{"identity":14,"label":"City","properties":{"name":"London"}}},{"a":{"identity":12,"label":"Person","properties":{"birthyear":1970,"name":"Christopher Nolan"}},"b":{"identity":14,"label":"City","properties":{"name":"London"}}}]
MATCH (a) WHERE a.name IN ['Dennis Quaid', 'Christopher Nolan'] WITH a MATCH (b) WHERE b.name IN ['London', 'Beijing', 'Houston'] RETURN a, b;
[{"a":{"identity":12,"label":"Person","properties":{"birthyear":1970,"name":"Christopher Nolan"}},"b":{"identity":14,"label":"City","properties":{"name":"London"}}},{"a":{"identity":12,"label":"Person","properties":{"birthyear":1970,"name":"Christopher Nolan"}},"b":{"identity":15,"label":"City","properties":{"name":"Houston"}}},{"a":{"identity":7,"label":"Person","properties":{"birthyear":1954,"name":"Dennis Quaid"}},"b":{"identity":14,"label":"City","properties":{"name":"London"}}},{"a":{"identity":7,"label":"Person","properties":{"birthyear":1954,"name":"Dennis Quaid"}},"b":{"identity":15,"label":"City","properties":{"name":"Houston"}}}]
[{"a":{"identity":7,"label":"Person","properties":{"birthyear":1954,"name":"Dennis Quaid"}},"b":{"identity":14,"label":"City","properties":{"name":"London"}}},{"a":{"identity":7,"label":"Person","properties":{"birthyear":1954,"name":"Dennis Quaid"}},"b":{"identity":15,"label":"City","properties":{"name":"Houston"}}},{"a":{"identity":12,"label":"Person","properties":{"birthyear":1970,"name":"Christopher Nolan"}},"b":{"identity":14,"label":"City","properties":{"name":"London"}}},{"a":{"identity":12,"label":"Person","properties":{"birthyear":1970,"name":"Christopher Nolan"}},"b":{"identity":15,"label":"City","properties":{"name":"Houston"}}}]
MATCH (n:Person) WHERE n.name = 'Vanessa Redgrave' OR NOT n.name <> 'Dennis Quaid' RETURN n.name;
[{"n.name":"Dennis Quaid"},{"n.name":"Vanessa Redgrave"}]
MATCH (n:Person {name:'Vanessa Redgrave'})-[:BORN_IN|ACTED_IN]->(m) RETURN m;
Expand Down
Loading

0 comments on commit a4a3697

Please sign in to comment.