diff --git a/src/core/functions/table/match.cpp b/src/core/functions/table/match.cpp index fcbc0e8..627b6d6 100644 --- a/src/core/functions/table/match.cpp +++ b/src/core/functions/table/match.cpp @@ -1,6 +1,8 @@ #include #include "duckpgq/core/functions/table/match.hpp" +#include "duckdb/common/string_util.hpp" +#include "duckdb/common/case_insensitive_map.hpp" #include "duckdb/parser/tableref/matchref.hpp" #include "duckdb/parser/tableref/subqueryref.hpp" #include "duckdb/parser/tableref/joinref.hpp" @@ -33,6 +35,55 @@ namespace duckpgq { namespace core { +namespace { + +// Get fully-qualified column names for the property graph [tbl], and insert +// into set [col_names]. +void PopulateFullyQualifiedColName( + const vector> &tbls, + const case_insensitive_map_t> &tbl_name_to_aliases, + case_insensitive_set_t &col_names) { + for (const auto &cur_tbl : tbls) { + for (const auto &cur_col : cur_tbl->column_names) { + // It's legal to query by `` instead of `.`. + col_names.insert(cur_col); + + const string &tbl_name = cur_tbl->table_name; + auto iter = tbl_name_to_aliases.find(tbl_name); + // Prefer to use table alias specified in the statement, otherwise use + // table name. + if (iter == tbl_name_to_aliases.end()) { + col_names.insert(StringUtil::Format("%s.%s", tbl_name, cur_col)); + } else { + const auto &all_aliases = iter->second; + for (const auto &cur_alias : all_aliases) { + col_names.insert(StringUtil::Format("%s.%s", cur_alias, cur_col)); + } + } + } + } +} + +// Get fully-qualified column names from property graph. +case_insensitive_set_t GetFullyQualifiedColFromPg( + const CreatePropertyGraphInfo &pg, + const case_insensitive_map_t> &alias_map) { + case_insensitive_map_t> relation_name_to_aliases; + for (const auto &entry : alias_map) { + relation_name_to_aliases[entry.second->table_name].emplace_back( + entry.first); + } + + case_insensitive_set_t col_names; + PopulateFullyQualifiedColName(pg.vertex_tables, relation_name_to_aliases, + col_names); + PopulateFullyQualifiedColName(pg.edge_tables, relation_name_to_aliases, + col_names); + return col_names; +} + +} // namespace + shared_ptr PGQMatchFunction::FindGraphTable(const string &label, CreatePropertyGraphInfo &pg_table) { @@ -145,6 +196,18 @@ PathElement *PGQMatchFunction::GetPathElement( throw InternalException("Unknown path reference type detected"); } +SubPath * +PGQMatchFunction::GetSubPath(const unique_ptr &path_reference) { + if (path_reference->path_reference_type == + PGQPathReferenceType::PATH_ELEMENT) { + return nullptr; + } + if (path_reference->path_reference_type == PGQPathReferenceType::SUBPATH) { + return reinterpret_cast(path_reference.get()); + } + throw InternalException("Unknown path reference type detected"); +} + unique_ptr PGQMatchFunction::CreateCountCTESubquery() { //! BEGIN OF (SELECT count(cte1.temp) as temp * 0 from cte1) __x @@ -907,15 +970,88 @@ void PGQMatchFunction::ProcessPathList( } } +void PGQMatchFunction::PopulateGraphTableAliasMap( + const CreatePropertyGraphInfo &pg_table, + const unique_ptr &path_reference, + case_insensitive_map_t> + &alias_to_vertex_and_edge_tables) { + PathElement *path_elem = GetPathElement(path_reference); + + // Populate binding from PathElement. + if (path_elem != nullptr) { + auto iter = pg_table.label_map.find(path_elem->label); + if (iter == pg_table.label_map.end()) { + throw BinderException( + "The label %s is not registered in property graph %s", + path_elem->label, pg_table.property_graph_name); + } + alias_to_vertex_and_edge_tables[path_elem->variable_binding] = iter->second; + return; + } + + // Recursively populate binding from SubPath. + SubPath *sub_path = GetSubPath(path_reference); + D_ASSERT(sub_path != nullptr); + const auto &path_list = sub_path->path_list; + for (const auto &cur_path : path_list) { + PopulateGraphTableAliasMap(pg_table, cur_path, + alias_to_vertex_and_edge_tables); + } +} + +void PGQMatchFunction::CheckColumnBinding( + const CreatePropertyGraphInfo &pg_table, const MatchExpression &ref) { + // Maps from table alias to table, including vertex and edge tables. + case_insensitive_map_t> + alias_to_vertex_and_edge_tables; + for (idx_t idx_i = 0; idx_i < ref.path_patterns.size(); idx_i++) { + const auto &path_list = ref.path_patterns[idx_i]->path_elements; + for (const auto &cur_path : path_list) { + PopulateGraphTableAliasMap(pg_table, cur_path, + alias_to_vertex_and_edge_tables); + } + } + + // All fully-qualified column names for vertex tables and edge tables. + const auto all_fq_col_names = + GetFullyQualifiedColFromPg(pg_table, alias_to_vertex_and_edge_tables); + + for (auto &expression : ref.column_list) { + // TODO(hjiang): `ColumnRefExpression` alone is not enough, we could have + // more complicated expression. + // + // See issue for reference: + // https://github.com/cwida/duckpgq-extension/issues/198 + auto *column_ref = dynamic_cast(expression.get()); + if (column_ref == nullptr) { + continue; + } + // 'shortest_path_cte' is a special table populated by pgq. + if (column_ref->column_names[0] == "shortest_path_cte") { + continue; + } + // 'rowid' is a column duckdb binds automatically. + if (column_ref->column_names.back() == "rowid") { + continue; + } + const auto cur_fq_col_name = + StringUtil::Join(column_ref->column_names, /*separator=*/"."); + if (all_fq_col_names.find(cur_fq_col_name) == all_fq_col_names.end()) { + throw BinderException("Property %s is never registered!", + cur_fq_col_name); + } + } +} + unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, TableFunctionBindInput &bind_input) { auto duckpgq_state = GetDuckPGQState(context); auto match_index = bind_input.inputs[0].GetValue(); - auto ref = dynamic_cast( + auto *ref = dynamic_cast( duckpgq_state->transform_expression[match_index].get()); - auto pg_table = duckpgq_state->GetPropertyGraph(ref->pg_name); + auto *pg_table = duckpgq_state->GetPropertyGraph(ref->pg_name); vector> conditions; @@ -948,11 +1084,16 @@ PGQMatchFunction::MatchBindReplace(ClientContext &context, if (ref->where_clause) { conditions.push_back(std::move(ref->where_clause)); } + + CheckColumnBinding(*pg_table, *ref); + std::vector> final_column_list; for (auto &expression : ref->column_list) { unordered_set named_subpaths; - auto column_ref = dynamic_cast(expression.get()); + + // Handle ColumnRefExpression. + auto *column_ref = dynamic_cast(expression.get()); if (column_ref != nullptr) { if (named_subpaths.count(column_ref->column_names[0]) && column_ref->column_names.size() == 1) { @@ -963,7 +1104,9 @@ PGQMatchFunction::MatchBindReplace(ClientContext &context, } continue; } - auto function_ref = dynamic_cast(expression.get()); + + // Handle FunctionExpression. + auto *function_ref = dynamic_cast(expression.get()); if (function_ref != nullptr) { if (function_ref->function_name == "path_length") { column_ref = dynamic_cast( @@ -995,6 +1138,9 @@ PGQMatchFunction::MatchBindReplace(ClientContext &context, continue; } + // TODO(hjiang): For star expression, only select columns in vertex or edge + // table, but not those unspecified in property graph. + // Issue reference: https://github.com/cwida/duckpgq-extension/issues/192 final_column_list.push_back(std::move(expression)); } diff --git a/src/include/duckpgq/core/functions/table/match.hpp b/src/include/duckpgq/core/functions/table/match.hpp index 0eda8f8..9d83b5c 100644 --- a/src/include/duckpgq/core/functions/table/match.hpp +++ b/src/include/duckpgq/core/functions/table/match.hpp @@ -53,9 +53,20 @@ struct PGQMatchFunction : public TableFunction { vector vertex_keys, vector edge_keys, const string &vertex_alias, const string &edge_alias); + // Populate all vertex and edge tables and their alias into + // [alias_to_vertex_and_edge_tables], from paths information from + // [path_reference]. + static void PopulateGraphTableAliasMap( + const CreatePropertyGraphInfo &pg_table, + const unique_ptr &path_reference, + case_insensitive_map_t> + &alias_to_vertex_and_edge_tables); + static PathElement * GetPathElement(const unique_ptr &path_reference); + static SubPath *GetSubPath(const unique_ptr &path_reference); + static unique_ptr GetJoinRef(const shared_ptr &edge_table, const string &edge_binding, const string &prev_binding, @@ -157,6 +168,11 @@ struct PGQMatchFunction : public TableFunction { CreatePropertyGraphInfo &pg_table, unique_ptr &final_select_node, vector> &conditions); + + // Check whether columns to query are valid against the property graph, throws + // BinderException if error. + static void CheckColumnBinding(const CreatePropertyGraphInfo &pg_table, + const MatchExpression &ref); }; } // namespace core diff --git a/test/sql/create_pg/attach_pg.test b/test/sql/create_pg/attach_pg.test index f1ed490..d4d4ef2 100644 --- a/test/sql/create_pg/attach_pg.test +++ b/test/sql/create_pg/attach_pg.test @@ -151,4 +151,22 @@ from pagerank(bluesky, bluesky.account, follows) limit 10; ---- Invalid Error: Label 'bluesky.account' not found. Did you mean the vertex label 'account'? +statement ok +-CREATE PROPERTY GRAPH pg VERTEX TABLES (bluesky.account PROPERTIES (displayName)); +# Query on unspecified columns in the graph throws error. +statement error +-SELECT * FROM GRAPH_TABLE (pg MATCH (acc:account) COLUMNS (acc.displayName, acc.handle)); +---- +Binder Error: Property acc.handle is never registered! + +# Columns to query is only allowed to be or
., which we cannot prefix catalog or schema. +statement error +-SELECT * FROM GRAPH_TABLE (pg MATCH (acc:account) COLUMNS (bluesky.main.acc.displayName)); +---- +Binder Error: Property bluesky.main.acc.displayName is never registered! + +statement error +-SELECT * FROM GRAPH_TABLE (pg MATCH (acc:account) COLUMNS (main.acc.displayName)); +---- +Binder Error: Property main.acc.displayName is never registered! diff --git a/test/sql/create_pg/no_properties.test b/test/sql/create_pg/no_properties.test index 8d715aa..28b531c 100644 --- a/test/sql/create_pg/no_properties.test +++ b/test/sql/create_pg/no_properties.test @@ -35,3 +35,18 @@ EDGE TABLES ( DESTINATION KEY ( dst ) REFERENCES Student ( id ) LABEL Knows ) + +statement ok +-CREATE PROPERTY GRAPH g VERTEX TABLES (Student PROPERTIES (id)); + +# Query on unspecified columns in the graph throws error. +statement error +-SELECT * FROM GRAPH_TABLE (g MATCH (s:Student) COLUMNS (s.id, s.name)); +---- +Binder Error: Property s.name is never registered! + +# Columns to query is only allowed to be or
., which we cannot prefix catalog or schema. +statement error +-SELECT * FROM GRAPH_TABLE (g MATCH (s:Student) COLUMNS (main.s.id)); +---- +Binder Error: Property main.s.id is never registered! diff --git a/test/sql/path_finding/complex_matching.test b/test/sql/path_finding/complex_matching.test index 4f7b30b..0136a09 100644 --- a/test/sql/path_finding/complex_matching.test +++ b/test/sql/path_finding/complex_matching.test @@ -92,7 +92,7 @@ statement error ) tmp limit 10; ---- -Binder Error: Referenced column "o" not found in FROM clause! +Binder Error: Property o is never registered! # https://github.com/cwida/duckpgq-extension/issues/68 diff --git a/test/sql/path_finding/shortest_path.test b/test/sql/path_finding/shortest_path.test index c265e2a..fc7304d 100644 --- a/test/sql/path_finding/shortest_path.test +++ b/test/sql/path_finding/shortest_path.test @@ -90,7 +90,7 @@ statement error COLUMNS (p, a.name as name, b.name as b_name) ) study; ---- -Binder Error: Referenced column "p" not found in FROM clause! +Binder Error: Property p is never registered query III