Skip to content

Commit

Permalink
Merge pull request #191 from dentiny/hjiang/fix-undefined-property
Browse files Browse the repository at this point in the history
Fix query on unspecified property
  • Loading branch information
Dtenwolde authored Jan 25, 2025
2 parents 4697869 + 2b10233 commit 0895438
Show file tree
Hide file tree
Showing 6 changed files with 201 additions and 6 deletions.
154 changes: 150 additions & 4 deletions src/core/functions/table/match.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#include <duckpgq_extension.hpp>
#include "duckpgq/core/functions/table/match.hpp"

#include "duckdb/common/string_util.hpp"
#include "duckdb/common/case_insensitive_map.hpp"
#include "duckdb/parser/tableref/matchref.hpp"
#include "duckdb/parser/tableref/subqueryref.hpp"
#include "duckdb/parser/tableref/joinref.hpp"
Expand Down Expand Up @@ -33,6 +35,55 @@ namespace duckpgq {

namespace core {

namespace {

// Get fully-qualified column names for the property graph [tbl], and insert
// into set [col_names].
void PopulateFullyQualifiedColName(
const vector<shared_ptr<PropertyGraphTable>> &tbls,
const case_insensitive_map_t<vector<string>> &tbl_name_to_aliases,
case_insensitive_set_t &col_names) {
for (const auto &cur_tbl : tbls) {
for (const auto &cur_col : cur_tbl->column_names) {
// It's legal to query by `<col>` instead of `<table>.<col>`.
col_names.insert(cur_col);

const string &tbl_name = cur_tbl->table_name;
auto iter = tbl_name_to_aliases.find(tbl_name);
// Prefer to use table alias specified in the statement, otherwise use
// table name.
if (iter == tbl_name_to_aliases.end()) {
col_names.insert(StringUtil::Format("%s.%s", tbl_name, cur_col));
} else {
const auto &all_aliases = iter->second;
for (const auto &cur_alias : all_aliases) {
col_names.insert(StringUtil::Format("%s.%s", cur_alias, cur_col));
}
}
}
}
}

// Get fully-qualified column names from property graph.
case_insensitive_set_t GetFullyQualifiedColFromPg(
const CreatePropertyGraphInfo &pg,
const case_insensitive_map_t<shared_ptr<PropertyGraphTable>> &alias_map) {
case_insensitive_map_t<vector<string>> relation_name_to_aliases;
for (const auto &entry : alias_map) {
relation_name_to_aliases[entry.second->table_name].emplace_back(
entry.first);
}

case_insensitive_set_t col_names;
PopulateFullyQualifiedColName(pg.vertex_tables, relation_name_to_aliases,
col_names);
PopulateFullyQualifiedColName(pg.edge_tables, relation_name_to_aliases,
col_names);
return col_names;
}

} // namespace

shared_ptr<PropertyGraphTable>
PGQMatchFunction::FindGraphTable(const string &label,
CreatePropertyGraphInfo &pg_table) {
Expand Down Expand Up @@ -145,6 +196,18 @@ PathElement *PGQMatchFunction::GetPathElement(
throw InternalException("Unknown path reference type detected");
}

SubPath *
PGQMatchFunction::GetSubPath(const unique_ptr<PathReference> &path_reference) {
if (path_reference->path_reference_type ==
PGQPathReferenceType::PATH_ELEMENT) {
return nullptr;
}
if (path_reference->path_reference_type == PGQPathReferenceType::SUBPATH) {
return reinterpret_cast<SubPath *>(path_reference.get());
}
throw InternalException("Unknown path reference type detected");
}

unique_ptr<SubqueryRef> PGQMatchFunction::CreateCountCTESubquery() {
//! BEGIN OF (SELECT count(cte1.temp) as temp * 0 from cte1) __x

Expand Down Expand Up @@ -907,15 +970,88 @@ void PGQMatchFunction::ProcessPathList(
}
}

void PGQMatchFunction::PopulateGraphTableAliasMap(
const CreatePropertyGraphInfo &pg_table,
const unique_ptr<PathReference> &path_reference,
case_insensitive_map_t<shared_ptr<PropertyGraphTable>>
&alias_to_vertex_and_edge_tables) {
PathElement *path_elem = GetPathElement(path_reference);

// Populate binding from PathElement.
if (path_elem != nullptr) {
auto iter = pg_table.label_map.find(path_elem->label);
if (iter == pg_table.label_map.end()) {
throw BinderException(
"The label %s is not registered in property graph %s",
path_elem->label, pg_table.property_graph_name);
}
alias_to_vertex_and_edge_tables[path_elem->variable_binding] = iter->second;
return;
}

// Recursively populate binding from SubPath.
SubPath *sub_path = GetSubPath(path_reference);
D_ASSERT(sub_path != nullptr);
const auto &path_list = sub_path->path_list;
for (const auto &cur_path : path_list) {
PopulateGraphTableAliasMap(pg_table, cur_path,
alias_to_vertex_and_edge_tables);
}
}

void PGQMatchFunction::CheckColumnBinding(
const CreatePropertyGraphInfo &pg_table, const MatchExpression &ref) {
// Maps from table alias to table, including vertex and edge tables.
case_insensitive_map_t<shared_ptr<PropertyGraphTable>>
alias_to_vertex_and_edge_tables;
for (idx_t idx_i = 0; idx_i < ref.path_patterns.size(); idx_i++) {
const auto &path_list = ref.path_patterns[idx_i]->path_elements;
for (const auto &cur_path : path_list) {
PopulateGraphTableAliasMap(pg_table, cur_path,
alias_to_vertex_and_edge_tables);
}
}

// All fully-qualified column names for vertex tables and edge tables.
const auto all_fq_col_names =
GetFullyQualifiedColFromPg(pg_table, alias_to_vertex_and_edge_tables);

for (auto &expression : ref.column_list) {
// TODO(hjiang): `ColumnRefExpression` alone is not enough, we could have
// more complicated expression.
//
// See issue for reference:
// https://github.com/cwida/duckpgq-extension/issues/198
auto *column_ref = dynamic_cast<ColumnRefExpression *>(expression.get());
if (column_ref == nullptr) {
continue;
}
// 'shortest_path_cte' is a special table populated by pgq.
if (column_ref->column_names[0] == "shortest_path_cte") {
continue;
}
// 'rowid' is a column duckdb binds automatically.
if (column_ref->column_names.back() == "rowid") {
continue;
}
const auto cur_fq_col_name =
StringUtil::Join(column_ref->column_names, /*separator=*/".");
if (all_fq_col_names.find(cur_fq_col_name) == all_fq_col_names.end()) {
throw BinderException("Property %s is never registered!",
cur_fq_col_name);
}
}
}

unique_ptr<TableRef>
PGQMatchFunction::MatchBindReplace(ClientContext &context,
TableFunctionBindInput &bind_input) {
auto duckpgq_state = GetDuckPGQState(context);

auto match_index = bind_input.inputs[0].GetValue<int32_t>();
auto ref = dynamic_cast<MatchExpression *>(
auto *ref = dynamic_cast<MatchExpression *>(
duckpgq_state->transform_expression[match_index].get());
auto pg_table = duckpgq_state->GetPropertyGraph(ref->pg_name);
auto *pg_table = duckpgq_state->GetPropertyGraph(ref->pg_name);

vector<unique_ptr<ParsedExpression>> conditions;

Expand Down Expand Up @@ -948,11 +1084,16 @@ PGQMatchFunction::MatchBindReplace(ClientContext &context,
if (ref->where_clause) {
conditions.push_back(std::move(ref->where_clause));
}

CheckColumnBinding(*pg_table, *ref);

std::vector<unique_ptr<ParsedExpression>> final_column_list;

for (auto &expression : ref->column_list) {
unordered_set<string> named_subpaths;
auto column_ref = dynamic_cast<ColumnRefExpression *>(expression.get());

// Handle ColumnRefExpression.
auto *column_ref = dynamic_cast<ColumnRefExpression *>(expression.get());
if (column_ref != nullptr) {
if (named_subpaths.count(column_ref->column_names[0]) &&
column_ref->column_names.size() == 1) {
Expand All @@ -963,7 +1104,9 @@ PGQMatchFunction::MatchBindReplace(ClientContext &context,
}
continue;
}
auto function_ref = dynamic_cast<FunctionExpression *>(expression.get());

// Handle FunctionExpression.
auto *function_ref = dynamic_cast<FunctionExpression *>(expression.get());
if (function_ref != nullptr) {
if (function_ref->function_name == "path_length") {
column_ref = dynamic_cast<ColumnRefExpression *>(
Expand Down Expand Up @@ -995,6 +1138,9 @@ PGQMatchFunction::MatchBindReplace(ClientContext &context,
continue;
}

// TODO(hjiang): For star expression, only select columns in vertex or edge
// table, but not those unspecified in property graph.
// Issue reference: https://github.com/cwida/duckpgq-extension/issues/192
final_column_list.push_back(std::move(expression));
}

Expand Down
16 changes: 16 additions & 0 deletions src/include/duckpgq/core/functions/table/match.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,20 @@ struct PGQMatchFunction : public TableFunction {
vector<string> vertex_keys, vector<string> edge_keys,
const string &vertex_alias, const string &edge_alias);

// Populate all vertex and edge tables and their alias into
// [alias_to_vertex_and_edge_tables], from paths information from
// [path_reference].
static void PopulateGraphTableAliasMap(
const CreatePropertyGraphInfo &pg_table,
const unique_ptr<PathReference> &path_reference,
case_insensitive_map_t<shared_ptr<PropertyGraphTable>>
&alias_to_vertex_and_edge_tables);

static PathElement *
GetPathElement(const unique_ptr<PathReference> &path_reference);

static SubPath *GetSubPath(const unique_ptr<PathReference> &path_reference);

static unique_ptr<JoinRef>
GetJoinRef(const shared_ptr<PropertyGraphTable> &edge_table,
const string &edge_binding, const string &prev_binding,
Expand Down Expand Up @@ -157,6 +168,11 @@ struct PGQMatchFunction : public TableFunction {
CreatePropertyGraphInfo &pg_table,
unique_ptr<SelectNode> &final_select_node,
vector<unique_ptr<ParsedExpression>> &conditions);

// Check whether columns to query are valid against the property graph, throws
// BinderException if error.
static void CheckColumnBinding(const CreatePropertyGraphInfo &pg_table,
const MatchExpression &ref);
};

} // namespace core
Expand Down
18 changes: 18 additions & 0 deletions test/sql/create_pg/attach_pg.test
Original file line number Diff line number Diff line change
Expand Up @@ -151,4 +151,22 @@ from pagerank(bluesky, bluesky.account, follows) limit 10;
----
Invalid Error: Label 'bluesky.account' not found. Did you mean the vertex label 'account'?

statement ok
-CREATE PROPERTY GRAPH pg VERTEX TABLES (bluesky.account PROPERTIES (displayName));

# Query on unspecified columns in the graph throws error.
statement error
-SELECT * FROM GRAPH_TABLE (pg MATCH (acc:account) COLUMNS (acc.displayName, acc.handle));
----
Binder Error: Property acc.handle is never registered!

# Columns to query is only allowed to be <column> or <table>.<column>, which we cannot prefix catalog or schema.
statement error
-SELECT * FROM GRAPH_TABLE (pg MATCH (acc:account) COLUMNS (bluesky.main.acc.displayName));
----
Binder Error: Property bluesky.main.acc.displayName is never registered!

statement error
-SELECT * FROM GRAPH_TABLE (pg MATCH (acc:account) COLUMNS (main.acc.displayName));
----
Binder Error: Property main.acc.displayName is never registered!
15 changes: 15 additions & 0 deletions test/sql/create_pg/no_properties.test
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,18 @@ EDGE TABLES (
DESTINATION KEY ( dst ) REFERENCES Student ( id )
LABEL Knows
)

statement ok
-CREATE PROPERTY GRAPH g VERTEX TABLES (Student PROPERTIES (id));

# Query on unspecified columns in the graph throws error.
statement error
-SELECT * FROM GRAPH_TABLE (g MATCH (s:Student) COLUMNS (s.id, s.name));
----
Binder Error: Property s.name is never registered!

# Columns to query is only allowed to be <column> or <table>.<column>, which we cannot prefix catalog or schema.
statement error
-SELECT * FROM GRAPH_TABLE (g MATCH (s:Student) COLUMNS (main.s.id));
----
Binder Error: Property main.s.id is never registered!
2 changes: 1 addition & 1 deletion test/sql/path_finding/complex_matching.test
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ statement error
) tmp
limit 10;
----
Binder Error: Referenced column "o" not found in FROM clause!
Binder Error: Property o is never registered!


# https://github.com/cwida/duckpgq-extension/issues/68
Expand Down
2 changes: 1 addition & 1 deletion test/sql/path_finding/shortest_path.test
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ statement error
COLUMNS (p, a.name as name, b.name as b_name)
) study;
----
Binder Error: Referenced column "p" not found in FROM clause!
Binder Error: Property p is never registered


query III
Expand Down

0 comments on commit 0895438

Please sign in to comment.