From cfe06492d6f695623c1f1bf0c8935224d74b4887 Mon Sep 17 00:00:00 2001 From: rliget Date: Thu, 8 Feb 2024 14:23:59 +0100 Subject: [PATCH 01/39] initial fix of issue 67, need to test further --- .../functions/scalar/iterativelength.cpp | 34 ++- scripts/kuzu_shortest_path.py | 37 ++++ .../sql/path-finding/shortest_path_bound.test | 199 ++++++++++++++++++ 3 files changed, 267 insertions(+), 3 deletions(-) create mode 100644 scripts/kuzu_shortest_path.py create mode 100644 test/sql/path-finding/shortest_path_bound.test diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp index 61746d41..533d9e2d 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp @@ -74,6 +74,16 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state, auto src_data = (int64_t *)vdata_src.data; auto dst_data = (int64_t *)vdata_dst.data; + // get lowerbound and upperbound + auto &lower_bound = args.data[4]; + auto &upper_bound = args.data[5]; + UnifiedVectorFormat vdata_lower_bound; + UnifiedVectorFormat vdata_upper_bound; + lower_bound.ToUnifiedFormat(args.size(), vdata_lower_bound); + upper_bound.ToUnifiedFormat(args.size(), vdata_upper_bound); + auto lower_bound_data = (int64_t *)vdata_lower_bound.data; + auto upper_bound_data = (int64_t *)vdata_upper_bound.data; + ValidityMask &result_validity = FlatVector::Validity(result); // create result vector @@ -115,6 +125,7 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state, result_data[search_num] = (uint64_t)0; // path of length 0 does not require a search } else { + seen[src_data[src_pos]][lane] = true; visit1[src_data[src_pos]][lane] = true; lane_to_num[lane] = search_num; // active lane active++; @@ -134,9 +145,25 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state, int64_t search_num = lane_to_num[lane]; if (search_num >= 0) { // active lane int64_t dst_pos = vdata_dst.sel->get_index(search_num); - if (seen[dst_data[dst_pos]][lane]) { - result_data[search_num] = - iter; /* found at iter => iter = path length */ + if (seen[dst_data[dst_pos]][lane]){ + + // check if the path length is within bounds + // bound vector is either a constant or a flat vector + if (lower_bound.GetVectorType() == VectorType::CONSTANT_VECTOR ? + iter < lower_bound_data[0] : iter < lower_bound_data[dst_pos]) { + // when reach the destination too early, treat destination as null + // looks like the graph does not have that vertex + seen[dst_data[dst_pos]][lane] = false; + (iter & 1) ? visit2[dst_data[dst_pos]][lane] = false + : visit1[dst_data[dst_pos]][lane] = false; + continue; + } else if (upper_bound.GetVectorType() == VectorType::CONSTANT_VECTOR ? + iter > upper_bound_data[0] : iter > upper_bound_data[dst_pos]) { + result_data[search_num] = (int64_t)-1; /* no path */ + } else { + result_data[search_num] = + iter; /* found at iter => iter = path length */ + } lane_to_num[lane] = -1; // mark inactive active--; } @@ -160,6 +187,7 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state, CreateScalarFunctionInfo DuckPGQFunctions::GetIterativeLengthFunction() { auto fun = ScalarFunction("iterativelength", {LogicalType::INTEGER, LogicalType::BIGINT, + LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, LogicalType::BIGINT, IterativeLengthFunction, IterativeLengthFunctionData::IterativeLengthBind); diff --git a/scripts/kuzu_shortest_path.py b/scripts/kuzu_shortest_path.py new file mode 100644 index 00000000..7cf2bb89 --- /dev/null +++ b/scripts/kuzu_shortest_path.py @@ -0,0 +1,37 @@ +import kuzu +import pandas as pd + +db = kuzu.Database('./test') +conn = kuzu.Connection(db) + +# Drop the table if it exists: +try: + conn.execute("DROP TABLE knows") +except: + pass +try: + conn.execute("DROP TABLE Person") +except: + pass + +# Define the schema: +conn.execute("CREATE NODE TABLE Person (creationDate TIMESTAMP, id INT64, firstName STRING, lastName STRING, gender STRING, birthday DATE, locationIP STRING, browserUsed STRING, LocationCityId INT64, speaks STRING, email STRING, PRIMARY KEY (id))") +conn.execute("CREATE REL TABLE knows (FROM Person TO Person)") + +# Load the data: +conn.execute("Copy Person FROM './test/person.csv'") +conn.execute("Copy knows FROM './test/person_knows_person.csv'") + +# Calculate the shortest path between two people with bounded distance: +MIN_DISTANCE = 0 +MAX_DISTANCE = 30 +results = pd.DataFrame() +for low in range(MIN_DISTANCE, MAX_DISTANCE + 1): + for high in range(low, MAX_DISTANCE + 1): + result = conn.execute("MATCH (a:Person)-[e:knows*%d..%d]->(b:Person) RETURN a.id, b.id, length(e) AS distance ORDER BY distance ASC" % (low, high)).get_as_df() + result = result.drop_duplicates(subset=['a.id', 'b.id'], keep='first') + result['min_distance'] = low + result['max_distance'] = high + results = pd.concat([results, result], ignore_index=True) + +results.to_csv('./test/shortest_length_kuzu.csv', index=False) \ No newline at end of file diff --git a/test/sql/path-finding/shortest_path_bound.test b/test/sql/path-finding/shortest_path_bound.test new file mode 100644 index 00000000..f0008d5d --- /dev/null +++ b/test/sql/path-finding/shortest_path_bound.test @@ -0,0 +1,199 @@ +# name: test/sql/sqlpgq/shortest_path_bound.test +# group: [sqlpgq] + +statement ok +pragma enable_verification + +require duckpgq + +# Graph to test regular shortest path bound +# (0) -> (1) +# ↓ ↑ +# (2) -> (3) + +statement ok +CREATE TABLE Point(id BIGINT); INSERT INTO Point VALUES (0), (1), (2), (3); + +statement ok +CREATE TABLE know(src BIGINT, dst BIGINT); INSERT INTO know VALUES (0, 1), (0, 2), (2, 3), (3, 1); + +statement ok +-CREATE PROPERTY GRAPH pg +VERTEX TABLES ( + Point PROPERTIES ( id ) LABEL Pnt + ) +EDGE TABLES ( + know SOURCE KEY ( src ) REFERENCES Point ( id ) + DESTINATION KEY ( dst ) REFERENCES Point ( id ) + LABEL Knows + ); + +query III +WITH cte1 AS ( + SELECT CREATE_CSR_EDGE( + 0, + (SELECT count(a.id) FROM Point a), + CAST ( + (SELECT sum(CREATE_CSR_VERTEX( + 0, + (SELECT count(a.id) FROM Point a), + sub.dense_id, + sub.cnt) + ) + FROM ( + SELECT a.rowid as dense_id, count(k.src) as cnt + FROM Point a + LEFT JOIN Know k ON k.src = a.id + GROUP BY a.rowid) sub + ) + AS BIGINT), + a.rowid, + c.rowid, + k.rowid) as temp + FROM Know k + JOIN Point a on a.id = k.src + JOIN Point c on c.id = k.dst +) SELECT a.id as srd_id, b.id as dst_id, iterativelength(0, (select count(*) from Point), a.rowid, b.rowid, 2, 3) as path_length + FROM Point a, Point b, (select count(cte1.temp) * 0 as temp from cte1) __x + WHERE a.id = 0 and __x.temp * 0 + iterativelength(0, (select count(*) from Point), a.rowid, b.rowid, 2, 3); +---- +0 1 3 +0 3 2 + +query III +WITH cte1 AS ( + SELECT CREATE_CSR_EDGE( + 0, + (SELECT count(a.id) FROM Point a), + CAST ( + (SELECT sum(CREATE_CSR_VERTEX( + 0, + (SELECT count(a.id) FROM Point a), + sub.dense_id, + sub.cnt) + ) + FROM ( + SELECT a.rowid as dense_id, count(k.src) as cnt + FROM Point a + LEFT JOIN Know k ON k.src = a.id + GROUP BY a.rowid) sub + ) + AS BIGINT), + a.rowid, + c.rowid, + k.rowid) as temp + FROM Know k + JOIN Point a on a.id = k.src + JOIN Point c on c.id = k.dst +) SELECT a.id, b.id, iterativelength(0, (select count(*) from Point), a.rowid, b.rowid, 1, 3) as path_length + FROM Point a, Point b, (select count(cte1.temp) * 0 as temp from cte1) __x + WHERE a.id = 0 and __x.temp * 0 + iterativelength(0, (select count(*) from Point), a.rowid, b.rowid, 1, 3); +---- +0 1 1 +0 2 1 +0 3 2 + +# Graph to test shortest path bound with a cycle +# (0) --> (1) <-> (2) +# 0 to 1 is 1 hop + +statement ok +CREATE TABLE Point2(id BIGINT); INSERT INTO Point2 VALUES (0), (1), (2); + +statement ok +CREATE TABLE know2(src BIGINT, dst BIGINT); INSERT INTO know2 VALUES (0, 1), (1, 2), (2, 1); + +statement ok +-CREATE PROPERTY GRAPH pg2 +VERTEX TABLES ( + Point2 PROPERTIES ( id ) LABEL Pnt2 + ) +EDGE TABLES ( + know2 SOURCE KEY ( src ) REFERENCES Point2 ( id ) + DESTINATION KEY ( dst ) REFERENCES Point2 ( id ) + LABEL Knows2 + ); + +query III +WITH cte1 AS ( + SELECT CREATE_CSR_EDGE( + 0, + (SELECT count(a.id) FROM Point2 a), + CAST ( + (SELECT sum(CREATE_CSR_VERTEX( + 0, + (SELECT count(a.id) FROM Point2 a), + sub.dense_id, + sub.cnt) + ) + FROM ( + SELECT a.rowid as dense_id, count(k.src) as cnt + FROM Point2 a + LEFT JOIN know2 k ON k.src = a.id + GROUP BY a.rowid) sub + ) + AS BIGINT), + a.rowid, + c.rowid, + k.rowid) as temp + FROM know2 k + JOIN Point2 a on a.id = k.src + JOIN Point2 c on c.id = k.dst +) SELECT a.id, b.id, iterativelength(0, (select count(*) from Point2), a.rowid, b.rowid, 2, 3) as path_length + FROM Point2 a, Point2 b, (select count(cte1.temp) * 0 as temp from cte1) __x + WHERE a.id = 0 and __x.temp * 0 + iterativelength(0, (select count(*) from Point2), a.rowid, b.rowid, 2, 3); +---- +0 2 2 + + +# Graph to test shortest path bound with a cycle +# (1) <- (0) <-> (2) +# 0 to 1 is 1 hop + +statement ok +CREATE TABLE Point3(id BIGINT); INSERT INTO Point3 VALUES (0), (1), (2); + +statement ok +CREATE TABLE know3(src BIGINT, dst BIGINT); INSERT INTO know3 VALUES (0, 1), (0, 2), (2, 0); + +statement ok +-CREATE PROPERTY GRAPH pg3 +VERTEX TABLES ( + Point3 PROPERTIES ( id ) LABEL Pnt + ) +EDGE TABLES ( + know3 SOURCE KEY ( src ) REFERENCES Point3 ( id ) + DESTINATION KEY ( dst ) REFERENCES Point3 ( id ) + LABEL Knows + ); + +query III +WITH cte1 AS ( + SELECT CREATE_CSR_EDGE( + 0, + (SELECT count(a.id) FROM Point3 a), + CAST ( + (SELECT sum(CREATE_CSR_VERTEX( + 0, + (SELECT count(a.id) FROM Point3 a), + sub.dense_id, + sub.cnt) + ) + FROM ( + SELECT a.rowid as dense_id, count(k.src) as cnt + FROM Point3 a + LEFT JOIN know3 k ON k.src = a.id + GROUP BY a.rowid) sub + ) + AS BIGINT), + a.rowid, + c.rowid, + k.rowid) as temp + FROM know3 k + JOIN Point3 a on a.id = k.src + JOIN Point3 c on c.id = k.dst +) SELECT a.id, b.id, iterativelength(0, (select count(*) from Point3), a.rowid, b.rowid, 2, 3) as path_length + FROM Point3 a, Point3 b, (select count(cte1.temp) * 0 as temp from cte1) __x + WHERE a.id = 0 and __x.temp * 0 + iterativelength(0, (select count(*) from Point3), a.rowid, b.rowid, 2, 3); +---- + From cb660731bf6a2519a579e0490c1b6a70d1fe6985 Mon Sep 17 00:00:00 2001 From: rliget Date: Tue, 13 Feb 2024 12:32:06 +0100 Subject: [PATCH 02/39] Fix the bug of not being able to find longer paths --- .gitignore | 1 + .../functions/scalar/iterativelength.cpp | 35 ++++++++++++++++--- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index b9f264b9..f9a23d01 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ duckdb_unittest_tempdir/ testext test/python/__pycache__/ .Rhistory +.vscode \ No newline at end of file diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp index 533d9e2d..d8f92d9c 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp @@ -24,10 +24,30 @@ static bool IterativeLength(int64_t v_size, int64_t *v, vector &e, } } for (auto i = 0; i < v_size; i++) { - next[i] = next[i] & ~seen[i]; + // next[i] = next[i] & ~seen[i]; seen[i] = seen[i] | next[i]; + + // change |= next[i].any(); + } + + vector> next_next = vector>(v_size, 0); + // If a vertex in next is a successor of other vertices in next, set it as unvisited + for (auto i = 0; i < v_size; i++) { + if (next[i].any()) { + for (auto offset = v[i]; offset < v[i + 1]; offset++) { + auto n = e[offset]; + next_next[n] = next_next[n] | next[i]; + } + } + } + for (auto i = 0; i < v_size; i++) { + next[i] = next[i] & ~next_next[i]; + } + + for (auto i = 0; i < v_size; i++) { change |= next[i].any(); } + return change; } @@ -136,10 +156,12 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state, // make passes while a lane is still active for (int64_t iter = 1; active; iter++) { - if (!IterativeLength(v_size, v, e, seen, (iter & 1) ? visit1 : visit2, - (iter & 1) ? visit2 : visit1)) { - break; - } + // if (!IterativeLength(v_size, v, e, seen, (iter & 1) ? visit1 : visit2, + // (iter & 1) ? visit2 : visit1)) { + // break; + // } + bool stop = !IterativeLength(v_size, v, e, seen, (iter & 1) ? visit1 : visit2, + (iter & 1) ? visit2 : visit1); // detect lanes that finished for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { int64_t search_num = lane_to_num[lane]; @@ -169,6 +191,9 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state, } } } + if (stop) { + break; + } } // no changes anymore: any still active searches have no path From 49494a097c466052e93974bdb0f8a1a2e5e7faab Mon Sep 17 00:00:00 2001 From: rliget Date: Tue, 13 Feb 2024 14:21:18 +0100 Subject: [PATCH 03/39] Previous commit causes unable to find shortest path --- .../functions/scalar/iterativelength.cpp | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp index d8f92d9c..d78ee344 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp @@ -30,19 +30,19 @@ static bool IterativeLength(int64_t v_size, int64_t *v, vector &e, // change |= next[i].any(); } - vector> next_next = vector>(v_size, 0); - // If a vertex in next is a successor of other vertices in next, set it as unvisited - for (auto i = 0; i < v_size; i++) { - if (next[i].any()) { - for (auto offset = v[i]; offset < v[i + 1]; offset++) { - auto n = e[offset]; - next_next[n] = next_next[n] | next[i]; - } - } - } - for (auto i = 0; i < v_size; i++) { - next[i] = next[i] & ~next_next[i]; - } + // vector> next_next = vector>(v_size, 0); + // // If a vertex in next is a successor of other vertices in next, set it as unvisited + // for (auto i = 0; i < v_size; i++) { + // if (next[i].any()) { + // for (auto offset = v[i]; offset < v[i + 1]; offset++) { + // auto n = e[offset]; + // next_next[n] = next_next[n] | next[i]; + // } + // } + // } + // for (auto i = 0; i < v_size; i++) { + // next[i] = next[i] & ~next_next[i]; + // } for (auto i = 0; i < v_size; i++) { change |= next[i].any(); From c90456d3481c6d7ec1220065b16dff9102c6206f Mon Sep 17 00:00:00 2001 From: rliget Date: Tue, 13 Feb 2024 15:00:49 +0100 Subject: [PATCH 04/39] The path returns -1 internally if it does not exist, but should not end up in the results --- duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp index d78ee344..8f3cf5c5 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp @@ -140,11 +140,12 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state, int64_t dst_pos = vdata_dst.sel->get_index(search_num); if (!vdata_src.validity.RowIsValid(src_pos)) { result_validity.SetInvalid(search_num); - result_data[search_num] = (uint64_t)-1; /* no path */ + result_data[search_num] = (int64_t)-1; /* no path */ } else if (src_data[src_pos] == dst_data[dst_pos]) { result_data[search_num] = - (uint64_t)0; // path of length 0 does not require a search + (int64_t)0; // path of length 0 does not require a search } else { + result_data[search_num] = (int64_t)-1; /* initialize to no path */ seen[src_data[src_pos]][lane] = true; visit1[src_data[src_pos]][lane] = true; lane_to_num[lane] = search_num; // active lane @@ -181,6 +182,7 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state, continue; } else if (upper_bound.GetVectorType() == VectorType::CONSTANT_VECTOR ? iter > upper_bound_data[0] : iter > upper_bound_data[dst_pos]) { + result_validity.SetInvalid(search_num); result_data[search_num] = (int64_t)-1; /* no path */ } else { result_data[search_num] = From d7a7a8a41e255529090b9be3f497f9ff5e736c45 Mon Sep 17 00:00:00 2001 From: rliget Date: Tue, 13 Feb 2024 23:53:39 +0100 Subject: [PATCH 05/39] Fix getting stuck in a dead loop in a cyclic graph --- .../functions/scalar/iterativelength.cpp | 39 +++++++++++++------ .../sql/path-finding/shortest_path_bound.test | 4 +- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp index 8f3cf5c5..dc70b463 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp @@ -8,6 +8,7 @@ namespace duckdb { static bool IterativeLength(int64_t v_size, int64_t *v, vector &e, + vector>> &parents_v, vector> &seen, vector> &visit, vector> &next) { @@ -15,19 +16,33 @@ static bool IterativeLength(int64_t v_size, int64_t *v, vector &e, for (auto i = 0; i < v_size; i++) { next[i] = 0; } - for (auto i = 0; i < v_size; i++) { - if (visit[i].any()) { - for (auto offset = v[i]; offset < v[i + 1]; offset++) { - auto n = e[offset]; - next[n] = next[n] | visit[i]; + + for (auto lane = 0; lane < LANE_LIMIT; lane++) { + for (auto i = 0; i < v_size; i++) { + if (visit[i][lane]) { + for (auto offset = v[i]; offset < v[i + 1]; offset++) { + auto n = e[offset]; + if (parents_v[i][lane].find(n) == parents_v[i][lane].end()) { + parents_v[i][lane].insert(n); + next[n][lane] = true; + } + } } } } + + // for (auto i = 0; i < v_size; i++) { + // if (visit[i].any()) { + // for (auto offset = v[i]; offset < v[i + 1]; offset++) { + // auto n = e[offset]; + // next[n] = next[n] | visit[i]; + // } + // } + // } for (auto i = 0; i < v_size; i++) { // next[i] = next[i] & ~seen[i]; seen[i] = seen[i] | next[i]; - - // change |= next[i].any(); + change |= next[i].any(); } // vector> next_next = vector>(v_size, 0); @@ -44,9 +59,9 @@ static bool IterativeLength(int64_t v_size, int64_t *v, vector &e, // next[i] = next[i] & ~next_next[i]; // } - for (auto i = 0; i < v_size; i++) { - change |= next[i].any(); - } + // for (auto i = 0; i < v_size; i++) { + // change |= next[i].any(); + // } return change; } @@ -114,6 +129,8 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state, vector> seen(v_size); vector> visit1(v_size); vector> visit2(v_size); + // vector> level(v_size, std::vector(LANE_LIMIT, INT64_MAX)); + vector>> parents_v(v_size, std::vector>(LANE_LIMIT)); // maps lane to search number short lane_to_num[LANE_LIMIT]; @@ -161,7 +178,7 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state, // (iter & 1) ? visit2 : visit1)) { // break; // } - bool stop = !IterativeLength(v_size, v, e, seen, (iter & 1) ? visit1 : visit2, + bool stop = !IterativeLength(v_size, v, e, parents_v, seen, (iter & 1) ? visit1 : visit2, (iter & 1) ? visit2 : visit1); // detect lanes that finished for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { diff --git a/test/sql/path-finding/shortest_path_bound.test b/test/sql/path-finding/shortest_path_bound.test index f0008d5d..74a7951b 100644 --- a/test/sql/path-finding/shortest_path_bound.test +++ b/test/sql/path-finding/shortest_path_bound.test @@ -139,9 +139,9 @@ WITH cte1 AS ( FROM know2 k JOIN Point2 a on a.id = k.src JOIN Point2 c on c.id = k.dst -) SELECT a.id, b.id, iterativelength(0, (select count(*) from Point2), a.rowid, b.rowid, 2, 3) as path_length +) SELECT a.id, b.id, iterativelength(0, (select count(*) from Point2), a.rowid, b.rowid, 2, 30) as path_length FROM Point2 a, Point2 b, (select count(cte1.temp) * 0 as temp from cte1) __x - WHERE a.id = 0 and __x.temp * 0 + iterativelength(0, (select count(*) from Point2), a.rowid, b.rowid, 2, 3); + WHERE a.id = 0 and __x.temp * 0 + iterativelength(0, (select count(*) from Point2), a.rowid, b.rowid, 2, 30); ---- 0 2 2 From f1076c0fc877e5402a62ca668654ae34fe5ba356 Mon Sep 17 00:00:00 2001 From: rliget Date: Fri, 16 Feb 2024 12:00:15 +0100 Subject: [PATCH 06/39] Adding upper and lower bounds to the shortestpath function --- .../functions/tablefunctions/match.hpp | 5 +- .../functions/scalar/iterativelength.cpp | 36 +-- .../functions/scalar/shortest_path.cpp | 291 ++++++++++++------ .../functions/tablefunctions/match.cpp | 41 ++- test/sql/path-finding/shortest_path.test | 18 +- 5 files changed, 249 insertions(+), 142 deletions(-) diff --git a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp index 4144d22a..3e425ef0 100644 --- a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp +++ b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp @@ -106,8 +106,11 @@ struct PGQMatchFunction : public TableFunction { vector> &column_list, unordered_set &named_subpaths); + // static unique_ptr + // CreatePathFindingFunction(vector> &path_list, + // CreatePropertyGraphInfo &pg_table); static unique_ptr - CreatePathFindingFunction(vector> &path_list, + CreatePathFindingFunction(SubPath &subpath, CreatePropertyGraphInfo &pg_table); static void AddPathFinding(const unique_ptr &select_node, diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp index dc70b463..abbdc124 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp @@ -8,7 +8,7 @@ namespace duckdb { static bool IterativeLength(int64_t v_size, int64_t *v, vector &e, - vector>> &parents_v, + vector>> &parents_v, vector> &seen, vector> &visit, vector> &next) { @@ -22,8 +22,9 @@ static bool IterativeLength(int64_t v_size, int64_t *v, vector &e, if (visit[i][lane]) { for (auto offset = v[i]; offset < v[i + 1]; offset++) { auto n = e[offset]; - if (parents_v[i][lane].find(n) == parents_v[i][lane].end()) { - parents_v[i][lane].insert(n); + if (seen[n][lane] == false || parents_v[i][lane].find(n) == parents_v[i][lane].end()) { + parents_v[n][lane] = parents_v[i][lane]; + parents_v[n][lane].insert(i); next[n][lane] = true; } } @@ -31,37 +32,11 @@ static bool IterativeLength(int64_t v_size, int64_t *v, vector &e, } } - // for (auto i = 0; i < v_size; i++) { - // if (visit[i].any()) { - // for (auto offset = v[i]; offset < v[i + 1]; offset++) { - // auto n = e[offset]; - // next[n] = next[n] | visit[i]; - // } - // } - // } for (auto i = 0; i < v_size; i++) { // next[i] = next[i] & ~seen[i]; seen[i] = seen[i] | next[i]; change |= next[i].any(); } - - // vector> next_next = vector>(v_size, 0); - // // If a vertex in next is a successor of other vertices in next, set it as unvisited - // for (auto i = 0; i < v_size; i++) { - // if (next[i].any()) { - // for (auto offset = v[i]; offset < v[i + 1]; offset++) { - // auto n = e[offset]; - // next_next[n] = next_next[n] | next[i]; - // } - // } - // } - // for (auto i = 0; i < v_size; i++) { - // next[i] = next[i] & ~next_next[i]; - // } - - // for (auto i = 0; i < v_size; i++) { - // change |= next[i].any(); - // } return change; } @@ -129,8 +104,7 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state, vector> seen(v_size); vector> visit1(v_size); vector> visit2(v_size); - // vector> level(v_size, std::vector(LANE_LIMIT, INT64_MAX)); - vector>> parents_v(v_size, std::vector>(LANE_LIMIT)); + vector>> parents_v(v_size, std::vector>(LANE_LIMIT)); // maps lane to search number short lane_to_num[LANE_LIMIT]; diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp index 9cf0ad99..d02af0e5 100644 --- a/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp @@ -13,12 +13,18 @@ namespace duckdb { static bool IterativeLength(int64_t v_size, int64_t *V, vector &E, vector &edge_ids, - vector> &parents_v, - vector> &parents_e, + // vector> &parents_v, + // vector> &parents_e, + vector>> &parents_v, + vector>> &paths_v, + vector>> &paths_e, vector> &seen, vector> &visit, vector> &next) { bool change = false; + map, unordered_set> parents_v_cache; + map, vector> paths_v_cache; + map, vector> paths_e_cache; for (auto v = 0; v < v_size; v++) { next[v] = 0; } @@ -28,20 +34,55 @@ static bool IterativeLength(int64_t v_size, int64_t *V, vector &E, for (auto e = V[v]; e < V[v + 1]; e++) { auto n = E[e]; auto edge_id = edge_ids[e]; - next[n] = next[n] | visit[v]; - for (auto l = 0; l < LANE_LIMIT; l++) { - parents_v[n][l] = - ((parents_v[n][l] == -1) && visit[v][l]) ? v : parents_v[n][l]; - parents_e[n][l] = ((parents_e[n][l] == -1) && visit[v][l]) - ? edge_id - : parents_e[n][l]; + + for (auto lane = 0; lane < LANE_LIMIT; lane++) { + if (visit[v][lane]) { + //! If the node has not been visited, then update the parent and edge + if (seen[n][lane] == false || parents_v[v][lane].find(n) == parents_v[v][lane].end()) { + if (visit[n][lane]) { + parents_v_cache[make_pair(n, lane)] = parents_v[v][lane]; + parents_v_cache[make_pair(n, lane)].insert(v); + paths_v_cache[make_pair(n, lane)] = paths_v[v][lane]; + paths_v_cache[make_pair(n, lane)].push_back(v); + paths_e_cache[make_pair(n, lane)] = paths_e[v][lane]; + paths_e_cache[make_pair(n, lane)].push_back(edge_id); + } else { + parents_v[n][lane] = parents_v[v][lane]; + parents_v[n][lane].insert(v); + paths_v[n][lane] = paths_v[v][lane]; + paths_v[n][lane].push_back(v); + paths_e[n][lane] = paths_e[v][lane]; + paths_e[n][lane].push_back(edge_id); + } + next[n][lane] = true; + } + } } + + // next[n] = next[n] | visit[v]; + // for (auto l = 0; l < LANE_LIMIT; l++) { + // parents_v[n][l] = + // ((parents_v[n][l] == -1) && visit[v][l]) ? v : parents_v[n][l]; + // parents_e[n][l] = ((parents_e[n][l] == -1) && visit[v][l]) + // ? edge_id + // : parents_e[n][l]; + // } } } } + for (auto const& [node, parents]: parents_v_cache) { + parents_v[node.first][node.second] = parents; + } + for (auto const& [node, path]: paths_v_cache) { + paths_v[node.first][node.second] = path; + } + for (auto const& [node, edge]: paths_e_cache) { + paths_e[node.first][node.second] = edge; + } + for (auto v = 0; v < v_size; v++) { - next[v] = next[v] & ~seen[v]; + // next[v] = next[v] & ~seen[v]; seen[v] = seen[v] | next[v]; change |= next[v].any(); } @@ -80,6 +121,16 @@ static void ShortestPathFunction(DataChunk &args, ExpressionState &state, auto src_data = (int64_t *)vdata_src.data; auto dst_data = (int64_t *)vdata_dst.data; + // get lowerbound and upperbound + auto &lower_bound = args.data[4]; + auto &upper_bound = args.data[5]; + UnifiedVectorFormat vdata_lower_bound; + UnifiedVectorFormat vdata_upper_bound; + lower_bound.ToUnifiedFormat(args.size(), vdata_lower_bound); + upper_bound.ToUnifiedFormat(args.size(), vdata_upper_bound); + auto lower_bound_data = (int64_t *)vdata_lower_bound.data; + auto upper_bound_data = (int64_t *)vdata_upper_bound.data; + result.SetVectorType(VectorType::FLAT_VECTOR); auto result_data = FlatVector::GetData(result); ValidityMask &result_validity = FlatVector::Validity(result); @@ -88,10 +139,19 @@ static void ShortestPathFunction(DataChunk &args, ExpressionState &state, vector> seen(v_size); vector> visit1(v_size); vector> visit2(v_size); - vector> parents_v(v_size, - std::vector(LANE_LIMIT, -1)); - vector> parents_e(v_size, - std::vector(LANE_LIMIT, -1)); + // vector> parents_v(v_size, + // std::vector(LANE_LIMIT, -1)); + // vector> parents_e(v_size, + // std::vector(LANE_LIMIT, -1)); + // vector> parents_v_result(v_size, + // std::vector(LANE_LIMIT, -1)); + // vector> parents_e_result(v_size, + // std::vector(LANE_LIMIT, -1)); + + vector>> parents_v(v_size, std::vector>(LANE_LIMIT)); + vector>> paths_v(v_size, std::vector>(LANE_LIMIT)); + vector>> paths_e(v_size, std::vector>(LANE_LIMIT)); + // maps lane to search number int16_t lane_to_num[LANE_LIMIT]; @@ -107,10 +167,12 @@ static void ShortestPathFunction(DataChunk &args, ExpressionState &state, for (auto i = 0; i < v_size; i++) { seen[i] = 0; visit1[i] = 0; - for (auto j = 0; j < LANE_LIMIT; j++) { - parents_v[i][j] = -1; - parents_e[i][j] = -1; - } + // for (auto j = 0; j < LANE_LIMIT; j++) { + // parents_v[i][j] = -1; + // parents_e[i][j] = -1; + // parents_v_result[i][j] = -1; + // parents_e_result[i][j] = -1; + // } } // add search jobs to free lanes @@ -122,13 +184,23 @@ static void ShortestPathFunction(DataChunk &args, ExpressionState &state, int64_t src_pos = vdata_src.sel->get_index(search_num); if (!vdata_src.validity.RowIsValid(src_pos)) { result_validity.SetInvalid(search_num); + } else if (src_data[src_pos] == dst_data[src_pos]) { + unique_ptr output = + make_uniq(LogicalType::LIST(LogicalType::BIGINT)); + ListVector::PushBack(*output, src_data[src_pos]); + ListVector::Append(result, ListVector::GetEntry(*output), + ListVector::GetListSize(*output)); + result_data[search_num].length = ListVector::GetListSize(*output); + result_data[search_num].offset = total_len; + total_len += result_data[search_num].length; } else { visit1[src_data[src_pos]][lane] = true; - parents_v[src_data[src_pos]][lane] = - src_data[src_pos]; // Mark source with source id - parents_e[src_data[src_pos]][lane] = - -2; // Mark the source with -2, there is no incoming edge for the - // source. + seen[src_data[src_pos]][lane] = true; + // parents_v[src_data[src_pos]][lane] = + // -2; // No incoming vertex for the source + // parents_e[src_data[src_pos]][lane] = + // -2; // Mark the source with -2, there is no incoming edge for the + // // source. lane_to_num[lane] = search_num; // active lane active++; break; @@ -136,22 +208,61 @@ static void ShortestPathFunction(DataChunk &args, ExpressionState &state, } } + bool stop[LANE_LIMIT] = {false}; //! make passes while a lane is still active for (int64_t iter = 1; active; iter++) { //! Perform one step of bfs exploration - if (!IterativeLength(v_size, v, e, edge_ids, parents_v, parents_e, seen, + if (!IterativeLength(v_size, v, e, edge_ids, parents_v, paths_v, paths_e, seen, (iter & 1) ? visit1 : visit2, (iter & 1) ? visit2 : visit1)) { break; } int64_t finished_searches = 0; // detect lanes that finished - for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + for (int64_t lane = 0; lane < LANE_LIMIT && (stop[lane] == false); lane++) { int64_t search_num = lane_to_num[lane]; if (search_num >= 0) { // active lane //! Check if dst for a source has been seen int64_t dst_pos = vdata_dst.sel->get_index(search_num); if (seen[dst_data[dst_pos]][lane]) { + // check if the path length is within bounds + // bound vector is either a constant or a flat vector + if (lower_bound.GetVectorType() == VectorType::CONSTANT_VECTOR ? + iter < lower_bound_data[0] : iter < lower_bound_data[dst_pos]) { + // when reach the destination too early, treat destination as null + // looks like the graph does not have that vertex + seen[dst_data[dst_pos]][lane] = false; + (iter & 1) ? visit2[dst_data[dst_pos]][lane] = false + : visit1[dst_data[dst_pos]][lane] = false; + continue; + } else if (upper_bound.GetVectorType() == VectorType::CONSTANT_VECTOR ? + iter > upper_bound_data[0] : iter > upper_bound_data[dst_pos]) { + result_validity.SetInvalid(search_num); + } else { + vector output_vector; + auto it_v = paths_v[dst_data[dst_pos]][lane].begin(), + end_v = paths_v[dst_data[dst_pos]][lane].end(); + auto it_e = paths_e[dst_data[dst_pos]][lane].begin(), + end_e = paths_e[dst_data[dst_pos]][lane].end(); + while (it_v != end_v && it_e != end_e) { + output_vector.push_back(*it_v); + output_vector.push_back(*it_e); + it_v++; + it_e++; + } + output_vector.push_back(dst_data[dst_pos]); + auto output = make_uniq(LogicalType::LIST(LogicalType::BIGINT)); + for (auto val : output_vector) { + Value value_to_insert = val; + ListVector::PushBack(*output, value_to_insert); + } + result_data[search_num].length = ListVector::GetListSize(*output); + result_data[search_num].offset = total_len; + ListVector::Append(result, ListVector::GetEntry(*output), + ListVector::GetListSize(*output)); + total_len += result_data[search_num].length; + } + stop[lane] = true; finished_searches++; } } @@ -160,71 +271,74 @@ static void ShortestPathFunction(DataChunk &args, ExpressionState &state, break; } } - //! Reconstruct the paths - for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { - int64_t search_num = lane_to_num[lane]; - if (search_num == -1) { // empty lanes - continue; - } + // //! Reconstruct the paths + // for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + // int64_t search_num = lane_to_num[lane]; + // if (search_num == -1) { // empty lanes + // continue; + // } - //! Searches that have stopped have found a path - int64_t src_pos = vdata_src.sel->get_index(search_num); - int64_t dst_pos = vdata_dst.sel->get_index(search_num); - if (src_data[src_pos] == dst_data[dst_pos]) { // Source == destination - unique_ptr output = - make_uniq(LogicalType::LIST(LogicalType::BIGINT)); - ListVector::PushBack(*output, src_data[src_pos]); - ListVector::Append(result, ListVector::GetEntry(*output), - ListVector::GetListSize(*output)); - result_data[search_num].length = ListVector::GetListSize(*output); - result_data[search_num].offset = total_len; - total_len += result_data[search_num].length; - continue; - } - std::vector output_vector; - std::vector output_edge; - auto source_v = src_data[src_pos]; // Take the source - - auto parent_vertex = - parents_v[dst_data[dst_pos]] - [lane]; // Take the parent vertex of the destination vertex - auto parent_edge = - parents_e[dst_data[dst_pos]] - [lane]; // Take the parent edge of the destination vertex - - output_vector.push_back(dst_data[dst_pos]); // Add destination vertex - output_vector.push_back(parent_edge); - while (parent_vertex != source_v) { // Continue adding vertices until we - // have reached the source vertex - //! -1 is used to signify no parent - if (parent_vertex == -1 || - parent_vertex == parents_v[parent_vertex][lane]) { - result_validity.SetInvalid(search_num); - break; - } - output_vector.push_back(parent_vertex); - parent_edge = parents_e[parent_vertex][lane]; - parent_vertex = parents_v[parent_vertex][lane]; - output_vector.push_back(parent_edge); - } + // //! Searches that have stopped have found a path + // int64_t src_pos = vdata_src.sel->get_index(search_num); + // int64_t dst_pos = vdata_dst.sel->get_index(search_num); - if (!result_validity.RowIsValid(search_num)) { - continue; - } - output_vector.push_back(source_v); - std::reverse(output_vector.begin(), output_vector.end()); - auto output = make_uniq(LogicalType::LIST(LogicalType::BIGINT)); - for (auto val : output_vector) { - Value value_to_insert = val; - ListVector::PushBack(*output, value_to_insert); - } + // parents_v_result[src_data[src_pos]][lane] = src_data[src_pos]; - result_data[search_num].length = ListVector::GetListSize(*output); - result_data[search_num].offset = total_len; - ListVector::Append(result, ListVector::GetEntry(*output), - ListVector::GetListSize(*output)); - total_len += result_data[search_num].length; - } + // if (src_data[src_pos] == dst_data[dst_pos]) { // Source == destination + // unique_ptr output = + // make_uniq(LogicalType::LIST(LogicalType::BIGINT)); + // ListVector::PushBack(*output, src_data[src_pos]); + // ListVector::Append(result, ListVector::GetEntry(*output), + // ListVector::GetListSize(*output)); + // result_data[search_num].length = ListVector::GetListSize(*output); + // result_data[search_num].offset = total_len; + // total_len += result_data[search_num].length; + // continue; + // } + // std::vector output_vector; + // std::vector output_edge; + // auto source_v = src_data[src_pos]; // Take the source + + // auto parent_vertex = + // parents_v_result[dst_data[dst_pos]] + // [lane]; // Take the parent vertex of the destination vertex + // auto parent_edge = + // parents_e_result[dst_data[dst_pos]] + // [lane]; // Take the parent edge of the destination vertex + + // output_vector.push_back(dst_data[dst_pos]); // Add destination vertex + // output_vector.push_back(parent_edge); + // while (parent_vertex != source_v) { // Continue adding vertices until we + // // have reached the source vertex + // //! -1 is used to signify no parent + // if (parent_vertex == -1 || + // parent_vertex == parents_v_result[parent_vertex][lane]) { + // result_validity.SetInvalid(search_num); + // break; + // } + // output_vector.push_back(parent_vertex); + // parent_edge = parents_e_result[parent_vertex][lane]; + // parent_vertex = parents_v_result[parent_vertex][lane]; + // output_vector.push_back(parent_edge); + // } + + // if (!result_validity.RowIsValid(search_num)) { + // continue; + // } + // output_vector.push_back(source_v); + // std::reverse(output_vector.begin(), output_vector.end()); + // auto output = make_uniq(LogicalType::LIST(LogicalType::BIGINT)); + // for (auto val : output_vector) { + // Value value_to_insert = val; + // ListVector::PushBack(*output, value_to_insert); + // } + + // result_data[search_num].length = ListVector::GetListSize(*output); + // result_data[search_num].offset = total_len; + // ListVector::Append(result, ListVector::GetEntry(*output), + // ListVector::GetListSize(*output)); + // total_len += result_data[search_num].length; + // } } duckpgq_state->csr_to_delete.insert(info.csr_id); } @@ -232,6 +346,7 @@ static void ShortestPathFunction(DataChunk &args, ExpressionState &state, CreateScalarFunctionInfo DuckPGQFunctions::GetShortestPathFunction() { auto fun = ScalarFunction("shortestpath", {LogicalType::INTEGER, LogicalType::BIGINT, + LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, LogicalType::LIST(LogicalType::BIGINT), ShortestPathFunction, @@ -239,4 +354,4 @@ CreateScalarFunctionInfo DuckPGQFunctions::GetShortestPathFunction() { return CreateScalarFunctionInfo(fun); } -}; // namespace duckdb +} // namespace duckdb diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index 4c7a4fdb..74284eb3 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -486,7 +486,8 @@ CreateWhereClause(vector> &conditions) { } unique_ptr PGQMatchFunction::CreatePathFindingFunction( - vector> &path_list, + // vector> &path_list, + SubPath &subpath, CreatePropertyGraphInfo &pg_table) { // This method will return a SubqueryRef of a list of rowids // For every vertex and edge element, we add the rowid to the list using @@ -496,6 +497,7 @@ unique_ptr PGQMatchFunction::CreatePathFindingFunction( // full list of element rowids, using list_concat. For now we will only // support returning rowids unique_ptr final_list; + auto &path_list = subpath.path_list; auto previous_vertex_element = GetPathElement(path_list[0]); if (!previous_vertex_element) { @@ -535,6 +537,10 @@ unique_ptr PGQMatchFunction::CreatePathFindingFunction( edge_table, previous_vertex_element->variable_binding))); pathfinding_children.push_back(std::move(src_row_id)); pathfinding_children.push_back(std::move(dst_row_id)); + pathfinding_children.push_back( + make_uniq(Value::INTEGER(static_cast(subpath.lower)))); + pathfinding_children.push_back( + make_uniq(Value::INTEGER(static_cast(subpath.upper)))); auto shortest_path_function = make_uniq( "shortestpath", std::move(pathfinding_children)); @@ -658,6 +664,7 @@ void PGQMatchFunction::AddPathFinding( //! START //! WHERE __x.temp + iterativelength(, (SELECT count(c.id) //! from dst c, a.rowid, b.rowid) between lower and upper + //! from dst c, a.rowid, b.rowid, lower, upper) auto src_row_id = make_uniq("rowid", prev_binding); auto dst_row_id = make_uniq("rowid", next_binding); @@ -669,6 +676,10 @@ void PGQMatchFunction::AddPathFinding( std::move(GetCountTable(edge_table, prev_binding))); pathfinding_children.push_back(std::move(src_row_id)); pathfinding_children.push_back(std::move(dst_row_id)); + pathfinding_children.push_back( + make_uniq(Value::INTEGER(static_cast(subpath->lower)))); + pathfinding_children.push_back( + make_uniq(Value::INTEGER(static_cast(subpath->upper)))); auto reachability_function = make_uniq( "iterativelength", std::move(pathfinding_children)); @@ -681,14 +692,14 @@ void PGQMatchFunction::AddPathFinding( auto addition_function = make_uniq("add", std::move(addition_children)); - auto lower_limit = make_uniq( - Value::INTEGER(static_cast(subpath->lower))); - auto upper_limit = make_uniq( - Value::INTEGER(static_cast(subpath->upper))); - auto between_expression = make_uniq( - std::move(addition_function), std::move(lower_limit), - std::move(upper_limit)); - conditions.push_back(std::move(between_expression)); + // auto lower_limit = make_uniq( + // Value::INTEGER(static_cast(subpath->lower))); + // auto upper_limit = make_uniq( + // Value::INTEGER(static_cast(subpath->upper))); + // auto between_expression = make_uniq( + // std::move(addition_function), std::move(lower_limit), + // std::move(upper_limit)); + conditions.push_back(std::move(addition_function)); //! END //! WHERE __x.temp + iterativelength(, (SELECT count(s.id) @@ -718,8 +729,10 @@ void PGQMatchFunction::CheckNamedSubpath( if (parsed_ref->function_name == "element_id") { // Check subpath name matches the column referenced in the function --> // element_id(named_subpath) + // auto shortest_path_function = + // CreatePathFindingFunction(subpath.path_list, pg_table); auto shortest_path_function = - CreatePathFindingFunction(subpath.path_list, pg_table); + CreatePathFindingFunction(subpath, pg_table); if (column_alias.empty()) { shortest_path_function->alias = @@ -731,8 +744,10 @@ void PGQMatchFunction::CheckNamedSubpath( column_list.insert(column_list.begin() + idx_i, std::move(shortest_path_function)); } else if (parsed_ref->function_name == "path_length") { + // auto shortest_path_function = + // CreatePathFindingFunction(subpath.path_list, pg_table); auto shortest_path_function = - CreatePathFindingFunction(subpath.path_list, pg_table); + CreatePathFindingFunction(subpath, pg_table); auto path_len_children = vector>(); path_len_children.push_back(std::move(shortest_path_function)); auto path_len = @@ -751,8 +766,10 @@ void PGQMatchFunction::CheckNamedSubpath( std::move(path_length_function)); } else if (parsed_ref->function_name == "vertices" || parsed_ref->function_name == "edges") { + // auto shortest_path_function = + // CreatePathFindingFunction(subpath.path_list, pg_table); auto shortest_path_function = - CreatePathFindingFunction(subpath.path_list, pg_table); + CreatePathFindingFunction(subpath, pg_table); auto list_slice_children = vector>(); list_slice_children.push_back(std::move(shortest_path_function)); diff --git a/test/sql/path-finding/shortest_path.test b/test/sql/path-finding/shortest_path.test index a82e8ec1..4d6e7d03 100644 --- a/test/sql/path-finding/shortest_path.test +++ b/test/sql/path-finding/shortest_path.test @@ -46,18 +46,17 @@ Daniel VU query III -FROM GRAPH_TABLE (pg MATCH - p = ANY SHORTEST (a:Person WHERE a.name = 'Daniel')-[k:knows]->{1,3}(b:Person) + p = ANY SHORTEST (a:Person WHERE a.name = 'Daniel')-[k:knows]->{2,3}(b:Person) COLUMNS (element_id(p), a.name as name, b.name as b_name) ) study; ---- -[0, 0, 1] Daniel Tavneet -[0, 1, 2] Daniel Gabor -[0, 2, 3] Daniel Peter +[0, 0, 1, 4, 2] Daniel Gabor +[0, 1, 2, 6, 3] Daniel Peter query IIII -FROM GRAPH_TABLE (pg MATCH - p = ANY SHORTEST (a:Person)-[k:knows]->{1,3}(b:Person) + p = ANY SHORTEST (a:Person)-[k:knows]->{2,3}(b:Person) COLUMNS (path_length(p), element_id(p), a.name as name, b.name as b_name) ) study order by study.name, study.b_name; @@ -114,10 +113,9 @@ WITH cte1 AS ( FROM Know k JOIN student a on a.id = k.src JOIN student c on c.id = k.dst -) SELECT shortestpath(0, (select count(*) from student), a.rowid, b.rowid) as path, a.name as a_name, b.name as b_name +) SELECT shortestpath(0, (select count(*) from student), a.rowid, b.rowid, 2, 3) as path, a.name as a_name, b.name as b_name FROM student a, student b, (select count(cte1.temp) * 0 as temp from cte1) __x - WHERE a.name = 'Daniel' and __x.temp * 0 + iterativelength(0, (select count(*) from student), a.rowid, b.rowid) between 1 and 3 + WHERE a.name = 'Daniel' and __x.temp * 0 + iterativelength(0, (select count(*) from student), a.rowid, b.rowid, 2, 3); ---- -[0, 0, 1] Daniel Tavneet -[0, 1, 2] Daniel Gabor -[0, 2, 3] Daniel Peter +[0, 0, 1, 4, 2] Daniel Gabor +[0, 1, 2, 6, 3] Daniel Peter From 77083739bc0beee1da98d60ca098000ddd04d452 Mon Sep 17 00:00:00 2001 From: SiberiaWolfP Date: Fri, 16 Feb 2024 14:35:20 +0100 Subject: [PATCH 07/39] Fix bug: path always is [0] --- duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp index d02af0e5..0585e96b 100644 --- a/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp @@ -182,9 +182,10 @@ static void ShortestPathFunction(DataChunk &args, ExpressionState &state, while (started_searches < args.size()) { int64_t search_num = started_searches++; int64_t src_pos = vdata_src.sel->get_index(search_num); + int64_t dst_pos = vdata_dst.sel->get_index(search_num); if (!vdata_src.validity.RowIsValid(src_pos)) { result_validity.SetInvalid(search_num); - } else if (src_data[src_pos] == dst_data[src_pos]) { + } else if (src_data[src_pos] == dst_data[dst_pos]) { unique_ptr output = make_uniq(LogicalType::LIST(LogicalType::BIGINT)); ListVector::PushBack(*output, src_data[src_pos]); From db60bdfb166c487aaeea7321419930aa34533348 Mon Sep 17 00:00:00 2001 From: Pingan Ren Date: Fri, 16 Feb 2024 14:43:46 +0100 Subject: [PATCH 08/39] Code clean --- .../functions/scalar/shortest_path.cpp | 110 +----------------- 1 file changed, 6 insertions(+), 104 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp index 0585e96b..05e6a93f 100644 --- a/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp @@ -13,8 +13,6 @@ namespace duckdb { static bool IterativeLength(int64_t v_size, int64_t *V, vector &E, vector &edge_ids, - // vector> &parents_v, - // vector> &parents_e, vector>> &parents_v, vector>> &paths_v, vector>> &paths_e, @@ -58,27 +56,18 @@ static bool IterativeLength(int64_t v_size, int64_t *V, vector &E, } } } - - // next[n] = next[n] | visit[v]; - // for (auto l = 0; l < LANE_LIMIT; l++) { - // parents_v[n][l] = - // ((parents_v[n][l] == -1) && visit[v][l]) ? v : parents_v[n][l]; - // parents_e[n][l] = ((parents_e[n][l] == -1) && visit[v][l]) - // ? edge_id - // : parents_e[n][l]; - // } } } } - for (auto const& [node, parents]: parents_v_cache) { - parents_v[node.first][node.second] = parents; + for (auto const& cache: parents_v_cache) { + parents_v[cache.first.first][cache.first.second] = cache.second; } - for (auto const& [node, path]: paths_v_cache) { - paths_v[node.first][node.second] = path; + for (auto const& cache: paths_v_cache) { + paths_v[cache.first.first][cache.first.second] = cache.second; } - for (auto const& [node, edge]: paths_e_cache) { - paths_e[node.first][node.second] = edge; + for (auto const& cache: paths_e_cache) { + paths_e[cache.first.first][cache.first.second] = cache.second; } for (auto v = 0; v < v_size; v++) { @@ -139,14 +128,6 @@ static void ShortestPathFunction(DataChunk &args, ExpressionState &state, vector> seen(v_size); vector> visit1(v_size); vector> visit2(v_size); - // vector> parents_v(v_size, - // std::vector(LANE_LIMIT, -1)); - // vector> parents_e(v_size, - // std::vector(LANE_LIMIT, -1)); - // vector> parents_v_result(v_size, - // std::vector(LANE_LIMIT, -1)); - // vector> parents_e_result(v_size, - // std::vector(LANE_LIMIT, -1)); vector>> parents_v(v_size, std::vector>(LANE_LIMIT)); vector>> paths_v(v_size, std::vector>(LANE_LIMIT)); @@ -167,12 +148,6 @@ static void ShortestPathFunction(DataChunk &args, ExpressionState &state, for (auto i = 0; i < v_size; i++) { seen[i] = 0; visit1[i] = 0; - // for (auto j = 0; j < LANE_LIMIT; j++) { - // parents_v[i][j] = -1; - // parents_e[i][j] = -1; - // parents_v_result[i][j] = -1; - // parents_e_result[i][j] = -1; - // } } // add search jobs to free lanes @@ -197,11 +172,6 @@ static void ShortestPathFunction(DataChunk &args, ExpressionState &state, } else { visit1[src_data[src_pos]][lane] = true; seen[src_data[src_pos]][lane] = true; - // parents_v[src_data[src_pos]][lane] = - // -2; // No incoming vertex for the source - // parents_e[src_data[src_pos]][lane] = - // -2; // Mark the source with -2, there is no incoming edge for the - // // source. lane_to_num[lane] = search_num; // active lane active++; break; @@ -272,74 +242,6 @@ static void ShortestPathFunction(DataChunk &args, ExpressionState &state, break; } } - // //! Reconstruct the paths - // for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { - // int64_t search_num = lane_to_num[lane]; - // if (search_num == -1) { // empty lanes - // continue; - // } - - // //! Searches that have stopped have found a path - // int64_t src_pos = vdata_src.sel->get_index(search_num); - // int64_t dst_pos = vdata_dst.sel->get_index(search_num); - - // parents_v_result[src_data[src_pos]][lane] = src_data[src_pos]; - - // if (src_data[src_pos] == dst_data[dst_pos]) { // Source == destination - // unique_ptr output = - // make_uniq(LogicalType::LIST(LogicalType::BIGINT)); - // ListVector::PushBack(*output, src_data[src_pos]); - // ListVector::Append(result, ListVector::GetEntry(*output), - // ListVector::GetListSize(*output)); - // result_data[search_num].length = ListVector::GetListSize(*output); - // result_data[search_num].offset = total_len; - // total_len += result_data[search_num].length; - // continue; - // } - // std::vector output_vector; - // std::vector output_edge; - // auto source_v = src_data[src_pos]; // Take the source - - // auto parent_vertex = - // parents_v_result[dst_data[dst_pos]] - // [lane]; // Take the parent vertex of the destination vertex - // auto parent_edge = - // parents_e_result[dst_data[dst_pos]] - // [lane]; // Take the parent edge of the destination vertex - - // output_vector.push_back(dst_data[dst_pos]); // Add destination vertex - // output_vector.push_back(parent_edge); - // while (parent_vertex != source_v) { // Continue adding vertices until we - // // have reached the source vertex - // //! -1 is used to signify no parent - // if (parent_vertex == -1 || - // parent_vertex == parents_v_result[parent_vertex][lane]) { - // result_validity.SetInvalid(search_num); - // break; - // } - // output_vector.push_back(parent_vertex); - // parent_edge = parents_e_result[parent_vertex][lane]; - // parent_vertex = parents_v_result[parent_vertex][lane]; - // output_vector.push_back(parent_edge); - // } - - // if (!result_validity.RowIsValid(search_num)) { - // continue; - // } - // output_vector.push_back(source_v); - // std::reverse(output_vector.begin(), output_vector.end()); - // auto output = make_uniq(LogicalType::LIST(LogicalType::BIGINT)); - // for (auto val : output_vector) { - // Value value_to_insert = val; - // ListVector::PushBack(*output, value_to_insert); - // } - - // result_data[search_num].length = ListVector::GetListSize(*output); - // result_data[search_num].offset = total_len; - // ListVector::Append(result, ListVector::GetEntry(*output), - // ListVector::GetListSize(*output)); - // total_len += result_data[search_num].length; - // } } duckpgq_state->csr_to_delete.insert(info.csr_id); } From 470ad31fb8121e1ccb17d9ee6abe5765401f1780 Mon Sep 17 00:00:00 2001 From: Pingan Ren Date: Sun, 18 Feb 2024 21:41:15 +0100 Subject: [PATCH 09/39] Fix bug: stop too early Add more test cases --- .../functions/tablefunctions/match.hpp | 5 +- .../functions/scalar/shortest_path.cpp | 10 +- .../functions/tablefunctions/match.cpp | 20 ++-- test/sql/path-finding/complex_matching.test | 8 +- test/sql/path-finding/shortest_path.test | 24 +++-- .../sql/path-finding/shortest_path_bound.test | 101 ++++++++++++++++++ 6 files changed, 129 insertions(+), 39 deletions(-) diff --git a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp index 3e425ef0..4144d22a 100644 --- a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp +++ b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp @@ -106,11 +106,8 @@ struct PGQMatchFunction : public TableFunction { vector> &column_list, unordered_set &named_subpaths); - // static unique_ptr - // CreatePathFindingFunction(vector> &path_list, - // CreatePropertyGraphInfo &pg_table); static unique_ptr - CreatePathFindingFunction(SubPath &subpath, + CreatePathFindingFunction(vector> &path_list, CreatePropertyGraphInfo &pg_table); static void AddPathFinding(const unique_ptr &select_node, diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp index 05e6a93f..f11df13b 100644 --- a/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp @@ -179,7 +179,6 @@ static void ShortestPathFunction(DataChunk &args, ExpressionState &state, } } - bool stop[LANE_LIMIT] = {false}; //! make passes while a lane is still active for (int64_t iter = 1; active; iter++) { //! Perform one step of bfs exploration @@ -188,9 +187,8 @@ static void ShortestPathFunction(DataChunk &args, ExpressionState &state, (iter & 1) ? visit2 : visit1)) { break; } - int64_t finished_searches = 0; // detect lanes that finished - for (int64_t lane = 0; lane < LANE_LIMIT && (stop[lane] == false); lane++) { + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { int64_t search_num = lane_to_num[lane]; if (search_num >= 0) { // active lane //! Check if dst for a source has been seen @@ -233,14 +231,10 @@ static void ShortestPathFunction(DataChunk &args, ExpressionState &state, ListVector::GetListSize(*output)); total_len += result_data[search_num].length; } - stop[lane] = true; - finished_searches++; + lane_to_num[lane] = -1; // mark inactive } } } - if (finished_searches == LANE_LIMIT) { - break; - } } } duckpgq_state->csr_to_delete.insert(info.csr_id); diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index 04431236..0deb7f21 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -486,8 +486,7 @@ CreateWhereClause(vector> &conditions) { } unique_ptr PGQMatchFunction::CreatePathFindingFunction( - // vector> &path_list, - SubPath &subpath, + vector> &path_list, CreatePropertyGraphInfo &pg_table) { // This method will return a SubqueryRef of a list of rowids // For every vertex and edge element, we add the rowid to the list using @@ -497,7 +496,6 @@ unique_ptr PGQMatchFunction::CreatePathFindingFunction( // full list of element rowids, using list_concat. For now we will only // support returning rowids unique_ptr final_list; - auto &path_list = subpath.path_list; auto previous_vertex_element = GetPathElement(path_list[0]); if (!previous_vertex_element) { @@ -538,9 +536,9 @@ unique_ptr PGQMatchFunction::CreatePathFindingFunction( pathfinding_children.push_back(std::move(src_row_id)); pathfinding_children.push_back(std::move(dst_row_id)); pathfinding_children.push_back( - make_uniq(Value::INTEGER(static_cast(subpath.lower)))); + make_uniq(Value::INTEGER(static_cast(edge_subpath->lower)))); pathfinding_children.push_back( - make_uniq(Value::INTEGER(static_cast(subpath.upper)))); + make_uniq(Value::INTEGER(static_cast(edge_subpath->upper)))); auto shortest_path_function = make_uniq( "shortestpath", std::move(pathfinding_children)); @@ -729,10 +727,8 @@ void PGQMatchFunction::CheckNamedSubpath( if (parsed_ref->function_name == "element_id") { // Check subpath name matches the column referenced in the function --> // element_id(named_subpath) - // auto shortest_path_function = - // CreatePathFindingFunction(subpath.path_list, pg_table); auto shortest_path_function = - CreatePathFindingFunction(subpath, pg_table); + CreatePathFindingFunction(subpath.path_list, pg_table); if (column_alias.empty()) { shortest_path_function->alias = @@ -744,10 +740,8 @@ void PGQMatchFunction::CheckNamedSubpath( column_list.insert(column_list.begin() + idx_i, std::move(shortest_path_function)); } else if (parsed_ref->function_name == "path_length") { - // auto shortest_path_function = - // CreatePathFindingFunction(subpath.path_list, pg_table); auto shortest_path_function = - CreatePathFindingFunction(subpath, pg_table); + CreatePathFindingFunction(subpath.path_list, pg_table); auto path_len_children = vector>(); path_len_children.push_back(std::move(shortest_path_function)); auto path_len = @@ -766,10 +760,8 @@ void PGQMatchFunction::CheckNamedSubpath( std::move(path_length_function)); } else if (parsed_ref->function_name == "vertices" || parsed_ref->function_name == "edges") { - // auto shortest_path_function = - // CreatePathFindingFunction(subpath.path_list, pg_table); auto shortest_path_function = - CreatePathFindingFunction(subpath, pg_table); + CreatePathFindingFunction(subpath.path_list, pg_table); auto list_slice_children = vector>(); list_slice_children.push_back(std::move(shortest_path_function)); diff --git a/test/sql/path-finding/complex_matching.test b/test/sql/path-finding/complex_matching.test index 4f406385..25d62a15 100644 --- a/test/sql/path-finding/complex_matching.test +++ b/test/sql/path-finding/complex_matching.test @@ -76,7 +76,7 @@ query IIIIIII 28587302322223 [0, 1, 26, 64, 33, 78, 38] 3 [0, 26, 33, 38] [1, 64, 78] 14 24189255811081 30786325577731 [0, 1, 26, 64, 33, 79, 39] 3 [0, 26, 33, 39] [1, 64, 79] 14 24189255811081 32985348833329 [0, 1, 26, 64, 33, 80, 43] 3 [0, 26, 33, 43] [1, 64, 80] 14 24189255811081 -35184372088850 [0, 1, 26, 63, 32, 76, 36, 82, 45] 4 [0, 26, 32, 36, 45] [1, 63, 76, 82] 14 24189255811081 +35184372088850 [0, 1, 26, 64, 33, 77, 36, 82, 45] 4 [0, 26, 33, 36, 45] [1, 64, 77, 82] 14 24189255811081 28587302322204 [0, 2, 32, 75, 33, 77, 36] 3 [0, 32, 33, 36] [2, 75, 77] 14 26388279066668 28587302322223 [0, 2, 32, 75, 33, 78, 38] 3 [0, 32, 33, 38] [2, 75, 78] 14 26388279066668 30786325577731 [0, 2, 32, 75, 33, 79, 39] 3 [0, 32, 33, 39] [2, 75, 79] 14 26388279066668 @@ -147,7 +147,7 @@ WITH CTE1 AS (SELECT CREATE_CSR_EDGE( FROM Person_knows_Person k JOIN Person a on a.id = k.person1id JOIN Person b on b.id = k.person2id) -SELECT shortestpath(0, (select count(*) from Person), a.rowid, b.rowid) as path, +SELECT shortestpath(0, (select count(*) from Person), a.rowid, b.rowid, 1, 3) as path, a.firstname as a_name, b.rowid as b_rowid, b.id as b_id, @@ -160,7 +160,7 @@ FROM Person a, WHERE a.id = 28587302322180 and b.id = i.PersonId and t.id = i.TagId - and __x.temp * 0 + iterativelength(0, (select count(*) from Person), a.rowid, b.rowid) between 1 and 3 + and __x.temp * 0 + iterativelength(0, (select count(*) from Person), a.rowid, b.rowid, 1, 3) ORDER BY b_id, t_id ---- [33, 77, 36] Bryn 36 28587302322204 6 @@ -366,5 +366,5 @@ query III [1, 3, 5, 27, 40] 16 30786325577740 [1, 5, 33, 80, 43] 16 32985348833329 [1, 3, 5, 22, 26, 66, 44] 16 35184372088834 -[1, 3, 5, 28, 45] 16 35184372088850 +[1, 6, 36, 82, 45] 16 35184372088850 [1, 3, 5, 23, 31, 74, 46] 16 35184372088856 diff --git a/test/sql/path-finding/shortest_path.test b/test/sql/path-finding/shortest_path.test index 3b8d8c5a..ccfb8572 100644 --- a/test/sql/path-finding/shortest_path.test +++ b/test/sql/path-finding/shortest_path.test @@ -61,22 +61,18 @@ query IIII ) study order by study.name, study.b_name; ---- -1 [0, 1, 2] Daniel Gabor -1 [0, 2, 3] Daniel Peter -1 [0, 0, 1] Daniel Tavneet +2 [0, 0, 1, 4, 2] Daniel Gabor +2 [0, 1, 2, 6, 3] Daniel Peter 2 [4, 7, 3, 3, 0] David Daniel 3 [4, 7, 3, 3, 0, 1, 2] David Gabor -1 [4, 7, 3] David Peter 3 [4, 7, 3, 3, 0, 0, 1] David Tavneet 2 [2, 6, 3, 3, 0] Gabor Daniel -1 [2, 6, 3] Gabor Peter 3 [2, 6, 3, 3, 0, 0, 1] Gabor Tavneet -1 [3, 3, 0] Peter Daniel 2 [3, 3, 0, 1, 2] Peter Gabor 2 [3, 3, 0, 0, 1] Peter Tavneet 2 [1, 5, 3, 3, 0] Tavneet Daniel -1 [1, 4, 2] Tavneet Gabor -1 [1, 5, 3] Tavneet Peter +3 [1, 5, 3, 3, 0, 1, 2] Tavneet Gabor +2 [1, 4, 2, 6, 3] Tavneet Peter statement error @@ -117,7 +113,17 @@ WITH cte1 AS ( JOIN student c on c.id = k.dst ) SELECT shortestpath(0, (select count(*) from student), a.rowid, b.rowid, 2, 3) as path, a.name as a_name, b.name as b_name FROM student a, student b, (select count(cte1.temp) * 0 as temp from cte1) __x - WHERE a.name = 'Daniel' and __x.temp * 0 + iterativelength(0, (select count(*) from student), a.rowid, b.rowid, 2, 3); + WHERE __x.temp * 0 + iterativelength(0, (select count(*) from student), a.rowid, b.rowid, 2, 3); ---- [0, 0, 1, 4, 2] Daniel Gabor [0, 1, 2, 6, 3] Daniel Peter +[1, 5, 3, 3, 0] Tavneet Daniel +[1, 5, 3, 3, 0, 1, 2] Tavneet Gabor +[1, 4, 2, 6, 3] Tavneet Peter +[2, 6, 3, 3, 0] Gabor Daniel +[2, 6, 3, 3, 0, 0, 1] Gabor Tavneet +[3, 3, 0, 0, 1] Peter Tavneet +[3, 3, 0, 1, 2] Peter Gabor +[4, 7, 3, 3, 0] David Daniel +[4, 7, 3, 3, 0, 0, 1] David Tavneet +[4, 7, 3, 3, 0, 1, 2] David Gabor diff --git a/test/sql/path-finding/shortest_path_bound.test b/test/sql/path-finding/shortest_path_bound.test index 74a7951b..6d713239 100644 --- a/test/sql/path-finding/shortest_path_bound.test +++ b/test/sql/path-finding/shortest_path_bound.test @@ -197,3 +197,104 @@ WITH cte1 AS ( WHERE a.id = 0 and __x.temp * 0 + iterativelength(0, (select count(*) from Point3), a.rowid, b.rowid, 2, 3); ---- +# Graph to test shortest path bound with a cycle +# (1) <- (0) <-> (2) +# 0 to 1 is 1 hop + +statement ok +CREATE TABLE Point4(id BIGINT); INSERT INTO Point4 VALUES (0), (1), (2); + +statement ok +CREATE TABLE know4(src BIGINT, dst BIGINT); INSERT INTO know4 VALUES (0, 1), (0, 2), (2, 0); + +statement ok +-CREATE PROPERTY GRAPH pg4 +VERTEX TABLES ( + Point4 PROPERTIES ( id ) LABEL Point4 + ) +EDGE TABLES ( + know4 SOURCE KEY ( src ) REFERENCES Point4 ( id ) + DESTINATION KEY ( dst ) REFERENCES Point4 ( id ) + LABEL know4 + ); + +query III +-FROM GRAPH_TABLE (pg4 + MATCH + p = ANY SHORTEST (a:Point4)-[k:know4]->{2,3}(b:Point4) + COLUMNS (a.id, b.id, vertices(p)) + ) tmp; +---- +2 1 [2, 0, 1] + + +# Description: Test algorithm's capability to ignore isolated nodes. +# Graph Structure: +# (0) <-> (2), (1), (3) + +statement ok +CREATE TABLE Point5(id BIGINT); INSERT INTO Point5 VALUES (0), (1), (2), (3); + +statement ok +CREATE TABLE know5(src BIGINT, dst BIGINT); INSERT INTO know5 VALUES (0, 2), (2, 0); + +statement ok +-CREATE PROPERTY GRAPH pg5 +VERTEX TABLES ( + Point5 PROPERTIES ( id ) LABEL Point5 +) +EDGE TABLES ( + know5 SOURCE KEY ( src ) REFERENCES Point5 ( id ) + DESTINATION KEY ( dst ) REFERENCES Point5 ( id ) + LABEL know5 +); + +query III +-FROM GRAPH_TABLE (pg5 + MATCH + p = ANY SHORTEST (a:Point5)-[k:know5]->{2,3}(b:Point5) + COLUMNS (a.id, b.id, vertices(p)) +) tmp; +---- + +# Description: Test shortest paths in a graph with cycles. +# Graph Structure: +# (0) <-> (2) <-> (3) -> (1) +# ↑ | +# └----------------------┘ + +statement ok +CREATE TABLE Point6(id BIGINT); INSERT INTO Point6 VALUES (0), (1), (2), (3); + +statement ok +CREATE TABLE know6(src BIGINT, dst BIGINT); INSERT INTO know6 VALUES (0, 2), (2, 0), (2, 3), (3, 2), (3, 1), (1, 0); + +statement ok +-CREATE PROPERTY GRAPH pg6 +VERTEX TABLES ( + Point6 PROPERTIES ( id ) LABEL Point6 +) +EDGE TABLES ( + know6 SOURCE KEY ( src ) REFERENCES Point6 ( id ) + DESTINATION KEY ( dst ) REFERENCES Point6 ( id ) + LABEL know6 +); + +query III +-FROM GRAPH_TABLE (pg6 + MATCH + p = ANY SHORTEST (a:Point6)-[k:know6]->{2,4}(b:Point6) + COLUMNS (a.id as id1, b.id as id2, vertices(p)) + ) tmp + order by tmp.id1, tmp.id2; +---- +0 1 [0, 2, 3, 1] +0 3 [0, 2, 3] +1 2 [1, 0, 2] +1 3 [1, 0, 2, 3] +2 0 [2, 3, 1, 0] +2 1 [2, 3, 1] +3 0 [3, 2, 0] +3 2 [3, 1, 0, 2] + + From 9ef75742814f347a317ca6ac6e40c8a9a1783af9 Mon Sep 17 00:00:00 2001 From: Pingan Ren Date: Sun, 18 Feb 2024 21:46:21 +0100 Subject: [PATCH 10/39] clean match.cpp --- duckpgq/src/duckpgq/functions/tablefunctions/match.cpp | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index 0deb7f21..1c6cb2bb 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -661,7 +661,6 @@ void PGQMatchFunction::AddPathFinding( //! START //! WHERE __x.temp + iterativelength(, (SELECT count(c.id) - //! from dst c, a.rowid, b.rowid) between lower and upper //! from dst c, a.rowid, b.rowid, lower, upper) auto src_row_id = make_uniq("rowid", prev_binding); @@ -690,18 +689,11 @@ void PGQMatchFunction::AddPathFinding( auto addition_function = make_uniq("add", std::move(addition_children)); - // auto lower_limit = make_uniq( - // Value::INTEGER(static_cast(subpath->lower))); - // auto upper_limit = make_uniq( - // Value::INTEGER(static_cast(subpath->upper))); - // auto between_expression = make_uniq( - // std::move(addition_function), std::move(lower_limit), - // std::move(upper_limit)); conditions.push_back(std::move(addition_function)); //! END //! WHERE __x.temp + iterativelength(, (SELECT count(s.id) - //! from src s, a.rowid, b.rowid) between lower and upper + //! from src s, a.rowid, b.rowid, lower, upper) } void PGQMatchFunction::CheckNamedSubpath( From bf84fe3d9f8d7da9872b10ee264b04096e30ad1c Mon Sep 17 00:00:00 2001 From: Pingan Ren Date: Tue, 20 Feb 2024 19:14:25 +0100 Subject: [PATCH 11/39] Bug fix: src == dst search result is always 0 --- .../functions/scalar/iterativelength.cpp | 9 +++++-- .../functions/scalar/shortest_path.cpp | 20 ++++++++------ test/sql/path-finding/shortest_path.test | 26 +++++++++++++------ .../sql/path-finding/shortest_path_bound.test | 9 +++++++ test/sql/path-finding/subpath_match.test | 1 + 5 files changed, 47 insertions(+), 18 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp index abbdc124..61f56808 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp @@ -133,8 +133,13 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state, result_validity.SetInvalid(search_num); result_data[search_num] = (int64_t)-1; /* no path */ } else if (src_data[src_pos] == dst_data[dst_pos]) { - result_data[search_num] = - (int64_t)0; // path of length 0 does not require a search + // result_data[search_num] = + // (int64_t)0; // path of length 0 does not require a search + result_data[search_num] = (int64_t)-1; /* no path */ + visit1[src_data[src_pos]][lane] = true; + lane_to_num[lane] = search_num; // active lane + active++; + break; } else { result_data[search_num] = (int64_t)-1; /* initialize to no path */ seen[src_data[src_pos]][lane] = true; diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp index f11df13b..aca4bdab 100644 --- a/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp @@ -161,14 +161,18 @@ static void ShortestPathFunction(DataChunk &args, ExpressionState &state, if (!vdata_src.validity.RowIsValid(src_pos)) { result_validity.SetInvalid(search_num); } else if (src_data[src_pos] == dst_data[dst_pos]) { - unique_ptr output = - make_uniq(LogicalType::LIST(LogicalType::BIGINT)); - ListVector::PushBack(*output, src_data[src_pos]); - ListVector::Append(result, ListVector::GetEntry(*output), - ListVector::GetListSize(*output)); - result_data[search_num].length = ListVector::GetListSize(*output); - result_data[search_num].offset = total_len; - total_len += result_data[search_num].length; + // unique_ptr output = + // make_uniq(LogicalType::LIST(LogicalType::BIGINT)); + // ListVector::PushBack(*output, src_data[src_pos]); + // ListVector::Append(result, ListVector::GetEntry(*output), + // ListVector::GetListSize(*output)); + // result_data[search_num].length = ListVector::GetListSize(*output); + // result_data[search_num].offset = total_len; + // total_len += result_data[search_num].length; + visit1[src_data[src_pos]][lane] = true; + lane_to_num[lane] = search_num; // active lane + active++; + break; } else { visit1[src_data[src_pos]][lane] = true; seen[src_data[src_pos]][lane] = true; diff --git a/test/sql/path-finding/shortest_path.test b/test/sql/path-finding/shortest_path.test index ccfb8572..7bfa70a8 100644 --- a/test/sql/path-finding/shortest_path.test +++ b/test/sql/path-finding/shortest_path.test @@ -50,6 +50,7 @@ query III COLUMNS (element_id(p), a.name as name, b.name as b_name) ) study; ---- +[0, 2, 3, 3, 0] Daniel Daniel [0, 0, 1, 4, 2] Daniel Gabor [0, 1, 2, 6, 3] Daniel Peter @@ -61,18 +62,22 @@ query IIII ) study order by study.name, study.b_name; ---- +2 [0, 2, 3, 3, 0] Daniel Daniel 2 [0, 0, 1, 4, 2] Daniel Gabor 2 [0, 1, 2, 6, 3] Daniel Peter 2 [4, 7, 3, 3, 0] David Daniel 3 [4, 7, 3, 3, 0, 1, 2] David Gabor 3 [4, 7, 3, 3, 0, 0, 1] David Tavneet 2 [2, 6, 3, 3, 0] Gabor Daniel +3 [2, 6, 3, 3, 0, 1, 2] Gabor Gabor 3 [2, 6, 3, 3, 0, 0, 1] Gabor Tavneet 2 [3, 3, 0, 1, 2] Peter Gabor +2 [3, 3, 0, 2, 3] Peter Peter 2 [3, 3, 0, 0, 1] Peter Tavneet 2 [1, 5, 3, 3, 0] Tavneet Daniel 3 [1, 5, 3, 3, 0, 1, 2] Tavneet Gabor 2 [1, 4, 2, 6, 3] Tavneet Peter +3 [1, 5, 3, 3, 0, 0, 1] Tavneet Tavneet statement error @@ -113,17 +118,22 @@ WITH cte1 AS ( JOIN student c on c.id = k.dst ) SELECT shortestpath(0, (select count(*) from student), a.rowid, b.rowid, 2, 3) as path, a.name as a_name, b.name as b_name FROM student a, student b, (select count(cte1.temp) * 0 as temp from cte1) __x - WHERE __x.temp * 0 + iterativelength(0, (select count(*) from student), a.rowid, b.rowid, 2, 3); + WHERE __x.temp * 0 + iterativelength(0, (select count(*) from student), a.rowid, b.rowid, 2, 3) + ORDER BY a.name, b.name; ---- +[0, 2, 3, 3, 0] Daniel Daniel [0, 0, 1, 4, 2] Daniel Gabor [0, 1, 2, 6, 3] Daniel Peter -[1, 5, 3, 3, 0] Tavneet Daniel -[1, 5, 3, 3, 0, 1, 2] Tavneet Gabor -[1, 4, 2, 6, 3] Tavneet Peter +[4, 7, 3, 3, 0] David Daniel +[4, 7, 3, 3, 0, 1, 2] David Gabor +[4, 7, 3, 3, 0, 0, 1] David Tavneet [2, 6, 3, 3, 0] Gabor Daniel +[2, 6, 3, 3, 0, 1, 2] Gabor Gabor [2, 6, 3, 3, 0, 0, 1] Gabor Tavneet -[3, 3, 0, 0, 1] Peter Tavneet [3, 3, 0, 1, 2] Peter Gabor -[4, 7, 3, 3, 0] David Daniel -[4, 7, 3, 3, 0, 0, 1] David Tavneet -[4, 7, 3, 3, 0, 1, 2] David Gabor +[3, 3, 0, 2, 3] Peter Peter +[3, 3, 0, 0, 1] Peter Tavneet +[1, 5, 3, 3, 0] Tavneet Daniel +[1, 5, 3, 3, 0, 1, 2] Tavneet Gabor +[1, 4, 2, 6, 3] Tavneet Peter +[1, 5, 3, 3, 0, 0, 1] Tavneet Tavneet \ No newline at end of file diff --git a/test/sql/path-finding/shortest_path_bound.test b/test/sql/path-finding/shortest_path_bound.test index 6d713239..013f6c50 100644 --- a/test/sql/path-finding/shortest_path_bound.test +++ b/test/sql/path-finding/shortest_path_bound.test @@ -196,6 +196,7 @@ WITH cte1 AS ( FROM Point3 a, Point3 b, (select count(cte1.temp) * 0 as temp from cte1) __x WHERE a.id = 0 and __x.temp * 0 + iterativelength(0, (select count(*) from Point3), a.rowid, b.rowid, 2, 3); ---- +0 0 2 # Graph to test shortest path bound with a cycle # (1) <- (0) <-> (2) @@ -225,7 +226,9 @@ query III COLUMNS (a.id, b.id, vertices(p)) ) tmp; ---- +0 0 [0, 2, 0] 2 1 [2, 0, 1] +2 2 [2, 0, 2] # Description: Test algorithm's capability to ignore isolated nodes. @@ -256,6 +259,8 @@ query III COLUMNS (a.id, b.id, vertices(p)) ) tmp; ---- +0 0 [0, 2, 0] +2 2 [2, 0, 2] # Description: Test shortest paths in a graph with cycles. # Graph Structure: @@ -288,13 +293,17 @@ query III ) tmp order by tmp.id1, tmp.id2; ---- +0 0 [0, 2, 0] 0 1 [0, 2, 3, 1] 0 3 [0, 2, 3] +1 1 [1, 0, 2, 3, 1] 1 2 [1, 0, 2] 1 3 [1, 0, 2, 3] 2 0 [2, 3, 1, 0] 2 1 [2, 3, 1] +2 2 [2, 3, 2] 3 0 [3, 2, 0] 3 2 [3, 1, 0, 2] +3 3 [3, 2, 3] diff --git a/test/sql/path-finding/subpath_match.test b/test/sql/path-finding/subpath_match.test index 1e8d7527..97ae2384 100644 --- a/test/sql/path-finding/subpath_match.test +++ b/test/sql/path-finding/subpath_match.test @@ -110,6 +110,7 @@ FROM GRAPH_TABLE (pg Peter Daniel Peter Tavneet Peter Gabor +Peter Peter statement error -SELECT study.a_name, study.b_name From 79b7495a1cb848113b209ed633750531224befbf Mon Sep 17 00:00:00 2001 From: Pingan Ren Date: Tue, 20 Feb 2024 21:07:09 +0100 Subject: [PATCH 12/39] If the lower bound is not greater than 1, the high performance algorithm is invoked --- duckpgq/include/duckpgq/duckpgq_functions.hpp | 4 + .../duckpgq/functions/scalar/CMakeLists.txt | 2 + .../functions/scalar/iterativelength.cpp | 79 ++---- .../scalar/iterativelength_lowerbound.cpp | 209 ++++++++++++++ .../functions/scalar/shortest_path.cpp | 201 +++++++------- .../scalar/shortest_path_lowerbound.cpp | 254 ++++++++++++++++++ .../functions/tablefunctions/match.cpp | 6 +- test/sql/path-finding/complex_matching.test | 4 +- test/sql/path-finding/shortest_path.test | 4 +- .../sql/path-finding/shortest_path_bound.test | 12 +- 10 files changed, 593 insertions(+), 182 deletions(-) create mode 100644 duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp create mode 100644 duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp diff --git a/duckpgq/include/duckpgq/duckpgq_functions.hpp b/duckpgq/include/duckpgq/duckpgq_functions.hpp index 0aaaa557..6b789daf 100644 --- a/duckpgq/include/duckpgq/duckpgq_functions.hpp +++ b/duckpgq/include/duckpgq/duckpgq_functions.hpp @@ -27,8 +27,10 @@ class DuckPGQFunctions { functions.push_back(GetCsrEdgeFunction()); functions.push_back(GetCheapestPathLengthFunction()); functions.push_back(GetShortestPathFunction()); + functions.push_back(GetShortestPathLowerBoundFunction()); functions.push_back(GetReachabilityFunction()); functions.push_back(GetIterativeLengthFunction()); + functions.push_back(GetIterativeLengthLowerBoundFunction()); functions.push_back(GetIterativeLengthBidirectionalFunction()); functions.push_back(GetIterativeLength2Function()); functions.push_back(GetDeleteCsrFunction()); @@ -57,8 +59,10 @@ class DuckPGQFunctions { static CreateScalarFunctionInfo GetCsrEdgeFunction(); static CreateScalarFunctionInfo GetCheapestPathLengthFunction(); static CreateScalarFunctionInfo GetShortestPathFunction(); + static CreateScalarFunctionInfo GetShortestPathLowerBoundFunction(); static CreateScalarFunctionInfo GetReachabilityFunction(); static CreateScalarFunctionInfo GetIterativeLengthFunction(); + static CreateScalarFunctionInfo GetIterativeLengthLowerBoundFunction(); static CreateScalarFunctionInfo GetIterativeLengthBidirectionalFunction(); static CreateScalarFunctionInfo GetIterativeLength2Function(); static CreateScalarFunctionInfo GetDeleteCsrFunction(); diff --git a/duckpgq/src/duckpgq/functions/scalar/CMakeLists.txt b/duckpgq/src/duckpgq/functions/scalar/CMakeLists.txt index 2a35040f..44c3eea2 100644 --- a/duckpgq/src/duckpgq/functions/scalar/CMakeLists.txt +++ b/duckpgq/src/duckpgq/functions/scalar/CMakeLists.txt @@ -5,10 +5,12 @@ set(EXTENSION_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/csr_deletion.cpp ${CMAKE_CURRENT_SOURCE_DIR}/csr_get_w_type.cpp ${CMAKE_CURRENT_SOURCE_DIR}/iterativelength.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/iterativelength_lowerbound.cpp ${CMAKE_CURRENT_SOURCE_DIR}/iterativelength2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/iterativelength_bidirectional.cpp ${CMAKE_CURRENT_SOURCE_DIR}/reachability.cpp ${CMAKE_CURRENT_SOURCE_DIR}/shortest_path.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/shortest_path_lowerbound.cpp ${CMAKE_CURRENT_SOURCE_DIR}/csr_creation.cpp PARENT_SCOPE ) \ No newline at end of file diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp index 61f56808..7303ec22 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp @@ -8,7 +8,6 @@ namespace duckdb { static bool IterativeLength(int64_t v_size, int64_t *v, vector &e, - vector>> &parents_v, vector> &seen, vector> &visit, vector> &next) { @@ -16,28 +15,19 @@ static bool IterativeLength(int64_t v_size, int64_t *v, vector &e, for (auto i = 0; i < v_size; i++) { next[i] = 0; } - - for (auto lane = 0; lane < LANE_LIMIT; lane++) { - for (auto i = 0; i < v_size; i++) { - if (visit[i][lane]) { - for (auto offset = v[i]; offset < v[i + 1]; offset++) { - auto n = e[offset]; - if (seen[n][lane] == false || parents_v[i][lane].find(n) == parents_v[i][lane].end()) { - parents_v[n][lane] = parents_v[i][lane]; - parents_v[n][lane].insert(i); - next[n][lane] = true; - } - } + for (auto i = 0; i < v_size; i++) { + if (visit[i].any()) { + for (auto offset = v[i]; offset < v[i + 1]; offset++) { + auto n = e[offset]; + next[n] = next[n] | visit[i]; } } } - for (auto i = 0; i < v_size; i++) { - // next[i] = next[i] & ~seen[i]; + next[i] = next[i] & ~seen[i]; seen[i] = seen[i] | next[i]; change |= next[i].any(); } - return change; } @@ -85,14 +75,10 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state, auto dst_data = (int64_t *)vdata_dst.data; // get lowerbound and upperbound - auto &lower_bound = args.data[4]; - auto &upper_bound = args.data[5]; - UnifiedVectorFormat vdata_lower_bound; + auto &upper = args.data[5]; UnifiedVectorFormat vdata_upper_bound; - lower_bound.ToUnifiedFormat(args.size(), vdata_lower_bound); - upper_bound.ToUnifiedFormat(args.size(), vdata_upper_bound); - auto lower_bound_data = (int64_t *)vdata_lower_bound.data; - auto upper_bound_data = (int64_t *)vdata_upper_bound.data; + upper.ToUnifiedFormat(args.size(), vdata_upper_bound); + auto upper_bound = ((int64_t *)vdata_upper_bound.data)[0]; ValidityMask &result_validity = FlatVector::Validity(result); @@ -104,7 +90,6 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state, vector> seen(v_size); vector> visit1(v_size); vector> visit2(v_size); - vector>> parents_v(v_size, std::vector>(LANE_LIMIT)); // maps lane to search number short lane_to_num[LANE_LIMIT]; @@ -128,21 +113,11 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state, while (started_searches < args.size()) { int64_t search_num = started_searches++; int64_t src_pos = vdata_src.sel->get_index(search_num); - int64_t dst_pos = vdata_dst.sel->get_index(search_num); if (!vdata_src.validity.RowIsValid(src_pos)) { result_validity.SetInvalid(search_num); result_data[search_num] = (int64_t)-1; /* no path */ - } else if (src_data[src_pos] == dst_data[dst_pos]) { - // result_data[search_num] = - // (int64_t)0; // path of length 0 does not require a search - result_data[search_num] = (int64_t)-1; /* no path */ - visit1[src_data[src_pos]][lane] = true; - lane_to_num[lane] = search_num; // active lane - active++; - break; } else { result_data[search_num] = (int64_t)-1; /* initialize to no path */ - seen[src_data[src_pos]][lane] = true; visit1[src_data[src_pos]][lane] = true; lane_to_num[lane] = search_num; // active lane active++; @@ -152,46 +127,24 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state, } // make passes while a lane is still active - for (int64_t iter = 1; active; iter++) { - // if (!IterativeLength(v_size, v, e, seen, (iter & 1) ? visit1 : visit2, - // (iter & 1) ? visit2 : visit1)) { - // break; - // } - bool stop = !IterativeLength(v_size, v, e, parents_v, seen, (iter & 1) ? visit1 : visit2, - (iter & 1) ? visit2 : visit1); + for (int64_t iter = 1; active && iter <= upper_bound; iter++) { + if (!IterativeLength(v_size, v, e, seen, (iter & 1) ? visit1 : visit2, + (iter & 1) ? visit2 : visit1)) { + break; + } // detect lanes that finished for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { int64_t search_num = lane_to_num[lane]; if (search_num >= 0) { // active lane int64_t dst_pos = vdata_dst.sel->get_index(search_num); if (seen[dst_data[dst_pos]][lane]){ - - // check if the path length is within bounds - // bound vector is either a constant or a flat vector - if (lower_bound.GetVectorType() == VectorType::CONSTANT_VECTOR ? - iter < lower_bound_data[0] : iter < lower_bound_data[dst_pos]) { - // when reach the destination too early, treat destination as null - // looks like the graph does not have that vertex - seen[dst_data[dst_pos]][lane] = false; - (iter & 1) ? visit2[dst_data[dst_pos]][lane] = false - : visit1[dst_data[dst_pos]][lane] = false; - continue; - } else if (upper_bound.GetVectorType() == VectorType::CONSTANT_VECTOR ? - iter > upper_bound_data[0] : iter > upper_bound_data[dst_pos]) { - result_validity.SetInvalid(search_num); - result_data[search_num] = (int64_t)-1; /* no path */ - } else { - result_data[search_num] = - iter; /* found at iter => iter = path length */ - } + result_data[search_num] = + iter; /* found at iter => iter = path length */ lane_to_num[lane] = -1; // mark inactive active--; } } } - if (stop) { - break; - } } // no changes anymore: any still active searches have no path diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp new file mode 100644 index 00000000..1fe6d52d --- /dev/null +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp @@ -0,0 +1,209 @@ +#include +#include "duckdb/main/client_data.hpp" +#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" +#include "duckpgq/common.hpp" +#include "duckpgq/duckpgq_functions.hpp" + +namespace duckdb { + +static bool IterativeLengthLowerBound(int64_t v_size, int64_t *v, vector &e, + vector>> &parents_v, + vector> &seen, + vector> &visit, + vector> &next) { + bool change = false; + for (auto i = 0; i < v_size; i++) { + next[i] = 0; + } + + for (auto lane = 0; lane < LANE_LIMIT; lane++) { + for (auto i = 0; i < v_size; i++) { + if (visit[i][lane]) { + for (auto offset = v[i]; offset < v[i + 1]; offset++) { + auto n = e[offset]; + if (seen[n][lane] == false || parents_v[i][lane].find(n) == parents_v[i][lane].end()) { + parents_v[n][lane] = parents_v[i][lane]; + parents_v[n][lane].insert(i); + next[n][lane] = true; + } + } + } + } + } + + for (auto i = 0; i < v_size; i++) { + seen[i] = seen[i] | next[i]; + change |= next[i].any(); + } + + return change; +} + +static void IterativeLengthLowerBoundFunction(DataChunk &args, ExpressionState &state, + Vector &result) { + auto &func_expr = (BoundFunctionExpression &)state.expr; + auto &info = (IterativeLengthFunctionData &)*func_expr.bind_info; + auto duckpgq_state_entry = info.context.registered_state.find("duckpgq"); + if (duckpgq_state_entry == info.context.registered_state.end()) { + //! Wondering how you can get here if the extension wasn't loaded, but + //! leaving this check in anyways + throw MissingExtensionException( + "The DuckPGQ extension has not been loaded"); + } + auto duckpgq_state = + reinterpret_cast(duckpgq_state_entry->second.get()); + + D_ASSERT(duckpgq_state->csr_list[info.csr_id]); + + if ((uint64_t)info.csr_id + 1 > duckpgq_state->csr_list.size()) { + throw ConstraintException("Invalid ID"); + } + auto csr_entry = duckpgq_state->csr_list.find((uint64_t)info.csr_id); + if (csr_entry == duckpgq_state->csr_list.end()) { + throw ConstraintException( + "Need to initialize CSR before doing shortest path"); + } + + if (!(csr_entry->second->initialized_v && csr_entry->second->initialized_e)) { + throw ConstraintException( + "Need to initialize CSR before doing shortest path"); + } + int64_t v_size = args.data[1].GetValue(0).GetValue(); + int64_t *v = (int64_t *)duckpgq_state->csr_list[info.csr_id]->v; + vector &e = duckpgq_state->csr_list[info.csr_id]->e; + + // get src and dst vectors for searches + auto &src = args.data[2]; + auto &dst = args.data[3]; + UnifiedVectorFormat vdata_src; + UnifiedVectorFormat vdata_dst; + src.ToUnifiedFormat(args.size(), vdata_src); + dst.ToUnifiedFormat(args.size(), vdata_dst); + auto src_data = (int64_t *)vdata_src.data; + auto dst_data = (int64_t *)vdata_dst.data; + + // get lowerbound and upperbound + auto &lower = args.data[4]; + auto &upper = args.data[5]; + UnifiedVectorFormat vdata_lower_bound; + UnifiedVectorFormat vdata_upper_bound; + lower.ToUnifiedFormat(args.size(), vdata_lower_bound); + upper.ToUnifiedFormat(args.size(), vdata_upper_bound); + auto lower_bound = ((int64_t *)vdata_lower_bound.data)[0]; + auto upper_bound = ((int64_t *)vdata_upper_bound.data)[0]; + + ValidityMask &result_validity = FlatVector::Validity(result); + + // create result vector + result.SetVectorType(VectorType::FLAT_VECTOR); + auto result_data = FlatVector::GetData(result); + + // create temp SIMD arrays + vector> seen(v_size); + vector> visit1(v_size); + vector> visit2(v_size); + vector>> parents_v(v_size, std::vector>(LANE_LIMIT)); + + // maps lane to search number + short lane_to_num[LANE_LIMIT]; + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + lane_to_num[lane] = -1; // inactive + } + + idx_t started_searches = 0; + while (started_searches < args.size()) { + + // empty visit vectors + for (auto i = 0; i < v_size; i++) { + seen[i] = 0; + visit1[i] = 0; + } + + // add search jobs to free lanes + uint64_t active = 0; + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + lane_to_num[lane] = -1; + while (started_searches < args.size()) { + int64_t search_num = started_searches++; + int64_t src_pos = vdata_src.sel->get_index(search_num); + int64_t dst_pos = vdata_dst.sel->get_index(search_num); + if (!vdata_src.validity.RowIsValid(src_pos)) { + result_validity.SetInvalid(search_num); + result_data[search_num] = (int64_t)-1; /* no path */ + } else if (src_data[src_pos] == dst_data[dst_pos]) { + result_data[search_num] = (int64_t)-1; /* no path */ + visit1[src_data[src_pos]][lane] = true; + lane_to_num[lane] = search_num; // active lane + active++; + break; + } else { + result_data[search_num] = (int64_t)-1; /* initialize to no path */ + seen[src_data[src_pos]][lane] = true; + visit1[src_data[src_pos]][lane] = true; + lane_to_num[lane] = search_num; // active lane + active++; + break; + } + } + } + + // make passes while a lane is still active + for (int64_t iter = 1; active && iter <= upper_bound; iter++) { + bool stop = !IterativeLengthLowerBound(v_size, v, e, parents_v, seen, (iter & 1) ? visit1 : visit2, + (iter & 1) ? visit2 : visit1); + // detect lanes that finished + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + int64_t search_num = lane_to_num[lane]; + if (search_num >= 0) { // active lane + int64_t dst_pos = vdata_dst.sel->get_index(search_num); + if (seen[dst_data[dst_pos]][lane]){ + + // check if the path length is within bounds + // bound vector is either a constant or a flat vector + if (iter < lower_bound) { + // when reach the destination too early, treat destination as null + // looks like the graph does not have that vertex + seen[dst_data[dst_pos]][lane] = false; + (iter & 1) ? visit2[dst_data[dst_pos]][lane] = false + : visit1[dst_data[dst_pos]][lane] = false; + continue; + } else { + result_data[search_num] = + iter; /* found at iter => iter = path length */ + lane_to_num[lane] = -1; // mark inactive + active--; + } + + } + } + } + if (stop) { + break; + } + } + + // no changes anymore: any still active searches have no path + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + int64_t search_num = lane_to_num[lane]; + if (search_num >= 0) { // active lane + result_validity.SetInvalid(search_num); + result_data[search_num] = (int64_t)-1; /* no path */ + lane_to_num[lane] = -1; // mark inactive + } + } + } + duckpgq_state->csr_to_delete.insert(info.csr_id); +} + +CreateScalarFunctionInfo DuckPGQFunctions::GetIterativeLengthLowerBoundFunction() { + auto fun = ScalarFunction("iterativelength_lowerbound", + {LogicalType::INTEGER, LogicalType::BIGINT, + LogicalType::BIGINT, LogicalType::BIGINT, + LogicalType::BIGINT, LogicalType::BIGINT}, + LogicalType::BIGINT, IterativeLengthLowerBoundFunction, + IterativeLengthFunctionData::IterativeLengthBind); + return CreateScalarFunctionInfo(fun); +} + +} // namespace duckdb diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp index aca4bdab..fd68ca52 100644 --- a/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp @@ -13,16 +13,12 @@ namespace duckdb { static bool IterativeLength(int64_t v_size, int64_t *V, vector &E, vector &edge_ids, - vector>> &parents_v, - vector>> &paths_v, - vector>> &paths_e, + vector> &parents_v, + vector> &parents_e, vector> &seen, vector> &visit, vector> &next) { bool change = false; - map, unordered_set> parents_v_cache; - map, vector> paths_v_cache; - map, vector> paths_e_cache; for (auto v = 0; v < v_size; v++) { next[v] = 0; } @@ -32,46 +28,20 @@ static bool IterativeLength(int64_t v_size, int64_t *V, vector &E, for (auto e = V[v]; e < V[v + 1]; e++) { auto n = E[e]; auto edge_id = edge_ids[e]; - - for (auto lane = 0; lane < LANE_LIMIT; lane++) { - if (visit[v][lane]) { - //! If the node has not been visited, then update the parent and edge - if (seen[n][lane] == false || parents_v[v][lane].find(n) == parents_v[v][lane].end()) { - if (visit[n][lane]) { - parents_v_cache[make_pair(n, lane)] = parents_v[v][lane]; - parents_v_cache[make_pair(n, lane)].insert(v); - paths_v_cache[make_pair(n, lane)] = paths_v[v][lane]; - paths_v_cache[make_pair(n, lane)].push_back(v); - paths_e_cache[make_pair(n, lane)] = paths_e[v][lane]; - paths_e_cache[make_pair(n, lane)].push_back(edge_id); - } else { - parents_v[n][lane] = parents_v[v][lane]; - parents_v[n][lane].insert(v); - paths_v[n][lane] = paths_v[v][lane]; - paths_v[n][lane].push_back(v); - paths_e[n][lane] = paths_e[v][lane]; - paths_e[n][lane].push_back(edge_id); - } - next[n][lane] = true; - } - } + next[n] = next[n] | visit[v]; + for (auto l = 0; l < LANE_LIMIT; l++) { + parents_v[n][l] = + ((parents_v[n][l] == -1) && visit[v][l]) ? v : parents_v[n][l]; + parents_e[n][l] = ((parents_e[n][l] == -1) && visit[v][l]) + ? edge_id + : parents_e[n][l]; } } } } - for (auto const& cache: parents_v_cache) { - parents_v[cache.first.first][cache.first.second] = cache.second; - } - for (auto const& cache: paths_v_cache) { - paths_v[cache.first.first][cache.first.second] = cache.second; - } - for (auto const& cache: paths_e_cache) { - paths_e[cache.first.first][cache.first.second] = cache.second; - } - for (auto v = 0; v < v_size; v++) { - // next[v] = next[v] & ~seen[v]; + next[v] = next[v] & ~seen[v]; seen[v] = seen[v] | next[v]; change |= next[v].any(); } @@ -111,14 +81,10 @@ static void ShortestPathFunction(DataChunk &args, ExpressionState &state, auto dst_data = (int64_t *)vdata_dst.data; // get lowerbound and upperbound - auto &lower_bound = args.data[4]; - auto &upper_bound = args.data[5]; - UnifiedVectorFormat vdata_lower_bound; + auto &upper = args.data[5]; UnifiedVectorFormat vdata_upper_bound; - lower_bound.ToUnifiedFormat(args.size(), vdata_lower_bound); - upper_bound.ToUnifiedFormat(args.size(), vdata_upper_bound); - auto lower_bound_data = (int64_t *)vdata_lower_bound.data; - auto upper_bound_data = (int64_t *)vdata_upper_bound.data; + upper.ToUnifiedFormat(args.size(), vdata_upper_bound); + auto upper_bound = ((int64_t *)vdata_upper_bound.data)[0]; result.SetVectorType(VectorType::FLAT_VECTOR); auto result_data = FlatVector::GetData(result); @@ -128,11 +94,10 @@ static void ShortestPathFunction(DataChunk &args, ExpressionState &state, vector> seen(v_size); vector> visit1(v_size); vector> visit2(v_size); - - vector>> parents_v(v_size, std::vector>(LANE_LIMIT)); - vector>> paths_v(v_size, std::vector>(LANE_LIMIT)); - vector>> paths_e(v_size, std::vector>(LANE_LIMIT)); - + vector> parents_v(v_size, + std::vector(LANE_LIMIT, -1)); + vector> parents_e(v_size, + std::vector(LANE_LIMIT, -1)); // maps lane to search number int16_t lane_to_num[LANE_LIMIT]; @@ -157,25 +122,15 @@ static void ShortestPathFunction(DataChunk &args, ExpressionState &state, while (started_searches < args.size()) { int64_t search_num = started_searches++; int64_t src_pos = vdata_src.sel->get_index(search_num); - int64_t dst_pos = vdata_dst.sel->get_index(search_num); if (!vdata_src.validity.RowIsValid(src_pos)) { result_validity.SetInvalid(search_num); - } else if (src_data[src_pos] == dst_data[dst_pos]) { - // unique_ptr output = - // make_uniq(LogicalType::LIST(LogicalType::BIGINT)); - // ListVector::PushBack(*output, src_data[src_pos]); - // ListVector::Append(result, ListVector::GetEntry(*output), - // ListVector::GetListSize(*output)); - // result_data[search_num].length = ListVector::GetListSize(*output); - // result_data[search_num].offset = total_len; - // total_len += result_data[search_num].length; - visit1[src_data[src_pos]][lane] = true; - lane_to_num[lane] = search_num; // active lane - active++; - break; } else { visit1[src_data[src_pos]][lane] = true; - seen[src_data[src_pos]][lane] = true; + parents_v[src_data[src_pos]][lane] = + src_data[src_pos]; // Mark source with source id + parents_e[src_data[src_pos]][lane] = + -2; // Mark the source with -2, there is no incoming edge for the + // source. lane_to_num[lane] = search_num; // active lane active++; break; @@ -184,13 +139,14 @@ static void ShortestPathFunction(DataChunk &args, ExpressionState &state, } //! make passes while a lane is still active - for (int64_t iter = 1; active; iter++) { + for (int64_t iter = 1; active && iter <= upper_bound; iter++) { //! Perform one step of bfs exploration - if (!IterativeLength(v_size, v, e, edge_ids, parents_v, paths_v, paths_e, seen, + if (!IterativeLength(v_size, v, e, edge_ids, parents_v, parents_e, seen, (iter & 1) ? visit1 : visit2, (iter & 1) ? visit2 : visit1)) { break; } + int64_t finished_searches = 0; // detect lanes that finished for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { int64_t search_num = lane_to_num[lane]; @@ -198,47 +154,78 @@ static void ShortestPathFunction(DataChunk &args, ExpressionState &state, //! Check if dst for a source has been seen int64_t dst_pos = vdata_dst.sel->get_index(search_num); if (seen[dst_data[dst_pos]][lane]) { - // check if the path length is within bounds - // bound vector is either a constant or a flat vector - if (lower_bound.GetVectorType() == VectorType::CONSTANT_VECTOR ? - iter < lower_bound_data[0] : iter < lower_bound_data[dst_pos]) { - // when reach the destination too early, treat destination as null - // looks like the graph does not have that vertex - seen[dst_data[dst_pos]][lane] = false; - (iter & 1) ? visit2[dst_data[dst_pos]][lane] = false - : visit1[dst_data[dst_pos]][lane] = false; - continue; - } else if (upper_bound.GetVectorType() == VectorType::CONSTANT_VECTOR ? - iter > upper_bound_data[0] : iter > upper_bound_data[dst_pos]) { - result_validity.SetInvalid(search_num); - } else { - vector output_vector; - auto it_v = paths_v[dst_data[dst_pos]][lane].begin(), - end_v = paths_v[dst_data[dst_pos]][lane].end(); - auto it_e = paths_e[dst_data[dst_pos]][lane].begin(), - end_e = paths_e[dst_data[dst_pos]][lane].end(); - while (it_v != end_v && it_e != end_e) { - output_vector.push_back(*it_v); - output_vector.push_back(*it_e); - it_v++; - it_e++; - } - output_vector.push_back(dst_data[dst_pos]); - auto output = make_uniq(LogicalType::LIST(LogicalType::BIGINT)); - for (auto val : output_vector) { - Value value_to_insert = val; - ListVector::PushBack(*output, value_to_insert); - } - result_data[search_num].length = ListVector::GetListSize(*output); - result_data[search_num].offset = total_len; - ListVector::Append(result, ListVector::GetEntry(*output), - ListVector::GetListSize(*output)); - total_len += result_data[search_num].length; - } - lane_to_num[lane] = -1; // mark inactive + finished_searches++; } } } + if (finished_searches == LANE_LIMIT) { + break; + } + } + //! Reconstruct the paths + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + int64_t search_num = lane_to_num[lane]; + if (search_num == -1) { // empty lanes + continue; + } + + //! Searches that have stopped have found a path + int64_t src_pos = vdata_src.sel->get_index(search_num); + int64_t dst_pos = vdata_dst.sel->get_index(search_num); + if (src_data[src_pos] == dst_data[dst_pos]) { // Source == destination + unique_ptr output = + make_uniq(LogicalType::LIST(LogicalType::BIGINT)); + ListVector::PushBack(*output, src_data[src_pos]); + ListVector::Append(result, ListVector::GetEntry(*output), + ListVector::GetListSize(*output)); + result_data[search_num].length = ListVector::GetListSize(*output); + result_data[search_num].offset = total_len; + total_len += result_data[search_num].length; + continue; + } + std::vector output_vector; + std::vector output_edge; + auto source_v = src_data[src_pos]; // Take the source + + auto parent_vertex = + parents_v[dst_data[dst_pos]] + [lane]; // Take the parent vertex of the destination vertex + auto parent_edge = + parents_e[dst_data[dst_pos]] + [lane]; // Take the parent edge of the destination vertex + + output_vector.push_back(dst_data[dst_pos]); // Add destination vertex + output_vector.push_back(parent_edge); + while (parent_vertex != source_v) { // Continue adding vertices until we + // have reached the source vertex + //! -1 is used to signify no parent + if (parent_vertex == -1 || + parent_vertex == parents_v[parent_vertex][lane]) { + result_validity.SetInvalid(search_num); + break; + } + output_vector.push_back(parent_vertex); + parent_edge = parents_e[parent_vertex][lane]; + parent_vertex = parents_v[parent_vertex][lane]; + output_vector.push_back(parent_edge); + } + + if (!result_validity.RowIsValid(search_num)) { + continue; + } + output_vector.push_back(source_v); + std::reverse(output_vector.begin(), output_vector.end()); + auto output = make_uniq(LogicalType::LIST(LogicalType::BIGINT)); + for (auto val : output_vector) { + Value value_to_insert = val; + ListVector::PushBack(*output, value_to_insert); + } + + result_data[search_num].length = ListVector::GetListSize(*output); + result_data[search_num].offset = total_len; + ListVector::Append(result, ListVector::GetEntry(*output), + ListVector::GetListSize(*output)); + total_len += result_data[search_num].length; } } duckpgq_state->csr_to_delete.insert(info.csr_id); diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp new file mode 100644 index 00000000..63daba93 --- /dev/null +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp @@ -0,0 +1,254 @@ +#include "duckdb/common/fstream.hpp" +#include "duckdb/common/profiler.hpp" +#include "duckdb/main/client_data.hpp" +#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" +#include "duckpgq/common.hpp" +#include "duckpgq/duckpgq_functions.hpp" + +#include +#include + +namespace duckdb { + +static bool IterativeLengthLowerBound(int64_t v_size, int64_t *V, vector &E, + vector &edge_ids, + vector>> &parents_v, + vector>> &paths_v, + vector>> &paths_e, + vector> &seen, + vector> &visit, + vector> &next) { + bool change = false; + map, unordered_set> parents_v_cache; + map, vector> paths_v_cache; + map, vector> paths_e_cache; + for (auto v = 0; v < v_size; v++) { + next[v] = 0; + } + //! Keep track of edge id through which the node was reached + for (auto v = 0; v < v_size; v++) { + if (visit[v].any()) { + for (auto e = V[v]; e < V[v + 1]; e++) { + auto n = E[e]; + auto edge_id = edge_ids[e]; + + for (auto lane = 0; lane < LANE_LIMIT; lane++) { + if (visit[v][lane]) { + //! If the node has not been visited, then update the parent and edge + if (seen[n][lane] == false || parents_v[v][lane].find(n) == parents_v[v][lane].end()) { + if (visit[n][lane]) { + parents_v_cache[make_pair(n, lane)] = parents_v[v][lane]; + parents_v_cache[make_pair(n, lane)].insert(v); + paths_v_cache[make_pair(n, lane)] = paths_v[v][lane]; + paths_v_cache[make_pair(n, lane)].push_back(v); + paths_e_cache[make_pair(n, lane)] = paths_e[v][lane]; + paths_e_cache[make_pair(n, lane)].push_back(edge_id); + } else { + parents_v[n][lane] = parents_v[v][lane]; + parents_v[n][lane].insert(v); + paths_v[n][lane] = paths_v[v][lane]; + paths_v[n][lane].push_back(v); + paths_e[n][lane] = paths_e[v][lane]; + paths_e[n][lane].push_back(edge_id); + } + next[n][lane] = true; + } + } + } + } + } + } + + for (auto const& cache: parents_v_cache) { + parents_v[cache.first.first][cache.first.second] = cache.second; + } + for (auto const& cache: paths_v_cache) { + paths_v[cache.first.first][cache.first.second] = cache.second; + } + for (auto const& cache: paths_e_cache) { + paths_e[cache.first.first][cache.first.second] = cache.second; + } + + for (auto v = 0; v < v_size; v++) { + seen[v] = seen[v] | next[v]; + change |= next[v].any(); + } + return change; +} + +static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &state, + Vector &result) { + auto &func_expr = (BoundFunctionExpression &)state.expr; + auto &info = (IterativeLengthFunctionData &)*func_expr.bind_info; + auto duckpgq_state_entry = info.context.registered_state.find("duckpgq"); + if (duckpgq_state_entry == info.context.registered_state.end()) { + //! Wondering how you can get here if the extension wasn't loaded, but + //! leaving this check in anyways + throw MissingExtensionException( + "The DuckPGQ extension has not been loaded"); + } + auto duckpgq_state = + reinterpret_cast(duckpgq_state_entry->second.get()); + + D_ASSERT(duckpgq_state->csr_list[info.csr_id]); + int32_t id = args.data[0].GetValue(0).GetValue(); + int64_t v_size = args.data[1].GetValue(0).GetValue(); + + int64_t *v = (int64_t *)duckpgq_state->csr_list[id]->v; + vector &e = duckpgq_state->csr_list[id]->e; + vector &edge_ids = duckpgq_state->csr_list[id]->edge_ids; + + auto &src = args.data[2]; + auto &target = args.data[3]; + + UnifiedVectorFormat vdata_src, vdata_dst; + src.ToUnifiedFormat(args.size(), vdata_src); + target.ToUnifiedFormat(args.size(), vdata_dst); + + auto src_data = (int64_t *)vdata_src.data; + auto dst_data = (int64_t *)vdata_dst.data; + + // get lowerbound and upperbound + auto &lower = args.data[4]; + auto &upper = args.data[5]; + UnifiedVectorFormat vdata_lower_bound; + UnifiedVectorFormat vdata_upper_bound; + lower.ToUnifiedFormat(args.size(), vdata_lower_bound); + upper.ToUnifiedFormat(args.size(), vdata_upper_bound); + auto lower_bound = ((int64_t *)vdata_lower_bound.data)[0]; + auto upper_bound = ((int64_t *)vdata_upper_bound.data)[0]; + + result.SetVectorType(VectorType::FLAT_VECTOR); + auto result_data = FlatVector::GetData(result); + ValidityMask &result_validity = FlatVector::Validity(result); + + // create temp SIMD arrays + vector> seen(v_size); + vector> visit1(v_size); + vector> visit2(v_size); + + vector>> parents_v(v_size, std::vector>(LANE_LIMIT)); + vector>> paths_v(v_size, std::vector>(LANE_LIMIT)); + vector>> paths_e(v_size, std::vector>(LANE_LIMIT)); + + + // maps lane to search number + int16_t lane_to_num[LANE_LIMIT]; + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + lane_to_num[lane] = -1; // inactive + } + int64_t total_len = 0; + + idx_t started_searches = 0; + while (started_searches < args.size()) { + + // empty visit vectors + for (auto i = 0; i < v_size; i++) { + seen[i] = 0; + visit1[i] = 0; + } + + // add search jobs to free lanes + uint64_t active = 0; + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + lane_to_num[lane] = -1; + while (started_searches < args.size()) { + int64_t search_num = started_searches++; + int64_t src_pos = vdata_src.sel->get_index(search_num); + int64_t dst_pos = vdata_dst.sel->get_index(search_num); + if (!vdata_src.validity.RowIsValid(src_pos)) { + result_validity.SetInvalid(search_num); + } else if (src_data[src_pos] == dst_data[dst_pos]) { + visit1[src_data[src_pos]][lane] = true; + lane_to_num[lane] = search_num; // active lane + active++; + break; + } else { + visit1[src_data[src_pos]][lane] = true; + seen[src_data[src_pos]][lane] = true; + lane_to_num[lane] = search_num; // active lane + active++; + break; + } + } + } + + //! make passes while a lane is still active + for (int64_t iter = 1; active && iter <= upper_bound; iter++) { + //! Perform one step of bfs exploration + if (!IterativeLengthLowerBound(v_size, v, e, edge_ids, parents_v, paths_v, paths_e, seen, + (iter & 1) ? visit1 : visit2, + (iter & 1) ? visit2 : visit1)) { + break; + } + // detect lanes that finished + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + int64_t search_num = lane_to_num[lane]; + if (search_num >= 0) { // active lane + //! Check if dst for a source has been seen + int64_t dst_pos = vdata_dst.sel->get_index(search_num); + if (seen[dst_data[dst_pos]][lane]) { + // check if the path length is within bounds + // bound vector is either a constant or a flat vector + if (iter < lower_bound) { + // when reach the destination too early, treat destination as null + // looks like the graph does not have that vertex + seen[dst_data[dst_pos]][lane] = false; + (iter & 1) ? visit2[dst_data[dst_pos]][lane] = false + : visit1[dst_data[dst_pos]][lane] = false; + continue; + } else { + vector output_vector; + auto it_v = paths_v[dst_data[dst_pos]][lane].begin(), + end_v = paths_v[dst_data[dst_pos]][lane].end(); + auto it_e = paths_e[dst_data[dst_pos]][lane].begin(), + end_e = paths_e[dst_data[dst_pos]][lane].end(); + while (it_v != end_v && it_e != end_e) { + output_vector.push_back(*it_v); + output_vector.push_back(*it_e); + it_v++; + it_e++; + } + output_vector.push_back(dst_data[dst_pos]); + auto output = make_uniq(LogicalType::LIST(LogicalType::BIGINT)); + for (auto val : output_vector) { + Value value_to_insert = val; + ListVector::PushBack(*output, value_to_insert); + } + result_data[search_num].length = ListVector::GetListSize(*output); + result_data[search_num].offset = total_len; + ListVector::Append(result, ListVector::GetEntry(*output), + ListVector::GetListSize(*output)); + total_len += result_data[search_num].length; + lane_to_num[lane] = -1; // mark inactive + } + } + } + } + } + + // no changes anymore: any still active searches have no path + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + int64_t search_num = lane_to_num[lane]; + if (search_num >= 0) { // active lane + result_validity.SetInvalid(search_num); + lane_to_num[lane] = -1; // mark inactive + } + } + } + duckpgq_state->csr_to_delete.insert(info.csr_id); +} + +CreateScalarFunctionInfo DuckPGQFunctions::GetShortestPathLowerBoundFunction() { + auto fun = ScalarFunction("shortestpath_lowerbound", + {LogicalType::INTEGER, LogicalType::BIGINT, + LogicalType::BIGINT, LogicalType::BIGINT, + LogicalType::BIGINT, LogicalType::BIGINT}, + LogicalType::LIST(LogicalType::BIGINT), + ShortestPathLowerBoundFunction, + IterativeLengthFunctionData::IterativeLengthBind); + return CreateScalarFunctionInfo(fun); +} + +} // namespace duckdb diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index 1c6cb2bb..f098e330 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -541,7 +541,8 @@ unique_ptr PGQMatchFunction::CreatePathFindingFunction( make_uniq(Value::INTEGER(static_cast(edge_subpath->upper)))); auto shortest_path_function = make_uniq( - "shortestpath", std::move(pathfinding_children)); + edge_subpath->lower > 1 ? "shortestpath_lowerbound": "shortestpath", + std::move(pathfinding_children)); if (!final_list) { final_list = std::move(shortest_path_function); @@ -679,7 +680,8 @@ void PGQMatchFunction::AddPathFinding( make_uniq(Value::INTEGER(static_cast(subpath->upper)))); auto reachability_function = make_uniq( - "iterativelength", std::move(pathfinding_children)); + subpath->lower > 1 ? "iterativelength_lowerbound": "iterativelength", + std::move(pathfinding_children)); auto cte_col_ref = make_uniq("temp", "__x"); diff --git a/test/sql/path-finding/complex_matching.test b/test/sql/path-finding/complex_matching.test index 25d62a15..e67934e2 100644 --- a/test/sql/path-finding/complex_matching.test +++ b/test/sql/path-finding/complex_matching.test @@ -76,7 +76,7 @@ query IIIIIII 28587302322223 [0, 1, 26, 64, 33, 78, 38] 3 [0, 26, 33, 38] [1, 64, 78] 14 24189255811081 30786325577731 [0, 1, 26, 64, 33, 79, 39] 3 [0, 26, 33, 39] [1, 64, 79] 14 24189255811081 32985348833329 [0, 1, 26, 64, 33, 80, 43] 3 [0, 26, 33, 43] [1, 64, 80] 14 24189255811081 -35184372088850 [0, 1, 26, 64, 33, 77, 36, 82, 45] 4 [0, 26, 33, 36, 45] [1, 64, 77, 82] 14 24189255811081 +35184372088850 [0, 1, 26, 63, 32, 76, 36, 82, 45] 4 [0, 26, 32, 36, 45] [1, 63, 76, 82] 14 24189255811081 28587302322204 [0, 2, 32, 75, 33, 77, 36] 3 [0, 32, 33, 36] [2, 75, 77] 14 26388279066668 28587302322223 [0, 2, 32, 75, 33, 78, 38] 3 [0, 32, 33, 38] [2, 75, 78] 14 26388279066668 30786325577731 [0, 2, 32, 75, 33, 79, 39] 3 [0, 32, 33, 39] [2, 75, 79] 14 26388279066668 @@ -366,5 +366,5 @@ query III [1, 3, 5, 27, 40] 16 30786325577740 [1, 5, 33, 80, 43] 16 32985348833329 [1, 3, 5, 22, 26, 66, 44] 16 35184372088834 -[1, 6, 36, 82, 45] 16 35184372088850 +[1, 3, 5, 28, 45] 16 35184372088850 [1, 3, 5, 23, 31, 74, 46] 16 35184372088856 diff --git a/test/sql/path-finding/shortest_path.test b/test/sql/path-finding/shortest_path.test index 7bfa70a8..3969b411 100644 --- a/test/sql/path-finding/shortest_path.test +++ b/test/sql/path-finding/shortest_path.test @@ -116,9 +116,9 @@ WITH cte1 AS ( FROM Know k JOIN student a on a.id = k.src JOIN student c on c.id = k.dst -) SELECT shortestpath(0, (select count(*) from student), a.rowid, b.rowid, 2, 3) as path, a.name as a_name, b.name as b_name +) SELECT shortestpath_lowerbound(0, (select count(*) from student), a.rowid, b.rowid, 2, 3) as path, a.name as a_name, b.name as b_name FROM student a, student b, (select count(cte1.temp) * 0 as temp from cte1) __x - WHERE __x.temp * 0 + iterativelength(0, (select count(*) from student), a.rowid, b.rowid, 2, 3) + WHERE __x.temp * 0 + iterativelength_lowerbound(0, (select count(*) from student), a.rowid, b.rowid, 2, 3) ORDER BY a.name, b.name; ---- [0, 2, 3, 3, 0] Daniel Daniel diff --git a/test/sql/path-finding/shortest_path_bound.test b/test/sql/path-finding/shortest_path_bound.test index 013f6c50..97ee93ba 100644 --- a/test/sql/path-finding/shortest_path_bound.test +++ b/test/sql/path-finding/shortest_path_bound.test @@ -53,9 +53,9 @@ WITH cte1 AS ( FROM Know k JOIN Point a on a.id = k.src JOIN Point c on c.id = k.dst -) SELECT a.id as srd_id, b.id as dst_id, iterativelength(0, (select count(*) from Point), a.rowid, b.rowid, 2, 3) as path_length +) SELECT a.id as srd_id, b.id as dst_id, iterativelength_lowerbound(0, (select count(*) from Point), a.rowid, b.rowid, 2, 3) as path_length FROM Point a, Point b, (select count(cte1.temp) * 0 as temp from cte1) __x - WHERE a.id = 0 and __x.temp * 0 + iterativelength(0, (select count(*) from Point), a.rowid, b.rowid, 2, 3); + WHERE a.id = 0 and __x.temp * 0 + iterativelength_lowerbound(0, (select count(*) from Point), a.rowid, b.rowid, 2, 3); ---- 0 1 3 0 3 2 @@ -139,9 +139,9 @@ WITH cte1 AS ( FROM know2 k JOIN Point2 a on a.id = k.src JOIN Point2 c on c.id = k.dst -) SELECT a.id, b.id, iterativelength(0, (select count(*) from Point2), a.rowid, b.rowid, 2, 30) as path_length +) SELECT a.id, b.id, iterativelength_lowerbound(0, (select count(*) from Point2), a.rowid, b.rowid, 2, 30) as path_length FROM Point2 a, Point2 b, (select count(cte1.temp) * 0 as temp from cte1) __x - WHERE a.id = 0 and __x.temp * 0 + iterativelength(0, (select count(*) from Point2), a.rowid, b.rowid, 2, 30); + WHERE a.id = 0 and __x.temp * 0 + iterativelength_lowerbound(0, (select count(*) from Point2), a.rowid, b.rowid, 2, 30); ---- 0 2 2 @@ -192,9 +192,9 @@ WITH cte1 AS ( FROM know3 k JOIN Point3 a on a.id = k.src JOIN Point3 c on c.id = k.dst -) SELECT a.id, b.id, iterativelength(0, (select count(*) from Point3), a.rowid, b.rowid, 2, 3) as path_length +) SELECT a.id, b.id, iterativelength_lowerbound(0, (select count(*) from Point3), a.rowid, b.rowid, 2, 3) as path_length FROM Point3 a, Point3 b, (select count(cte1.temp) * 0 as temp from cte1) __x - WHERE a.id = 0 and __x.temp * 0 + iterativelength(0, (select count(*) from Point3), a.rowid, b.rowid, 2, 3); + WHERE a.id = 0 and __x.temp * 0 + iterativelength_lowerbound(0, (select count(*) from Point3), a.rowid, b.rowid, 2, 3); ---- 0 0 2 From 90c608537c3d65d4c4888c29d8d3dbf2db25edad Mon Sep 17 00:00:00 2001 From: Pingan Ren Date: Tue, 20 Feb 2024 21:12:19 +0100 Subject: [PATCH 13/39] Keep the original algorithm consistent --- duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp | 2 +- duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp index 7303ec22..10ab0113 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp @@ -137,7 +137,7 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state, int64_t search_num = lane_to_num[lane]; if (search_num >= 0) { // active lane int64_t dst_pos = vdata_dst.sel->get_index(search_num); - if (seen[dst_data[dst_pos]][lane]){ + if (seen[dst_data[dst_pos]][lane]) { result_data[search_num] = iter; /* found at iter => iter = path length */ lane_to_num[lane] = -1; // mark inactive diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp index fd68ca52..7f857204 100644 --- a/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp @@ -97,7 +97,7 @@ static void ShortestPathFunction(DataChunk &args, ExpressionState &state, vector> parents_v(v_size, std::vector(LANE_LIMIT, -1)); vector> parents_e(v_size, - std::vector(LANE_LIMIT, -1)); + std::vector(LANE_LIMIT, -1)); // maps lane to search number int16_t lane_to_num[LANE_LIMIT]; @@ -113,6 +113,10 @@ static void ShortestPathFunction(DataChunk &args, ExpressionState &state, for (auto i = 0; i < v_size; i++) { seen[i] = 0; visit1[i] = 0; + for (auto j = 0; j < LANE_LIMIT; j++) { + parents_v[i][j] = -1; + parents_e[i][j] = -1; + } } // add search jobs to free lanes From 03416347bcfa8dccd8e6916c59ae019dca9ccaa1 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Wed, 21 Feb 2024 10:41:34 +0100 Subject: [PATCH 14/39] Format fix --- .../duckpgq/functions/scalar/csr_creation.cpp | 9 ++- .../scalar/iterativelength_lowerbound.cpp | 47 ++++++------ .../duckpgq/functions/scalar/reachability.cpp | 3 +- .../functions/scalar/shortest_path.cpp | 13 ++-- .../scalar/shortest_path_lowerbound.cpp | 74 ++++++++++--------- .../tablefunctions/create_property_graph.cpp | 58 ++++++++++----- .../functions/tablefunctions/match.cpp | 29 ++++---- duckpgq/src/duckpgq_extension.cpp | 5 +- 8 files changed, 138 insertions(+), 100 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/scalar/csr_creation.cpp b/duckpgq/src/duckpgq/functions/scalar/csr_creation.cpp index 14f954ef..c9c5180a 100644 --- a/duckpgq/src/duckpgq/functions/scalar/csr_creation.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/csr_creation.cpp @@ -36,7 +36,8 @@ static void CsrInitializeVertex(DuckPGQState &context, int32_t id, csr->initialized_v = true; context.csr_list[id] = std::move(csr); } catch (std::bad_alloc const &) { - throw Exception(ExceptionType::INTERNAL, "Unable to initialize vector of size for csr vertex table " + throw Exception(ExceptionType::INTERNAL, + "Unable to initialize vector of size for csr vertex table " "representation"); } @@ -55,7 +56,8 @@ static void CsrInitializeEdge(DuckPGQState &context, int32_t id, int64_t v_size, csr_entry->second->e.resize(e_size, 0); csr_entry->second->edge_ids.resize(e_size, 0); } catch (std::bad_alloc const &) { - throw Exception(ExceptionType::INTERNAL, "Unable to initialize vector of size for csr edge table " + throw Exception(ExceptionType::INTERNAL, + "Unable to initialize vector of size for csr edge table " "representation"); } for (auto i = 1; i < v_size + 2; i++) { @@ -82,7 +84,8 @@ static void CsrInitializeWeight(DuckPGQState &context, int32_t id, throw NotImplementedException("Unrecognized weight type detected."); } } catch (std::bad_alloc const &) { - throw Exception(ExceptionType::INTERNAL, "Unable to initialize vector of size for csr weight table " + throw Exception(ExceptionType::INTERNAL, + "Unable to initialize vector of size for csr weight table " "representation"); } diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp index 1fe6d52d..70f394a0 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp @@ -7,11 +7,12 @@ namespace duckdb { -static bool IterativeLengthLowerBound(int64_t v_size, int64_t *v, vector &e, - vector>> &parents_v, - vector> &seen, - vector> &visit, - vector> &next) { +static bool +IterativeLengthLowerBound(int64_t v_size, int64_t *v, vector &e, + vector>> &parents_v, + vector> &seen, + vector> &visit, + vector> &next) { bool change = false; for (auto i = 0; i < v_size; i++) { next[i] = 0; @@ -22,7 +23,8 @@ static bool IterativeLengthLowerBound(int64_t v_size, int64_t *v, vector> seen(v_size); vector> visit1(v_size); vector> visit2(v_size); - vector>> parents_v(v_size, std::vector>(LANE_LIMIT)); + vector>> parents_v( + v_size, std::vector>(LANE_LIMIT)); // maps lane to search number short lane_to_num[LANE_LIMIT]; @@ -150,14 +154,15 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, ExpressionState & // make passes while a lane is still active for (int64_t iter = 1; active && iter <= upper_bound; iter++) { - bool stop = !IterativeLengthLowerBound(v_size, v, e, parents_v, seen, (iter & 1) ? visit1 : visit2, - (iter & 1) ? visit2 : visit1); + bool stop = !IterativeLengthLowerBound(v_size, v, e, parents_v, seen, + (iter & 1) ? visit1 : visit2, + (iter & 1) ? visit2 : visit1); // detect lanes that finished for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { int64_t search_num = lane_to_num[lane]; if (search_num >= 0) { // active lane int64_t dst_pos = vdata_dst.sel->get_index(search_num); - if (seen[dst_data[dst_pos]][lane]){ + if (seen[dst_data[dst_pos]][lane]) { // check if the path length is within bounds // bound vector is either a constant or a flat vector @@ -174,7 +179,6 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, ExpressionState & lane_to_num[lane] = -1; // mark inactive active--; } - } } } @@ -196,13 +200,14 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, ExpressionState & duckpgq_state->csr_to_delete.insert(info.csr_id); } -CreateScalarFunctionInfo DuckPGQFunctions::GetIterativeLengthLowerBoundFunction() { - auto fun = ScalarFunction("iterativelength_lowerbound", - {LogicalType::INTEGER, LogicalType::BIGINT, - LogicalType::BIGINT, LogicalType::BIGINT, - LogicalType::BIGINT, LogicalType::BIGINT}, - LogicalType::BIGINT, IterativeLengthLowerBoundFunction, - IterativeLengthFunctionData::IterativeLengthBind); +CreateScalarFunctionInfo +DuckPGQFunctions::GetIterativeLengthLowerBoundFunction() { + auto fun = ScalarFunction( + "iterativelength_lowerbound", + {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, + LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, + LogicalType::BIGINT, IterativeLengthLowerBoundFunction, + IterativeLengthFunctionData::IterativeLengthBind); return CreateScalarFunctionInfo(fun); } diff --git a/duckpgq/src/duckpgq/functions/scalar/reachability.cpp b/duckpgq/src/duckpgq/functions/scalar/reachability.cpp index 5c6443fb..1408bd60 100644 --- a/duckpgq/src/duckpgq/functions/scalar/reachability.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/reachability.cpp @@ -245,7 +245,8 @@ static void ReachabilityFunction(DataChunk &args, ExpressionState &state, break; } default: - throw Exception(ExceptionType::INTERNAL, "Unknown reachability mode encountered"); + throw Exception(ExceptionType::INTERNAL, + "Unknown reachability mode encountered"); } } else { exit_early = BfsWithoutArray(exit_early, csr, input_size, seen, visit, diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp index 7f857204..19a43ec5 100644 --- a/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path.cpp @@ -236,13 +236,12 @@ static void ShortestPathFunction(DataChunk &args, ExpressionState &state, } CreateScalarFunctionInfo DuckPGQFunctions::GetShortestPathFunction() { - auto fun = ScalarFunction("shortestpath", - {LogicalType::INTEGER, LogicalType::BIGINT, - LogicalType::BIGINT, LogicalType::BIGINT, - LogicalType::BIGINT, LogicalType::BIGINT}, - LogicalType::LIST(LogicalType::BIGINT), - ShortestPathFunction, - IterativeLengthFunctionData::IterativeLengthBind); + auto fun = ScalarFunction( + "shortestpath", + {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, + LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, + LogicalType::LIST(LogicalType::BIGINT), ShortestPathFunction, + IterativeLengthFunctionData::IterativeLengthBind); return CreateScalarFunctionInfo(fun); } diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp index 63daba93..af6e592a 100644 --- a/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp @@ -11,14 +11,15 @@ namespace duckdb { -static bool IterativeLengthLowerBound(int64_t v_size, int64_t *V, vector &E, - vector &edge_ids, - vector>> &parents_v, - vector>> &paths_v, - vector>> &paths_e, - vector> &seen, - vector> &visit, - vector> &next) { +static bool +IterativeLengthLowerBound(int64_t v_size, int64_t *V, vector &E, + vector &edge_ids, + vector>> &parents_v, + vector>> &paths_v, + vector>> &paths_e, + vector> &seen, + vector> &visit, + vector> &next) { bool change = false; map, unordered_set> parents_v_cache; map, vector> paths_v_cache; @@ -35,8 +36,10 @@ static bool IterativeLengthLowerBound(int64_t v_size, int64_t *V, vector> visit1(v_size); vector> visit2(v_size); - vector>> parents_v(v_size, std::vector>(LANE_LIMIT)); - vector>> paths_v(v_size, std::vector>(LANE_LIMIT)); - vector>> paths_e(v_size, std::vector>(LANE_LIMIT)); - + vector>> parents_v( + v_size, std::vector>(LANE_LIMIT)); + vector>> paths_v( + v_size, std::vector>(LANE_LIMIT)); + vector>> paths_e( + v_size, std::vector>(LANE_LIMIT)); // maps lane to search number int16_t lane_to_num[LANE_LIMIT]; @@ -177,9 +183,9 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta //! make passes while a lane is still active for (int64_t iter = 1; active && iter <= upper_bound; iter++) { //! Perform one step of bfs exploration - if (!IterativeLengthLowerBound(v_size, v, e, edge_ids, parents_v, paths_v, paths_e, seen, - (iter & 1) ? visit1 : visit2, - (iter & 1) ? visit2 : visit1)) { + if (!IterativeLengthLowerBound( + v_size, v, e, edge_ids, parents_v, paths_v, paths_e, seen, + (iter & 1) ? visit1 : visit2, (iter & 1) ? visit2 : visit1)) { break; } // detect lanes that finished @@ -203,7 +209,7 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta auto it_v = paths_v[dst_data[dst_pos]][lane].begin(), end_v = paths_v[dst_data[dst_pos]][lane].end(); auto it_e = paths_e[dst_data[dst_pos]][lane].begin(), - end_e = paths_e[dst_data[dst_pos]][lane].end(); + end_e = paths_e[dst_data[dst_pos]][lane].end(); while (it_v != end_v && it_e != end_e) { output_vector.push_back(*it_v); output_vector.push_back(*it_e); @@ -211,7 +217,8 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta it_e++; } output_vector.push_back(dst_data[dst_pos]); - auto output = make_uniq(LogicalType::LIST(LogicalType::BIGINT)); + auto output = + make_uniq(LogicalType::LIST(LogicalType::BIGINT)); for (auto val : output_vector) { Value value_to_insert = val; ListVector::PushBack(*output, value_to_insert); @@ -219,7 +226,7 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta result_data[search_num].length = ListVector::GetListSize(*output); result_data[search_num].offset = total_len; ListVector::Append(result, ListVector::GetEntry(*output), - ListVector::GetListSize(*output)); + ListVector::GetListSize(*output)); total_len += result_data[search_num].length; lane_to_num[lane] = -1; // mark inactive } @@ -233,7 +240,7 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta int64_t search_num = lane_to_num[lane]; if (search_num >= 0) { // active lane result_validity.SetInvalid(search_num); - lane_to_num[lane] = -1; // mark inactive + lane_to_num[lane] = -1; // mark inactive } } } @@ -241,13 +248,12 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta } CreateScalarFunctionInfo DuckPGQFunctions::GetShortestPathLowerBoundFunction() { - auto fun = ScalarFunction("shortestpath_lowerbound", - {LogicalType::INTEGER, LogicalType::BIGINT, - LogicalType::BIGINT, LogicalType::BIGINT, - LogicalType::BIGINT, LogicalType::BIGINT}, - LogicalType::LIST(LogicalType::BIGINT), - ShortestPathLowerBoundFunction, - IterativeLengthFunctionData::IterativeLengthBind); + auto fun = ScalarFunction( + "shortestpath_lowerbound", + {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, + LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, + LogicalType::LIST(LogicalType::BIGINT), ShortestPathLowerBoundFunction, + IterativeLengthFunctionData::IterativeLengthBind); return CreateScalarFunctionInfo(fun); } diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/create_property_graph.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/create_property_graph.cpp index c8ce874e..58db4769 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/create_property_graph.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/create_property_graph.cpp @@ -7,15 +7,17 @@ void CreatePropertyGraphFunction::CheckPropertyGraphTableLabels( const shared_ptr &pg_table, TableCatalogEntry &table) { if (!pg_table->discriminator.empty()) { if (!table.ColumnExists(pg_table->discriminator)) { - throw Exception(ExceptionType::INVALID, "Column " + pg_table->discriminator + - " not found in table " + pg_table->table_name); + throw Exception(ExceptionType::INVALID, + "Column " + pg_table->discriminator + + " not found in table " + pg_table->table_name); } auto &column = table.GetColumn(pg_table->discriminator); if (!(column.GetType() == LogicalType::BIGINT || column.GetType() == LogicalType::INTEGER)) { - throw Exception(ExceptionType::INVALID, "The discriminator column " + - pg_table->discriminator + " of table " + - pg_table->table_name + " should be of type BIGINT or INTEGER"); + throw Exception(ExceptionType::INVALID, + "The discriminator column " + pg_table->discriminator + + " of table " + pg_table->table_name + + " should be of type BIGINT or INTEGER"); } } } @@ -29,8 +31,9 @@ void CreatePropertyGraphFunction::CheckPropertyGraphTableColumns( if (pg_table->all_columns) { for (auto &except_column : pg_table->except_columns) { if (!table.ColumnExists(except_column)) { - throw Exception(ExceptionType::INVALID, "Except column " + except_column + - " not found in table " + pg_table->table_name); + throw Exception(ExceptionType::INVALID, + "Except column " + except_column + + " not found in table " + pg_table->table_name); } } @@ -50,13 +53,13 @@ void CreatePropertyGraphFunction::CheckPropertyGraphTableColumns( for (auto &column : pg_table->column_names) { if (!table.ColumnExists(column)) { throw Exception(ExceptionType::INVALID, "Column " + column + - " not found in table " + pg_table->table_name); + " not found in table " + + pg_table->table_name); } } } -unique_ptr -CreatePropertyGraphFunction::CreatePropertyGraphBind( +unique_ptr CreatePropertyGraphFunction::CreatePropertyGraphBind( ClientContext &context, TableFunctionBindInput &input, vector &return_types, vector &names) { names.emplace_back("Success"); @@ -80,7 +83,9 @@ CreatePropertyGraphFunction::CreatePropertyGraphBind( duckpgq_state->registered_property_graphs.find(info->property_graph_name); if (pg_table != duckpgq_state->registered_property_graphs.end()) { - throw Exception(ExceptionType::INVALID, "Property graph table with name " + info->property_graph_name + " already exists"); + throw Exception(ExceptionType::INVALID, "Property graph table with name " + + info->property_graph_name + + " already exists"); } auto &catalog = Catalog::GetCatalog(context, info->catalog); @@ -107,20 +112,26 @@ CreatePropertyGraphFunction::CreatePropertyGraphBind( if (v_table_names.find(edge_table->source_reference) == v_table_names.end()) { - throw Exception(ExceptionType::INVALID, "Referenced vertex table " + edge_table->source_reference + " does not exist."); + throw Exception(ExceptionType::INVALID, "Referenced vertex table " + + edge_table->source_reference + + " does not exist."); } auto &pk_source_table = catalog.GetEntry( context, info->schema, edge_table->source_reference); for (auto &pk : edge_table->source_pk) { if (!pk_source_table.ColumnExists(pk)) { - throw Exception(ExceptionType::INVALID, "Primary key " + pk + " does not exist in table " + edge_table->source_reference); + throw Exception(ExceptionType::INVALID, + "Primary key " + pk + " does not exist in table " + + edge_table->source_reference); } } if (v_table_names.find(edge_table->source_reference) == v_table_names.end()) { - throw Exception(ExceptionType::INVALID, "Referenced vertex table " + edge_table->source_reference + " does not exist"); + throw Exception(ExceptionType::INVALID, "Referenced vertex table " + + edge_table->source_reference + + " does not exist"); } auto &pk_destination_table = catalog.GetEntry( @@ -128,19 +139,25 @@ CreatePropertyGraphFunction::CreatePropertyGraphBind( for (auto &pk : edge_table->destination_pk) { if (!pk_destination_table.ColumnExists(pk)) { - throw Exception(ExceptionType::INVALID,"Primary key " + pk + " does not exist in table " + edge_table->destination_reference); + throw Exception(ExceptionType::INVALID, + "Primary key " + pk + " does not exist in table " + + edge_table->destination_reference); } } for (auto &fk : edge_table->source_fk) { if (!table.ColumnExists(fk)) { - throw Exception(ExceptionType::INVALID,"Foreign key " + fk + " does not exist in table " + edge_table->table_name); + throw Exception(ExceptionType::INVALID, + "Foreign key " + fk + " does not exist in table " + + edge_table->table_name); } } for (auto &fk : edge_table->destination_fk) { if (!table.ColumnExists(fk)) { - throw Exception(ExceptionType::INVALID,"Foreign key " + fk + " does not exist in table " + edge_table->table_name); + throw Exception(ExceptionType::INVALID, + "Foreign key " + fk + " does not exist in table " + + edge_table->table_name); } } } @@ -160,7 +177,8 @@ void CreatePropertyGraphFunction::CreatePropertyGraphFunc( auto pg_info = bind_data.create_pg_info; auto lookup = context.registered_state.find("duckpgq"); if (lookup == context.registered_state.end()) { - throw Exception(ExceptionType::INVALID,"Registered DuckPGQ state not found"); + throw Exception(ExceptionType::INVALID, + "Registered DuckPGQ state not found"); } auto duckpgq_state = (DuckPGQState *)lookup->second.get(); auto pg_lookup = duckpgq_state->registered_property_graphs.find( @@ -169,7 +187,9 @@ void CreatePropertyGraphFunction::CreatePropertyGraphFunc( duckpgq_state->registered_property_graphs[pg_info->property_graph_name] = pg_info->Copy(); } else { - throw Exception(ExceptionType::INVALID,"A property graph with name " + pg_info->property_graph_name + " already exists."); + throw Exception(ExceptionType::INVALID, "A property graph with name " + + pg_info->property_graph_name + + " already exists."); } } }; // namespace duckdb diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index f098e330..d08c566e 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -31,8 +31,10 @@ PGQMatchFunction::FindGraphTable(const string &label, CreatePropertyGraphInfo &pg_table) { const auto graph_table_entry = pg_table.label_map.find(label); if (graph_table_entry == pg_table.label_map.end()) { - throw Exception(ExceptionType::BINDER, "The label " + label + - " is not registered in property graph " + pg_table.property_graph_name); + throw Exception(ExceptionType::BINDER, + "The label " + label + + " is not registered in property graph " + + pg_table.property_graph_name); } return graph_table_entry->second; @@ -535,13 +537,14 @@ unique_ptr PGQMatchFunction::CreatePathFindingFunction( edge_table, previous_vertex_element->variable_binding))); pathfinding_children.push_back(std::move(src_row_id)); pathfinding_children.push_back(std::move(dst_row_id)); - pathfinding_children.push_back( - make_uniq(Value::INTEGER(static_cast(edge_subpath->lower)))); - pathfinding_children.push_back( - make_uniq(Value::INTEGER(static_cast(edge_subpath->upper)))); + pathfinding_children.push_back(make_uniq( + Value::INTEGER(static_cast(edge_subpath->lower)))); + pathfinding_children.push_back(make_uniq( + Value::INTEGER(static_cast(edge_subpath->upper)))); auto shortest_path_function = make_uniq( - edge_subpath->lower > 1 ? "shortestpath_lowerbound": "shortestpath", + edge_subpath->lower > 1 ? "shortestpath_lowerbound" + : "shortestpath", std::move(pathfinding_children)); if (!final_list) { @@ -674,13 +677,13 @@ void PGQMatchFunction::AddPathFinding( std::move(GetCountTable(edge_table, prev_binding))); pathfinding_children.push_back(std::move(src_row_id)); pathfinding_children.push_back(std::move(dst_row_id)); - pathfinding_children.push_back( - make_uniq(Value::INTEGER(static_cast(subpath->lower)))); - pathfinding_children.push_back( - make_uniq(Value::INTEGER(static_cast(subpath->upper)))); + pathfinding_children.push_back(make_uniq( + Value::INTEGER(static_cast(subpath->lower)))); + pathfinding_children.push_back(make_uniq( + Value::INTEGER(static_cast(subpath->upper)))); auto reachability_function = make_uniq( - subpath->lower > 1 ? "iterativelength_lowerbound": "iterativelength", + subpath->lower > 1 ? "iterativelength_lowerbound" : "iterativelength", std::move(pathfinding_children)); auto cte_col_ref = make_uniq("temp", "__x"); @@ -695,7 +698,7 @@ void PGQMatchFunction::AddPathFinding( //! END //! WHERE __x.temp + iterativelength(, (SELECT count(s.id) - //! from src s, a.rowid, b.rowid, lower, upper) + //! from src s, a.rowid, b.rowid, lower, upper) } void PGQMatchFunction::CheckNamedSubpath( diff --git a/duckpgq/src/duckpgq_extension.cpp b/duckpgq/src/duckpgq_extension.cpp index 6637cd9e..37e8fc47 100644 --- a/duckpgq/src/duckpgq_extension.cpp +++ b/duckpgq/src/duckpgq_extension.cpp @@ -86,7 +86,7 @@ ParserExtensionParseResult duckpgq_parse(ParserExtensionInfo *info, : query); if (parser.statements.size() != 1) { throw Exception(ExceptionType::PARSER, - "More than 1 statement detected, please only give one."); + "More than 1 statement detected, please only give one."); } return {make_uniq_base( std::move(parser.statements[0]))}; @@ -181,7 +181,8 @@ duckpgq_handle_statement(SQLStatement *statement, DuckPGQState &duckpgq_state) { // Preferably throw NotImplementedExpection here, but only BinderExceptions // are caught properly on MacOS right now throw Exception(ExceptionType::NOT_IMPLEMENTED, - StatementTypeToString(statement->type) + "has not been implemented yet for DuckPGQ queries"); + StatementTypeToString(statement->type) + + "has not been implemented yet for DuckPGQ queries"); } ParserExtensionPlanResult From 04e8e97f98e4f93d76afc29c5cb3586a1cd8f41e Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Wed, 21 Feb 2024 10:42:20 +0100 Subject: [PATCH 15/39] Comment --- duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp index 10ab0113..b7098b4c 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp @@ -74,7 +74,7 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state, auto src_data = (int64_t *)vdata_src.data; auto dst_data = (int64_t *)vdata_dst.data; - // get lowerbound and upperbound + // get upperbound (lower bound always zero in this function) auto &upper = args.data[5]; UnifiedVectorFormat vdata_upper_bound; upper.ToUnifiedFormat(args.size(), vdata_upper_bound); From 29a17bc74b86979855debbbadf2960364804823b Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Wed, 21 Feb 2024 11:26:57 +0100 Subject: [PATCH 16/39] Adding extra test case --- .../sql/path-finding/shortest_path_bound.test | 18 +++++++ test/sql/shortest_path_function.test | 50 +++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 test/sql/shortest_path_function.test diff --git a/test/sql/path-finding/shortest_path_bound.test b/test/sql/path-finding/shortest_path_bound.test index 97ee93ba..49006a44 100644 --- a/test/sql/path-finding/shortest_path_bound.test +++ b/test/sql/path-finding/shortest_path_bound.test @@ -306,4 +306,22 @@ query III 3 2 [3, 1, 0, 2] 3 3 [3, 2, 3] +query III +-FROM GRAPH_TABLE (pg6 + MATCH + p = ANY SHORTEST (a:Point6 where a.id = 0)-[k:know6]->{1,3}(b:Point6 where b.id = 0) + COLUMNS (a.id as id1, b.id as id2, vertices(p), path_length(p)) + ) tmp + order by tmp.id1, tmp.id2; +---- +0 0 [0, 2, 0] 2 +query III +-FROM GRAPH_TABLE (pg6 + MATCH + p = ANY SHORTEST (a:Point6 where a.id = 0)-[k:know6]->{0,3}(b:Point6 where b.id = 0) + COLUMNS (a.id as id1, b.id as id2, vertices(p), path_length(p)) + ) tmp + order by tmp.id1, tmp.id2; +---- +0 0 [0] 0 \ No newline at end of file diff --git a/test/sql/shortest_path_function.test b/test/sql/shortest_path_function.test new file mode 100644 index 00000000..2f8c4219 --- /dev/null +++ b/test/sql/shortest_path_function.test @@ -0,0 +1,50 @@ +# name: test/sql/sqlpgq/shortest_path_bound.test +# group: [sqlpgq] + +statement ok +pragma enable_verification + +require duckpgq + +# Description: Test shortest paths in a graph with cycles. +# Graph Structure: +# (0) <-> (2) <-> (3) -> (1) +# ↑ | +# └----------------------┘ + +statement ok +CREATE TABLE Point6(id BIGINT); INSERT INTO Point6 VALUES (0), (1), (2), (3); + +statement ok +CREATE TABLE know6(src BIGINT, dst BIGINT); INSERT INTO know6 VALUES (0, 2), (2, 0), (2, 3), (3, 2), (3, 1), (1, 0); + +statement ok +-CREATE PROPERTY GRAPH pg6 +VERTEX TABLES ( + Point6 PROPERTIES ( id ) LABEL Point6 +) +EDGE TABLES ( + know6 SOURCE KEY ( src ) REFERENCES Point6 ( id ) + DESTINATION KEY ( dst ) REFERENCES Point6 ( id ) + LABEL know6 +); + +query IIII +-FROM GRAPH_TABLE (pg6 + MATCH + p = ANY SHORTEST (a:Point6 where a.id = 0)-[k:know6]->{1,3}(b:Point6 where b.id = 0) + COLUMNS (a.id as id1, b.id as id2, vertices(p), path_length(p)) + ) tmp + order by tmp.id1, tmp.id2; +---- +0 0 [0, 2, 0] 2 + +query IIII +-FROM GRAPH_TABLE (pg6 + MATCH + p = ANY SHORTEST (a:Point6 where a.id = 0)-[k:know6]->{0,3}(b:Point6 where b.id = 0) + COLUMNS (a.id as id1, b.id as id2, vertices(p), path_length(p)) + ) tmp + order by tmp.id1, tmp.id2; +---- +0 0 [0] 0 \ No newline at end of file From eb7678e347432bf62823fac95c64a4e0682a43f3 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Wed, 21 Feb 2024 11:27:11 +0100 Subject: [PATCH 17/39] Trigger lowerbound function when lowerbound > 0 --- duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp | 3 +++ .../functions/scalar/iterativelength_lowerbound.cpp | 8 +------- duckpgq/src/duckpgq/functions/tablefunctions/match.cpp | 4 ++-- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp index b7098b4c..d243d079 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp @@ -113,9 +113,12 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state, while (started_searches < args.size()) { int64_t search_num = started_searches++; int64_t src_pos = vdata_src.sel->get_index(search_num); + int64_t dst_pos = vdata_dst.sel->get_index(search_num); if (!vdata_src.validity.RowIsValid(src_pos)) { result_validity.SetInvalid(search_num); result_data[search_num] = (int64_t)-1; /* no path */ + } else if (src_data[src_pos] == dst_data[dst_pos]) { + result_data[search_num] = (int64_t)0; /* source == destination, length is 0 */ } else { result_data[search_num] = (int64_t)-1; /* initialize to no path */ visit1[src_data[src_pos]][lane] = true; diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp index 70f394a0..8062a50f 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp @@ -132,15 +132,9 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, int64_t search_num = started_searches++; int64_t src_pos = vdata_src.sel->get_index(search_num); int64_t dst_pos = vdata_dst.sel->get_index(search_num); - if (!vdata_src.validity.RowIsValid(src_pos)) { + if (!vdata_src.validity.RowIsValid(src_pos)) { // NULL value result_validity.SetInvalid(search_num); result_data[search_num] = (int64_t)-1; /* no path */ - } else if (src_data[src_pos] == dst_data[dst_pos]) { - result_data[search_num] = (int64_t)-1; /* no path */ - visit1[src_data[src_pos]][lane] = true; - lane_to_num[lane] = search_num; // active lane - active++; - break; } else { result_data[search_num] = (int64_t)-1; /* initialize to no path */ seen[src_data[src_pos]][lane] = true; diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index d08c566e..c167e8e6 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -543,7 +543,7 @@ unique_ptr PGQMatchFunction::CreatePathFindingFunction( Value::INTEGER(static_cast(edge_subpath->upper)))); auto shortest_path_function = make_uniq( - edge_subpath->lower > 1 ? "shortestpath_lowerbound" + edge_subpath->lower > 0 ? "shortestpath_lowerbound" : "shortestpath", std::move(pathfinding_children)); @@ -683,7 +683,7 @@ void PGQMatchFunction::AddPathFinding( Value::INTEGER(static_cast(subpath->upper)))); auto reachability_function = make_uniq( - subpath->lower > 1 ? "iterativelength_lowerbound" : "iterativelength", + subpath->lower > 0 ? "iterativelength_lowerbound" : "iterativelength", std::move(pathfinding_children)); auto cte_col_ref = make_uniq("temp", "__x"); From 03558fee0939baa5bce0e65134853cd46b8ffc57 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Wed, 21 Feb 2024 11:28:57 +0100 Subject: [PATCH 18/39] comment --- duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp index d243d079..a86e836d 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp @@ -74,7 +74,7 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state, auto src_data = (int64_t *)vdata_src.data; auto dst_data = (int64_t *)vdata_dst.data; - // get upperbound (lower bound always zero in this function) + // get lowerbound and upperbound auto &upper = args.data[5]; UnifiedVectorFormat vdata_upper_bound; upper.ToUnifiedFormat(args.size(), vdata_upper_bound); From c6f0edb79f114cb733d1daca7fa7ec1987fc0b98 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Wed, 21 Feb 2024 11:29:11 +0100 Subject: [PATCH 19/39] Revert "Trigger lowerbound function when lowerbound > 0" This reverts commit eb7678e347432bf62823fac95c64a4e0682a43f3. --- duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp | 3 --- .../functions/scalar/iterativelength_lowerbound.cpp | 8 +++++++- duckpgq/src/duckpgq/functions/tablefunctions/match.cpp | 4 ++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp index a86e836d..10ab0113 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp @@ -113,12 +113,9 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state, while (started_searches < args.size()) { int64_t search_num = started_searches++; int64_t src_pos = vdata_src.sel->get_index(search_num); - int64_t dst_pos = vdata_dst.sel->get_index(search_num); if (!vdata_src.validity.RowIsValid(src_pos)) { result_validity.SetInvalid(search_num); result_data[search_num] = (int64_t)-1; /* no path */ - } else if (src_data[src_pos] == dst_data[dst_pos]) { - result_data[search_num] = (int64_t)0; /* source == destination, length is 0 */ } else { result_data[search_num] = (int64_t)-1; /* initialize to no path */ visit1[src_data[src_pos]][lane] = true; diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp index 8062a50f..70f394a0 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp @@ -132,9 +132,15 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, int64_t search_num = started_searches++; int64_t src_pos = vdata_src.sel->get_index(search_num); int64_t dst_pos = vdata_dst.sel->get_index(search_num); - if (!vdata_src.validity.RowIsValid(src_pos)) { // NULL value + if (!vdata_src.validity.RowIsValid(src_pos)) { result_validity.SetInvalid(search_num); result_data[search_num] = (int64_t)-1; /* no path */ + } else if (src_data[src_pos] == dst_data[dst_pos]) { + result_data[search_num] = (int64_t)-1; /* no path */ + visit1[src_data[src_pos]][lane] = true; + lane_to_num[lane] = search_num; // active lane + active++; + break; } else { result_data[search_num] = (int64_t)-1; /* initialize to no path */ seen[src_data[src_pos]][lane] = true; diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index c167e8e6..d08c566e 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -543,7 +543,7 @@ unique_ptr PGQMatchFunction::CreatePathFindingFunction( Value::INTEGER(static_cast(edge_subpath->upper)))); auto shortest_path_function = make_uniq( - edge_subpath->lower > 0 ? "shortestpath_lowerbound" + edge_subpath->lower > 1 ? "shortestpath_lowerbound" : "shortestpath", std::move(pathfinding_children)); @@ -683,7 +683,7 @@ void PGQMatchFunction::AddPathFinding( Value::INTEGER(static_cast(subpath->upper)))); auto reachability_function = make_uniq( - subpath->lower > 0 ? "iterativelength_lowerbound" : "iterativelength", + subpath->lower > 1 ? "iterativelength_lowerbound" : "iterativelength", std::move(pathfinding_children)); auto cte_col_ref = make_uniq("temp", "__x"); From ed4f5ef00a86d8d3bb49f2d57894ae8b55e17333 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Wed, 21 Feb 2024 11:27:11 +0100 Subject: [PATCH 20/39] Trigger lowerbound function when lowerbound > 0 --- duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp | 3 +++ .../functions/scalar/iterativelength_lowerbound.cpp | 8 +------- duckpgq/src/duckpgq/functions/tablefunctions/match.cpp | 4 ++-- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp index 10ab0113..a86e836d 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp @@ -113,9 +113,12 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state, while (started_searches < args.size()) { int64_t search_num = started_searches++; int64_t src_pos = vdata_src.sel->get_index(search_num); + int64_t dst_pos = vdata_dst.sel->get_index(search_num); if (!vdata_src.validity.RowIsValid(src_pos)) { result_validity.SetInvalid(search_num); result_data[search_num] = (int64_t)-1; /* no path */ + } else if (src_data[src_pos] == dst_data[dst_pos]) { + result_data[search_num] = (int64_t)0; /* source == destination, length is 0 */ } else { result_data[search_num] = (int64_t)-1; /* initialize to no path */ visit1[src_data[src_pos]][lane] = true; diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp index 70f394a0..8062a50f 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp @@ -132,15 +132,9 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, int64_t search_num = started_searches++; int64_t src_pos = vdata_src.sel->get_index(search_num); int64_t dst_pos = vdata_dst.sel->get_index(search_num); - if (!vdata_src.validity.RowIsValid(src_pos)) { + if (!vdata_src.validity.RowIsValid(src_pos)) { // NULL value result_validity.SetInvalid(search_num); result_data[search_num] = (int64_t)-1; /* no path */ - } else if (src_data[src_pos] == dst_data[dst_pos]) { - result_data[search_num] = (int64_t)-1; /* no path */ - visit1[src_data[src_pos]][lane] = true; - lane_to_num[lane] = search_num; // active lane - active++; - break; } else { result_data[search_num] = (int64_t)-1; /* initialize to no path */ seen[src_data[src_pos]][lane] = true; diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index d08c566e..c167e8e6 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -543,7 +543,7 @@ unique_ptr PGQMatchFunction::CreatePathFindingFunction( Value::INTEGER(static_cast(edge_subpath->upper)))); auto shortest_path_function = make_uniq( - edge_subpath->lower > 1 ? "shortestpath_lowerbound" + edge_subpath->lower > 0 ? "shortestpath_lowerbound" : "shortestpath", std::move(pathfinding_children)); @@ -683,7 +683,7 @@ void PGQMatchFunction::AddPathFinding( Value::INTEGER(static_cast(subpath->upper)))); auto reachability_function = make_uniq( - subpath->lower > 1 ? "iterativelength_lowerbound" : "iterativelength", + subpath->lower > 0 ? "iterativelength_lowerbound" : "iterativelength", std::move(pathfinding_children)); auto cte_col_ref = make_uniq("temp", "__x"); From 9f913b498df0db3acb077aba78bec18c0a871551 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Wed, 21 Feb 2024 11:32:02 +0100 Subject: [PATCH 21/39] Remove unused dst_pos --- .../src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp index 8062a50f..15f88de9 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp @@ -131,7 +131,6 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, while (started_searches < args.size()) { int64_t search_num = started_searches++; int64_t src_pos = vdata_src.sel->get_index(search_num); - int64_t dst_pos = vdata_dst.sel->get_index(search_num); if (!vdata_src.validity.RowIsValid(src_pos)) { // NULL value result_validity.SetInvalid(search_num); result_data[search_num] = (int64_t)-1; /* no path */ From d39776f0a8020bdb7b095c8b935a883b991ae20f Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Wed, 21 Feb 2024 11:33:40 +0100 Subject: [PATCH 22/39] Remove test --- test/sql/path-finding/shortest_path_bound.test | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/test/sql/path-finding/shortest_path_bound.test b/test/sql/path-finding/shortest_path_bound.test index 49006a44..5ac8ed82 100644 --- a/test/sql/path-finding/shortest_path_bound.test +++ b/test/sql/path-finding/shortest_path_bound.test @@ -314,14 +314,4 @@ query III ) tmp order by tmp.id1, tmp.id2; ---- -0 0 [0, 2, 0] 2 - -query III --FROM GRAPH_TABLE (pg6 - MATCH - p = ANY SHORTEST (a:Point6 where a.id = 0)-[k:know6]->{0,3}(b:Point6 where b.id = 0) - COLUMNS (a.id as id1, b.id as id2, vertices(p), path_length(p)) - ) tmp - order by tmp.id1, tmp.id2; ----- -0 0 [0] 0 \ No newline at end of file +0 0 [0, 2, 0] 2 \ No newline at end of file From e17f417d4c6cb351233918210f0ae837ccbbc193 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Wed, 21 Feb 2024 11:34:35 +0100 Subject: [PATCH 23/39] Add column --- test/sql/path-finding/shortest_path_bound.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/sql/path-finding/shortest_path_bound.test b/test/sql/path-finding/shortest_path_bound.test index 5ac8ed82..b8305cce 100644 --- a/test/sql/path-finding/shortest_path_bound.test +++ b/test/sql/path-finding/shortest_path_bound.test @@ -306,7 +306,7 @@ query III 3 2 [3, 1, 0, 2] 3 3 [3, 2, 3] -query III +query IIII -FROM GRAPH_TABLE (pg6 MATCH p = ANY SHORTEST (a:Point6 where a.id = 0)-[k:know6]->{1,3}(b:Point6 where b.id = 0) From 195ff2730b9947ae32ad33783d4c127d95816d3f Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Wed, 21 Feb 2024 11:34:56 +0100 Subject: [PATCH 24/39] Remove test file --- test/sql/shortest_path_function.test | 50 ---------------------------- 1 file changed, 50 deletions(-) delete mode 100644 test/sql/shortest_path_function.test diff --git a/test/sql/shortest_path_function.test b/test/sql/shortest_path_function.test deleted file mode 100644 index 2f8c4219..00000000 --- a/test/sql/shortest_path_function.test +++ /dev/null @@ -1,50 +0,0 @@ -# name: test/sql/sqlpgq/shortest_path_bound.test -# group: [sqlpgq] - -statement ok -pragma enable_verification - -require duckpgq - -# Description: Test shortest paths in a graph with cycles. -# Graph Structure: -# (0) <-> (2) <-> (3) -> (1) -# ↑ | -# └----------------------┘ - -statement ok -CREATE TABLE Point6(id BIGINT); INSERT INTO Point6 VALUES (0), (1), (2), (3); - -statement ok -CREATE TABLE know6(src BIGINT, dst BIGINT); INSERT INTO know6 VALUES (0, 2), (2, 0), (2, 3), (3, 2), (3, 1), (1, 0); - -statement ok --CREATE PROPERTY GRAPH pg6 -VERTEX TABLES ( - Point6 PROPERTIES ( id ) LABEL Point6 -) -EDGE TABLES ( - know6 SOURCE KEY ( src ) REFERENCES Point6 ( id ) - DESTINATION KEY ( dst ) REFERENCES Point6 ( id ) - LABEL know6 -); - -query IIII --FROM GRAPH_TABLE (pg6 - MATCH - p = ANY SHORTEST (a:Point6 where a.id = 0)-[k:know6]->{1,3}(b:Point6 where b.id = 0) - COLUMNS (a.id as id1, b.id as id2, vertices(p), path_length(p)) - ) tmp - order by tmp.id1, tmp.id2; ----- -0 0 [0, 2, 0] 2 - -query IIII --FROM GRAPH_TABLE (pg6 - MATCH - p = ANY SHORTEST (a:Point6 where a.id = 0)-[k:know6]->{0,3}(b:Point6 where b.id = 0) - COLUMNS (a.id as id1, b.id as id2, vertices(p), path_length(p)) - ) tmp - order by tmp.id1, tmp.id2; ----- -0 0 [0] 0 \ No newline at end of file From 34559be99b822d9864498586093b7de91f295025 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Wed, 21 Feb 2024 12:26:05 +0100 Subject: [PATCH 25/39] Add correct test results --- test/sql/path-finding/complex_matching.test | 100 ++++++++++++++++-- .../sql/path-finding/shortest_path_bound.test | 18 +++- 2 files changed, 104 insertions(+), 14 deletions(-) diff --git a/test/sql/path-finding/complex_matching.test b/test/sql/path-finding/complex_matching.test index e67934e2..1e0cd7c9 100644 --- a/test/sql/path-finding/complex_matching.test +++ b/test/sql/path-finding/complex_matching.test @@ -55,6 +55,11 @@ query IIIIIII COLUMNS (p3.id, element_id(o), path_length(o), vertices(o), edges(o), p4.id, p.id) ) tmp; ---- +26388279066641 [0, 0, 13, 42, 29] 2 [0, 13, 29] [0, 42] 14 10995116277782 +26388279066658 [0, 0, 13, 43, 31] 2 [0, 13, 31] [0, 43] 14 10995116277782 +28587302322180 [0, 0, 13, 44, 33] 2 [0, 13, 33] [0, 44] 14 10995116277782 +28587302322204 [0, 0, 13, 45, 36] 2 [0, 13, 36] [0, 45] 14 10995116277782 +35184372088856 [0, 0, 13, 46, 46] 2 [0, 13, 46] [0, 46] 14 10995116277782 28587302322180 [0, 0, 13, 42, 29, 68, 33] 3 [0, 13, 29, 33] [0, 42, 68] 14 10995116277782 28587302322204 [0, 0, 13, 42, 29, 69, 36] 3 [0, 13, 29, 36] [0, 42, 69] 14 10995116277782 28587302322196 [0, 0, 13, 43, 31, 71, 35] 3 [0, 13, 31, 35] [0, 43, 71] 14 10995116277782 @@ -66,6 +71,12 @@ query IIIIIII 30786325577731 [0, 0, 13, 44, 33, 79, 39] 3 [0, 13, 33, 39] [0, 44, 79] 14 10995116277782 32985348833329 [0, 0, 13, 44, 33, 80, 43] 3 [0, 13, 33, 43] [0, 44, 80] 14 10995116277782 35184372088850 [0, 0, 13, 45, 36, 82, 45] 3 [0, 13, 36, 45] [0, 45, 82] 14 10995116277782 +24189255811109 [0, 1, 26, 61, 27] 2 [0, 26, 27] [1, 61] 14 24189255811081 +26388279066658 [0, 1, 26, 62, 31] 2 [0, 26, 31] [1, 62] 14 24189255811081 +26388279066668 [0, 1, 26, 63, 32] 2 [0, 26, 32] [1, 63] 14 24189255811081 +28587302322180 [0, 1, 26, 64, 33] 2 [0, 26, 33] [1, 64] 14 24189255811081 +28587302322196 [0, 1, 26, 65, 35] 2 [0, 26, 35] [1, 65] 14 24189255811081 +35184372088834 [0, 1, 26, 66, 44] 2 [0, 26, 44] [1, 66] 14 24189255811081 28587302322196 [0, 1, 26, 62, 31, 71, 35] 3 [0, 26, 31, 35] [1, 62, 71] 14 24189255811081 30786325577740 [0, 1, 26, 62, 31, 72, 40] 3 [0, 26, 31, 40] [1, 62, 72] 14 24189255811081 35184372088850 [0, 1, 26, 62, 31, 73, 45] 3 [0, 26, 31, 45] [1, 62, 73] 14 24189255811081 @@ -76,7 +87,9 @@ query IIIIIII 28587302322223 [0, 1, 26, 64, 33, 78, 38] 3 [0, 26, 33, 38] [1, 64, 78] 14 24189255811081 30786325577731 [0, 1, 26, 64, 33, 79, 39] 3 [0, 26, 33, 39] [1, 64, 79] 14 24189255811081 32985348833329 [0, 1, 26, 64, 33, 80, 43] 3 [0, 26, 33, 43] [1, 64, 80] 14 24189255811081 -35184372088850 [0, 1, 26, 63, 32, 76, 36, 82, 45] 4 [0, 26, 32, 36, 45] [1, 63, 76, 82] 14 24189255811081 +35184372088850 [0, 1, 26, 64, 33, 77, 36, 82, 45] 4 [0, 26, 33, 36, 45] [1, 64, 77, 82] 14 24189255811081 +28587302322180 [0, 2, 32, 75, 33] 2 [0, 32, 33] [2, 75] 14 26388279066668 +28587302322204 [0, 2, 32, 76, 36] 2 [0, 32, 36] [2, 76] 14 26388279066668 28587302322204 [0, 2, 32, 75, 33, 77, 36] 3 [0, 32, 33, 36] [2, 75, 77] 14 26388279066668 28587302322223 [0, 2, 32, 75, 33, 78, 38] 3 [0, 32, 33, 38] [2, 75, 78] 14 26388279066668 30786325577731 [0, 2, 32, 75, 33, 79, 39] 3 [0, 32, 33, 39] [2, 75, 79] 14 26388279066668 @@ -112,16 +125,16 @@ query III ORDER BY p_id, p2_id limit 10; ---- -14 10995116277782 598 -14 10995116277782 805 -14 10995116277782 1174 -14 10995116277782 1183 -14 10995116277782 1527 -14 10995116277782 1676 -14 10995116277782 1998 -14 10995116277782 798 -14 10995116277782 1031 -14 10995116277782 1986 +14 14 290 +14 14 584 +14 14 783 +14 14 804 +14 14 1621 +14 14 2038 +14 14 2045 +14 14 779 +14 14 1031 +14 14 1763 query IIIII WITH CTE1 AS (SELECT CREATE_CSR_EDGE( @@ -218,6 +231,71 @@ query IIII ) tmp ORDER BY b_id, t_id ---- +Bryn 33 28587302322180 139 +Bryn 33 28587302322180 294 +Bryn 33 28587302322180 458 +Bryn 33 28587302322180 572 +Bryn 33 28587302322180 590 +Bryn 33 28587302322180 788 +Bryn 33 28587302322180 974 +Bryn 33 28587302322180 1184 +Bryn 33 28587302322180 1401 +Bryn 33 28587302322180 1680 +Bryn 33 28587302322180 1984 +Bryn 33 28587302322180 1985 +Bryn 33 28587302322180 1991 +Bryn 33 28587302322180 1992 +Bryn 33 28587302322180 1994 +Bryn 33 28587302322180 2006 +Bryn 33 28587302322180 2017 +Bryn 33 28587302322180 2023 +Bryn 33 28587302322180 2052 +Bryn 33 28587302322180 2056 +Bryn 33 28587302322180 2058 +Bryn 33 28587302322180 2078 +Bryn 33 28587302322180 2777 +Bryn 33 28587302322180 2783 +Bryn 33 28587302322180 2788 +Bryn 33 28587302322180 2796 +Bryn 33 28587302322180 2797 +Bryn 33 28587302322180 2812 +Bryn 33 28587302322180 2827 +Bryn 33 28587302322180 2836 +Bryn 33 28587302322180 2845 +Bryn 33 28587302322180 2859 +Bryn 33 28587302322180 2872 +Bryn 33 28587302322180 2880 +Bryn 33 28587302322180 2929 +Bryn 33 28587302322180 2943 +Bryn 33 28587302322180 2976 +Bryn 33 28587302322180 3011 +Bryn 33 28587302322180 3022 +Bryn 33 28587302322180 3064 +Bryn 33 28587302322180 3087 +Bryn 33 28587302322180 5163 +Bryn 33 28587302322180 5170 +Bryn 33 28587302322180 5193 +Bryn 33 28587302322180 5699 +Bryn 33 28587302322180 6405 +Bryn 33 28587302322180 6993 +Bryn 33 28587302322180 6999 +Bryn 33 28587302322180 7001 +Bryn 33 28587302322180 7010 +Bryn 33 28587302322180 7522 +Bryn 33 28587302322180 7526 +Bryn 33 28587302322180 7564 +Bryn 33 28587302322180 7570 +Bryn 33 28587302322180 9479 +Bryn 33 28587302322180 9496 +Bryn 33 28587302322180 9760 +Bryn 33 28587302322180 9939 +Bryn 33 28587302322180 10080 +Bryn 33 28587302322180 10322 +Bryn 33 28587302322180 10991 +Bryn 33 28587302322180 11408 +Bryn 33 28587302322180 11557 +Bryn 33 28587302322180 11609 +Bryn 33 28587302322180 12273 Bryn 36 28587302322204 6 Bryn 36 28587302322204 588 Bryn 36 28587302322204 1021 diff --git a/test/sql/path-finding/shortest_path_bound.test b/test/sql/path-finding/shortest_path_bound.test index b8305cce..1cc6732f 100644 --- a/test/sql/path-finding/shortest_path_bound.test +++ b/test/sql/path-finding/shortest_path_bound.test @@ -264,7 +264,7 @@ query III # Description: Test shortest paths in a graph with cycles. # Graph Structure: -# (0) <-> (2) <-> (3) -> (1) +# (0) <-> (2) <-> (3) -> (1) (selfloop) # ↑ | # └----------------------┘ @@ -272,7 +272,7 @@ statement ok CREATE TABLE Point6(id BIGINT); INSERT INTO Point6 VALUES (0), (1), (2), (3); statement ok -CREATE TABLE know6(src BIGINT, dst BIGINT); INSERT INTO know6 VALUES (0, 2), (2, 0), (2, 3), (3, 2), (3, 1), (1, 0); +CREATE TABLE know6(src BIGINT, dst BIGINT); INSERT INTO know6 VALUES (0, 2), (2, 0), (2, 3), (3, 2), (3, 1), (1, 0), (1, 1); statement ok -CREATE PROPERTY GRAPH pg6 @@ -296,6 +296,7 @@ query III 0 0 [0, 2, 0] 0 1 [0, 2, 3, 1] 0 3 [0, 2, 3] +1 0 [1, 1, 0] 1 1 [1, 0, 2, 3, 1] 1 2 [1, 0, 2] 1 3 [1, 0, 2, 3] @@ -306,6 +307,7 @@ query III 3 2 [3, 1, 0, 2] 3 3 [3, 2, 3] + query IIII -FROM GRAPH_TABLE (pg6 MATCH @@ -314,4 +316,14 @@ query IIII ) tmp order by tmp.id1, tmp.id2; ---- -0 0 [0, 2, 0] 2 \ No newline at end of file +0 0 [0, 2, 0] 2 + +query IIII +-FROM GRAPH_TABLE (pg6 + MATCH + p = ANY SHORTEST (a:Point6 where a.id = 1)-[k:know6]->{1,3}(b:Point6 where b.id = 1) + COLUMNS (a.id as id1, b.id as id2, vertices(p), path_length(p)) + ) tmp + order by tmp.id1, tmp.id2; +---- +1 1 [1, 1] 1 \ No newline at end of file From a05525dab7a6534ffd10be9bbbb5350d273d0497 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Wed, 21 Feb 2024 12:27:25 +0100 Subject: [PATCH 26/39] Add results --- test/sql/path-finding/complex_matching.test | 65 +++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/test/sql/path-finding/complex_matching.test b/test/sql/path-finding/complex_matching.test index 1e0cd7c9..3ff4ae29 100644 --- a/test/sql/path-finding/complex_matching.test +++ b/test/sql/path-finding/complex_matching.test @@ -350,6 +350,71 @@ query IIII ) tmp ORDER BY b_id, t_id ---- +Bryn 33 28587302322180 139 +Bryn 33 28587302322180 294 +Bryn 33 28587302322180 458 +Bryn 33 28587302322180 572 +Bryn 33 28587302322180 590 +Bryn 33 28587302322180 788 +Bryn 33 28587302322180 974 +Bryn 33 28587302322180 1184 +Bryn 33 28587302322180 1401 +Bryn 33 28587302322180 1680 +Bryn 33 28587302322180 1984 +Bryn 33 28587302322180 1985 +Bryn 33 28587302322180 1991 +Bryn 33 28587302322180 1992 +Bryn 33 28587302322180 1994 +Bryn 33 28587302322180 2006 +Bryn 33 28587302322180 2017 +Bryn 33 28587302322180 2023 +Bryn 33 28587302322180 2052 +Bryn 33 28587302322180 2056 +Bryn 33 28587302322180 2058 +Bryn 33 28587302322180 2078 +Bryn 33 28587302322180 2777 +Bryn 33 28587302322180 2783 +Bryn 33 28587302322180 2788 +Bryn 33 28587302322180 2796 +Bryn 33 28587302322180 2797 +Bryn 33 28587302322180 2812 +Bryn 33 28587302322180 2827 +Bryn 33 28587302322180 2836 +Bryn 33 28587302322180 2845 +Bryn 33 28587302322180 2859 +Bryn 33 28587302322180 2872 +Bryn 33 28587302322180 2880 +Bryn 33 28587302322180 2929 +Bryn 33 28587302322180 2943 +Bryn 33 28587302322180 2976 +Bryn 33 28587302322180 3011 +Bryn 33 28587302322180 3022 +Bryn 33 28587302322180 3064 +Bryn 33 28587302322180 3087 +Bryn 33 28587302322180 5163 +Bryn 33 28587302322180 5170 +Bryn 33 28587302322180 5193 +Bryn 33 28587302322180 5699 +Bryn 33 28587302322180 6405 +Bryn 33 28587302322180 6993 +Bryn 33 28587302322180 6999 +Bryn 33 28587302322180 7001 +Bryn 33 28587302322180 7010 +Bryn 33 28587302322180 7522 +Bryn 33 28587302322180 7526 +Bryn 33 28587302322180 7564 +Bryn 33 28587302322180 7570 +Bryn 33 28587302322180 9479 +Bryn 33 28587302322180 9496 +Bryn 33 28587302322180 9760 +Bryn 33 28587302322180 9939 +Bryn 33 28587302322180 10080 +Bryn 33 28587302322180 10322 +Bryn 33 28587302322180 10991 +Bryn 33 28587302322180 11408 +Bryn 33 28587302322180 11557 +Bryn 33 28587302322180 11609 +Bryn 33 28587302322180 12273 Bryn 36 28587302322204 6 Bryn 36 28587302322204 588 Bryn 36 28587302322204 1021 From d61e335ac5effc58672a53b4c0e09204f3e7b84b Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Fri, 23 Feb 2024 13:43:37 +0100 Subject: [PATCH 27/39] Add correct resutl --- test/sql/path-finding/complex_matching.test | 60 +++++++++++---------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/test/sql/path-finding/complex_matching.test b/test/sql/path-finding/complex_matching.test index 3ff4ae29..14df71f2 100644 --- a/test/sql/path-finding/complex_matching.test +++ b/test/sql/path-finding/complex_matching.test @@ -479,35 +479,37 @@ statement error ---- Parser Error: syntax error at or near "{" -query III +query IIII -FROM GRAPH_TABLE (snb - MATCH p = (a:Person where a.id = 16)-[k:knows]->{1,3}(b:Person) - COLUMNS (element_id(p), a.id, b.id) + MATCH p = (a:Person where a.id = 16)-[k:knows]->{1,3}(b:Person where b.id = 16) + COLUMNS (element_id(p), a.id, b.id, path_length(p)) ) tmp; ---- -[1, 3, 5] 16 2199023255594 -[1, 3, 5, 16, 10] 16 8796093022244 -[1, 3, 5, 17, 12] 16 10995116277761 -[1, 3, 5, 18, 16] 16 13194139533342 -[1, 3, 5, 19, 17] 16 13194139533352 -[1, 3, 5, 16, 10, 32, 18] 16 13194139533355 -[1, 3, 5, 20, 19] 16 15393162788877 -[1, 3, 5, 17, 12, 39, 20] 16 17592186044443 -[1, 3, 5, 21, 21] 16 17592186044461 -[1, 3, 5, 19, 17, 48, 23] 16 19791209299987 -[1, 3, 5, 22, 26] 16 24189255811081 -[1, 3, 5, 22, 26, 61, 27] 16 24189255811109 -[1, 3, 5, 19, 17, 49, 29] 16 26388279066641 -[1, 4, 30] 16 26388279066655 -[1, 3, 5, 23, 31] 16 26388279066658 -[1, 3, 5, 24, 32] 16 26388279066668 -[1, 5, 33] 16 28587302322180 -[1, 3, 5, 26, 35] 16 28587302322196 -[1, 6, 36] 16 28587302322204 -[1, 5, 33, 78, 38] 16 28587302322223 -[1, 5, 33, 79, 39] 16 30786325577731 -[1, 3, 5, 27, 40] 16 30786325577740 -[1, 5, 33, 80, 43] 16 32985348833329 -[1, 3, 5, 22, 26, 66, 44] 16 35184372088834 -[1, 3, 5, 28, 45] 16 35184372088850 -[1, 3, 5, 23, 31, 74, 46] 16 35184372088856 +NULL 16 16 NULL +[1, 3, 5] 16 2199023255594 1 +[1, 3, 5, 16, 10] 16 8796093022244 2 +[1, 3, 5, 17, 12] 16 10995116277761 2 +[1, 3, 5, 18, 16] 16 13194139533342 2 +[1, 3, 5, 19, 17] 16 13194139533352 2 +[1, 3, 5, 16, 10, 32, 18] 16 13194139533355 3 +[1, 3, 5, 20, 19] 16 15393162788877 2 +[1, 3, 5, 17, 12, 39, 20] 16 17592186044443 3 +[1, 3, 5, 21, 21] 16 17592186044461 2 +[1, 3, 5, 19, 17, 48, 23] 16 19791209299987 3 +[1, 3, 5, 22, 26] 16 24189255811081 2 +[1, 3, 5, 22, 26, 61, 27] 16 24189255811109 3 +[1, 3, 5, 19, 17, 49, 29] 16 26388279066641 3 +[1, 4, 30] 16 26388279066655 1 +[1, 3, 5, 23, 31] 16 26388279066658 2 +[1, 3, 5, 24, 32] 16 26388279066668 2 +[1, 5, 33] 16 28587302322180 1 +[1, 3, 5, 26, 35] 16 28587302322196 2 +[1, 6, 36] 16 28587302322204 1 +[1, 5, 33, 78, 38] 16 28587302322223 2 +[1, 5, 33, 79, 39] 16 30786325577731 2 +[1, 3, 5, 27, 40] 16 30786325577740 2 +[1, 5, 33, 80, 43] 16 32985348833329 2 +[1, 3, 5, 22, 26, 66, 44] 16 35184372088834 3 +[1, 6, 36, 82, 45] 16 35184372088850 2 +[1, 3, 5, 23, 31, 74, 46] 16 35184372088856 3 + From 1ccf97cb9809bf4e6ee683dfc6551f4209bfb785 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Fri, 23 Feb 2024 13:57:18 +0100 Subject: [PATCH 28/39] Remove condition --- test/sql/path-finding/complex_matching.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/sql/path-finding/complex_matching.test b/test/sql/path-finding/complex_matching.test index 14df71f2..455e550f 100644 --- a/test/sql/path-finding/complex_matching.test +++ b/test/sql/path-finding/complex_matching.test @@ -481,7 +481,7 @@ Parser Error: syntax error at or near "{" query IIII -FROM GRAPH_TABLE (snb - MATCH p = (a:Person where a.id = 16)-[k:knows]->{1,3}(b:Person where b.id = 16) + MATCH p = (a:Person where a.id = 16)-[k:knows]->{1,3}(b:Person where) COLUMNS (element_id(p), a.id, b.id, path_length(p)) ) tmp; ---- From aa8dfa497800317bbe490356a292cdcb14d3c7b8 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Fri, 23 Feb 2024 13:57:54 +0100 Subject: [PATCH 29/39] Remove condition --- test/sql/path-finding/complex_matching.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/sql/path-finding/complex_matching.test b/test/sql/path-finding/complex_matching.test index 455e550f..411f8479 100644 --- a/test/sql/path-finding/complex_matching.test +++ b/test/sql/path-finding/complex_matching.test @@ -481,7 +481,7 @@ Parser Error: syntax error at or near "{" query IIII -FROM GRAPH_TABLE (snb - MATCH p = (a:Person where a.id = 16)-[k:knows]->{1,3}(b:Person where) + MATCH p = (a:Person where a.id = 16)-[k:knows]->{1,3}(b:Person) COLUMNS (element_id(p), a.id, b.id, path_length(p)) ) tmp; ---- From 81ca5f990982d202e654fa585f6b64ffc4f01883 Mon Sep 17 00:00:00 2001 From: Pingan Ren Date: Mon, 26 Feb 2024 17:14:00 +0100 Subject: [PATCH 30/39] Implement new idea --- .../duckpgq/functions/scalar/CMakeLists.txt | 2 + .../scalar/iterativelength_lowerbound.cpp | 37 +- .../scalar/iterativelength_two_phase.cpp | 249 +++++++++++++ .../scalar/shortest_path_lowerbound.cpp | 57 ++- .../scalar/shortest_path_two_phase.cpp | 334 ++++++++++++++++++ test/sql/path-finding/iterativelength.test | 196 ++++++++++ .../sql/path-finding/shortest_path_bound.test | 82 +++-- 7 files changed, 868 insertions(+), 89 deletions(-) create mode 100644 duckpgq/src/duckpgq/functions/scalar/iterativelength_two_phase.cpp create mode 100644 duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp create mode 100644 test/sql/path-finding/iterativelength.test diff --git a/duckpgq/src/duckpgq/functions/scalar/CMakeLists.txt b/duckpgq/src/duckpgq/functions/scalar/CMakeLists.txt index 44c3eea2..e03d89e9 100644 --- a/duckpgq/src/duckpgq/functions/scalar/CMakeLists.txt +++ b/duckpgq/src/duckpgq/functions/scalar/CMakeLists.txt @@ -6,11 +6,13 @@ set(EXTENSION_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/csr_get_w_type.cpp ${CMAKE_CURRENT_SOURCE_DIR}/iterativelength.cpp ${CMAKE_CURRENT_SOURCE_DIR}/iterativelength_lowerbound.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/iterativelength_two_phase.cpp ${CMAKE_CURRENT_SOURCE_DIR}/iterativelength2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/iterativelength_bidirectional.cpp ${CMAKE_CURRENT_SOURCE_DIR}/reachability.cpp ${CMAKE_CURRENT_SOURCE_DIR}/shortest_path.cpp ${CMAKE_CURRENT_SOURCE_DIR}/shortest_path_lowerbound.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/shortest_path_two_phase.cpp ${CMAKE_CURRENT_SOURCE_DIR}/csr_creation.cpp PARENT_SCOPE ) \ No newline at end of file diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp index 1fe6d52d..8c8c5f89 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp @@ -8,13 +8,14 @@ namespace duckdb { static bool IterativeLengthLowerBound(int64_t v_size, int64_t *v, vector &e, - vector>> &parents_v, + // vector>> &parents_v, vector> &seen, vector> &visit, vector> &next) { bool change = false; for (auto i = 0; i < v_size; i++) { next[i] = 0; + seen[i] = 0; } for (auto lane = 0; lane < LANE_LIMIT; lane++) { @@ -22,11 +23,12 @@ static bool IterativeLengthLowerBound(int64_t v_size, int64_t *v, vectorget_index(search_num); - int64_t dst_pos = vdata_dst.sel->get_index(search_num); if (!vdata_src.validity.RowIsValid(src_pos)) { result_validity.SetInvalid(search_num); result_data[search_num] = (int64_t)-1; /* no path */ - } else if (src_data[src_pos] == dst_data[dst_pos]) { - result_data[search_num] = (int64_t)-1; /* no path */ - visit1[src_data[src_pos]][lane] = true; - lane_to_num[lane] = search_num; // active lane - active++; - break; } else { result_data[search_num] = (int64_t)-1; /* initialize to no path */ - seen[src_data[src_pos]][lane] = true; visit1[src_data[src_pos]][lane] = true; lane_to_num[lane] = search_num; // active lane active++; @@ -150,8 +144,10 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, ExpressionState & // make passes while a lane is still active for (int64_t iter = 1; active && iter <= upper_bound; iter++) { - bool stop = !IterativeLengthLowerBound(v_size, v, e, parents_v, seen, (iter & 1) ? visit1 : visit2, - (iter & 1) ? visit2 : visit1); + if (!IterativeLengthLowerBound(v_size, v, e, seen, (iter & 1) ? visit1 : visit2, + (iter & 1) ? visit2 : visit1)) { + break; + } // detect lanes that finished for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { int64_t search_num = lane_to_num[lane]; @@ -164,9 +160,9 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, ExpressionState & if (iter < lower_bound) { // when reach the destination too early, treat destination as null // looks like the graph does not have that vertex - seen[dst_data[dst_pos]][lane] = false; - (iter & 1) ? visit2[dst_data[dst_pos]][lane] = false - : visit1[dst_data[dst_pos]][lane] = false; + // seen[dst_data[dst_pos]][lane] = false; + // (iter & 1) ? visit2[dst_data[dst_pos]][lane] = false + // : visit1[dst_data[dst_pos]][lane] = false; continue; } else { result_data[search_num] = @@ -178,9 +174,6 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, ExpressionState & } } } - if (stop) { - break; - } } // no changes anymore: any still active searches have no path diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength_two_phase.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength_two_phase.cpp new file mode 100644 index 00000000..e270f5d5 --- /dev/null +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength_two_phase.cpp @@ -0,0 +1,249 @@ +#include +#include "duckdb/main/client_data.hpp" +#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" +#include "duckpgq/common.hpp" +#include "duckpgq/duckpgq_functions.hpp" + +namespace duckdb { + +static bool IterativeLengthPhaseOne(int64_t v_size, int64_t *v, vector &e, + vector> &visit, + vector> &next) { + bool change = false; + for (auto i = 0; i < v_size; i++) { + next[i] = 0; + } + + for (auto i = 0; i < v_size; i++) { + if (visit[i].any()) { + for (auto offset = v[i]; offset < v[i + 1]; offset++) { + auto n = e[offset]; + next[n] = next[n] | visit[i]; + } + } + } + + for (auto i = 0; i < v_size; i++) { + change |= next[i].any(); + } + + return change; +} + +static bool IterativeLengthPhaseTwo(int64_t v_size, int64_t *v, vector &e, + vector> &seen, + vector> &visit, + vector> &next) { + bool change = false; + for (auto i = 0; i < v_size; i++) { + next[i] = 0; + } + + for (auto i = 0; i < v_size; i++) { + if (visit[i].any()) { + for (auto offset = v[i]; offset < v[i + 1]; offset++) { + auto n = e[offset]; + next[n] = next[n] | visit[i]; + } + } + } + + for (auto i = 0; i < v_size; i++) { + next[i] = next[i] & ~seen[i]; + seen[i] = seen[i] | next[i]; + change |= next[i].any(); + } + + return change; + +} + +static int64_t IterativeLengthInternal(int64_t lane, int64_t v_size, int64_t destination, + int64_t bound, + int64_t *v, vector &e, + vector> &visit) { + vector src; + for (int64_t v = 0; v < v_size; v++) { + if (visit[v][lane]) { + src.push_back(v); + } + } + vector> seen(v_size); + vector> visit1(v_size); + vector> visit2(v_size); + + idx_t started_searches = 0; + while (started_searches < src.size()) { + for (auto i = 0; i < v_size; i++) { + seen[i] = 0; + visit1[i] = 0; + } + // add search jobs to free lanes + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + while (started_searches < src.size()) { + int64_t search_num = started_searches++; + visit1[src[search_num]][lane] = true; + } + } + + for (int64_t iter = 1; iter <= bound; iter++) { + if (!IterativeLengthPhaseTwo(v_size, v, e, seen, (iter & 1) ? visit1 : visit2, + (iter & 1) ? visit2 : visit1)) { + break; + } + // detect lanes that found the destination + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + if (seen[destination][lane]) { + return iter; + } + } + } + } + return -1; +} + +static void IterativeLengthLowerBoundFunction(DataChunk &args, ExpressionState &state, + Vector &result) { + auto &func_expr = (BoundFunctionExpression &)state.expr; + auto &info = (IterativeLengthFunctionData &)*func_expr.bind_info; + auto duckpgq_state_entry = info.context.registered_state.find("duckpgq"); + if (duckpgq_state_entry == info.context.registered_state.end()) { + //! Wondering how you can get here if the extension wasn't loaded, but + //! leaving this check in anyways + throw MissingExtensionException( + "The DuckPGQ extension has not been loaded"); + } + auto duckpgq_state = + reinterpret_cast(duckpgq_state_entry->second.get()); + + D_ASSERT(duckpgq_state->csr_list[info.csr_id]); + + if ((uint64_t)info.csr_id + 1 > duckpgq_state->csr_list.size()) { + throw ConstraintException("Invalid ID"); + } + auto csr_entry = duckpgq_state->csr_list.find((uint64_t)info.csr_id); + if (csr_entry == duckpgq_state->csr_list.end()) { + throw ConstraintException( + "Need to initialize CSR before doing shortest path"); + } + + if (!(csr_entry->second->initialized_v && csr_entry->second->initialized_e)) { + throw ConstraintException( + "Need to initialize CSR before doing shortest path"); + } + int64_t v_size = args.data[1].GetValue(0).GetValue(); + int64_t *v = (int64_t *)duckpgq_state->csr_list[info.csr_id]->v; + vector &e = duckpgq_state->csr_list[info.csr_id]->e; + + // get src and dst vectors for searches + auto &src = args.data[2]; + auto &dst = args.data[3]; + UnifiedVectorFormat vdata_src; + UnifiedVectorFormat vdata_dst; + src.ToUnifiedFormat(args.size(), vdata_src); + dst.ToUnifiedFormat(args.size(), vdata_dst); + auto src_data = (int64_t *)vdata_src.data; + auto dst_data = (int64_t *)vdata_dst.data; + + // get lowerbound and upperbound + auto &lower = args.data[4]; + auto &upper = args.data[5]; + UnifiedVectorFormat vdata_lower_bound; + UnifiedVectorFormat vdata_upper_bound; + lower.ToUnifiedFormat(args.size(), vdata_lower_bound); + upper.ToUnifiedFormat(args.size(), vdata_upper_bound); + auto lower_bound = ((int64_t *)vdata_lower_bound.data)[0]; + auto upper_bound = ((int64_t *)vdata_upper_bound.data)[0]; + + ValidityMask &result_validity = FlatVector::Validity(result); + + // create result vector + result.SetVectorType(VectorType::FLAT_VECTOR); + auto result_data = FlatVector::GetData(result); + + // create temp SIMD arrays + vector> seen(v_size); + vector> visit1(v_size); + vector> visit2(v_size); + + // maps lane to search number + short lane_to_num[LANE_LIMIT]; + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + lane_to_num[lane] = -1; // inactive + } + + idx_t started_searches = 0; + while (started_searches < args.size()) { + + // empty visit vectors + for (auto i = 0; i < v_size; i++) { + seen[i] = 0; + visit1[i] = 0; + } + + // add search jobs to free lanes + uint64_t active = 0; + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + lane_to_num[lane] = -1; + while (started_searches < args.size()) { + int64_t search_num = started_searches++; + int64_t src_pos = vdata_src.sel->get_index(search_num); + if (!vdata_src.validity.RowIsValid(src_pos)) { + result_validity.SetInvalid(search_num); + result_data[search_num] = (int64_t)-1; /* no path */ + } else { + result_data[search_num] = (int64_t)-1; /* initialize to no path */ + visit1[src_data[src_pos]][lane] = true; + lane_to_num[lane] = search_num; // active lane + active++; + break; + } + } + } + + int64_t iter = 1; + // phase one: search without seen until lower bound - 1 + for (; iter < lower_bound; iter++) { + IterativeLengthPhaseOne(v_size, v, e, (iter & 1) ? visit1 : visit2, + (iter & 1) ? visit2 : visit1); + } + + // phase two: search with seen until upper bound + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + auto search_num = lane_to_num[lane]; + if (search_num >= 0) { + int64_t dst_pos = vdata_dst.sel->get_index(search_num); + auto length = IterativeLengthInternal(lane, v_size, + dst_data[dst_pos], upper_bound - lower_bound + 1, v, e, (iter & 1) ? visit1 : visit2); + if (length >= 0) { + result_data[search_num] = length + lower_bound - 1; + lane_to_num[lane] = -1; // mark inactive + } + } + } + + // no changes anymore: any still active searches have no path + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + int64_t search_num = lane_to_num[lane]; + if (search_num >= 0) { // active lane + result_validity.SetInvalid(search_num); + result_data[search_num] = (int64_t)-1; /* no path */ + lane_to_num[lane] = -1; // mark inactive + } + } + } + duckpgq_state->csr_to_delete.insert(info.csr_id); +} + +// CreateScalarFunctionInfo DuckPGQFunctions::GetIterativeLengthLowerBoundFunction() { +// auto fun = ScalarFunction("iterativelength_lowerbound", +// {LogicalType::INTEGER, LogicalType::BIGINT, +// LogicalType::BIGINT, LogicalType::BIGINT, +// LogicalType::BIGINT, LogicalType::BIGINT}, +// LogicalType::BIGINT, IterativeLengthLowerBoundFunction, +// IterativeLengthFunctionData::IterativeLengthBind); +// return CreateScalarFunctionInfo(fun); +// } + +} // namespace duckdb diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp index 63daba93..c1af6859 100644 --- a/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp @@ -13,18 +13,19 @@ namespace duckdb { static bool IterativeLengthLowerBound(int64_t v_size, int64_t *V, vector &E, vector &edge_ids, - vector>> &parents_v, + // vector>> &parents_v, vector>> &paths_v, vector>> &paths_e, vector> &seen, vector> &visit, vector> &next) { bool change = false; - map, unordered_set> parents_v_cache; + // map, unordered_set> parents_v_cache; map, vector> paths_v_cache; map, vector> paths_e_cache; for (auto v = 0; v < v_size; v++) { next[v] = 0; + seen[v] = 0; } //! Keep track of edge id through which the node was reached for (auto v = 0; v < v_size; v++) { @@ -36,17 +37,17 @@ static bool IterativeLengthLowerBound(int64_t v_size, int64_t *V, vector> visit1(v_size); vector> visit2(v_size); - vector>> parents_v(v_size, std::vector>(LANE_LIMIT)); + // vector>> parents_v(v_size, std::vector>(LANE_LIMIT)); vector>> paths_v(v_size, std::vector>(LANE_LIMIT)); vector>> paths_e(v_size, std::vector>(LANE_LIMIT)); @@ -156,17 +157,10 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta while (started_searches < args.size()) { int64_t search_num = started_searches++; int64_t src_pos = vdata_src.sel->get_index(search_num); - int64_t dst_pos = vdata_dst.sel->get_index(search_num); if (!vdata_src.validity.RowIsValid(src_pos)) { result_validity.SetInvalid(search_num); - } else if (src_data[src_pos] == dst_data[dst_pos]) { - visit1[src_data[src_pos]][lane] = true; - lane_to_num[lane] = search_num; // active lane - active++; - break; } else { visit1[src_data[src_pos]][lane] = true; - seen[src_data[src_pos]][lane] = true; lane_to_num[lane] = search_num; // active lane active++; break; @@ -177,7 +171,7 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta //! make passes while a lane is still active for (int64_t iter = 1; active && iter <= upper_bound; iter++) { //! Perform one step of bfs exploration - if (!IterativeLengthLowerBound(v_size, v, e, edge_ids, parents_v, paths_v, paths_e, seen, + if (!IterativeLengthLowerBound(v_size, v, e, edge_ids, paths_v, paths_e, seen, (iter & 1) ? visit1 : visit2, (iter & 1) ? visit2 : visit1)) { break; @@ -192,11 +186,6 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta // check if the path length is within bounds // bound vector is either a constant or a flat vector if (iter < lower_bound) { - // when reach the destination too early, treat destination as null - // looks like the graph does not have that vertex - seen[dst_data[dst_pos]][lane] = false; - (iter & 1) ? visit2[dst_data[dst_pos]][lane] = false - : visit1[dst_data[dst_pos]][lane] = false; continue; } else { vector output_vector; @@ -240,15 +229,15 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta duckpgq_state->csr_to_delete.insert(info.csr_id); } -CreateScalarFunctionInfo DuckPGQFunctions::GetShortestPathLowerBoundFunction() { - auto fun = ScalarFunction("shortestpath_lowerbound", - {LogicalType::INTEGER, LogicalType::BIGINT, - LogicalType::BIGINT, LogicalType::BIGINT, - LogicalType::BIGINT, LogicalType::BIGINT}, - LogicalType::LIST(LogicalType::BIGINT), - ShortestPathLowerBoundFunction, - IterativeLengthFunctionData::IterativeLengthBind); - return CreateScalarFunctionInfo(fun); -} +// CreateScalarFunctionInfo DuckPGQFunctions::GetShortestPathLowerBoundFunction() { +// auto fun = ScalarFunction("shortestpath_lowerbound", +// {LogicalType::INTEGER, LogicalType::BIGINT, +// LogicalType::BIGINT, LogicalType::BIGINT, +// LogicalType::BIGINT, LogicalType::BIGINT}, +// LogicalType::LIST(LogicalType::BIGINT), +// ShortestPathLowerBoundFunction, +// IterativeLengthFunctionData::IterativeLengthBind); +// return CreateScalarFunctionInfo(fun); +// } } // namespace duckdb diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp new file mode 100644 index 00000000..93adb3d7 --- /dev/null +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp @@ -0,0 +1,334 @@ +#include "duckdb/common/fstream.hpp" +#include "duckdb/common/profiler.hpp" +#include "duckdb/main/client_data.hpp" +#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" +#include "duckpgq/common.hpp" +#include "duckpgq/duckpgq_functions.hpp" + +#include +#include + +namespace duckdb { + +static bool IterativeLengthPhaseOne(int64_t v_size, int64_t *V, vector &E, + int64_t iter, vector &edge_ids, + vector>> &paths_v, + vector>> &paths_e, + vector> &seen, + vector> &visit, + vector> &next) { + bool change = false; + for (auto v = 0; v < v_size; v++) { + next[v] = 0; + seen[v] = 0; + } + //! Keep track of edge id through which the node was reached + for (auto v = 0; v < v_size; v++) { + if (visit[v].any()) { + for (auto e = V[v]; e < V[v + 1]; e++) { + auto n = E[e]; + auto edge_id = edge_ids[e]; + next[n] = next[n] | visit[v]; + for (auto lane = 0; lane < LANE_LIMIT; lane++) { + if (visit[v][lane]) { + paths_v[n][lane][iter] = v; + paths_e[n][lane][iter] = edge_id; + } + } + } + } + } + + for (auto v = 0; v < v_size; v++) { + seen[v] = seen[v] | next[v]; + change |= next[v].any(); + } + return change; +} + +static bool IterativeLengthPhaseTwo(int64_t v_size, int64_t *V, vector &E, + vector &edge_ids, + vector> &parents_v, + vector> &parents_e, + vector> &seen, + vector> &visit, + vector> &next) { + bool change = false; + for (auto v = 0; v < v_size; v++) { + next[v] = 0; + } + //! Keep track of edge id through which the node was reached + for (auto v = 0; v < v_size; v++) { + if (visit[v].any()) { + for (auto e = V[v]; e < V[v + 1]; e++) { + auto n = E[e]; + auto edge_id = edge_ids[e]; + next[n] = next[n] | visit[v]; + for (auto l = 0; l < LANE_LIMIT; l++) { + parents_v[n][l] = + ((parents_v[n][l] == -1) && visit[v][l]) ? v : parents_v[n][l]; + parents_e[n][l] = ((parents_e[n][l] == -1) && visit[v][l]) + ? edge_id + : parents_e[n][l]; + } + } + } + } + + for (auto v = 0; v < v_size; v++) { + next[v] = next[v] & ~seen[v]; + seen[v] = seen[v] | next[v]; + change |= next[v].any(); + } + return change; +} + +static std::tuple> ShortestPathInternal(int64_t lane, int64_t v_size, int64_t destination, + int64_t bound, + int64_t *v, vector &e, vector &edge_ids, + vector> &visit) { + vector src; + vector result; + for (int64_t v = 0; v < v_size; v++) { + if (visit[v][lane]) { + src.push_back(v); + } + } + vector> seen(v_size); + vector> visit1(v_size); + vector> visit2(v_size); + + vector> parents_v(v_size, + std::vector(LANE_LIMIT, -1)); + vector> parents_e(v_size, + std::vector(LANE_LIMIT, -1)); + + + // maps lane to search number + int16_t lane_to_num[LANE_LIMIT]; + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + lane_to_num[lane] = -1; // inactive + } + + idx_t started_searches = 0; + while (started_searches < src.size()) { + for (auto i = 0; i < v_size; i++) { + seen[i] = 0; + visit1[i] = 0; + } + // add search jobs to free lanes + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + if (started_searches < src.size()) { + int64_t search_num = started_searches++; + visit1[src[search_num]][lane] = true; + lane_to_num[lane] = search_num; + } else { + break; + } + } + + for (int64_t iter = 1; iter <= bound; iter++) { + if (!IterativeLengthPhaseTwo(v_size, v, e, edge_ids, parents_v, parents_e, + seen, (iter & 1) ? visit1 : visit2, + (iter & 1) ? visit2 : visit1)) { + break; + } + // detect lanes that found the destination + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + if (seen[destination][lane]) { + auto search_num = lane_to_num[lane]; + + // found the destination, reconstruct the path + auto parent_vertex = parents_v[destination][lane]; + auto parent_edge = parents_e[destination][lane]; + + result.push_back(destination); + result.push_back(parent_edge); + while (parent_vertex != src[search_num]) { + result.push_back(parent_vertex); + parent_edge = parents_e[parent_vertex][lane]; + parent_vertex = parents_v[parent_vertex][lane]; + result.push_back(parent_edge); + } + result.push_back(src[search_num]); + std::reverse(result.begin(), result.end()); + return std::make_tuple(src[search_num], result); + } + } + } + } + return std::make_tuple(-1, result); +} + +static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &state, + Vector &result) { + auto &func_expr = (BoundFunctionExpression &)state.expr; + auto &info = (IterativeLengthFunctionData &)*func_expr.bind_info; + auto duckpgq_state_entry = info.context.registered_state.find("duckpgq"); + if (duckpgq_state_entry == info.context.registered_state.end()) { + //! Wondering how you can get here if the extension wasn't loaded, but + //! leaving this check in anyways + throw MissingExtensionException( + "The DuckPGQ extension has not been loaded"); + } + auto duckpgq_state = + reinterpret_cast(duckpgq_state_entry->second.get()); + + D_ASSERT(duckpgq_state->csr_list[info.csr_id]); + int32_t id = args.data[0].GetValue(0).GetValue(); + int64_t v_size = args.data[1].GetValue(0).GetValue(); + + int64_t *v = (int64_t *)duckpgq_state->csr_list[id]->v; + vector &e = duckpgq_state->csr_list[id]->e; + vector &edge_ids = duckpgq_state->csr_list[id]->edge_ids; + + auto &src = args.data[2]; + auto &target = args.data[3]; + + UnifiedVectorFormat vdata_src, vdata_dst; + src.ToUnifiedFormat(args.size(), vdata_src); + target.ToUnifiedFormat(args.size(), vdata_dst); + + auto src_data = (int64_t *)vdata_src.data; + auto dst_data = (int64_t *)vdata_dst.data; + + // get lowerbound and upperbound + auto &lower = args.data[4]; + auto &upper = args.data[5]; + UnifiedVectorFormat vdata_lower_bound; + UnifiedVectorFormat vdata_upper_bound; + lower.ToUnifiedFormat(args.size(), vdata_lower_bound); + upper.ToUnifiedFormat(args.size(), vdata_upper_bound); + auto lower_bound = ((int64_t *)vdata_lower_bound.data)[0]; + auto upper_bound = ((int64_t *)vdata_upper_bound.data)[0]; + + result.SetVectorType(VectorType::FLAT_VECTOR); + auto result_data = FlatVector::GetData(result); + ValidityMask &result_validity = FlatVector::Validity(result); + + // create temp SIMD arrays + vector> seen(v_size); + vector> visit1(v_size); + vector> visit2(v_size); + + vector>> paths_v(v_size, + std::vector>(LANE_LIMIT)); + vector>> paths_e(v_size, + std::vector>(LANE_LIMIT)); + + + // maps lane to search number + int16_t lane_to_num[LANE_LIMIT]; + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + lane_to_num[lane] = -1; // inactive + } + int64_t total_len = 0; + + idx_t started_searches = 0; + while (started_searches < args.size()) { + + // empty visit vectors + for (auto i = 0; i < v_size; i++) { + seen[i] = 0; + visit1[i] = 0; + } + + // add search jobs to free lanes + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + lane_to_num[lane] = -1; + while (started_searches < args.size()) { + int64_t search_num = started_searches++; + int64_t src_pos = vdata_src.sel->get_index(search_num); + if (!vdata_src.validity.RowIsValid(src_pos)) { + result_validity.SetInvalid(search_num); + } else { + visit1[src_data[src_pos]][lane] = true; + lane_to_num[lane] = search_num; // active lane + break; + } + } + } + + int64_t iter = 1; + for (; iter < lower_bound; iter++) { + IterativeLengthPhaseOne(v_size, v, e, iter, edge_ids, paths_v, paths_e, seen, + (iter & 1) ? visit1 : visit2, + (iter & 1) ? visit2 : visit1); + } + + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + auto search_num = lane_to_num[lane]; + if (search_num >= 0) { + int64_t src_pos = vdata_src.sel->get_index(search_num); + int64_t dst_pos = vdata_dst.sel->get_index(search_num); + auto phase_two_result = ShortestPathInternal(lane, v_size, dst_data[dst_pos], + upper_bound - lower_bound + 1, v, e, edge_ids, (iter & 1) ? visit1 : visit2); + auto phase_two_src = std::get<0>(phase_two_result); + auto phase_two_path = std::get<1>(phase_two_result); + if (phase_two_src >= 0) { + vector output_vector; + // construct the path of phase one + if (paths_v[phase_two_src][lane].size() > 0) { + auto parent_vertex = paths_v[phase_two_src][lane][lower_bound - 1]; + auto parent_edge = paths_e[phase_two_src][lane][lower_bound - 1]; + + output_vector.push_back(parent_edge); + while (parent_vertex != src_data[src_pos]) { + output_vector.push_back(parent_vertex); + parent_edge = paths_e[parent_vertex][lane][lower_bound - 1]; + parent_vertex = paths_v[parent_vertex][lane][lower_bound - 1]; + output_vector.push_back(parent_edge); + } + output_vector.push_back(src_data[src_pos]); + std::reverse(output_vector.begin(), output_vector.end()); + } + + // construct the path of phase two + for (auto val : phase_two_path) { + output_vector.push_back(val); + } + + // construct the output + auto output = make_uniq(LogicalType::LIST(LogicalType::BIGINT)); + for (auto val : output_vector) { + Value value_to_insert = val; + ListVector::PushBack(*output, value_to_insert); + } + result_data[search_num].length = ListVector::GetListSize(*output); + result_data[search_num].offset = total_len; + ListVector::Append(result, ListVector::GetEntry(*output), + ListVector::GetListSize(*output)); + total_len += result_data[search_num].length; + lane_to_num[lane] = -1; // mark inactive + } else { + result_validity.SetInvalid(search_num); + lane_to_num[lane] = -1; // mark inactive + } + } + } + + // no changes anymore: any still active searches have no path + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + int64_t search_num = lane_to_num[lane]; + if (search_num >= 0) { // active lane + result_validity.SetInvalid(search_num); + lane_to_num[lane] = -1; // mark inactive + } + } + } + duckpgq_state->csr_to_delete.insert(info.csr_id); +} + +CreateScalarFunctionInfo DuckPGQFunctions::GetShortestPathLowerBoundFunction() { + auto fun = ScalarFunction("shortestpath_lowerbound", + {LogicalType::INTEGER, LogicalType::BIGINT, + LogicalType::BIGINT, LogicalType::BIGINT, + LogicalType::BIGINT, LogicalType::BIGINT}, + LogicalType::LIST(LogicalType::BIGINT), + ShortestPathLowerBoundFunction, + IterativeLengthFunctionData::IterativeLengthBind); + return CreateScalarFunctionInfo(fun); +} + +} // namespace duckdb diff --git a/test/sql/path-finding/iterativelength.test b/test/sql/path-finding/iterativelength.test new file mode 100644 index 00000000..a93fb82c --- /dev/null +++ b/test/sql/path-finding/iterativelength.test @@ -0,0 +1,196 @@ +# name: test/sql/path-finding/iterativelength.test +# group: [iterative] + +statement ok +pragma enable_verification + +require duckpgq + +# Graph to test shortest path bound with a cycle +# (0) --> (1) <-> (2) + +statement ok +CREATE TABLE Point3(id BIGINT); INSERT INTO Point3 VALUES (0), (1), (2); + +statement ok +CREATE TABLE know3(src BIGINT, dst BIGINT); INSERT INTO know3 VALUES (0, 1), (1, 2), (2, 1); + +statement ok +-CREATE PROPERTY GRAPH pg3 +VERTEX TABLES ( + Point3 PROPERTIES ( id ) LABEL Pnt + ) +EDGE TABLES ( + know3 SOURCE KEY ( src ) REFERENCES Point3 ( id ) + DESTINATION KEY ( dst ) REFERENCES Point3 ( id ) + LABEL Knows + ); + +query III +WITH cte1 AS ( + SELECT CREATE_CSR_EDGE( + 0, + (SELECT count(a.id) FROM Point3 a), + CAST ( + (SELECT sum(CREATE_CSR_VERTEX( + 0, + (SELECT count(a.id) FROM Point3 a), + sub.dense_id, + sub.cnt) + ) + FROM ( + SELECT a.rowid as dense_id, count(k.src) as cnt + FROM Point3 a + LEFT JOIN know3 k ON k.src = a.id + GROUP BY a.rowid) sub + ) + AS BIGINT), + a.rowid, + c.rowid, + k.rowid) as temp + FROM know3 k + JOIN Point3 a on a.id = k.src + JOIN Point3 c on c.id = k.dst +) SELECT a.id, b.id, iterativelength_lowerbound(0, (select count(*) from Point3), a.rowid, b.rowid, 2, 3) as path_length + FROM Point3 a, Point3 b, (select count(cte1.temp) * 0 as temp from cte1) __x + WHERE __x.temp * 0 + iterativelength_lowerbound(0, (select count(*) from Point3), a.rowid, b.rowid, 2, 3); +---- +0 1 3 +0 2 2 +1 1 2 +1 2 3 +2 1 3 +2 2 2 + +# (0) --> (1) --> (4) +# ↗ ↘ +# (3) <-- (2) + +statement ok +CREATE TABLE Point4(id BIGINT); INSERT INTO Point4 VALUES (0), (1), (2), (3), (4); + +statement ok +CREATE TABLE know4(src BIGINT, dst BIGINT); INSERT INTO know4 VALUES (0, 1), (1, 4), (1, 2), (2, 3), (3, 1); + +statement ok +-CREATE PROPERTY GRAPH pg4 +VERTEX TABLES ( + Point4 PROPERTIES ( id ) LABEL Pnt + ) +EDGE TABLES ( + know4 SOURCE KEY ( src ) REFERENCES Point4 ( id ) + DESTINATION KEY ( dst ) REFERENCES Point4 ( id ) + LABEL Knows + ); + +query III +WITH cte1 AS ( + SELECT CREATE_CSR_EDGE( + 0, + (SELECT count(a.id) FROM Point4 a), + CAST ( + (SELECT sum(CREATE_CSR_VERTEX( + 0, + (SELECT count(a.id) FROM Point4 a), + sub.dense_id, + sub.cnt) + ) + FROM ( + SELECT a.rowid as dense_id, count(k.src) as cnt + FROM Point4 a + LEFT JOIN know4 k ON k.src = a.id + GROUP BY a.rowid) sub + ) + AS BIGINT), + a.rowid, + c.rowid, + k.rowid) as temp + FROM know4 k + JOIN Point4 a on a.id = k.src + JOIN Point4 c on c.id = k.dst +) SELECT a.id, b.id, iterativelength_lowerbound(0, (select count(*) from Point4), a.rowid, b.rowid, 3, 10) as path_length + FROM Point4 a, Point4 b, (select count(cte1.temp) * 0 as temp from cte1) __x + WHERE __x.temp * 0 + iterativelength_lowerbound(0, (select count(*) from Point4), a.rowid, b.rowid, 3, 10); +---- +0 1 4 +0 2 5 +0 3 3 +0 4 5 +1 1 3 +1 2 4 +1 3 5 +1 4 4 +2 1 5 +2 2 3 +2 3 4 +2 4 3 +3 1 4 +3 2 5 +3 3 3 +3 4 5 + +# (0) --> (1) --> (4) +# ↗ ↘ +# (3) <-- (2) + +statement ok +CREATE TABLE Point5(id BIGINT); INSERT INTO Point5 VALUES (0), (1), (2), (3), (4); + +statement ok +CREATE TABLE know5(src BIGINT, dst BIGINT); INSERT INTO know5 VALUES (0, 1), (1, 4), (0, 2), (2, 3), (3, 0); + +statement ok +-CREATE PROPERTY GRAPH pg5 +VERTEX TABLES ( + Point5 PROPERTIES ( id ) LABEL Pnt + ) +EDGE TABLES ( + know5 SOURCE KEY ( src ) REFERENCES Point5 ( id ) + DESTINATION KEY ( dst ) REFERENCES Point5 ( id ) + LABEL Knows + ); + +query III +WITH cte1 AS ( + SELECT CREATE_CSR_EDGE( + 0, + (SELECT count(a.id) FROM Point5 a), + CAST ( + (SELECT sum(CREATE_CSR_VERTEX( + 0, + (SELECT count(a.id) FROM Point5 a), + sub.dense_id, + sub.cnt) + ) + FROM ( + SELECT a.rowid as dense_id, count(k.src) as cnt + FROM Point5 a + LEFT JOIN know5 k ON k.src = a.id + GROUP BY a.rowid) sub + ) + AS BIGINT), + a.rowid, + c.rowid, + k.rowid) as temp + FROM know5 k + JOIN Point5 a on a.id = k.src + JOIN Point5 c on c.id = k.dst +) SELECT a.id, b.id, iterativelength_lowerbound(0, (select count(*) from Point5), a.rowid, b.rowid, 3, 10) as path_length + FROM Point5 a, Point5 b, (select count(cte1.temp) * 0 as temp from cte1) __x + WHERE __x.temp * 0 + iterativelength_lowerbound(0, (select count(*) from Point5), a.rowid, b.rowid, 3, 10); +---- +0 0 3 +0 1 4 +0 2 4 +0 3 5 +0 4 5 +2 0 5 +2 1 3 +2 2 3 +2 3 4 +2 4 4 +3 0 4 +3 1 5 +3 2 5 +3 3 3 +3 4 3 \ No newline at end of file diff --git a/test/sql/path-finding/shortest_path_bound.test b/test/sql/path-finding/shortest_path_bound.test index 97ee93ba..9d32f589 100644 --- a/test/sql/path-finding/shortest_path_bound.test +++ b/test/sql/path-finding/shortest_path_bound.test @@ -53,12 +53,12 @@ WITH cte1 AS ( FROM Know k JOIN Point a on a.id = k.src JOIN Point c on c.id = k.dst -) SELECT a.id as srd_id, b.id as dst_id, iterativelength_lowerbound(0, (select count(*) from Point), a.rowid, b.rowid, 2, 3) as path_length +) SELECT a.id as srd_id, b.id as dst_id, shortestpath_lowerbound(0, (select count(*) from Point), a.rowid, b.rowid, 2, 3) as path FROM Point a, Point b, (select count(cte1.temp) * 0 as temp from cte1) __x WHERE a.id = 0 and __x.temp * 0 + iterativelength_lowerbound(0, (select count(*) from Point), a.rowid, b.rowid, 2, 3); ---- -0 1 3 -0 3 2 +0 1 [0, 1, 2, 2, 3, 3, 1] +0 3 [0, 1, 2, 2, 3] query III WITH cte1 AS ( @@ -85,13 +85,13 @@ WITH cte1 AS ( FROM Know k JOIN Point a on a.id = k.src JOIN Point c on c.id = k.dst -) SELECT a.id, b.id, iterativelength(0, (select count(*) from Point), a.rowid, b.rowid, 1, 3) as path_length +) SELECT a.id, b.id, shortestpath_lowerbound(0, (select count(*) from Point), a.rowid, b.rowid, 1, 3) as path FROM Point a, Point b, (select count(cte1.temp) * 0 as temp from cte1) __x WHERE a.id = 0 and __x.temp * 0 + iterativelength(0, (select count(*) from Point), a.rowid, b.rowid, 1, 3); ---- -0 1 1 -0 2 1 -0 3 2 +0 1 [0, 0, 1] +0 2 [0, 1, 2] +0 3 [0, 1, 2, 2, 3] # Graph to test shortest path bound with a cycle # (0) --> (1) <-> (2) @@ -139,11 +139,12 @@ WITH cte1 AS ( FROM know2 k JOIN Point2 a on a.id = k.src JOIN Point2 c on c.id = k.dst -) SELECT a.id, b.id, iterativelength_lowerbound(0, (select count(*) from Point2), a.rowid, b.rowid, 2, 30) as path_length +) SELECT a.id, b.id, shortestpath_lowerbound(0, (select count(*) from Point2), a.rowid, b.rowid, 2, 30) as path FROM Point2 a, Point2 b, (select count(cte1.temp) * 0 as temp from cte1) __x WHERE a.id = 0 and __x.temp * 0 + iterativelength_lowerbound(0, (select count(*) from Point2), a.rowid, b.rowid, 2, 30); ---- -0 2 2 +0 1 [0, 0, 1, 1, 2, 2, 1] +0 2 [0, 0, 1, 1, 2] # Graph to test shortest path bound with a cycle @@ -192,11 +193,16 @@ WITH cte1 AS ( FROM know3 k JOIN Point3 a on a.id = k.src JOIN Point3 c on c.id = k.dst -) SELECT a.id, b.id, iterativelength_lowerbound(0, (select count(*) from Point3), a.rowid, b.rowid, 2, 3) as path_length +) SELECT a.id, b.id, shortestpath_lowerbound(0, (select count(*) from Point3), a.rowid, b.rowid, 2, 3) as path FROM Point3 a, Point3 b, (select count(cte1.temp) * 0 as temp from cte1) __x - WHERE a.id = 0 and __x.temp * 0 + iterativelength_lowerbound(0, (select count(*) from Point3), a.rowid, b.rowid, 2, 3); + WHERE __x.temp * 0 + iterativelength_lowerbound(0, (select count(*) from Point3), a.rowid, b.rowid, 2, 3); ---- -0 0 2 +0 0 [0, 1, 2, 2, 0] +0 1 [0, 1, 2, 2, 0, 0, 1] +0 2 [0, 1, 2, 2, 0, 1, 2] +2 0 [2, 2, 0, 1, 2, 2, 0] +2 1 [2, 2, 0, 0, 1] +2 2 [2, 2, 0, 1, 2] # Graph to test shortest path bound with a cycle # (1) <- (0) <-> (2) @@ -223,12 +229,16 @@ query III -FROM GRAPH_TABLE (pg4 MATCH p = ANY SHORTEST (a:Point4)-[k:know4]->{2,3}(b:Point4) - COLUMNS (a.id, b.id, vertices(p)) - ) tmp; + COLUMNS (a.id AS id1, b.id AS id2, element_id(p)) + ) tmp + ORDER BY tmp.id1, tmp.id2; ---- -0 0 [0, 2, 0] -2 1 [2, 0, 1] -2 2 [2, 0, 2] +0 0 [0, 1, 2, 2, 0] +0 1 [0, 1, 2, 2, 0, 0, 1] +0 2 [0, 1, 2, 2, 0, 1, 2] +2 0 [2, 2, 0, 1, 2, 2, 0] +2 1 [2, 2, 0, 0, 1] +2 2 [2, 2, 0, 1, 2] # Description: Test algorithm's capability to ignore isolated nodes. @@ -256,11 +266,13 @@ query III -FROM GRAPH_TABLE (pg5 MATCH p = ANY SHORTEST (a:Point5)-[k:know5]->{2,3}(b:Point5) - COLUMNS (a.id, b.id, vertices(p)) + COLUMNS (a.id, b.id, element_id(p)) ) tmp; ---- -0 0 [0, 2, 0] -2 2 [2, 0, 2] +0 0 [0, 0, 2, 1, 0] +2 0 [2, 1, 0, 0, 2, 1, 0] +0 2 [0, 0, 2, 1, 0, 0, 2] +2 2 [2, 1, 0, 0, 2] # Description: Test shortest paths in a graph with cycles. # Graph Structure: @@ -289,21 +301,25 @@ query III -FROM GRAPH_TABLE (pg6 MATCH p = ANY SHORTEST (a:Point6)-[k:know6]->{2,4}(b:Point6) - COLUMNS (a.id as id1, b.id as id2, vertices(p)) + COLUMNS (a.id as id1, b.id as id2, element_id(p)) ) tmp order by tmp.id1, tmp.id2; ---- -0 0 [0, 2, 0] -0 1 [0, 2, 3, 1] -0 3 [0, 2, 3] -1 1 [1, 0, 2, 3, 1] -1 2 [1, 0, 2] -1 3 [1, 0, 2, 3] -2 0 [2, 3, 1, 0] -2 1 [2, 3, 1] -2 2 [2, 3, 2] -3 0 [3, 2, 0] -3 2 [3, 1, 0, 2] -3 3 [3, 2, 3] +0 0 [0, 0, 2, 1, 0] +0 1 [0, 0, 2, 2, 3, 4, 1] +0 2 [0, 0, 2, 1, 0, 0, 2] +0 3 [0, 0, 2, 2, 3] +1 0 [1, 5, 0, 0, 2, 1, 0] +1 1 [1, 5, 0, 0, 2, 2, 3, 4, 1] +1 2 [1, 5, 0, 0, 2] +1 3 [1, 5, 0, 0, 2, 2, 3] +2 0 [2, 1, 0, 0, 2, 1, 0] +2 1 [2, 2, 3, 4, 1] +2 2 [2, 1, 0, 0, 2] +2 3 [2, 1, 0, 0, 2, 2, 3] +3 0 [3, 4, 1, 5, 0] +3 1 [3, 3, 2, 2, 3, 4, 1] +3 2 [3, 4, 1, 5, 0, 0, 2] +3 3 [3, 3, 2, 2, 3] From 40530e346a323188e74c4a141667cf043ce4472e Mon Sep 17 00:00:00 2001 From: Pingan Ren Date: Tue, 27 Feb 2024 11:43:21 +0100 Subject: [PATCH 31/39] implement path reconstruction --- .../scalar/iterativelength_lowerbound.cpp | 20 ++++----- .../scalar/iterativelength_two_phase.cpp | 21 +++++----- .../scalar/shortest_path_two_phase.cpp | 41 ++++++++----------- .../sql/path-finding/shortest_path_bound.test | 40 ++++++++++++++---- 4 files changed, 71 insertions(+), 51 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp index f23dfeb7..76a421dc 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp @@ -185,15 +185,15 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, duckpgq_state->csr_to_delete.insert(info.csr_id); } -CreateScalarFunctionInfo -DuckPGQFunctions::GetIterativeLengthLowerBoundFunction() { - auto fun = ScalarFunction( - "iterativelength_lowerbound", - {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, - LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, - LogicalType::BIGINT, IterativeLengthLowerBoundFunction, - IterativeLengthFunctionData::IterativeLengthBind); - return CreateScalarFunctionInfo(fun); -} +// CreateScalarFunctionInfo +// DuckPGQFunctions::GetIterativeLengthLowerBoundFunction() { +// auto fun = ScalarFunction( +// "iterativelength_lowerbound", +// {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, +// LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, +// LogicalType::BIGINT, IterativeLengthLowerBoundFunction, +// IterativeLengthFunctionData::IterativeLengthBind); +// return CreateScalarFunctionInfo(fun); +// } } // namespace duckdb diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength_two_phase.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength_two_phase.cpp index e270f5d5..a697fbcc 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength_two_phase.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength_two_phase.cpp @@ -163,7 +163,6 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, ExpressionState & auto result_data = FlatVector::GetData(result); // create temp SIMD arrays - vector> seen(v_size); vector> visit1(v_size); vector> visit2(v_size); @@ -178,7 +177,6 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, ExpressionState & // empty visit vectors for (auto i = 0; i < v_size; i++) { - seen[i] = 0; visit1[i] = 0; } @@ -236,14 +234,15 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, ExpressionState & duckpgq_state->csr_to_delete.insert(info.csr_id); } -// CreateScalarFunctionInfo DuckPGQFunctions::GetIterativeLengthLowerBoundFunction() { -// auto fun = ScalarFunction("iterativelength_lowerbound", -// {LogicalType::INTEGER, LogicalType::BIGINT, -// LogicalType::BIGINT, LogicalType::BIGINT, -// LogicalType::BIGINT, LogicalType::BIGINT}, -// LogicalType::BIGINT, IterativeLengthLowerBoundFunction, -// IterativeLengthFunctionData::IterativeLengthBind); -// return CreateScalarFunctionInfo(fun); -// } +CreateScalarFunctionInfo +DuckPGQFunctions::GetIterativeLengthLowerBoundFunction() { + auto fun = ScalarFunction( + "iterativelength_lowerbound", + {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, + LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, + LogicalType::BIGINT, IterativeLengthLowerBoundFunction, + IterativeLengthFunctionData::IterativeLengthBind); + return CreateScalarFunctionInfo(fun); +} } // namespace duckdb diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp index 93adb3d7..8b515bcd 100644 --- a/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp @@ -15,13 +15,11 @@ static bool IterativeLengthPhaseOne(int64_t v_size, int64_t *V, vector int64_t iter, vector &edge_ids, vector>> &paths_v, vector>> &paths_e, - vector> &seen, vector> &visit, vector> &next) { bool change = false; for (auto v = 0; v < v_size; v++) { next[v] = 0; - seen[v] = 0; } //! Keep track of edge id through which the node was reached for (auto v = 0; v < v_size; v++) { @@ -41,7 +39,6 @@ static bool IterativeLengthPhaseOne(int64_t v_size, int64_t *V, vector } for (auto v = 0; v < v_size; v++) { - seen[v] = seen[v] | next[v]; change |= next[v].any(); } return change; @@ -208,7 +205,6 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta ValidityMask &result_validity = FlatVector::Validity(result); // create temp SIMD arrays - vector> seen(v_size); vector> visit1(v_size); vector> visit2(v_size); @@ -230,7 +226,6 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta // empty visit vectors for (auto i = 0; i < v_size; i++) { - seen[i] = 0; visit1[i] = 0; } @@ -252,7 +247,7 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta int64_t iter = 1; for (; iter < lower_bound; iter++) { - IterativeLengthPhaseOne(v_size, v, e, iter, edge_ids, paths_v, paths_e, seen, + IterativeLengthPhaseOne(v_size, v, e, iter, edge_ids, paths_v, paths_e, (iter & 1) ? visit1 : visit2, (iter & 1) ? visit2 : visit1); } @@ -260,7 +255,6 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { auto search_num = lane_to_num[lane]; if (search_num >= 0) { - int64_t src_pos = vdata_src.sel->get_index(search_num); int64_t dst_pos = vdata_dst.sel->get_index(search_num); auto phase_two_result = ShortestPathInternal(lane, v_size, dst_data[dst_pos], upper_bound - lower_bound + 1, v, e, edge_ids, (iter & 1) ? visit1 : visit2); @@ -270,17 +264,17 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta vector output_vector; // construct the path of phase one if (paths_v[phase_two_src][lane].size() > 0) { - auto parent_vertex = paths_v[phase_two_src][lane][lower_bound - 1]; - auto parent_edge = paths_e[phase_two_src][lane][lower_bound - 1]; + auto iterations = lower_bound - 1; + auto parent_vertex = paths_v[phase_two_src][lane][iterations]; + auto parent_edge = paths_e[phase_two_src][lane][iterations]; - output_vector.push_back(parent_edge); - while (parent_vertex != src_data[src_pos]) { - output_vector.push_back(parent_vertex); - parent_edge = paths_e[parent_vertex][lane][lower_bound - 1]; - parent_vertex = paths_v[parent_vertex][lane][lower_bound - 1]; + while (iterations > 0) { output_vector.push_back(parent_edge); + output_vector.push_back(parent_vertex); + iterations--; + parent_edge = paths_e[parent_vertex][lane][iterations]; + parent_vertex = paths_v[parent_vertex][lane][iterations]; } - output_vector.push_back(src_data[src_pos]); std::reverse(output_vector.begin(), output_vector.end()); } @@ -320,14 +314,15 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta duckpgq_state->csr_to_delete.insert(info.csr_id); } -CreateScalarFunctionInfo DuckPGQFunctions::GetShortestPathLowerBoundFunction() { - auto fun = ScalarFunction("shortestpath_lowerbound", - {LogicalType::INTEGER, LogicalType::BIGINT, - LogicalType::BIGINT, LogicalType::BIGINT, - LogicalType::BIGINT, LogicalType::BIGINT}, - LogicalType::LIST(LogicalType::BIGINT), - ShortestPathLowerBoundFunction, - IterativeLengthFunctionData::IterativeLengthBind); +CreateScalarFunctionInfo +DuckPGQFunctions::GetShortestPathLowerBoundFunction() { + auto fun = ScalarFunction( + "shortestpath_lowerbound", + {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, + LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, + LogicalType::LIST(LogicalType::BIGINT), + ShortestPathLowerBoundFunction, + IterativeLengthFunctionData::IterativeLengthBind); return CreateScalarFunctionInfo(fun); } diff --git a/test/sql/path-finding/shortest_path_bound.test b/test/sql/path-finding/shortest_path_bound.test index 3d57a5c0..da12c592 100644 --- a/test/sql/path-finding/shortest_path_bound.test +++ b/test/sql/path-finding/shortest_path_bound.test @@ -206,7 +206,6 @@ WITH cte1 AS ( # Graph to test shortest path bound with a cycle # (1) <- (0) <-> (2) -# 0 to 1 is 1 hop statement ok CREATE TABLE Point4(id BIGINT); INSERT INTO Point4 VALUES (0), (1), (2); @@ -240,9 +239,37 @@ query III 2 1 [2, 2, 0, 0, 1] 2 2 [2, 2, 0, 1, 2] +query IIIII +-FROM GRAPH_TABLE (pg4 + MATCH + p = ANY SHORTEST (a:Point4)-[k:know4]->{1,5}(b:Point4) + COLUMNS (a.id as id1, b.id as id2, element_id(p) as elements, vertices(p) as vertices, path_length(p) as length) + ) tmp + order by tmp.id1, tmp.id2; +---- +0 0 [0, 1, 2, 2, 0] [0, 2, 0] 2 +0 1 [0, 0, 1] [0, 1] 1 +0 2 [0, 1, 2] [0, 2] 1 +2 0 [2, 2, 0] [2, 0] 1 +2 1 [2, 2, 0, 0, 1] [2, 0, 1] 2 +2 2 [2, 2, 0, 1, 2] [2, 0, 2] 2 + +query IIIII +-FROM GRAPH_TABLE (pg4 + MATCH + p = ANY SHORTEST (a:Point4)-[k:know4]->{10,15}(b:Point4) + COLUMNS (a.id as id1, b.id as id2, element_id(p) as elements, vertices(p) as vertices, path_length(p) as length) + ) tmp + order by tmp.id1, tmp.id2; +---- +0 0 [0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0] [0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0] 10 +0 1 [0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1] [0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 1] 11 +0 2 [0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2] [0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2] 11 +2 0 [2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0] [2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0] 11 +2 1 [2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1] [2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 1] 10 +2 2 [2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2] [2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2] 10 # Description: Test algorithm's capability to ignore isolated nodes. -# Graph Structure: # (0) <-> (2), (1), (3) statement ok @@ -275,8 +302,7 @@ query III 2 2 [2, 1, 0, 0, 2] # Description: Test shortest paths in a graph with cycles. -# Graph Structure: -# (0) <-> (2) <-> (3) -> (1) (selfloop) +# (0) <-> (2) <-> (3) -> (1) ⮌ # ↑ | # └----------------------┘ @@ -309,8 +335,8 @@ query III 0 1 [0, 0, 2, 2, 3, 4, 1] 0 2 [0, 0, 2, 1, 0, 0, 2] 0 3 [0, 0, 2, 2, 3] -1 0 [1, 5, 0, 0, 2, 1, 0] -1 1 [1, 5, 0, 0, 2, 2, 3, 4, 1] +1 0 [1, 6, 1, 5, 0] +1 1 [1, 6, 1, 6, 1] 1 2 [1, 5, 0, 0, 2] 1 3 [1, 5, 0, 0, 2, 2, 3] 2 0 [2, 1, 0, 0, 2, 1, 0] @@ -318,7 +344,7 @@ query III 2 2 [2, 1, 0, 0, 2] 2 3 [2, 1, 0, 0, 2, 2, 3] 3 0 [3, 4, 1, 5, 0] -3 1 [3, 3, 2, 2, 3, 4, 1] +3 1 [3, 4, 1, 6, 1] 3 2 [3, 4, 1, 5, 0, 0, 2] 3 3 [3, 3, 2, 2, 3] From 49c957f7463bdc6a02943046bd67f04ca7534748 Mon Sep 17 00:00:00 2001 From: Pingan Ren Date: Wed, 28 Feb 2024 11:20:45 +0100 Subject: [PATCH 32/39] Updating the path record of the old algorithm --- .../scalar/iterativelength_lowerbound.cpp | 62 +++------ .../scalar/iterativelength_two_phase.cpp | 22 ++- .../scalar/shortest_path_lowerbound.cpp | 128 +++++++----------- .../scalar/shortest_path_two_phase.cpp | 48 ++++--- 4 files changed, 109 insertions(+), 151 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp index 76a421dc..2488d1cd 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength_lowerbound.cpp @@ -16,18 +16,14 @@ static bool IterativeLengthLowerBound(int64_t v_size, int64_t *v, vector> seen(v_size); vector> visit1(v_size); vector> visit2(v_size); - vector>> parents_v( - v_size, std::vector>(LANE_LIMIT)); // maps lane to search number short lane_to_num[LANE_LIMIT]; @@ -119,7 +113,6 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, } // add search jobs to free lanes - uint64_t active = 0; for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { lane_to_num[lane] = -1; while (started_searches < args.size()) { @@ -132,41 +125,30 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, result_data[search_num] = (int64_t)-1; /* initialize to no path */ visit1[src_data[src_pos]][lane] = true; lane_to_num[lane] = search_num; // active lane - active++; break; } } } // make passes while a lane is still active - for (int64_t iter = 1; active && iter <= upper_bound; iter++) { + for (int64_t iter = 1; iter <= upper_bound; iter++) { if (!IterativeLengthLowerBound(v_size, v, e, seen, (iter & 1) ? visit1 : visit2, (iter & 1) ? visit2 : visit1)) { break; } + if (iter < lower_bound) { + continue; + } // detect lanes that finished for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { int64_t search_num = lane_to_num[lane]; if (search_num >= 0) { // active lane int64_t dst_pos = vdata_dst.sel->get_index(search_num); if (seen[dst_data[dst_pos]][lane]) { - - // check if the path length is within bounds - // bound vector is either a constant or a flat vector - if (iter < lower_bound) { - // when reach the destination too early, treat destination as null - // looks like the graph does not have that vertex - // seen[dst_data[dst_pos]][lane] = false; - // (iter & 1) ? visit2[dst_data[dst_pos]][lane] = false - // : visit1[dst_data[dst_pos]][lane] = false; - continue; - } else { - result_data[search_num] = - iter; /* found at iter => iter = path length */ - lane_to_num[lane] = -1; // mark inactive - active--; - } + result_data[search_num] = + iter; /* found at iter => iter = path length */ + lane_to_num[lane] = -1; // mark inactive } } } @@ -185,15 +167,15 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, duckpgq_state->csr_to_delete.insert(info.csr_id); } -// CreateScalarFunctionInfo -// DuckPGQFunctions::GetIterativeLengthLowerBoundFunction() { -// auto fun = ScalarFunction( -// "iterativelength_lowerbound", -// {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, -// LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, -// LogicalType::BIGINT, IterativeLengthLowerBoundFunction, -// IterativeLengthFunctionData::IterativeLengthBind); -// return CreateScalarFunctionInfo(fun); -// } +CreateScalarFunctionInfo +DuckPGQFunctions::GetIterativeLengthLowerBoundFunction() { + auto fun = ScalarFunction( + "iterativelength_lowerbound", + {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, + LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, + LogicalType::BIGINT, IterativeLengthLowerBoundFunction, + IterativeLengthFunctionData::IterativeLengthBind); + return CreateScalarFunctionInfo(fun); +} } // namespace duckdb diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength_two_phase.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength_two_phase.cpp index a697fbcc..a6daab73 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength_two_phase.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength_two_phase.cpp @@ -181,7 +181,6 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, ExpressionState & } // add search jobs to free lanes - uint64_t active = 0; for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { lane_to_num[lane] = -1; while (started_searches < args.size()) { @@ -194,7 +193,6 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, ExpressionState & result_data[search_num] = (int64_t)-1; /* initialize to no path */ visit1[src_data[src_pos]][lane] = true; lane_to_num[lane] = search_num; // active lane - active++; break; } } @@ -234,15 +232,15 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, ExpressionState & duckpgq_state->csr_to_delete.insert(info.csr_id); } -CreateScalarFunctionInfo -DuckPGQFunctions::GetIterativeLengthLowerBoundFunction() { - auto fun = ScalarFunction( - "iterativelength_lowerbound", - {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, - LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, - LogicalType::BIGINT, IterativeLengthLowerBoundFunction, - IterativeLengthFunctionData::IterativeLengthBind); - return CreateScalarFunctionInfo(fun); -} +// CreateScalarFunctionInfo +// DuckPGQFunctions::GetIterativeLengthLowerBoundFunction() { +// auto fun = ScalarFunction( +// "iterativelength_lowerbound", +// {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, +// LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, +// LogicalType::BIGINT, IterativeLengthLowerBoundFunction, +// IterativeLengthFunctionData::IterativeLengthBind); +// return CreateScalarFunctionInfo(fun); +// } } // namespace duckdb diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp index 03d87e41..d7b8c671 100644 --- a/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp @@ -12,16 +12,13 @@ namespace duckdb { static bool IterativeLengthLowerBound(int64_t v_size, int64_t *V, vector &E, - vector &edge_ids, - vector>> &paths_v, - vector>> &paths_e, + int64_t iter, vector &edge_ids, + vector>> &paths_v, + vector>> &paths_e, vector> &seen, vector> &visit, vector> &next) { bool change = false; - // map, unordered_set> parents_v_cache; - map, vector> paths_v_cache; - map, vector> paths_e_cache; for (auto v = 0; v < v_size; v++) { next[v] = 0; seen[v] = 0; @@ -32,42 +29,17 @@ static bool IterativeLengthLowerBound(int64_t v_size, int64_t *V, vector> visit1(v_size); vector> visit2(v_size); - vector>> paths_v( - v_size, std::vector>(LANE_LIMIT)); - vector>> paths_e( - v_size, std::vector>(LANE_LIMIT)); + vector>> paths_v(v_size, + std::vector>(LANE_LIMIT)); + vector>> paths_e(v_size, + std::vector>(LANE_LIMIT)); // maps lane to search number int16_t lane_to_num[LANE_LIMIT]; @@ -146,6 +118,10 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, for (auto i = 0; i < v_size; i++) { seen[i] = 0; visit1[i] = 0; + for (auto j = 0; j < LANE_LIMIT; j++) { + paths_v[i][j].clear(); + paths_v[i][j].clear(); + } } // add search jobs to free lanes @@ -170,10 +146,13 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, for (int64_t iter = 1; active && iter <= upper_bound; iter++) { //! Perform one step of bfs exploration if (!IterativeLengthLowerBound( - v_size, v, e, edge_ids, paths_v, paths_e, seen, + v_size, v, e, iter, edge_ids, paths_v, paths_e, seen, (iter & 1) ? visit1 : visit2, (iter & 1) ? visit2 : visit1)) { break; } + if (iter < lower_bound) { + continue; + } // detect lanes that finished for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { int64_t search_num = lane_to_num[lane]; @@ -181,36 +160,31 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, //! Check if dst for a source has been seen int64_t dst_pos = vdata_dst.sel->get_index(search_num); if (seen[dst_data[dst_pos]][lane]) { - // check if the path length is within bounds - // bound vector is either a constant or a flat vector - if (iter < lower_bound) { - continue; - } else { - vector output_vector; - auto it_v = paths_v[dst_data[dst_pos]][lane].begin(), - end_v = paths_v[dst_data[dst_pos]][lane].end(); - auto it_e = paths_e[dst_data[dst_pos]][lane].begin(), - end_e = paths_e[dst_data[dst_pos]][lane].end(); - while (it_v != end_v && it_e != end_e) { - output_vector.push_back(*it_v); - output_vector.push_back(*it_e); - it_v++; - it_e++; - } - output_vector.push_back(dst_data[dst_pos]); - auto output = - make_uniq(LogicalType::LIST(LogicalType::BIGINT)); - for (auto val : output_vector) { - Value value_to_insert = val; - ListVector::PushBack(*output, value_to_insert); - } - result_data[search_num].length = ListVector::GetListSize(*output); - result_data[search_num].offset = total_len; - ListVector::Append(result, ListVector::GetEntry(*output), - ListVector::GetListSize(*output)); - total_len += result_data[search_num].length; - lane_to_num[lane] = -1; // mark inactive + vector output_vector; + auto iterations = iter; + auto parent_vertex = paths_v[dst_data[dst_pos]][lane][iterations]; + auto parent_edge = paths_e[dst_data[dst_pos]][lane][iterations]; + output_vector.push_back(dst_data[dst_pos]); + while (iterations > 0) { + output_vector.push_back(parent_edge); + output_vector.push_back(parent_vertex); + iterations--; + parent_edge = paths_e[parent_vertex][lane][iterations]; + parent_vertex = paths_v[parent_vertex][lane][iterations]; } + std::reverse(output_vector.begin(), output_vector.end()); + auto output = + make_uniq(LogicalType::LIST(LogicalType::BIGINT)); + for (auto val : output_vector) { + Value value_to_insert = val; + ListVector::PushBack(*output, value_to_insert); + } + result_data[search_num].length = ListVector::GetListSize(*output); + result_data[search_num].offset = total_len; + ListVector::Append(result, ListVector::GetEntry(*output), + ListVector::GetListSize(*output)); + total_len += result_data[search_num].length; + lane_to_num[lane] = -1; // mark inactive } } } @@ -228,14 +202,14 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, duckpgq_state->csr_to_delete.insert(info.csr_id); } -// CreateScalarFunctionInfo DuckPGQFunctions::GetShortestPathLowerBoundFunction() { -// auto fun = ScalarFunction( -// "shortestpath_lowerbound", -// {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, -// LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, -// LogicalType::LIST(LogicalType::BIGINT), ShortestPathLowerBoundFunction, -// IterativeLengthFunctionData::IterativeLengthBind); -// return CreateScalarFunctionInfo(fun); -// } +CreateScalarFunctionInfo DuckPGQFunctions::GetShortestPathLowerBoundFunction() { + auto fun = ScalarFunction( + "shortestpath_lowerbound", + {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, + LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, + LogicalType::LIST(LogicalType::BIGINT), ShortestPathLowerBoundFunction, + IterativeLengthFunctionData::IterativeLengthBind); + return CreateScalarFunctionInfo(fun); +} } // namespace duckdb diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp index 8b515bcd..da9bbe8e 100644 --- a/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp @@ -12,11 +12,11 @@ namespace duckdb { static bool IterativeLengthPhaseOne(int64_t v_size, int64_t *V, vector &E, - int64_t iter, vector &edge_ids, - vector>> &paths_v, - vector>> &paths_e, - vector> &visit, - vector> &next) { + int64_t iter, vector &edge_ids, + vector>> &paths_v, + vector>> &paths_e, + vector> &visit, + vector> &next) { bool change = false; for (auto v = 0; v < v_size; v++) { next[v] = 0; @@ -45,12 +45,12 @@ static bool IterativeLengthPhaseOne(int64_t v_size, int64_t *V, vector } static bool IterativeLengthPhaseTwo(int64_t v_size, int64_t *V, vector &E, - vector &edge_ids, - vector> &parents_v, - vector> &parents_e, - vector> &seen, - vector> &visit, - vector> &next) { + vector &edge_ids, + vector> &parents_v, + vector> &parents_e, + vector> &seen, + vector> &visit, + vector> &next) { bool change = false; for (auto v = 0; v < v_size; v++) { next[v] = 0; @@ -227,6 +227,10 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta // empty visit vectors for (auto i = 0; i < v_size; i++) { visit1[i] = 0; + for (auto j = 0; j < LANE_LIMIT; j++) { + paths_v[i][j].clear(); + paths_v[i][j].clear(); + } } // add search jobs to free lanes @@ -314,16 +318,16 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta duckpgq_state->csr_to_delete.insert(info.csr_id); } -CreateScalarFunctionInfo -DuckPGQFunctions::GetShortestPathLowerBoundFunction() { - auto fun = ScalarFunction( - "shortestpath_lowerbound", - {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, - LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, - LogicalType::LIST(LogicalType::BIGINT), - ShortestPathLowerBoundFunction, - IterativeLengthFunctionData::IterativeLengthBind); - return CreateScalarFunctionInfo(fun); -} +// CreateScalarFunctionInfo +// DuckPGQFunctions::GetShortestPathLowerBoundFunction() { +// auto fun = ScalarFunction( +// "shortestpath_lowerbound", +// {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, +// LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, +// LogicalType::LIST(LogicalType::BIGINT), +// ShortestPathLowerBoundFunction, +// IterativeLengthFunctionData::IterativeLengthBind); +// return CreateScalarFunctionInfo(fun); +// } } // namespace duckdb From ded74bd02b0885c609a3e2a1ddb19d6eb2d803c5 Mon Sep 17 00:00:00 2001 From: Pingan Ren Date: Thu, 29 Feb 2024 13:29:21 +0100 Subject: [PATCH 33/39] Register all shortest path functions for testing --- .../scalar/iterativelength_two_phase.cpp | 20 ++++++++-------- .../scalar/shortest_path_two_phase.cpp | 23 ++++++++++--------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength_two_phase.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength_two_phase.cpp index a6daab73..a3fb893f 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength_two_phase.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength_two_phase.cpp @@ -232,15 +232,15 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, ExpressionState & duckpgq_state->csr_to_delete.insert(info.csr_id); } -// CreateScalarFunctionInfo -// DuckPGQFunctions::GetIterativeLengthLowerBoundFunction() { -// auto fun = ScalarFunction( -// "iterativelength_lowerbound", -// {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, -// LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, -// LogicalType::BIGINT, IterativeLengthLowerBoundFunction, -// IterativeLengthFunctionData::IterativeLengthBind); -// return CreateScalarFunctionInfo(fun); -// } +CreateScalarFunctionInfo +DuckPGQFunctions::GetIterativeLengthLowerBoundFunction() { + auto fun = ScalarFunction( + "iterativelength_two_phase", + {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, + LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, + LogicalType::BIGINT, IterativeLengthLowerBoundFunction, + IterativeLengthFunctionData::IterativeLengthBind); + return CreateScalarFunctionInfo(fun); +} } // namespace duckdb diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp index da9bbe8e..f431bf1c 100644 --- a/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp @@ -143,6 +143,7 @@ static std::tuple> ShortestPathInternal(int64_t lane, i result.push_back(destination); result.push_back(parent_edge); while (parent_vertex != src[search_num]) { + result.push_back(parent_vertex); parent_edge = parents_e[parent_vertex][lane]; parent_vertex = parents_v[parent_vertex][lane]; @@ -318,16 +319,16 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta duckpgq_state->csr_to_delete.insert(info.csr_id); } -// CreateScalarFunctionInfo -// DuckPGQFunctions::GetShortestPathLowerBoundFunction() { -// auto fun = ScalarFunction( -// "shortestpath_lowerbound", -// {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, -// LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, -// LogicalType::LIST(LogicalType::BIGINT), -// ShortestPathLowerBoundFunction, -// IterativeLengthFunctionData::IterativeLengthBind); -// return CreateScalarFunctionInfo(fun); -// } +CreateScalarFunctionInfo +DuckPGQFunctions::GetShortestPathLowerBoundFunction() { + auto fun = ScalarFunction( + "shortestpath_two_phase", + {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, + LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, + LogicalType::LIST(LogicalType::BIGINT), + ShortestPathLowerBoundFunction, + IterativeLengthFunctionData::IterativeLengthBind); + return CreateScalarFunctionInfo(fun); +} } // namespace duckdb From 9741705579bfd562ea54bcfc0d416348dd2f2703 Mon Sep 17 00:00:00 2001 From: Pingan Ren Date: Thu, 29 Feb 2024 16:27:55 +0100 Subject: [PATCH 34/39] fix build fail --- duckpgq/include/duckpgq/duckpgq_functions.hpp | 4 ++++ .../duckpgq/functions/scalar/iterativelength_two_phase.cpp | 6 +++--- .../duckpgq/functions/scalar/shortest_path_two_phase.cpp | 6 +++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/duckpgq/include/duckpgq/duckpgq_functions.hpp b/duckpgq/include/duckpgq/duckpgq_functions.hpp index 6b789daf..2542fa26 100644 --- a/duckpgq/include/duckpgq/duckpgq_functions.hpp +++ b/duckpgq/include/duckpgq/duckpgq_functions.hpp @@ -28,9 +28,11 @@ class DuckPGQFunctions { functions.push_back(GetCheapestPathLengthFunction()); functions.push_back(GetShortestPathFunction()); functions.push_back(GetShortestPathLowerBoundFunction()); + functions.push_back(GetShortestPathTwoPhaseFunction()); functions.push_back(GetReachabilityFunction()); functions.push_back(GetIterativeLengthFunction()); functions.push_back(GetIterativeLengthLowerBoundFunction()); + functions.push_back(GetIterativeLengthTwoPhaseFunction()); functions.push_back(GetIterativeLengthBidirectionalFunction()); functions.push_back(GetIterativeLength2Function()); functions.push_back(GetDeleteCsrFunction()); @@ -60,9 +62,11 @@ class DuckPGQFunctions { static CreateScalarFunctionInfo GetCheapestPathLengthFunction(); static CreateScalarFunctionInfo GetShortestPathFunction(); static CreateScalarFunctionInfo GetShortestPathLowerBoundFunction(); + static CreateScalarFunctionInfo GetShortestPathTwoPhaseFunction(); static CreateScalarFunctionInfo GetReachabilityFunction(); static CreateScalarFunctionInfo GetIterativeLengthFunction(); static CreateScalarFunctionInfo GetIterativeLengthLowerBoundFunction(); + static CreateScalarFunctionInfo GetIterativeLengthTwoPhaseFunction(); static CreateScalarFunctionInfo GetIterativeLengthBidirectionalFunction(); static CreateScalarFunctionInfo GetIterativeLength2Function(); static CreateScalarFunctionInfo GetDeleteCsrFunction(); diff --git a/duckpgq/src/duckpgq/functions/scalar/iterativelength_two_phase.cpp b/duckpgq/src/duckpgq/functions/scalar/iterativelength_two_phase.cpp index a3fb893f..b4d75547 100644 --- a/duckpgq/src/duckpgq/functions/scalar/iterativelength_two_phase.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/iterativelength_two_phase.cpp @@ -103,7 +103,7 @@ static int64_t IterativeLengthInternal(int64_t lane, int64_t v_size, int64_t des return -1; } -static void IterativeLengthLowerBoundFunction(DataChunk &args, ExpressionState &state, +static void IterativeLengthTwoPhaseFunction(DataChunk &args, ExpressionState &state, Vector &result) { auto &func_expr = (BoundFunctionExpression &)state.expr; auto &info = (IterativeLengthFunctionData &)*func_expr.bind_info; @@ -233,12 +233,12 @@ static void IterativeLengthLowerBoundFunction(DataChunk &args, ExpressionState & } CreateScalarFunctionInfo -DuckPGQFunctions::GetIterativeLengthLowerBoundFunction() { +DuckPGQFunctions::GetIterativeLengthTwoPhaseFunction() { auto fun = ScalarFunction( "iterativelength_two_phase", {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, - LogicalType::BIGINT, IterativeLengthLowerBoundFunction, + LogicalType::BIGINT, IterativeLengthTwoPhaseFunction, IterativeLengthFunctionData::IterativeLengthBind); return CreateScalarFunctionInfo(fun); } diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp index f431bf1c..e8d55f69 100644 --- a/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp @@ -159,7 +159,7 @@ static std::tuple> ShortestPathInternal(int64_t lane, i return std::make_tuple(-1, result); } -static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &state, +static void ShortestPathTwoPhaseFunction(DataChunk &args, ExpressionState &state, Vector &result) { auto &func_expr = (BoundFunctionExpression &)state.expr; auto &info = (IterativeLengthFunctionData &)*func_expr.bind_info; @@ -320,13 +320,13 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, ExpressionState &sta } CreateScalarFunctionInfo -DuckPGQFunctions::GetShortestPathLowerBoundFunction() { +DuckPGQFunctions::GetShortestPathTwoPhaseFunction() { auto fun = ScalarFunction( "shortestpath_two_phase", {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, LogicalType::LIST(LogicalType::BIGINT), - ShortestPathLowerBoundFunction, + ShortestPathTwoPhaseFunction, IterativeLengthFunctionData::IterativeLengthBind); return CreateScalarFunctionInfo(fun); } From ac54a7d88bf2937ef40feb7d43a1621c9ac43457 Mon Sep 17 00:00:00 2001 From: Pingan Ren Date: Thu, 29 Feb 2024 22:07:06 +0100 Subject: [PATCH 35/39] Prevents completed lanes from being added to the calculation --- .../scalar/shortest_path_lowerbound.cpp | 7 +- .../scalar/shortest_path_two_phase.cpp | 98 ++++++++++--------- 2 files changed, 56 insertions(+), 49 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp index d7b8c671..e9fa7bff 100644 --- a/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp @@ -12,7 +12,8 @@ namespace duckdb { static bool IterativeLengthLowerBound(int64_t v_size, int64_t *V, vector &E, - int64_t iter, vector &edge_ids, + int64_t iter, int16_t lane_to_num[LANE_LIMIT], + vector &edge_ids, vector>> &paths_v, vector>> &paths_e, vector> &seen, @@ -31,7 +32,7 @@ static bool IterativeLengthLowerBound(int64_t v_size, int64_t *V, vector= 0 && visit[v][lane]) { paths_v[n][lane][iter] = v; paths_e[n][lane][iter] = edge_id; } @@ -146,7 +147,7 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, for (int64_t iter = 1; active && iter <= upper_bound; iter++) { //! Perform one step of bfs exploration if (!IterativeLengthLowerBound( - v_size, v, e, iter, edge_ids, paths_v, paths_e, seen, + v_size, v, e, iter, lane_to_num, edge_ids, paths_v, paths_e, seen, (iter & 1) ? visit1 : visit2, (iter & 1) ? visit2 : visit1)) { break; } diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp index e8d55f69..25be0a84 100644 --- a/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp @@ -113,6 +113,10 @@ static std::tuple> ShortestPathInternal(int64_t lane, i for (auto i = 0; i < v_size; i++) { seen[i] = 0; visit1[i] = 0; + for (auto j = 0; j < LANE_LIMIT; j++) { + parents_v[i][j] = -1; + parents_e[i][j] = -1; + } } // add search jobs to free lanes for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { @@ -252,61 +256,63 @@ static void ShortestPathTwoPhaseFunction(DataChunk &args, ExpressionState &state int64_t iter = 1; for (; iter < lower_bound; iter++) { - IterativeLengthPhaseOne(v_size, v, e, iter, edge_ids, paths_v, paths_e, + if (!IterativeLengthPhaseOne(v_size, v, e, iter, edge_ids, paths_v, paths_e, (iter & 1) ? visit1 : visit2, - (iter & 1) ? visit2 : visit1); + (iter & 1) ? visit2 : visit1)) { + break; + } } - - for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { - auto search_num = lane_to_num[lane]; - if (search_num >= 0) { - int64_t dst_pos = vdata_dst.sel->get_index(search_num); - auto phase_two_result = ShortestPathInternal(lane, v_size, dst_data[dst_pos], - upper_bound - lower_bound + 1, v, e, edge_ids, (iter & 1) ? visit1 : visit2); - auto phase_two_src = std::get<0>(phase_two_result); - auto phase_two_path = std::get<1>(phase_two_result); - if (phase_two_src >= 0) { - vector output_vector; - // construct the path of phase one - if (paths_v[phase_two_src][lane].size() > 0) { - auto iterations = lower_bound - 1; - auto parent_vertex = paths_v[phase_two_src][lane][iterations]; - auto parent_edge = paths_e[phase_two_src][lane][iterations]; - - while (iterations > 0) { - output_vector.push_back(parent_edge); - output_vector.push_back(parent_vertex); - iterations--; - parent_edge = paths_e[parent_vertex][lane][iterations]; - parent_vertex = paths_v[parent_vertex][lane][iterations]; + if (iter == lower_bound) { + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + auto search_num = lane_to_num[lane]; + if (search_num >= 0) { + int64_t dst_pos = vdata_dst.sel->get_index(search_num); + auto phase_two_result = ShortestPathInternal(lane, v_size, dst_data[dst_pos], + upper_bound - lower_bound + 1, v, e, edge_ids, (iter & 1) ? visit1 : visit2); + auto phase_two_src = std::get<0>(phase_two_result); + auto phase_two_path = std::get<1>(phase_two_result); + if (phase_two_src >= 0) { + vector output_vector; + // construct the path of phase one + if (paths_v[phase_two_src][lane].size() > 0) { + auto iterations = lower_bound - 1; + auto parent_vertex = paths_v[phase_two_src][lane][iterations]; + auto parent_edge = paths_e[phase_two_src][lane][iterations]; + + while (iterations > 0) { + output_vector.push_back(parent_edge); + output_vector.push_back(parent_vertex); + iterations--; + parent_edge = paths_e[parent_vertex][lane][iterations]; + parent_vertex = paths_v[parent_vertex][lane][iterations]; + } + std::reverse(output_vector.begin(), output_vector.end()); } - std::reverse(output_vector.begin(), output_vector.end()); - } - // construct the path of phase two - for (auto val : phase_two_path) { - output_vector.push_back(val); - } + // construct the path of phase two + for (auto val : phase_two_path) { + output_vector.push_back(val); + } - // construct the output - auto output = make_uniq(LogicalType::LIST(LogicalType::BIGINT)); - for (auto val : output_vector) { - Value value_to_insert = val; - ListVector::PushBack(*output, value_to_insert); + // construct the output + auto output = make_uniq(LogicalType::LIST(LogicalType::BIGINT)); + for (auto val : output_vector) { + Value value_to_insert = val; + ListVector::PushBack(*output, value_to_insert); + } + result_data[search_num].length = ListVector::GetListSize(*output); + result_data[search_num].offset = total_len; + ListVector::Append(result, ListVector::GetEntry(*output), + ListVector::GetListSize(*output)); + total_len += result_data[search_num].length; + lane_to_num[lane] = -1; // mark inactive + } else { + result_validity.SetInvalid(search_num); + lane_to_num[lane] = -1; // mark inactive } - result_data[search_num].length = ListVector::GetListSize(*output); - result_data[search_num].offset = total_len; - ListVector::Append(result, ListVector::GetEntry(*output), - ListVector::GetListSize(*output)); - total_len += result_data[search_num].length; - lane_to_num[lane] = -1; // mark inactive - } else { - result_validity.SetInvalid(search_num); - lane_to_num[lane] = -1; // mark inactive } } } - // no changes anymore: any still active searches have no path for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { int64_t search_num = lane_to_num[lane]; From a3fa824603faf3b61ead7388202ceaefd1c3ef55 Mon Sep 17 00:00:00 2001 From: Pingan Ren Date: Mon, 4 Mar 2024 15:59:52 +0100 Subject: [PATCH 36/39] Further optimization --- .../scalar/shortest_path_lowerbound.cpp | 19 +++++-- .../scalar/shortest_path_two_phase.cpp | 57 ++++++++++++------- .../sql/path-finding/shortest_path_bound.test | 12 ++-- 3 files changed, 58 insertions(+), 30 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp index e9fa7bff..91f852ea 100644 --- a/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp @@ -11,8 +11,9 @@ namespace duckdb { -static bool IterativeLengthLowerBound(int64_t v_size, int64_t *V, vector &E, - int64_t iter, int16_t lane_to_num[LANE_LIMIT], +static bool IterativeLengthLowerBound(int64_t v_size, int64_t iter, bool seen_check, + int64_t *V, vector &E, + int16_t lane_to_num[LANE_LIMIT], vector &edge_ids, vector>> &paths_v, vector>> &paths_e, @@ -22,7 +23,6 @@ static bool IterativeLengthLowerBound(int64_t v_size, int64_t *V, vector(result); ValidityMask &result_validity = FlatVector::Validity(result); + bool seen_check = false; + // create temp SIMD arrays vector> seen(v_size); vector> visit1(v_size); @@ -114,6 +119,7 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, idx_t started_searches = 0; while (started_searches < args.size()) { + seen_check = false; // empty visit vectors for (auto i = 0; i < v_size; i++) { @@ -145,9 +151,12 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, //! make passes while a lane is still active for (int64_t iter = 1; active && iter <= upper_bound; iter++) { + if (iter >= lower_bound) { + seen_check = true; + } //! Perform one step of bfs exploration if (!IterativeLengthLowerBound( - v_size, v, e, iter, lane_to_num, edge_ids, paths_v, paths_e, seen, + v_size, iter, seen_check, v, e, lane_to_num, edge_ids, paths_v, paths_e, seen, (iter & 1) ? visit1 : visit2, (iter & 1) ? visit2 : visit1)) { break; } diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp index 25be0a84..d49709f5 100644 --- a/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp @@ -81,26 +81,22 @@ static bool IterativeLengthPhaseTwo(int64_t v_size, int64_t *V, vector return change; } -static std::tuple> ShortestPathInternal(int64_t lane, int64_t v_size, int64_t destination, +static vector ShortestPathInternal(int64_t lane, int64_t v_size, int64_t destination, int64_t bound, int64_t *v, vector &e, vector &edge_ids, - vector> &visit) { + vector> &visit, + vector> &seen, + vector> &visit1, + vector> &visit2, + vector> &parents_v, + vector> &parents_e) { vector src; - vector result; + vector> results; for (int64_t v = 0; v < v_size; v++) { if (visit[v][lane]) { src.push_back(v); } } - vector> seen(v_size); - vector> visit1(v_size); - vector> visit2(v_size); - - vector> parents_v(v_size, - std::vector(LANE_LIMIT, -1)); - vector> parents_e(v_size, - std::vector(LANE_LIMIT, -1)); - // maps lane to search number int16_t lane_to_num[LANE_LIMIT]; @@ -139,6 +135,7 @@ static std::tuple> ShortestPathInternal(int64_t lane, i for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { if (seen[destination][lane]) { auto search_num = lane_to_num[lane]; + vector result; // found the destination, reconstruct the path auto parent_vertex = parents_v[destination][lane]; @@ -155,12 +152,24 @@ static std::tuple> ShortestPathInternal(int64_t lane, i } result.push_back(src[search_num]); std::reverse(result.begin(), result.end()); - return std::make_tuple(src[search_num], result); + results.push_back(result); + break; } } } } - return std::make_tuple(-1, result); + size_t min_size = INT64_MAX; + size_t min_index = -1; + for (size_t i = 0; i < results.size(); i++) { + if (results[i].size() < min_size) { + min_size = results[i].size(); + min_index = i; + } + } + if (min_index >= 0) { + return results[min_index]; + } + return {}; } static void ShortestPathTwoPhaseFunction(DataChunk &args, ExpressionState &state, @@ -263,16 +272,26 @@ static void ShortestPathTwoPhaseFunction(DataChunk &args, ExpressionState &state } } if (iter == lower_bound) { + // resource reuse + vector> seen_in(v_size); + vector> visit1_in(v_size); + vector> visit2_in(v_size); + + vector> parents_v_in(v_size, + std::vector(LANE_LIMIT, -1)); + vector> parents_e_in(v_size, + std::vector(LANE_LIMIT, -1)); + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { auto search_num = lane_to_num[lane]; if (search_num >= 0) { int64_t dst_pos = vdata_dst.sel->get_index(search_num); auto phase_two_result = ShortestPathInternal(lane, v_size, dst_data[dst_pos], - upper_bound - lower_bound + 1, v, e, edge_ids, (iter & 1) ? visit1 : visit2); - auto phase_two_src = std::get<0>(phase_two_result); - auto phase_two_path = std::get<1>(phase_two_result); - if (phase_two_src >= 0) { + upper_bound - lower_bound + 1, v, e, edge_ids, (iter & 1) ? visit1 : visit2, + seen_in, visit1_in, visit2_in, parents_v_in, parents_e_in); + if (phase_two_result.size() > 0) { vector output_vector; + auto phase_two_src = phase_two_result[0]; // construct the path of phase one if (paths_v[phase_two_src][lane].size() > 0) { auto iterations = lower_bound - 1; @@ -290,7 +309,7 @@ static void ShortestPathTwoPhaseFunction(DataChunk &args, ExpressionState &state } // construct the path of phase two - for (auto val : phase_two_path) { + for (auto val : phase_two_result) { output_vector.push_back(val); } diff --git a/test/sql/path-finding/shortest_path_bound.test b/test/sql/path-finding/shortest_path_bound.test index da12c592..ca139ae9 100644 --- a/test/sql/path-finding/shortest_path_bound.test +++ b/test/sql/path-finding/shortest_path_bound.test @@ -333,19 +333,19 @@ query III ---- 0 0 [0, 0, 2, 1, 0] 0 1 [0, 0, 2, 2, 3, 4, 1] -0 2 [0, 0, 2, 1, 0, 0, 2] +0 2 [0, 0, 2, 2, 3, 3, 2] 0 3 [0, 0, 2, 2, 3] 1 0 [1, 6, 1, 5, 0] 1 1 [1, 6, 1, 6, 1] 1 2 [1, 5, 0, 0, 2] 1 3 [1, 5, 0, 0, 2, 2, 3] -2 0 [2, 1, 0, 0, 2, 1, 0] +2 0 [2, 2, 3, 3, 2, 1, 0] 2 1 [2, 2, 3, 4, 1] -2 2 [2, 1, 0, 0, 2] -2 3 [2, 1, 0, 0, 2, 2, 3] -3 0 [3, 4, 1, 5, 0] +2 2 [2, 2, 3, 3, 2] +2 3 [2, 2, 3, 3, 2, 2, 3] +3 0 [3, 3, 2, 1, 0] 3 1 [3, 4, 1, 6, 1] -3 2 [3, 4, 1, 5, 0, 0, 2] +3 2 [3, 3, 2, 2, 3, 3, 2] 3 3 [3, 3, 2, 2, 3] From e6ab7f1fbbc1bfe1c461a7949f57715a02228f38 Mon Sep 17 00:00:00 2001 From: Pingan Ren Date: Mon, 4 Mar 2024 22:35:16 +0100 Subject: [PATCH 37/39] make funtion template --- .../scalar/shortest_path_lowerbound.cpp | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp index 91f852ea..97086ba1 100644 --- a/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp @@ -11,7 +11,8 @@ namespace duckdb { -static bool IterativeLengthLowerBound(int64_t v_size, int64_t iter, bool seen_check, +template +static bool IterativeLengthLowerBound(int64_t v_size, int64_t iter, int64_t *V, vector &E, int16_t lane_to_num[LANE_LIMIT], vector &edge_ids, @@ -42,7 +43,7 @@ static bool IterativeLengthLowerBound(int64_t v_size, int64_t iter, bool seen_ch } for (auto v = 0; v < v_size; v++) { - if (seen_check) { + if (SEEN_CHECK) { next[v] = next[v] & ~seen[v]; seen[v] = seen[v] | next[v]; } @@ -98,8 +99,6 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, auto result_data = FlatVector::GetData(result); ValidityMask &result_validity = FlatVector::Validity(result); - bool seen_check = false; - // create temp SIMD arrays vector> seen(v_size); vector> visit1(v_size); @@ -119,8 +118,6 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, idx_t started_searches = 0; while (started_searches < args.size()) { - seen_check = false; - // empty visit vectors for (auto i = 0; i < v_size; i++) { seen[i] = 0; @@ -151,17 +148,19 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, //! make passes while a lane is still active for (int64_t iter = 1; active && iter <= upper_bound; iter++) { - if (iter >= lower_bound) { - seen_check = true; - } - //! Perform one step of bfs exploration - if (!IterativeLengthLowerBound( - v_size, iter, seen_check, v, e, lane_to_num, edge_ids, paths_v, paths_e, seen, - (iter & 1) ? visit1 : visit2, (iter & 1) ? visit2 : visit1)) { - break; - } if (iter < lower_bound) { + if (!IterativeLengthLowerBound( + v_size, iter, v, e, lane_to_num, edge_ids, paths_v, paths_e, seen, + (iter & 1) ? visit1 : visit2, (iter & 1) ? visit2 : visit1)) { + break; + } continue; + } else { + if (!IterativeLengthLowerBound( + v_size, iter, v, e, lane_to_num, edge_ids, paths_v, paths_e, seen, + (iter & 1) ? visit1 : visit2, (iter & 1) ? visit2 : visit1)) { + break; + } } // detect lanes that finished for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { From a1c68337f9798b5d579e4e1fe5ae8f570b9280b2 Mon Sep 17 00:00:00 2001 From: Pingan Ren Date: Wed, 6 Mar 2024 17:16:47 +0100 Subject: [PATCH 38/39] Optimizing path logging with one-dimensional array --- .../scalar/shortest_path_lowerbound.cpp | 2 +- .../scalar/shortest_path_two_phase.cpp | 227 +++++------------- 2 files changed, 62 insertions(+), 167 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp index 97086ba1..338c5477 100644 --- a/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path_lowerbound.cpp @@ -213,7 +213,7 @@ static void ShortestPathLowerBoundFunction(DataChunk &args, CreateScalarFunctionInfo DuckPGQFunctions::GetShortestPathLowerBoundFunction() { auto fun = ScalarFunction( - "shortestpath_lowerbound", + "shortestpath_two_phase", {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, LogicalType::LIST(LogicalType::BIGINT), ShortestPathLowerBoundFunction, diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp index d49709f5..6e10da99 100644 --- a/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp @@ -11,10 +11,9 @@ namespace duckdb { -static bool IterativeLengthPhaseOne(int64_t v_size, int64_t *V, vector &E, - int64_t iter, vector &edge_ids, - vector>> &paths_v, - vector>> &paths_e, +static bool IterativeLengthPhaseOne(int64_t v_size, int64_t iter, int64_t *V, + vector &E, vector &edge_ids, + vector> &paths_ve, vector> &visit, vector> &next) { bool change = false; @@ -30,8 +29,8 @@ static bool IterativeLengthPhaseOne(int64_t v_size, int64_t *V, vector next[n] = next[n] | visit[v]; for (auto lane = 0; lane < LANE_LIMIT; lane++) { if (visit[v][lane]) { - paths_v[n][lane][iter] = v; - paths_e[n][lane][iter] = edge_id; + // paths_ve[iter][n][lane] = {v, edge_id}; + paths_ve[((iter - 1) * v_size + n) * LANE_LIMIT + lane] = {v, edge_id}; } } } @@ -46,8 +45,8 @@ static bool IterativeLengthPhaseOne(int64_t v_size, int64_t *V, vector static bool IterativeLengthPhaseTwo(int64_t v_size, int64_t *V, vector &E, vector &edge_ids, - vector> &parents_v, - vector> &parents_e, + int16_t lane_to_num[LANE_LIMIT], + vector> &paths_ve, vector> &seen, vector> &visit, vector> &next) { @@ -62,12 +61,10 @@ static bool IterativeLengthPhaseTwo(int64_t v_size, int64_t *V, vector auto n = E[e]; auto edge_id = edge_ids[e]; next[n] = next[n] | visit[v]; - for (auto l = 0; l < LANE_LIMIT; l++) { - parents_v[n][l] = - ((parents_v[n][l] == -1) && visit[v][l]) ? v : parents_v[n][l]; - parents_e[n][l] = ((parents_e[n][l] == -1) && visit[v][l]) - ? edge_id - : parents_e[n][l]; + for (auto lane = 0; lane < LANE_LIMIT; lane++) { + // paths_ve[n][lane] = {v, edge_id}; + if (lane_to_num[lane] >= 0 && visit[v][lane]) + paths_ve[(n * LANE_LIMIT + lane)] = {v, edge_id}; } } } @@ -81,97 +78,6 @@ static bool IterativeLengthPhaseTwo(int64_t v_size, int64_t *V, vector return change; } -static vector ShortestPathInternal(int64_t lane, int64_t v_size, int64_t destination, - int64_t bound, - int64_t *v, vector &e, vector &edge_ids, - vector> &visit, - vector> &seen, - vector> &visit1, - vector> &visit2, - vector> &parents_v, - vector> &parents_e) { - vector src; - vector> results; - for (int64_t v = 0; v < v_size; v++) { - if (visit[v][lane]) { - src.push_back(v); - } - } - - // maps lane to search number - int16_t lane_to_num[LANE_LIMIT]; - for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { - lane_to_num[lane] = -1; // inactive - } - - idx_t started_searches = 0; - while (started_searches < src.size()) { - for (auto i = 0; i < v_size; i++) { - seen[i] = 0; - visit1[i] = 0; - for (auto j = 0; j < LANE_LIMIT; j++) { - parents_v[i][j] = -1; - parents_e[i][j] = -1; - } - } - // add search jobs to free lanes - for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { - if (started_searches < src.size()) { - int64_t search_num = started_searches++; - visit1[src[search_num]][lane] = true; - lane_to_num[lane] = search_num; - } else { - break; - } - } - - for (int64_t iter = 1; iter <= bound; iter++) { - if (!IterativeLengthPhaseTwo(v_size, v, e, edge_ids, parents_v, parents_e, - seen, (iter & 1) ? visit1 : visit2, - (iter & 1) ? visit2 : visit1)) { - break; - } - // detect lanes that found the destination - for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { - if (seen[destination][lane]) { - auto search_num = lane_to_num[lane]; - vector result; - - // found the destination, reconstruct the path - auto parent_vertex = parents_v[destination][lane]; - auto parent_edge = parents_e[destination][lane]; - - result.push_back(destination); - result.push_back(parent_edge); - while (parent_vertex != src[search_num]) { - - result.push_back(parent_vertex); - parent_edge = parents_e[parent_vertex][lane]; - parent_vertex = parents_v[parent_vertex][lane]; - result.push_back(parent_edge); - } - result.push_back(src[search_num]); - std::reverse(result.begin(), result.end()); - results.push_back(result); - break; - } - } - } - } - size_t min_size = INT64_MAX; - size_t min_index = -1; - for (size_t i = 0; i < results.size(); i++) { - if (results[i].size() < min_size) { - min_size = results[i].size(); - min_index = i; - } - } - if (min_index >= 0) { - return results[min_index]; - } - return {}; -} - static void ShortestPathTwoPhaseFunction(DataChunk &args, ExpressionState &state, Vector &result) { auto &func_expr = (BoundFunctionExpression &)state.expr; @@ -219,14 +125,12 @@ static void ShortestPathTwoPhaseFunction(DataChunk &args, ExpressionState &state ValidityMask &result_validity = FlatVector::Validity(result); // create temp SIMD arrays + vector> seen(v_size); vector> visit1(v_size); vector> visit2(v_size); - vector>> paths_v(v_size, - std::vector>(LANE_LIMIT)); - vector>> paths_e(v_size, - std::vector>(LANE_LIMIT)); - + vector> paths_ve_one(v_size * LANE_LIMIT * (lower_bound - 1), {-1, -1}); + vector> paths_ve_two(v_size * LANE_LIMIT, {-1, -1}); // maps lane to search number int16_t lane_to_num[LANE_LIMIT]; @@ -240,11 +144,8 @@ static void ShortestPathTwoPhaseFunction(DataChunk &args, ExpressionState &state // empty visit vectors for (auto i = 0; i < v_size; i++) { + seen[i] = 0; visit1[i] = 0; - for (auto j = 0; j < LANE_LIMIT; j++) { - paths_v[i][j].clear(); - paths_v[i][j].clear(); - } } // add search jobs to free lanes @@ -265,69 +166,63 @@ static void ShortestPathTwoPhaseFunction(DataChunk &args, ExpressionState &state int64_t iter = 1; for (; iter < lower_bound; iter++) { - if (!IterativeLengthPhaseOne(v_size, v, e, iter, edge_ids, paths_v, paths_e, + if (!IterativeLengthPhaseOne(v_size, iter, v, e, edge_ids, paths_ve_one, (iter & 1) ? visit1 : visit2, (iter & 1) ? visit2 : visit1)) { break; } } if (iter == lower_bound) { - // resource reuse - vector> seen_in(v_size); - vector> visit1_in(v_size); - vector> visit2_in(v_size); - - vector> parents_v_in(v_size, - std::vector(LANE_LIMIT, -1)); - vector> parents_e_in(v_size, - std::vector(LANE_LIMIT, -1)); + for (; iter <= upper_bound; iter++) { + if (!IterativeLengthPhaseTwo(v_size, v, e, edge_ids, lane_to_num, + paths_ve_two, seen, + (iter & 1) ? visit1 : visit2, + (iter & 1) ? visit2 : visit1)) { + break; + } - for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { - auto search_num = lane_to_num[lane]; - if (search_num >= 0) { - int64_t dst_pos = vdata_dst.sel->get_index(search_num); - auto phase_two_result = ShortestPathInternal(lane, v_size, dst_data[dst_pos], - upper_bound - lower_bound + 1, v, e, edge_ids, (iter & 1) ? visit1 : visit2, - seen_in, visit1_in, visit2_in, parents_v_in, parents_e_in); - if (phase_two_result.size() > 0) { - vector output_vector; - auto phase_two_src = phase_two_result[0]; - // construct the path of phase one - if (paths_v[phase_two_src][lane].size() > 0) { - auto iterations = lower_bound - 1; - auto parent_vertex = paths_v[phase_two_src][lane][iterations]; - auto parent_edge = paths_e[phase_two_src][lane][iterations]; + // detect lanes that finished + for (int64_t lane = 0; lane < LANE_LIMIT; lane++) { + int64_t search_num = lane_to_num[lane]; + if (search_num >= 0) { // active lane + //! Check if dst for a source has been seen + int64_t dst_pos = vdata_dst.sel->get_index(search_num); + if (seen[dst_data[dst_pos]][lane]) { + vector output_vector(2 * iter + 1); + + //! construct the phase two path + auto iteration = iter; + auto parent_vertex = dst_data[dst_pos]; + while (iteration >= lower_bound) { + output_vector[2 * iteration - 1] = paths_ve_two[(parent_vertex * LANE_LIMIT + lane)].second; + output_vector[2 * iteration - 2] = paths_ve_two[(parent_vertex * LANE_LIMIT + lane)].first; + parent_vertex = output_vector[2 * iteration - 2]; + iteration--; + } - while (iterations > 0) { - output_vector.push_back(parent_edge); - output_vector.push_back(parent_vertex); - iterations--; - parent_edge = paths_e[parent_vertex][lane][iterations]; - parent_vertex = paths_v[parent_vertex][lane][iterations]; + //! construct the phase one path + while (iteration > 0) { + output_vector[2 * iteration - 1] = paths_ve_one[((iteration - 1) * v_size + parent_vertex) * LANE_LIMIT + lane].second; + output_vector[2 * iteration - 2] = paths_ve_one[((iteration - 1) * v_size + parent_vertex) * LANE_LIMIT + lane].first; + parent_vertex = output_vector[2 * iteration - 2]; + iteration--; } - std::reverse(output_vector.begin(), output_vector.end()); - } - // construct the path of phase two - for (auto val : phase_two_result) { - output_vector.push_back(val); - } + output_vector.back() = dst_data[dst_pos]; - // construct the output - auto output = make_uniq(LogicalType::LIST(LogicalType::BIGINT)); - for (auto val : output_vector) { - Value value_to_insert = val; - ListVector::PushBack(*output, value_to_insert); + auto output = + make_uniq(LogicalType::LIST(LogicalType::BIGINT)); + for (auto val : output_vector) { + Value value_to_insert = val; + ListVector::PushBack(*output, value_to_insert); + } + result_data[search_num].length = ListVector::GetListSize(*output); + result_data[search_num].offset = total_len; + ListVector::Append(result, ListVector::GetEntry(*output), + ListVector::GetListSize(*output)); + total_len += result_data[search_num].length; + lane_to_num[lane] = -1; // mark inactive } - result_data[search_num].length = ListVector::GetListSize(*output); - result_data[search_num].offset = total_len; - ListVector::Append(result, ListVector::GetEntry(*output), - ListVector::GetListSize(*output)); - total_len += result_data[search_num].length; - lane_to_num[lane] = -1; // mark inactive - } else { - result_validity.SetInvalid(search_num); - lane_to_num[lane] = -1; // mark inactive } } } @@ -347,7 +242,7 @@ static void ShortestPathTwoPhaseFunction(DataChunk &args, ExpressionState &state CreateScalarFunctionInfo DuckPGQFunctions::GetShortestPathTwoPhaseFunction() { auto fun = ScalarFunction( - "shortestpath_two_phase", + "shortestpath_lowerbound", {LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, LogicalType::LIST(LogicalType::BIGINT), From e307c6cd9e35c9415093b02575581a4030566d57 Mon Sep 17 00:00:00 2001 From: Pingan Ren Date: Wed, 6 Mar 2024 17:50:50 +0100 Subject: [PATCH 39/39] fix bug: index out of range --- .../scalar/shortest_path_two_phase.cpp | 26 ++++++++++++------- .../sql/path-finding/shortest_path_bound.test | 12 ++++----- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp b/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp index 6e10da99..33e5bbc9 100644 --- a/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp +++ b/duckpgq/src/duckpgq/functions/scalar/shortest_path_two_phase.cpp @@ -13,6 +13,7 @@ namespace duckdb { static bool IterativeLengthPhaseOne(int64_t v_size, int64_t iter, int64_t *V, vector &E, vector &edge_ids, + int16_t lane_to_num[LANE_LIMIT], vector> &paths_ve, vector> &visit, vector> &next) { @@ -28,9 +29,9 @@ static bool IterativeLengthPhaseOne(int64_t v_size, int64_t iter, int64_t *V, auto edge_id = edge_ids[e]; next[n] = next[n] | visit[v]; for (auto lane = 0; lane < LANE_LIMIT; lane++) { - if (visit[v][lane]) { - // paths_ve[iter][n][lane] = {v, edge_id}; - paths_ve[((iter - 1) * v_size + n) * LANE_LIMIT + lane] = {v, edge_id}; + if (lane_to_num[lane] >= 0 && visit[v][lane]) { + // paths_ve[iter][n][lane] = {v, edge_id}; + paths_ve[((iter - 1) * v_size + n) * LANE_LIMIT + lane] = {v, edge_id}; } } } @@ -63,8 +64,9 @@ static bool IterativeLengthPhaseTwo(int64_t v_size, int64_t *V, vector next[n] = next[n] | visit[v]; for (auto lane = 0; lane < LANE_LIMIT; lane++) { // paths_ve[n][lane] = {v, edge_id}; - if (lane_to_num[lane] >= 0 && visit[v][lane]) - paths_ve[(n * LANE_LIMIT + lane)] = {v, edge_id}; + if (lane_to_num[lane] >= 0 && visit[v][lane] && paths_ve[n * LANE_LIMIT + lane].first == -1) { + paths_ve[n * LANE_LIMIT + lane] = {v, edge_id}; + } } } } @@ -166,7 +168,8 @@ static void ShortestPathTwoPhaseFunction(DataChunk &args, ExpressionState &state int64_t iter = 1; for (; iter < lower_bound; iter++) { - if (!IterativeLengthPhaseOne(v_size, iter, v, e, edge_ids, paths_ve_one, + if (!IterativeLengthPhaseOne(v_size, iter, v, e, edge_ids, + lane_to_num, paths_ve_one, (iter & 1) ? visit1 : visit2, (iter & 1) ? visit2 : visit1)) { break; @@ -193,17 +196,20 @@ static void ShortestPathTwoPhaseFunction(DataChunk &args, ExpressionState &state //! construct the phase two path auto iteration = iter; auto parent_vertex = dst_data[dst_pos]; + auto parent_idx = parent_vertex * LANE_LIMIT + lane; while (iteration >= lower_bound) { - output_vector[2 * iteration - 1] = paths_ve_two[(parent_vertex * LANE_LIMIT + lane)].second; - output_vector[2 * iteration - 2] = paths_ve_two[(parent_vertex * LANE_LIMIT + lane)].first; + parent_idx = parent_vertex * LANE_LIMIT + lane; + output_vector[2 * iteration - 1] = paths_ve_two[parent_idx].second; + output_vector[2 * iteration - 2] = paths_ve_two[parent_idx].first; parent_vertex = output_vector[2 * iteration - 2]; iteration--; } //! construct the phase one path while (iteration > 0) { - output_vector[2 * iteration - 1] = paths_ve_one[((iteration - 1) * v_size + parent_vertex) * LANE_LIMIT + lane].second; - output_vector[2 * iteration - 2] = paths_ve_one[((iteration - 1) * v_size + parent_vertex) * LANE_LIMIT + lane].first; + parent_idx = ((iteration - 1) * v_size + parent_vertex) * LANE_LIMIT + lane; + output_vector[2 * iteration - 1] = paths_ve_one[parent_idx].second; + output_vector[2 * iteration - 2] = paths_ve_one[parent_idx].first; parent_vertex = output_vector[2 * iteration - 2]; iteration--; } diff --git a/test/sql/path-finding/shortest_path_bound.test b/test/sql/path-finding/shortest_path_bound.test index ca139ae9..3bd7544b 100644 --- a/test/sql/path-finding/shortest_path_bound.test +++ b/test/sql/path-finding/shortest_path_bound.test @@ -333,19 +333,19 @@ query III ---- 0 0 [0, 0, 2, 1, 0] 0 1 [0, 0, 2, 2, 3, 4, 1] -0 2 [0, 0, 2, 2, 3, 3, 2] +0 2 [0, 0, 2, 1, 0, 0, 2] 0 3 [0, 0, 2, 2, 3] 1 0 [1, 6, 1, 5, 0] 1 1 [1, 6, 1, 6, 1] 1 2 [1, 5, 0, 0, 2] 1 3 [1, 5, 0, 0, 2, 2, 3] -2 0 [2, 2, 3, 3, 2, 1, 0] +2 0 [2, 2, 3, 4, 1, 5, 0] 2 1 [2, 2, 3, 4, 1] -2 2 [2, 2, 3, 3, 2] -2 3 [2, 2, 3, 3, 2, 2, 3] -3 0 [3, 3, 2, 1, 0] +2 2 [2, 1, 0, 0, 2] +2 3 [2, 1, 0, 0, 2, 2, 3] +3 0 [3, 4, 1, 5, 0] 3 1 [3, 4, 1, 6, 1] -3 2 [3, 3, 2, 2, 3, 3, 2] +3 2 [3, 4, 1, 5, 0, 0, 2] 3 3 [3, 3, 2, 2, 3]