From 7c9280f6e3419088f23b5b262bea0e50d2a5067e Mon Sep 17 00:00:00 2001 From: Colm Date: Fri, 6 Dec 2024 11:55:12 +0000 Subject: [PATCH] PG17 compatibility: fix multi-1 diffs caused by PG17 optimizer enhancements (#7769) This fix ensures that the expected DEBUG error messages from the router planner in `multi_router_planner`, `multi_router_planner_fast_path` and `query_single_shard_table` are present with PG17. In `query_single_shard_table` the diff: ``` SELECT COUNT(*) FROM citus_local_table t1 WHERE t1.b IN ( SELECT b+1 FROM nullkey_c1_t1 t2 WHERE t2.b = t1.a ); -DEBUG: router planner does not support queries that reference non-colocated distributed tables +DEBUG: Local tables cannot be used in distributed queries. ``` occurred because of[ this PG17 commit](https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=9f1337639) which enables the optimizer to pull up a correlated ANY subquery to a join. The fix inhibits subquery pull up by including a volatile function in the predicate involving the ANY subquery, preserving the pre-PG17 optimizer treatment of the query. In the case of `multi_router_planner` and `multi_router_planner_fast_path` the diffs: ``` -- partition_column is null clause does not prune out any shards, -- all shards remain after shard pruning, not router plannable SELECT * FROM articles_hash a WHERE a.author_id is null; -DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: Creating router plan ``` are because of [this PG17 commit](https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=b262ad440), which enables the optimizer to detect and remove redundant IS (NOT) NULL expressions. The fix is to adjust the table definition so the column used for distribution is not marked NOT NULL, thus preserving the pre-PG17 query planning behavior. Finallly, a rule is added to `normalize.sed` to ignore DEBUG logging in CREATE MATERIALIZED VIEW AS statements introduced by [this PG17 commit](https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=b4da732fd64); _when creating materialized views, use REFRESH logic to load data_, a consequence of which is that with `client_min_messages` at `DEBUG2` Postgres emits extra detail for CREATE MATERIALIZED VIEW AS statements. ``` CREATE MATERIALIZED VIEW mv_articles_hash_empty AS SELECT * FROM articles_hash WHERE author_id = 1; DEBUG: Creating router plan DEBUG: query has a single distribution column value: 1 +DEBUG: drop auto-cascades to type multi_router_planner.pg_temp_61391 +DEBUG: drop auto-cascades to type multi_router_planner.pg_temp_61391[] ``` The rule can be changed to a normalization, or possibly dropped, when 17 becomes the minimum supported version. --- src/test/regress/bin/normalize.sed | 7 +++++++ src/test/regress/expected/multi_router_planner.out | 2 +- .../regress/expected/multi_router_planner_fast_path.out | 2 +- src/test/regress/expected/query_single_shard_table.out | 8 ++++---- src/test/regress/sql/multi_router_planner.sql | 2 +- src/test/regress/sql/multi_router_planner_fast_path.sql | 3 +-- src/test/regress/sql/query_single_shard_table.sql | 8 ++++---- 7 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/test/regress/bin/normalize.sed b/src/test/regress/bin/normalize.sed index 19ac512c5fc..59bf6d97c95 100644 --- a/src/test/regress/bin/normalize.sed +++ b/src/test/regress/bin/normalize.sed @@ -324,3 +324,10 @@ s/COPY delimiter must not appear in the DEFAULT specification/COPY delimiter cha # we add them back for pg15,pg16 compatibility # e.g. change CHECK other_col >= 100 to CHECK (other_col >= 100) s/\| CHECK ([a-zA-Z])(.*)/| CHECK \(\1\2\)/g + +# pg17 change: this is a rule that ignores additional DEBUG logging +# for CREATE MATERIALIZED VIEW (commit b4da732fd64). This could be +# changed to a normalization rule when 17 becomes the minimum +# supported Postgres version. + +/DEBUG: drop auto-cascades to type [a-zA-Z_]*.pg_temp_[0-9]*/d diff --git a/src/test/regress/expected/multi_router_planner.out b/src/test/regress/expected/multi_router_planner.out index fee821a7d24..ce68d133d4f 100644 --- a/src/test/regress/expected/multi_router_planner.out +++ b/src/test/regress/expected/multi_router_planner.out @@ -10,7 +10,7 @@ CREATE SCHEMA multi_router_planner; SET search_path TO multi_router_planner; CREATE TABLE articles_hash ( id bigint NOT NULL, - author_id bigint NOT NULL, + author_id bigint, title varchar(20) NOT NULL, word_count integer ); diff --git a/src/test/regress/expected/multi_router_planner_fast_path.out b/src/test/regress/expected/multi_router_planner_fast_path.out index 25cc8a1a70e..e483660ee03 100644 --- a/src/test/regress/expected/multi_router_planner_fast_path.out +++ b/src/test/regress/expected/multi_router_planner_fast_path.out @@ -12,7 +12,7 @@ SET citus.enable_fast_path_router_planner TO true; -- =================================================================== CREATE TABLE articles_hash ( id bigint NOT NULL, - author_id bigint NOT NULL, + author_id bigint, title varchar(20) NOT NULL, word_count integer ); diff --git a/src/test/regress/expected/query_single_shard_table.out b/src/test/regress/expected/query_single_shard_table.out index 5f551a9881d..0945bc1d745 100644 --- a/src/test/regress/expected/query_single_shard_table.out +++ b/src/test/regress/expected/query_single_shard_table.out @@ -1183,7 +1183,7 @@ DEBUG: Local tables cannot be used in distributed queries. DEBUG: skipping recursive planning for the subquery since it contains references to outer queries ERROR: direct joins between distributed and local tables are not supported SELECT COUNT(*) FROM nullkey_c1_t1 t1 -WHERE t1.b IN ( +WHERE t1.b + random() IN ( SELECT b+1 FROM citus_local_table t2 WHERE t2.b = t1.a ); DEBUG: router planner does not support queries that reference non-colocated distributed tables @@ -1258,7 +1258,7 @@ DEBUG: Local tables cannot be used in distributed queries. DEBUG: skipping recursive planning for the subquery since it contains references to outer queries ERROR: direct joins between distributed and local tables are not supported SELECT COUNT(*) FROM citus_local_table t1 -WHERE t1.b IN ( +WHERE t1.b + random() IN ( SELECT b+1 FROM nullkey_c1_t1 t2 WHERE t2.b = t1.a ); DEBUG: router planner does not support queries that reference non-colocated distributed tables @@ -1312,7 +1312,7 @@ DEBUG: skipping recursive planning for the subquery since it contains reference ERROR: direct joins between distributed and local tables are not supported HINT: Use CTE's or subqueries to select from local tables and use them in joins SELECT COUNT(*) FROM nullkey_c1_t1 t1 -WHERE t1.b IN ( +WHERE t1.b + random() IN ( SELECT b+1 FROM postgres_local_table t2 WHERE t2.b = t1.a ); DEBUG: found no worker with all shard placements @@ -1344,7 +1344,7 @@ DEBUG: skipping recursive planning for the subquery since it contains reference ERROR: direct joins between distributed and local tables are not supported HINT: Use CTE's or subqueries to select from local tables and use them in joins SELECT COUNT(*) FROM postgres_local_table t1 -WHERE t1.b IN ( +WHERE t1.b + random() IN ( SELECT b+1 FROM nullkey_c1_t1 t2 WHERE t2.b = t1.a ); DEBUG: found no worker with all shard placements diff --git a/src/test/regress/sql/multi_router_planner.sql b/src/test/regress/sql/multi_router_planner.sql index 2ccd43ea3b5..20b8a59562e 100644 --- a/src/test/regress/sql/multi_router_planner.sql +++ b/src/test/regress/sql/multi_router_planner.sql @@ -15,7 +15,7 @@ SET search_path TO multi_router_planner; CREATE TABLE articles_hash ( id bigint NOT NULL, - author_id bigint NOT NULL, + author_id bigint, title varchar(20) NOT NULL, word_count integer ); diff --git a/src/test/regress/sql/multi_router_planner_fast_path.sql b/src/test/regress/sql/multi_router_planner_fast_path.sql index 1fd1f6ce0d8..56684c07575 100644 --- a/src/test/regress/sql/multi_router_planner_fast_path.sql +++ b/src/test/regress/sql/multi_router_planner_fast_path.sql @@ -18,7 +18,7 @@ SET citus.enable_fast_path_router_planner TO true; CREATE TABLE articles_hash ( id bigint NOT NULL, - author_id bigint NOT NULL, + author_id bigint, title varchar(20) NOT NULL, word_count integer ); @@ -803,7 +803,6 @@ CREATE MATERIALIZED VIEW mv_articles_hash_empty AS SELECT * FROM articles_hash WHERE author_id = 1; SELECT * FROM mv_articles_hash_empty; - SELECT id FROM articles_hash WHERE author_id = 1; diff --git a/src/test/regress/sql/query_single_shard_table.sql b/src/test/regress/sql/query_single_shard_table.sql index 96de2705c6a..4abda0bea42 100644 --- a/src/test/regress/sql/query_single_shard_table.sql +++ b/src/test/regress/sql/query_single_shard_table.sql @@ -502,7 +502,7 @@ WHERE NOT EXISTS ( ); SELECT COUNT(*) FROM nullkey_c1_t1 t1 -WHERE t1.b IN ( +WHERE t1.b + random() IN ( SELECT b+1 FROM citus_local_table t2 WHERE t2.b = t1.a ); @@ -543,7 +543,7 @@ WHERE EXISTS ( ); SELECT COUNT(*) FROM citus_local_table t1 -WHERE t1.b IN ( +WHERE t1.b + random() IN ( SELECT b+1 FROM nullkey_c1_t1 t2 WHERE t2.b = t1.a ); @@ -573,7 +573,7 @@ WHERE NOT EXISTS ( ); SELECT COUNT(*) FROM nullkey_c1_t1 t1 -WHERE t1.b IN ( +WHERE t1.b + random() IN ( SELECT b+1 FROM postgres_local_table t2 WHERE t2.b = t1.a ); @@ -593,7 +593,7 @@ WHERE EXISTS ( ); SELECT COUNT(*) FROM postgres_local_table t1 -WHERE t1.b IN ( +WHERE t1.b + random() IN ( SELECT b+1 FROM nullkey_c1_t1 t2 WHERE t2.b = t1.a );