Skip to content

Commit

Permalink
ADBDEV-5527 Change the portal strategy for modifying CTE case
Browse files Browse the repository at this point in the history
In vanilla PG and Greenplum, queries containing a modifying CTE are executed
with the portal strategy PORTAL_ONE_MOD_WITH, which involves storing the result
of the query in the portal's tuple store, from which the final result is then
fetched. This strategy is reasonable and helps us avoid executing the
modifying operation several times, for example, when one needs to fetch
the query result partially. However, in the Greenplum case, executing this
strategy at QE nodes is redundant, because clients communicate with the
system through the QD node. Therefore, this commit proposes another
type of strategy for execution at QE nodes.

Moreover, when a query contains a modifying CTE, EXPLAIN ANALYZE reports most of
the plan nodes as "never executed", even though the corresponding nodes have been
fully executed and have returned tuples. This problem arises because
of the PORTAL_ONE_MOD_WITH execution strategy, according to which an extra
pq message with the tuple description is sent to the QD, and the statistics
can't be retrieved properly in the cdbexplain_recvExecStats function.

(cherry picked from commit c9b7b02)
  • Loading branch information
bimboterminator1 authored and mos65o2 committed Jan 10, 2025
1 parent 8492198 commit 5fe368d
Show file tree
Hide file tree
Showing 4 changed files with 209 additions and 33 deletions.
10 changes: 7 additions & 3 deletions src/backend/tcop/pquery.c
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,9 @@ ChoosePortalStrategy(List *stmts)
/* Note For CreateTableAs, we still use PORTAL_MULTI_QUERY (not like PG)
* since QE needs to use DestRemote to deliver completionTag to QD and
* use DestIntoRel to insert tuples into the table(s).
*
* For a modifying CTE we use PORTAL_MULTI_QUERY at QE nodes, because
* we don't need to save the result inside the portal's tuple store.
*/
if (list_length(stmts) == 1)
{
Expand Down Expand Up @@ -396,10 +399,11 @@ ChoosePortalStrategy(List *stmts)
pstmt->copyIntoClause == NULL &&
pstmt->refreshClause == NULL)
{
if (pstmt->hasModifyingCTE)
return PORTAL_ONE_MOD_WITH;
else
if (!pstmt->hasModifyingCTE)
return PORTAL_ONE_SELECT;
if (Gp_role != GP_ROLE_EXECUTE)
return PORTAL_ONE_MOD_WITH;
return PORTAL_MULTI_QUERY;
}
if (pstmt->commandType == CMD_UTILITY)
{
Expand Down
101 changes: 87 additions & 14 deletions src/test/regress/expected/explain_analyze.out
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
-- s/Execution Time: [0-9.]+ ms/Execution Time: #.### ms/
-- m/Executor memory: \d+\w? bytes/
-- s/Executor memory: \d+\w? bytes/Executor memory: ### bytes/
-- m/\(slice\d+\) Executor memory: (\d+)\w bytes avg x \d+ workers, \d+\w bytes max \(seg\d+\)\./
-- s/Executor memory: (\d+)\w bytes avg x \d+ workers, \d+\w bytes max \(seg\d+\)\./Executor memory: ####K bytes avg x #### workers, ####K bytes max (seg#)./
-- m/Memory used:\s+\d+\w?B/
-- s/Memory used:\s+\d+\w?B/Memory used: ###B/
-- m/\d+\w? bytes max \(seg\d+\)/
Expand All @@ -32,7 +34,7 @@ EXPLAIN (ANALYZE, TIMING OFF, COSTS OFF, SUMMARY OFF) SELECT a FROM empty_table;
------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (actual rows=0 loops=1)
-> Seq Scan on empty_table (actual rows=0 loops=1)
Optimizer: Postgres query optimizer
Optimizer: Postgres-based planner
(3 rows)

-- explain_processing_on
Expand All @@ -48,13 +50,14 @@ EXPLAIN (ANALYZE, TIMING OFF, COSTS OFF, SUMMARY OFF) SELECT t1.a FROM empty_tab
-> Seq Scan on empty_table t1 (actual rows=0 loops=1)
-> Hash (never executed)
-> Seq Scan on empty_table t2 (never executed)
Optimizer: Postgres query optimizer
Optimizer: Postgres-based planner
(7 rows)

-- explain_processing_on
-- If all QEs hit errors when executing sort, we might not receive stat data for sort.
-- rethrow error before print explain info.
create extension if not exists gp_inject_fault;
NOTICE: extension "gp_inject_fault" already exists, skipping
create table sort_error_test1(tc1 int, tc2 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'tc1' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
Expand All @@ -72,7 +75,7 @@ select gp_inject_fault('explain_analyze_sort_error', 'error', dbid)
(3 rows)

EXPLAIN analyze insert into sort_error_test2 select * from sort_error_test1 order by 1;
ERROR: fault triggered, fault name:'explain_analyze_sort_error' fault type:'error' (seg1 127.0.1.1:7003 pid=103595)
ERROR: fault triggered, fault name:'explain_analyze_sort_error' fault type:'error' (seg1 172.17.0.2:7003 pid=36295)
select count(*) from sort_error_test2;
count
-------
Expand Down Expand Up @@ -116,12 +119,12 @@ explain (analyze, timing off, costs off) select a.i from slice_test a
-> Broadcast Motion 3:3 (slice2; segments: 3) (actual rows=1 loops=1)
-> Seq Scan on slice_test2 b (actual rows=1 loops=1)
Optimizer: Postgres-based planner
Planning Time: 0.662 ms
(slice0) Executor memory: 47K bytes.
(slice1) Executor memory: 43K bytes avg x 3 workers, 43K bytes max (seg0). Work_mem: 17K bytes max.
Planning Time: 0.434 ms
(slice0) Executor memory: 45K bytes.
(slice1) Executor memory: 42K bytes avg x 3 workers, 42K bytes max (seg0). Work_mem: 17K bytes max.
(slice2) Executor memory: 37K bytes avg x 3 workers, 37K bytes max (seg0).
Memory used: 128000kB
Execution Time: 4.953 ms
Memory used: 256000kB
Execution Time: 9.264 ms
(17 rows)

-- checking this didn't break previous behavior
Expand All @@ -140,13 +143,83 @@ explain (analyze, timing off, costs off)
-> Result (actual rows=1 loops=2)
One-Time Filter: ((x.x)::integer = 0)
Optimizer: Postgres-based planner
Planning Time: 0.245 ms
(slice0) Executor memory: 19K bytes.
(slice1) Executor memory: 36K bytes. Work_mem: 17K bytes max.
Memory used: 128000kB
Execution Time: 0.082 ms
(14 rows)
Planning Time: 0.233 ms
(slice0) Executor memory: 18K bytes.
(slice1) Executor memory: 35K bytes. Work_mem: 17K bytes max.
Memory used: 256000kB
Execution Time: 0.090 ms
(13 rows)

-- explain_processing_on
drop table slice_test;
drop table slice_test2;
-- The statistics for modifying CTEs used to be reported as "never executed",
-- when all plan nodes were executed and some stat information was expected.
-- Test QD receiving the stats from all slices and showing them in explain output.
--start_ignore
DROP TABLE IF EXISTS with_dml;
NOTICE: table "with_dml" does not exist, skipping
--end_ignore
CREATE TABLE with_dml (i int, j int) DISTRIBUTED BY (i);
EXPLAIN (ANALYZE, TIMING OFF, COSTS OFF)
WITH cte AS (
INSERT INTO with_dml SELECT i, i * 100 FROM generate_series(1,5) i
RETURNING i
) SELECT * FROM cte;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (actual rows=5 loops=1)
-> Shared Scan (share slice:id 1:0) (actual rows=3 loops=1)
-> Insert on with_dml (actual rows=3 loops=1)
-> Redistribute Motion 1:3 (slice2; segments: 1) (actual rows=3 loops=1)
Hash Key: i.i
-> Function Scan on generate_series i (actual rows=5 loops=1)
Optimizer: Postgres-based planner
Planning Time: 0.163 ms
(slice0) Executor memory: 36K bytes.
(slice1) Executor memory: 36K bytes avg x 3 workers, 36K bytes max (seg0). Work_mem: 17K bytes max.
(slice2) Executor memory: 40K bytes (seg2). Work_mem: 17K bytes max.
Memory used: 256000kB
Execution Time: 7.189 ms
(13 rows)

EXPLAIN (ANALYZE, TIMING OFF, COSTS OFF)
WITH cte AS (
UPDATE with_dml SET j = j + 1
RETURNING i
) SELECT * FROM cte;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (actual rows=5 loops=1)
-> Shared Scan (share slice:id 1:0) (actual rows=3 loops=1)
-> Update on with_dml (actual rows=3 loops=1)
-> Seq Scan on with_dml (actual rows=3 loops=1)
Optimizer: Postgres-based planner
Planning Time: 0.288 ms
(slice0) Executor memory: 38K bytes.
(slice1) Executor memory: 38K bytes avg x 3 workers, 38K bytes max (seg0). Work_mem: 17K bytes max.
Memory used: 256000kB
Execution Time: 1.290 ms
(10 rows)

EXPLAIN (ANALYZE, TIMING OFF, COSTS OFF)
WITH cte AS (
DELETE FROM with_dml WHERE i > 0
RETURNING i
) SELECT * FROM cte;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (actual rows=5 loops=1)
-> Shared Scan (share slice:id 1:0) (actual rows=3 loops=1)
-> Delete on with_dml (actual rows=3 loops=1)
-> Seq Scan on with_dml (actual rows=3 loops=1)
Filter: (i > 0)
Optimizer: Postgres-based planner
Planning Time: 0.232 ms
(slice0) Executor memory: 38K bytes.
(slice1) Executor memory: 37K bytes avg x 3 workers, 37K bytes max (seg0). Work_mem: 17K bytes max.
Memory used: 256000kB
Execution Time: 0.947 ms
(11 rows)

DROP TABLE with_dml;
105 changes: 89 additions & 16 deletions src/test/regress/expected/explain_analyze_optimizer.out
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
-- s/Execution Time: [0-9.]+ ms/Execution Time: #.### ms/
-- m/Executor memory: \d+\w? bytes/
-- s/Executor memory: \d+\w? bytes/Executor memory: ### bytes/
-- m/\(slice\d+\) Executor memory: (\d+)\w bytes avg x \d+ workers, \d+\w bytes max \(seg\d+\)\./
-- s/Executor memory: (\d+)\w bytes avg x \d+ workers, \d+\w bytes max \(seg\d+\)\./Executor memory: ####K bytes avg x #### workers, ####K bytes max (seg#)./
-- m/Memory used:\s+\d+\w?B/
-- s/Memory used:\s+\d+\w?B/Memory used: ###B/
-- m/\d+\w? bytes max \(seg\d+\)/
Expand All @@ -32,30 +34,31 @@ EXPLAIN (ANALYZE, TIMING OFF, COSTS OFF, SUMMARY OFF) SELECT a FROM empty_table;
------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (actual rows=0 loops=1)
-> Seq Scan on empty_table (actual rows=0 loops=1)
Optimizer: Pivotal Optimizer (GPORCA)
Optimizer: GPORCA
(3 rows)

-- explain_processing_on
-- For a node that is truly never executed, we still expect "never executed" to
-- be reported
-- explain_processing_off
EXPLAIN (ANALYZE, TIMING OFF, COSTS OFF, SUMMARY OFF) SELECT t1.a FROM empty_table t1 join empty_table t2 on t1.a = t2.a;
QUERY PLAN
---------------------------------------------------------------------------------
QUERY PLAN
----------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (actual rows=0 loops=1)
-> Hash Join (actual rows=0 loops=1)
Hash Cond: (t1.a = t2.a)
-> Seq Scan on empty_table t1 (never executed)
-> Hash (actual rows=0 loops=1)
Buckets: 524288 Batches: 1 Memory Usage: 4096kB
Buckets: 1048576 Batches: 1 Memory Usage: 8192kB
-> Seq Scan on empty_table t2 (actual rows=0 loops=1)
Optimizer: Pivotal Optimizer (GPORCA)
Optimizer: GPORCA
(8 rows)

-- explain_processing_on
-- If all QEs hit errors when executing sort, we might not receive stat data for sort.
-- rethrow error before print explain info.
create extension if not exists gp_inject_fault;
NOTICE: extension "gp_inject_fault" already exists, skipping
create table sort_error_test1(tc1 int, tc2 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'tc1' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
Expand All @@ -73,7 +76,7 @@ select gp_inject_fault('explain_analyze_sort_error', 'error', dbid)
(3 rows)

EXPLAIN analyze insert into sort_error_test2 select * from sort_error_test1 order by 1;
ERROR: fault triggered, fault name:'explain_analyze_sort_error' fault type:'error' (seg0 127.0.1.1:7002 pid=103209)
ERROR: fault triggered, fault name:'explain_analyze_sort_error' fault type:'error' (seg0 172.17.0.2:7002 pid=37168)
select count(*) from sort_error_test2;
count
-------
Expand Down Expand Up @@ -119,12 +122,12 @@ explain (analyze, timing off, costs off) select a.i from slice_test a
-> Gather Motion 3:1 (slice2; segments: 3) (actual rows=1 loops=1)
-> Seq Scan on slice_test2 b (actual rows=1 loops=1)
Optimizer: GPORCA
Planning Time: 6.096 ms
(slice0) Executor memory: 49K bytes. Work_mem: 17K bytes max.
Planning Time: 4.463 ms
(slice0) Executor memory: 48K bytes. Work_mem: 17K bytes max.
(slice1) Executor memory: 36K bytes avg x 3 workers, 36K bytes max (seg0).
(slice2) Executor memory: 37K bytes avg x 3 workers, 37K bytes max (seg0).
Memory used: 128000kB
Execution Time: 5.096 ms
Memory used: 256000kB
Execution Time: 6.883 ms
(19 rows)

-- checking this didn't break previous behavior
Expand All @@ -143,13 +146,83 @@ explain (analyze, timing off, costs off)
-> Result (actual rows=1 loops=2)
One-Time Filter: ((x.x)::integer = 0)
Optimizer: Postgres-based planner
Planning Time: 2.920 ms
(slice0) Executor memory: 19K bytes.
(slice1) Executor memory: 36K bytes. Work_mem: 17K bytes max.
Memory used: 128000kB
Execution Time: 0.085 ms
(14 rows)
Planning Time: 3.210 ms
(slice0) Executor memory: 18K bytes.
(slice1) Executor memory: 35K bytes. Work_mem: 17K bytes max.
Memory used: 256000kB
Execution Time: 0.074 ms
(13 rows)

-- explain_processing_on
drop table slice_test;
drop table slice_test2;
-- The statistics for modifying CTEs used to be reported as "never executed",
-- when all plan nodes were executed and some stat information was expected.
-- Test QD receiving the stats from all slices and showing them in explain output.
--start_ignore
DROP TABLE IF EXISTS with_dml;
NOTICE: table "with_dml" does not exist, skipping
--end_ignore
CREATE TABLE with_dml (i int, j int) DISTRIBUTED BY (i);
EXPLAIN (ANALYZE, TIMING OFF, COSTS OFF)
WITH cte AS (
INSERT INTO with_dml SELECT i, i * 100 FROM generate_series(1,5) i
RETURNING i
) SELECT * FROM cte;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (actual rows=5 loops=1)
-> Shared Scan (share slice:id 1:0) (actual rows=3 loops=1)
-> Insert on with_dml (actual rows=3 loops=1)
-> Redistribute Motion 1:3 (slice2; segments: 1) (actual rows=3 loops=1)
Hash Key: i.i
-> Function Scan on generate_series i (actual rows=5 loops=1)
Optimizer: Postgres-based planner
Planning Time: 2.969 ms
(slice0) Executor memory: 36K bytes.
(slice1) Executor memory: 36K bytes avg x 3 workers, 36K bytes max (seg0). Work_mem: 17K bytes max.
(slice2) Executor memory: 40K bytes (seg2). Work_mem: 17K bytes max.
Memory used: 256000kB
Execution Time: 7.598 ms
(13 rows)

EXPLAIN (ANALYZE, TIMING OFF, COSTS OFF)
WITH cte AS (
UPDATE with_dml SET j = j + 1
RETURNING i
) SELECT * FROM cte;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (actual rows=5 loops=1)
-> Shared Scan (share slice:id 1:0) (actual rows=3 loops=1)
-> Update on with_dml (actual rows=3 loops=1)
-> Seq Scan on with_dml (actual rows=3 loops=1)
Optimizer: Postgres-based planner
Planning Time: 2.896 ms
(slice0) Executor memory: 38K bytes.
(slice1) Executor memory: 38K bytes avg x 3 workers, 38K bytes max (seg0). Work_mem: 17K bytes max.
Memory used: 256000kB
Execution Time: 1.348 ms
(10 rows)

EXPLAIN (ANALYZE, TIMING OFF, COSTS OFF)
WITH cte AS (
DELETE FROM with_dml WHERE i > 0
RETURNING i
) SELECT * FROM cte;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (actual rows=5 loops=1)
-> Shared Scan (share slice:id 1:0) (actual rows=3 loops=1)
-> Delete on with_dml (actual rows=3 loops=1)
-> Seq Scan on with_dml (actual rows=3 loops=1)
Filter: (i > 0)
Optimizer: Postgres-based planner
Planning Time: 2.691 ms
(slice0) Executor memory: 38K bytes.
(slice1) Executor memory: 37K bytes avg x 3 workers, 37K bytes max (seg0). Work_mem: 17K bytes max.
Memory used: 256000kB
Execution Time: 1.059 ms
(11 rows)

DROP TABLE with_dml;
26 changes: 26 additions & 0 deletions src/test/regress/sql/explain_analyze.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
-- s/Execution Time: [0-9.]+ ms/Execution Time: #.### ms/
-- m/Executor memory: \d+\w? bytes/
-- s/Executor memory: \d+\w? bytes/Executor memory: ### bytes/
-- m/\(slice\d+\) Executor memory: (\d+)\w bytes avg x \d+ workers, \d+\w bytes max \(seg\d+\)\./
-- s/Executor memory: (\d+)\w bytes avg x \d+ workers, \d+\w bytes max \(seg\d+\)\./Executor memory: ####K bytes avg x #### workers, ####K bytes max (seg#)./
-- m/Memory used:\s+\d+\w?B/
-- s/Memory used:\s+\d+\w?B/Memory used: ###B/
-- m/\d+\w? bytes max \(seg\d+\)/
Expand Down Expand Up @@ -76,3 +78,27 @@ explain (analyze, timing off, costs off)

drop table slice_test;
drop table slice_test2;

-- The statistics for modifying CTEs used to be reported as "never executed",
-- when all plan nodes were executed and some stat information was expected.
-- Test QD receiving the stats from all slices and showing them in explain output.
--start_ignore
DROP TABLE IF EXISTS with_dml;
--end_ignore
CREATE TABLE with_dml (i int, j int) DISTRIBUTED BY (i);
EXPLAIN (ANALYZE, TIMING OFF, COSTS OFF)
WITH cte AS (
INSERT INTO with_dml SELECT i, i * 100 FROM generate_series(1,5) i
RETURNING i
) SELECT * FROM cte;
EXPLAIN (ANALYZE, TIMING OFF, COSTS OFF)
WITH cte AS (
UPDATE with_dml SET j = j + 1
RETURNING i
) SELECT * FROM cte;
EXPLAIN (ANALYZE, TIMING OFF, COSTS OFF)
WITH cte AS (
DELETE FROM with_dml WHERE i > 0
RETURNING i
) SELECT * FROM cte;
DROP TABLE with_dml;

0 comments on commit 5fe368d

Please sign in to comment.