diff --git a/src/backend/cdb/cdbmutate.c b/src/backend/cdb/cdbmutate.c index f68952fb6f03..db1e9d7ab0f8 100644 --- a/src/backend/cdb/cdbmutate.c +++ b/src/backend/cdb/cdbmutate.c @@ -75,8 +75,18 @@ typedef struct ApplyMotionState * plan_tree_walker/mutator */ int nextMotionID; int sliceDepth; - bool containMotionNodes; HTAB *planid_subplans; /* hash table for InitPlanItem */ + + /* Context for ModifyTable to elide Explicit Redistribute Motion */ + bool mtIsChecking; /* True if we encountered ModifyTable + * node with UPDATE/DELETE and we plan + * to insert Explicit Motions. */ + List *mtResultRelations; /* Indexes into rtable for relations to + * be modified. Only valid if mtIsChecking + * is true. */ + int nMotionsAbove; /* Number of motions above the current + * node. Only valid if mtIsChecking is + * true. */ } ApplyMotionState; typedef struct InitPlanItem @@ -404,7 +414,10 @@ apply_motion(PlannerInfo *root, Plan *plan, Query *query) * plan */ state.nextMotionID = 1; /* Start at 1 so zero will mean "unassigned". */ state.sliceDepth = 0; - state.containMotionNodes = false; + state.mtIsChecking = false; + state.mtResultRelations = NIL; + state.nMotionsAbove = 0; + memset(&ctl, 0, sizeof(ctl)); ctl.keysize = sizeof(int); ctl.entrysize = sizeof(InitPlanItem); @@ -755,14 +768,15 @@ apply_motion_mutator(Node *node, ApplyMotionState *context) if (!is_plan_node(node)) { /* - * The containMotionNodes flag keeps track of whether there are any - * Motion nodes, ignoring any in InitPlans. So if we recurse into an - * InitPlan, save and restore the flag. + * If we expect to elide the Explicit Redistribute Motion, we can + * disable mtIsChecking while we're in an InitPlan, since there will not + * be any scan nodes that perform a scan on the same range table entry + * as ModifyTable under InitPlans. 
*/ if (IsA(node, SubPlan) &&((SubPlan *) node)->is_initplan) { bool found; - bool saveContainMotionNodes = context->containMotionNodes; + bool saveMtIsChecking = context->mtIsChecking; int saveSliceDepth = context->sliceDepth; SubPlan *subplan = (SubPlan *) node; /* @@ -777,9 +791,10 @@ apply_motion_mutator(Node *node, ApplyMotionState *context) /* reset sliceDepth for each init plan */ context->sliceDepth = 0; + context->mtIsChecking = false; node = plan_tree_mutator(node, apply_motion_mutator, context); - context->containMotionNodes = saveContainMotionNodes; + context->mtIsChecking = saveMtIsChecking; context->sliceDepth = saveSliceDepth; return node; @@ -788,6 +803,82 @@ apply_motion_mutator(Node *node, ApplyMotionState *context) return plan_tree_mutator(node, apply_motion_mutator, context); } + /* + * For UPDATE/DELETE, we check if there are any motions before the scan in + * the same subtree for the table we're going to modify. If we encounter + * the scan before any motions, then we can elide the unnecessary Explicit + * Redistribute Motion. + */ + if (IsA(node, ModifyTable)) + { + ModifyTable *mt = (ModifyTable *) node; + + if (mt->operation == CMD_UPDATE || mt->operation == CMD_DELETE) + { + /* + * Sanity check, since we don't allow multiple ModifyTable nodes. + */ + Assert(!context->mtIsChecking); + Assert(context->mtResultRelations == NIL); + Assert(context->nMotionsAbove == 0); + + /* + * When UPDATE/DELETE occurs on a partitioned table, or a table that + * is part of an inheritance tree, the ModifyTable node will have + * more than one relation in resultRelations. + * + * Remember the result relations' indexes to compare them later. + */ + context->mtIsChecking = true; + context->mtResultRelations = mt->resultRelations; + } + } + else if (context->mtIsChecking) + { + /* + * Remember if we are descending into a motion node. 
+ */ + if (IsA(node, Motion)) + context->nMotionsAbove++; + else + { + /* + * If this is a scan and its scanrelid matches ModifyTable's relid, + * we need to check if there were any motions above. + * + * These are scan nodes that can be used to perform distributed + * UPDATE/DELETE on the relation they scan, possibly with motions + * above them. This list needs to be updated for other nodes if they + * are changed to support DML execution on segments. + */ + switch (nodeTag(node)) + { + case T_SeqScan: + case T_DynamicSeqScan: + case T_IndexScan: + case T_DynamicIndexScan: + case T_IndexOnlyScan: + case T_BitmapIndexScan: + case T_DynamicBitmapIndexScan: + case T_BitmapHeapScan: + case T_DynamicBitmapHeapScan: + case T_TidScan: + if (list_member_int(context->mtResultRelations, + ((Scan *) node)->scanrelid)) + { + /* + * Freeze the motion counter. Also, we don't + * need to check other nodes in this subtree + * anymore. + */ + context->mtIsChecking = false; + } + default: + break; + } + } + } + plan = (Plan *) node; flow = plan->flow; @@ -936,17 +1027,11 @@ apply_motion_mutator(Node *node, ApplyMotionState *context) break; case MOVEMENT_EXPLICIT: - /* - * add an ExplicitRedistribute motion node only if child plan - * nodes have a motion node + * Were there any motions above the scan? */ - if (context->containMotionNodes) + if (context->nMotionsAbove > 0) { - /* - * motion node in child nodes: add a ExplicitRedistribute - * motion - */ newnode = (Node *) make_explicit_motion(plan, flow->segidColIdx, true /* useExecutorVarFormat */ @@ -955,13 +1040,20 @@ apply_motion_mutator(Node *node, ApplyMotionState *context) else { /* - * no motion nodes in child plan nodes - no need for - * ExplicitRedistribute: restore flow + * Restore flow if Explicit Redistribute Motion is not needed */ flow->req_move = MOVEMENT_NONE; flow->flow_before_req_move = NULL; } + /* + * If we're here, it means we are directly under the ModifyTable + * node. 
We are about to leave this recursion level and enter + * another subtree, so reset the state and keep checking in case + * another Explicit Redistribute Motion is needed. + */ + context->mtIsChecking = true; + context->nMotionsAbove = 0; break; case MOVEMENT_NONE: @@ -1009,16 +1101,14 @@ apply_motion_mutator(Node *node, ApplyMotionState *context) plan->nMotionNodes = context->nextMotionID - saveNextMotionID; plan->nInitPlans = hash_get_num_entries(context->planid_subplans) - saveNumInitPlans; - /* - * Remember if this was a Motion node. This is used at the top of the - * tree, with MOVEMENT_EXPLICIT, to avoid adding an explicit motion, if - * there were no Motion in the subtree. Note that this does not take - * InitPlans containing Motion nodes into account. InitPlans are executed - * as a separate step before the main plan, and hence any Motion nodes in - * them don't need to affect the way the main plan is executed. - */ - if (IsA(newnode, Motion)) - context->containMotionNodes = true; + if (context->mtIsChecking) + { + /* We're going out of this motion node. 
*/ + if (IsA(node, Motion)) + context->nMotionsAbove--; + else if (IsA(node, ModifyTable)) + context->mtIsChecking = false; + } return newnode; } /* apply_motion_mutator */ diff --git a/src/test/regress/expected/bfv_dml.out b/src/test/regress/expected/bfv_dml.out index 444c9673c877..bb18be62e795 100644 --- a/src/test/regress/expected/bfv_dml.out +++ b/src/test/regress/expected/bfv_dml.out @@ -521,17 +521,16 @@ alter table bar set with(REORGANIZE=false) distributed randomly; analyze foo; analyze bar; explain delete from foo using bar; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------ + QUERY PLAN +-------------------------------------------------------------------------------------------------- Delete on foo (cost=10000000000.00..10000000009.98 rows=34 width=16) - -> Explicit Redistribute Motion 3:3 (slice2; segments: 3) (cost=10000000000.00..10000000009.98 rows=34 width=16) - -> Nested Loop (cost=10000000000.00..10000000009.98 rows=34 width=16) - -> Seq Scan on foo (cost=0.00..3.10 rows=4 width=10) - -> Materialize (cost=0.00..3.65 rows=10 width=6) - -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..3.50 rows=10 width=6) - -> Seq Scan on bar (cost=0.00..3.10 rows=4 width=6) + -> Nested Loop (cost=10000000000.00..10000000009.98 rows=34 width=16) + -> Seq Scan on foo (cost=0.00..3.10 rows=4 width=10) + -> Materialize (cost=0.00..3.65 rows=10 width=6) + -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..3.50 rows=10 width=6) + -> Seq Scan on bar (cost=0.00..3.10 rows=4 width=6) Optimizer: Postgres query optimizer -(8 rows) +(7 rows) delete from foo using bar; drop table foo; diff --git a/src/test/regress/expected/gangsize.out b/src/test/regress/expected/gangsize.out index c72d8df7c6ff..4afce1ccb1ec 100644 --- a/src/test/regress/expected/gangsize.out +++ b/src/test/regress/expected/gangsize.out @@ -173,13 +173,11 @@ end; INFO: Distributed transaction command 'Distributed 
Commit (one-phase)' to PARTIAL contents: 0 1 update random_2_0 set a = 1 from hash_3_3_2 where hash_3_3_2.b = random_2_0.c; INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 -INFO: (slice 2) Dispatch command to PARTIAL contents: 0 1 INFO: (slice 0) Dispatch command to PARTIAL contents: 0 1 INFO: Distributed transaction command 'Distributed Commit (one-phase)' to ALL contents: 0 1 2 begin; update random_2_0 set a = 1 from hash_3_3_2 where hash_3_3_2.b = random_2_0.c; INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 -INFO: (slice 2) Dispatch command to PARTIAL contents: 0 1 INFO: (slice 0) Dispatch command to PARTIAL contents: 0 1 end; INFO: Distributed transaction command 'Distributed Commit (one-phase)' to ALL contents: 0 1 2 diff --git a/src/test/regress/expected/gangsize_optimizer.out b/src/test/regress/expected/gangsize_optimizer.out index efe59dc3f6a1..fc664fe89260 100644 --- a/src/test/regress/expected/gangsize_optimizer.out +++ b/src/test/regress/expected/gangsize_optimizer.out @@ -171,13 +171,11 @@ end; INFO: Distributed transaction command 'Distributed Commit (one-phase)' to PARTIAL contents: 0 1 update random_2_0 set a = 1 from hash_3_3_2 where hash_3_3_2.b = random_2_0.c; INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 -INFO: (slice 2) Dispatch command to PARTIAL contents: 0 1 INFO: (slice 0) Dispatch command to PARTIAL contents: 0 1 INFO: Distributed transaction command 'Distributed Commit (one-phase)' to ALL contents: 0 1 2 begin; update random_2_0 set a = 1 from hash_3_3_2 where hash_3_3_2.b = random_2_0.c; INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 -INFO: (slice 2) Dispatch command to PARTIAL contents: 0 1 INFO: (slice 0) Dispatch command to PARTIAL contents: 0 1 end; INFO: Distributed transaction command 'Distributed Commit (one-phase)' to ALL contents: 0 1 2 diff --git a/src/test/regress/expected/gp_unique_rowid.out b/src/test/regress/expected/gp_unique_rowid.out index ec782ff45645..f2bcbcf6c699 100644 --- 
a/src/test/regress/expected/gp_unique_rowid.out +++ b/src/test/regress/expected/gp_unique_rowid.out @@ -192,22 +192,21 @@ where e.x in select b from t2_12512 ) ; - QUERY PLAN ---------------------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------- Update on t_12512 - -> Explicit Redistribute Motion 3:3 (slice2; segments: 3) - -> HashAggregate - Group Key: t_12512.ctid, x.ctid - -> Hash Join - Hash Cond: (x.x = t2_12512.b) - -> Nested Loop - -> Seq Scan on t_12512 - -> Function Scan on x - -> Hash - -> Broadcast Motion 3:3 (slice1; segments: 3) - -> Seq Scan on t2_12512 + -> HashAggregate + Group Key: t_12512.ctid, x.ctid + -> Hash Join + Hash Cond: (x.x = t2_12512.b) + -> Nested Loop + -> Seq Scan on t_12512 + -> Function Scan on x + -> Hash + -> Broadcast Motion 3:3 (slice1; segments: 3) + -> Seq Scan on t2_12512 Optimizer: Postgres query optimizer -(13 rows) +(12 rows) update t_12512 set b = 1 from @@ -231,23 +230,22 @@ where e.x in select b from t2_12512 ) ; - QUERY PLAN ---------------------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------- Update on t_12512 - -> Explicit Redistribute Motion 3:3 (slice2; segments: 3) - -> HashAggregate - Group Key: t_12512.ctid, "*VALUES*".ctid - -> Hash Join - Hash Cond: ("*VALUES*".column1 = t2_12512.b) - -> Nested Loop - -> Seq Scan on t_12512 - -> Materialize - -> Values Scan on "*VALUES*" - -> Hash - -> Broadcast Motion 3:3 (slice1; segments: 3) - -> Seq Scan on t2_12512 + -> HashAggregate + Group Key: t_12512.ctid, "*VALUES*".ctid + -> Hash Join + Hash Cond: ("*VALUES*".column1 = t2_12512.b) + -> Nested Loop + -> Seq Scan on t_12512 + -> Materialize + -> Values Scan on "*VALUES*" + -> Hash + -> Broadcast Motion 3:3 (slice1; segments: 3) + -> Seq Scan on t2_12512 Optimizer: Postgres query optimizer -(14 rows) +(13 rows) update t_12512 set b = 1 from 
diff --git a/src/test/regress/expected/gporca.out b/src/test/regress/expected/gporca.out index 80c77c8f17f9..02be58a7c50a 100644 --- a/src/test/regress/expected/gporca.out +++ b/src/test/regress/expected/gporca.out @@ -11861,19 +11861,18 @@ select b, count(*) from gpexp_hash group by b order by b; (11 rows) explain update gpexp_rand set b=(select b from gpexp_hash where gpexp_rand.a = gpexp_hash.a); - QUERY PLAN ---------------------------------------------------------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------------------------------------------- Update on gpexp_rand (cost=0.00..133.75 rows=25 width=14) - -> Explicit Redistribute Motion 2:2 (slice2; segments: 2) (cost=0.00..133.75 rows=25 width=14) - -> Seq Scan on gpexp_rand (cost=0.00..133.75 rows=25 width=14) - SubPlan 1 (slice2; segments: 2) - -> Result (cost=0.00..2.63 rows=1 width=4) - Filter: (gpexp_rand.a = gpexp_hash.a) - -> Materialize (cost=0.00..2.63 rows=1 width=4) - -> Broadcast Motion 2:2 (slice1; segments: 2) (cost=0.00..2.62 rows=1 width=4) - -> Seq Scan on gpexp_hash (cost=0.00..2.62 rows=1 width=4) + -> Seq Scan on gpexp_rand (cost=0.00..133.75 rows=25 width=14) + SubPlan 1 (slice0; segments: 2) + -> Result (cost=0.00..2.63 rows=1 width=4) + Filter: (gpexp_rand.a = gpexp_hash.a) + -> Materialize (cost=0.00..2.63 rows=1 width=4) + -> Broadcast Motion 2:2 (slice1; segments: 2) (cost=0.00..2.62 rows=1 width=4) + -> Seq Scan on gpexp_hash (cost=0.00..2.62 rows=1 width=4) Optimizer: Postgres query optimizer -(10 rows) +(9 rows) update gpexp_rand set b=(select b from gpexp_hash where gpexp_rand.a = gpexp_hash.a); select b, count(*) from gpexp_rand group by b order by b; diff --git a/src/test/regress/expected/gporca_optimizer.out b/src/test/regress/expected/gporca_optimizer.out index 914ad2c88bea..0f2b9265a4bc 100644 --- a/src/test/regress/expected/gporca_optimizer.out +++ 
b/src/test/regress/expected/gporca_optimizer.out @@ -11970,19 +11970,18 @@ DETAIL: Unknown error: Partially Distributed Data explain update gpexp_rand set b=(select b from gpexp_hash where gpexp_rand.a = gpexp_hash.a); INFO: GPORCA failed to produce a plan, falling back to planner DETAIL: Unknown error: Partially Distributed Data - QUERY PLAN ---------------------------------------------------------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------------------------------------------- Update on gpexp_rand (cost=0.00..133.75 rows=25 width=14) - -> Explicit Redistribute Motion 2:2 (slice2; segments: 2) (cost=0.00..133.75 rows=25 width=14) - -> Seq Scan on gpexp_rand (cost=0.00..133.75 rows=25 width=14) - SubPlan 1 (slice2; segments: 2) - -> Result (cost=0.00..2.63 rows=1 width=4) - Filter: (gpexp_rand.a = gpexp_hash.a) - -> Materialize (cost=0.00..2.63 rows=1 width=4) - -> Broadcast Motion 2:2 (slice1; segments: 2) (cost=0.00..2.62 rows=1 width=4) - -> Seq Scan on gpexp_hash (cost=0.00..2.62 rows=1 width=4) + -> Seq Scan on gpexp_rand (cost=0.00..133.75 rows=25 width=14) + SubPlan 1 (slice0; segments: 2) + -> Result (cost=0.00..2.63 rows=1 width=4) + Filter: (gpexp_rand.a = gpexp_hash.a) + -> Materialize (cost=0.00..2.63 rows=1 width=4) + -> Broadcast Motion 2:2 (slice1; segments: 2) (cost=0.00..2.62 rows=1 width=4) + -> Seq Scan on gpexp_hash (cost=0.00..2.62 rows=1 width=4) Optimizer: Postgres query optimizer -(10 rows) +(9 rows) update gpexp_rand set b=(select b from gpexp_hash where gpexp_rand.a = gpexp_hash.a); INFO: GPORCA failed to produce a plan, falling back to planner diff --git a/src/test/regress/expected/partition_pruning.out b/src/test/regress/expected/partition_pruning.out index 942fad0a7f45..d3d112af15f7 100644 --- a/src/test/regress/expected/partition_pruning.out +++ b/src/test/regress/expected/partition_pruning.out @@ -3498,26 +3498,25 @@ from ( ) src where 
trg.key1 = src.key1 and trg.key1 = 2; - QUERY PLAN -------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------- Update on t_part1_1_prt_2 trg - -> Explicit Redistribute Motion 3:3 (slice3; segments: 3) - -> Nested Loop - -> Seq Scan on t_part1_1_prt_2 trg - Filter: (key1 = 2) - -> Materialize - -> Redistribute Motion 1:3 (slice2; segments: 1) - Hash Key: src.key1 - -> Subquery Scan on src - Filter: (src.key1 = 2) - -> WindowAgg - -> Gather Motion 3:1 (slice1; segments: 3) - -> Result - -> Append - -> Seq Scan on t_part1_1_prt_2 r - Filter: (key1 = 2) + -> Nested Loop + -> Seq Scan on t_part1_1_prt_2 trg + Filter: (key1 = 2) + -> Materialize + -> Redistribute Motion 1:3 (slice2; segments: 1) + Hash Key: src.key1 + -> Subquery Scan on src + Filter: (src.key1 = 2) + -> WindowAgg + -> Gather Motion 3:1 (slice1; segments: 3) + -> Result + -> Append + -> Seq Scan on t_part1_1_prt_2 r + Filter: (key1 = 2) Optimizer: Postgres query optimizer -(17 rows) +(16 rows) DROP TABLE t_part1; -- Test that the dynamic partition pruning should not be performed if the partition's opclass and the diff --git a/src/test/regress/expected/qp_subquery.out b/src/test/regress/expected/qp_subquery.out index fc255ecd860d..b60668709932 100644 --- a/src/test/regress/expected/qp_subquery.out +++ b/src/test/regress/expected/qp_subquery.out @@ -1073,32 +1073,30 @@ create table TabDel4(a int not null, b int not null); insert into TabDel4 values(1,2); commit; explain delete from TabDel1 where TabDel1.a not in (select a from TabDel3); -- do not support this because we produce NLASJ - QUERY PLAN -------------------------------------------------------------------------------------------------------- - Delete on tabdel1 (cost=1.09..3.17 rows=2 width=16) - -> Explicit Redistribute Motion 3:3 (slice2; segments: 3) (cost=1.09..3.17 rows=2 width=10) - -> Hash Left Anti Semi 
(Not-In) Join (cost=1.09..3.17 rows=2 width=10) - Hash Cond: tabdel1.a = tabdel3.a - -> Seq Scan on tabdel1 (cost=0.00..2.03 rows=1 width=14) - -> Hash (cost=1.05..1.05 rows=1 width=4) - -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..1.05 rows=1 width=4) - -> Seq Scan on tabdel3 (cost=0.00..1.01 rows=1 width=4) - Optimizer status: Postgres query optimizer -(9 rows) + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Delete on tabdel1 (cost=1.09..4.17 rows=2 width=16) + -> Hash Left Anti Semi (Not-In) Join (cost=1.09..4.17 rows=2 width=16) + Hash Cond: (tabdel1.a = tabdel3.a) + -> Seq Scan on tabdel1 (cost=0.00..3.03 rows=1 width=14) + -> Hash (cost=1.05..1.05 rows=1 width=10) + -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..1.05 rows=1 width=10) + -> Seq Scan on tabdel3 (cost=0.00..1.01 rows=1 width=10) + Optimizer: Postgres query optimizer +(8 rows) explain delete from TabDel2 where TabDel2.a not in (select a from TabDel4); -- support this - QUERY PLAN --------------------------------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------------------------- Delete on tabdel2 (cost=1.09..4.17 rows=2 width=16) - -> Explicit Redistribute Motion 3:3 (slice2; segments: 3) (cost=1.09..4.17 rows=2 width=16) - -> Hash Left Anti Semi (Not-In) Join (cost=1.09..4.17 rows=2 width=16) - Hash Cond: (tabdel2.a = tabdel4.a) - -> Seq Scan on tabdel2 (cost=0.00..3.03 rows=1 width=14) - -> Hash (cost=1.05..1.05 rows=1 width=10) - -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..1.05 rows=1 width=10) - -> Seq Scan on tabdel4 (cost=0.00..1.01 rows=1 width=10) + -> Hash Left Anti Semi (Not-In) Join (cost=1.09..4.17 rows=2 width=16) + Hash Cond: (tabdel2.a = tabdel4.a) + -> Seq Scan on tabdel2 (cost=0.00..3.03 rows=1 width=14) + -> Hash (cost=1.05..1.05 rows=1 width=10) + -> Broadcast 
Motion 3:3 (slice1; segments: 3) (cost=0.00..1.05 rows=1 width=10) + -> Seq Scan on tabdel4 (cost=0.00..1.01 rows=1 width=10) Optimizer: Postgres query optimizer -(9 rows) +(8 rows) delete from TabDel2 where TabDel2.a not in (select a from TabDel4); select * from TabDel2; diff --git a/src/test/regress/expected/update_gp.out b/src/test/regress/expected/update_gp.out index 2f8a4e309628..f62e0ffc2d37 100644 --- a/src/test/regress/expected/update_gp.out +++ b/src/test/regress/expected/update_gp.out @@ -82,8 +82,8 @@ NOTICE: merging column "a" with inherited definition NOTICE: merging column "b" with inherited definition insert into base_tbl select g, g from generate_series(1, 5) g; explain (costs off) update base_tbl set a=a+1; - QUERY PLAN ---------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------ Update on base_tbl -> Redistribute Motion 3:3 (slice1; segments: 3) Hash Key: ((base_tbl.a + 1)) @@ -93,10 +93,9 @@ explain (costs off) update base_tbl set a=a+1; Hash Key: ((child_a.a + 1)) -> Split -> Seq Scan on child_a - -> Explicit Redistribute Motion 3:3 (slice3; segments: 3) - -> Seq Scan on child_b + -> Seq Scan on child_b Optimizer: Postgres query optimizer -(12 rows) +(11 rows) update base_tbl set a = 5; -- @@ -209,6 +208,375 @@ DROP TABLE keo2; DROP TABLE keo3; DROP TABLE keo4; DROP TABLE keo5; +-- Explicit Redistribute Motion should be added only if there is a motion +-- between the scan and the ModifyTable on the relation we are going to modify. 
+-- (test case not applicable to ORCA) +CREATE TABLE t1 (i int, j int) DISTRIBUTED BY (i); +CREATE TABLE t2 (i int) DISTRIBUTED BY (i); +CREATE TABLE t_strewn (i int) DISTRIBUTED RANDOMLY; +CREATE TABLE t_strewn2 (i int) DISTRIBUTED RANDOMLY; +INSERT INTO t1 SELECT + generate_series(1, 4) * 3, generate_series(1, 4); +INSERT INTO t2 SELECT generate_series(1, 4) * 3; +INSERT INTO t_strewn SELECT generate_series(1, 16); +INSERT INTO t_strewn2 SELECT generate_series(2, 17); +EXPLAIN (costs off) +UPDATE t1 SET j = t_strewn.i FROM t_strewn WHERE t_strewn.i = t1.i; + QUERY PLAN +------------------------------------------------------------ + Update on t1 + -> Hash Join + Hash Cond: (t_strewn.i = t1.i) + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: t_strewn.i + -> Seq Scan on t_strewn + -> Hash + -> Seq Scan on t1 + Optimizer: Postgres query optimizer +(9 rows) + +UPDATE t1 SET j = t_strewn.i FROM t_strewn WHERE t_strewn.i = t1.i +RETURNING *; + i | j | i +----+----+---- + 9 | 9 | 9 + 6 | 6 | 6 + 12 | 12 | 12 + 3 | 3 | 3 +(4 rows) + +EXPLAIN (costs off) +WITH CTE AS (DELETE FROM t1 RETURNING *) +SELECT count(*) AS a FROM t_strewn JOIN cte USING (i); + QUERY PLAN +--------------------------------------------------------------------------- + Aggregate + -> Gather Motion 3:1 (slice2; segments: 3) + -> Aggregate + -> Hash Join + Hash Cond: (t_strewn.i = cte.i) + -> Seq Scan on t_strewn + -> Hash + -> Broadcast Motion 3:3 (slice1; segments: 3) + -> Subquery Scan on cte + -> Delete on t1 + -> Seq Scan on t1 + Optimizer: Postgres query optimizer +(12 rows) + +WITH CTE AS (DELETE FROM t1 RETURNING *) +SELECT count(*) AS a FROM t_strewn JOIN cte USING (i); + a +--- + 4 +(1 row) + +EXPLAIN (costs off) +DELETE FROM t_strewn WHERE t_strewn.i = (SELECT t2.i FROM t2 WHERE t_strewn.i = t2.i); + QUERY PLAN +----------------------------------------------------------------------- + Delete on t_strewn + -> Seq Scan on t_strewn + Filter: (i = (SubPlan 1)) + SubPlan 1 (slice0; 
segments: 3) + -> Result + Filter: (t_strewn.i = t2.i) + -> Materialize + -> Broadcast Motion 3:3 (slice1; segments: 3) + -> Seq Scan on t2 + Optimizer: Postgres query optimizer +(10 rows) + +DELETE FROM t_strewn WHERE t_strewn.i = (SELECT t2.i FROM t2 WHERE t_strewn.i = t2.i) +RETURNING *; + i +---- + 3 + 9 + 6 + 12 +(4 rows) + +EXPLAIN (costs off) +UPDATE t_strewn SET i = t_strewn2.i +FROM t_strewn2 WHERE t_strewn.i = t_strewn2.i; + QUERY PLAN +------------------------------------------------------------------------ + Update on t_strewn + -> Explicit Redistribute Motion 3:3 (slice3; segments: 3) + -> Hash Join + Hash Cond: (t_strewn.i = t_strewn2.i) + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: t_strewn.i + -> Seq Scan on t_strewn + -> Hash + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: t_strewn2.i + -> Seq Scan on t_strewn2 + Optimizer: Postgres query optimizer +(12 rows) + +UPDATE t_strewn SET i = t_strewn2.i +FROM t_strewn2 WHERE t_strewn.i = t_strewn2.i +RETURNING *; + i | i +----+---- + 5 | 5 + 13 | 13 + 15 | 15 + 10 | 10 + 11 | 11 + 2 | 2 + 7 | 7 + 8 | 8 + 14 | 14 + 4 | 4 + 16 | 16 +(11 rows) + +DROP TABLE t1; +DROP TABLE t2; +DROP TABLE t_strewn; +DROP TABLE t_strewn2; +-- Explicit Redistribute Motion should not be mistakenly elided for inherited +-- tables. 
(test case not applicable to ORCA) +CREATE TABLE i (i int, j int) DISTRIBUTED BY (i); +INSERT INTO i SELECT + generate_series(1, 16), generate_series(1, 16) * 3; +CREATE TABLE foo (f1 serial, f2 text, f3 int) DISTRIBUTED RANDOMLY; +INSERT INTO foo (f2, f3) + VALUES ('first', 1), ('second', 2), ('third', 3); +CREATE TABLE foochild (fc int) INHERITS (foo); +NOTICE: table has parent, setting distribution columns to match parent table +INSERT INTO foochild + VALUES(123, 'child', 999, -123); +EXPLAIN (costs off) +DELETE FROM foo + USING i + WHERE foo.f1 = i.j; + QUERY PLAN +--------------------------------------------------------------------- + Delete on foo + -> Explicit Redistribute Motion 3:3 (slice2; segments: 3) + -> Hash Join + Hash Cond: (i.j = foo.f1) + -> Seq Scan on i + -> Hash + -> Broadcast Motion 3:3 (slice1; segments: 3) + -> Seq Scan on foo + -> Explicit Redistribute Motion 3:3 (slice4; segments: 3) + -> Hash Join + Hash Cond: (i.j = foochild.f1) + -> Seq Scan on i + -> Hash + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on foochild + Optimizer: Postgres query optimizer +(16 rows) + +DROP TABLE i; +DROP TABLE foochild; +DROP TABLE foo; +-- Explicit Redistribute Motion should not be mistakenly elided for partitioned +-- tables. 
(test case not applicable to ORCA) +CREATE TABLE t1 (a int, b int) DISTRIBUTED BY (a) +PARTITION BY + range(b) (start(1) end(16) every(5)); +NOTICE: CREATE TABLE will create partition "t1_1_prt_1" for table "t1" +NOTICE: CREATE TABLE will create partition "t1_1_prt_2" for table "t1" +NOTICE: CREATE TABLE will create partition "t1_1_prt_3" for table "t1" +CREATE TABLE t2 (a int, b int) DISTRIBUTED BY (b) +PARTITION BY + range(a) (start(1) end(16) every(10), default partition def); +NOTICE: CREATE TABLE will create partition "t2_1_prt_def" for table "t2" +NOTICE: CREATE TABLE will create partition "t2_1_prt_2" for table "t2" +NOTICE: CREATE TABLE will create partition "t2_1_prt_3" for table "t2" +INSERT INTO t1 SELECT + generate_series(1, 4) * 3, generate_series(1, 4); +INSERT INTO t2 SELECT + generate_series(1, 4), generate_series(1, 4) * 3; +INSERT INTO t2 VALUES + (generate_series(7, 11), NULL); +EXPLAIN (costs off) +DELETE FROM t2 USING t1 WHERE t1.a = t2.a; + QUERY PLAN +------------------------------------------------------------------------ + Delete on t2_1_prt_def + -> Explicit Redistribute Motion 3:3 (slice2; segments: 3) + -> Hash Join + Hash Cond: (t1_1_prt_1.a = t2_1_prt_def.a) + -> Append + -> Seq Scan on t1_1_prt_1 + -> Seq Scan on t1_1_prt_2 + -> Seq Scan on t1_1_prt_3 + -> Hash + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: t2_1_prt_def.a + -> Seq Scan on t2_1_prt_def + -> Explicit Redistribute Motion 3:3 (slice4; segments: 3) + -> Hash Join + Hash Cond: (t1_1_prt_1.a = t2_1_prt_2.a) + -> Append + -> Seq Scan on t1_1_prt_1 + -> Seq Scan on t1_1_prt_2 + -> Seq Scan on t1_1_prt_3 + -> Hash + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: t2_1_prt_2.a + -> Seq Scan on t2_1_prt_2 + -> Explicit Redistribute Motion 3:3 (slice6; segments: 3) + -> Hash Join + Hash Cond: (t1_1_prt_1.a = t2_1_prt_3.a) + -> Append + -> Seq Scan on t1_1_prt_1 + -> Seq Scan on t1_1_prt_2 + -> Seq Scan on t1_1_prt_3 + -> Hash + -> Redistribute Motion 
3:3 (slice5; segments: 3) + Hash Key: t2_1_prt_3.a + -> Seq Scan on t2_1_prt_3 + Optimizer: Postgres query optimizer +(35 rows) + +DROP TABLE t1; +DROP TABLE t2; +-- Explicit Redistribute Motion should not be elided if we encounter a scan on +-- the same table that we are going to modify, but with different range table +-- index. (test case not applicable to ORCA) +CREATE TABLE t1 (a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +CREATE TABLE t2 (a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INSERT INTO t1 SELECT a, a FROM generate_series(1, 4) a; +INSERT INTO t2 SELECT a, a FROM generate_series(1, 16) a; +EXPLAIN (costs off) UPDATE t2 trg +SET b = src.b1 +FROM (SELECT t1.a AS a1, t1.b AS b1, t2.a AS a2, t2.b AS b2 FROM t1 JOIN t2 USING (b)) src +WHERE trg.a = src.a1 + AND trg.a = 2; + QUERY PLAN +--------------------------------------------------------------------------- + Update on t2 trg + -> Explicit Redistribute Motion 3:3 (slice3; segments: 3) + -> Nested Loop + -> Hash Join + Hash Cond: (t2.b = t1.b) + -> Seq Scan on t2 + -> Hash + -> Broadcast Motion 3:3 (slice1; segments: 3) + -> Seq Scan on t1 + Filter: (a = 2) + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on t2 trg + Filter: (a = 2) + Optimizer: Postgres query optimizer +(15 rows) + +-- Use Nested Loop to change left tree with the right tree, to swap the extra +-- scan we don't indend to detect with the real one. 
+SET enable_hashjoin = off; +SET enable_nestloop = on; +EXPLAIN (costs off) UPDATE t2 trg +SET b = src.b1 +FROM (SELECT t1.a AS a1, t1.b AS b1, t2.a AS a2, t2.b AS b2 FROM t1 JOIN t2 USING (b)) src +WHERE trg.a = src.a1 + AND trg.a = 2; + QUERY PLAN +--------------------------------------------------------------------------- + Update on t2 trg + -> Explicit Redistribute Motion 3:3 (slice3; segments: 3) + -> Nested Loop + -> Nested Loop + Join Filter: (t1.b = t2.b) + -> Seq Scan on t2 + -> Materialize + -> Broadcast Motion 3:3 (slice1; segments: 3) + -> Seq Scan on t1 + Filter: (a = 2) + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on t2 trg + Filter: (a = 2) + Optimizer: Postgres query optimizer +(15 rows) + +RESET enable_hashjoin; +RESET enable_nestloop; +DROP TABLE t1; +DROP TABLE t2; +-- Explicit Redistribute Motion should be elided for every partition that does +-- not have any motions above the scan on the table/partition we are going to +-- update. (test case not applicable to ORCA) +CREATE TABLE t1 (a int, b int, c int) DISTRIBUTED BY (b) + PARTITION BY RANGE(b) (start (1) end(5) every(1)); +NOTICE: CREATE TABLE will create partition "t1_1_prt_1" for table "t1" +NOTICE: CREATE TABLE will create partition "t1_1_prt_2" for table "t1" +NOTICE: CREATE TABLE will create partition "t1_1_prt_3" for table "t1" +NOTICE: CREATE TABLE will create partition "t1_1_prt_4" for table "t1" +CREATE TABLE t2 (a int, b int, c int) DISTRIBUTED BY (a); +INSERT INTO t1 SELECT i * 2, i, i * 3 FROM generate_series(1, 4) i; +INSERT INTO t2 SELECT i, i * 2, i * 3 FROM generate_series(1, 4) i; +-- These partitions will need to have Explicit Redistribute above them. 
+TRUNCATE t1_1_prt_1; +TRUNCATE t1_1_prt_3; +ANALYZE t1_1_prt_1; +ANALYZE t1_1_prt_3; +EXPLAIN (costs off) + UPDATE t1 SET c = t2.b FROM t2; + QUERY PLAN +--------------------------------------------------------------- + Update on t1_1_prt_1 + -> Explicit Redistribute Motion 3:3 (slice2; segments: 3) + -> Nested Loop + -> Broadcast Motion 3:3 (slice1; segments: 3) + -> Seq Scan on t1_1_prt_1 + -> Materialize + -> Seq Scan on t2 + -> Nested Loop + -> Seq Scan on t1_1_prt_2 + -> Materialize + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on t2 + -> Explicit Redistribute Motion 3:3 (slice5; segments: 3) + -> Nested Loop + -> Broadcast Motion 3:3 (slice4; segments: 3) + -> Seq Scan on t1_1_prt_3 + -> Materialize + -> Seq Scan on t2 + -> Nested Loop + -> Seq Scan on t1_1_prt_4 + -> Materialize + -> Broadcast Motion 3:3 (slice6; segments: 3) + -> Seq Scan on t2 + Optimizer: Postgres query optimizer +(24 rows) + +DROP TABLE t1; +DROP TABLE t2; +-- Explicit Redistribute Motion should not be elided if there's a Gather Motion +-- beneath the ModifyTable. (test case not applicable to ORCA) +CREATE TABLE t1 (a int) DISTRIBUTED BY (a); +INSERT INTO t1 SELECT i FROM generate_series(1, 4) i; +-- "USING pg_class" forces a Gather Motion. +EXPLAIN (costs off) +DELETE FROM t1 +USING pg_class; + QUERY PLAN +------------------------------------------------------------------ + Delete on t1 + -> Explicit Redistribute Motion 1:3 (slice2) + -> Nested Loop + -> Seq Scan on pg_class + -> Materialize + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t1 + Optimizer: Postgres query optimizer +(8 rows) + +DROP TABLE t1; -- -- text types. We should support the following updates. 
-- @@ -623,26 +991,24 @@ insert into t2_13265 values (2, null, 2, 2); explain (verbose, costs off) update t1_13265 set b = 2 where (c, d) not in (select c, d from t2_13265 where a = 2); - QUERY PLAN -------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------------------------- Update on public.t1_13265 - -> Explicit Redistribute Motion 3:3 (slice2; segments: 3) + -> Nested Loop Left Anti Semi (Not-In) Join Output: t1_13265.a, 2, t1_13265.c, t1_13265.d, t1_13265.ctid, t1_13265.gp_segment_id, t2_13265.ctid - -> Nested Loop Left Anti Semi (Not-In) Join - Output: t1_13265.a, 2, t1_13265.c, t1_13265.d, t1_13265.ctid, t1_13265.gp_segment_id, t2_13265.ctid - Join Filter: ((t1_13265.c = t2_13265.c) AND (t1_13265.d = t2_13265.d)) - -> Seq Scan on public.t1_13265 - Output: t1_13265.a, t1_13265.c, t1_13265.d, t1_13265.ctid, t1_13265.gp_segment_id - -> Materialize + Join Filter: ((t1_13265.c = t2_13265.c) AND (t1_13265.d = t2_13265.d)) + -> Seq Scan on public.t1_13265 + Output: t1_13265.a, t1_13265.c, t1_13265.d, t1_13265.ctid, t1_13265.gp_segment_id + -> Materialize + Output: t2_13265.ctid, t2_13265.c, t2_13265.d, (2) + -> Broadcast Motion 1:3 (slice1; segments: 1) Output: t2_13265.ctid, t2_13265.c, t2_13265.d, (2) - -> Broadcast Motion 3:3 (slice1; segments: 3) - Output: t2_13265.ctid, t2_13265.c, t2_13265.d, (2) - -> Seq Scan on public.t2_13265 - Output: t2_13265.ctid, t2_13265.c, t2_13265.d, 2 - Filter: (t2_13265.a = 2) + -> Seq Scan on public.t2_13265 + Output: t2_13265.ctid, t2_13265.c, t2_13265.d, 2 + Filter: (t2_13265.a = 2) Optimizer: Postgres query optimizer Settings: optimizer=off -(17 rows) +(15 rows) update t1_13265 set b = 2 where (c, d) not in (select c, d from t2_13265 where a = 2); @@ -678,43 +1044,39 @@ insert into from_table select i*1.5,i*2,i*3,'xx'||i,'yy'||i, i+1 from generate_s explain (costs 
off) update into_table set d=from_table.d, e=from_table.e, f=from_table.f from from_table where into_table.a=from_table.a and into_table.b=from_table.b and into_table.c=from_table.c; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------ Update on into_table_1_prt_1 - -> Explicit Redistribute Motion 3:3 (slice2; segments: 3) - -> Hash Join - Hash Cond: ((into_table_1_prt_1.a = from_table.a) AND (into_table_1_prt_1.b = from_table.b) AND (into_table_1_prt_1.c = from_table.c)) - -> Seq Scan on into_table_1_prt_1 - -> Hash - -> Redistribute Motion 3:3 (slice1; segments: 3) - Hash Key: from_table.a, from_table.b, from_table.c - -> Seq Scan on from_table - -> Explicit Redistribute Motion 3:3 (slice4; segments: 3) - -> Hash Join - Hash Cond: ((into_table_1_prt_2.a = from_table.a) AND (into_table_1_prt_2.b = from_table.b) AND (into_table_1_prt_2.c = from_table.c)) - -> Seq Scan on into_table_1_prt_2 - -> Hash - -> Redistribute Motion 3:3 (slice3; segments: 3) - Hash Key: from_table.a, from_table.b, from_table.c - -> Seq Scan on from_table - -> Explicit Redistribute Motion 3:3 (slice6; segments: 3) - -> Hash Join - Hash Cond: ((into_table_1_prt_3.a = from_table.a) AND (into_table_1_prt_3.b = from_table.b) AND (into_table_1_prt_3.c = from_table.c)) - -> Seq Scan on into_table_1_prt_3 - -> Hash - -> Redistribute Motion 3:3 (slice5; segments: 3) - Hash Key: from_table.a, from_table.b, from_table.c - -> Seq Scan on from_table - -> Explicit Redistribute Motion 3:3 (slice8; segments: 3) - -> Hash Join - Hash Cond: ((into_table_1_prt_4.a = from_table.a) AND (into_table_1_prt_4.b = from_table.b) AND (into_table_1_prt_4.c = from_table.c)) - -> Seq Scan on into_table_1_prt_4 - -> Hash - -> Redistribute Motion 3:3 
(slice7; segments: 3) - Hash Key: from_table.a, from_table.b, from_table.c - -> Seq Scan on from_table + -> Hash Join + Hash Cond: ((into_table_1_prt_1.a = from_table.a) AND (into_table_1_prt_1.b = from_table.b) AND (into_table_1_prt_1.c = from_table.c)) + -> Seq Scan on into_table_1_prt_1 + -> Hash + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: from_table.a, from_table.b, from_table.c + -> Seq Scan on from_table + -> Hash Join + Hash Cond: ((into_table_1_prt_2.a = from_table.a) AND (into_table_1_prt_2.b = from_table.b) AND (into_table_1_prt_2.c = from_table.c)) + -> Seq Scan on into_table_1_prt_2 + -> Hash + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: from_table.a, from_table.b, from_table.c + -> Seq Scan on from_table + -> Hash Join + Hash Cond: ((into_table_1_prt_3.a = from_table.a) AND (into_table_1_prt_3.b = from_table.b) AND (into_table_1_prt_3.c = from_table.c)) + -> Seq Scan on into_table_1_prt_3 + -> Hash + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: from_table.a, from_table.b, from_table.c + -> Seq Scan on from_table + -> Hash Join + Hash Cond: ((into_table_1_prt_4.a = from_table.a) AND (into_table_1_prt_4.b = from_table.b) AND (into_table_1_prt_4.c = from_table.c)) + -> Seq Scan on into_table_1_prt_4 + -> Hash + -> Redistribute Motion 3:3 (slice4; segments: 3) + Hash Key: from_table.a, from_table.b, from_table.c + -> Seq Scan on from_table Optimizer: Postgres query optimizer -(34 rows) +(30 rows) -- start_matchsubs -- m/"into_table_1_prt_\d" to partition "into_table_1_prt_\d"/ diff --git a/src/test/regress/expected/update_gp_optimizer.out b/src/test/regress/expected/update_gp_optimizer.out index 3edebee2b0e1..bee8de0f2b42 100644 --- a/src/test/regress/expected/update_gp_optimizer.out +++ b/src/test/regress/expected/update_gp_optimizer.out @@ -82,8 +82,8 @@ NOTICE: merging column "a" with inherited definition NOTICE: merging column "b" with inherited definition insert into base_tbl select g, g from 
generate_series(1, 5) g; explain (costs off) update base_tbl set a=a+1; - QUERY PLAN ---------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------ Update on base_tbl -> Redistribute Motion 3:3 (slice1; segments: 3) Hash Key: ((base_tbl.a + 1)) @@ -93,10 +93,9 @@ explain (costs off) update base_tbl set a=a+1; Hash Key: ((child_a.a + 1)) -> Split -> Seq Scan on child_a - -> Explicit Redistribute Motion 3:3 (slice3; segments: 3) - -> Seq Scan on child_b + -> Seq Scan on child_b Optimizer: Postgres query optimizer -(12 rows) +(11 rows) update base_tbl set a = 5; -- @@ -214,6 +213,353 @@ DROP TABLE keo2; DROP TABLE keo3; DROP TABLE keo4; DROP TABLE keo5; +-- Explicit Redistribute Motion should be added only if there is a motion +-- between the scan and the ModifyTable on the relation we are going to modify. +-- (test case not applicable to ORCA) +CREATE TABLE t1 (i int, j int) DISTRIBUTED BY (i); +CREATE TABLE t2 (i int) DISTRIBUTED BY (i); +CREATE TABLE t_strewn (i int) DISTRIBUTED RANDOMLY; +CREATE TABLE t_strewn2 (i int) DISTRIBUTED RANDOMLY; +INSERT INTO t1 SELECT + generate_series(1, 4) * 3, generate_series(1, 4); +INSERT INTO t2 SELECT generate_series(1, 4) * 3; +INSERT INTO t_strewn SELECT generate_series(1, 16); +INSERT INTO t_strewn2 SELECT generate_series(2, 17); +EXPLAIN (costs off) +UPDATE t1 SET j = t_strewn.i FROM t_strewn WHERE t_strewn.i = t1.i; + QUERY PLAN +------------------------------------------------------------------------------ + Update + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: t1.i + -> Split + -> Hash Join + Hash Cond: (t1.i = t_strewn.i) + -> Seq Scan on t1 + -> Hash + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: t_strewn.i + -> Seq Scan on t_strewn + Optimizer: Pivotal Optimizer (GPORCA) +(12 rows) + +UPDATE t1 SET j = t_strewn.i FROM t_strewn WHERE t_strewn.i = t1.i +RETURNING *; + i | j | i +----+----+---- + 3 | 3 | 3 + 12 | 12 | 12 
+ 6 | 6 | 6 + 9 | 9 | 9 +(4 rows) + +EXPLAIN (costs off) +WITH CTE AS (DELETE FROM t1 RETURNING *) +SELECT count(*) AS a FROM t_strewn JOIN cte USING (i); + QUERY PLAN +--------------------------------------------------------------------------- + Aggregate + -> Gather Motion 3:1 (slice2; segments: 3) + -> Aggregate + -> Hash Join + Hash Cond: (t_strewn.i = cte.i) + -> Seq Scan on t_strewn + -> Hash + -> Broadcast Motion 3:3 (slice1; segments: 3) + -> Subquery Scan on cte + -> Delete on t1 + -> Seq Scan on t1 + Optimizer: Postgres query optimizer +(12 rows) + +WITH CTE AS (DELETE FROM t1 RETURNING *) +SELECT count(*) AS a FROM t_strewn JOIN cte USING (i); + a +--- + 4 +(1 row) + +EXPLAIN (costs off) +DELETE FROM t_strewn WHERE t_strewn.i = (SELECT t2.i FROM t2 WHERE t_strewn.i = t2.i); + QUERY PLAN +----------------------------------------------------------------------------- + Delete + -> Result + -> Seq Scan on t_strewn + Filter: (i = (SubPlan 1)) + SubPlan 1 (slice0; segments: 3) + -> Result + Filter: (t_strewn.i = t2.i) + -> Materialize + -> Broadcast Motion 3:3 (slice1; segments: 3) + -> Seq Scan on t2 + Optimizer: Pivotal Optimizer (GPORCA) +(11 rows) + +DELETE FROM t_strewn WHERE t_strewn.i = (SELECT t2.i FROM t2 WHERE t_strewn.i = t2.i) +RETURNING *; + i +---- + 6 + 3 + 9 + 12 +(4 rows) + +EXPLAIN (costs off) +UPDATE t_strewn SET i = t_strewn2.i +FROM t_strewn2 WHERE t_strewn.i = t_strewn2.i; + QUERY PLAN +------------------------------------------------------------------------------ + Update + -> Explicit Redistribute Motion 3:3 (slice3; segments: 3) + -> Split + -> Hash Join + Hash Cond: (t_strewn.i = t_strewn2.i) + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: t_strewn.i + -> Seq Scan on t_strewn + -> Hash + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: t_strewn2.i + -> Seq Scan on t_strewn2 + Optimizer: Pivotal Optimizer (GPORCA) +(13 rows) + +UPDATE t_strewn SET i = t_strewn2.i +FROM t_strewn2 WHERE t_strewn.i = t_strewn2.i 
+RETURNING *; + i | i +----+---- + 14 | 14 + 16 | 16 + 5 | 5 + 10 | 10 + 7 | 7 + 8 | 8 + 11 | 11 + 13 | 13 + 15 | 15 + 2 | 2 + 4 | 4 +(11 rows) + +DROP TABLE t1; +DROP TABLE t2; +DROP TABLE t_strewn; +DROP TABLE t_strewn2; +-- Explicit Redistribute Motion should not be mistakenly elided for inherited +-- tables. (test case not applicable to ORCA) +CREATE TABLE i (i int, j int) DISTRIBUTED BY (i); +INSERT INTO i SELECT + generate_series(1, 16), generate_series(1, 16) * 3; +CREATE TABLE foo (f1 serial, f2 text, f3 int) DISTRIBUTED RANDOMLY; +INSERT INTO foo (f2, f3) + VALUES ('first', 1), ('second', 2), ('third', 3); +CREATE TABLE foochild (fc int) INHERITS (foo); +NOTICE: table has parent, setting distribution columns to match parent table +INSERT INTO foochild + VALUES(123, 'child', 999, -123); +EXPLAIN (costs off) +DELETE FROM foo + USING i + WHERE foo.f1 = i.j; + QUERY PLAN +--------------------------------------------------------------------- + Delete on foo + -> Explicit Redistribute Motion 3:3 (slice2; segments: 3) + -> Hash Join + Hash Cond: (i.j = foo.f1) + -> Seq Scan on i + -> Hash + -> Broadcast Motion 3:3 (slice1; segments: 3) + -> Seq Scan on foo + -> Explicit Redistribute Motion 3:3 (slice4; segments: 3) + -> Hash Join + Hash Cond: (i.j = foochild.f1) + -> Seq Scan on i + -> Hash + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on foochild + Optimizer: Postgres query optimizer +(16 rows) + +DROP TABLE i; +DROP TABLE foochild; +DROP TABLE foo; +-- Explicit Redistribute Motion should not be mistakenly elided for partitioned +-- tables. 
(test case not applicable to ORCA) +CREATE TABLE t1 (a int, b int) DISTRIBUTED BY (a) +PARTITION BY + range(b) (start(1) end(16) every(5)); +NOTICE: CREATE TABLE will create partition "t1_1_prt_1" for table "t1" +NOTICE: CREATE TABLE will create partition "t1_1_prt_2" for table "t1" +NOTICE: CREATE TABLE will create partition "t1_1_prt_3" for table "t1" +CREATE TABLE t2 (a int, b int) DISTRIBUTED BY (b) +PARTITION BY + range(a) (start(1) end(16) every(10), default partition def); +NOTICE: CREATE TABLE will create partition "t2_1_prt_def" for table "t2" +NOTICE: CREATE TABLE will create partition "t2_1_prt_2" for table "t2" +NOTICE: CREATE TABLE will create partition "t2_1_prt_3" for table "t2" +INSERT INTO t1 SELECT + generate_series(1, 4) * 3, generate_series(1, 4); +INSERT INTO t2 SELECT + generate_series(1, 4), generate_series(1, 4) * 3; +INSERT INTO t2 VALUES + (generate_series(7, 11), NULL); +EXPLAIN (costs off) +DELETE FROM t2 USING t1 WHERE t1.a = t2.a; + QUERY PLAN +------------------------------------------------------------------------------------ + Delete + -> Result + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: t2.b + -> Hash Join + Hash Cond: (t2.a = t1.a) + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: t2.a + -> Sequence + -> Partition Selector for t2 (dynamic scan id: 1) + Partitions selected: 3 (out of 3) + -> Dynamic Seq Scan on t2 (dynamic scan id: 1) + -> Hash + -> Sequence + -> Partition Selector for t1 (dynamic scan id: 2) + Partitions selected: 3 (out of 3) + -> Dynamic Seq Scan on t1 (dynamic scan id: 2) + Optimizer: Pivotal Optimizer (GPORCA) +(18 rows) + +DROP TABLE t1; +DROP TABLE t2; +-- Explicit Redistribute Motion should not be elided if we encounter a scan on +-- the same table that we are going to modify, but with different range table +-- index. 
(test case not applicable to ORCA) +CREATE TABLE t1 (a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +CREATE TABLE t2 (a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INSERT INTO t1 SELECT a, a FROM generate_series(1, 4) a; +INSERT INTO t2 SELECT a, a FROM generate_series(1, 16) a; +EXPLAIN (costs off) UPDATE t2 trg +SET b = src.b1 +FROM (SELECT t1.a AS a1, t1.b AS b1, t2.a AS a2, t2.b AS b2 FROM t1 JOIN t2 USING (b)) src +WHERE trg.a = src.a1 + AND trg.a = 2; + QUERY PLAN +--------------------------------------------------------------------------------------- + Update + -> Split + -> Hash Join + Hash Cond: (t2.a = t1.a) + -> Seq Scan on t2 + Filter: (a = 2) + -> Hash + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Hash Join + Hash Cond: (t2_1.b = t1.b) + -> Seq Scan on t2 t2_1 + -> Hash + -> Broadcast Motion 3:3 (slice1; segments: 3) + -> Seq Scan on t1 + Filter: (a = 2) + Optimizer: Pivotal Optimizer (GPORCA) +(16 rows) + +-- Use Nested Loop to change left tree with the right tree, to swap the extra +-- scan we don't indend to detect with the real one. 
+SET enable_hashjoin = off; +SET enable_nestloop = on; +EXPLAIN (costs off) UPDATE t2 trg +SET b = src.b1 +FROM (SELECT t1.a AS a1, t1.b AS b1, t2.a AS a2, t2.b AS b2 FROM t1 JOIN t2 USING (b)) src +WHERE trg.a = src.a1 + AND trg.a = 2; + QUERY PLAN +--------------------------------------------------------------------------------------- + Update + -> Split + -> Hash Join + Hash Cond: (t2.a = t1.a) + -> Seq Scan on t2 + Filter: (a = 2) + -> Hash + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Hash Join + Hash Cond: (t2_1.b = t1.b) + -> Seq Scan on t2 t2_1 + -> Hash + -> Broadcast Motion 3:3 (slice1; segments: 3) + -> Seq Scan on t1 + Filter: (a = 2) + Optimizer: Pivotal Optimizer (GPORCA) +(16 rows) + +RESET enable_hashjoin; +RESET enable_nestloop; +DROP TABLE t1; +DROP TABLE t2; +-- Explicit Redistribute Motion should be elided for every partition that does +-- not have any motions above the scan on the table/partition we are going to +-- update. (test case not applicable to ORCA) +CREATE TABLE t1 (a int, b int, c int) DISTRIBUTED BY (b) + PARTITION BY RANGE(b) (start (1) end(5) every(1)); +NOTICE: CREATE TABLE will create partition "t1_1_prt_1" for table "t1" +NOTICE: CREATE TABLE will create partition "t1_1_prt_2" for table "t1" +NOTICE: CREATE TABLE will create partition "t1_1_prt_3" for table "t1" +NOTICE: CREATE TABLE will create partition "t1_1_prt_4" for table "t1" +CREATE TABLE t2 (a int, b int, c int) DISTRIBUTED BY (a); +INSERT INTO t1 SELECT i * 2, i, i * 3 FROM generate_series(1, 4) i; +INSERT INTO t2 SELECT i, i * 2, i * 3 FROM generate_series(1, 4) i; +-- These partitions will need to have Explicit Redistribute above them. 
+TRUNCATE t1_1_prt_1; +TRUNCATE t1_1_prt_3; +ANALYZE t1_1_prt_1; +ANALYZE t1_1_prt_3; +EXPLAIN (costs off) + UPDATE t1 SET c = t2.b FROM t2; + QUERY PLAN +------------------------------------------------------------------------ + Update + -> Split + -> Nested Loop + Join Filter: true + -> Sequence + -> Partition Selector for t1 (dynamic scan id: 1) + Partitions selected: 4 (out of 4) + -> Dynamic Seq Scan on t1 (dynamic scan id: 1) + -> Materialize + -> Broadcast Motion 3:3 (slice1; segments: 3) + -> Seq Scan on t2 + Optimizer: Pivotal Optimizer (GPORCA) +(12 rows) + +DROP TABLE t1; +DROP TABLE t2; +-- Explicit Redistribute Motion should not be elided if there's a Gather Motion +-- beneath the ModifyTable. (test case not applicable to ORCA) +CREATE TABLE t1 (a int) DISTRIBUTED BY (a); +INSERT INTO t1 SELECT i FROM generate_series(1, 4) i; +-- "USING pg_class" forces a Gather Motion. +EXPLAIN (costs off) +DELETE FROM t1 +USING pg_class; + QUERY PLAN +------------------------------------------------------------------ + Delete on t1 + -> Explicit Redistribute Motion 1:3 (slice2) + -> Nested Loop + -> Seq Scan on pg_class + -> Materialize + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t1 + Optimizer: Postgres query optimizer +(8 rows) + +DROP TABLE t1; -- -- text types. We should support the following updates. 
-- @@ -624,26 +970,24 @@ insert into t2_13265 values (2, null, 2, 2); explain (verbose, costs off) update t1_13265 set b = 2 where (c, d) not in (select c, d from t2_13265 where a = 2); - QUERY PLAN -------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------------------------- Update on public.t1_13265 - -> Explicit Redistribute Motion 3:3 (slice2; segments: 3) + -> Nested Loop Left Anti Semi (Not-In) Join Output: t1_13265.a, 2, t1_13265.c, t1_13265.d, t1_13265.ctid, t1_13265.gp_segment_id, t2_13265.ctid - -> Nested Loop Left Anti Semi (Not-In) Join - Output: t1_13265.a, 2, t1_13265.c, t1_13265.d, t1_13265.ctid, t1_13265.gp_segment_id, t2_13265.ctid - Join Filter: ((t1_13265.c = t2_13265.c) AND (t1_13265.d = t2_13265.d)) - -> Seq Scan on public.t1_13265 - Output: t1_13265.a, t1_13265.c, t1_13265.d, t1_13265.ctid, t1_13265.gp_segment_id - -> Materialize + Join Filter: ((t1_13265.c = t2_13265.c) AND (t1_13265.d = t2_13265.d)) + -> Seq Scan on public.t1_13265 + Output: t1_13265.a, t1_13265.c, t1_13265.d, t1_13265.ctid, t1_13265.gp_segment_id + -> Materialize + Output: t2_13265.ctid, t2_13265.c, t2_13265.d, (2) + -> Broadcast Motion 1:3 (slice1; segments: 1) Output: t2_13265.ctid, t2_13265.c, t2_13265.d, (2) - -> Broadcast Motion 3:3 (slice1; segments: 3) - Output: t2_13265.ctid, t2_13265.c, t2_13265.d, (2) - -> Seq Scan on public.t2_13265 - Output: t2_13265.ctid, t2_13265.c, t2_13265.d, 2 - Filter: (t2_13265.a = 2) + -> Seq Scan on public.t2_13265 + Output: t2_13265.ctid, t2_13265.c, t2_13265.d, 2 + Filter: (t2_13265.a = 2) Optimizer: Postgres query optimizer Settings: optimizer=on -(17 rows) +(15 rows) update t1_13265 set b = 2 where (c, d) not in (select c, d from t2_13265 where a = 2); diff --git a/src/test/regress/sql/update_gp.sql b/src/test/regress/sql/update_gp.sql index f387738153d3..8074176900e2 
100644 --- a/src/test/regress/sql/update_gp.sql +++ b/src/test/regress/sql/update_gp.sql @@ -117,6 +117,180 @@ DROP TABLE keo3; DROP TABLE keo4; DROP TABLE keo5; +-- Explicit Redistribute Motion should be added only if there is a motion +-- between the scan and the ModifyTable on the relation we are going to modify. +-- (test case not applicable to ORCA) +-- start_ignore +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t_strewn; +DROP TABLE IF EXISTS t_strewn2; +-- end_ignore + +CREATE TABLE t1 (i int, j int) DISTRIBUTED BY (i); +CREATE TABLE t2 (i int) DISTRIBUTED BY (i); +CREATE TABLE t_strewn (i int) DISTRIBUTED RANDOMLY; +CREATE TABLE t_strewn2 (i int) DISTRIBUTED RANDOMLY; + +INSERT INTO t1 SELECT + generate_series(1, 4) * 3, generate_series(1, 4); +INSERT INTO t2 SELECT generate_series(1, 4) * 3; +INSERT INTO t_strewn SELECT generate_series(1, 16); +INSERT INTO t_strewn2 SELECT generate_series(2, 17); + +EXPLAIN (costs off) +UPDATE t1 SET j = t_strewn.i FROM t_strewn WHERE t_strewn.i = t1.i; + +UPDATE t1 SET j = t_strewn.i FROM t_strewn WHERE t_strewn.i = t1.i +RETURNING *; + +EXPLAIN (costs off) +WITH CTE AS (DELETE FROM t1 RETURNING *) +SELECT count(*) AS a FROM t_strewn JOIN cte USING (i); + +WITH CTE AS (DELETE FROM t1 RETURNING *) +SELECT count(*) AS a FROM t_strewn JOIN cte USING (i); + +EXPLAIN (costs off) +DELETE FROM t_strewn WHERE t_strewn.i = (SELECT t2.i FROM t2 WHERE t_strewn.i = t2.i); + +DELETE FROM t_strewn WHERE t_strewn.i = (SELECT t2.i FROM t2 WHERE t_strewn.i = t2.i) +RETURNING *; + +EXPLAIN (costs off) +UPDATE t_strewn SET i = t_strewn2.i +FROM t_strewn2 WHERE t_strewn.i = t_strewn2.i; + +UPDATE t_strewn SET i = t_strewn2.i +FROM t_strewn2 WHERE t_strewn.i = t_strewn2.i +RETURNING *; + +DROP TABLE t1; +DROP TABLE t2; +DROP TABLE t_strewn; +DROP TABLE t_strewn2; + +-- Explicit Redistribute Motion should not be mistakenly elided for inherited +-- tables. 
(test case not applicable to ORCA) +-- start_ignore +DROP TABLE IF EXISTS i; +DROP TABLE IF EXISTS foochild; +DROP TABLE IF EXISTS foo; +-- end_ignore + +CREATE TABLE i (i int, j int) DISTRIBUTED BY (i); +INSERT INTO i SELECT + generate_series(1, 16), generate_series(1, 16) * 3; + +CREATE TABLE foo (f1 serial, f2 text, f3 int) DISTRIBUTED RANDOMLY; +INSERT INTO foo (f2, f3) + VALUES ('first', 1), ('second', 2), ('third', 3); + +CREATE TABLE foochild (fc int) INHERITS (foo); +INSERT INTO foochild + VALUES(123, 'child', 999, -123); + +EXPLAIN (costs off) +DELETE FROM foo + USING i + WHERE foo.f1 = i.j; + +DROP TABLE i; +DROP TABLE foochild; +DROP TABLE foo; + +-- Explicit Redistribute Motion should not be mistakenly elided for partitioned +-- tables. (test case not applicable to ORCA) +CREATE TABLE t1 (a int, b int) DISTRIBUTED BY (a) +PARTITION BY + range(b) (start(1) end(16) every(5)); + +CREATE TABLE t2 (a int, b int) DISTRIBUTED BY (b) +PARTITION BY + range(a) (start(1) end(16) every(10), default partition def); + +INSERT INTO t1 SELECT + generate_series(1, 4) * 3, generate_series(1, 4); +INSERT INTO t2 SELECT + generate_series(1, 4), generate_series(1, 4) * 3; +INSERT INTO t2 VALUES + (generate_series(7, 11), NULL); + +EXPLAIN (costs off) +DELETE FROM t2 USING t1 WHERE t1.a = t2.a; + +DROP TABLE t1; +DROP TABLE t2; + +-- Explicit Redistribute Motion should not be elided if we encounter a scan on +-- the same table that we are going to modify, but with different range table +-- index. 
(test case not applicable to ORCA) +CREATE TABLE t1 (a int, b int); +CREATE TABLE t2 (a int, b int); + +INSERT INTO t1 SELECT a, a FROM generate_series(1, 4) a; +INSERT INTO t2 SELECT a, a FROM generate_series(1, 16) a; + +EXPLAIN (costs off) UPDATE t2 trg +SET b = src.b1 +FROM (SELECT t1.a AS a1, t1.b AS b1, t2.a AS a2, t2.b AS b2 FROM t1 JOIN t2 USING (b)) src +WHERE trg.a = src.a1 + AND trg.a = 2; + +-- Use Nested Loop to change left tree with the right tree, to swap the extra +-- scan we don't indend to detect with the real one. +SET enable_hashjoin = off; +SET enable_nestloop = on; + +EXPLAIN (costs off) UPDATE t2 trg +SET b = src.b1 +FROM (SELECT t1.a AS a1, t1.b AS b1, t2.a AS a2, t2.b AS b2 FROM t1 JOIN t2 USING (b)) src +WHERE trg.a = src.a1 + AND trg.a = 2; + +RESET enable_hashjoin; +RESET enable_nestloop; + +DROP TABLE t1; +DROP TABLE t2; + +-- Explicit Redistribute Motion should be elided for every partition that does +-- not have any motions above the scan on the table/partition we are going to +-- update. (test case not applicable to ORCA) +CREATE TABLE t1 (a int, b int, c int) DISTRIBUTED BY (b) + PARTITION BY RANGE(b) (start (1) end(5) every(1)); + +CREATE TABLE t2 (a int, b int, c int) DISTRIBUTED BY (a); + +INSERT INTO t1 SELECT i * 2, i, i * 3 FROM generate_series(1, 4) i; +INSERT INTO t2 SELECT i, i * 2, i * 3 FROM generate_series(1, 4) i; + +-- These partitions will need to have Explicit Redistribute above them. +TRUNCATE t1_1_prt_1; +TRUNCATE t1_1_prt_3; + +ANALYZE t1_1_prt_1; +ANALYZE t1_1_prt_3; + +EXPLAIN (costs off) + UPDATE t1 SET c = t2.b FROM t2; + +DROP TABLE t1; +DROP TABLE t2; + +-- Explicit Redistribute Motion should not be elided if there's a Gather Motion +-- beneath the ModifyTable. (test case not applicable to ORCA) +CREATE TABLE t1 (a int) DISTRIBUTED BY (a); + +INSERT INTO t1 SELECT i FROM generate_series(1, 4) i; + +-- "USING pg_class" forces a Gather Motion. 
+EXPLAIN (costs off) +DELETE FROM t1 +USING pg_class; + +DROP TABLE t1; + -- -- text types. We should support the following updates. --