Skip to content

Commit

Permalink
Fix issue #7674 related to Indirection in UPDATE SET()
Browse files Browse the repository at this point in the history
ruleutils in Citus is based on PostgreSQL source code, but in PostgreSQL
ruleutils is not used at the planner stage.
For instance, it is assumed after parser that targetList are ordered as
they were read, but it's not true after rewriter, the resulting rewrite
tree is then provided to planner (and citus), but the ordering of the list
is not granted anymore.

It's similar to others previous issues reported and still open, as well
as to other bugfixes/improvment over time, the most noticable being the
ProcessIndirection() which is for domain and similar.

However, the implications of this bug are huge for users of `UPDATE SET
(...)`:

1. if you used to order by columns order, you're maybe safe: `SET (col1,
   col2, col3, ...)`
2. if you used not to order by column order: `SET (col2, col1, col3,
  ...)` then you probably found a problem, or you have one.

Note about 1. that despite appearance and your QA, you are at risk: if
physical columns ordering is changed (for example after DROPping/ADDing
some), the same query which use to apparently works well will silently
update other columns...

As it is this code is not optimized for performance, not sure it'll be
needed.
  • Loading branch information
c2main committed Nov 19, 2024
1 parent 85851c4 commit 73f4f67
Show file tree
Hide file tree
Showing 3 changed files with 459 additions and 0 deletions.
153 changes: 153 additions & 0 deletions src/backend/distributed/deparser/ruleutils_14.c
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,9 @@ static void get_tablesample_def(TableSampleClause *tablesample,
deparse_context *context);
static void get_opclass_name(Oid opclass, Oid actual_datatype,
StringInfo buf);
static bool is_update_set_with_multiple_columns(List *targetList);
static List *processTargetsIndirection(List *targetList);
static AttrNumber extract_paramid_from_funcexpr(FuncExpr *func);
static Node *processIndirection(Node *node, deparse_context *context);
static void printSubscripts(SubscriptingRef *aref, deparse_context *context);
static char *get_relation_name(Oid relid);
Expand Down Expand Up @@ -3464,6 +3467,9 @@ get_update_query_targetlist_def(Query *query, List *targetList,
}
}
}
if (is_update_set_with_multiple_columns(targetList))
targetList = processTargetsIndirection(targetList);

next_ma_cell = list_head(ma_sublinks);
cur_ma_sublink = NULL;
remaining_ma_columns = 0;
Expand Down Expand Up @@ -8101,6 +8107,153 @@ get_opclass_name(Oid opclass, Oid actual_datatype,
ReleaseSysCache(ht_opc);
}

/*
* helper function to evaluate if we are in an SET (...)
* Caller is responsible to check the command type (UPDATE)
*/
static bool is_update_set_with_multiple_columns(List *targetList)
{
ListCell *lc;
foreach(lc, targetList) {
TargetEntry *tle = (TargetEntry *) lfirst(lc);
Node *expr;

if (tle->resjunk)
continue;

expr = strip_implicit_coercions((Node *) tle->expr);

if (expr && IsA(expr, Param) &&
((Param *) expr)->paramkind == PARAM_MULTIEXPR)
{
return true;
}
}

// No multi-column set expression found
return false;
}

/*
* processTargetsIndirection - reorder targets list (from indirection)
*
* We don't change anything but the order the target list.
* The purpose here is to be able to deparse a query tree as if it was
* provided by the PostgreSQL parser, not the rewriter (which is the one
* received by the planner hook).
*
* It's required only for UPDATE SET (MULTIEXPR) queries, other candidates
* are not supported by Citus.
*
* Returns the new target list, reordered.
*/
static List *processTargetsIndirection(List *targetList)
{
int nAssignableCols;
int targetListPosition;
bool sawJunk = false;
List *newTargetList = NIL;
ListCell *lc;

/* Count non-junk columns and ensure they precede junk columns */
nAssignableCols = 0;
foreach(lc, targetList)
{
TargetEntry *tle = lfirst_node(TargetEntry, lc);

if (tle->resjunk)
{
sawJunk = true;
}
else
{
if (sawJunk)
elog(ERROR, "Subplan target list is out of order");

nAssignableCols++;
}
}

/* If no assignable columns, return the original target list */
if (nAssignableCols == 0)
return targetList;

/* Reorder the target list */
/* we start from 1 */
targetListPosition = 1;
while (nAssignableCols > 0)
{
nAssignableCols--;

foreach(lc, targetList)
{
TargetEntry *tle = lfirst_node(TargetEntry, lc);

if (IsA(tle->expr, FuncExpr))
{
FuncExpr *funcexpr = (FuncExpr *) tle->expr;
AttrNumber attnum = extract_paramid_from_funcexpr(funcexpr);

if (attnum == targetListPosition)
{
ereport(DEBUG1, (errmsg("Adding FuncExpr resno: %d", tle->resno)));
newTargetList = lappend(newTargetList, tle);
targetListPosition++;
break;
}
}
else if (IsA(tle->expr, Param))
{
Param *param = (Param *) tle->expr;
AttrNumber attnum = param->paramid;

if (attnum == targetListPosition)
{
newTargetList = lappend(newTargetList, tle);
targetListPosition++;
break;
}
}
}
}

// TODO add check about what we did here ?

/* Append any remaining junk columns */
foreach(lc, targetList)
{
TargetEntry *tle = lfirst_node(TargetEntry, lc);
if (tle->resjunk)
newTargetList = lappend(newTargetList, tle);
}

return newTargetList;
}

/* Function to extract paramid from a FuncExpr node */
static AttrNumber extract_paramid_from_funcexpr(FuncExpr *func)
{
AttrNumber targetAttnum = InvalidAttrNumber;
ListCell *lc;

/* Iterate through the arguments of the FuncExpr */
foreach(lc, func->args)
{
Node *arg = (Node *) lfirst(lc);

/* Check if the argument is a PARAM node */
if (IsA(arg, Param))
{
Param *param = (Param *) arg;
targetAttnum = param->paramid;

break; // Exit loop once we find the PARAM node
}
}

return targetAttnum;
}

/*
* processIndirection - take care of array and subfield assignment
*
Expand Down
153 changes: 153 additions & 0 deletions src/backend/distributed/deparser/ruleutils_15.c
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,9 @@ static void get_tablesample_def(TableSampleClause *tablesample,
deparse_context *context);
static void get_opclass_name(Oid opclass, Oid actual_datatype,
StringInfo buf);
static bool is_update_set_with_multiple_columns(List *targetList);
static List *processTargetsIndirection(List *targetList);
static AttrNumber extract_paramid_from_funcexpr(FuncExpr *func);
static Node *processIndirection(Node *node, deparse_context *context);
static void printSubscripts(SubscriptingRef *aref, deparse_context *context);
static char *get_relation_name(Oid relid);
Expand Down Expand Up @@ -3529,6 +3532,9 @@ get_update_query_targetlist_def(Query *query, List *targetList,
}
}
}
if (is_update_set_with_multiple_columns(targetList))
targetList = processTargetsIndirection(targetList);

next_ma_cell = list_head(ma_sublinks);
cur_ma_sublink = NULL;
remaining_ma_columns = 0;
Expand Down Expand Up @@ -8331,6 +8337,153 @@ get_opclass_name(Oid opclass, Oid actual_datatype,
ReleaseSysCache(ht_opc);
}

/*
* helper function to evaluate if we are in an SET (...)
* Caller is responsible to check the command type (UPDATE)
*/
static bool is_update_set_with_multiple_columns(List *targetList)
{
ListCell *lc;
foreach(lc, targetList) {
TargetEntry *tle = (TargetEntry *) lfirst(lc);
Node *expr;

if (tle->resjunk)
continue;

expr = strip_implicit_coercions((Node *) tle->expr);

if (expr && IsA(expr, Param) &&
((Param *) expr)->paramkind == PARAM_MULTIEXPR)
{
return true;
}
}

// No multi-column set expression found
return false;
}

/*
* processTargetsIndirection - reorder targets list (from indirection)
*
* We don't change anything but the order the target list.
* The purpose here is to be able to deparse a query tree as if it was
* provided by the PostgreSQL parser, not the rewriter (which is the one
* received by the planner hook).
*
* It's required only for UPDATE SET (MULTIEXPR) queries, other candidates
* are not supported by Citus.
*
* Returns the new target list, reordered.
*/
static List *processTargetsIndirection(List *targetList)
{
int nAssignableCols;
int targetListPosition;
bool sawJunk = false;
List *newTargetList = NIL;
ListCell *lc;

/* Count non-junk columns and ensure they precede junk columns */
nAssignableCols = 0;
foreach(lc, targetList)
{
TargetEntry *tle = lfirst_node(TargetEntry, lc);

if (tle->resjunk)
{
sawJunk = true;
}
else
{
if (sawJunk)
elog(ERROR, "Subplan target list is out of order");

nAssignableCols++;
}
}

/* If no assignable columns, return the original target list */
if (nAssignableCols == 0)
return targetList;

/* Reorder the target list */
/* we start from 1 */
targetListPosition = 1;
while (nAssignableCols > 0)
{
nAssignableCols--;

foreach(lc, targetList)
{
TargetEntry *tle = lfirst_node(TargetEntry, lc);

if (IsA(tle->expr, FuncExpr))
{
FuncExpr *funcexpr = (FuncExpr *) tle->expr;
AttrNumber attnum = extract_paramid_from_funcexpr(funcexpr);

if (attnum == targetListPosition)
{
ereport(DEBUG1, (errmsg("Adding FuncExpr resno: %d", tle->resno)));
newTargetList = lappend(newTargetList, tle);
targetListPosition++;
break;
}
}
else if (IsA(tle->expr, Param))
{
Param *param = (Param *) tle->expr;
AttrNumber attnum = param->paramid;

if (attnum == targetListPosition)
{
newTargetList = lappend(newTargetList, tle);
targetListPosition++;
break;
}
}
}
}

// TODO add check about what we did here ?

/* Append any remaining junk columns */
foreach(lc, targetList)
{
TargetEntry *tle = lfirst_node(TargetEntry, lc);
if (tle->resjunk)
newTargetList = lappend(newTargetList, tle);
}

return newTargetList;
}

/* Function to extract paramid from a FuncExpr node */
static AttrNumber extract_paramid_from_funcexpr(FuncExpr *func)
{
AttrNumber targetAttnum = InvalidAttrNumber;
ListCell *lc;

/* Iterate through the arguments of the FuncExpr */
foreach(lc, func->args)
{
Node *arg = (Node *) lfirst(lc);

/* Check if the argument is a PARAM node */
if (IsA(arg, Param))
{
Param *param = (Param *) arg;
targetAttnum = param->paramid;

break; // Exit loop once we find the PARAM node
}
}

return targetAttnum;
}

/*
* processIndirection - take care of array and subfield assignment
*
Expand Down
Loading

0 comments on commit 73f4f67

Please sign in to comment.