Skip to content

Commit

Permalink
opt: add rule to decorrelate unions in EXISTS subqueries
Browse files Browse the repository at this point in the history
This commit adds a new rule `TryDecorrelateUnion`, which matches on a
`Union` or `UnionAll` operator in the input of a `ScalarGroupBy`. The
`ScalarGroupBy` must have "any-not-null" semantics, meaning it produces
an arbitrary non-null value from each input column.

If these conditions are satisfied, the `Union` operator is replaced by an
`InnerJoin` between two `ScalarGroupBy` operators. A `Project` coalesces
columns from each side of the join to produce the final aggregated values.

This transformation does not itself decorrelate the `Union` operators, but
it does make it easier for other rules to do so.

Release note: None

Epic: None
  • Loading branch information
DrewKimball committed Oct 2, 2024
1 parent 5bc3c1e commit 1ec3568
Show file tree
Hide file tree
Showing 3 changed files with 438 additions and 0 deletions.
29 changes: 29 additions & 0 deletions pkg/sql/opt/norm/decorrelate_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -1537,3 +1537,32 @@ func getSubstituteColsSetOp(set memo.RelExpr, substituteCols opt.ColSet) opt.Col
}
return newSubstituteCols
}

// MakeCoalesceProjectionsForUnion returns one projection item per output
// column of the set operation described by setPrivate. The item at position i
// is a Coalesce over the i-th left input column followed by the i-th right
// input column, and it is projected as the i-th output column of the set
// operation.
func (c *CustomFuncs) MakeCoalesceProjectionsForUnion(
	setPrivate *memo.SetPrivate,
) memo.ProjectionsExpr {
	outCols := setPrivate.OutCols
	items := make(memo.ProjectionsExpr, 0, len(outCols))
	for idx, outCol := range outCols {
		// The left column is always the first Coalesce argument, followed by
		// the right column at the same ordinal position.
		leftVar := c.f.ConstructVariable(setPrivate.LeftCols[idx])
		rightVar := c.f.ConstructVariable(setPrivate.RightCols[idx])
		coalesced := c.f.ConstructCoalesce(memo.ScalarListExpr{leftVar, rightVar})
		items = append(items, c.f.ConstructProjectionsItem(coalesced, outCol))
	}
	return items
}

// MakeAnyNotNullScalarGroupBy returns a ScalarGroupBy built on top of the given
// input. Its aggregations are produced by MakeAggCols with the AnyNotNull
// aggregate operator over the input's output columns, and it uses the empty
// grouping private.
func (c *CustomFuncs) MakeAnyNotNullScalarGroupBy(input memo.RelExpr) memo.RelExpr {
	inputCols := input.Relational().OutputCols
	anyNotNullAggs := c.MakeAggCols(opt.AnyNotNullAggOp, inputCols)
	return c.f.ConstructScalarGroupBy(input, anyNotNullAggs, memo.EmptyGroupingPrivate)
}
64 changes: 64 additions & 0 deletions pkg/sql/opt/norm/rules/decorrelate.opt
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,70 @@
(OutputCols2 $left $right)
)

# TryDecorrelateUnion replaces a Union/UnionAll beneath a ScalarGroupBy with a
# cross-join (InnerJoin on True) between two ScalarGroupBy operators. A Project
# operator coalesces columns from each join input to produce the final result.
# This transformation applies when the ScalarGroupBy has only "any-not-null"
# aggregations, which select an arbitrary non-null value from the input column.
#
# The rule only matches when the Union/UnionAll input satisfies HasOuterCols
# and the aggregations satisfy AreAllAnyNotNullAggs.
#
# Here's a simplified example:
#
#   scalar-group-by
#    ├── union-all
#    │    ├── scan foo
#    │    └── scan bar (has-outer-cols)
#    └── aggregations
#         └── any-not-null
#   =>
#   project
#    ├── inner-join (cross)
#    │    ├── scalar-group-by
#    │    │    └── scan foo
#    │    ├── scalar-group-by
#    │    │    └── scan bar
#    │    └── filters (true)
#    └── projections
#         └── coalesce
#
# The replacement actually builds two nested Project operators (the example
# above shows only one for brevity):
#
#   1. The inner Project sits directly on the cross-join. Its projections come
#      from MakeCoalesceProjectionsForUnion applied to the Union's private, and
#      it passes through no columns (MakeEmptyColSet).
#   2. The outer Project uses ConvertAnyNotNullAggsToProjections on the original
#      aggregations (presumably turning each any-not-null aggregate into a
#      projection of its input column; confirm against that function). It
#      passes through the intersection of the original ScalarGroupBy's output
#      columns (GroupingOutputCols) and the Union's output columns.
#
# This situation occurs after a correlated EXISTS subquery containing a Union is
# hoisted. Note that TryDecorrelateUnion does not itself decorrelate the Union,
# but makes it easier for other rules to do so.
#
# NOTE: the outer Project operator is necessary just in case the ScalarGroupBy
# is synthesizing new columns, despite using any-not-null aggregations.
# NOTE: TryDecorrelateUnion should be ordered before TryDecorrelateScalarGroupBy
# to ensure that Union operators have a chance to be decorrelated.
#
# TODO(drewk): We could extend this rule to apply to other aggregations; for
# example, for a count() we can sum the counts taken on each side of the join.
# TODO(drewk): We could extend this rule to handle other set operations. For
# example, ExceptAll could become an AntiJoin.
[TryDecorrelateUnion, Normalize]
(ScalarGroupBy
$input:(Union | UnionAll $left:* $right:* $unionPrivate:*) &
(HasOuterCols $input)
$aggs:* & (AreAllAnyNotNullAggs $aggs)
$private:*
)
=>
(Project
(Project
(InnerJoin
(MakeAnyNotNullScalarGroupBy $left)
(MakeAnyNotNullScalarGroupBy $right)
[]
(EmptyJoinPrivate)
)
(MakeCoalesceProjectionsForUnion $unionPrivate)
(MakeEmptyColSet)
)
(ConvertAnyNotNullAggsToProjections $aggs)
(IntersectionCols
(GroupingOutputCols $private $aggs)
(OutputCols $input)
)
)

# TryDecorrelateScalarGroupBy "pushes down" a Join into a ScalarGroupBy
# operator, in an attempt to keep "digging" down to find and eliminate
# unnecessary correlation. The eventual hope is to trigger the DecorrelateJoin
Expand Down
Loading

0 comments on commit 1ec3568

Please sign in to comment.