Skip to content

Commit

Permalink
[feature](Nereids): Pushdown TopN-Distinct through Union (apache#27628)
Browse files Browse the repository at this point in the history
```
  TopN-Distinct
  -> Union All
  -> child plan1
  -> child plan2
  -> child plan3
 
  rewritten to
 
  TopN-Distinct
  -> Union All
    -> TopN-Distinct
      -> child plan1
    -> TopN-Distinct
      -> child plan2
    -> TopN-Distinct
      -> child plan3
```
  • Loading branch information
jackwener authored Nov 28, 2023
1 parent 2ea1e9d commit 91f56ce
Show file tree
Hide file tree
Showing 7 changed files with 530 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -262,9 +262,10 @@ public enum RuleType {
PUSH_DOWN_TOP_N_THROUGH_PROJECT_WINDOW(RuleTypeClass.REWRITE),
PUSH_DOWN_TOP_N_THROUGH_WINDOW(RuleTypeClass.REWRITE),
PUSH_DOWN_TOP_N_THROUGH_UNION(RuleTypeClass.REWRITE),
PUSH_DOWN_TOP_N_LIMIT_THROUGH_UNION(RuleTypeClass.REWRITE),
// limit distinct push down
PUSH_LIMIT_DISTINCT_THROUGH_JOIN(RuleTypeClass.REWRITE),
PUSH_LIMIT_DISTINCT_THROUGH_PROJECT_JOIN(RuleTypeClass.REWRITE),
PUSH_DOWN_LIMIT_DISTINCT_THROUGH_JOIN(RuleTypeClass.REWRITE),
PUSH_DOWN_LIMIT_DISTINCT_THROUGH_PROJECT_JOIN(RuleTypeClass.REWRITE),
// adjust nullable
ADJUST_NULLABLE(RuleTypeClass.REWRITE),
ADJUST_CONJUNCTS_RETURN_TYPE(RuleTypeClass.REWRITE),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public List<Rule> buildRules() {
}
return limit.withChildren(agg.withChildren(newJoin));
})
.toRule(RuleType.PUSH_LIMIT_DISTINCT_THROUGH_JOIN),
.toRule(RuleType.PUSH_DOWN_LIMIT_DISTINCT_THROUGH_JOIN),

// limit -> distinct -> project -> join
logicalLimit(logicalAggregate(logicalProject(logicalJoin()).when(LogicalProject::isAllSlots))
Expand All @@ -67,7 +67,7 @@ public List<Rule> buildRules() {
return null;
}
return limit.withChildren(agg.withChildren(project.withChildren(newJoin)));
}).toRule(RuleType.PUSH_LIMIT_DISTINCT_THROUGH_JOIN)
}).toRule(RuleType.PUSH_DOWN_LIMIT_DISTINCT_THROUGH_PROJECT_JOIN)
);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.rules.rewrite;

import org.apache.doris.nereids.properties.OrderKey;
import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.algebra.SetOperation.Qualifier;
import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
import org.apache.doris.nereids.trees.plans.logical.LogicalTopN;
import org.apache.doris.nereids.trees.plans.logical.LogicalUnion;
import org.apache.doris.nereids.util.ExpressionUtils;

import com.google.common.collect.ImmutableList;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Pushes a TopN over a distinct aggregate down into every child of a {@code UNION ALL}.
 *
 * <pre>
 * TopN-Distinct
 * -> Union All
 *   -> child plan1
 *   -> child plan2
 *   -> child plan3
 *
 * rewritten to
 *
 * TopN-Distinct
 * -> Union All
 *   -> TopN-Distinct
 *     -> child plan1
 *   -> TopN-Distinct
 *     -> child plan2
 *   -> TopN-Distinct
 *     -> child plan3
 * </pre>
 *
 * <p>The outer TopN-Distinct is kept: the pushed-down copies only shrink each child's
 * contribution, the final de-duplication and ordering across children still happens on top.
 *
 * <p>Each child must be de-duplicated <em>before</em> it is truncated. Pushing a bare TopN is
 * not equivalent: for a child producing {@code [1, 1, 1, 2, 3]} and {@code ORDER BY x LIMIT 2},
 * a bare TopN keeps {@code [1, 1]}, so the outer distinct sees only {@code 1} and loses {@code 2}.
 */
public class PushDownTopNDistinctThroughUnion implements RewriteRuleFactory {

    /**
     * Builds the single rewrite rule of this factory.
     *
     * <p>The rule matches {@code LogicalTopN -> LogicalAggregate (distinct) -> LogicalUnion (ALL)}.
     * The rewrite function returns {@code null} when the rebuilt children compare equal to the
     * union's current children.
     *
     * @return an immutable list containing the rule
     */
    @Override
    public List<Rule> buildRules() {
        return ImmutableList.of(
                logicalTopN(logicalAggregate(logicalUnion().when(union -> union.getQualifier() == Qualifier.ALL))
                        .when(agg -> agg.isDistinct()))
                        .then(topN -> {
                            LogicalAggregate<LogicalUnion> agg = topN.child();
                            LogicalUnion union = agg.child();
                            // Loop-invariant: the union's outputs do not depend on the child visited.
                            List<NamedExpression> unionOutputs = union.getOutputs();
                            List<Plan> newChildren = new ArrayList<>(union.children().size());
                            for (Plan child : union.children()) {
                                // The order keys refer to union outputs; map them positionally
                                // onto this child's own output.
                                Map<Expression, Expression> replaceMap = new HashMap<>();
                                for (int i = 0; i < unionOutputs.size(); ++i) {
                                    replaceMap.put(unionOutputs.get(i), child.getOutput().get(i));
                                }

                                List<OrderKey> orderKeys = topN.getOrderKeys().stream()
                                        .map(orderKey -> orderKey.withExpression(
                                                ExpressionUtils.replace(orderKey.getExpr(), replaceMap)))
                                        .collect(ImmutableList.toImmutableList());

                                // De-duplicate the child first, otherwise duplicates could fill the
                                // pushed-down limit and hide distinct values the outer TopN needs.
                                // A child that is already a distinct aggregate is reused as is.
                                Plan distinctChild;
                                if (child instanceof LogicalAggregate && ((LogicalAggregate<?>) child).isDistinct()) {
                                    distinctChild = child;
                                } else {
                                    distinctChild = new LogicalAggregate<>(
                                            ImmutableList.<Expression>copyOf(child.getOutput()),
                                            ImmutableList.<NamedExpression>copyOf(child.getOutput()),
                                            child);
                                }

                                // Rows skipped by the outer offset may come from any child, so every
                                // child has to supply limit + offset rows, starting at offset 0.
                                newChildren.add(new LogicalTopN<>(
                                        orderKeys, topN.getLimit() + topN.getOffset(), 0, distinctChild));
                            }
                            // Nothing changed: returning null tells the framework to keep the plan.
                            // NOTE(review): presumably this is what stops the rule from re-firing on
                            // its own output - confirm how plan equality treats children.
                            if (union.children().equals(newChildren)) {
                                return null;
                            }
                            return topN.withChildren(agg.withChildren(union.withChildren(newChildren)));
                        })
                        // NOTE(review): this shares its RuleType with the non-distinct TopN-through-union
                        // rule; confirm whether a dedicated RuleType was intended for this rule.
                        .toRule(RuleType.PUSH_DOWN_TOP_N_THROUGH_UNION)
        );
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !push_down_topn_through_union --
PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute
------PhysicalTopN[LOCAL_SORT]
--------hashAgg[GLOBAL]
----------PhysicalDistribute
------------hashAgg[LOCAL]
--------------PhysicalUnion
----------------PhysicalDistribute
------------------PhysicalOlapScan[t]
----------------PhysicalDistribute
------------------PhysicalOlapScan[t]

-- !push_down_topn_union_with_conditions --
PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute
------PhysicalTopN[LOCAL_SORT]
--------hashAgg[GLOBAL]
----------PhysicalDistribute
------------hashAgg[LOCAL]
--------------PhysicalUnion
----------------PhysicalDistribute
------------------filter((t1.score > 10))
--------------------PhysicalOlapScan[t]
----------------PhysicalDistribute
------------------filter((t2.name = 'Test'))
--------------------PhysicalOlapScan[t]
----------------PhysicalDistribute
------------------filter((t3.id < 5))
--------------------PhysicalOlapScan[t]

-- !push_down_topn_union_with_order_by --
PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute
------PhysicalTopN[LOCAL_SORT]
--------hashAgg[GLOBAL]
----------PhysicalDistribute
------------hashAgg[LOCAL]
--------------PhysicalUnion
----------------PhysicalDistribute
------------------PhysicalOlapScan[t]
----------------PhysicalDistribute
------------------PhysicalOlapScan[t]
----------------PhysicalDistribute
------------------PhysicalOlapScan[t]

-- !push_down_topn_nested_union --
PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute
------PhysicalTopN[LOCAL_SORT]
--------hashAgg[GLOBAL]
----------PhysicalDistribute
------------hashAgg[LOCAL]
--------------PhysicalUnion
----------------PhysicalDistribute
------------------PhysicalOlapScan[t]
----------------PhysicalDistribute
------------------PhysicalOlapScan[t]
----------------PhysicalDistribute
------------------PhysicalOlapScan[t]
----------------PhysicalDistribute
------------------PhysicalOlapScan[t]

-- !push_down_topn_union_after_join --
PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute
------PhysicalTopN[LOCAL_SORT]
--------hashAgg[GLOBAL]
----------PhysicalDistribute
------------hashAgg[LOCAL]
--------------PhysicalUnion
----------------PhysicalDistribute
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=()
----------------------PhysicalProject
------------------------PhysicalOlapScan[t]
----------------------PhysicalProject
------------------------PhysicalOlapScan[t]
----------------PhysicalDistribute
------------------PhysicalProject
--------------------PhysicalOlapScan[t]

-- !push_down_topn_union_different_projections --
PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute
------PhysicalTopN[LOCAL_SORT]
--------hashAgg[GLOBAL]
----------PhysicalDistribute
------------hashAgg[LOCAL]
--------------PhysicalUnion
----------------PhysicalDistribute
------------------PhysicalProject
--------------------PhysicalOlapScan[t]
----------------PhysicalDistribute
------------------PhysicalProject
--------------------PhysicalOlapScan[t]

-- !push_down_topn_union_with_subquery --
PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute
------PhysicalTopN[LOCAL_SORT]
--------hashAgg[GLOBAL]
----------PhysicalDistribute
------------hashAgg[LOCAL]
--------------PhysicalUnion
----------------PhysicalDistribute
------------------PhysicalProject
--------------------filter((t.score > 20))
----------------------PhysicalOlapScan[t]
----------------PhysicalDistribute
------------------PhysicalProject
--------------------PhysicalOlapScan[t]

-- !push_down_topn_union_with_limit --
PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute
------PhysicalTopN[LOCAL_SORT]
--------hashAgg[GLOBAL]
----------PhysicalDistribute
------------hashAgg[LOCAL]
--------------PhysicalUnion
----------------PhysicalDistribute
------------------PhysicalLimit[GLOBAL]
--------------------PhysicalDistribute
----------------------PhysicalLimit[LOCAL]
------------------------PhysicalOlapScan[t]
----------------PhysicalDistribute
------------------PhysicalLimit[GLOBAL]
--------------------PhysicalDistribute
----------------------PhysicalLimit[LOCAL]
------------------------PhysicalOlapScan[t]

-- !push_down_topn_union_complex_conditions --
PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute
------PhysicalTopN[LOCAL_SORT]
--------hashAgg[GLOBAL]
----------PhysicalDistribute
------------hashAgg[LOCAL]
--------------PhysicalUnion
----------------PhysicalDistribute
------------------filter((t1.name = 'Test') and (t1.score > 10))
--------------------PhysicalOlapScan[t]
----------------PhysicalDistribute
------------------filter((t2.id < 5) and (t2.score < 20))
--------------------PhysicalOlapScan[t]

Loading

0 comments on commit 91f56ce

Please sign in to comment.