Skip to content

Commit

Permalink
[Improvement](sort) add session variable force_sort_algorithm and adj…
Browse files Browse the repository at this point in the history
…ust some parameter about sort (apache#39334)

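1. add session variable force_sort_algorithm to force the sort algorithm used by SortNode ("heap" or "topn"; any other non-empty value selects full sort)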
2. do not use partial sort on string columns
```sql
select count(*) from (select lo_orderpriority from lineorder order by lo_orderpriority limit 100000)t;
partial sort: 22s
pdq sort: 8s
```
3. enlarge topn_opt_limit_threshold to 10240000
```sql
select count(*) from (select * from lineorder order by lo_linenumber limit 100000)t;
heap 1s
set topn_opt_limit_threshold=10240000; heap  0.4s

select count(*) from (select * from lineorder order by lo_linenumber limit 10000000)t;
heap 13s
set topn_opt_limit_threshold=10240000; heap 12s

select count(*) from (select * from lineorder order by lo_linenumber limit 100000000)t;
heap 2min13s
set topn_opt_limit_threshold=102400000;  heap 2 min 22.56 sec

select count(*) from (select lo_orderpriority from lineorder order by lo_orderpriority limit 100000)t;
heap 2.4s
set topn_opt_limit_threshold=102400000;  heap 1s

select count(*) from (select lo_orderpriority from lineorder order by lo_orderpriority limit 10000000)t;
heap 21s
set topn_opt_limit_threshold=102400000; heap 20s

```
  • Loading branch information
BiteTheDDDDt authored Aug 16, 2024
1 parent 7a09202 commit 7372c99
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 23 deletions.
17 changes: 3 additions & 14 deletions be/src/vec/columns/column_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -483,21 +483,10 @@ void ColumnStr<T>::get_permutation(bool reverse, size_t limit, int /*nan_directi
res[i] = i;
}

// std::partial_sort need limit << s can get performance benefit
if (limit > (s / 8.0)) limit = 0;

if (limit) {
if (reverse) {
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less<false>(*this));
} else {
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less<true>(*this));
}
if (reverse) {
pdqsort(res.begin(), res.end(), less<false>(*this));
} else {
if (reverse) {
pdqsort(res.begin(), res.end(), less<false>(*this));
} else {
pdqsort(res.begin(), res.end(), less<true>(*this));
}
pdqsort(res.begin(), res.end(), less<true>(*this));
}
}

Expand Down
28 changes: 20 additions & 8 deletions fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.apache.doris.analysis.SortInfo;
import org.apache.doris.common.NotImplementedException;
import org.apache.doris.common.UserException;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.StatisticalType;
import org.apache.doris.statistics.StatsRecursiveDerive;
import org.apache.doris.thrift.TExplainLevel;
Expand Down Expand Up @@ -339,16 +340,27 @@ protected void toThrift(TPlanNode msg) {
msg.sort_node.setIsAnalyticSort(isAnalyticSort);
msg.sort_node.setIsColocate(isColocate);

boolean isFixedLength = info.getOrderingExprs().stream().allMatch(e -> !e.getType().isStringType()
&& !e.getType().isCollectionType());
boolean isFixedLength = info.getOrderingExprs().stream()
.allMatch(e -> !e.getType().isStringType() && !e.getType().isCollectionType());
ConnectContext connectContext = ConnectContext.get();
TSortAlgorithm algorithm;
if (limit > 0 && limit + offset < 1024 && (useTwoPhaseReadOpt || hasRuntimePredicate
|| isFixedLength)) {
algorithm = TSortAlgorithm.HEAP_SORT;
} else if (limit > 0 && !isFixedLength && limit + offset < 256) {
algorithm = TSortAlgorithm.TOPN_SORT;
if (connectContext != null && !connectContext.getSessionVariable().forceSortAlgorithm.isEmpty()) {
String algo = connectContext.getSessionVariable().forceSortAlgorithm;
if (algo.equals("heap")) {
algorithm = TSortAlgorithm.HEAP_SORT;
} else if (algo.equals("topn")) {
algorithm = TSortAlgorithm.TOPN_SORT;
} else {
algorithm = TSortAlgorithm.FULL_SORT;
}
} else {
algorithm = TSortAlgorithm.FULL_SORT;
if (limit > 0 && limit + offset < 1024 && (useTwoPhaseReadOpt || hasRuntimePredicate || isFixedLength)) {
algorithm = TSortAlgorithm.HEAP_SORT;
} else if (limit > 0 && !isFixedLength && limit + offset < 256) {
algorithm = TSortAlgorithm.TOPN_SORT;
} else {
algorithm = TSortAlgorithm.FULL_SORT;
}
}
msg.sort_node.setAlgorithm(algorithm);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,8 @@ public class SessionVariable implements Serializable, Writable {
// when true, the partition column must be set to NOT NULL.
public static final String ALLOW_PARTITION_COLUMN_NULLABLE = "allow_partition_column_nullable";

public static final String FORCE_SORT_ALGORITHM = "force_sort_algorithm";

// runtime filter run mode
public static final String RUNTIME_FILTER_MODE = "runtime_filter_mode";
// Size in bytes of Bloom Filters used for runtime filters. Actual size of filter will
Expand Down Expand Up @@ -1065,6 +1067,11 @@ public class SessionVariable implements Serializable, Writable {

@VariableMgr.VarAttr(name = ENABLE_REWRITE_ELEMENT_AT_TO_SLOT, fuzzy = true)
private boolean enableRewriteElementAtToSlot = true;

@VariableMgr.VarAttr(name = FORCE_SORT_ALGORITHM, needForward = true, description = { "强制指定SortNode的排序算法",
"Force the sort algorithm of SortNode to be specified" })
public String forceSortAlgorithm = "";

@VariableMgr.VarAttr(name = RUNTIME_FILTER_MODE, needForward = true)
private String runtimeFilterMode = "GLOBAL";

Expand Down Expand Up @@ -1458,7 +1465,7 @@ public void setEnableLeftZigZag(boolean enableLeftZigZag) {
@VariableMgr.VarAttr(name = ENABLE_TWO_PHASE_READ_OPT, fuzzy = true)
public boolean enableTwoPhaseReadOpt = true;
@VariableMgr.VarAttr(name = TOPN_OPT_LIMIT_THRESHOLD)
public long topnOptLimitThreshold = 1024;
public long topnOptLimitThreshold = 10240000;
@VariableMgr.VarAttr(name = ENABLE_SNAPSHOT_POINT_QUERY)
public boolean enableSnapshotPointQuery = true;

Expand Down

0 comments on commit 7372c99

Please sign in to comment.