
Merge remote-tracking branch 'origin/metadata-lock' into metadata-lock
CalvinKirs committed Sep 19, 2023
2 parents 23c5b66 + b92d0cb commit 740ec0c
Showing 71 changed files with 1,451 additions and 772 deletions.
12 changes: 10 additions & 2 deletions be/src/vec/exprs/table_function/vexplode.cpp
@@ -79,8 +79,16 @@ void VExplodeTableFunction::get_value(MutableColumnPtr& column) {
if (current_empty() || (_detail.nested_nullmap_data && _detail.nested_nullmap_data[pos])) {
column->insert_default();
} else {
column->insert_data(const_cast<char*>(_detail.nested_col->get_data_at(pos).data),
_detail.nested_col->get_data_at(pos).size);
if (_is_nullable) {
assert_cast<ColumnNullable*>(column.get())
->get_nested_column_ptr()
->insert_from(*_detail.nested_col, pos);
assert_cast<ColumnUInt8*>(
assert_cast<ColumnNullable*>(column.get())->get_null_map_column_ptr().get())
->insert_default();
} else {
column->insert_from(*_detail.nested_col, pos);
}
}
}

6 changes: 3 additions & 3 deletions docs/en/docs/query-acceleration/statistics.md
@@ -69,7 +69,7 @@ Column statistics collection syntax:

```SQL
ANALYZE < TABLE | DATABASE table_name | db_name >
[ PARTITIONS (partition_name [, ...]) ]
[ PARTITIONS [(*) | (partition_name [, ...]) | WITH RECENT COUNT] ]
[ (column_name [, ...]) ]
[ [ WITH SYNC ] [ WITH INCREMENTAL ] [ WITH SAMPLE PERCENT | ROWS ] [ WITH PERIOD ]]
[ PROPERTIES ("key" = "value", ...) ];
```

@@ -78,7 +78,7 @@ ANALYZE < TABLE | DATABASE table_name | db_name >
Explanation:

- table_name: The specified target table. It can be in the `db_name.table_name` form.
- partition_name: The specified target partitions (for Hive external tables only). Must be partitions that exist in `table_name`. Multiple partition names are separated by commas, e.g. (nation=US/city=Washington).
- partition_name: The specified target partitions (for Hive external tables only). Must be partitions that exist in `table_name`. Multiple partition names are separated by commas. For a single-level partition use PARTITIONS(`event_date=20230706`); for a multi-level partition use PARTITIONS(`nation=US/city=Washington`). PARTITIONS(*) specifies all partitions, and PARTITIONS WITH RECENT 30 specifies the latest 30 partitions (see the example after this list).
- column_name: The specified target column. Must be a column that exists in `table_name`. Multiple column names are separated by commas.
- sync: Collect statistics synchronously and return after collection finishes. If not specified, the statement is executed asynchronously and the job ID is returned.
- incremental: Incrementally gather statistics.
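
The following is an editorial sketch, not part of the diff above: it shows how the extended PARTITIONS clause could be used in practice. The catalog, database, and table names (`hive`, `tpch`, `lineitem`) and the partition values are hypothetical.

```SQL
-- Analyze two explicitly named partitions of a hypothetical Hive external table.
ANALYZE TABLE hive.tpch.lineitem PARTITIONS (`nation=US/city=Washington`, `nation=US/city=Seattle`) WITH SYNC;

-- Analyze all partitions of the table.
ANALYZE TABLE hive.tpch.lineitem PARTITIONS (*);

-- Analyze only the 30 most recently added partitions.
ANALYZE TABLE hive.tpch.lineitem PARTITIONS WITH RECENT 30;
```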
@@ -673,4 +673,4 @@ When executing ANALYZE, statistical data is written to the internal table __inte

### ANALYZE Failure for Large Tables

Due to the strict resource limitations for ANALYZE, the ANALYZE operation for large tables might experience timeouts or exceed the memory limit of BE. In such cases, it's recommended to use ANALYZE ... WITH SAMPLE.... Additionally, for scenarios involving dynamic partitioned tables, it's highly recommended to use ANALYZE ... WITH INCREMENTAL.... This statement processes only the partitions with updated data incrementally, avoiding redundant computations and improving efficiency.
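
As a hedged illustration (not part of the diff), the recommendations above might translate into statements like the following; the table name `db1.big_table` and the sampling figures are hypothetical:

```SQL
-- Sample-based collection for a large table: scan only part of the data.
ANALYZE TABLE db1.big_table WITH SAMPLE PERCENT 10;
-- Or cap the number of sampled rows instead of using a percentage.
ANALYZE TABLE db1.big_table WITH SAMPLE ROWS 200000;

-- Incremental collection for a dynamically partitioned table:
-- only partitions whose data changed are re-analyzed.
ANALYZE TABLE db1.big_table WITH INCREMENTAL;
```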
4 changes: 2 additions & 2 deletions docs/zh-CN/docs/query-acceleration/statistics.md
@@ -66,7 +66,7 @@ The Doris query optimizer uses statistics to determine the most efficient execution plan for a query

```SQL
ANALYZE < TABLE | DATABASE table_name | db_name >
[ PARTITIONS (partition_name [, ...]) ]
[ PARTITIONS [(*) | (partition_name [, ...]) | WITH RECENT COUNT] ]
[ (column_name [, ...]) ]
[ [ WITH SYNC ] [WITH INCREMENTAL] [ WITH SAMPLE PERCENT | ROWS ] [ WITH PERIOD ] ]
[ PROPERTIES ("key" = "value", ...) ];
```

@@ -75,7 +75,7 @@ ANALYZE < TABLE | DATABASE table_name | db_name >
Where:

- table_name: The specified target table. It can be in the `db_name.table_name` form.
- partition_name: The specified target partitions (currently for Hive external tables only). Must be partitions that exist in `table_name`; multiple partition names are separated by commas. Example partition names: event_date=20230706, nation=CN/city=Beijing
- partition_name: The specified target partitions (currently for Hive external tables only). Must be partitions that exist in `table_name`; multiple partition names are separated by commas. Examples: a single-level partition is written as PARTITIONS(`event_date=20230706`), a multi-level partition as PARTITIONS(`nation=CN/city=Beijing`). PARTITIONS (*) specifies all partitions, and PARTITIONS WITH RECENT 100 specifies the latest 100 partitions (see the sketch after this list).
- column_name: The specified target column. Must be a column that exists in `table_name`; multiple column names are separated by commas.
- sync: Collect statistics synchronously and return after collection finishes. If not specified, the statement is executed asynchronously and the task ID is returned.
- period: Collect statistics periodically. The unit is seconds; when specified, the corresponding statistics are collected at the given interval.
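
A brief editorial sketch (not part of the diff) of the newly documented partition forms, using a hypothetical Hive external table `hive.db1.events`:

```SQL
-- All partitions of the hypothetical table.
ANALYZE TABLE hive.db1.events PARTITIONS (*);

-- Only the latest 100 partitions.
ANALYZE TABLE hive.db1.events PARTITIONS WITH RECENT 100;
```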
9 changes: 9 additions & 0 deletions fe/fe-core/src/main/cup/sql_parser.cup
@@ -528,6 +528,7 @@ terminal String
KW_QUOTA,
KW_RANDOM,
KW_RANGE,
KW_RECENT,
KW_READ,
KW_REBALANCE,
KW_RECOVER,
@@ -5900,6 +5901,14 @@ partition_names ::=
{:
RESULT = new PartitionNames(true, Lists.newArrayList(partName));
:}
| KW_PARTITIONS LPAREN STAR RPAREN
{:
RESULT = new PartitionNames(true);
:}
| KW_PARTITIONS KW_WITH KW_RECENT INTEGER_LITERAL:count
{:
RESULT = new PartitionNames(count);
:}
;

opt_table_sample ::=
@@ -84,7 +84,7 @@ public class AnalyzeTblStmt extends AnalyzeStmt {

private final TableName tableName;
private List<String> columnNames;
private List<String> partitionNames;
private PartitionNames partitionNames;
private boolean isAllColumns;

// after analyzed
@@ -97,7 +97,7 @@ public AnalyzeTblStmt(TableName tableName,
AnalyzeProperties properties) {
super(properties);
this.tableName = tableName;
this.partitionNames = partitionNames == null ? null : partitionNames.getPartitionNames();
this.partitionNames = partitionNames;
this.columnNames = columnNames;
this.analyzeProperties = properties;
this.isAllColumns = columnNames == null;
@@ -240,14 +240,34 @@ public Set<String> getColumnNames() {
}

public Set<String> getPartitionNames() {
Set<String> partitions = partitionNames == null ? table.getPartitionNames() : Sets.newHashSet(partitionNames);
if (isSamplingPartition()) {
int partNum = ConnectContext.get().getSessionVariable().getExternalTableAnalyzePartNum();
partitions = partitions.stream().limit(partNum).collect(Collectors.toSet());
if (partitionNames == null || partitionNames.getPartitionNames() == null) {
return null;
}
Set<String> partitions = Sets.newHashSet();
partitions.addAll(partitionNames.getPartitionNames());
/*
if (isSamplingPartition()) {
int partNum = ConnectContext.get().getSessionVariable().getExternalTableAnalyzePartNum();
partitions = partitions.stream().limit(partNum).collect(Collectors.toSet());
}
*/
return partitions;
}

public boolean isAllPartitions() {
if (partitionNames == null) {
return false;
}
return partitionNames.isAllPartitions();
}

public long getPartitionCount() {
if (partitionNames == null) {
return 0;
}
return partitionNames.getCount();
}

public boolean isPartitionOnly() {
return partitionNames != null;
}
@@ -48,15 +48,37 @@ public class PartitionNames implements ParseNode, Writable {
// true if these partitions are temp partitions
@SerializedName(value = "isTemp")
private final boolean isTemp;
private final boolean allPartitions;
private final long count;
// Default partition count to collect statistic for external table.
private static final long DEFAULT_PARTITION_COUNT = 100;

public PartitionNames(boolean isTemp, List<String> partitionNames) {
this.partitionNames = partitionNames;
this.isTemp = isTemp;
this.allPartitions = false;
this.count = 0;
}

public PartitionNames(PartitionNames other) {
this.partitionNames = Lists.newArrayList(other.partitionNames);
this.isTemp = other.isTemp;
this.allPartitions = other.allPartitions;
this.count = 0;
}

public PartitionNames(boolean allPartitions) {
this.partitionNames = null;
this.isTemp = false;
this.allPartitions = allPartitions;
this.count = 0;
}

public PartitionNames(long partitionCount) {
this.partitionNames = null;
this.isTemp = false;
this.allPartitions = false;
this.count = partitionCount;
}

public List<String> getPartitionNames() {
@@ -67,9 +89,23 @@ public boolean isTemp() {
return isTemp;
}

public boolean isAllPartitions() {
return allPartitions;
}

public long getCount() {
return count;
}

@Override
public void analyze(Analyzer analyzer) throws AnalysisException {
if (partitionNames.isEmpty()) {
if (allPartitions && count > 0) {
throw new AnalysisException("All partition and partition count couldn't be set at the same time.");
}
if (allPartitions || count > 0) {
return;
}
if (partitionNames == null || partitionNames.isEmpty()) {
throw new AnalysisException("No partition specified in partition lists");
}
// check if partition name is not empty string
10 changes: 4 additions & 6 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
@@ -855,12 +855,10 @@ private boolean tryLock(boolean mustLock) {
while (true) {
try {
if (!lock.tryLock(Config.catalog_try_lock_timeout_ms, TimeUnit.MILLISECONDS)) {
if (LOG.isDebugEnabled()) {
// to see which thread held this lock for long time.
Thread owner = lock.getOwner();
if (owner != null) {
LOG.info("catalog lock is held by: {}", Util.dumpThread(owner, 10));
}
// to see which thread held this lock for long time.
Thread owner = lock.getOwner();
if (owner != null) {
LOG.info("catalog lock is held by: {}", Util.dumpThread(owner, 10));
}

if (mustLock) {
@@ -57,6 +57,7 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.time.LocalDate;
@@ -628,6 +629,12 @@ private void setStatData(Column col, ColumnStatisticsData data, ColumnStatisticB
builder.setMaxValue(Double.MAX_VALUE);
}
}

@Override
public void gsonPostProcess() throws IOException {
super.gsonPostProcess();
estimatedRowCount = -1;
}
}


@@ -23,6 +23,7 @@
public class Counter {
private volatile long value;
private volatile int type;
private volatile boolean remove = false;

public long getValue() {
return value;
@@ -68,4 +69,12 @@ public boolean isTimeType() {
TUnit ttype = TUnit.findByValue(type);
return ttype == TUnit.TIME_MS || ttype == TUnit.TIME_NS || ttype == TUnit.TIME_S;
}

public void setCanRemove() {
this.remove = true;
}

public boolean isRemove() {
return this.remove;
}
}
@@ -399,7 +399,7 @@ private static void mergeProfileCounter(RuntimeProfile src, String counterName,

mergeProfileCounter(src, childCounterName, rhs);
mergeCounter(src, childCounterName, counter, rhsCounter);
removeZeroeCounter(childCounterSet, childCounterName, counter);
removeCounter(childCounterSet, childCounterName, counter);

}
}
@@ -423,8 +423,8 @@ private static void mergeProfileInfoStr(RuntimeProfile src, LinkedList<RuntimePr
}
}

private static void removeZeroeCounter(Set<String> childCounterSet, String childCounterName, Counter counter) {
if (counter.getValue() == 0) {
private static void removeCounter(Set<String> childCounterSet, String childCounterName, Counter counter) {
if (counter.isRemove()) {
childCounterSet.remove(childCounterName);
}
}
@@ -476,7 +476,7 @@ private static void mergeCounter(RuntimeProfile src, String counterName, Counter
+ MIN_TIME_PRE + printCounter(minCounter.getValue(), minCounter.getType()) + " ]";
src.infoStrings.put(counterName, infoString);
}
counter.setValue(0); // value 0 will remove in removeZeroeCounter
counter.setCanRemove(); // value will remove in removeCounter
} else {
if (rhsCounter.size() == 0) {
return;
@@ -552,7 +552,7 @@ public static String getEtlOutputPath(String fsDefaultName, String outputPath, l
return String.format(ETL_OUTPUT_PATH, fsDefaultName, outputPath, dbId, loadLabel, etlOutputDir);
}

private class InputSizeInvalidException extends LoadException {
private static class InputSizeInvalidException extends LoadException {
public InputSizeInvalidException(String msg) {
super(msg);
}
13 changes: 6 additions & 7 deletions fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Memo.java
@@ -138,8 +138,8 @@ public void removePhysicalExpression() {

private Plan skipProject(Plan plan, Group targetGroup) {
// Some top project can't be eliminated
if (plan instanceof LogicalProject && ((LogicalProject<?>) plan).canEliminate()) {
LogicalProject<Plan> logicalProject = (LogicalProject<Plan>) plan;
if (plan instanceof LogicalProject) {
LogicalProject<?> logicalProject = (LogicalProject<?>) plan;
if (targetGroup != root) {
if (logicalProject.getOutputSet().equals(logicalProject.child().getOutputSet())) {
return skipProject(logicalProject.child(), targetGroup);
Expand All @@ -155,7 +155,7 @@ private Plan skipProject(Plan plan, Group targetGroup) {

private Plan skipProjectGetChild(Plan plan) {
if (plan instanceof LogicalProject) {
LogicalProject<Plan> logicalProject = (LogicalProject<Plan>) plan;
LogicalProject<?> logicalProject = (LogicalProject<?>) plan;
Plan child = logicalProject.child();
if (logicalProject.getOutputSet().equals(child.getOutputSet())) {
return skipProjectGetChild(child);
@@ -915,7 +915,7 @@ private PhysicalPlan unrankGroup(Group group, PhysicalProperties prop, long rank
int prefix = 0;
for (GroupExpression groupExpression : extractGroupExpressionContainsProp(group, prop)) {
List<Pair<Long, Double>> possiblePlans = rankGroupExpression(groupExpression, prop);
if (possiblePlans.size() != 0 && rank - prefix <= possiblePlans.get(possiblePlans.size() - 1).first) {
if (!possiblePlans.isEmpty() && rank - prefix <= possiblePlans.get(possiblePlans.size() - 1).first) {
return unrankGroupExpression(groupExpression, prop, rank - prefix);
}
prefix += possiblePlans.size();
@@ -944,10 +944,9 @@ private PhysicalPlan unrankGroupExpression(GroupExpression groupExpression,
childrenPlan.add(unrankGroup(groupExpression.child(i), properties.get(i), childrenRanks.get(i)));
}
Plan plan = groupExpression.getPlan().withChildren(childrenPlan);
PhysicalPlan physicalPlan = ((PhysicalPlan) plan).withPhysicalPropertiesAndStats(
return ((PhysicalPlan) plan).withPhysicalPropertiesAndStats(
groupExpression.getOutputProperties(prop),
groupExpression.getOwnerGroup().getStatistics());
return physicalPlan;
}

/**
Expand All @@ -957,7 +956,7 @@ private PhysicalPlan unrankGroupExpression(GroupExpression groupExpression,
* 2: [2%1, 2%(1*2)]
*/
private List<Long> extractChildRanks(long rank, List<List<Pair<Long, Double>>> children) {
Preconditions.checkArgument(children.size() > 0);
Preconditions.checkArgument(!children.isEmpty(), "children should not empty in extractChildRanks");
int factor = children.get(0).size();
List<Long> indices = new ArrayList<>();
for (int i = 0; i < children.size() - 1; i++) {