Skip to content

Commit

Permalink
[Improvement](statistics) Improve statistics user experience (apache#24414)
Browse files Browse the repository at this point in the history

Two improvements:
1. Move the `Job_Id` column of the `Analyze table` command's result set to the first position, to keep it consistent with `show analyze`.
```
mysql> analyze table hive.tpch100.region;
+--------+--------------+-------------------------+------------+--------------------------------+
| Job_Id | Catalog_Name | DB_Name                 | Table_Name | Columns                        |
+--------+--------------+-------------------------+------------+--------------------------------+
| 14403  | hive         | default_cluster:tpch100 | region     | [r_regionkey,r_comment,r_name] |
+--------+--------------+-------------------------+------------+--------------------------------+
1 row in set (0.03 sec)
```
2. Add `analyze_timeout` session variable, to control `analyze table/database with sync` timeout.
  • Loading branch information
Jibing-Li authored Sep 18, 2023
1 parent 79fbc2e commit f3e350e
Show file tree
Hide file tree
Showing 7 changed files with 90 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,8 @@ public int getExecTimeout() {
if (executor != null && executor.isInsertStmt()) {
// particular for insert stmt, we can expand other type of timeout in the same way
return Math.max(sessionVariable.getInsertTimeoutS(), sessionVariable.getQueryTimeoutS());
} else if (executor != null && executor.isAnalyzeStmt()) {
return sessionVariable.getAnalyzeTimeoutS();
} else {
// normal query stmt
return sessionVariable.getQueryTimeoutS();
Expand Down
17 changes: 17 additions & 0 deletions fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ public class SessionVariable implements Serializable, Writable {
public static final String EXEC_MEM_LIMIT = "exec_mem_limit";
public static final String SCAN_QUEUE_MEM_LIMIT = "scan_queue_mem_limit";
public static final String QUERY_TIMEOUT = "query_timeout";
public static final String ANALYZE_TIMEOUT = "analyze_timeout";

public static final String MAX_EXECUTION_TIME = "max_execution_time";
public static final String INSERT_TIMEOUT = "insert_timeout";
Expand Down Expand Up @@ -453,6 +454,10 @@ public class SessionVariable implements Serializable, Writable {
@VariableMgr.VarAttr(name = QUERY_TIMEOUT)
public int queryTimeoutS = 900;

// Timeout in seconds for analyze table/database jobs; defaults to 43200 (12 hours).
@VariableMgr.VarAttr(name = ANALYZE_TIMEOUT, needForward = true)
public int analyzeTimeoutS = 43200;

// The global max_execution_time value provides the default for the session value for new connections.
// The session value applies to SELECT executions executed within the session that include
// no MAX_EXECUTION_TIME(N) optimizer hint or for which N is 0.
Expand Down Expand Up @@ -1373,6 +1378,10 @@ public int getQueryTimeoutS() {
return queryTimeoutS;
}

/**
 * @return the timeout, in seconds, applied to analyze jobs executed
 *         synchronously (e.g. {@code analyze table ... with sync})
 */
public int getAnalyzeTimeoutS() {
    return this.analyzeTimeoutS;
}

/** Enables or disables the two-phase read optimization for this session. */
public void setEnableTwoPhaseReadOpt(boolean isEnabled) {
    this.enableTwoPhaseReadOpt = isEnabled;
}
Expand Down Expand Up @@ -1552,6 +1561,10 @@ public void setQueryTimeoutS(int queryTimeoutS) {
this.queryTimeoutS = queryTimeoutS;
}

/**
 * Sets the analyze timeout for this session.
 *
 * @param timeoutS new analyze timeout, in seconds
 */
public void setAnalyzeTimeoutS(int timeoutS) {
    this.analyzeTimeoutS = timeoutS;
}

public void setMaxExecutionTimeMS(int maxExecutionTimeMS) {
this.maxExecutionTimeMS = maxExecutionTimeMS;
this.queryTimeoutS = this.maxExecutionTimeMS / 1000;
Expand Down Expand Up @@ -2486,6 +2499,9 @@ public void setForwardedSessionVariables(TQueryOptions queryOptions) {
if (queryOptions.isSetInsertTimeout()) {
setInsertTimeoutS(queryOptions.getInsertTimeout());
}
if (queryOptions.isSetAnalyzeTimeout()) {
setAnalyzeTimeoutS(queryOptions.getAnalyzeTimeout());
}
}

/**
Expand All @@ -2497,6 +2513,7 @@ public TQueryOptions getQueryOptionVariables() {
queryOptions.setScanQueueMemLimit(Math.min(maxScanQueueMemByte, maxExecMemByte / 20));
queryOptions.setQueryTimeout(queryTimeoutS);
queryOptions.setInsertTimeout(insertTimeoutS);
queryOptions.setAnalyzeTimeout(analyzeTimeoutS);
return queryOptions;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,13 @@ public boolean isInsertStmt() {
return parsedStmt instanceof InsertStmt;
}

/**
 * Returns whether the statement being executed is an ANALYZE statement.
 * Used when computing the execution timeout, so analyze jobs are governed
 * by {@code analyze_timeout} rather than {@code query_timeout}.
 */
public boolean isAnalyzeStmt() {
    // instanceof is null-safe: it evaluates to false when parsedStmt is
    // null (JLS 15.20.2), so no explicit null check is needed.
    return parsedStmt instanceof AnalyzeStmt;
}

/**
* Used for audit in ConnectProcessor.
* <p>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -384,23 +384,23 @@ protected AnalysisInfo buildAndAssignJob(AnalyzeTblStmt stmt) throws DdlExceptio

private void sendJobId(List<AnalysisInfo> analysisInfos, boolean proxy) {
List<Column> columns = new ArrayList<>();
columns.add(new Column("Job_Id", ScalarType.createVarchar(19)));
columns.add(new Column("Catalog_Name", ScalarType.createVarchar(1024)));
columns.add(new Column("DB_Name", ScalarType.createVarchar(1024)));
columns.add(new Column("Table_Name", ScalarType.createVarchar(1024)));
columns.add(new Column("Columns", ScalarType.createVarchar(1024)));
columns.add(new Column("Job_Id", ScalarType.createVarchar(19)));
ShowResultSetMetaData commonResultSetMetaData = new ShowResultSetMetaData(columns);
List<List<String>> resultRows = new ArrayList<>();
for (AnalysisInfo analysisInfo : analysisInfos) {
if (analysisInfo == null) {
continue;
}
List<String> row = new ArrayList<>();
row.add(String.valueOf(analysisInfo.jobId));
row.add(analysisInfo.catalogName);
row.add(analysisInfo.dbName);
row.add(analysisInfo.tblName);
row.add(analysisInfo.colName);
row.add(String.valueOf(analysisInfo.jobId));
resultRows.add(row);
}
ShowResultSet commonResultSet = new ShowResultSet(commonResultSetMetaData, resultRows);
Expand Down
1 change: 1 addition & 0 deletions gensrc/thrift/PaloInternalService.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ struct TQueryOptions {
// use is_report_success any more
84: optional bool enable_profile = false;
85: optional bool enable_page_cache = false;
86: optional i32 analyze_timeout = 43200
}


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !01 --
p_container 200000000 40 0 JUMBO BAG WRAP PKG
p_partkey 200000000 200778064 0 1 200000000
p_retailprice 200000000 120014 0 900.00 2099.00
p_type 200000000 150 0 ECONOMY ANODIZED BRASS STANDARD POLISHED TIN

Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Regression test: verifies that ANALYZE ... WITH SYNC is governed by the
// analyze_timeout session variable rather than query_timeout. Only runs when
// external Hive testing is enabled in the regression configuration.
suite("test_hive_statistic_timeout", "p2,external,hive,external_remote,external_remote_hive") {
String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
if (enabled != null && enabled.equalsIgnoreCase("true")) {
// Connection details for the external Hive metastore under test.
String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost")
String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort")
String catalog_name = "test_hive_statistic_timeout"
// Recreate the HMS catalog from scratch so the test is repeatable.
sql """drop catalog if exists ${catalog_name};"""
sql """
create catalog if not exists ${catalog_name} properties (
'type'='hms',
'hadoop.username' = 'hadoop',
'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
);
"""
logger.info("catalog " + catalog_name + " created")

sql """use ${catalog_name}.tpch_1000_parquet"""
// query_timeout is deliberately set to 1 second: if the synchronous analyze
// below were (incorrectly) bound by query_timeout it would fail, so its
// success demonstrates the job uses analyze_timeout instead.
sql """set query_timeout=1"""
sql """analyze table part (p_partkey, p_container, p_type, p_retailprice) with sync;"""

// Expect one column-stats row per analyzed column (4 columns above).
def result = sql """show column stats part"""
assertTrue(result.size() == 4)

// Resolve the internal id of the catalog just created so the collected
// statistics can be filtered by catalog_id below.
def ctlId
result = sql """show proc '/catalogs'"""

for (int i = 0; i < result.size(); i++) {
if (result[i][1] == catalog_name) {
ctlId = result[i][0]
}
}

// Compare the collected statistics against the checked-in expected output.
qt_01 """select col_id, count, ndv, null_count, min, max from internal.__internal_schema.column_statistics where catalog_id='$ctlId' order by col_id;"""
// Clean up the catalog created for this test.
sql """drop catalog ${catalog_name}""";
}
}

0 comments on commit f3e350e

Please sign in to comment.