
Commit 53a42f5

HIVE-29187: Unable to parse explain analyze statement when query has quoted identifier with special character (#6070)
1 parent 38236c7 commit 53a42f5

28 files changed: +546 −461 lines changed

iceberg/iceberg-handler/src/test/results/positive/delete_all_iceberg.q.out

Lines changed: 6 additions & 6 deletions
```diff
@@ -48,10 +48,10 @@ POSTHOOK: query: insert into ice01 values (1,5),(2,5),(3,5),(4,5)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: _dummy_database@_dummy_table
 POSTHOOK: Output: default@ice01
-PREHOOK: query: delete from ice01
+PREHOOK: query: delete from `ice01`
 PREHOOK: type: TRUNCATETABLE
 PREHOOK: Output: default@ice01
-POSTHOOK: query: delete from ice01
+POSTHOOK: query: delete from `ice01`
 POSTHOOK: type: TRUNCATETABLE
 POSTHOOK: Output: default@ice01
 PREHOOK: query: explain analyze delete from ice01
@@ -200,10 +200,10 @@ POSTHOOK: query: insert into ice01 values (1,5),(2,5),(3,5),(4,5)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: _dummy_database@_dummy_table
 POSTHOOK: Output: default@ice01
-PREHOOK: query: delete from ice01
+PREHOOK: query: delete from `ice01`
 PREHOOK: type: TRUNCATETABLE
 PREHOOK: Output: default@ice01
-POSTHOOK: query: delete from ice01
+POSTHOOK: query: delete from `ice01`
 POSTHOOK: type: TRUNCATETABLE
 POSTHOOK: Output: default@ice01
 PREHOOK: query: explain analyze delete from ice01
@@ -351,10 +351,10 @@ POSTHOOK: query: insert into ice01 values (1,5),(2,5),(3,5),(4,5)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: _dummy_database@_dummy_table
 POSTHOOK: Output: default@ice01
-PREHOOK: query: delete from ice01
+PREHOOK: query: delete from `ice01`
 PREHOOK: type: TRUNCATETABLE
 PREHOOK: Output: default@ice01
-POSTHOOK: query: delete from ice01
+POSTHOOK: query: delete from `ice01`
 POSTHOOK: type: TRUNCATETABLE
 POSTHOOK: Output: default@ice01
 PREHOOK: query: explain analyze delete from ice01
```

iceberg/iceberg-handler/src/test/results/positive/iceberg_load_data.q.out

Lines changed: 6 additions & 6 deletions
```diff
@@ -61,11 +61,11 @@ Stage-3
         Output:["strcol","intcol","pcol"]
         Please refer to the previous Select Operator [SEL_12]
 
-PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE ice_parquet
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE `ice_parquet`
 PREHOOK: type: QUERY
 PREHOOK: Input: default@ice_parquet__temp_table_for_load_data__
 PREHOOK: Output: default@ice_parquet
-POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE ice_parquet
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE `ice_parquet`
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@ice_parquet__temp_table_for_load_data__
 POSTHOOK: Output: default@ice_parquet
@@ -162,9 +162,9 @@ Stage-0
       Move Operator
         table:{"name:":"default.ice_avro"}
 
-PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE `ice_avro`
 PREHOOK: type: LOAD
-POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE `ice_avro`
 POSTHOOK: type: LOAD
 PREHOOK: query: explain analyze LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro
 PREHOOK: type: LOAD
@@ -276,9 +276,9 @@ Stage-0
       Move Operator
         table:{"name:":"default.ice_orc"}
 
-PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE ice_orc
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE `ice_orc`
 PREHOOK: type: LOAD
-POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE ice_orc
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE `ice_orc`
 POSTHOOK: type: LOAD
 PREHOOK: query: explain analyze LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE ice_orc
 PREHOOK: type: LOAD
```

ql/src/java/org/apache/hadoop/hive/ql/ddl/table/constraint/ConstraintsUtils.java

Lines changed: 2 additions & 4 deletions
```diff
@@ -245,10 +245,8 @@ private static List<ConstraintInfo> generateConstraintInfos(ASTNode child, List<
           // try to get default value only if this is DEFAULT constraint
           checkOrDefaultValue = getDefaultValue(grandChild, typeChildForDefault, tokenRewriteStream);
         } else if (childType == HiveParser.TOK_CHECK_CONSTRAINT) {
-          UnparseTranslator unparseTranslator = HiveUtils.collectUnescapeIdentifierTranslations(grandChild);
-          unparseTranslator.applyTranslations(tokenRewriteStream, CHECK_CONSTRAINT_PROGRAM);
-          checkOrDefaultValue = tokenRewriteStream.toString(CHECK_CONSTRAINT_PROGRAM, grandChild.getTokenStartIndex(),
-              grandChild.getTokenStopIndex());
+          checkOrDefaultValue = HiveUtils.getSqlTextWithQuotedIdentifiers(
+              grandChild, tokenRewriteStream, CHECK_CONSTRAINT_PROGRAM);
         }
       }
 
```
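The three inline steps above (collect the identifier translations, apply them under a named rewrite program, render the token range) are factored into a shared HiveUtils helper, defined in the next file, so the CHECK-constraint path and the new EXPLAIN ANALYZE path stay in sync. One visible side effect, shown in the clientnegative result further down, is that the constraint error message now renders the subquery's table name with backticks. A minimal sketch of the new call shape, assuming the Hive ql module on the classpath; the method name requotedSql and the program name "MY_PROGRAM" are illustrative, not part of the patch:

```java
import org.antlr.runtime.TokenRewriteStream;
import org.apache.hadoop.hive.ql.metadata.HiveUtils;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.SemanticException;

class RequoteSketch {
  // Renders the SQL text of an AST subtree with identifiers re-quoted in
  // backticks. Each call site passes its own rewrite-program name so its
  // edits stay independent of other analyzers sharing the token stream.
  static String requotedSql(ASTNode node, TokenRewriteStream tokens)
      throws SemanticException {
    return HiveUtils.getSqlTextWithQuotedIdentifiers(node, tokens, "MY_PROGRAM");
  }
}
```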
ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java

Lines changed: 19 additions & 10 deletions
```diff
@@ -28,7 +28,9 @@
 
 import com.google.common.collect.HashMultimap;
 import com.google.common.collect.SetMultimap;
+import org.antlr.runtime.TokenRewriteStream;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.lib.CostLessRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.ExpressionWalker;
 import org.apache.hadoop.hive.ql.lib.Node;
@@ -317,14 +319,21 @@ public static String unparseIdentifier(String identifier) {
     return unparseIdentifier(identifier, Quotation.BACKTICKS);
   }
 
+  public static String getSqlTextWithQuotedIdentifiers(
+      ASTNode node, TokenRewriteStream tokenRewriteStream, String rewriteProgram)
+      throws SemanticException {
+    UnparseTranslator unparseTranslator = HiveUtils.collectUnescapeIdentifierTranslations(node);
+    unparseTranslator.applyTranslations(tokenRewriteStream, rewriteProgram);
+    return tokenRewriteStream.toString(rewriteProgram, node.getTokenStartIndex(), node.getTokenStopIndex());
+  }
+
   public static UnparseTranslator collectUnescapeIdentifierTranslations(ASTNode node)
       throws SemanticException {
     UnparseTranslator unparseTranslator = new UnparseTranslator(Quotation.BACKTICKS);
     unparseTranslator.enable();
 
     SetMultimap<Integer, SemanticNodeProcessor> astNodeToProcessor = HashMultimap.create();
-    astNodeToProcessor.put(HiveParser.TOK_TABLE_OR_COL, new ColumnExprProcessor());
-    astNodeToProcessor.put(HiveParser.DOT, new ColumnExprProcessor());
+    astNodeToProcessor.put(HiveParser.Identifier, new IdentifierProcessor());
     NodeProcessorCtx nodeProcessorCtx = new QuotedIdExpressionContext(unparseTranslator);
 
     CostLessRuleDispatcher costLessRuleDispatcher = new CostLessRuleDispatcher(
@@ -334,19 +343,19 @@ public static UnparseTranslator collectUnescapeIdentifierTranslations(ASTNode no
     return unparseTranslator;
   }
 
-  static class ColumnExprProcessor implements SemanticNodeProcessor {
+  static class IdentifierProcessor implements SemanticNodeProcessor {
 
     @Override
     public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
-      throws SemanticException {
+        throws SemanticException {
       UnparseTranslator unparseTranslator = ((QuotedIdExpressionContext)procCtx).getUnparseTranslator();
-      ASTNode tokTableOrColNode = (ASTNode) nd;
-      for (int i = 0; i < tokTableOrColNode.getChildCount(); ++i) {
-        ASTNode child = (ASTNode) tokTableOrColNode.getChild(i);
-        if (child.getType() == HiveParser.Identifier) {
-          unparseTranslator.addIdentifierTranslation(child);
-        }
+      ASTNode identifier = (ASTNode) nd;
+      String id = identifier.getText();
+      if (FunctionRegistry.getFunctionInfo(id) != null){
+        return null;
       }
+
+      unparseTranslator.addIdentifierTranslation(identifier);
       return null;
     }
   }
```

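Two behavioral changes here: the tree walk now fires on every Identifier token rather than only on TOK_TABLE_OR_COL and DOT column expressions, so table names, aliases, and other bare identifiers are re-quoted too; and identifiers that resolve in FunctionRegistry are skipped, so a function name such as count in count(*) is not wrapped in backticks. A self-contained sketch of the quoting rule the translation applies, on the assumption that it mirrors HiveUtils.unparseIdentifier with Quotation.BACKTICKS (wrap in backticks, double any embedded backtick), run against the alias from the new quotedid_basic.q test below:

```java
public final class BacktickQuotingDemo {
  // Assumed quoting rule: wrap the identifier in backticks and escape any
  // embedded backtick by doubling it, so the text survives a re-parse.
  static String quote(String identifier) {
    return "`" + identifier.replace("`", "``") + "`";
  }

  public static void main(String[] args) {
    // The alias added in quotedid_basic.q; it ends in a literal backtick.
    String alias = " \"%&'()*+,-/;<=>?[]_|{}$^!~#@`";
    System.out.println(quote(alias));
    // prints: ` "%&'()*+,-/;<=>?[]_|{}$^!~#@```
  }
}
```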
ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java

Lines changed: 6 additions & 2 deletions
```diff
@@ -43,6 +43,7 @@
 import org.apache.hadoop.hive.ql.exec.FetchTask;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.metadata.HiveUtils;
 import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState;
 import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.VectorizationDetailLevel;
 import org.apache.hadoop.hive.ql.plan.ExplainWork;
@@ -57,6 +58,9 @@
  *
  */
 public class ExplainSemanticAnalyzer extends BaseSemanticAnalyzer {
+
+  public static final String EXPLAIN_ANALYZE_PROGRAM = "EXPLAIN_ANALYZE_PROGRAM";
+
   List<FieldSchema> fieldList;
   ExplainConfiguration config;
 
@@ -145,8 +149,8 @@ public void analyzeInternal(ASTNode ast) throws SemanticException {
     // step 1 (ANALYZE_STATE.RUNNING), run the query and collect the runtime #rows
     // step 2 (ANALYZE_STATE.ANALYZING), explain the query and provide the runtime #rows collected.
     if (config.getAnalyze() == AnalyzeState.RUNNING) {
-      String query = ctx.getTokenRewriteStream().toString(input.getTokenStartIndex(),
-          input.getTokenStopIndex());
+      String query = HiveUtils.getSqlTextWithQuotedIdentifiers(
+          input, ctx.getTokenRewriteStream(), EXPLAIN_ANALYZE_PROGRAM);
       LOG.info("Explain analyze (running phase) for query " + query);
       conf.unset(ValidTxnList.VALID_TXNS_KEY);
       conf.unset(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY);
```

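This is the crux of HIVE-29187. In the RUNNING phase the analyzer re-renders the inner query from the token stream and submits the text for a second parse, but the lexer has already stripped the backticks off quoted identifiers while building the tokens, so the plain toString() emits them bare and any identifier containing special characters no longer parses. A hedged sketch of the contrast, with ctx and input as in the diff above:

```java
// Old rendering: raw token text. The alias from quotedid_basic.q comes back
// without its backticks, and the second parse fails.
String raw = ctx.getTokenRewriteStream()
    .toString(input.getTokenStartIndex(), input.getTokenStopIndex());

// New rendering: every non-function identifier is re-quoted under the
// dedicated EXPLAIN_ANALYZE_PROGRAM before the text is handed back.
String query = HiveUtils.getSqlTextWithQuotedIdentifiers(
    input, ctx.getTokenRewriteStream(), EXPLAIN_ANALYZE_PROGRAM);
```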
ql/src/test/queries/clientpositive/quotedid_basic.q

Lines changed: 3 additions & 0 deletions
```diff
@@ -47,3 +47,6 @@ create table test (
 );
 
 describe formatted test;
+
+explain analyze
+select 1 as ` "%&'()*+,-/;<=>?[]_|{}$^!~#@```;
```

ql/src/test/queries/clientpositive/quotedid_basic_standard.q

Lines changed: 3 additions & 0 deletions
```diff
@@ -63,4 +63,7 @@ create table test (
 
 describe formatted test;
 
+explain analyze
+select 1 as " ""%&'()*+,-/;<=>?[]_|{}$^!~#@`";
+
 set hive.support.quoted.identifiers=column;
```
Lines changed: 1 addition & 1 deletion
```diff
@@ -1 +1 @@
-FAILED: SemanticException [Error 10326]: Invalid Constraint syntax Invalid CHECK constraint expression: 3=(select count(*) from t). Invalid Constraint syntax Subqueries are not allowed in Check Constraints
+FAILED: SemanticException [Error 10326]: Invalid Constraint syntax Invalid CHECK constraint expression: 3=(select count(*) from `t`). Invalid Constraint syntax Subqueries are not allowed in Check Constraints
```

ql/src/test/results/clientpositive/beeline/explain_outputs.q.out

Lines changed: 2 additions & 2 deletions
```diff
@@ -139,12 +139,12 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: select sum(t1_n22.id) from t1_n22 join t2_n14 on (t1_n22.id=t2_n14.id)
+PREHOOK: query: select sum(`t1_n22`.`id`) from `t1_n22` join `t2_n14` on (`t1_n22`.`id`=`t2_n14`.`id`)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1_n22
 PREHOOK: Input: default@t2_n14
 #### A masked pattern was here ####
-POSTHOOK: query: select sum(t1_n22.id) from t1_n22 join t2_n14 on (t1_n22.id=t2_n14.id)
+POSTHOOK: query: select sum(`t1_n22`.`id`) from `t1_n22` join `t2_n14` on (`t1_n22`.`id`=`t2_n14`.`id`)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1_n22
 POSTHOOK: Input: default@t2_n14
```

ql/src/test/results/clientpositive/llap/estimate_pkfk_filtered_fk.q.out

Lines changed: 28 additions & 28 deletions
```diff
@@ -120,32 +120,32 @@ POSTHOOK: Lineage: torpedos.admiral_id SCRIPT [(ships)s.FieldSchema(name:id, typ
 POSTHOOK: Lineage: torpedos.id SCRIPT [(ships)s.FieldSchema(name:id, type:int, comment:null), ]
 POSTHOOK: Lineage: torpedos.ship_id SIMPLE [(ships)s.FieldSchema(name:id, type:int, comment:null), ]
 PREHOOK: query: select
-  s.id
+  `s`.`id`
 from
-  ships s,
-  ship_types st,
-  torpedos t
+  `ships` `s`,
+  `ship_types` `st`,
+  `torpedos` `t`
 where
-  st.type_name='galaxy class'
-  and s.crew_size=2
-  and ship_type_id=st.id
-  and ship_id=s.id
+  `st`.`type_name`='galaxy class'
+  and `s`.`crew_size`=2
+  and `ship_type_id`=`st`.`id`
+  and `ship_id`=`s`.`id`
 PREHOOK: type: QUERY
 PREHOOK: Input: default@ship_types
 PREHOOK: Input: default@ships
 PREHOOK: Input: default@torpedos
 #### A masked pattern was here ####
 POSTHOOK: query: select
-  s.id
+  `s`.`id`
 from
-  ships s,
-  ship_types st,
-  torpedos t
+  `ships` `s`,
+  `ship_types` `st`,
+  `torpedos` `t`
 where
-  st.type_name='galaxy class'
-  and s.crew_size=2
-  and ship_type_id=st.id
-  and ship_id=s.id
+  `st`.`type_name`='galaxy class'
+  and `s`.`crew_size`=2
+  and `ship_type_id`=`st`.`id`
+  and `ship_id`=`s`.`id`
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@ship_types
 POSTHOOK: Input: default@ships
@@ -233,27 +233,27 @@ Stage-0
             default@ship_types,st,Tbl:COMPLETE,Col:COMPLETE,Output:["id","type_name"]
 
 PREHOOK: query: select
-  s.id
+  `s`.`id`
 from
-  ships s,
-  ship_types st
+  `ships` `s`,
+  `ship_types` `st`
 where
-  st.type_name='galaxy class'
-  and s.crew_size=2
-  and ship_type_id=st.id
+  `st`.`type_name`='galaxy class'
+  and `s`.`crew_size`=2
+  and `ship_type_id`=`st`.`id`
 PREHOOK: type: QUERY
 PREHOOK: Input: default@ship_types
 PREHOOK: Input: default@ships
 #### A masked pattern was here ####
 POSTHOOK: query: select
-  s.id
+  `s`.`id`
 from
-  ships s,
-  ship_types st
+  `ships` `s`,
+  `ship_types` `st`
 where
-  st.type_name='galaxy class'
-  and s.crew_size=2
-  and ship_type_id=st.id
+  `st`.`type_name`='galaxy class'
+  and `s`.`crew_size`=2
+  and `ship_type_id`=`st`.`id`
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@ship_types
 POSTHOOK: Input: default@ships
```
