From d63d2ef5d9c66164e06aa0150ed084bd4c54d3cd Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Sat, 13 Jul 2024 15:16:10 +0800 Subject: [PATCH] Support scientific notation representation in PPL Signed-off-by: Lantao Jin --- docs/user/dql/expressions.rst | 12 ++--- docs/user/ppl/functions/expressions.rst | 45 +++++++++++++++++++ .../org/opensearch/sql/ppl/DataTypeIT.java | 28 ++++++++++++ ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 8 +++- ppl/src/main/antlr/OpenSearchPPLParser.g4 | 6 +++ .../sql/ppl/parser/AstExpressionBuilder.java | 6 +++ .../sql/ppl/antlr/PPLSyntaxParserTest.java | 22 +++++++++ .../ppl/parser/AstExpressionBuilderTest.java | 38 ++++++++++++++++ 8 files changed, 157 insertions(+), 8 deletions(-) diff --git a/docs/user/dql/expressions.rst b/docs/user/dql/expressions.rst index 123bba046a..a24cb02acb 100644 --- a/docs/user/dql/expressions.rst +++ b/docs/user/dql/expressions.rst @@ -32,13 +32,13 @@ Examples Here is an example for different type of literals:: - os> SELECT 123, 'hello', false, -4.567, DATE '2020-07-07', TIME '01:01:01', TIMESTAMP '2020-07-07 01:01:01'; + os> SELECT 123, 'hello', false, -4.567, 9.876E-1, DATE '2020-07-07', TIME '01:01:01', TIMESTAMP '2020-07-07 01:01:01'; fetched rows / total rows = 1/1 - +-------+-----------+---------+----------+---------------------+-------------------+-----------------------------------+ - | 123 | 'hello' | false | -4.567 | DATE '2020-07-07' | TIME '01:01:01' | TIMESTAMP '2020-07-07 01:01:01' | - |-------+-----------+---------+----------+---------------------+-------------------+-----------------------------------| - | 123 | hello | False | -4.567 | 2020-07-07 | 01:01:01 | 2020-07-07 01:01:01 | - +-------+-----------+---------+----------+---------------------+-------------------+-----------------------------------+ + +-------+-----------+---------+----------+------------+---------------------+-------------------+-----------------------------------+ + | 123 | 'hello' | false | -4.567 | 9.876E-1 | DATE 
'2020-07-07' | TIME '01:01:01' | TIMESTAMP '2020-07-07 01:01:01' | + |-------+-----------+---------+----------+------------+---------------------+-------------------+-----------------------------------| + | 123 | hello | False | -4.567 | 0.9876 | 2020-07-07 | 01:01:01 | 2020-07-07 01:01:01 | + +-------+-----------+---------+----------+------------+---------------------+-------------------+-----------------------------------+ os> SELECT "Hello", 'Hello', "It""s", 'It''s', "It's", '"Its"', 'It\'s', 'It\\\'s', "\I\t\s" diff --git a/docs/user/ppl/functions/expressions.rst b/docs/user/ppl/functions/expressions.rst index ac48324680..788a1dc702 100644 --- a/docs/user/ppl/functions/expressions.rst +++ b/docs/user/ppl/functions/expressions.rst @@ -14,6 +14,51 @@ Introduction Expressions, particularly value expressions, are those which return a scalar value. Expressions have different types and forms. For example, there are literal values as atom expression and arithmetic, predicate and function expression built on top of them. And also expressions can be used in different clauses, such as using arithmetic expression in ``Filter``, ``Stats`` command. +Literal Values +============== + +Description +----------- + +A literal is a symbol that represents a value. The most common literal values include: + +1. Numeric literals: specify numeric values such as integer and floating-point numbers. +2. String literals: specify a string enclosed by single or double quotes. +3. Boolean literals: ``true`` or ``false``. +4. Date and Time literals: DATE 'YYYY-MM-DD' represents the date, TIME 'hh:mm:ss' represents the time, TIMESTAMP 'YYYY-MM-DD hh:mm:ss' represents the timestamp. 
+ +Examples +-------- + +Here is an example for different types of literals:: + + os> source=accounts | eval `123`=123, `'hello'`='hello', `false`=false, `-4.567`=-4.567, `9.876E-1`=9.876E-1, `DATE '2020-07-07'`=DATE '2020-07-07', `TIME '01:01:01'`=TIME '01:01:01', `TIMESTAMP '2020-07-07 01:01:01'`=TIMESTAMP '2020-07-07 01:01:01' | fields `123`, `'hello'`, `false`, `-4.567`, `9.876E-1`, `DATE '2020-07-07'`, `TIME '01:01:01'`, `TIMESTAMP '2020-07-07 01:01:01'` | head 1; + fetched rows / total rows = 1/1 + +-------+-----------+---------+----------+------------+---------------------+-------------------+-----------------------------------+ + | 123 | 'hello' | false | -4.567 | 9.876E-1 | DATE '2020-07-07' | TIME '01:01:01' | TIMESTAMP '2020-07-07 01:01:01' | + |-------+-----------+---------+----------+------------+---------------------+-------------------+-----------------------------------| + | 123 | hello | False | -4.567 | 0.9876 | 2020-07-07 | 01:01:01 | 2020-07-07 01:01:01 | + +-------+-----------+---------+----------+------------+---------------------+-------------------+-----------------------------------+ + + + os> source=accounts | eval `"Hello"`="Hello", `'Hello'`='Hello', `"It""s"`="It""s", `'It''s'`='It''s', `"It's"`="It's", `'"Its"'`='"Its"', `'It\'s'`='It\'s', `'It\\\'s'`='It\\\'s', `"\I\t\s"`="\I\t\s" | fields `"Hello"`, `'Hello'`, `"It""s"`, `'It''s'`, `"It's"`, `'"Its"'`, `'It\'s'`, `'It\\\'s'`, `"\I\t\s"` | head 1; + fetched rows / total rows = 1/1 + +-----------+-----------+-----------+-----------+----------+-----------+-----------+-------------+------------+ + | "Hello" | 'Hello' | "It""s" | 'It''s' | "It's" | '"Its"' | 'It\'s' | 'It\\\'s' | "\I\t\s" | + |-----------+-----------+-----------+-----------+----------+-----------+-----------+-------------+------------| + | Hello | Hello | It"s | It's | It's | "Its" | It's | It\'s | \I\t\s | + +-----------+-----------+-----------+-----------+----------+-----------+-----------+-------------+------------+ + + 
+ os> source=accounts | eval `{DATE '2020-07-07'}`={DATE '2020-07-07'}, `{TIME '01:01:01'}`={TIME '01:01:01'}, `{TIMESTAMP '2020-07-07 01:01:01'}`={TIMESTAMP '2020-07-07 01:01:01'} | fields `{DATE '2020-07-07'}`, `{TIME '01:01:01'}`, `{TIMESTAMP '2020-07-07 01:01:01'}` | head 1; + fetched rows / total rows = 1/1 + +-----------------------+---------------------+-------------------------------------+ + | {DATE '2020-07-07'} | {TIME '01:01:01'} | {TIMESTAMP '2020-07-07 01:01:01'} | + |-----------------------+---------------------+-------------------------------------| + | 2020-07-07 | 01:01:01 | 2020-07-07 01:01:01 | + +-----------------------+---------------------+-------------------------------------+ + + Arithmetic Operators ==================== diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/DataTypeIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/DataTypeIT.java index fe5c2ff270..8a4801074d 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/DataTypeIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/DataTypeIT.java @@ -9,7 +9,9 @@ import static org.opensearch.sql.legacy.SQLIntegTestCase.Index.DATA_TYPE_NUMERIC; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DATATYPE_NONNUMERIC; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DATATYPE_NUMERIC; +import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; import static org.opensearch.sql.util.MatcherUtils.verifySchema; import java.io.IOException; @@ -75,4 +77,30 @@ public void test_long_integer_data_type() throws IOException { schema("long1", "long"), schema("long2", "long")); } + + @Test + public void test_exponent_literal_converting_to_double_type() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | eval `9e1` = 9e1, `+9e+1` = +9e+1, `900e-1` = 900e-1, `9.0e1` =" + + " 9.0e1, 
`9.0e+1` = 9.0e+1, `9.0E1` = 9.0E1, `.9e+2` = .9e+2, `0.09e+3` =" + + " 0.09e+3, `900.0e-1` = 900.0e-1, `-900.0E-1` = -900.0E-1 | fields `9e1`," + + " `+9e+1`, `900e-1`, `9.0e1`, `9.0e+1`, `9.0E1`, `.9e+2`, `0.09e+3`," + + " `900.0e-1`, `-900.0E-1`", + TEST_INDEX_DATATYPE_NUMERIC)); + verifySchema( + result, + schema("9e1", "double"), + schema("+9e+1", "double"), + schema("900e-1", "double"), + schema("9.0e1", "double"), + schema("9.0e+1", "double"), + schema("9.0E1", "double"), + schema(".9e+2", "double"), + schema("0.09e+3", "double"), + schema("900.0e-1", "double"), + schema("-900.0E-1", "double")); + verifyDataRows(result, rows(90.0, 90.0, 90.0, 90.0, 90.0, 90.0, 90.0, 90.0, 90.0, -90.0)); + } } diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 9f707c13cd..f2ac2547b6 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -385,8 +385,9 @@ Y: 'Y'; //STRING_LITERAL: DQUOTA_STRING | SQUOTA_STRING | BQUOTA_STRING; ID: ID_LITERAL; CLUSTER: CLUSTER_PREFIX_LITERAL; -INTEGER_LITERAL: DEC_DIGIT+; -DECIMAL_LITERAL: (DEC_DIGIT+)? '.' DEC_DIGIT+; +INTEGER_LITERAL: INTEGER_NUM; +DECIMAL_LITERAL: DECIMAL_NUM; +EXPONENT_LITERAL: INTEGER_NUM EXPONENT_NUM | DECIMAL_NUM EXPONENT_NUM; fragment DATE_SUFFIX: ([\-.][*0-9]+)+; fragment ID_LITERAL: [@*A-Z]+?[*A-Z_\-0-9]*; @@ -396,6 +397,9 @@ DQUOTA_STRING: '"' ( '\\'. | '""' | ~('"'| '\\') )* '"'; SQUOTA_STRING: '\'' ('\\'. | '\'\'' | ~('\'' | '\\'))* '\''; BQUOTA_STRING: '`' ( '\\'. | '``' | ~('`'|'\\'))* '`'; fragment DEC_DIGIT: [0-9]; +fragment INTEGER_NUM: DEC_DIGIT+; +fragment DECIMAL_NUM: (DEC_DIGIT+)? '.' DEC_DIGIT+; +fragment EXPONENT_NUM: 'E' [-+]? DEC_DIGIT+; ERROR_RECOGNITION: . 
-> channel(ERRORCHANNEL); diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 39fb7f53a6..c66d03f48d 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -246,6 +246,7 @@ percentileApproxFunction numericLiteral : integerLiteral | decimalLiteral + | exponentLiteral ; // expressions @@ -690,6 +691,7 @@ literalValue | stringLiteral | integerLiteral | decimalLiteral + | exponentLiteral | booleanLiteral | datetimeLiteral //#datetime ; @@ -711,6 +713,10 @@ decimalLiteral : (PLUS | MINUS)? DECIMAL_LITERAL ; +exponentLiteral + : (PLUS | MINUS)? EXPONENT_LITERAL + ; + booleanLiteral : TRUE | FALSE diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 47db10c99b..79726812b1 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -20,6 +20,7 @@ import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DistinctCountFunctionCallContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.EvalClauseContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.EvalFunctionCallContext; +import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.ExponentLiteralContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.FieldExpressionContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.IdentsAsQualifiedNameContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.IdentsAsTableQualifiedNameContext; @@ -343,6 +344,11 @@ public UnresolvedExpression visitDecimalLiteral(DecimalLiteralContext ctx) { return new Literal(Double.valueOf(ctx.getText()), DataType.DOUBLE); } + @Override + public UnresolvedExpression 
visitExponentLiteral(ExponentLiteralContext ctx) { + return new Literal(Double.valueOf(ctx.getText()), DataType.DOUBLE); + } + @Override public UnresolvedExpression visitBooleanLiteral(BooleanLiteralContext ctx) { return new Literal(Boolean.valueOf(ctx.getText()), DataType.BOOLEAN); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java index 943953d416..78b6ac729c 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java @@ -417,4 +417,26 @@ public void testCanParseTimestampdiffFunction() { new PPLSyntaxParser() .parse("SOURCE=test | eval k = TIMESTAMPDIFF(WEEK,'2003-01-02','2003-01-02')")); } + + @Test + public void testExponentLiteralShouldPass() { + List<String> scientificNotationList = + List.of( + "9e1", + "+9e+1", + "9e-1", + "-9e1", + "9.0e1", + "9.0e+1", + "9.0E1", + ".9e+2", + "0.9e+2", + "900e-1", + "900.0E-1"); + for (String exponentLiteral : scientificNotationList) { + ParseTree tree = + new PPLSyntaxParser().parse("search source=t | eval scientific = " + exponentLiteral); + assertNotEquals(null, tree); + } + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index 7bcb87d193..1f2a6fe6ec 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -502,6 +502,44 @@ public void testDoubleLiteralExpr() { "source=t b=0.1", filter(relation("t"), compare("=", field("b"), doubleLiteral(0.1)))); } + @Test + public void testExponentLiteralExpr() { + List<String> scientificNotationList = + List.of( + "9e1", + "+9e+1", + "900e-1", + "9.0e1", + "9.0e+1", + "9.0E1", + ".9e+2", + "0.09e+3", + "900.0e-1", + "+900.0E-1"); + for (String 
scientificNotation : scientificNotationList) { + assertEqual( + "source=t b=" + scientificNotation, + filter(relation("t"), compare("=", field("b"), doubleLiteral(90.0)))); + } + List<String> negativeScientificNotationList = + List.of( + "-9e1", + "-9e+1", + "-900e-1", + "-9.0e1", + "-9.0e+1", + "-9.0E1", + "-.9e+2", + "-0.09e+3", + "-900.0e-1", + "-900.0E-1"); + for (String negativeScientificNotation : negativeScientificNotationList) { + assertEqual( + "source=t b=" + negativeScientificNotation, + filter(relation("t"), compare("=", field("b"), doubleLiteral(-90.0)))); + } + } + @Test public void testBooleanLiteralExpr() { assertEqual(