From d8c54aa0f9452c4e62b93c31d819193c6f6c8d60 Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Sat, 19 Oct 2024 11:07:28 +0800 Subject: [PATCH] Support line comment and block comment in PPL Signed-off-by: Lantao Jin --- docs/ppl-lang/PPL-Example-Commands.md | 5 ++ docs/ppl-lang/README.md | 2 + docs/ppl-lang/ppl-comment.md | 34 ++++++++ .../ppl/FlintSparkPPLCommentITSuite.scala | 79 +++++++++++++++++++ .../src/main/antlr4/OpenSearchPPLLexer.g4 | 2 + ...lPlanBasicQueriesTranslatorTestSuite.scala | 42 ++++++++++ 6 files changed, 164 insertions(+) create mode 100644 docs/ppl-lang/ppl-comment.md create mode 100644 integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLCommentITSuite.scala diff --git a/docs/ppl-lang/PPL-Example-Commands.md b/docs/ppl-lang/PPL-Example-Commands.md index 96eeef726..2bd56df89 100644 --- a/docs/ppl-lang/PPL-Example-Commands.md +++ b/docs/ppl-lang/PPL-Example-Commands.md @@ -1,5 +1,10 @@ ## Example PPL Queries +#### **Comment** +[See additional command details](ppl-comment.md) +- `source=accounts | top gender // finds most common gender of all the accounts` (line comment) +- `source=accounts | dedup 2 gender /* dedup the document with gender field keep 2 duplication */ | fields account_number, gender` (block comment) + #### **Describe** - `describe table` This command is equal to the `DESCRIBE EXTENDED table` SQL command - `describe schema.table` diff --git a/docs/ppl-lang/README.md b/docs/ppl-lang/README.md index 2ddceca0a..9054a79f5 100644 --- a/docs/ppl-lang/README.md +++ b/docs/ppl-lang/README.md @@ -22,6 +22,8 @@ For additional examples see the next [documentation](PPL-Example-Commands.md). 
* **Commands** + - [`comment`](ppl-comment.md) + - [`explain command `](PPL-Example-Commands.md/#explain) - [`dedup command `](ppl-dedup-command.md) diff --git a/docs/ppl-lang/ppl-comment.md b/docs/ppl-lang/ppl-comment.md new file mode 100644 index 000000000..3a869955b --- /dev/null +++ b/docs/ppl-lang/ppl-comment.md @@ -0,0 +1,34 @@ +## Comments + +Comments are text that is not evaluated. PPL supports both line comments and block comments. + +### Line Comments + +Line comments begin with two slashes `//` and end with a new line. + +Example:: + + os> source=accounts | top gender // finds most common gender of all the accounts + fetched rows / total rows = 2/2 + +----------+ + | gender | + |----------| + | M | + | F | + +----------+ + +### Block Comments + +Block comments begin with a slash followed by an asterisk `/*` and end with an asterisk followed by a slash `*/`. + +Example:: + + os> source=accounts | dedup 2 gender /* dedup the document with gender field keep 2 duplication */ | fields account_number, gender + fetched rows / total rows = 3/3 + +------------------+----------+ + | account_number | gender | + |------------------+----------| + | 1 | M | + | 6 | M | + | 13 | F | + +------------------+----------+ \ No newline at end of file diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLCommentITSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLCommentITSuite.scala new file mode 100644 index 000000000..71d9f1693 --- /dev/null +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLCommentITSuite.scala @@ -0,0 +1,79 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.flint.spark.ppl + +import org.opensearch.sql.ppl.utils.DataTypeTransformer.seq + +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction, 
UnresolvedRelation, UnresolvedStar} +import org.apache.spark.sql.catalyst.expressions.{Alias, And, Ascending, CaseWhen, Descending, EqualTo, GreaterThanOrEqual, LessThan, Literal, SortOrder} +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.streaming.StreamTest + +class FlintSparkPPLCommentITSuite + extends QueryTest + with LogicalPlanTestUtils + with FlintPPLSuite + with StreamTest { + + private val testTable = "spark_catalog.default.flint_ppl_test" + + override def beforeAll(): Unit = { + super.beforeAll() + + createPartitionedStateCountryTable(testTable) + } + + protected override def afterEach(): Unit = { + super.afterEach() + spark.streams.active.foreach { job => + job.stop() + job.awaitTermination() + } + } + + test("test block comment") { + val frame = sql(s""" + | /* + | * This is a + | * multiple + | * line block + | * comment + | */ + | source = /* block comment */ $testTable /* block comment */ + | | eval /* + | This is a + | multiple + | line + | block + | comment + | */ col = 1 + | | /* block comment */ fields name, /* block comment */ age + | /* block comment */ + | """.stripMargin) + + val results: Array[Row] = frame.collect() + val expectedResults: Array[Row] = + Array(Row("Jake", 70), Row("Hello", 30), Row("John", 25), Row("Jane", 20)) + implicit val rowOrdering: Ordering[Row] = Ordering.by[Row, String](_.getAs[String](0)) + assert(results.sorted.sameElements(expectedResults.sorted)) + } + + test("test line comment") { + val frame = sql(s""" + | source = $testTable //line comment + | | eval col = 1 // line comment + | | fields name, age // line comment + | /////////line comment + | """.stripMargin) + + val results: Array[Row] = frame.collect() + val expectedResults: Array[Row] = + Array(Row("Jake", 70), Row("Hello", 30), Row("John", 25), Row("Jane", 20)) + implicit val rowOrdering: Ordering[Row] = Ordering.by[Row, String](_.getAs[String](0)) + assert(results.sorted.sameElements(expectedResults.sorted)) + } +} diff 
--git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 index 2b41530f0..f62553d4c 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 @@ -441,5 +441,7 @@ SQUOTA_STRING: '\'' ('\\'. | '\'\'' | ~('\'' | '\\'))* '\'' BQUOTA_STRING: '`' ( '\\'. | '``' | ~('`'|'\\'))* '`'; fragment DEC_DIGIT: [0-9]; +LINE_COMMENT: '//' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN); +BLOCK_COMMENT: '/*' .*? '*/' -> channel(HIDDEN); ERROR_RECOGNITION: . -> channel(ERRORCHANNEL); diff --git a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanBasicQueriesTranslatorTestSuite.scala b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanBasicQueriesTranslatorTestSuite.scala index 96176982e..2da93d5d8 100644 --- a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanBasicQueriesTranslatorTestSuite.scala +++ b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanBasicQueriesTranslatorTestSuite.scala @@ -354,4 +354,46 @@ class PPLLogicalPlanBasicQueriesTranslatorTestSuite thrown.getMessage === "[Field(field=A, fieldArgs=[]), Field(field=B, fieldArgs=[])] can't be resolved") } + + test("test line comment should pass without exceptions") { + val context = new CatalystPlanContext + planTransformer.visit(plan(pplParser, "source=t a=1 b=2 //this is a comment"), context) + planTransformer.visit(plan(pplParser, "source=t a=1 b=2 // this is a comment "), context) + planTransformer.visit( + plan( + pplParser, + """ + | // test is a new line comment + | source=t a=1 b=2 // test is a line comment at the end of ppl command + | | fields a,b // this is line comment inner ppl command + | ////this is a new line comment + |""".stripMargin), + context) + } + + test("test block comment should pass without exceptions") { + val context 
= new CatalystPlanContext + planTransformer.visit(plan(pplParser, "source=t a=1 b=2 /*block comment*/"), context) + planTransformer.visit(plan(pplParser, "source=t a=1 b=2 /* block comment */"), context) + planTransformer.visit( + plan( + pplParser, + """ + | /* + | * This is a + | * multiple + | * line block + | * comment + | */ + | search /* block comment */ source=t /* block comment */ a=1 b=2 + | | /* + | This is a + | multiple + | line + | block + | comment */ fields a,b /* block comment */ + | /* block comment */ + |""".stripMargin), + context) + } }