-
Notifications
You must be signed in to change notification settings - Fork 33
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
#963 Unimplemented PPL Sort Syntax #994
Changes from 8 commits
46b0f95
db59830
a5f35bc
cec3f39
953a1fc
9a76e17
3639399
874c63d
0b36545
e7e8785
410b7c0
6c91f52
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,7 +58,7 @@ The example shows how to sort street numbers that are higher than 500 in ``addre | |
|
||
PPL query: | ||
|
||
os> source=accounts | parse address '(?<streetNumber>\d+) (?<street>.+)' | where cast(streetNumber as int) > 500 | sort num(streetNumber) | fields streetNumber, street ; | ||
os> source=accounts | parse address '(?<streetNumber>\d+) (?<street>.+)' | eval streetNumberInt = cast(streetNumber as integer) | where streetNumberInt > 500 | sort streetNumberInt | fields streetNumber, street ; | ||
fetched rows / total rows = 3/3 | ||
+----------------+----------------+ | ||
| streetNumber | street | | ||
|
@@ -68,6 +68,8 @@ PPL query: | |
| 880 | Holmes Lane | | ||
+----------------+----------------+ | ||
|
||
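**Note**: The `sort num(<field>)` syntax is not yet implemented. To sort numerically, cast the field to a numeric data type instead - e.g. `sort cast(streetNumber as integer)`, or assign the cast to a new field with `eval` and sort on that field, as in the example above. See [#963](https://github.com/opensearch-project/opensearch-spark/issues/963) for more details. | ||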
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Someone in the future may get confused by this comment since the |
||
|
||
### Limitations | ||
|
||
There are a few limitations with parse command: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,9 +13,10 @@ import org.scalatest.matchers.should.Matchers | |
import org.apache.spark.SparkFunSuite | ||
import org.apache.spark.sql.catalyst.ScalaReflection.universe.Star | ||
import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction, UnresolvedRelation, UnresolvedStar} | ||
import org.apache.spark.sql.catalyst.expressions.{Alias, Ascending, Coalesce, Descending, GreaterThan, Literal, NamedExpression, NullsFirst, NullsLast, RegExpExtract, SortOrder} | ||
import org.apache.spark.sql.catalyst.expressions.{Alias, Ascending, Cast, Coalesce, Descending, GreaterThan, Literal, NamedExpression, NullsFirst, NullsLast, RegExpExtract, SortOrder} | ||
import org.apache.spark.sql.catalyst.plans.PlanTest | ||
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, GlobalLimit, LocalLimit, Project, Sort} | ||
import org.apache.spark.sql.types.IntegerType | ||
|
||
class PPLLogicalPlanParseTranslatorTestSuite | ||
extends SparkFunSuite | ||
|
@@ -122,41 +123,47 @@ class PPLLogicalPlanParseTranslatorTestSuite | |
|
||
test("test parse email & host expressions including cast and sort commands") { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @currantw please add the corresponding integration test (a placeholder?) for a concrete index:
|
||
val context = new CatalystPlanContext | ||
val logPlan = | ||
planTransformer.visit( | ||
plan( | ||
pplParser, | ||
"source=t | parse address '(?<streetNumber>\\d+) (?<street>.+)' | where streetNumber > 500 | sort num(streetNumber) | fields streetNumber, street"), | ||
context) | ||
|
||
// TODO #963: Implement sort syntax | ||
val query = | ||
"source=t" + | ||
" | parse address '(?<streetNumber>\\d+) (?<street>.+)'" + | ||
" | eval streetNumberInt = cast(streetNumber as integer)" + | ||
" | where streetNumberInt > 500" + | ||
" | sort streetNumberInt" + | ||
" | fields streetNumber, street" | ||
|
||
val logPlan = planTransformer.visit(plan(pplParser, query), context) | ||
|
||
val addressAttribute = UnresolvedAttribute("address") | ||
val streetNumberAttribute = UnresolvedAttribute("streetNumber") | ||
val streetAttribute = UnresolvedAttribute("street") | ||
val streetNumberIntAttribute = UnresolvedAttribute("streetNumberInt") | ||
|
||
val streetNumberExpression = Alias( | ||
RegExpExtract( | ||
addressAttribute, | ||
Literal("(?<streetNumber>\\d+) (?<street>.+)"), | ||
Literal("1")), | ||
"streetNumber")() | ||
val regexLiteral = Literal("(?<streetNumber>\\d+) (?<street>.+)") | ||
val streetNumberExpression = | ||
Alias(RegExpExtract(addressAttribute, regexLiteral, Literal("1")), "streetNumber")() | ||
val streetExpression = | ||
Alias(RegExpExtract(addressAttribute, regexLiteral, Literal("2")), "street")() | ||
|
||
val streetExpression = Alias( | ||
RegExpExtract( | ||
addressAttribute, | ||
Literal("(?<streetNumber>\\d+) (?<street>.+)"), | ||
Literal("2")), | ||
"street")() | ||
val castExpression = Cast(streetNumberAttribute, IntegerType) | ||
|
||
val expectedPlan = Project( | ||
Seq(streetNumberAttribute, streetAttribute), | ||
Sort( | ||
Seq(SortOrder(streetNumberAttribute, Ascending, NullsFirst, Seq.empty)), | ||
Seq(SortOrder(streetNumberIntAttribute, Ascending, NullsFirst, Seq.empty)), | ||
global = true, | ||
Filter( | ||
GreaterThan(streetNumberAttribute, Literal(500)), | ||
GreaterThan(streetNumberIntAttribute, Literal(500)), | ||
Project( | ||
Seq(addressAttribute, streetNumberExpression, streetExpression, UnresolvedStar(None)), | ||
UnresolvedRelation(Seq("t")))))) | ||
Seq(UnresolvedStar(None), Alias(castExpression, "streetNumberInt")()), | ||
Project( | ||
Seq( | ||
addressAttribute, | ||
streetNumberExpression, | ||
streetExpression, | ||
UnresolvedStar(None)), | ||
UnresolvedRelation(Seq("t"))))))) | ||
|
||
assert(compareByString(expectedPlan) === compareByString(logPlan)) | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.