Skip to content

Commit

Permalink
[CH] Support shuffle function (apache#5432)
Browse files: browse the repository at this point in the history.
What changes were proposed in this pull request?
How was this patch tested?
Pass CI
  • Loading branch information
exmy authored Apr 18, 2024
1 parent 3e5742a commit 65dd411
Show file tree
Hide file tree
Showing 12 changed files with 51 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,22 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends GlutenClickHouseTPCHAbstr
}
}

test("test shuffle function") {
  // Disable ConstantFolding and NullPropagation for the duration of the test;
  // presumably this keeps the literal-only shuffle(...) calls in the second query
  // from being rewritten by the optimizer before they reach the backend -- TODO confirm.
  val excludedOptimizerRules =
    Seq(ConstantFolding.ruleName, NullPropagation.ruleName).mkString(",")

  // Column-driven input: shuffle an array produced by splitting a table column.
  val shuffleOnColumnSql = "select shuffle(split(n_comment, ' ')) from nation"
  // Literal inputs: an array without nulls, an array containing a null, and a bare null.
  val shuffleOnLiteralsSql =
    "select shuffle(array(1,2,3,4,5)), shuffle(array(1,3,null,3,4)), shuffle(null)"

  withSQLConf(SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> excludedOptimizerRules) {
    // Results are not compared (presumably because shuffle output order is random);
    // the check only asserts that a ProjectExecTransformer appears in the plan.
    runQueryAndCompare(shuffleOnColumnSql, compareResult = false)(
      checkGlutenOperatorMatch[ProjectExecTransformer])

    // Same operator check for the literal query, with noFallBack switched off.
    runQueryAndCompare(
      shuffleOnLiteralsSql,
      compareResult = false,
      noFallBack = false
    )(checkGlutenOperatorMatch[ProjectExecTransformer])
  }
}

test("test 'function regexp_extract_all'") {
runQueryAndCompare(
"select l_orderkey, regexp_extract_all(l_comment, '([a-z])', 1) " +
Expand Down
1 change: 1 addition & 0 deletions cpp-ch/local-engine/Parser/SerializedPlanParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ static const std::map<std::string, std::string> SCALAR_FUNCTIONS

// array functions
{"array", "array"},
{"shuffle", "arrayShuffle"},
{"range", "range"}, /// dummy mapping

// map functions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ object ExpressionMappings {
Sig[ArrayRepeat](ARRAY_REPEAT),
Sig[ArrayRemove](ARRAY_REMOVE),
Sig[ArrayFilter](FILTER),
Sig[Shuffle](SHUFFLE),
// Map functions
Sig[CreateMap](CREATE_MAP),
Sig[GetMapValue](GET_MAP_VALUE),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,9 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("transform values function - test empty")
.exclude("SPARK-14393: values generated by non-deterministic functions shouldn't change after coalesce or union")
.exclude("SPARK-24734: Fix containsNull of Concat for array type")
.exclude("shuffle function - array for primitive type not containing null")
.exclude("shuffle function - array for primitive type containing null")
.exclude("shuffle function - array for non-primitive type")
enableSuite[GlutenDataFrameHintSuite]
enableSuite[GlutenDataFrameImplicitsSuite]
enableSuite[GlutenDataFrameJoinSuite].exclude(
Expand Down Expand Up @@ -686,6 +689,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-36755: ArraysOverlap hould handle duplicated Double.NaN and Float.Nan")
.exclude(
"SPARK-36740: ArrayMin/ArrayMax/SortArray should handle NaN greater then non-NaN value")
.excludeGlutenTest("Shuffle")
enableSuite[GlutenComplexTypeSuite]
.exclude("SPARK-33386: GetArrayItem ArrayIndexOutOfBoundsException")
.exclude("SPARK-33460: GetMapValue NoSuchElementException")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenCollectionExpressionsSuite]
// Rewrite in Gluten to replace Seq with Array
.exclude("Shuffle")
.excludeGlutenTest("Shuffle")
// TODO: ArrayDistinct should handle duplicated Double.NaN
.excludeByPrefix("SPARK-36741")
// TODO: ArrayIntersect should handle duplicated Double.NaN
Expand Down Expand Up @@ -273,6 +274,9 @@ class VeloxTestSettings extends BackendTestSettings {
// blocked by Velox-5768
.exclude("aggregate function - array for primitive type containing null")
.exclude("aggregate function - array for non-primitive type")
.exclude("shuffle function - array for primitive type not containing null")
.exclude("shuffle function - array for primitive type containing null")
.exclude("shuffle function - array for non-primitive type")
enableSuite[GlutenDataFrameTungstenSuite]
enableSuite[GlutenDataFrameSetOperationsSuite]
// Result depends on the implementation for nondeterministic expression rand.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,9 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("transform values function - test empty")
.exclude("SPARK-14393: values generated by non-deterministic functions shouldn't change after coalesce or union")
.exclude("SPARK-24734: Fix containsNull of Concat for array type")
.exclude("shuffle function - array for primitive type not containing null")
.exclude("shuffle function - array for primitive type containing null")
.exclude("shuffle function - array for non-primitive type")
enableSuite[GlutenDataFrameHintSuite]
enableSuite[GlutenDataFrameImplicitsSuite]
enableSuite[GlutenDataFrameJoinSuite].exclude(
Expand Down Expand Up @@ -727,6 +730,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude(
"SPARK-36740: ArrayMin/ArrayMax/SortArray should handle NaN greater then non-NaN value")
.exclude("SPARK-39184: Avoid ArrayIndexOutOfBoundsException when crossing DST boundary")
.excludeGlutenTest("Shuffle")
enableSuite[GlutenComplexTypeSuite]
.exclude("SPARK-33386: GetArrayItem ArrayIndexOutOfBoundsException")
.exclude("SPARK-33460: GetMapValue NoSuchElementException")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenCollectionExpressionsSuite]
// Rewrite in Gluten to replace Seq with Array
.exclude("Shuffle")
.excludeGlutenTest("Shuffle")
// TODO: ArrayDistinct should handle duplicated Double.NaN
.excludeByPrefix("SPARK-36741")
// TODO: ArrayIntersect should handle duplicated Double.NaN
Expand Down Expand Up @@ -938,6 +939,9 @@ class VeloxTestSettings extends BackendTestSettings {
// blocked by Velox-5768
.exclude("aggregate function - array for primitive type containing null")
.exclude("aggregate function - array for non-primitive type")
.exclude("shuffle function - array for primitive type not containing null")
.exclude("shuffle function - array for primitive type containing null")
.exclude("shuffle function - array for non-primitive type")
enableSuite[GlutenDataFrameHintSuite]
enableSuite[GlutenDataFrameImplicitsSuite]
enableSuite[GlutenDataFrameJoinSuite]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,9 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("transform values function - test empty")
.exclude("SPARK-14393: values generated by non-deterministic functions shouldn't change after coalesce or union")
.exclude("SPARK-24734: Fix containsNull of Concat for array type")
.exclude("shuffle function - array for primitive type not containing null")
.exclude("shuffle function - array for primitive type containing null")
.exclude("shuffle function - array for non-primitive type")
enableSuite[GlutenDataFrameHintSuite]
enableSuite[GlutenDataFrameImplicitsSuite]
enableSuite[GlutenDataFrameJoinSuite].exclude(
Expand Down Expand Up @@ -567,6 +570,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude(
"SPARK-36740: ArrayMin/ArrayMax/SortArray should handle NaN greater then non-NaN value")
.exclude("SPARK-39184: Avoid ArrayIndexOutOfBoundsException when crossing DST boundary")
.excludeGlutenTest("Shuffle")
enableSuite[GlutenComplexTypeSuite]
.exclude("SPARK-33386: GetArrayItem ArrayIndexOutOfBoundsException")
.exclude("SPARK-33460: GetMapValue NoSuchElementException")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenCollectionExpressionsSuite]
// Rewrite in Gluten to replace Seq with Array
.exclude("Shuffle")
.excludeGlutenTest("Shuffle")
// TODO: ArrayDistinct should handle duplicated Double.NaN
.excludeByPrefix("SPARK-36741")
// TODO: ArrayIntersect should handle duplicated Double.NaN
Expand Down Expand Up @@ -943,6 +944,9 @@ class VeloxTestSettings extends BackendTestSettings {
// blocked by Velox-5768
.exclude("aggregate function - array for primitive type containing null")
.exclude("aggregate function - array for non-primitive type")
.exclude("shuffle function - array for primitive type not containing null")
.exclude("shuffle function - array for primitive type containing null")
.exclude("shuffle function - array for non-primitive type")
enableSuite[GlutenDataFrameHintSuite]
enableSuite[GlutenDataFrameImplicitsSuite]
enableSuite[GlutenDataFrameJoinSuite]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,9 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("transform values function - test empty")
.exclude("SPARK-14393: values generated by non-deterministic functions shouldn't change after coalesce or union")
.exclude("SPARK-24734: Fix containsNull of Concat for array type")
.exclude("shuffle function - array for primitive type not containing null")
.exclude("shuffle function - array for primitive type containing null")
.exclude("shuffle function - array for non-primitive type")
enableSuite[GlutenDataFrameHintSuite]
enableSuite[GlutenDataFrameImplicitsSuite]
enableSuite[GlutenDataFrameJoinSuite].exclude(
Expand Down Expand Up @@ -567,6 +570,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude(
"SPARK-36740: ArrayMin/ArrayMax/SortArray should handle NaN greater then non-NaN value")
.exclude("SPARK-39184: Avoid ArrayIndexOutOfBoundsException when crossing DST boundary")
.excludeGlutenTest("Shuffle")
enableSuite[GlutenComplexTypeSuite]
.exclude("SPARK-33386: GetArrayItem ArrayIndexOutOfBoundsException")
.exclude("SPARK-33460: GetMapValue NoSuchElementException")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenCollectionExpressionsSuite]
// Rewrite in Gluten to replace Seq with Array
.exclude("Shuffle")
.excludeGlutenTest("Shuffle")
// TODO: ArrayDistinct should handle duplicated Double.NaN
.excludeByPrefix("SPARK-36741")
// TODO: ArrayIntersect should handle duplicated Double.NaN
Expand Down Expand Up @@ -959,6 +960,9 @@ class VeloxTestSettings extends BackendTestSettings {
// blocked by Velox-5768
.exclude("aggregate function - array for primitive type containing null")
.exclude("aggregate function - array for non-primitive type")
.exclude("shuffle function - array for primitive type not containing null")
.exclude("shuffle function - array for primitive type containing null")
.exclude("shuffle function - array for non-primitive type")
enableSuite[GlutenDataFrameHintSuite]
enableSuite[GlutenDataFrameImplicitsSuite]
enableSuite[GlutenDataFrameJoinSuite]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ object ExpressionNames {
final val ARRAY_REPEAT = "array_repeat"
final val ARRAY_REMOVE = "array_remove"
final val FILTER = "filter"
final val SHUFFLE = "shuffle"

// Map functions
final val CREATE_MAP = "map"
Expand Down

0 comments on commit 65dd411

Please sign in to comment.