From 212febced1a8f8d1cbf241327924dc6cf7c1f8c9 Mon Sep 17 00:00:00 2001 From: wforget <643348094@qq.com> Date: Fri, 10 Jan 2025 20:35:06 +0800 Subject: [PATCH 1/3] [GLUTEN-8499][VL] Fix the logic of cast expression --- .../gluten/execution/ScalarFunctionsValidateSuite.scala | 4 ++-- .../org/apache/gluten/expression/ExpressionConverter.scala | 3 +++ .../apache/gluten/expression/UnaryExpressionTransformer.scala | 2 +- .../org/apache/gluten/sql/shims/spark34/Spark34Shims.scala | 1 + .../org/apache/gluten/sql/shims/spark35/Spark35Shims.scala | 1 + 5 files changed, 8 insertions(+), 3 deletions(-) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala index a0c1f1ec000b..9a4da9aaf70b 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala @@ -1482,13 +1482,13 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite { withTempView("try_cast_table") { withTempPath { path => - Seq[(String)](("123456"), ("000A1234")) + Seq[(String)](("123456"), ("000A1234"), ("1.1")) .toDF("str") .write .parquet(path.getCanonicalPath) spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("try_cast_table") runQueryAndCompare("select try_cast(str as bigint) from try_cast_table") { - checkGlutenOperatorMatch[ProjectExecTransformer] + checkSparkOperatorMatch[ProjectExec] } runQueryAndCompare("select try_cast(str as double) from try_cast_table") { checkGlutenOperatorMatch[ProjectExecTransformer] diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala index e4aeb3ef78a9..cbd492289658 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala @@ -337,6 +337,9 @@ object ExpressionConverter extends SQLConfHelper with Logging { case s: ScalarSubquery => ScalarSubqueryTransformer(substraitExprName, s) case c: Cast => + if (SparkShimLoader.getSparkShims.withAnsiEvalMode(c)) { + throw new GlutenNotSupportException(s"Cast expression does not support ANSI mode, $c") + } // Add trim node, as necessary. val newCast = BackendsApiManager.getSparkPlanExecApiInstance.genCastWithNewChild(c) diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala b/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala index f9eb1e8eab42..12c06ee37d24 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala @@ -47,7 +47,7 @@ case class CastTransformer(substraitExprName: String, child: ExpressionTransform ExpressionBuilder.makeCast( typeNode, child.doTransform(args), - SparkShimLoader.getSparkShims.withAnsiEvalMode(original)) + SparkShimLoader.getSparkShims.withTryEvalMode(original)) } } diff --git a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala index bedad4c01741..7b30d1230250 100644 --- a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala +++ b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala @@ -463,6 +463,7 @@ class Spark34Shims extends SparkShims { case s: Subtract => s.evalMode == EvalMode.ANSI case d: Divide => d.evalMode == EvalMode.ANSI case m: Multiply => m.evalMode == EvalMode.ANSI + case c: Cast => c.ansiEnabled case _ => false } } diff --git a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala index 43ed51579a1b..b45c0ecfaf22 100644 --- a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala +++ b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala @@ -488,6 +488,7 @@ class Spark35Shims extends SparkShims { case s: Subtract => s.evalMode == EvalMode.ANSI case d: Divide => d.evalMode == EvalMode.ANSI case m: Multiply => m.evalMode == EvalMode.ANSI + case c: Cast => c.ansiEnabled case _ => false } } From d20b460dfe66dd2e0c8fa228ece9232770a6feb4 Mon Sep 17 00:00:00 2001 From: wforget <643348094@qq.com> Date: Fri, 10 Jan 2025 21:41:29 +0800 Subject: [PATCH 2/3] fix --- .../apache/gluten/execution/ScalarFunctionsValidateSuite.scala | 2 +- .../apache/gluten/expression/UnaryExpressionTransformer.scala | 2 +- .../org/apache/gluten/sql/shims/spark34/Spark34Shims.scala | 1 + .../org/apache/gluten/sql/shims/spark35/Spark35Shims.scala | 1 + 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala index 9a4da9aaf70b..a43650f39eb9 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala @@ -1482,7 +1482,7 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite { withTempView("try_cast_table") { withTempPath { path => - Seq[(String)](("123456"), ("000A1234"), ("1.1")) + Seq[(String)](("123456"), ("000A1234"), ("1.1"), ("1a.1")) .toDF("str") .write .parquet(path.getCanonicalPath) diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala b/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala index 12c06ee37d24..2c6c37295edc 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala @@ -47,7 +47,7 @@ case class CastTransformer(substraitExprName: String, child: ExpressionTransform ExpressionBuilder.makeCast( typeNode, child.doTransform(args), - SparkShimLoader.getSparkShims.withTryEvalMode(original)) + !SparkShimLoader.getSparkShims.withTryEvalMode(original)) } } diff --git a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala index 7b30d1230250..7c89826f112a 100644 --- a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala +++ b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala @@ -453,6 +453,7 @@ class Spark34Shims extends SparkShims { case s: Subtract => s.evalMode == EvalMode.TRY case d: Divide => d.evalMode == EvalMode.TRY case m: Multiply => m.evalMode == EvalMode.TRY + case c: Cast => c.isTryCast case _ => false } } diff --git a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala index b45c0ecfaf22..d3b4ce8fa4b1 100644 --- a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala +++ b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala @@ -478,6 +478,7 @@ class Spark35Shims extends SparkShims { case s: Subtract => s.evalMode == EvalMode.TRY case d: Divide => d.evalMode == EvalMode.TRY case m: Multiply => m.evalMode == EvalMode.TRY + case c: Cast => c.isTryCast case _ => false } } From ce5ce1a36c2354dd3d9243be8922ebc94585cd49 Mon Sep 17 00:00:00 2001 From: wforget <643348094@qq.com> Date: Sat, 11 Jan 2025 09:30:32 +0800 Subject: [PATCH 3/3] fix --- .../apache/gluten/expression/UnaryExpressionTransformer.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala b/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala index 2c6c37295edc..54ffd1689cc9 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala @@ -47,7 +47,8 @@ case class CastTransformer(substraitExprName: String, child: ExpressionTransform ExpressionBuilder.makeCast( typeNode, child.doTransform(args), - !SparkShimLoader.getSparkShims.withTryEvalMode(original)) + SparkShimLoader.getSparkShims.withAnsiEvalMode(original) && + !SparkShimLoader.getSparkShims.withTryEvalMode(original)) } }