From 63e83bd30f112ae4f305b4decc087153726f94ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=AB=98=E9=98=B3=E9=98=B3?= Date: Fri, 22 Mar 2024 20:53:45 +0800 Subject: [PATCH] [CORE] Add support for Spark url_decode function (#5070) --- .../io/glutenproject/utils/CHExpressionUtil.scala | 1 + .../execution/VeloxFunctionsValidateSuite.scala | 15 +++++++++++++++ .../expression/ExpressionConverter.scala | 14 ++++++++++++++ .../expression/ExpressionNames.scala | 1 + 4 files changed, 31 insertions(+) diff --git a/backends-clickhouse/src/main/scala/io/glutenproject/utils/CHExpressionUtil.scala b/backends-clickhouse/src/main/scala/io/glutenproject/utils/CHExpressionUtil.scala index 24555c05c1a0..028e4e9e9dba 100644 --- a/backends-clickhouse/src/main/scala/io/glutenproject/utils/CHExpressionUtil.scala +++ b/backends-clickhouse/src/main/scala/io/glutenproject/utils/CHExpressionUtil.scala @@ -177,6 +177,7 @@ object CHExpressionUtil { DATE_FROM_UNIX_DATE -> DefaultValidator(), MONOTONICALLY_INCREASING_ID -> DefaultValidator(), SPARK_PARTITION_ID -> DefaultValidator(), + URL_DECODE -> DefaultValidator(), SKEWNESS -> DefaultValidator(), BIT_LENGTH -> DefaultValidator() ) diff --git a/backends-velox/src/test/scala/io/glutenproject/execution/VeloxFunctionsValidateSuite.scala b/backends-velox/src/test/scala/io/glutenproject/execution/VeloxFunctionsValidateSuite.scala index 1e9871a6c422..ef9c80c4e97f 100644 --- a/backends-velox/src/test/scala/io/glutenproject/execution/VeloxFunctionsValidateSuite.scala +++ b/backends-velox/src/test/scala/io/glutenproject/execution/VeloxFunctionsValidateSuite.scala @@ -458,6 +458,21 @@ class VeloxFunctionsValidateSuite extends VeloxWholeStageTransformerSuite { } } + testWithSpecifiedSparkVersion("Test url_decode function", Some("3.4.2")) { + withTempPath { + path => + Seq("https%3A%2F%2Fspark.apache.org") + .toDF("a") + .write + .parquet(path.getCanonicalPath) + spark.sparkContext.setLogLevel("info") + spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("url_tbl") + runQueryAndCompare("select url_decode(a) from url_tbl") { + checkOperatorMatch[ProjectExecTransformer] + } + } + } + test("Test hex function") { runQueryAndCompare("SELECT hex(l_partkey), hex(l_shipmode) FROM lineitem limit 1") { checkOperatorMatch[ProjectExecTransformer] diff --git a/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionConverter.scala b/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionConverter.scala index f8f3ead05739..4de0ab14234f 100644 --- a/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionConverter.scala +++ b/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionConverter.scala @@ -28,6 +28,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.{InternalRow, SQLConfHelper} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} +import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke import org.apache.spark.sql.catalyst.optimizer.NormalizeNaNAndZero import org.apache.spark.sql.execution.{ScalarSubquery, _} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec @@ -115,6 +116,19 @@ object ExpressionConverter extends SQLConfHelper with Logging { return replaceScalaUDFWithExpressionTransformer(s, attributeSeq, expressionsMap) case _ if HiveUDFTransformer.isHiveUDF(expr) => return HiveUDFTransformer.replaceWithExpressionTransformer(expr, attributeSeq) + case i: StaticInvoke => + val objectName = i.staticObject.getName.stripSuffix("$") + if (objectName.endsWith("UrlCodec")) { + val child = i.arguments(0) + i.functionName match { + case "decode" => + return GenericExpressionTransformer( + ExpressionNames.URL_DECODE, + child.map( + replaceWithExpressionTransformerInternal(_, attributeSeq, expressionsMap)), + i) + } + } case _ => } diff --git a/shims/common/src/main/scala/io/glutenproject/expression/ExpressionNames.scala b/shims/common/src/main/scala/io/glutenproject/expression/ExpressionNames.scala index cb9e1ab71cd7..2f339162913b 100644 --- a/shims/common/src/main/scala/io/glutenproject/expression/ExpressionNames.scala +++ b/shims/common/src/main/scala/io/glutenproject/expression/ExpressionNames.scala @@ -117,6 +117,7 @@ object ExpressionNames { // URL functions final val PARSE_URL = "parse_url" + final val URL_DECODE = "url_decode" // SparkSQL Math functions final val ABS = "abs"