diff --git a/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHSparkPlanExecApi.scala b/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHSparkPlanExecApi.scala
index 9bd1f938ab077..cea88266ea442 100644
--- a/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHSparkPlanExecApi.scala
+++ b/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHSparkPlanExecApi.scala
@@ -581,6 +581,13 @@ class CHSparkPlanExecApi extends SparkPlanExecApi {
     CHPosExplodeTransformer(substraitExprName, child, original, attributeSeq)
   }
 
+  override def genRegexpReplaceTransformer(
+      substraitExprName: String,
+      children: Seq[ExpressionTransformer],
+      expr: RegExpReplace): ExpressionTransformer = {
+    CHRegExpReplaceTransformer(substraitExprName, children, expr)
+  }
+
   override def createColumnarWriteFilesExec(
       child: SparkPlan,
       fileFormat: FileFormat,
diff --git a/backends-clickhouse/src/main/scala/io/glutenproject/expression/CHExpressionTransformer.scala b/backends-clickhouse/src/main/scala/io/glutenproject/expression/CHExpressionTransformer.scala
index bb8b704d9069e..a52c67265971e 100644
--- a/backends-clickhouse/src/main/scala/io/glutenproject/expression/CHExpressionTransformer.scala
+++ b/backends-clickhouse/src/main/scala/io/glutenproject/expression/CHExpressionTransformer.scala
@@ -197,3 +197,39 @@ case class CHPosExplodeTransformer(
     }
   }
 }
+
+/**
+ * ClickHouse transformer for Spark's `regexp_replace(subject, regexp, rep, pos)`.
+ *
+ * The emitted function receives only `subject`, `regexp` and `rep`; the position child is
+ * validated and then dropped, so only a literal `pos` of 1 can be offloaded.
+ *
+ * @param substraitExprName
+ *   name of the Substrait function to emit
+ * @param children
+ *   transformers for `subject`, `regexp`, `rep` and `pos`, in that order
+ * @param original
+ *   the Spark expression being converted
+ */
+case class CHRegExpReplaceTransformer(
+    substraitExprName: String,
+    children: Seq[ExpressionTransformer],
+    original: RegExpReplace)
+  extends ExpressionTransformer {
+
+  override def doTransform(args: java.lang.Object): ExpressionNode = children match {
+    case Seq(subject, regexp, rep, pos, _*) =>
+      // In CH: replaceRegexpAll(subject, regexp, rep), which is equivalent
+      // In Spark: regexp_replace(subject, regexp, rep, pos=1)
+      pos.doTransform(args) match {
+        case lit: IntLiteralNode if lit.getValue == 1 =>
+          GenericExpressionTransformer(substraitExprName, Seq(subject, regexp, rep), original)
+            .doTransform(args)
+        case _ =>
+          throw new UnsupportedOperationException(s"$original not supported yet.")
+      }
+    case _ =>
+      // Fewer than four children: there is no position to validate, so refuse to offload.
+      throw new UnsupportedOperationException(s"$original not supported yet.")
+  }
+}
diff --git a/backends-velox/src/test/scala/io/glutenproject/execution/VeloxStringFunctionsSuite.scala b/backends-velox/src/test/scala/io/glutenproject/execution/VeloxStringFunctionsSuite.scala
index c306d70ac5190..ed1f851cae6cd 100644
--- a/backends-velox/src/test/scala/io/glutenproject/execution/VeloxStringFunctionsSuite.scala
+++ b/backends-velox/src/test/scala/io/glutenproject/execution/VeloxStringFunctionsSuite.scala
@@ -466,12 +466,9 @@ class VeloxStringFunctionsSuite extends VeloxWholeStageTransformerSuite {
     runQueryAndCompare(
       s"select l_orderkey, regexp_replace(l_comment, '([a-z])', '1', 1) " +
         s"from $LINEITEM_TABLE limit 5")(checkOperatorMatch[ProjectExecTransformer])
-    // todo incorrect results
     runQueryAndCompare(
       s"select l_orderkey, regexp_replace(l_comment, '([a-z])', '1', 10) " +
-        s"from $LINEITEM_TABLE limit 5",
-      true,
-      false)(_ => {})
+        s"from $LINEITEM_TABLE limit 5")(checkOperatorMatch[ProjectExecTransformer])
   }
 
   test("regex invalid") {
diff --git a/gluten-core/src/main/scala/io/glutenproject/backendsapi/SparkPlanExecApi.scala b/gluten-core/src/main/scala/io/glutenproject/backendsapi/SparkPlanExecApi.scala
index b4466cf206f0f..e8aceb2064b5d 100644
--- a/gluten-core/src/main/scala/io/glutenproject/backendsapi/SparkPlanExecApi.scala
+++ b/gluten-core/src/main/scala/io/glutenproject/backendsapi/SparkPlanExecApi.scala
@@ -216,6 +216,17 @@ trait SparkPlanExecApi {
     throw new GlutenNotSupportException("map_entries is not supported")
   }
 
+  /**
+   * Generate the transformer for regexp_replace. By default every child is forwarded unchanged
+   * to a generic transformer; a backend can override this to apply its own handling.
+   */
+  def genRegexpReplaceTransformer(
+      substraitExprName: String,
+      children: Seq[ExpressionTransformer],
+      expr: RegExpReplace): ExpressionTransformer = {
+    GenericExpressionTransformer(substraitExprName, children, expr)
+  }
+
   /**
    * Generate ShuffleDependency for ColumnarShuffleExchangeExec.
    *
diff --git a/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionConverter.scala b/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionConverter.scala
index b13786a11a6cb..a01c52eb58156 100644
--- a/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionConverter.scala
+++ b/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionConverter.scala
@@ -372,12 +372,14 @@ object ExpressionConverter extends SQLConfHelper with Logging {
           s
         )
       case r: RegExpReplace =>
-        RegExpReplaceTransformer(
-          substraitExprName,
-          replaceWithExpressionTransformerInternal(r.subject, attributeSeq, expressionsMap),
-          replaceWithExpressionTransformerInternal(r.regexp, attributeSeq, expressionsMap),
-          replaceWithExpressionTransformerInternal(r.rep, attributeSeq, expressionsMap),
-          replaceWithExpressionTransformerInternal(r.pos, attributeSeq, expressionsMap),
+        BackendsApiManager.getSparkPlanExecApiInstance.genRegexpReplaceTransformer(
+          substraitExprName,
+          Seq(
+            replaceWithExpressionTransformerInternal(r.subject, attributeSeq, expressionsMap),
+            replaceWithExpressionTransformerInternal(r.regexp, attributeSeq, expressionsMap),
+            replaceWithExpressionTransformerInternal(r.rep, attributeSeq, expressionsMap),
+            replaceWithExpressionTransformerInternal(r.pos, attributeSeq, expressionsMap)
+          ),
           r
         )
       case equal: EqualNullSafe =>
diff --git a/gluten-core/src/main/scala/io/glutenproject/expression/StringExpressionTransformer.scala b/gluten-core/src/main/scala/io/glutenproject/expression/StringExpressionTransformer.scala
index b1ac0ddae8c7e..3fe42e8d70392 100644
--- a/gluten-core/src/main/scala/io/glutenproject/expression/StringExpressionTransformer.scala
+++ b/gluten-core/src/main/scala/io/glutenproject/expression/StringExpressionTransformer.scala
@@ -16,7 +16,6 @@
  */
 package io.glutenproject.expression
 
-import io.glutenproject.exception.GlutenNotSupportException
 import io.glutenproject.expression.ConverterUtils.FunctionConfig
 import io.glutenproject.substrait.expression._
 
@@ -48,28 +47,3 @@ case class String2TrimExpressionTransformer(
     ExpressionBuilder.makeScalarFunction(functionId, expressNodes, typeNode)
   }
 }
-
-case class RegExpReplaceTransformer(
-    substraitExprName: String,
-    subject: ExpressionTransformer,
-    regexp: ExpressionTransformer,
-    rep: ExpressionTransformer,
-    pos: ExpressionTransformer,
-    original: RegExpReplace)
-  extends ExpressionTransformer {
-
-  override def doTransform(args: java.lang.Object): ExpressionNode = {
-    // In CH: replaceRegexpAll(subject, regexp, rep), which is equivalent
-    // In Spark: regexp_replace(subject, regexp, rep, pos=1)
-    val posNode = pos.doTransform(args)
-    if (
-      !posNode.isInstanceOf[IntLiteralNode] ||
-      posNode.asInstanceOf[IntLiteralNode].getValue != 1
-    ) {
-      throw new GlutenNotSupportException(s"$original not supported yet.")
-    }
-
-    GenericExpressionTransformer(substraitExprName, Seq(subject, regexp, rep), original)
-      .doTransform(args)
-  }
-}