Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
PHILO-HE committed Mar 20, 2024
1 parent 6514392 commit 64c472d
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,13 @@ class CHSparkPlanExecApi extends SparkPlanExecApi {
CHPosExplodeTransformer(substraitExprName, child, original, attributeSeq)
}

/**
 * Builds the ClickHouse-specific transformer for a `regexp_replace` expression.
 *
 * @param substraitExprName Substrait function name to emit
 * @param children already-converted child expressions, passed through unchanged
 * @param expr the Spark `RegExpReplace` expression being converted
 * @return a [[CHRegExpReplaceTransformer]] wrapping the arguments
 */
override def genRegexpReplaceTransformer(
    substraitExprName: String,
    children: Seq[ExpressionTransformer],
    expr: RegExpReplace): ExpressionTransformer =
  CHRegExpReplaceTransformer(substraitExprName, children, original = expr)

override def createColumnarWriteFilesExec(
child: SparkPlan,
fileFormat: FileFormat,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,3 +197,28 @@ case class CHPosExplodeTransformer(
}
}
}

/**
 * ClickHouse-backend transformer for Spark's `regexp_replace`.
 *
 * Only the first three children are forwarded to the generic transformer; the
 * fourth child (index 3) is transformed solely to validate it.
 *
 * @param substraitExprName Substrait function name to emit
 * @param children converted child expressions; index 3 must be present
 * @param original the Spark expression this transformer was built from
 */
case class CHRegExpReplaceTransformer(
    substraitExprName: String,
    children: Seq[ExpressionTransformer],
    original: RegExpReplace)
  extends ExpressionTransformer {

  /**
   * Produces the Substrait node for the replacement call.
   *
   * @throws UnsupportedOperationException when the fourth child does not transform to an
   *                                       integer literal equal to 1
   */
  override def doTransform(args: java.lang.Object): ExpressionNode = {
    // CH's replaceRegexpAll(subject, regexp, rep) is equivalent to Spark's
    // regexp_replace(subject, regexp, rep, pos) only for pos = 1, so any other
    // position (or a non-literal one) is rejected.
    children(3).doTransform(args) match {
      case position: IntLiteralNode if position.getValue == 1 =>
        // The position argument is dropped: only the first three children are passed on.
        val withoutPosition = children.take(3)
        GenericExpressionTransformer(substraitExprName, withoutPosition, original)
          .doTransform(args)
      case _ =>
        throw new UnsupportedOperationException(s"$original not supported yet.")
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -466,12 +466,9 @@ class VeloxStringFunctionsSuite extends VeloxWholeStageTransformerSuite {
runQueryAndCompare(
s"select l_orderkey, regexp_replace(l_comment, '([a-z])', '1', 1) " +
s"from $LINEITEM_TABLE limit 5")(checkOperatorMatch[ProjectExecTransformer])
// todo incorrect results
runQueryAndCompare(
s"select l_orderkey, regexp_replace(l_comment, '([a-z])', '1', 10) " +
s"from $LINEITEM_TABLE limit 5",
true,
false)(_ => {})
s"from $LINEITEM_TABLE limit 5")(checkOperatorMatch[ProjectExecTransformer])
}

test("regex invalid") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,13 @@ trait SparkPlanExecApi {
throw new GlutenNotSupportException("make_timestamp is not supported")
}

/**
 * Creates the expression transformer for a `regexp_replace` expression.
 *
 * This default implementation hands every child, unchanged, to a
 * GenericExpressionTransformer. Backends may override it.
 *
 * @param substraitExprName Substrait function name to emit
 * @param children already-converted child expressions
 * @param expr the Spark `RegExpReplace` expression being converted
 */
def genRegexpReplaceTransformer(
    substraitExprName: String,
    children: Seq[ExpressionTransformer],
    expr: RegExpReplace): ExpressionTransformer =
  GenericExpressionTransformer(substraitExprName, children, expr)

/**
* Generate ShuffleDependency for ColumnarShuffleExchangeExec.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -372,12 +372,14 @@ object ExpressionConverter extends SQLConfHelper with Logging {
s
)
case r: RegExpReplace =>
RegExpReplaceTransformer(
substraitExprName,
replaceWithExpressionTransformerInternal(r.subject, attributeSeq, expressionsMap),
replaceWithExpressionTransformerInternal(r.regexp, attributeSeq, expressionsMap),
replaceWithExpressionTransformerInternal(r.rep, attributeSeq, expressionsMap),
replaceWithExpressionTransformerInternal(r.pos, attributeSeq, expressionsMap),
BackendsApiManager.getSparkPlanExecApiInstance.genRegexpReplaceTransformer(
substraitExprName,
Seq(
replaceWithExpressionTransformerInternal(r.subject, attributeSeq, expressionsMap),
replaceWithExpressionTransformerInternal(r.regexp, attributeSeq, expressionsMap),
replaceWithExpressionTransformerInternal(r.rep, attributeSeq, expressionsMap),
replaceWithExpressionTransformerInternal(r.pos, attributeSeq, expressionsMap)
),
r
)
case equal: EqualNullSafe =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
*/
package io.glutenproject.expression

import io.glutenproject.exception.GlutenNotSupportException
import io.glutenproject.expression.ConverterUtils.FunctionConfig
import io.glutenproject.substrait.expression._

Expand Down Expand Up @@ -48,28 +47,3 @@ case class String2TrimExpressionTransformer(
ExpressionBuilder.makeScalarFunction(functionId, expressNodes, typeNode)
}
}

/**
 * Transformer for Spark's `regexp_replace` that forwards subject, regexp and rep to the
 * generic transformer; `pos` is transformed only to validate it.
 *
 * @param substraitExprName Substrait function name to emit
 * @param subject converted subject expression
 * @param regexp converted pattern expression
 * @param rep converted replacement expression
 * @param pos converted position expression; used for validation only
 * @param original the Spark expression this transformer was built from
 */
case class RegExpReplaceTransformer(
    substraitExprName: String,
    subject: ExpressionTransformer,
    regexp: ExpressionTransformer,
    rep: ExpressionTransformer,
    pos: ExpressionTransformer,
    original: RegExpReplace)
  extends ExpressionTransformer {

  /**
   * Produces the Substrait node for the replacement call.
   *
   * @throws GlutenNotSupportException when `pos` does not transform to an integer
   *                                   literal equal to 1
   */
  override def doTransform(args: java.lang.Object): ExpressionNode = {
    // CH's replaceRegexpAll(subject, regexp, rep) is equivalent to Spark's
    // regexp_replace(subject, regexp, rep, pos) only for pos = 1, so any other
    // position (or a non-literal one) is rejected.
    pos.doTransform(args) match {
      case position: IntLiteralNode if position.getValue == 1 =>
        val forwarded = Seq(subject, regexp, rep)
        GenericExpressionTransformer(substraitExprName, forwarded, original).doTransform(args)
      case _ =>
        throw new GlutenNotSupportException(s"$original not supported yet.")
    }
  }
}

0 comments on commit 64c472d

Please sign in to comment.