Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[VL] Support uuid function #5014

Merged
merged 3 commits into from
Mar 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ import org.apache.spark.sql.catalyst.{AggregateFunctionRewriteRule, FlushableHas
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
import org.apache.spark.sql.catalyst.expressions.{Alias, Ascending, Attribute, Cast, CreateNamedStruct, ElementAt, Expression, ExpressionInfo, Generator, GetArrayItem, GetMapValue, GetStructField, If, IsNaN, Literal, Murmur3Hash, NamedExpression, NaNvl, PosExplode, Round, SortOrder, StringSplit, StringTrim}
import org.apache.spark.sql.catalyst.expressions.{Alias, Ascending, Attribute, Cast, CreateNamedStruct, ElementAt, Expression, ExpressionInfo, Generator, GetArrayItem, GetMapValue, GetStructField, If, IsNaN, Literal, Murmur3Hash, NamedExpression, NaNvl, PosExplode, Round, SortOrder, StringSplit, StringTrim, Uuid}
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, HLLAdapter}
import org.apache.spark.sql.catalyst.optimizer.BuildSide
import org.apache.spark.sql.catalyst.plans.JoinType
Expand Down Expand Up @@ -129,6 +129,16 @@ class SparkPlanExecApiImpl extends SparkPlanExecApi {
)
}

/**
 * Transform Uuid to Substrait.
 *
 * The random seed carried by the Spark expression is forwarded as a single literal argument of
 * the generated function call.
 *
 * @param substraitExprName name of the Substrait function to emit
 * @param original the Spark `Uuid` expression being transformed
 * @return a transformer wrapping `original` with the seed literal as its only child
 * @throws NoSuchElementException if `original.randomSeed` is empty
 */
override def genUuidTransformer(
    substraitExprName: String,
    original: Uuid): ExpressionTransformer = {
  // Fail with an explicit message instead of an opaque `None.get` when the seed is unset.
  val seed = original.randomSeed.getOrElse(
    throw new NoSuchElementException(
      s"Uuid expression has no random seed; cannot build '$substraitExprName'"))
  GenericExpressionTransformer(
    substraitExprName,
    Seq(LiteralTransformer(Literal(seed))),
    original)
}

/** Transform map_entries to Substrait. */
override def genMapEntriesTransformer(
substraitExprName: String,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import io.glutenproject.substrait.rel.LocalFilesNode.ReadFileFormat
import io.glutenproject.substrait.rel.LocalFilesNode.ReadFileFormat.{DwrfReadFormat, OrcReadFormat, ParquetReadFormat}

import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.catalyst.expressions.{Alias, CumeDist, DenseRank, Descending, Expression, Lag, Lead, Literal, NamedExpression, NthValue, NTile, PercentRank, Rand, RangeFrame, Rank, RowNumber, SortOrder, SpecialFrameBoundary, SpecifiedWindowFrame}
import org.apache.spark.sql.catalyst.expressions.{Alias, CumeDist, DenseRank, Descending, Expression, Lag, Lead, Literal, NamedExpression, NthValue, NTile, PercentRank, Rand, RangeFrame, Rank, RowNumber, SortOrder, SpecialFrameBoundary, SpecifiedWindowFrame, Uuid}
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Count, Sum}
import org.apache.spark.sql.catalyst.plans.JoinType
import org.apache.spark.sql.catalyst.util.CharVarcharUtils
Expand Down Expand Up @@ -388,6 +388,7 @@ object BackendSettings extends BackendSettingsApi {
// Block directly falling back the below functions by FallbackEmptySchemaRelation.
case alias: Alias => checkExpr(alias.child)
case _: Rand => true
case _: Uuid => true
case _ => false
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,12 @@ class VeloxFunctionsValidateSuite extends VeloxWholeStageTransformerSuite {
}
}

// Runs a uuid() projection over lineitem and checks the plan against ProjectExecTransformer.
test("Test uuid function") {
  val uuidQuery = "SELECT uuid() from lineitem limit 100"
  // NOTE(review): the positional `false` presumably turns off result comparison, since uuid()
  // output is random — confirm against runQueryAndCompare's signature.
  runQueryAndCompare(uuidQuery, false) {
    checkOperatorMatch[ProjectExecTransformer]
  }
}

test("regexp_replace") {
runQueryAndCompare(
"SELECT regexp_replace(l_partkey, '\\w', 'something') FROM lineitem limit 100") {
Expand Down
1 change: 1 addition & 0 deletions docs/velox-backend-support-progress.md
Original file line number Diff line number Diff line change
Expand Up @@ -428,3 +428,4 @@ Gluten supports 199 functions. (Draw to right to see all data types)
| spark_partition_id | | | S | | | | | | | | | | | | | | | | | | | |
| stack | | | | | | | | | | | | | | | | | | | | | | |
| xxhash64 | xxhash64 | xxhash64 | | | | | | | | | | | | | | | | | | | | |
| uuid | uuid | uuid | S | | | | | | | | | | | | | | | | | | | |
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,10 @@ trait SparkPlanExecApi {
throw new GlutenNotSupportException("NaNvl is not supported")
}

def genUuidTransformer(substraitExprName: String, original: Uuid): ExpressionTransformer = {
GenericExpressionTransformer(substraitExprName, Seq(), original)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you remind me why we distinguish the implementations of Velox and CH backend?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Uuid is a leaf expression. It looks like CH just treats it as a leaf expression and ignores the seed generated by Spark.
Velox's uuid can accept the seed parameter as a constant input, so we get the seed and pass it to Velox. This is the reason we distinguish the implementations.
Thanks.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cc: @zzcclp

}

/** Transform map_entries to Substrait. */
def genMapEntriesTransformer(
substraitExprName: String,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,8 @@ object ExpressionConverter extends SQLConfHelper with Logging {
val childrenTransformers =
e.children.map(replaceWithExpressionTransformerInternal(_, attributeSeq, expressionsMap))
e.getTransformer(childrenTransformers)
case u: Uuid =>
BackendsApiManager.getSparkPlanExecApiInstance.genUuidTransformer(substraitExprName, u)
case expr =>
GenericExpressionTransformer(
substraitExprName,
Expand Down
Loading