Skip to content

Commit baf4b59

Browse files
committed
[SPARK-51482][SQL] Support cast from string to time
### What changes were proposed in this pull request? In this PR, I propose supporting `CAST` from `STRING` to `TIME(n)`. Input strings must match the format: ``` [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us] ``` ### Why are the changes needed? Before the changes, such a cast, which is allowed by the SQL standard, failed with the error: ```scala scala> Seq("17:18:19").toDS.select($"value".cast(TimeType())).show() org.apache.spark.sql.catalyst.ExtendedAnalysisException: [DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION] Cannot resolve "CAST(value AS TIME(6))" due to data type mismatch: cannot cast "STRING" to "TIME(6)". SQLSTATE: 42K09; ``` ### Does this PR introduce _any_ user-facing change? Yes. After the changes, the cast above works as expected: ```scala scala> Seq("17:18:19").toDS.select($"value".cast(TimeType())).show() +--------+ | value| +--------+ |17:18:19| +--------+ ``` ### How was this patch tested? By running the new tests: ``` $ build/sbt "test:testOnly *CastWithAnsiOffSuite" $ build/sbt "test:testOnly *CastWithAnsiOnSuite" ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes apache#50236 from MaxGekk/string-to-time. Authored-by: Max Gekk <[email protected]> Signed-off-by: Max Gekk <[email protected]>
1 parent d87851f commit baf4b59

File tree

4 files changed

+67
-0
lines changed

4 files changed

+67
-0
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala

+41
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ object Cast extends QueryErrorsBase {
119119
case (_: YearMonthIntervalType, _: YearMonthIntervalType) => true
120120

121121
case (_: StringType, DateType) => true
122+
case (_: StringType, _: TimeType) => true
122123
case (TimestampType, DateType) => true
123124
case (TimestampNTZType, DateType) => true
124125

@@ -219,6 +220,7 @@ object Cast extends QueryErrorsBase {
219220
case (TimestampType, TimestampNTZType) => true
220221

221222
case (_: StringType, DateType) => true
223+
case (_: StringType, _: TimeType) => true
222224
case (TimestampType, DateType) => true
223225
case (TimestampNTZType, DateType) => true
224226

@@ -727,6 +729,15 @@ case class Cast(
727729
buildCast[Long](_, t => microsToDays(t, ZoneOffset.UTC))
728730
}
729731

732+
/**
 * Builds the interpreted (non-codegen) conversion function for casting a value of type
 * `from` to a TIME value.
 *
 * Only string input is handled here; the match has no default case, so any other source
 * type is not covered by this method.
 *
 * @param from the data type of the value being cast
 * @return a function converting an input value to the TIME representation
 */
private[this] def castToTime(from: DataType): Any => Any = from match {
733+
case _: StringType =>
734+
// ANSI mode delegates to the `stringToTimeAnsi` variant, passing the query context
// (or null) so it can be attached to an error report.
if (ansiEnabled) {
735+
buildCast[UTF8String](_, s => DateTimeUtils.stringToTimeAnsi(s, getContextOrNull()))
736+
} else {
737+
// Non-ANSI mode: `stringToTime` returns an Option; an empty result becomes null.
buildCast[UTF8String](_, s => DateTimeUtils.stringToTime(s).orNull)
738+
}
739+
}
740+
730741
// IntervalConverter
731742
private[this] def castToInterval(from: DataType): Any => Any = from match {
732743
case _: StringType =>
@@ -1134,6 +1145,7 @@ case class Cast(
11341145
case s: StringType => castToString(from, s.constraint)
11351146
case BinaryType => castToBinary(from)
11361147
case DateType => castToDate(from)
1148+
case _: TimeType => castToTime(from)
11371149
case decimal: DecimalType => castToDecimal(from, decimal)
11381150
case TimestampType => castToTimestamp(from)
11391151
case TimestampNTZType => castToTimestampNTZ(from)
@@ -1241,6 +1253,7 @@ case class Cast(
12411253
(c, evPrim, _) => castToStringCode(from, ctx, s.constraint).apply(c, evPrim)
12421254
case BinaryType => castToBinaryCode(from)
12431255
case DateType => castToDateCode(from, ctx)
1256+
case _: TimeType => castToTimeCode(from, ctx)
12441257
case decimal: DecimalType => castToDecimalCode(from, decimal, ctx)
12451258
case TimestampType => castToTimestampCode(from, ctx)
12461259
case TimestampNTZType => castToTimestampNTZCode(from, ctx)
@@ -1335,6 +1348,34 @@ case class Cast(
13351348
}
13361349
}
13371350

1351+
/**
 * Builds the code-generation counterpart of the cast to TIME: a `CastFunction` that emits
 * Java source converting the input expression `c` into `evPrim` / `evNull`.
 *
 * @param from the data type of the value being cast
 * @param ctx  the codegen context, used for fresh variable names and the error context
 * @return a function producing the Java snippet for the cast
 */
private[this] def castToTimeCode(
1352+
from: DataType,
1353+
ctx: CodegenContext): CastFunction = {
1354+
from match {
1355+
case _: StringType =>
1356+
// Fresh name for the generated local that holds the Option returned by `stringToTime`;
// it is only referenced by the non-ANSI template below.
val longOpt = ctx.freshVariable("longOpt", classOf[Option[Long]])
1357+
(c, evPrim, evNull) =>
1358+
// ANSI mode: assign the result of `stringToTimeAnsi` directly; `evNull` is not touched
// in this branch.
if (ansiEnabled) {
1359+
val errorContext = getContextOrNullCode(ctx)
1360+
code"""
1361+
$evPrim = $dateTimeUtilsCls.stringToTimeAnsi($c, $errorContext);
1362+
"""
1363+
} else {
1364+
// Non-ANSI mode: unbox the parsed value when the Option is defined, otherwise mark
// the result as null.
code"""
1365+
scala.Option<Long> $longOpt = $dateTimeUtilsCls.stringToTime($c);
1366+
if ($longOpt.isDefined()) {
1367+
$evPrim = ((Long) $longOpt.get()).longValue();
1368+
} else {
1369+
$evNull = true;
1370+
}
1371+
"""
1372+
}
1373+
1374+
// Any other source type: the generated code simply marks the result as null.
case _ =>
1375+
(_, _, evNull) => code"$evNull = true;"
1376+
}
1377+
}
1378+
13381379
private[this] def changePrecision(
13391380
d: ExprValue,
13401381
decimalType: DecimalType,

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala

+12
Original file line numberDiff line numberDiff line change
@@ -1469,4 +1469,16 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
14691469
checkEvaluation(Cast(Literal(time), StringType), expectedStr)
14701470
}
14711471
}
1472+
1473+
// Checks that well-formed time strings cast to the expected TIME value for precisions
// 2 through 6 and the default precision. The inputs mix single-digit fields and
// leading/trailing spaces.
// NOTE(review): the last `localTime` argument looks like the fraction in microseconds
// (e.g. ".01" -> 10000) — confirm against the helper's definition.
test("cast string to time") {
1474+
checkEvaluation(cast(Literal.create("0:0:0"), TimeType()), 0L)
1475+
checkEvaluation(cast(Literal.create(" 01:2:3.01 "), TimeType(2)), localTime(1, 2, 3, 10000))
1476+
checkEvaluation(cast(Literal.create(" 12:13:14.999"),
1477+
TimeType(3)), localTime(12, 13, 14, 999 * 1000))
1478+
checkEvaluation(cast(Literal.create("23:0:59.0001 "), TimeType(4)), localTime(23, 0, 59, 100))
1479+
checkEvaluation(cast(Literal.create("23:59:0.99999"),
1480+
TimeType(5)), localTime(23, 59, 0, 999990))
1481+
checkEvaluation(cast(Literal.create("23:59:59.000001 "),
1482+
TimeType(6)), localTime(23, 59, 59, 1))
1483+
}
14721484
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastWithAnsiOffSuite.scala

+6
Original file line numberDiff line numberDiff line change
@@ -901,4 +901,10 @@ class CastWithAnsiOffSuite extends CastSuiteBase {
901901
castOverflowErrMsg(toType))
902902
}
903903
}
904+
905+
// In this suite (ANSI off), each invalid input is expected to evaluate to null rather
// than raise an error. The inputs cover non-numeric text, a value without separators,
// trailing garbage, and an hour field of 24.
test("cast invalid string input to time") {
906+
Seq("a", "123", "00:00:00ABC", "24:00:00").foreach { invalidInput =>
907+
checkEvaluation(cast(invalidInput, TimeType()), null)
908+
}
909+
}
904910
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastWithAnsiOnSuite.scala

+8
Original file line numberDiff line numberDiff line change
@@ -737,4 +737,12 @@ class CastWithAnsiOnSuite extends CastSuiteBase with QueryErrorsBase {
737737
val input = Literal.create(Decimal(0.000000123), DecimalType(9, 9))
738738
checkEvaluation(cast(input, StringType), "0.000000123")
739739
}
740+
741+
// In this suite (ANSI on), each invalid input is expected to raise a DateTimeException
// whose message matches `castErrMsg` for the input and the target TIME type.
test("cast invalid string input to time") {
742+
Seq("a", "123", "00:00:00ABC", "24:00:00").foreach { invalidInput =>
743+
checkExceptionInExpression[DateTimeException](
744+
cast(invalidInput, TimeType()),
745+
castErrMsg(invalidInput, TimeType()))
746+
}
747+
}
740748
}

0 commit comments

Comments
 (0)