Skip to content

Commit

Permalink
Add Spark CAST(integral as timestamp) (#11089)
Browse files Browse the repository at this point in the history
Summary:
Add Spark CAST (integral as timestamp). The input value is treated as the
number of seconds since the epoch (1970-01-01 00:00:00 UTC). Supported types
are tinyint, smallint, integer and bigint.

Spark's implementation: https://github.com/apache/spark/blob/v3.5.1/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala#L680

Pull Request resolved: #11089

Reviewed By: amitkdutta

Differential Revision: D64805077

Pulled By: pedroerp

fbshipit-source-id: 0a551641b17540c92fb602b8a0f3524d574cb03e
  • Loading branch information
boneanxs authored and facebook-github-bot committed Oct 28, 2024
1 parent b24e1e6 commit 5180b68
Show file tree
Hide file tree
Showing 8 changed files with 104 additions and 0 deletions.
20 changes: 20 additions & 0 deletions velox/docs/functions/spark/conversion.rst
Original file line number Diff line number Diff line change
Expand Up @@ -239,3 +239,23 @@ Valid example
SELECT cast(cast(180 as smallint) as binary); -- [00 B4]
SELECT cast(cast(180000 as integer) as binary); -- [00 02 BF 20]
SELECT cast(cast(180000 as bigint) as binary); -- [00 00 00 00 00 02 BF 20]

Cast to Timestamp
-----------------

From integral types
^^^^^^^^^^^^^^^^^^^

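Casting an integral value to the timestamp type is allowed.
The input value is treated as the number of seconds since the epoch (1970-01-01 00:00:00 UTC).
Supported types are tinyint, smallint, integer and bigint.
Values whose microsecond representation would overflow a 64-bit integer are clamped to the largest or smallest representable timestamp.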

Valid example

::

SELECT cast(0 as timestamp); -- 1970-01-01 00:00:00
SELECT cast(1727181032 as timestamp); -- 2024-09-24 12:30:32
SELECT cast(9223372036855 as timestamp); -- 294247-01-10 04:00:54.775807
SELECT cast(-9223372036855 as timestamp); -- -290308-12-21 19:59:05.224192
10 changes: 10 additions & 0 deletions velox/expression/CastExpr-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,16 @@ void CastExpr::applyCastKernel(
try {
auto inputRowValue = input->valueAt(row);

if constexpr (
(FromKind == TypeKind::TINYINT || FromKind == TypeKind::SMALLINT ||
FromKind == TypeKind::INTEGER || FromKind == TypeKind::BIGINT) &&
ToKind == TypeKind::TIMESTAMP) {
const auto castResult =
hooks_->castIntToTimestamp((int64_t)inputRowValue);
setResultOrError(castResult, row);
return;
}

// Optimize empty input strings casting by avoiding throwing exceptions.
if constexpr (
FromKind == TypeKind::VARCHAR || FromKind == TypeKind::VARBINARY) {
Expand Down
2 changes: 2 additions & 0 deletions velox/expression/CastHooks.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ class CastHooks {
virtual Expected<Timestamp> castStringToTimestamp(
const StringView& view) const = 0;

// Converts an integral value to a timestamp. Invoked by the cast kernel for
// TINYINT, SMALLINT, INTEGER and BIGINT inputs after widening them to int64_t.
// Implementations return either the resulting Timestamp or an error status.
virtual Expected<Timestamp> castIntToTimestamp(int64_t seconds) const = 0;

virtual Expected<int32_t> castStringToDate(
const StringView& dateString) const = 0;

Expand Down
5 changes: 5 additions & 0 deletions velox/expression/PrestoCastHooks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ Expected<Timestamp> PrestoCastHooks::castStringToTimestamp(
return result.first;
}

// The Presto hooks do not support casting integral values to timestamp: the
// input is ignored and a user error is always returned.
Expected<Timestamp> PrestoCastHooks::castIntToTimestamp(int64_t seconds) const {
  const auto unsupported =
      Status::UserError("Conversion to Timestamp is not supported");
  return folly::makeUnexpected(unsupported);
}

Expected<int32_t> PrestoCastHooks::castStringToDate(
const StringView& dateString) const {
// Cast from string to date allows only complete ISO 8601 formatted strings:
Expand Down
2 changes: 2 additions & 0 deletions velox/expression/PrestoCastHooks.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ class PrestoCastHooks : public CastHooks {
Expected<Timestamp> castStringToTimestamp(
const StringView& view) const override;

// Casting integral values to timestamp is not supported by these hooks; always
// returns a user error.
Expected<Timestamp> castIntToTimestamp(int64_t seconds) const override;

// Uses standard cast mode to cast from string to date.
Expected<int32_t> castStringToDate(
const StringView& dateString) const override;
Expand Down
15 changes: 15 additions & 0 deletions velox/functions/sparksql/specialforms/SparkCastHooks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,21 @@ Expected<Timestamp> SparkCastHooks::castStringToTimestamp(
view.data(), view.size(), util::TimestampParseMode::kSparkCast);
}

Expected<Timestamp> SparkCastHooks::castIntToTimestamp(int64_t seconds) const {
  // Spark internally uses microsecond precision for timestamps. An input whose
  // microsecond value would not fit in int64_t is clamped to the matching
  // int64_t extreme (expressed in microseconds) instead of overflowing.
  static constexpr int64_t kInt64Max = std::numeric_limits<int64_t>::max();
  static constexpr int64_t kInt64Min = std::numeric_limits<int64_t>::min();
  static constexpr int64_t kSecondsLimit = kInt64Max /
      (Timestamp::kMicrosecondsInMillisecond *
       Timestamp::kMillisecondsInSecond);

  // Common case: the value is representable, so use it as whole seconds.
  if (seconds >= -kSecondsLimit && seconds <= kSecondsLimit) {
    return Timestamp(seconds, 0);
  }

  // Out of range: saturate towards the side that was exceeded.
  return Timestamp::fromMicrosNoError(
      seconds > kSecondsLimit ? kInt64Max : kInt64Min);
}

Expected<int32_t> SparkCastHooks::castStringToDate(
const StringView& dateString) const {
// Allows all patterns supported by Spark:
Expand Down
4 changes: 4 additions & 0 deletions velox/functions/sparksql/specialforms/SparkCastHooks.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ class SparkCastHooks : public exec::CastHooks {
Expected<Timestamp> castStringToTimestamp(
const StringView& view) const override;

/// When casting an integral value as timestamp, the input is treated as the
/// number of seconds since the epoch (1970-01-01 00:00:00 UTC). Inputs whose
/// microsecond representation would overflow int64_t are clamped to the
/// timestamp at the maximum (or minimum) int64_t number of microseconds.
Expected<Timestamp> castIntToTimestamp(int64_t seconds) const override;

/// 1) Removes all leading and trailing UTF8 white-spaces before cast. 2) Uses
/// non-standard cast mode to cast from string to date.
Expected<int32_t> castStringToDate(
Expand Down
46 changes: 46 additions & 0 deletions velox/functions/sparksql/tests/SparkCastExprTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,24 @@ class SparkCastExprTest : public functions::test::CastBaseTest {
72,
std::nullopt}));
}

template <typename T>
void testIntegralToTimestampCast() {
testCast(
makeNullableFlatVector<T>({
0,
1,
std::numeric_limits<T>::max(),
std::numeric_limits<T>::min(),
std::nullopt,
}),
makeNullableFlatVector<Timestamp>(
{Timestamp(0, 0),
Timestamp(1, 0),
Timestamp(std::numeric_limits<T>::max(), 0),
Timestamp(std::numeric_limits<T>::min(), 0),
std::nullopt}));
}
};

TEST_F(SparkCastExprTest, date) {
Expand Down Expand Up @@ -245,6 +263,34 @@ TEST_F(SparkCastExprTest, stringToTimestamp) {
testCast<std::string, Timestamp>("timestamp", input, expected);
}

TEST_F(SparkCastExprTest, intToTimestamp) {
  // Bigint inputs are interpreted as seconds since the epoch. The last four
  // inputs are expected to clamp to the two boundary timestamps below rather
  // than map to a whole number of seconds.
  constexpr int64_t kInt64Max = std::numeric_limits<int64_t>::max();
  constexpr int64_t kInt64Min = std::numeric_limits<int64_t>::min();
  const Timestamp upperBound(9223372036854, 775'807'000);
  const Timestamp lowerBound(-9223372036855, 224'192'000);

  const auto bigintSeconds = makeNullableFlatVector<int64_t>({
      0,
      1727181032,
      -1727181032,
      9223372036855,
      -9223372036856,
      kInt64Max,
      kInt64Min,
  });
  const auto expectedTimestamps = makeNullableFlatVector<Timestamp>({
      Timestamp(0, 0),
      Timestamp(1727181032, 0),
      Timestamp(-1727181032, 0),
      upperBound,
      lowerBound,
      upperBound,
      lowerBound,
  });
  testCast(bigintSeconds, expectedTimestamps);

  // The narrower integral types share one helper covering 0, 1, the type's
  // extremes and null.
  testIntegralToTimestampCast<int8_t>();
  testIntegralToTimestampCast<int16_t>();
  testIntegralToTimestampCast<int32_t>();
}

TEST_F(SparkCastExprTest, primitiveInvalidCornerCases) {
// To integer.
{
Expand Down

0 comments on commit 5180b68

Please sign in to comment.