facebookincubator · boneanxs · Nov 7, 2024 · rui-mo · Nov 12, 2024 · jinchengchenghh
diff --git a/velox/expression/CastExpr-inl.h b/velox/expression/CastExpr-inl.h
@@ -286,6 +286,20 @@ void CastExpr::applyCastKernel(
       return;
     }
 
+    if constexpr (
+        (ToKind == TypeKind::TINYINT || ToKind == TypeKind::SMALLINT ||
+         ToKind == TypeKind::INTEGER || ToKind == TypeKind::BIGINT) &&
+        FromKind == TypeKind::TIMESTAMP) {
+      using To = typename TypeTraits<ToKind>::NativeType;
+      const auto castResult = hooks_->castTimestampToInt(inputRowValue);
+      if (castResult.hasError()) {
+        setError(castResult.error().message());
+      } else {
+        result->set(row, static_cast<To>(castResult.value()));
+      }
+      return;
+    }
+
     // Optimize empty input strings casting by avoiding throwing exceptions.
     if constexpr (
         FromKind == TypeKind::VARCHAR || FromKind == TypeKind::VARBINARY) {

diff --git a/velox/expression/CastHooks.h b/velox/expression/CastHooks.h
@@ -37,6 +37,8 @@ class CastHooks {
 
   virtual Expected<Timestamp> castIntToTimestamp(int64_t seconds) const = 0;
 
+  virtual Expected<int64_t> castTimestampToInt(Timestamp timestamp) const = 0;
+
   virtual Expected<int32_t> castStringToDate(
       const StringView& dateString) const = 0;
 

diff --git a/velox/expression/PrestoCastHooks.cpp b/velox/expression/PrestoCastHooks.cpp
@@ -67,11 +67,18 @@ Expected<Timestamp> PrestoCastHooks::castStringToTimestamp(
   return result.first;
 }
 
-Expected<Timestamp> PrestoCastHooks::castIntToTimestamp(int64_t seconds) const {
+Expected<Timestamp> PrestoCastHooks::castIntToTimestamp(
+    int64_t /*seconds*/) const {
   return folly::makeUnexpected(
       Status::UserError("Conversion to Timestamp is not supported"));
 }
 
+Expected<int64_t> PrestoCastHooks::castTimestampToInt(
+    Timestamp /*timestamp*/) const {
+  return folly::makeUnexpected(
+      Status::UserError("Conversion from Timestamp is not supported"));
+}
+
 Expected<int32_t> PrestoCastHooks::castStringToDate(
     const StringView& dateString) const {
   // Cast from string to date allows only complete ISO 8601 formatted strings:

diff --git a/velox/expression/PrestoCastHooks.h b/velox/expression/PrestoCastHooks.h
@@ -32,6 +32,8 @@ class PrestoCastHooks : public CastHooks {
 
   Expected<Timestamp> castIntToTimestamp(int64_t seconds) const override;
 
+  Expected<int64_t> castTimestampToInt(Timestamp timestamp) const override;
+
   // Uses standard cast mode to cast from string to date.
   Expected<int32_t> castStringToDate(
       const StringView& dateString) const override;

diff --git a/velox/functions/sparksql/specialforms/SparkCastHooks.cpp b/velox/functions/sparksql/specialforms/SparkCastHooks.cpp
@@ -29,9 +29,8 @@ Expected<Timestamp> SparkCastHooks::castStringToTimestamp(
 Expected<Timestamp> SparkCastHooks::castIntToTimestamp(int64_t seconds) const {
   // Spark internally use microsecond precision for timestamp.
   // To avoid overflow, we need to check the range of seconds.
-  static constexpr int64_t maxSeconds = std::numeric_limits<int64_t>::max() /
-      (Timestamp::kMicrosecondsInMillisecond *
-       Timestamp::kMillisecondsInSecond);
+  static constexpr int64_t maxSeconds =
+      std::numeric_limits<int64_t>::max() / Timestamp::kMicrosecondsInSecond;
   if (seconds > maxSeconds) {
     return Timestamp::fromMicrosNoError(std::numeric_limits<int64_t>::max());
   }
@@ -41,6 +40,12 @@ Expected<Timestamp> SparkCastHooks::castIntToTimestamp(int64_t seconds) const {
   return Timestamp(seconds, 0);
 }
 
+Expected<int64_t> SparkCastHooks::castTimestampToInt(
+    Timestamp timestamp) const {
+  return std::floor(
+      timestamp.toMicros() / (Timestamp::kMicrosecondsInSecond * 1.0));
+}
+
 Expected<int32_t> SparkCastHooks::castStringToDate(
     const StringView& dateString) const {
   // Allows all patterns supported by Spark:

diff --git a/velox/functions/sparksql/specialforms/SparkCastHooks.h b/velox/functions/sparksql/specialforms/SparkCastHooks.h
@@ -31,6 +31,8 @@ class SparkCastHooks : public exec::CastHooks {
   /// number of seconds since the epoch (1970-01-01 00:00:00 UTC).
   Expected<Timestamp> castIntToTimestamp(int64_t seconds) const override;
 
+  Expected<int64_t> castTimestampToInt(Timestamp timestamp) const override;
+
   /// 1) Removes all leading and trailing UTF8 white-spaces before cast. 2) Uses
   /// non-standard cast mode to cast from string to date.
   Expected<int32_t> castStringToDate(

diff --git a/velox/functions/sparksql/tests/SparkCastExprTest.cpp b/velox/functions/sparksql/tests/SparkCastExprTest.cpp
@@ -109,6 +109,24 @@ class SparkCastExprTest : public functions::test::CastBaseTest {
              Timestamp(std::numeric_limits<T>::min(), 0),
              std::nullopt}));
   }
+
+  template <typename T>
+  void testTimestampToIntegralCast() {
+    testCast(
+        makeNullableFlatVector<Timestamp>(
+            {Timestamp(0, 0),
+             Timestamp(1, 0),
+             Timestamp(std::numeric_limits<T>::max(), 0),
+             Timestamp(std::numeric_limits<T>::min(), 0),
+             std::nullopt}),
+        makeNullableFlatVector<T>({
+            0,
+            1,
+            std::numeric_limits<T>::max(),
+            std::numeric_limits<T>::min(),
+            std::nullopt,
+        }));
+  }
 };
 
 TEST_F(SparkCastExprTest, date) {
@@ -291,6 +309,34 @@ TEST_F(SparkCastExprTest, intToTimestamp) {
   testIntegralToTimestampCast<int32_t>();
 }
 
+TEST_F(SparkCastExprTest, timestampToInt) {
+  // Cast timestamp as bigint.
+  testCast(
+      makeNullableFlatVector<Timestamp>({
+          Timestamp(0, 0),
+          Timestamp(1727181032, 0),
+          Timestamp(-1727181032, 0),
+          Timestamp(9223372036854, 775'807'000),
+          Timestamp(-9223372036855, 224'192'000),
+      }),
+      makeNullableFlatVector<int64_t>({
+          0,
+          1727181032,
+          -1727181032,
+          9223372036854,
+          -9223372036855,
+      }));
+  testInvalidCast<Timestamp>(
+      "bigint",
+      {Timestamp(9223372036856, 0)},
+      "Could not convert Timestamp(9223372036856, 0) to microseconds");
+
+  // Cast timestamp as tinyint/smallint/integer.
+  testTimestampToIntegralCast<int8_t>();
+  testTimestampToIntegralCast<int16_t>();
+  testTimestampToIntegralCast<int32_t>();
+}
+
 TEST_F(SparkCastExprTest, primitiveInvalidCornerCases) {
   // To integer.
   {

diff --git a/velox/type/Timestamp.h b/velox/type/Timestamp.h
@@ -81,6 +81,8 @@ struct Timestamp {
  public:
   static constexpr int64_t kMillisecondsInSecond = 1'000;
   static constexpr int64_t kMicrosecondsInMillisecond = 1'000;
+  static constexpr int64_t kMicrosecondsInSecond =
+      kMicrosecondsInMillisecond * kMillisecondsInSecond;
   static constexpr int64_t kNanosecondsInMicrosecond = 1'000;
   static constexpr int64_t kNanosecondsInMillisecond = 1'000'000;
   static constexpr int64_t kNanosInSecond =
@@ -183,19 +185,21 @@ struct Timestamp {
 
   // Keep it in header for getting inlined.
   int64_t toMicros() const {
-    // When an integer overflow occurs in the calculation,
-    // an exception will be thrown.
-    try {
-      return checkedPlus(
-          checkedMultiply(seconds_, (int64_t)1'000'000),
-          (int64_t)(nanos_ / 1'000));
-    } catch (const std::exception& e) {
+    // We use int128_t to make sure the computation does not overflows since
+    // there are cases such that seconds*1000000 does not fit in int64_t,
+    // but seconds*1000000 + nanos does, an example is TimeStamp::minMillis().
+
+    // If the final result does not fit in int64_tw we throw.
+    __int128_t result =
+        (__int128_t)seconds_ * 1'000'000 + (int64_t)(nanos_ / 1'000);
+    if (result < std::numeric_limits<int64_t>::min() ||
+        result > std::numeric_limits<int64_t>::max()) {
       VELOX_USER_FAIL(
-          "Could not convert Timestamp({}, {}) to microseconds, {}",
+          "Could not convert Timestamp({}, {}) to microseconds",
           seconds_,
-          nanos_,
-          e.what());
+          nanos_);
     }
+    return result;
   }
 
   /// Exports the current timestamp as a std::chrono::time_point of millisecond