-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add Spark CAST(timestamp as integral) #11468
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,9 +29,8 @@ Expected<Timestamp> SparkCastHooks::castStringToTimestamp( | |
Expected<Timestamp> SparkCastHooks::castIntToTimestamp(int64_t seconds) const { | ||
// Spark internally use microsecond precision for timestamp. | ||
// To avoid overflow, we need to check the range of seconds. | ||
static constexpr int64_t maxSeconds = std::numeric_limits<int64_t>::max() / | ||
(Timestamp::kMicrosecondsInMillisecond * | ||
Timestamp::kMillisecondsInSecond); | ||
static constexpr int64_t maxSeconds = | ||
std::numeric_limits<int64_t>::max() / Timestamp::kMicrosecondsInSecond; | ||
if (seconds > maxSeconds) { | ||
return Timestamp::fromMicrosNoError(std::numeric_limits<int64_t>::max()); | ||
} | ||
|
@@ -41,6 +40,12 @@ Expected<Timestamp> SparkCastHooks::castIntToTimestamp(int64_t seconds) const { | |
return Timestamp(seconds, 0); | ||
} | ||
|
||
Expected<int64_t> SparkCastHooks::castTimestampToInt( | ||
Timestamp timestamp) const { | ||
return std::floor( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does timestamp.toMicros() / Timestamp::kMicrosecondsInSecond work? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the |
||
timestamp.toMicros() / (Timestamp::kMicrosecondsInSecond * 1.0)); | ||
} | ||
|
||
Expected<int32_t> SparkCastHooks::castStringToDate( | ||
const StringView& dateString) const { | ||
// Allows all patterns supported by Spark: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -81,6 +81,8 @@ struct Timestamp { | |
public: | ||
static constexpr int64_t kMillisecondsInSecond = 1'000; | ||
static constexpr int64_t kMicrosecondsInMillisecond = 1'000; | ||
static constexpr int64_t kMicrosecondsInSecond = | ||
kMicrosecondsInMillisecond * kMillisecondsInSecond; | ||
static constexpr int64_t kNanosecondsInMicrosecond = 1'000; | ||
static constexpr int64_t kNanosecondsInMillisecond = 1'000'000; | ||
static constexpr int64_t kNanosInSecond = | ||
|
@@ -183,19 +185,21 @@ struct Timestamp { | |
|
||
// Keep it in header for getting inlined. | ||
int64_t toMicros() const { | ||
// When an integer overflow occurs in the calculation, | ||
// an exception will be thrown. | ||
try { | ||
return checkedPlus( | ||
checkedMultiply(seconds_, (int64_t)1'000'000), | ||
(int64_t)(nanos_ / 1'000)); | ||
} catch (const std::exception& e) { | ||
// We use int128_t to make sure the computation does not overflows since | ||
// there are cases such that seconds*1000000 does not fit in int64_t, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Spark is using int64 type to represent micro second, so the max allowed seconds should be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The min second could overflow, the min second is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would you like to extract this fix to a separate PR like 671e126? We could add test in 'velox/type/tests/TimestampTest.cpp'. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sure, let me extract this to a new pr There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added here: #11774 |
||
// but seconds*1000000 + nanos does, an example is TimeStamp::minMillis(). | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. typo TimeStamp::minMillis(). -> Timestamp::minMillis(). |
||
|
||
// If the final result does not fit in int64_tw we throw. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. typo int64_tw |
||
__int128_t result = | ||
(__int128_t)seconds_ * 1'000'000 + (int64_t)(nanos_ / 1'000); | ||
if (result < std::numeric_limits<int64_t>::min() || | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. INT64_MAX and INT64_MIN |
||
result > std::numeric_limits<int64_t>::max()) { | ||
VELOX_USER_FAIL( | ||
"Could not convert Timestamp({}, {}) to microseconds, {}", | ||
"Could not convert Timestamp({}, {}) to microseconds", | ||
seconds_, | ||
nanos_, | ||
e.what()); | ||
nanos_); | ||
} | ||
return result; | ||
} | ||
|
||
/// Exports the current timestamp as a std::chrono::time_point of millisecond | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is overflow well handled if casting int64_t as a lower-byte type?