From 1ef50c2f0e35dc666ecad8ef863c02570beccd82 Mon Sep 17 00:00:00 2001 From: Pedro Eugenio Rocha Pedreira Date: Wed, 24 Jul 2024 17:26:19 -0700 Subject: [PATCH] Timezone conversions based on seconds Summary: Timezone conversions were done based on a timepoint, which is unecessary, adds possibility for overflow, and requires us to floor it after the conversion. Simplifying the code and adding some more tests. Now, only exporting a time_point may overflow, but not exporting to time_point which is what is needed for timezone conversions. Reviewed By: mbasmanova Differential Revision: D60189530 --- velox/functions/lib/DateTimeFormatter.cpp | 4 +- velox/type/Timestamp.cpp | 19 +++++---- velox/type/Timestamp.h | 43 +++++++++++++------- velox/type/tests/TimestampConversionTest.cpp | 8 ++-- velox/type/tests/TimestampTest.cpp | 14 +++++-- 5 files changed, 56 insertions(+), 32 deletions(-) diff --git a/velox/functions/lib/DateTimeFormatter.cpp b/velox/functions/lib/DateTimeFormatter.cpp index 93a37abca4a3..50ad54521b76 100644 --- a/velox/functions/lib/DateTimeFormatter.cpp +++ b/velox/functions/lib/DateTimeFormatter.cpp @@ -1086,11 +1086,11 @@ int32_t DateTimeFormatter::format( Timestamp t = timestamp; if (timezone != nullptr) { const auto utcSeconds = timestamp.getSeconds(); - t.toTimezone(*timezone, allowOverflow); + t.toTimezone(*timezone); offset = t.getSeconds() - utcSeconds; } - const auto timePoint = t.toTimePoint(allowOverflow); + const auto timePoint = t.toTimePointMs(allowOverflow); const auto daysTimePoint = date::floor(timePoint); const auto durationInTheDay = date::make_time(timePoint - daysTimePoint); diff --git a/velox/type/Timestamp.cpp b/velox/type/Timestamp.cpp index 43f71b4fa0e6..80da58485ad0 100644 --- a/velox/type/Timestamp.cpp +++ b/velox/type/Timestamp.cpp @@ -122,7 +122,7 @@ void validateTimePoint(const std::chrono::time_point< } // namespace std::chrono::time_point -Timestamp::toTimePoint(bool allowOverflow) const { +Timestamp::toTimePointMs(bool allowOverflow) const { using namespace std::chrono; auto tp = time_point( milliseconds(allowOverflow ? toMillisAllowOverflow() : toMillis())); @@ -130,14 +130,19 @@ Timestamp::toTimePoint(bool allowOverflow) const { return tp; } -void Timestamp::toTimezone(const tz::TimeZone& zone, bool allowOverflow) { - auto tp = toTimePoint(allowOverflow); +std::chrono::time_point +Timestamp::toTimePointSec() const { + using namespace std::chrono; + auto tp = time_point(seconds(seconds_)); + validateTimePoint(tp); + return tp; +} - try { - auto epoch = zone.to_local(tp).time_since_epoch(); +void Timestamp::toTimezone(const tz::TimeZone& zone) { + auto tp = toTimePointSec(); - // NOTE: Round down to get the seconds of the current time point. - seconds_ = std::chrono::floor(epoch).count(); + try { + seconds_ = zone.to_local(tp).time_since_epoch().count(); } catch (const std::invalid_argument& e) { // Invalid argument means we hit a conversion not supported by // external/date. Need to throw a RuntimeError so that try() statements do diff --git a/velox/type/Timestamp.h b/velox/type/Timestamp.h index 8411d8390dc4..bf9d84d72979 100644 --- a/velox/type/Timestamp.h +++ b/velox/type/Timestamp.h @@ -192,12 +192,25 @@ struct Timestamp { } } - /// Due to the limit of std::chrono, throws if timestamp is outside of + /// Exports the current timestamp as a std::chrono::time_point of millisecond + /// precision. Note that the conversion may overflow since the internal + /// `seconds_` value will need to be multiplied by 1000. + /// + /// If `allowOverflow` is true, integer overflow is allowed in converting + /// to milliseconds. + /// + /// Due to the limit of velox/external/date, throws if timestamp is outside of /// [-32767-01-01, 32767-12-31] range. - /// If allowOverflow is true, integer overflow is allowed in converting - /// timestamp to milliseconds. std::chrono::time_point - toTimePoint(bool allowOverflow = false) const; + toTimePointMs(bool allowOverflow = false) const; + + /// Exports the current timestamp as a std::chrono::time_point of second + /// precision. + /// + /// Due to the limit of velox/external/date, throws if timestamp is outside of + /// [-32767-01-01, 32767-12-31] range. + std::chrono::time_point + toTimePointSec() const; static Timestamp fromMillis(int64_t millis) { if (millis >= 0 || millis % 1'000 == 0) { @@ -330,23 +343,23 @@ struct Timestamp { char* const startPosition); // Assuming the timestamp represents a time at zone, converts it to the GMT - // time at the same moment. - // Example: Timestamp ts{0, 0}; - // ts.Timezone("America/Los_Angeles"); - // ts.toString() returns January 1, 1970 08:00:00 + // time at the same moment. For example: + // + // Timestamp ts{0, 0}; + // ts.Timezone("America/Los_Angeles"); + // ts.toString(); // returns January 1, 1970 08:00:00 void toGMT(const tz::TimeZone& zone); // Same as above, but accepts PrestoDB time zone ID. void toGMT(int16_t tzID); /// Assuming the timestamp represents a GMT time, converts it to the time at - /// the same moment at zone. - /// @param allowOverflow If true, integer overflow is allowed when converting - /// timestamp to TimePoint. Otherwise, user exception is thrown for overflow. - /// Example: Timestamp ts{0, 0}; - /// ts.Timezone("America/Los_Angeles"); - /// ts.toString() returns December 31, 1969 16:00:00 - void toTimezone(const tz::TimeZone& zone, bool allowOverflow = false); + /// the same moment at zone. For example: + /// + /// Timestamp ts{0, 0}; + /// ts.Timezone("America/Los_Angeles"); + /// ts.toString(); // returns December 31, 1969 16:00:00 + void toTimezone(const tz::TimeZone& zone); // Same as above, but accepts PrestoDB time zone ID. void toTimezone(int16_t tzID); diff --git a/velox/type/tests/TimestampConversionTest.cpp b/velox/type/tests/TimestampConversionTest.cpp index b52c2c6d797b..2785661e1b61 100644 --- a/velox/type/tests/TimestampConversionTest.cpp +++ b/velox/type/tests/TimestampConversionTest.cpp @@ -282,22 +282,22 @@ TEST(DateTimeUtilTest, toGMT) { EXPECT_EQ(ts, parseTimestamp("1970-01-01 08:00:00")); // Set on a random date/time and try some variations. - ts = parseTimestamp("2020-04-23 04:23:37"); + ts = parseTimestamp("2020-04-23 04:23:37.926"); // To LA: auto tsCopy = ts; tsCopy.toGMT(*laZone); - EXPECT_EQ(tsCopy, parseTimestamp("2020-04-23 11:23:37")); + EXPECT_EQ(tsCopy, parseTimestamp("2020-04-23 11:23:37.926")); // To Sao Paulo: tsCopy = ts; tsCopy.toGMT(*tz::locateZone("America/Sao_Paulo")); - EXPECT_EQ(tsCopy, parseTimestamp("2020-04-23 07:23:37")); + EXPECT_EQ(tsCopy, parseTimestamp("2020-04-23 07:23:37.926")); // Moscow: tsCopy = ts; tsCopy.toGMT(*tz::locateZone("Europe/Moscow")); - EXPECT_EQ(tsCopy, parseTimestamp("2020-04-23 01:23:37")); + EXPECT_EQ(tsCopy, parseTimestamp("2020-04-23 01:23:37.926")); // Probe LA's daylight savings boundary (starts at 2021-13-14 02:00am). // Before it starts, 8h offset: diff --git a/velox/type/tests/TimestampTest.cpp b/velox/type/tests/TimestampTest.cpp index f54f75839db0..56171f688483 100644 --- a/velox/type/tests/TimestampTest.cpp +++ b/velox/type/tests/TimestampTest.cpp @@ -398,16 +398,22 @@ TEST(TimestampTest, outOfRange) { Timestamp t1(-3217830796800, 0); VELOX_ASSERT_THROW( - t1.toTimePoint(), "Timestamp is outside of supported range"); + t1.toTimePointMs(), "Timestamp is outside of supported range"); + VELOX_ASSERT_THROW( + t1.toTimePointSec(), "Timestamp is outside of supported range"); VELOX_ASSERT_THROW( t1.toTimezone(*timezone), "Timestamp is outside of supported range"); + timezone = tz::locateZone("America/Los_Angeles"); + VELOX_ASSERT_THROW( + t1.toGMT(*timezone), + "Timestamp seconds out of range for time zone adjustment"); + // #2. external/date doesn't understand OS_TZDB repetition rules. Therefore, // for timezones with pre-defined repetition rules for daylight savings, for // example, it will throw for anything larger than 2037 (which is what is // currently materialized in OS_TZDBs). America/Los_Angeles is an example of // such timezone. - timezone = tz::locateZone("America/Los_Angeles"); Timestamp t2(32517359891, 0); VELOX_ASSERT_THROW( t2.toTimezone(*timezone), @@ -420,12 +426,12 @@ TEST(TimestampTest, outOfRange) { TEST(TimestampTest, overflow) { Timestamp t(std::numeric_limits::max(), 0); VELOX_ASSERT_THROW( - t.toTimePoint(false), + t.toTimePointMs(false), fmt::format( "Could not convert Timestamp({}, {}) to milliseconds", std::numeric_limits::max(), 0)); - ASSERT_NO_THROW(t.toTimePoint(true)); + ASSERT_NO_THROW(t.toTimePointMs(true)); } #endif