From 42bd38ad1479ca373f641125cc07609c9112c322 Mon Sep 17 00:00:00 2001 From: Kevin Wilfong Date: Fri, 13 Dec 2024 10:37:15 -0800 Subject: [PATCH] fix: Date_add throws or produces wrong results when the result would be nonexistent time in the time zone (#11845) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/11845 Date_add is computed on top of Unix timestamps (millis since epoch). When the unit being added is at least a day, this calculation is done in the "local" time zone (the session time zone in the case of Timestamps and the time zone associated with the value in the case of TImestampWithTimeZones). This local time is then converted back to a system time. When this calculation is done in a "local" time zone, the resulting local millis since epoch may not exist, e.g. in U.S. time zones that respect daylight savings time, it may fall in the hour 2:00-3:00 am on the day when the clocks move forward. In this case, date_add will throw an exception when this is done with Timestamps, and return the next valid time (e.g. 3 am in the example above) when this is done with TimestampWithTimeZones. To fix this I added a function correct_nonexistent_time to TimeZoneMap which can take a local timestamp and adjust it by the difference between the to time zones (e.g. moves it forward an hour in the case of the example above) and returns a timestamp that exists. This gets us behavior that is consistent with Presto Java. Reviewed By: HeidiHan0000 Differential Revision: D67154813 fbshipit-source-id: eb56ee93ade8da29eeec2ecec3004f5fd7ac12fd --- velox/functions/prestosql/DateTimeFunctions.h | 9 ++++++-- velox/functions/prestosql/DateTimeImpl.h | 6 ++++++ .../prestosql/tests/DateTimeFunctionsTest.cpp | 21 +++++++++++++++++++ velox/type/tz/TimeZoneMap.cpp | 18 ++++++++++++++++ velox/type/tz/TimeZoneMap.h | 8 +++++++ 5 files changed, 60 insertions(+), 2 deletions(-) diff --git a/velox/functions/prestosql/DateTimeFunctions.h b/velox/functions/prestosql/DateTimeFunctions.h index 415c29280df1..0d059b1a95c8 100644 --- a/velox/functions/prestosql/DateTimeFunctions.h +++ b/velox/functions/prestosql/DateTimeFunctions.h @@ -1269,8 +1269,13 @@ struct DateAddFunction : public TimestampWithTimezoneSupport { result = Timestamp( resultTimestamp.getSeconds() + offset, resultTimestamp.getNanos()); } else { - resultTimestamp.toGMT(*sessionTimeZone_); - result = resultTimestamp; + result = Timestamp( + sessionTimeZone_ + ->correct_nonexistent_time( + std::chrono::seconds(resultTimestamp.getSeconds())) + .count(), + resultTimestamp.getNanos()); + result.toGMT(*sessionTimeZone_); } } else { result = addToTimestamp(timestamp, unit, (int32_t)value); diff --git a/velox/functions/prestosql/DateTimeImpl.h b/velox/functions/prestosql/DateTimeImpl.h index 374a8913256f..ecacbfa28a44 100644 --- a/velox/functions/prestosql/DateTimeImpl.h +++ b/velox/functions/prestosql/DateTimeImpl.h @@ -268,6 +268,12 @@ FOLLY_ALWAYS_INLINE int64_t addToTimestampWithTimezone( .count()); auto updatedTimeStamp = addToTimestamp(originalTimestamp, unit, (int32_t)value); + updatedTimeStamp = Timestamp( + timeZone + ->correct_nonexistent_time( + std::chrono::seconds(updatedTimeStamp.getSeconds())) + .count(), + updatedTimeStamp.getNanos()); finalSysMs = timeZone ->to_sys( diff --git a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp index 6471a24357d4..81983edbfc0c 100644 --- a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp @@ -2375,6 +2375,16 @@ TEST_F(DateTimeFunctionsTest, dateAddTimestamp) { "year", -2, Timestamp(1582970400, 500'999'999) /*2020-02-29 10:00:00.500*/)); + + // Test cases where the result would end up in the nonexistent gap between + // daylight savings time and standard time. 2023-03-12 02:30:00.000 does not + // exist in America/Los_Angeles since that hour is skipped. + EXPECT_EQ( + Timestamp(1678617000, 0), /*2023-03-12 03:30:00*/ + dateAdd("day", 45, Timestamp(1674729000, 0) /*2023-01-26 02:30:00*/)); + EXPECT_EQ( + Timestamp(1678617000, 0), /*2023-03-12 03:30:00*/ + dateAdd("day", -45, Timestamp(1682501400, 0) /*2023-04-26 02:30:00*/)); } TEST_F(DateTimeFunctionsTest, dateAddTimestampWithTimeZone) { @@ -2553,6 +2563,17 @@ TEST_F(DateTimeFunctionsTest, dateAddTimestampWithTimeZone) { "2023-03-11 00:00:00.000 America/Los_Angeles", dateAddAndCast( "year", -1, "2024-03-11 00:00:00.000 America/Los_Angeles")); + + // Test cases where the result would end up in the nonexistent gap between + // daylight savings time and standard time. 2023-03-12 02:30:00.000 does not + // exist in America/Los_Angeles since that hour is skipped. + EXPECT_EQ( + "2023-03-12 03:30:00.000 America/Los_Angeles", + dateAddAndCast("day", 45, "2023-01-26 02:30:00.000 America/Los_Angeles")); + EXPECT_EQ( + "2023-03-12 03:30:00.000 America/Los_Angeles", + dateAddAndCast( + "day", -45, "2023-04-26 02:30:00.000 America/Los_Angeles")); } TEST_F(DateTimeFunctionsTest, dateDiffDate) { diff --git a/velox/type/tz/TimeZoneMap.cpp b/velox/type/tz/TimeZoneMap.cpp index 876b63386b8e..e7935a308310 100644 --- a/velox/type/tz/TimeZoneMap.cpp +++ b/velox/type/tz/TimeZoneMap.cpp @@ -445,6 +445,24 @@ TimeZone::milliseconds TimeZone::to_local( return toLocalImpl(timestamp, tz_, offset_); } +TimeZone::seconds TimeZone::correct_nonexistent_time( + TimeZone::seconds timestamp) const { + // If this is an offset time zone. + if (tz_ == nullptr) { + return timestamp; + } + + const auto localInfo = tz_->get_info(date::local_time{timestamp}); + + if (localInfo.result != date::local_info::nonexistent) { + return timestamp; + } + + const auto adjustment = localInfo.second.offset - localInfo.first.offset; + + return timestamp + adjustment; +} + std::string TimeZone::getShortName( TimeZone::milliseconds timestamp, TimeZone::TChoose choose) const { diff --git a/velox/type/tz/TimeZoneMap.h b/velox/type/tz/TimeZoneMap.h index 46d1ea5a6680..627fdc133355 100644 --- a/velox/type/tz/TimeZoneMap.h +++ b/velox/type/tz/TimeZoneMap.h @@ -143,6 +143,14 @@ class TimeZone { seconds to_local(seconds timestamp) const; milliseconds to_local(milliseconds timestamp) const; + /// If a local time is nonexistent, i.e. refers to a time that exists in the + /// gap during a time zone conversion, this returns the time adjusted by + /// the difference between the two time zones, so that it lies in the later + /// time zone. + /// + /// If the local time exists then the same time is returned. + seconds correct_nonexistent_time(seconds timestamp) const; + const std::string& name() const { return timeZoneName_; }