diff --git a/velox/functions/prestosql/DateTimeFunctions.h b/velox/functions/prestosql/DateTimeFunctions.h index 5241af10b0cd..8cc132cdc18b 100644 --- a/velox/functions/prestosql/DateTimeFunctions.h +++ b/velox/functions/prestosql/DateTimeFunctions.h @@ -1384,11 +1384,12 @@ struct DateDiffFunction : public TimestampWithTimezoneSupport { // Presto's behavior is to use the time zone of the first parameter to // perform the calculation. Note that always normalizing to UTC is not // correct as calculations may cross daylight savings boundaries. - auto timestamp1 = this->toTimestamp(timestampWithTz1, false); - auto timestamp2 = this->toTimestamp(timestampWithTz2, true); - timestamp2.toTimezone(*tz::locateZone(unpackZoneKeyId(*timestampWithTz1))); + auto timeZoneId = unpackZoneKeyId(*timestampWithTz1); - result = diffTimestamp(unit, timestamp1, timestamp2); + result = diffTimestampWithTimeZone( + unit, + *timestampWithTz1, + pack(unpackMillisUtc(*timestampWithTz2), timeZoneId)); } }; diff --git a/velox/functions/prestosql/DateTimeImpl.h b/velox/functions/prestosql/DateTimeImpl.h index 1f0b61933a24..32ac08550fa0 100644 --- a/velox/functions/prestosql/DateTimeImpl.h +++ b/velox/functions/prestosql/DateTimeImpl.h @@ -23,6 +23,7 @@ #include "velox/functions/prestosql/types/TimestampWithTimeZoneType.h" #include "velox/type/Timestamp.h" #include "velox/type/TimestampConversion.h" +#include "velox/type/tz/TimeZoneMap.h" namespace facebook::velox::functions { namespace { @@ -221,9 +222,37 @@ FOLLY_ALWAYS_INLINE int64_t addToTimestampWithTimezone( int64_t timestampWithTimezone, const DateTimeUnit unit, const int32_t value) { - auto timestamp = unpackTimestampUtc(timestampWithTimezone); - auto finalTimeStamp = addToTimestamp(timestamp, unit, (int32_t)value); - return pack(finalTimeStamp, unpackZoneKeyId(timestampWithTimezone)); + { + int64_t finalSysMs; + if (unit < DateTimeUnit::kDay) { + auto originalTimestamp = unpackTimestampUtc(timestampWithTimezone); + finalSysMs = + addToTimestamp(originalTimestamp, unit, (int32_t)value).toMillis(); + } else { + // Use local time to handle crossing daylight savings time boundaries. + // E.g. the "day" when the clock moves back an hour is 25 hours long, and + // the day it moves forward is 23 hours long. Daylight savings time + // doesn't affect time units less than a day, and will produce incorrect + // results if we use local time. + const tz::TimeZone* timeZone = + tz::locateZone(unpackZoneKeyId(timestampWithTimezone)); + auto originalTimestamp = + Timestamp::fromMillis(timeZone + ->to_local(std::chrono::milliseconds( + unpackMillisUtc(timestampWithTimezone))) + .count()); + auto updatedTimeStamp = + addToTimestamp(originalTimestamp, unit, (int32_t)value); + finalSysMs = + timeZone + ->to_sys( + std::chrono::milliseconds(updatedTimeStamp.toMillis()), + tz::TimeZone::TChoose::kEarliest) + .count(); + } + + return pack(finalSysMs, unpackZoneKeyId(timestampWithTimezone)); + } } FOLLY_ALWAYS_INLINE int64_t diffTimestamp( @@ -357,6 +386,45 @@ FOLLY_ALWAYS_INLINE int64_t diffTimestamp( VELOX_UNREACHABLE("Unsupported datetime unit"); } +FOLLY_ALWAYS_INLINE int64_t diffTimestampWithTimeZone( + const DateTimeUnit unit, + const int64_t fromTimestampWithTimeZone, + const int64_t toTimestampWithTimeZone) { + auto fromTimeZoneId = unpackZoneKeyId(fromTimestampWithTimeZone); + auto toTimeZoneId = unpackZoneKeyId(toTimestampWithTimeZone); + VELOX_CHECK_EQ( + fromTimeZoneId, + toTimeZoneId, + "diffTimestampWithTimeZone must receive timestamps in the same time zone."); + + Timestamp fromTimestamp; + Timestamp toTimestamp; + + if (unit < DateTimeUnit::kDay) { + fromTimestamp = unpackTimestampUtc(fromTimestampWithTimeZone); + toTimestamp = unpackTimestampUtc(toTimestampWithTimeZone); + } else { + // Use local time to handle crossing daylight savings time boundaries. + // E.g. the "day" when the clock moves back an hour is 25 hours long, and + // the day it moves forward is 23 hours long. Daylight savings time + // doesn't affect time units less than a day, and will produce incorrect + // results if we use local time. + const tz::TimeZone* timeZone = tz::locateZone(fromTimeZoneId); + fromTimestamp = Timestamp::fromMillis( + timeZone + ->to_local(std::chrono::milliseconds( + unpackMillisUtc(fromTimestampWithTimeZone))) + .count()); + toTimestamp = + Timestamp::fromMillis(timeZone + ->to_local(std::chrono::milliseconds( + unpackMillisUtc(toTimestampWithTimeZone))) + .count()); + } + + return diffTimestamp(unit, fromTimestamp, toTimestamp); +} + FOLLY_ALWAYS_INLINE int64_t diffDate( const DateTimeUnit unit, diff --git a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp index 20265004aafa..3e75528e346b 100644 --- a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp @@ -2400,6 +2400,78 @@ TEST_F(DateTimeFunctionsTest, dateAddTimestampWithTimeZone) { EXPECT_EQ( "2024-11-03 00:50:00.000 America/Los_Angeles", dateAddAndCast("hour", -1, "2024-11-03 01:50:00 America/Los_Angeles")); + EXPECT_EQ( + "2024-11-04 00:00:00.000 America/Los_Angeles", + dateAddAndCast("day", 1, "2024-11-03 00:00:00.000 America/Los_Angeles")); + EXPECT_EQ( + "2024-11-10 00:00:00.000 America/Los_Angeles", + dateAddAndCast("week", 1, "2024-11-03 00:00:00.000 America/Los_Angeles")); + EXPECT_EQ( + "2024-12-03 00:00:00.000 America/Los_Angeles", + dateAddAndCast( + "month", 1, "2024-11-03 00:00:00.000 America/Los_Angeles")); + EXPECT_EQ( + "2025-02-03 00:00:00.000 America/Los_Angeles", + dateAddAndCast( + "quarter", 1, "2024-11-03 00:00:00.000 America/Los_Angeles")); + EXPECT_EQ( + "2025-11-03 00:00:00.000 America/Los_Angeles", + dateAddAndCast("year", 1, "2024-11-03 00:00:00.000 America/Los_Angeles")); + EXPECT_EQ( + "2024-11-03 00:00:00.000 America/Los_Angeles", + dateAddAndCast("day", -1, "2024-11-04 00:00:00.000 America/Los_Angeles")); + EXPECT_EQ( + "2024-10-28 00:00:00.000 America/Los_Angeles", + dateAddAndCast( + "week", -1, "2024-11-04 00:00:00.000 America/Los_Angeles")); + EXPECT_EQ( + "2024-10-04 00:00:00.000 America/Los_Angeles", + dateAddAndCast( + "month", -1, "2024-11-04 00:00:00.000 America/Los_Angeles")); + EXPECT_EQ( + "2024-08-04 00:00:00.000 America/Los_Angeles", + dateAddAndCast( + "quarter", -1, "2024-11-04 00:00:00.000 America/Los_Angeles")); + EXPECT_EQ( + "2023-11-04 00:00:00.000 America/Los_Angeles", + dateAddAndCast( + "year", -1, "2024-11-04 00:00:00.000 America/Los_Angeles")); + EXPECT_EQ( + "2024-03-11 00:00:00.000 America/Los_Angeles", + dateAddAndCast("day", 1, "2024-03-10 00:00:00.000 America/Los_Angeles")); + EXPECT_EQ( + "2024-03-17 00:00:00.000 America/Los_Angeles", + dateAddAndCast("week", 1, "2024-03-10 00:00:00.000 America/Los_Angeles")); + EXPECT_EQ( + "2024-04-10 00:00:00.000 America/Los_Angeles", + dateAddAndCast( + "month", 1, "2024-03-10 00:00:00.000 America/Los_Angeles")); + EXPECT_EQ( + "2024-06-10 00:00:00.000 America/Los_Angeles", + dateAddAndCast( + "quarter", 1, "2024-03-10 00:00:00.000 America/Los_Angeles")); + EXPECT_EQ( + "2025-03-10 00:00:00.000 America/Los_Angeles", + dateAddAndCast("year", 1, "2024-03-10 00:00:00.000 America/Los_Angeles")); + EXPECT_EQ( + "2024-03-10 00:00:00.000 America/Los_Angeles", + dateAddAndCast("day", -1, "2024-03-11 00:00:00.000 America/Los_Angeles")); + EXPECT_EQ( + "2024-03-04 00:00:00.000 America/Los_Angeles", + dateAddAndCast( + "week", -1, "2024-03-11 00:00:00.000 America/Los_Angeles")); + EXPECT_EQ( + "2024-02-11 00:00:00.000 America/Los_Angeles", + dateAddAndCast( + "month", -1, "2024-03-11 00:00:00.000 America/Los_Angeles")); + EXPECT_EQ( + "2023-12-11 00:00:00.000 America/Los_Angeles", + dateAddAndCast( + "quarter", -1, "2024-03-11 00:00:00.000 America/Los_Angeles")); + EXPECT_EQ( + "2023-03-11 00:00:00.000 America/Los_Angeles", + dateAddAndCast( + "year", -1, "2024-03-11 00:00:00.000 America/Los_Angeles")); } TEST_F(DateTimeFunctionsTest, dateDiffDate) { @@ -2881,6 +2953,155 @@ TEST_F(DateTimeFunctionsTest, dateDiffTimestampWithTimezone) { "day", TimestampWithTimezone(a, "America/Los_Angeles"), TimestampWithTimezone(b, "America/Los_Angeles"))); + + auto dateDiffAndCast = [&](std::optional unit, + std::optional timestampString1, + std::optional timestampString2) { + return evaluateOnce( + "date_diff(c0, cast(c1 as timestamp with time zone), cast(c2 as timestamp with time zone))", + unit, + timestampString1, + timestampString2); + }; + + EXPECT_EQ( + 1, + dateDiffAndCast( + "hour", + "2024-03-10 01:50:00 America/Los_Angeles", + "2024-03-10 03:50:00 America/Los_Angeles")); + EXPECT_EQ( + 0, + dateDiffAndCast( + "hour", + "2024-11-03 01:50:00 America/Los_Angeles", + "2024-11-03 01:50:00 America/Los_Angeles")); + EXPECT_EQ( + -1, + dateDiffAndCast( + "hour", + "2024-11-03 01:50:00 America/Los_Angeles", + "2024-11-03 00:50:00 America/Los_Angeles")); + EXPECT_EQ( + 1, + dateDiffAndCast( + "day", + "2024-11-03 00:00:00 America/Los_Angeles", + "2024-11-04 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + 1, + dateDiffAndCast( + "week", + "2024-11-03 00:00:00 America/Los_Angeles", + "2024-11-10 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + 1, + dateDiffAndCast( + "month", + "2024-11-03 00:00:00 America/Los_Angeles", + "2024-12-03 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + 1, + dateDiffAndCast( + "quarter", + "2024-11-03 00:00:00 America/Los_Angeles", + "2025-02-03 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + 1, + dateDiffAndCast( + "year", + "2024-11-03 00:00:00 America/Los_Angeles", + "2025-11-03 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + -1, + dateDiffAndCast( + "day", + "2024-11-04 00:00:00 America/Los_Angeles", + "2024-11-03 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + -1, + dateDiffAndCast( + "week", + "2024-11-04 00:00:00 America/Los_Angeles", + "2024-10-28 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + -1, + dateDiffAndCast( + "month", + "2024-11-04 00:00:00 America/Los_Angeles", + "2024-10-04 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + -1, + dateDiffAndCast( + "quarter", + "2024-11-04 00:00:00 America/Los_Angeles", + "2024-08-04 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + -1, + dateDiffAndCast( + "year", + "2024-11-04 00:00:00 America/Los_Angeles", + "2023-11-04 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + 1, + dateDiffAndCast( + "day", + "2024-03-10 00:00:00 America/Los_Angeles", + "2024-03-11 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + 1, + dateDiffAndCast( + "week", + "2024-03-10 00:00:00 America/Los_Angeles", + "2024-03-17 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + 1, + dateDiffAndCast( + "month", + "2024-03-10 00:00:00 America/Los_Angeles", + "2024-04-10 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + 1, + dateDiffAndCast( + "quarter", + "2024-03-10 00:00:00 America/Los_Angeles", + "2024-06-10 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + 1, + dateDiffAndCast( + "year", + "2024-03-10 00:00:00 America/Los_Angeles", + "2025-03-10 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + -1, + dateDiffAndCast( + "day", + "2024-03-11 00:00:00 America/Los_Angeles", + "2024-03-10 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + -1, + dateDiffAndCast( + "week", + "2024-03-11 00:00:00 America/Los_Angeles", + "2024-03-04 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + -1, + dateDiffAndCast( + "month", + "2024-03-11 00:00:00 America/Los_Angeles", + "2024-02-11 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + -1, + dateDiffAndCast( + "quarter", + "2024-03-11 00:00:00 America/Los_Angeles", + "2023-12-11 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + -1, + dateDiffAndCast( + "year", + "2024-03-11 00:00:00 America/Los_Angeles", + "2023-03-11 00:00:00 America/Los_Angeles")); } TEST_F(DateTimeFunctionsTest, parseDatetime) { diff --git a/velox/type/tz/TimeZoneMap.cpp b/velox/type/tz/TimeZoneMap.cpp index b68c88c6be04..7bb2cef9d932 100644 --- a/velox/type/tz/TimeZoneMap.cpp +++ b/velox/type/tz/TimeZoneMap.cpp @@ -267,6 +267,38 @@ date::zoned_time getZonedTime( : date::choose::latest; return date::zoned_time{tz, timestamp, dateChoose}; } + +template +TDuration toSysImpl( + const TDuration& timestamp, + const TimeZone::TChoose choose, + const date::time_zone* tz, + const std::chrono::minutes offset) { + date::local_time timePoint{timestamp}; + validateRange(date::sys_time{timestamp}); + + if (tz == nullptr) { + // We can ignore `choose` as time offset conversions are always linear. + return (timePoint - offset).time_since_epoch(); + } + + return getZonedTime(tz, timePoint, choose).get_sys_time().time_since_epoch(); +} + +template +TDuration toLocalImpl( + const TDuration& timestamp, + const date::time_zone* tz, + const std::chrono::minutes offset) { + date::sys_time timePoint{timestamp}; + validateRange(timePoint); + + // If this is an offset time zone. + if (tz == nullptr) { + return (timePoint + offset).time_since_epoch(); + } + return date::zoned_time{tz, timePoint}.get_local_time().time_since_epoch(); +} } // namespace void validateRange(time_point timePoint) { @@ -354,26 +386,22 @@ int16_t getTimeZoneID(int32_t offsetMinutes) { TimeZone::seconds TimeZone::to_sys( TimeZone::seconds timestamp, TimeZone::TChoose choose) const { - date::local_seconds timePoint{timestamp}; - validateRange(date::sys_seconds{timestamp}); - - if (tz_ == nullptr) { - // We can ignore `choose` as time offset conversions are always linear. - return (timePoint - offset_).time_since_epoch(); - } + return toSysImpl(timestamp, choose, tz_, offset_); +} - return getZonedTime(tz_, timePoint, choose).get_sys_time().time_since_epoch(); +TimeZone::milliseconds TimeZone::to_sys( + TimeZone::milliseconds timestamp, + TimeZone::TChoose choose) const { + return toSysImpl(timestamp, choose, tz_, offset_); } TimeZone::seconds TimeZone::to_local(TimeZone::seconds timestamp) const { - date::sys_seconds timePoint{timestamp}; - validateRange(timePoint); + return toLocalImpl(timestamp, tz_, offset_); +} - // If this is an offset time zone. - if (tz_ == nullptr) { - return (timePoint + offset_).time_since_epoch(); - } - return date::zoned_time{tz_, timePoint}.get_local_time().time_since_epoch(); +TimeZone::milliseconds TimeZone::to_local( + TimeZone::milliseconds timestamp) const { + return toLocalImpl(timestamp, tz_, offset_); } std::string TimeZone::getShortName( diff --git a/velox/type/tz/TimeZoneMap.h b/velox/type/tz/TimeZoneMap.h index c04cc308657b..46d1ea5a6680 100644 --- a/velox/type/tz/TimeZoneMap.h +++ b/velox/type/tz/TimeZoneMap.h @@ -133,12 +133,15 @@ class TimeZone { }; seconds to_sys(seconds timestamp, TChoose choose = TChoose::kFail) const; + milliseconds to_sys(milliseconds timestamp, TChoose choose = TChoose::kFail) + const; /// Do the opposite conversion. Taking a system time (the time as perceived in /// GMT), convert to the same instant in time as observed in the user local /// time represented by this object). Note that this conversion is not /// susceptible to the error above. seconds to_local(seconds timestamp) const; + milliseconds to_local(milliseconds timestamp) const; const std::string& name() const { return timeZoneName_; diff --git a/velox/type/tz/tests/TimeZoneMapTest.cpp b/velox/type/tz/tests/TimeZoneMapTest.cpp index 364fd2baf297..58d790788546 100644 --- a/velox/type/tz/tests/TimeZoneMapTest.cpp +++ b/velox/type/tz/tests/TimeZoneMapTest.cpp @@ -124,12 +124,12 @@ TEST(TimeZoneMapTest, timePointBoundary) { auto trySysYear = [&](year y) { auto date = year_month_day(y, month(1), day(1)); - return tz->to_sys(sys_days{date}.time_since_epoch()); + return tz->to_sys(seconds(sys_days{date}.time_since_epoch())); }; auto tryLocalYear = [&](year y) { auto date = year_month_day(y, month(1), day(1)); - return tz->to_local(sys_days{date}.time_since_epoch()); + return tz->to_local(seconds(sys_days{date}.time_since_epoch())); }; EXPECT_NO_THROW(trySysYear(year(0)));