From dad4d835a622da14f3ee943cc02d5eccc631d3cf Mon Sep 17 00:00:00 2001 From: Kevin Wilfong Date: Wed, 30 Oct 2024 10:17:19 -0700 Subject: [PATCH] Fix Presto's date_diff UDF with TimestampAndTimeZone and DST (#11380) Summary: Presto Java's date_diff UDF respects DST for units greater than or equal to a day, but for units less than a day computes the diff on the system time (GMT). This means for units less than a day date_diff with TimestampWithTimeZone cannot compute the difference of the local times. It needs to use system time to be consistent. Note that the for units greater than or equal to a day, this does not apply, and we should continue to use the local time to compute the difference. This is analogous to the change made to date_add in https://github.com/facebookincubator/velox/pull/11353 Reviewed By: xiaoxmeng Differential Revision: D65165953 --- velox/functions/prestosql/DateTimeFunctions.h | 9 +- velox/functions/prestosql/DateTimeImpl.h | 39 +++++ .../prestosql/tests/DateTimeFunctionsTest.cpp | 149 ++++++++++++++++++ 3 files changed, 193 insertions(+), 4 deletions(-) diff --git a/velox/functions/prestosql/DateTimeFunctions.h b/velox/functions/prestosql/DateTimeFunctions.h index 5241af10b0cd..8cc132cdc18b 100644 --- a/velox/functions/prestosql/DateTimeFunctions.h +++ b/velox/functions/prestosql/DateTimeFunctions.h @@ -1384,11 +1384,12 @@ struct DateDiffFunction : public TimestampWithTimezoneSupport { // Presto's behavior is to use the time zone of the first parameter to // perform the calculation. Note that always normalizing to UTC is not // correct as calculations may cross daylight savings boundaries. - auto timestamp1 = this->toTimestamp(timestampWithTz1, false); - auto timestamp2 = this->toTimestamp(timestampWithTz2, true); - timestamp2.toTimezone(*tz::locateZone(unpackZoneKeyId(*timestampWithTz1))); + auto timeZoneId = unpackZoneKeyId(*timestampWithTz1); - result = diffTimestamp(unit, timestamp1, timestamp2); + result = diffTimestampWithTimeZone( + unit, + *timestampWithTz1, + pack(unpackMillisUtc(*timestampWithTz2), timeZoneId)); } }; diff --git a/velox/functions/prestosql/DateTimeImpl.h b/velox/functions/prestosql/DateTimeImpl.h index fbe489c243c9..32ac08550fa0 100644 --- a/velox/functions/prestosql/DateTimeImpl.h +++ b/velox/functions/prestosql/DateTimeImpl.h @@ -386,6 +386,45 @@ FOLLY_ALWAYS_INLINE int64_t diffTimestamp( VELOX_UNREACHABLE("Unsupported datetime unit"); } +FOLLY_ALWAYS_INLINE int64_t diffTimestampWithTimeZone( + const DateTimeUnit unit, + const int64_t fromTimestampWithTimeZone, + const int64_t toTimestampWithTimeZone) { + auto fromTimeZoneId = unpackZoneKeyId(fromTimestampWithTimeZone); + auto toTimeZoneId = unpackZoneKeyId(toTimestampWithTimeZone); + VELOX_CHECK_EQ( + fromTimeZoneId, + toTimeZoneId, + "diffTimestampWithTimeZone must receive timestamps in the same time zone."); + + Timestamp fromTimestamp; + Timestamp toTimestamp; + + if (unit < DateTimeUnit::kDay) { + fromTimestamp = unpackTimestampUtc(fromTimestampWithTimeZone); + toTimestamp = unpackTimestampUtc(toTimestampWithTimeZone); + } else { + // Use local time to handle crossing daylight savings time boundaries. + // E.g. the "day" when the clock moves back an hour is 25 hours long, and + // the day it moves forward is 23 hours long. Daylight savings time + // doesn't affect time units less than a day, and will produce incorrect + // results if we use local time. + const tz::TimeZone* timeZone = tz::locateZone(fromTimeZoneId); + fromTimestamp = Timestamp::fromMillis( + timeZone + ->to_local(std::chrono::milliseconds( + unpackMillisUtc(fromTimestampWithTimeZone))) + .count()); + toTimestamp = + Timestamp::fromMillis(timeZone + ->to_local(std::chrono::milliseconds( + unpackMillisUtc(toTimestampWithTimeZone))) + .count()); + } + + return diffTimestamp(unit, fromTimestamp, toTimestamp); +} + FOLLY_ALWAYS_INLINE int64_t diffDate( const DateTimeUnit unit, diff --git a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp index c19c61188084..3e75528e346b 100644 --- a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp @@ -2953,6 +2953,155 @@ TEST_F(DateTimeFunctionsTest, dateDiffTimestampWithTimezone) { "day", TimestampWithTimezone(a, "America/Los_Angeles"), TimestampWithTimezone(b, "America/Los_Angeles"))); + + auto dateDiffAndCast = [&](std::optional unit, + std::optional timestampString1, + std::optional timestampString2) { + return evaluateOnce( + "date_diff(c0, cast(c1 as timestamp with time zone), cast(c2 as timestamp with time zone))", + unit, + timestampString1, + timestampString2); + }; + + EXPECT_EQ( + 1, + dateDiffAndCast( + "hour", + "2024-03-10 01:50:00 America/Los_Angeles", + "2024-03-10 03:50:00 America/Los_Angeles")); + EXPECT_EQ( + 0, + dateDiffAndCast( + "hour", + "2024-11-03 01:50:00 America/Los_Angeles", + "2024-11-03 01:50:00 America/Los_Angeles")); + EXPECT_EQ( + -1, + dateDiffAndCast( + "hour", + "2024-11-03 01:50:00 America/Los_Angeles", + "2024-11-03 00:50:00 America/Los_Angeles")); + EXPECT_EQ( + 1, + dateDiffAndCast( + "day", + "2024-11-03 00:00:00 America/Los_Angeles", + "2024-11-04 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + 1, + dateDiffAndCast( + "week", + "2024-11-03 00:00:00 America/Los_Angeles", + "2024-11-10 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + 1, + dateDiffAndCast( + "month", + "2024-11-03 00:00:00 America/Los_Angeles", + "2024-12-03 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + 1, + dateDiffAndCast( + "quarter", + "2024-11-03 00:00:00 America/Los_Angeles", + "2025-02-03 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + 1, + dateDiffAndCast( + "year", + "2024-11-03 00:00:00 America/Los_Angeles", + "2025-11-03 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + -1, + dateDiffAndCast( + "day", + "2024-11-04 00:00:00 America/Los_Angeles", + "2024-11-03 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + -1, + dateDiffAndCast( + "week", + "2024-11-04 00:00:00 America/Los_Angeles", + "2024-10-28 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + -1, + dateDiffAndCast( + "month", + "2024-11-04 00:00:00 America/Los_Angeles", + "2024-10-04 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + -1, + dateDiffAndCast( + "quarter", + "2024-11-04 00:00:00 America/Los_Angeles", + "2024-08-04 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + -1, + dateDiffAndCast( + "year", + "2024-11-04 00:00:00 America/Los_Angeles", + "2023-11-04 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + 1, + dateDiffAndCast( + "day", + "2024-03-10 00:00:00 America/Los_Angeles", + "2024-03-11 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + 1, + dateDiffAndCast( + "week", + "2024-03-10 00:00:00 America/Los_Angeles", + "2024-03-17 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + 1, + dateDiffAndCast( + "month", + "2024-03-10 00:00:00 America/Los_Angeles", + "2024-04-10 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + 1, + dateDiffAndCast( + "quarter", + "2024-03-10 00:00:00 America/Los_Angeles", + "2024-06-10 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + 1, + dateDiffAndCast( + "year", + "2024-03-10 00:00:00 America/Los_Angeles", + "2025-03-10 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + -1, + dateDiffAndCast( + "day", + "2024-03-11 00:00:00 America/Los_Angeles", + "2024-03-10 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + -1, + dateDiffAndCast( + "week", + "2024-03-11 00:00:00 America/Los_Angeles", + "2024-03-04 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + -1, + dateDiffAndCast( + "month", + "2024-03-11 00:00:00 America/Los_Angeles", + "2024-02-11 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + -1, + dateDiffAndCast( + "quarter", + "2024-03-11 00:00:00 America/Los_Angeles", + "2023-12-11 00:00:00 America/Los_Angeles")); + EXPECT_EQ( + -1, + dateDiffAndCast( + "year", + "2024-03-11 00:00:00 America/Los_Angeles", + "2023-03-11 00:00:00 America/Los_Angeles")); } TEST_F(DateTimeFunctionsTest, parseDatetime) {