Skip to content

Commit

Permalink
Fix Presto's date_diff UDF with TimestampAndTimeZone and DST (faceboo…
Browse files Browse the repository at this point in the history
…kincubator#11380)

Summary:

Presto Java's date_diff UDF respects DST for units greater than or equal to a day, but
for units less than a day computes the diff on the system time (GMT).

This means for units less than a day date_diff with TimestampWithTimeZone cannot
compute the difference of the local times.  It needs to use system time to be
consistent.

Note that the for units greater than or equal to a day, this does not apply, and we
should continue to use the local time to compute the difference.

This is analogous to the change made to date_add in
facebookincubator#11353

Reviewed By: xiaoxmeng

Differential Revision: D65165953
  • Loading branch information
Kevin Wilfong authored and facebook-github-bot committed Oct 30, 2024
1 parent c255b78 commit dad4d83
Show file tree
Hide file tree
Showing 3 changed files with 193 additions and 4 deletions.
9 changes: 5 additions & 4 deletions velox/functions/prestosql/DateTimeFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -1384,11 +1384,12 @@ struct DateDiffFunction : public TimestampWithTimezoneSupport<T> {
// Presto's behavior is to use the time zone of the first parameter to
// perform the calculation. Note that always normalizing to UTC is not
// correct as calculations may cross daylight savings boundaries.
auto timestamp1 = this->toTimestamp(timestampWithTz1, false);
auto timestamp2 = this->toTimestamp(timestampWithTz2, true);
timestamp2.toTimezone(*tz::locateZone(unpackZoneKeyId(*timestampWithTz1)));
auto timeZoneId = unpackZoneKeyId(*timestampWithTz1);

result = diffTimestamp(unit, timestamp1, timestamp2);
result = diffTimestampWithTimeZone(
unit,
*timestampWithTz1,
pack(unpackMillisUtc(*timestampWithTz2), timeZoneId));
}
};

Expand Down
39 changes: 39 additions & 0 deletions velox/functions/prestosql/DateTimeImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,45 @@ FOLLY_ALWAYS_INLINE int64_t diffTimestamp(
VELOX_UNREACHABLE("Unsupported datetime unit");
}

FOLLY_ALWAYS_INLINE int64_t diffTimestampWithTimeZone(
const DateTimeUnit unit,
const int64_t fromTimestampWithTimeZone,
const int64_t toTimestampWithTimeZone) {
auto fromTimeZoneId = unpackZoneKeyId(fromTimestampWithTimeZone);
auto toTimeZoneId = unpackZoneKeyId(toTimestampWithTimeZone);
VELOX_CHECK_EQ(
fromTimeZoneId,
toTimeZoneId,
"diffTimestampWithTimeZone must receive timestamps in the same time zone.");

Timestamp fromTimestamp;
Timestamp toTimestamp;

if (unit < DateTimeUnit::kDay) {
fromTimestamp = unpackTimestampUtc(fromTimestampWithTimeZone);
toTimestamp = unpackTimestampUtc(toTimestampWithTimeZone);
} else {
// Use local time to handle crossing daylight savings time boundaries.
// E.g. the "day" when the clock moves back an hour is 25 hours long, and
// the day it moves forward is 23 hours long. Daylight savings time
// doesn't affect time units less than a day, and will produce incorrect
// results if we use local time.
const tz::TimeZone* timeZone = tz::locateZone(fromTimeZoneId);
fromTimestamp = Timestamp::fromMillis(
timeZone
->to_local(std::chrono::milliseconds(
unpackMillisUtc(fromTimestampWithTimeZone)))
.count());
toTimestamp =
Timestamp::fromMillis(timeZone
->to_local(std::chrono::milliseconds(
unpackMillisUtc(toTimestampWithTimeZone)))
.count());
}

return diffTimestamp(unit, fromTimestamp, toTimestamp);
}

FOLLY_ALWAYS_INLINE
int64_t diffDate(
const DateTimeUnit unit,
Expand Down
149 changes: 149 additions & 0 deletions velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2953,6 +2953,155 @@ TEST_F(DateTimeFunctionsTest, dateDiffTimestampWithTimezone) {
"day",
TimestampWithTimezone(a, "America/Los_Angeles"),
TimestampWithTimezone(b, "America/Los_Angeles")));

auto dateDiffAndCast = [&](std::optional<std::string> unit,
std::optional<std::string> timestampString1,
std::optional<std::string> timestampString2) {
return evaluateOnce<int64_t>(
"date_diff(c0, cast(c1 as timestamp with time zone), cast(c2 as timestamp with time zone))",
unit,
timestampString1,
timestampString2);
};

EXPECT_EQ(
1,
dateDiffAndCast(
"hour",
"2024-03-10 01:50:00 America/Los_Angeles",
"2024-03-10 03:50:00 America/Los_Angeles"));
EXPECT_EQ(
0,
dateDiffAndCast(
"hour",
"2024-11-03 01:50:00 America/Los_Angeles",
"2024-11-03 01:50:00 America/Los_Angeles"));
EXPECT_EQ(
-1,
dateDiffAndCast(
"hour",
"2024-11-03 01:50:00 America/Los_Angeles",
"2024-11-03 00:50:00 America/Los_Angeles"));
EXPECT_EQ(
1,
dateDiffAndCast(
"day",
"2024-11-03 00:00:00 America/Los_Angeles",
"2024-11-04 00:00:00 America/Los_Angeles"));
EXPECT_EQ(
1,
dateDiffAndCast(
"week",
"2024-11-03 00:00:00 America/Los_Angeles",
"2024-11-10 00:00:00 America/Los_Angeles"));
EXPECT_EQ(
1,
dateDiffAndCast(
"month",
"2024-11-03 00:00:00 America/Los_Angeles",
"2024-12-03 00:00:00 America/Los_Angeles"));
EXPECT_EQ(
1,
dateDiffAndCast(
"quarter",
"2024-11-03 00:00:00 America/Los_Angeles",
"2025-02-03 00:00:00 America/Los_Angeles"));
EXPECT_EQ(
1,
dateDiffAndCast(
"year",
"2024-11-03 00:00:00 America/Los_Angeles",
"2025-11-03 00:00:00 America/Los_Angeles"));
EXPECT_EQ(
-1,
dateDiffAndCast(
"day",
"2024-11-04 00:00:00 America/Los_Angeles",
"2024-11-03 00:00:00 America/Los_Angeles"));
EXPECT_EQ(
-1,
dateDiffAndCast(
"week",
"2024-11-04 00:00:00 America/Los_Angeles",
"2024-10-28 00:00:00 America/Los_Angeles"));
EXPECT_EQ(
-1,
dateDiffAndCast(
"month",
"2024-11-04 00:00:00 America/Los_Angeles",
"2024-10-04 00:00:00 America/Los_Angeles"));
EXPECT_EQ(
-1,
dateDiffAndCast(
"quarter",
"2024-11-04 00:00:00 America/Los_Angeles",
"2024-08-04 00:00:00 America/Los_Angeles"));
EXPECT_EQ(
-1,
dateDiffAndCast(
"year",
"2024-11-04 00:00:00 America/Los_Angeles",
"2023-11-04 00:00:00 America/Los_Angeles"));
EXPECT_EQ(
1,
dateDiffAndCast(
"day",
"2024-03-10 00:00:00 America/Los_Angeles",
"2024-03-11 00:00:00 America/Los_Angeles"));
EXPECT_EQ(
1,
dateDiffAndCast(
"week",
"2024-03-10 00:00:00 America/Los_Angeles",
"2024-03-17 00:00:00 America/Los_Angeles"));
EXPECT_EQ(
1,
dateDiffAndCast(
"month",
"2024-03-10 00:00:00 America/Los_Angeles",
"2024-04-10 00:00:00 America/Los_Angeles"));
EXPECT_EQ(
1,
dateDiffAndCast(
"quarter",
"2024-03-10 00:00:00 America/Los_Angeles",
"2024-06-10 00:00:00 America/Los_Angeles"));
EXPECT_EQ(
1,
dateDiffAndCast(
"year",
"2024-03-10 00:00:00 America/Los_Angeles",
"2025-03-10 00:00:00 America/Los_Angeles"));
EXPECT_EQ(
-1,
dateDiffAndCast(
"day",
"2024-03-11 00:00:00 America/Los_Angeles",
"2024-03-10 00:00:00 America/Los_Angeles"));
EXPECT_EQ(
-1,
dateDiffAndCast(
"week",
"2024-03-11 00:00:00 America/Los_Angeles",
"2024-03-04 00:00:00 America/Los_Angeles"));
EXPECT_EQ(
-1,
dateDiffAndCast(
"month",
"2024-03-11 00:00:00 America/Los_Angeles",
"2024-02-11 00:00:00 America/Los_Angeles"));
EXPECT_EQ(
-1,
dateDiffAndCast(
"quarter",
"2024-03-11 00:00:00 America/Los_Angeles",
"2023-12-11 00:00:00 America/Los_Angeles"));
EXPECT_EQ(
-1,
dateDiffAndCast(
"year",
"2024-03-11 00:00:00 America/Los_Angeles",
"2023-03-11 00:00:00 America/Los_Angeles"));
}

TEST_F(DateTimeFunctionsTest, parseDatetime) {
Expand Down

0 comments on commit dad4d83

Please sign in to comment.