Skip to content

Commit

Permalink
Fix Presto's date_add UDF with TimestampAndTimeZone and DST (#11353)
Browse files Browse the repository at this point in the history
Summary:

Presto Java's date_add UDF treats the day the clocks move forward as a 23 hour day,
and the day the clocks move back as a 25 hour day.

This means for units greater than or equal to a day date_add with
TimestampWithTimeZone cannot simply use the addition on GMT.  It needs to check if
the time zone has changed between the original timestamp and the timestamp after
the addition and apply the change in time zone offset to the mills since epoch to
account for this long or short day.

Note that the for units less than a day, this does not apply.  E.g. if you add 24 hours
to a TimestampWithTimeZone and it cross a DST boundary, we do not need to apply
the change in offset like you would if you had added 1 day.

Differential Revision: D64982873
  • Loading branch information
Kevin Wilfong authored and facebook-github-bot committed Oct 29, 2024
1 parent e67f11b commit 97ec37f
Show file tree
Hide file tree
Showing 4 changed files with 186 additions and 53 deletions.
35 changes: 32 additions & 3 deletions velox/functions/prestosql/DateTimeImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "velox/functions/prestosql/types/TimestampWithTimeZoneType.h"
#include "velox/type/Timestamp.h"
#include "velox/type/TimestampConversion.h"
#include "velox/type/tz/TimeZoneMap.h"

namespace facebook::velox::functions {
namespace {
Expand Down Expand Up @@ -221,9 +222,37 @@ FOLLY_ALWAYS_INLINE int64_t addToTimestampWithTimezone(
int64_t timestampWithTimezone,
const DateTimeUnit unit,
const int32_t value) {
auto timestamp = unpackTimestampUtc(timestampWithTimezone);
auto finalTimeStamp = addToTimestamp(timestamp, unit, (int32_t)value);
return pack(finalTimeStamp, unpackZoneKeyId(timestampWithTimezone));
{
int64_t finalSysMs;
if (unit < DateTimeUnit::kDay) {
auto originalTimestamp = unpackTimestampUtc(timestampWithTimezone);
finalSysMs =
addToTimestamp(originalTimestamp, unit, (int32_t)value).toMillis();
} else {
// Use local time to handle crossing daylight savings time boundaries.
// E.g. the "day" when the clock moves back an hour is 25 hours long, and
// the day it moves forward is 23 hours long. Daylight savings time
// doesn't affect time units less than a day, and will produce incorrect
// results if we use local time.
const tz::TimeZone* timeZone =
tz::locateZone(unpackZoneKeyId(timestampWithTimezone));
auto originalTimestamp =
Timestamp::fromMillis(timeZone
->to_local(std::chrono::milliseconds(
unpackMillisUtc(timestampWithTimezone)))
.count());
auto updatedTimeStamp =
addToTimestamp(originalTimestamp, unit, (int32_t)value);
finalSysMs =
timeZone
->to_sys(
std::chrono::milliseconds(updatedTimeStamp.toMillis()),
tz::TimeZone::TChoose::kEarliest)
.count();
}

return pack(finalSysMs, unpackZoneKeyId(timestampWithTimezone));
}
}

FOLLY_ALWAYS_INLINE int64_t diffTimestamp(
Expand Down
72 changes: 72 additions & 0 deletions velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2400,6 +2400,78 @@ TEST_F(DateTimeFunctionsTest, dateAddTimestampWithTimeZone) {
EXPECT_EQ(
"2024-11-03 00:50:00.000 America/Los_Angeles",
dateAddAndCast("hour", -1, "2024-11-03 01:50:00 America/Los_Angeles"));
EXPECT_EQ(
"2024-11-04 00:00:00.000 America/Los_Angeles",
dateAddAndCast("day", 1, "2024-11-03 00:00:00.000 America/Los_Angeles"));
EXPECT_EQ(
"2024-11-10 00:00:00.000 America/Los_Angeles",
dateAddAndCast("week", 1, "2024-11-03 00:00:00.000 America/Los_Angeles"));
EXPECT_EQ(
"2024-12-03 00:00:00.000 America/Los_Angeles",
dateAddAndCast(
"month", 1, "2024-11-03 00:00:00.000 America/Los_Angeles"));
EXPECT_EQ(
"2025-02-03 00:00:00.000 America/Los_Angeles",
dateAddAndCast(
"quarter", 1, "2024-11-03 00:00:00.000 America/Los_Angeles"));
EXPECT_EQ(
"2025-11-03 00:00:00.000 America/Los_Angeles",
dateAddAndCast("year", 1, "2024-11-03 00:00:00.000 America/Los_Angeles"));
EXPECT_EQ(
"2024-11-03 00:00:00.000 America/Los_Angeles",
dateAddAndCast("day", -1, "2024-11-04 00:00:00.000 America/Los_Angeles"));
EXPECT_EQ(
"2024-10-28 00:00:00.000 America/Los_Angeles",
dateAddAndCast(
"week", -1, "2024-11-04 00:00:00.000 America/Los_Angeles"));
EXPECT_EQ(
"2024-10-04 00:00:00.000 America/Los_Angeles",
dateAddAndCast(
"month", -1, "2024-11-04 00:00:00.000 America/Los_Angeles"));
EXPECT_EQ(
"2024-08-04 00:00:00.000 America/Los_Angeles",
dateAddAndCast(
"quarter", -1, "2024-11-04 00:00:00.000 America/Los_Angeles"));
EXPECT_EQ(
"2023-11-04 00:00:00.000 America/Los_Angeles",
dateAddAndCast(
"year", -1, "2024-11-04 00:00:00.000 America/Los_Angeles"));
EXPECT_EQ(
"2024-03-11 00:00:00.000 America/Los_Angeles",
dateAddAndCast("day", 1, "2024-03-10 00:00:00.000 America/Los_Angeles"));
EXPECT_EQ(
"2024-03-17 00:00:00.000 America/Los_Angeles",
dateAddAndCast("week", 1, "2024-03-10 00:00:00.000 America/Los_Angeles"));
EXPECT_EQ(
"2024-04-10 00:00:00.000 America/Los_Angeles",
dateAddAndCast(
"month", 1, "2024-03-10 00:00:00.000 America/Los_Angeles"));
EXPECT_EQ(
"2024-06-10 00:00:00.000 America/Los_Angeles",
dateAddAndCast(
"quarter", 1, "2024-03-10 00:00:00.000 America/Los_Angeles"));
EXPECT_EQ(
"2025-03-10 00:00:00.000 America/Los_Angeles",
dateAddAndCast("year", 1, "2024-03-10 00:00:00.000 America/Los_Angeles"));
EXPECT_EQ(
"2024-03-10 00:00:00.000 America/Los_Angeles",
dateAddAndCast("day", -1, "2024-03-11 00:00:00.000 America/Los_Angeles"));
EXPECT_EQ(
"2024-03-04 00:00:00.000 America/Los_Angeles",
dateAddAndCast(
"week", -1, "2024-03-11 00:00:00.000 America/Los_Angeles"));
EXPECT_EQ(
"2024-02-11 00:00:00.000 America/Los_Angeles",
dateAddAndCast(
"month", -1, "2024-03-11 00:00:00.000 America/Los_Angeles"));
EXPECT_EQ(
"2023-12-11 00:00:00.000 America/Los_Angeles",
dateAddAndCast(
"quarter", -1, "2024-03-11 00:00:00.000 America/Los_Angeles"));
EXPECT_EQ(
"2023-03-11 00:00:00.000 America/Los_Angeles",
dateAddAndCast(
"year", -1, "2024-03-11 00:00:00.000 America/Los_Angeles"));
}

TEST_F(DateTimeFunctionsTest, dateDiffDate) {
Expand Down
94 changes: 46 additions & 48 deletions velox/type/tz/TimeZoneMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,7 @@
#include <fmt/core.h>
#include <folly/container/F14Map.h>
#include <folly/container/F14Set.h>
#include "velox/common/base/Exceptions.h"
#include "velox/common/testutil/TestValue.h"
#include "velox/external/date/tz.h"

using facebook::velox::common::testutil::TestValue;

Expand Down Expand Up @@ -224,33 +222,47 @@ std::string normalizeTimeZone(const std::string& originalZoneId) {
}

template <typename TDuration>
void validateRangeImpl(time_point<TDuration> timePoint) {
using namespace velox::date;
static constexpr auto kMinYear = date::year::min();
static constexpr auto kMaxYear = date::year::max();

auto year = year_month_day(floor<days>(timePoint)).year();

if (year < kMinYear || year > kMaxYear) {
// This is a special case where we intentionally throw
// VeloxRuntimeError to avoid it being suppressed by TRY().
VELOX_FAIL_UNSUPPORTED_INPUT_UNCATCHABLE(
"Timepoint is outside of supported year range: [{}, {}], got {}",
(int)kMinYear,
(int)kMaxYear,
(int)year);
TDuration toSysImpl(
const TDuration timestamp,
const TimeZone::TChoose choose,
const date::time_zone* tz,
const std::chrono::minutes offset) {
date::local_time<TDuration> timePoint{timestamp};
validateRange(date::sys_time<TDuration>{timestamp});

if (tz == nullptr) {
// We can ignore `choose` as time offset conversions are always linear.
return (timePoint - offset).time_since_epoch();
}
}

} // namespace
if (choose == TimeZone::TChoose::kFail) {
// By default, throws.
return date::zoned_time{tz, timePoint}.get_sys_time().time_since_epoch();
}

void validateRange(time_point<std::chrono::seconds> timePoint) {
validateRangeImpl(timePoint);
auto dateChoose = (choose == TimeZone::TChoose::kEarliest)
? date::choose::earliest
: date::choose::latest;
return date::zoned_time{tz, timePoint, dateChoose}
.get_sys_time()
.time_since_epoch();
}

void validateRange(time_point<std::chrono::milliseconds> timePoint) {
validateRangeImpl(timePoint);
template <typename TDuration>
TDuration toLocalImpl(
const TDuration timestamp,
const date::time_zone* tz,
const std::chrono::minutes offset) {
date::sys_time<TDuration> timePoint{timestamp};
validateRange(timePoint);

// If this is an offset time zone.
if (tz == nullptr) {
return (timePoint + offset).time_since_epoch();
}
return date::zoned_time{tz, timePoint}.get_local_time().time_since_epoch();
}
} // namespace

std::string getTimeZoneName(int64_t timeZoneID) {
return locateZone(timeZoneID, true)->name();
Expand Down Expand Up @@ -329,36 +341,22 @@ int16_t getTimeZoneID(int32_t offsetMinutes) {
TimeZone::seconds TimeZone::to_sys(
TimeZone::seconds timestamp,
TimeZone::TChoose choose) const {
date::local_seconds timePoint{timestamp};
validateRange(date::sys_seconds{timestamp});

if (tz_ == nullptr) {
// We can ignore `choose` as time offset conversions are always linear.
return (timePoint - offset_).time_since_epoch();
}

if (choose == TimeZone::TChoose::kFail) {
// By default, throws.
return date::zoned_time{tz_, timePoint}.get_sys_time().time_since_epoch();
}
return toSysImpl(timestamp, choose, tz_, offset_);
}

auto dateChoose = (choose == TimeZone::TChoose::kEarliest)
? date::choose::earliest
: date::choose::latest;
return date::zoned_time{tz_, timePoint, dateChoose}
.get_sys_time()
.time_since_epoch();
TimeZone::milliseconds TimeZone::to_sys(
TimeZone::milliseconds timestamp,
TimeZone::TChoose choose) const {
return toSysImpl(timestamp, choose, tz_, offset_);
}

TimeZone::seconds TimeZone::to_local(TimeZone::seconds timestamp) const {
date::sys_seconds timePoint{timestamp};
validateRange(timePoint);
return toLocalImpl(timestamp, tz_, offset_);
}

// If this is an offset time zone.
if (tz_ == nullptr) {
return (timePoint + offset_).time_since_epoch();
}
return date::zoned_time{tz_, timePoint}.get_local_time().time_since_epoch();
TimeZone::milliseconds TimeZone::to_local(
TimeZone::milliseconds timestamp) const {
return toLocalImpl(timestamp, tz_, offset_);
}

} // namespace facebook::velox::tz
38 changes: 36 additions & 2 deletions velox/type/tz/TimeZoneMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

#include <chrono>
#include <string>
#include "velox/common/base/Exceptions.h"
#include "velox/external/date/tz.h"

namespace facebook::velox::date {
class time_zone;
Expand Down Expand Up @@ -68,8 +70,24 @@ int16_t getTimeZoneID(int32_t offsetMinutes);
template <typename T>
using time_point = std::chrono::time_point<std::chrono::system_clock, T>;

void validateRange(time_point<std::chrono::seconds> timePoint);
void validateRange(time_point<std::chrono::milliseconds> timePoint);
template <typename TDuration>
void validateRange(time_point<TDuration> timePoint) {
using namespace velox::date;
static constexpr auto kMinYear = date::year::min();
static constexpr auto kMaxYear = date::year::max();

auto year = year_month_day(floor<days>(timePoint)).year();

if (year < kMinYear || year > kMaxYear) {
// This is a special case where we intentionally throw
// VeloxRuntimeError to avoid it being suppressed by TRY().
VELOX_FAIL_UNSUPPORTED_INPUT_UNCATCHABLE(
"Timepoint is outside of supported year range: [{}, {}], got {}",
(int)kMinYear,
(int)kMaxYear,
(int)year);
}
}

/// TimeZone is the proxy object for time zone management. It provides access to
/// time zone names, their IDs (as defined in TimeZoneDatabase.cpp and
Expand Down Expand Up @@ -113,6 +131,7 @@ class TimeZone {
}

using seconds = std::chrono::seconds;
using milliseconds = std::chrono::milliseconds;

/// Converts a local time (the time as perceived in the user time zone
/// represented by this object) to a system time (the corresponding time in
Expand All @@ -132,12 +151,15 @@ class TimeZone {
};

seconds to_sys(seconds timestamp, TChoose choose = TChoose::kFail) const;
milliseconds to_sys(milliseconds timestamp, TChoose choose = TChoose::kFail)
const;

/// Do the opposite conversion. Taking a system time (the time as perceived in
/// GMT), convert to the same instant in time as observed in the user local
/// time represented by this object). Note that this conversion is not
/// susceptible to the error above.
seconds to_local(seconds timestamp) const;
milliseconds to_local(milliseconds timestamp) const;

const std::string& name() const {
return timeZoneName_;
Expand All @@ -151,6 +173,18 @@ class TimeZone {
return tz_;
}

template <typename TDuration>
seconds getOffset(TDuration timestamp) const {
date::sys_time<TDuration> timePoint{timestamp};
validateRange(timePoint);

if (tz_ == nullptr) {
return offset_;
}

return date::zoned_time(tz_, timePoint).get_info().offset;
}

private:
const date::time_zone* tz_{nullptr};
const std::chrono::minutes offset_{0};
Expand Down

0 comments on commit 97ec37f

Please sign in to comment.