From db11b69c99e0cf1567bf8d5341a9dd78baf1749b Mon Sep 17 00:00:00 2001 From: Pedro Eugenio Rocha Pedreira Date: Fri, 26 Jul 2024 12:36:54 -0700 Subject: [PATCH] Add support for time zone offsets to TimeZone (#10577) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/10577 Adding support for time zone offsets to TimeZone. Now, we will be able to clean up the callsites to use this single APIs, which will contain time zone name, ID, and conversion capabilities in a more consistent manner. Part of https://github.com/facebookincubator/velox/issues/10101 Reviewed By: mbasmanova Differential Revision: D60213004 fbshipit-source-id: 186809ba397b494b66fbf8d48b04028fc568caa7 --- velox/type/Timestamp.cpp | 14 ++- velox/type/Timestamp.h | 5 +- velox/type/tz/TimeZoneMap.cpp | 87 ++++++++++++++--- velox/type/tz/TimeZoneMap.h | 119 ++++++++++++++++++++---- velox/type/tz/tests/TimeZoneMapTest.cpp | 96 ++++++++++++++++++- 5 files changed, 279 insertions(+), 42 deletions(-) diff --git a/velox/type/Timestamp.cpp b/velox/type/Timestamp.cpp index 80da58485ad0..ffee970074f6 100644 --- a/velox/type/Timestamp.cpp +++ b/velox/type/Timestamp.cpp @@ -70,21 +70,19 @@ void Timestamp::toGMT(const tz::TimeZone& zone) { kMaxSeconds, "Timestamp seconds out of range for time zone adjustment"); - date::local_time localTime{ - std::chrono::seconds(seconds_)}; - std::chrono::time_point - sysTime; + std::chrono::seconds sysSeconds; try { - sysTime = zone.to_sys(localTime); + sysSeconds = zone.to_sys(std::chrono::seconds(seconds_)); } catch (const date::ambiguous_local_time&) { // If the time is ambiguous, pick the earlier possibility to be consistent // with Presto. - sysTime = zone.to_sys(localTime, date::choose::earliest); + sysSeconds = zone.to_sys( + std::chrono::seconds(seconds_), tz::TimeZone::TChoose::kEarliest); } catch (const date::nonexistent_local_time& error) { // If the time does not exist, fail the conversion. 
VELOX_USER_FAIL(error.what()); } - seconds_ = sysTime.time_since_epoch().count(); + seconds_ = sysSeconds.count(); } void Timestamp::toGMT(int16_t tzID) { @@ -142,7 +140,7 @@ void Timestamp::toTimezone(const tz::TimeZone& zone) { auto tp = toTimePointSec(); try { - seconds_ = zone.to_local(tp).time_since_epoch().count(); + seconds_ = zone.to_local(std::chrono::seconds(seconds_)).count(); } catch (const std::invalid_argument& e) { // Invalid argument means we hit a conversion not supported by // external/date. Need to throw a RuntimeError so that try() statements do diff --git a/velox/type/Timestamp.h b/velox/type/Timestamp.h index bf9d84d72979..df0e85c1170c 100644 --- a/velox/type/Timestamp.h +++ b/velox/type/Timestamp.h @@ -23,10 +23,13 @@ #include "velox/common/base/CheckedArithmetic.h" #include "velox/type/StringView.h" -#include "velox/type/tz/TimeZoneMap.h" namespace facebook::velox { +namespace tz { +class TimeZone; +} + enum class TimestampPrecision : int8_t { kMilliseconds = 3, // 10^3 milliseconds are equal to one second. kMicroseconds = 6, // 10^6 microseconds are equal to one second. diff --git a/velox/type/tz/TimeZoneMap.cpp b/velox/type/tz/TimeZoneMap.cpp index eeb896738ef5..f983823a0413 100644 --- a/velox/type/tz/TimeZoneMap.cpp +++ b/velox/type/tz/TimeZoneMap.cpp @@ -25,15 +25,20 @@ namespace facebook::velox::tz { +using TTimeZoneDatabase = std::vector>; +using TTimeZoneIndex = folly::F14FastMap; + // Defined in TimeZoneDatabase.cpp extern const std::vector>& getTimeZoneEntries(); -// TODO: The string will be moved to TimeZone in the next PR. -using TTimeZoneDatabase = std::vector>; -using TTimeZoneIndex = folly::F14FastMap; - namespace { +// Returns the offset in minutes for a specific time zone offset in the +// database. Do not call for tzID 0 (UTC / "+00:00"). +inline std::chrono::minutes getTimeZoneOffset(int16_t tzID) { + return std::chrono::minutes{(tzID <= 840) ? 
(tzID - 841) : (tzID - 840)}; +} + // Flattens the input vector of pairs into a vector, assuming that the // timezoneIDs are (mostly) sequential. Note that since they are "mostly" // senquential, the vector can have holes. But it is still more efficient than @@ -44,7 +49,23 @@ TTimeZoneDatabase buildTimeZoneDatabase( tzDatabase.resize(dbInput.back().first + 1); for (const auto& entry : dbInput) { - tzDatabase[entry.first] = std::make_unique(entry.second); + std::unique_ptr timeZonePtr; + + if (entry.first == 0) { + timeZonePtr = std::make_unique( + "UTC", entry.first, date::locate_zone("UTC")); + } else if (entry.first <= 1680) { + std::chrono::minutes offset = getTimeZoneOffset(entry.first); + timeZonePtr = + std::make_unique(entry.second, entry.first, offset); + } + // Every single other time zone entry (outside of offsets) needs to be + // available in external/date or this will throw. + else { + timeZonePtr = std::make_unique( + entry.second, entry.first, date::locate_zone(entry.second)); + } + tzDatabase[entry.first] = std::move(timeZonePtr); } return tzDatabase; } @@ -59,14 +80,19 @@ const TTimeZoneDatabase& getTimeZoneDatabase() { // reverse look ups. TTimeZoneIndex buildTimeZoneIndex(const TTimeZoneDatabase& tzDatabase) { TTimeZoneIndex reversed; - reversed.reserve(tzDatabase.size() + 1); + reversed.reserve(tzDatabase.size() + 2); for (int16_t i = 0; i < tzDatabase.size(); ++i) { if (tzDatabase[i] != nullptr) { - reversed.emplace(boost::algorithm::to_lower_copy(*tzDatabase[i]), i); + reversed.emplace( + boost::algorithm::to_lower_copy(tzDatabase[i]->name()), + tzDatabase[i].get()); } } - reversed.emplace("utc", 0); + + // Add aliases to UTC. 
+ reversed.emplace("+00:00", tzDatabase.front().get()); + reversed.emplace("-00:00", tzDatabase.front().get()); return reversed; } @@ -157,10 +183,10 @@ std::string getTimeZoneName(int64_t timeZoneID) { timeZoneDatabase[timeZoneID], "Unable to resolve timeZoneID '{}'", timeZoneID); - return *timeZoneDatabase[timeZoneID]; + return timeZoneDatabase[timeZoneID]->name(); } -int16_t getTimeZoneID(std::string_view timeZone, bool failOnError) { +const TimeZone* locateZone(std::string_view timeZone, bool failOnError) { const auto& timeZoneIndex = getTimeZoneIndex(); std::string timeZoneLowered; @@ -177,10 +203,16 @@ int16_t getTimeZoneID(std::string_view timeZone, bool failOnError) { if (it != timeZoneIndex.end()) { return it->second; } + if (failOnError) { VELOX_USER_FAIL("Unknown time zone: '{}'", timeZone); } - return -1; + return nullptr; +} + +int16_t getTimeZoneID(std::string_view timeZone, bool failOnError) { + const TimeZone* tz = locateZone(timeZone, failOnError); + return tz == nullptr ? -1 : tz->id(); } int16_t getTimeZoneID(int32_t offsetMinutes) { @@ -209,8 +241,37 @@ int16_t getTimeZoneID(int32_t offsetMinutes) { } } -const TimeZone* locateZone(std::string_view timeZone) { - return date::locate_zone(timeZone); +TimeZone::seconds TimeZone::to_sys( + TimeZone::seconds timestamp, + TimeZone::TChoose choose) const { + date::local_seconds timePoint{timestamp}; + + if (tz_ == nullptr) { + // We can ignore `choose` as time offset conversions are always linear. + return (timePoint - offset_).time_since_epoch(); + } + + if (choose == TimeZone::TChoose::kFail) { + // By default, throws. + return date::zoned_time{tz_, timePoint}.get_sys_time().time_since_epoch(); + } + + auto dateChoose = (choose == TimeZone::TChoose::kEarliest) + ? 
date::choose::earliest + : date::choose::latest; + return date::zoned_time{tz_, timePoint, dateChoose} + .get_sys_time() + .time_since_epoch(); +} + +TimeZone::seconds TimeZone::to_local(TimeZone::seconds timestamp) const { + date::sys_seconds timePoint{timestamp}; + + // If this is an offset time zone. + if (tz_ == nullptr) { + return (timePoint + offset_).time_since_epoch(); + } + return date::zoned_time{tz_, timePoint}.get_local_time().time_since_epoch(); } } // namespace facebook::velox::tz diff --git a/velox/type/tz/TimeZoneMap.h b/velox/type/tz/TimeZoneMap.h index 224facc2c536..34e44ce40db6 100644 --- a/velox/type/tz/TimeZoneMap.h +++ b/velox/type/tz/TimeZoneMap.h @@ -16,6 +16,7 @@ #pragma once +#include #include namespace facebook::velox::date { @@ -24,28 +25,25 @@ class time_zone; namespace facebook::velox::tz { -/// This library provides time zone lookup and mapping utilities, in addition to -/// functions to enable timestamp conversions across time zones. It leverages -/// the velox/external/date underneath to perform conversions. +/// This library provides time zone management primitives. It maintains an +/// internal static database which is constructed lazily on the first +/// access, based on TimeZoneDatabase.cpp and the local tzdata installed in your +/// system (through velox/external/date). /// -/// This library provides a thin layer of functionality on top of -/// velox/external/date for timezone lookup and conversions, so don't use the -/// external library directly. - -/// TimeZone is the object that allows conversions across timezones using the -/// .to_sys() and .to_local() methods, as documented in: -/// -/// https://howardhinnant.github.io/date/tz.html +/// It provides functions to look up TimeZone pointers based on time zone +/// name or ID, and to perform timestamp conversions across time zones.
/// +using TimeZone = date::time_zone; +/// This library provides a layer of functionality on top of +/// velox/external/date, so do not use the external library directly for +/// time zone routines. -/// TimeZone pointers can be found using `locateZone()`. -/// -/// This function in mostly implemented by velox/external/date, and performs a -/// binary search in the internal time zone database. On the first call, -/// velox/external/date will initialize a static list of timezone, read from the -/// local tzdata database. -const TimeZone* locateZone(std::string_view timeZone); +class TimeZone; + +/// Looks up a TimeZone pointer based on a time zone name. This makes a hash +/// map access, and will construct the index on the first access. `failOnError` +/// controls whether to throw or return nullptr in case the time zone was not +/// found. +const TimeZone* locateZone(std::string_view timeZone, bool failOnError = true); /// Returns the timezone name associated with timeZoneID. std::string getTimeZoneName(int64_t timeZoneID); @@ -59,6 +57,89 @@ int16_t getTimeZoneID(std::string_view timeZone, bool failOnError = true); /// [-14:00, +14:00] range. int16_t getTimeZoneID(int32_t offsetMinutes); +/// TimeZone is the proxy object for time zone management. It provides access to +/// time zone names, their IDs (as defined in TimeZoneDatabase.cpp and +/// consistent with Presto), and utilities for timestamp conversion across +/// timezones by leveraging the .to_sys() and .to_local() methods as documented +/// in: +/// +/// https://howardhinnant.github.io/date/tz.html +/// +/// Do not create your own objects; rather, look up a pointer by using one of +/// the methods above. +class TimeZone { + public: + // Constructor for regular time zones with a name and a pointer to + // external/date time zone database (from tzdata).
+ TimeZone( + std::string_view timeZoneName, + int16_t timeZoneID, + const date::time_zone* tz) + : tz_(tz), + offset_(0), + timeZoneName_(timeZoneName), + timeZoneID_(timeZoneID) {} + + // Constructor for time zone offsets ("+00:00"). + TimeZone( + std::string_view timeZoneName, + int16_t timeZoneID, + std::chrono::minutes offset) + : tz_(nullptr), + offset_(offset), + timeZoneName_(timeZoneName), + timeZoneID_(timeZoneID) {} + + // Do not copy it. + TimeZone(const TimeZone&) = delete; + TimeZone& operator=(const TimeZone&) = delete; + + using seconds = std::chrono::seconds; + + /// Converts a local time (the time as perceived in the user time zone + /// represented by this object) to a system time (the corresponding time in + /// GMT at the same instant). + /// + /// Conversions from local time to GMT are non-linear and may be ambiguous + /// during daylight saving transitions, or nonexistent. By default (kFail), + /// `to_sys()` will throw `date::ambiguous_local_time` and + /// `date::nonexistent_local_time` in these cases. + /// + /// You can override the behavior in ambiguous conversions by setting the + /// TChoose flag, but it will still throw in case of nonexistent conversions. + enum class TChoose { + kFail = 0, + kEarliest = 1, + kLatest = 2, + }; + + seconds to_sys(seconds timestamp, TChoose choose = TChoose::kFail) const; + + /// Does the opposite conversion: takes a system time (the time as perceived + /// in GMT) and converts it to the same instant in time as observed in the + /// user local time represented by this object. Note that this conversion is + /// not susceptible to the error above.
+ seconds to_local(seconds timestamp) const; + + const std::string& name() const { + return timeZoneName_; + } + + int16_t id() const { + return timeZoneID_; + } + + const date::time_zone* tz() const { + return tz_; + } + + private: + const date::time_zone* tz_{nullptr}; + const std::chrono::minutes offset_{0}; + const std::string timeZoneName_; + const int16_t timeZoneID_; +}; + } // namespace facebook::velox::tz #ifdef VELOX_ENABLE_BACKWARD_COMPATIBILITY diff --git a/velox/type/tz/tests/TimeZoneMapTest.cpp b/velox/type/tz/tests/TimeZoneMapTest.cpp index 4e0f7818ec5d..b59f845157b5 100644 --- a/velox/type/tz/tests/TimeZoneMapTest.cpp +++ b/velox/type/tz/tests/TimeZoneMapTest.cpp @@ -23,6 +23,98 @@ namespace facebook::velox::tz { namespace { +using namespace std::chrono; + +TEST(TimeZoneMapTest, locateZoneID) { + auto locateZoneID = [&](std::string_view name) { + const auto* tz = locateZone(name); + EXPECT_NE(tz, nullptr); + return tz->id(); + }; + + EXPECT_EQ(0, locateZoneID("UTC")); + EXPECT_EQ(0, locateZoneID("+00:00")); + EXPECT_EQ(0, locateZoneID("-00:00")); + EXPECT_EQ(831, locateZoneID("-00:10")); + EXPECT_EQ(462, locateZoneID("-06:19")); + EXPECT_EQ(1315, locateZoneID("+07:55")); + EXPECT_EQ(1680, locateZoneID("+14:00")); + EXPECT_EQ(1720, locateZoneID("Africa/Maseru")); + EXPECT_EQ(2141, locateZoneID("Pacific/Marquesas")); + EXPECT_EQ(2215, locateZoneID("Asia/Chita")); + EXPECT_EQ(2233, locateZoneID("America/Ciudad_Juarez")); +} + +TEST(TimeZoneMapTest, locateZoneUTCAlias) { + auto locateZoneID = [&](std::string_view name) { + const auto* tz = locateZone(name); + EXPECT_NE(tz, nullptr); + return tz->name(); + }; + + // Ensure all of these aliases resolve to a time zone called "UTC". 
+ EXPECT_EQ("UTC", locateZoneID("UTC")); + EXPECT_EQ("UTC", locateZoneID("gmt")); + EXPECT_EQ("UTC", locateZoneID("Z")); + EXPECT_EQ("UTC", locateZoneID("zulu")); + EXPECT_EQ("UTC", locateZoneID("Greenwich")); + EXPECT_EQ("UTC", locateZoneID("gmt0")); + EXPECT_EQ("UTC", locateZoneID("GMT")); + EXPECT_EQ("UTC", locateZoneID("uct")); + EXPECT_EQ("UTC", locateZoneID("+00:00")); + EXPECT_EQ("UTC", locateZoneID("-00:00")); +} + +TEST(TimeZoneMapTest, offsetToLocal) { + auto toLocalTime = [&](std::string_view name, size_t ts) { + const auto* tz = locateZone(name); + EXPECT_NE(tz, nullptr); + return tz->to_local(seconds{ts}).count(); + }; + + // Converting to local time adds the fixed offset to the timestamp. + EXPECT_EQ(0, toLocalTime("+00:00", 0)); + EXPECT_EQ(60, toLocalTime("+00:01", 0)); + EXPECT_EQ(-60, toLocalTime("-00:01", 0)); + EXPECT_EQ(3600, toLocalTime("+01:00", 0)); + EXPECT_EQ(-3660, toLocalTime("-01:01", 0)); + + // In "2024-07-25", America/Los_Angeles was in daylight savings time (UTC-07). + size_t ts = 1721890800; + EXPECT_EQ(toLocalTime("-07:00", ts), toLocalTime("America/Los_Angeles", ts)); + EXPECT_NE(toLocalTime("-08:00", ts), toLocalTime("America/Los_Angeles", ts)); + + // In "2024-01-01", it was not (UTC-08). + ts = 1704096000; + EXPECT_EQ(toLocalTime("-08:00", ts), toLocalTime("America/Los_Angeles", ts)); + EXPECT_NE(toLocalTime("-07:00", ts), toLocalTime("America/Los_Angeles", ts)); +} + +TEST(TimeZoneMapTest, offsetToSys) { + auto toSysTime = [&](std::string_view name, size_t ts) { + const auto* tz = locateZone(name); + EXPECT_NE(tz, nullptr); + return tz->to_sys(seconds{ts}).count(); + }; + + // Converting to system time subtracts the fixed offset from the timestamp.
+ EXPECT_EQ(0, toSysTime("+00:00", 0)); + EXPECT_EQ(-60, toSysTime("+00:01", 0)); + EXPECT_EQ(+60, toSysTime("-00:01", 0)); + EXPECT_EQ(-3600, toSysTime("+01:00", 0)); + EXPECT_EQ(+3660, toSysTime("-01:01", 0)); + + // In "2024-07-25", America/Los_Angeles was in daylight savings time (UTC-07). + size_t ts = 1721890800; + EXPECT_EQ(toSysTime("-07:00", ts), toSysTime("America/Los_Angeles", ts)); + EXPECT_NE(toSysTime("-08:00", ts), toSysTime("America/Los_Angeles", ts)); + + // In "2024-01-01", it was not (UTC-08). + ts = 1704096000; + EXPECT_EQ(toSysTime("-08:00", ts), toSysTime("America/Los_Angeles", ts)); + EXPECT_NE(toSysTime("-07:00", ts), toSysTime("America/Los_Angeles", ts)); +} + TEST(TimeZoneMapTest, getTimeZoneName) { EXPECT_EQ("America/Los_Angeles", getTimeZoneName(1825)); EXPECT_EQ("Europe/Moscow", getTimeZoneName(2079)); @@ -30,6 +122,7 @@ TEST(TimeZoneMapTest, getTimeZoneName) { EXPECT_EQ("Europe/Kyiv", getTimeZoneName(2232)); EXPECT_EQ("America/Ciudad_Juarez", getTimeZoneName(2233)); EXPECT_EQ("-00:01", getTimeZoneName(840)); + EXPECT_EQ("UTC", getTimeZoneName(0)); } TEST(TimeZoneMapTest, getTimeZoneID) { @@ -73,7 +166,8 @@ TEST(TimeZoneMapTest, getTimeZoneIDFromOffset) { return getTimeZoneName(getTimeZoneID(offset)); }; - EXPECT_EQ("+00:00", nameFromOffset(0)); + // "+00:00" is an alias to UTC. + EXPECT_EQ("UTC", nameFromOffset(0)); EXPECT_EQ("+05:30", nameFromOffset(5 * 60 + 30)); EXPECT_EQ("-08:00", nameFromOffset(-8 * 60)); EXPECT_EQ("+02:17", nameFromOffset(2 * 60 + 17));