diff --git a/velox/type/Timestamp.cpp b/velox/type/Timestamp.cpp index 80da58485ad0..ffee970074f6 100644 --- a/velox/type/Timestamp.cpp +++ b/velox/type/Timestamp.cpp @@ -70,21 +70,19 @@ void Timestamp::toGMT(const tz::TimeZone& zone) { kMaxSeconds, "Timestamp seconds out of range for time zone adjustment"); - date::local_time localTime{ - std::chrono::seconds(seconds_)}; - std::chrono::time_point - sysTime; + std::chrono::seconds sysSeconds; try { - sysTime = zone.to_sys(localTime); + sysSeconds = zone.to_sys(std::chrono::seconds(seconds_)); } catch (const date::ambiguous_local_time&) { // If the time is ambiguous, pick the earlier possibility to be consistent // with Presto. - sysTime = zone.to_sys(localTime, date::choose::earliest); + sysSeconds = zone.to_sys( + std::chrono::seconds(seconds_), tz::TimeZone::TChoose::kEarliest); } catch (const date::nonexistent_local_time& error) { // If the time does not exist, fail the conversion. VELOX_USER_FAIL(error.what()); } - seconds_ = sysTime.time_since_epoch().count(); + seconds_ = sysSeconds.count(); } void Timestamp::toGMT(int16_t tzID) { @@ -142,7 +140,7 @@ void Timestamp::toTimezone(const tz::TimeZone& zone) { auto tp = toTimePointSec(); try { - seconds_ = zone.to_local(tp).time_since_epoch().count(); + seconds_ = zone.to_local(std::chrono::seconds(seconds_)).count(); } catch (const std::invalid_argument& e) { // Invalid argument means we hit a conversion not supported by // external/date. Need to throw a RuntimeError so that try() statements do diff --git a/velox/type/Timestamp.h b/velox/type/Timestamp.h index bf9d84d72979..df0e85c1170c 100644 --- a/velox/type/Timestamp.h +++ b/velox/type/Timestamp.h @@ -23,10 +23,13 @@ #include "velox/common/base/CheckedArithmetic.h" #include "velox/type/StringView.h" -#include "velox/type/tz/TimeZoneMap.h" namespace facebook::velox { +namespace tz { +class TimeZone; +} + enum class TimestampPrecision : int8_t { kMilliseconds = 3, // 10^3 milliseconds are equal to one second. 
kMicroseconds = 6, // 10^6 microseconds are equal to one second. diff --git a/velox/type/tz/TimeZoneMap.cpp b/velox/type/tz/TimeZoneMap.cpp index eeb896738ef5..f983823a0413 100644 --- a/velox/type/tz/TimeZoneMap.cpp +++ b/velox/type/tz/TimeZoneMap.cpp @@ -25,15 +25,20 @@ namespace facebook::velox::tz { +using TTimeZoneDatabase = std::vector>; +using TTimeZoneIndex = folly::F14FastMap; + // Defined in TimeZoneDatabase.cpp extern const std::vector>& getTimeZoneEntries(); -// TODO: The string will be moved to TimeZone in the next PR. -using TTimeZoneDatabase = std::vector>; -using TTimeZoneIndex = folly::F14FastMap; - namespace { +// Returns the offset in minutes for a specific time zone offset in the +// database. Do not call for tzID 0 (UTC / "+00:00"). +inline std::chrono::minutes getTimeZoneOffset(int16_t tzID) { + return std::chrono::minutes{(tzID <= 840) ? (tzID - 841) : (tzID - 840)}; +} + // Flattens the input vector of pairs into a vector, assuming that the // timezoneIDs are (mostly) sequential. Note that since they are "mostly" // senquential, the vector can have holes. But it is still more efficient than @@ -44,7 +49,23 @@ TTimeZoneDatabase buildTimeZoneDatabase( tzDatabase.resize(dbInput.back().first + 1); for (const auto& entry : dbInput) { - tzDatabase[entry.first] = std::make_unique(entry.second); + std::unique_ptr timeZonePtr; + + if (entry.first == 0) { + timeZonePtr = std::make_unique( + "UTC", entry.first, date::locate_zone("UTC")); + } else if (entry.first <= 1680) { + std::chrono::minutes offset = getTimeZoneOffset(entry.first); + timeZonePtr = + std::make_unique(entry.second, entry.first, offset); + } + // Every single other time zone entry (outside of offsets) needs to be + // available in external/date or this will throw. 
+ else { + timeZonePtr = std::make_unique( + entry.second, entry.first, date::locate_zone(entry.second)); + } + tzDatabase[entry.first] = std::move(timeZonePtr); } return tzDatabase; } @@ -59,14 +80,19 @@ const TTimeZoneDatabase& getTimeZoneDatabase() { // reverse look ups. TTimeZoneIndex buildTimeZoneIndex(const TTimeZoneDatabase& tzDatabase) { TTimeZoneIndex reversed; - reversed.reserve(tzDatabase.size() + 1); + reversed.reserve(tzDatabase.size() + 2); for (int16_t i = 0; i < tzDatabase.size(); ++i) { if (tzDatabase[i] != nullptr) { - reversed.emplace(boost::algorithm::to_lower_copy(*tzDatabase[i]), i); + reversed.emplace( + boost::algorithm::to_lower_copy(tzDatabase[i]->name()), + tzDatabase[i].get()); } } - reversed.emplace("utc", 0); + + // Add aliases to UTC. + reversed.emplace("+00:00", tzDatabase.front().get()); + reversed.emplace("-00:00", tzDatabase.front().get()); return reversed; } @@ -157,10 +183,10 @@ std::string getTimeZoneName(int64_t timeZoneID) { timeZoneDatabase[timeZoneID], "Unable to resolve timeZoneID '{}'", timeZoneID); - return *timeZoneDatabase[timeZoneID]; + return timeZoneDatabase[timeZoneID]->name(); } -int16_t getTimeZoneID(std::string_view timeZone, bool failOnError) { +const TimeZone* locateZone(std::string_view timeZone, bool failOnError) { const auto& timeZoneIndex = getTimeZoneIndex(); std::string timeZoneLowered; @@ -177,10 +203,16 @@ int16_t getTimeZoneID(std::string_view timeZone, bool failOnError) { if (it != timeZoneIndex.end()) { return it->second; } + if (failOnError) { VELOX_USER_FAIL("Unknown time zone: '{}'", timeZone); } - return -1; + return nullptr; +} + +int16_t getTimeZoneID(std::string_view timeZone, bool failOnError) { + const TimeZone* tz = locateZone(timeZone, failOnError); + return tz == nullptr ? 
-1 : tz->id(); } int16_t getTimeZoneID(int32_t offsetMinutes) { @@ -209,8 +241,37 @@ int16_t getTimeZoneID(int32_t offsetMinutes) { } } -const TimeZone* locateZone(std::string_view timeZone) { - return date::locate_zone(timeZone); +TimeZone::seconds TimeZone::to_sys( + TimeZone::seconds timestamp, + TimeZone::TChoose choose) const { + date::local_seconds timePoint{timestamp}; + + if (tz_ == nullptr) { + // We can ignore `choose` as time offset conversions are always linear. + return (timePoint - offset_).time_since_epoch(); + } + + if (choose == TimeZone::TChoose::kFail) { + // By default, throws. + return date::zoned_time{tz_, timePoint}.get_sys_time().time_since_epoch(); + } + + auto dateChoose = (choose == TimeZone::TChoose::kEarliest) + ? date::choose::earliest + : date::choose::latest; + return date::zoned_time{tz_, timePoint, dateChoose} + .get_sys_time() + .time_since_epoch(); +} + +TimeZone::seconds TimeZone::to_local(TimeZone::seconds timestamp) const { + date::sys_seconds timePoint{timestamp}; + + // If this is an offset time zone. + if (tz_ == nullptr) { + return (timePoint + offset_).time_since_epoch(); + } + return date::zoned_time{tz_, timePoint}.get_local_time().time_since_epoch(); } } // namespace facebook::velox::tz diff --git a/velox/type/tz/TimeZoneMap.h b/velox/type/tz/TimeZoneMap.h index 224facc2c536..34e44ce40db6 100644 --- a/velox/type/tz/TimeZoneMap.h +++ b/velox/type/tz/TimeZoneMap.h @@ -16,6 +16,7 @@ #pragma once +#include #include namespace facebook::velox::date { @@ -24,28 +25,25 @@ class time_zone; namespace facebook::velox::tz { -/// This library provides time zone lookup and mapping utilities, in addition to -/// functions to enable timestamp conversions across time zones. It leverages -/// the velox/external/date underneath to perform conversions. +/// This library provides time zone management primitives. 
It maintains an +/// internal static database which is constructed lazily on the first +/// access, based on TimeZoneDatabase.cpp and the local tzdata installed in your +/// system (through velox/external/date). /// -/// This library provides a thin layer of functionality on top of -/// velox/external/date for timezone lookup and conversions, so don't use the -/// external library directly. - -/// TimeZone is the object that allows conversions across timezones using the -/// .to_sys() and .to_local() methods, as documented in: -/// -/// https://howardhinnant.github.io/date/tz.html +/// It provides functions to look up TimeZone pointers based on time zone +/// name or ID, and to perform timestamp conversions across time zones. /// -using TimeZone = date::time_zone; +/// This library provides a layer of functionality on top of +/// velox/external/date, so do not use the external library directly for +/// time zone routines. -/// TimeZone pointers can be found using `locateZone()`. -/// -/// This function in mostly implemented by velox/external/date, and performs a -/// binary search in the internal time zone database. On the first call, -/// velox/external/date will initialize a static list of timezone, read from the -/// local tzdata database. -const TimeZone* locateZone(std::string_view timeZone); +class TimeZone; + +/// Looks up a TimeZone pointer based on a time zone name. This makes a hash +/// map access, and will construct the index on the first access. `failOnError` +/// controls whether to throw or return nullptr in case the time zone was not +/// found. +const TimeZone* locateZone(std::string_view timeZone, bool failOnError = true); /// Returns the timezone name associated with timeZoneID. std::string getTimeZoneName(int64_t timeZoneID); @@ -59,6 +57,89 @@ int16_t getTimeZoneID(std::string_view timeZone, bool failOnError = true); /// [-14:00, +14:00] range.
int16_t getTimeZoneID(int32_t offsetMinutes); +/// TimeZone is the proxy object for time zone management. It provides access to +/// time zone names, their IDs (as defined in TimeZoneDatabase.cpp and +/// consistent with Presto), and utilities for timestamp conversion across +/// timezones by leveraging the .to_sys() and .to_local() methods as documented +/// in: +/// +/// https://howardhinnant.github.io/date/tz.html +/// +/// Do not create your own objects; rather, look up a pointer by using one of +/// the methods above. +class TimeZone { + public: + // Constructor for regular time zones with a name and a pointer to + // external/date time zone database (from tzdata). + TimeZone( + std::string_view timeZoneName, + int16_t timeZoneID, + const date::time_zone* tz) + : tz_(tz), + offset_(0), + timeZoneName_(timeZoneName), + timeZoneID_(timeZoneID) {} + + // Constructor for time zone offsets ("+00:00"). + TimeZone( + std::string_view timeZoneName, + int16_t timeZoneID, + std::chrono::minutes offset) + : tz_(nullptr), + offset_(offset), + timeZoneName_(timeZoneName), + timeZoneID_(timeZoneID) {} + + // Do not copy it. + TimeZone(const TimeZone&) = delete; + TimeZone& operator=(const TimeZone&) = delete; + + using seconds = std::chrono::seconds; + + /// Converts a local time (the time as perceived in the user time zone + /// represented by this object) to a system time (the corresponding time in + /// GMT at the same instant). + /// + /// Conversions from local time to GMT are non-linear and may be ambiguous + /// during daylight saving transitions, or nonexistent. By default (kFail), + /// `to_sys()` will throw `date::ambiguous_local_time` and + /// `date::nonexistent_local_time` in these cases. + /// + /// You can override the behavior in ambiguous conversions by setting the + /// TChoose flag, but it will still throw in case of nonexistent conversions.
+ enum class TChoose { + kFail = 0, + kEarliest = 1, + kLatest = 2, + }; + + seconds to_sys(seconds timestamp, TChoose choose = TChoose::kFail) const; + + /// Do the opposite conversion. Taking a system time (the time as perceived in + /// GMT), convert to the same instant in time as observed in the user local + /// time represented by this object. Note that this conversion is not + /// susceptible to the error above. + seconds to_local(seconds timestamp) const; + + const std::string& name() const { + return timeZoneName_; + } + + int16_t id() const { + return timeZoneID_; + } + + const date::time_zone* tz() const { + return tz_; + } + + private: + const date::time_zone* tz_{nullptr}; + const std::chrono::minutes offset_{0}; + const std::string timeZoneName_; + const int16_t timeZoneID_; +}; + } // namespace facebook::velox::tz #ifdef VELOX_ENABLE_BACKWARD_COMPATIBILITY diff --git a/velox/type/tz/tests/TimeZoneMapTest.cpp b/velox/type/tz/tests/TimeZoneMapTest.cpp index 4e0f7818ec5d..b59f845157b5 100644 --- a/velox/type/tz/tests/TimeZoneMapTest.cpp +++ b/velox/type/tz/tests/TimeZoneMapTest.cpp @@ -23,6 +23,98 @@ namespace facebook::velox::tz { namespace { +using namespace std::chrono; + +TEST(TimeZoneMapTest, locateZoneID) { + auto locateZoneID = [&](std::string_view name) { + const auto* tz = locateZone(name); + EXPECT_NE(tz, nullptr); + return tz->id(); + }; + + EXPECT_EQ(0, locateZoneID("UTC")); + EXPECT_EQ(0, locateZoneID("+00:00")); + EXPECT_EQ(0, locateZoneID("-00:00")); + EXPECT_EQ(831, locateZoneID("-00:10")); + EXPECT_EQ(462, locateZoneID("-06:19")); + EXPECT_EQ(1315, locateZoneID("+07:55")); + EXPECT_EQ(1680, locateZoneID("+14:00")); + EXPECT_EQ(1720, locateZoneID("Africa/Maseru")); + EXPECT_EQ(2141, locateZoneID("Pacific/Marquesas")); + EXPECT_EQ(2215, locateZoneID("Asia/Chita")); + EXPECT_EQ(2233, locateZoneID("America/Ciudad_Juarez")); +} + +TEST(TimeZoneMapTest, locateZoneUTCAlias) { + auto locateZoneID = [&](std::string_view name) { + const auto* tz
= locateZone(name); + EXPECT_NE(tz, nullptr); + return tz->name(); + }; + + // Ensure all of these aliases resolve to a time zone called "UTC". + EXPECT_EQ("UTC", locateZoneID("UTC")); + EXPECT_EQ("UTC", locateZoneID("gmt")); + EXPECT_EQ("UTC", locateZoneID("Z")); + EXPECT_EQ("UTC", locateZoneID("zulu")); + EXPECT_EQ("UTC", locateZoneID("Greenwich")); + EXPECT_EQ("UTC", locateZoneID("gmt0")); + EXPECT_EQ("UTC", locateZoneID("GMT")); + EXPECT_EQ("UTC", locateZoneID("uct")); + EXPECT_EQ("UTC", locateZoneID("+00:00")); + EXPECT_EQ("UTC", locateZoneID("-00:00")); +} + +TEST(TimeZoneMapTest, offsetToLocal) { + auto toLocalTime = [&](std::string_view name, size_t ts) { + const auto* tz = locateZone(name); + EXPECT_NE(tz, nullptr); + return tz->to_local(seconds{ts}).count(); + }; + + // For offset time zones, to_local() adds the fixed offset to the timestamp. + EXPECT_EQ(0, toLocalTime("+00:00", 0)); + EXPECT_EQ(60, toLocalTime("+00:01", 0)); + EXPECT_EQ(-60, toLocalTime("-00:01", 0)); + EXPECT_EQ(3600, toLocalTime("+01:00", 0)); + EXPECT_EQ(-3660, toLocalTime("-01:01", 0)); + + // In "2024-07-25", America/Los_Angeles was in daylight savings time (UTC-07). + size_t ts = 1721890800; + EXPECT_EQ(toLocalTime("-07:00", ts), toLocalTime("America/Los_Angeles", ts)); + EXPECT_NE(toLocalTime("-08:00", ts), toLocalTime("America/Los_Angeles", ts)); + + // In "2024-01-01", it was not (UTC-08). + ts = 1704096000; + EXPECT_EQ(toLocalTime("-08:00", ts), toLocalTime("America/Los_Angeles", ts)); + EXPECT_NE(toLocalTime("-07:00", ts), toLocalTime("America/Los_Angeles", ts)); +} + +TEST(TimeZoneMapTest, offsetToSys) { + auto toSysTime = [&](std::string_view name, size_t ts) { + const auto* tz = locateZone(name); + EXPECT_NE(tz, nullptr); + return tz->to_sys(seconds{ts}).count(); + }; + + // For offset time zones, to_sys() subtracts the fixed offset.
+ EXPECT_EQ(0, toSysTime("+00:00", 0)); + EXPECT_EQ(-60, toSysTime("+00:01", 0)); + EXPECT_EQ(+60, toSysTime("-00:01", 0)); + EXPECT_EQ(-3600, toSysTime("+01:00", 0)); + EXPECT_EQ(+3660, toSysTime("-01:01", 0)); + + // In "2024-07-25", America/Los_Angeles was in daylight savings time (UTC-07). + size_t ts = 1721890800; + EXPECT_EQ(toSysTime("-07:00", ts), toSysTime("America/Los_Angeles", ts)); + EXPECT_NE(toSysTime("-08:00", ts), toSysTime("America/Los_Angeles", ts)); + + // In "2024-01-01", it was not (UTC-08). + ts = 1704096000; + EXPECT_EQ(toSysTime("-08:00", ts), toSysTime("America/Los_Angeles", ts)); + EXPECT_NE(toSysTime("-07:00", ts), toSysTime("America/Los_Angeles", ts)); +} + TEST(TimeZoneMapTest, getTimeZoneName) { EXPECT_EQ("America/Los_Angeles", getTimeZoneName(1825)); EXPECT_EQ("Europe/Moscow", getTimeZoneName(2079)); @@ -30,6 +122,7 @@ TEST(TimeZoneMapTest, getTimeZoneName) { EXPECT_EQ("Europe/Kyiv", getTimeZoneName(2232)); EXPECT_EQ("America/Ciudad_Juarez", getTimeZoneName(2233)); EXPECT_EQ("-00:01", getTimeZoneName(840)); + EXPECT_EQ("UTC", getTimeZoneName(0)); } TEST(TimeZoneMapTest, getTimeZoneID) { @@ -73,7 +166,8 @@ TEST(TimeZoneMapTest, getTimeZoneIDFromOffset) { return getTimeZoneName(getTimeZoneID(offset)); }; - EXPECT_EQ("+00:00", nameFromOffset(0)); + // "+00:00" is an alias to UTC. + EXPECT_EQ("UTC", nameFromOffset(0)); EXPECT_EQ("+05:30", nameFromOffset(5 * 60 + 30)); EXPECT_EQ("-08:00", nameFromOffset(-8 * 60)); EXPECT_EQ("+02:17", nameFromOffset(2 * 60 + 17));