Skip to content

Commit

Permalink
Add support for time zone offsets to TimeZone
Browse files Browse the repository at this point in the history
Summary:
Add support for time zone offsets to TimeZone. We can now clean up the
call sites to use this single API, which provides the time zone name, ID, and
conversion capabilities in a more consistent manner.

Part of facebookincubator#10101

Reviewed By: mbasmanova

Differential Revision: D60213004
  • Loading branch information
pedroerp authored and facebook-github-bot committed Jul 26, 2024
1 parent e3e791a commit 595cabd
Show file tree
Hide file tree
Showing 5 changed files with 279 additions and 42 deletions.
14 changes: 6 additions & 8 deletions velox/type/Timestamp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,21 +70,19 @@ void Timestamp::toGMT(const tz::TimeZone& zone) {
kMaxSeconds,
"Timestamp seconds out of range for time zone adjustment");

date::local_time<std::chrono::seconds> localTime{
std::chrono::seconds(seconds_)};
std::chrono::time_point<std::chrono::system_clock, std::chrono::seconds>
sysTime;
std::chrono::seconds sysSeconds;
try {
sysTime = zone.to_sys(localTime);
sysSeconds = zone.to_sys(std::chrono::seconds(seconds_));
} catch (const date::ambiguous_local_time&) {
// If the time is ambiguous, pick the earlier possibility to be consistent
// with Presto.
sysTime = zone.to_sys(localTime, date::choose::earliest);
sysSeconds = zone.to_sys(
std::chrono::seconds(seconds_), tz::TimeZone::TChoose::kEarliest);
} catch (const date::nonexistent_local_time& error) {
// If the time does not exist, fail the conversion.
VELOX_USER_FAIL(error.what());
}
seconds_ = sysTime.time_since_epoch().count();
seconds_ = sysSeconds.count();
}

void Timestamp::toGMT(int16_t tzID) {
Expand Down Expand Up @@ -142,7 +140,7 @@ void Timestamp::toTimezone(const tz::TimeZone& zone) {
auto tp = toTimePointSec();

try {
seconds_ = zone.to_local(tp).time_since_epoch().count();
seconds_ = zone.to_local(std::chrono::seconds(seconds_)).count();
} catch (const std::invalid_argument& e) {
// Invalid argument means we hit a conversion not supported by
// external/date. Need to throw a RuntimeError so that try() statements do
Expand Down
5 changes: 4 additions & 1 deletion velox/type/Timestamp.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,13 @@

#include "velox/common/base/CheckedArithmetic.h"
#include "velox/type/StringView.h"
#include "velox/type/tz/TimeZoneMap.h"

namespace facebook::velox {

namespace tz {
class TimeZone;
}

enum class TimestampPrecision : int8_t {
kMilliseconds = 3, // 10^3 milliseconds are equal to one second.
kMicroseconds = 6, // 10^6 microseconds are equal to one second.
Expand Down
87 changes: 74 additions & 13 deletions velox/type/tz/TimeZoneMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,20 @@

namespace facebook::velox::tz {

using TTimeZoneDatabase = std::vector<std::unique_ptr<TimeZone>>;
using TTimeZoneIndex = folly::F14FastMap<std::string, const TimeZone*>;

// Defined in TimeZoneDatabase.cpp
extern const std::vector<std::pair<int16_t, std::string>>& getTimeZoneEntries();

// TODO: The string will be moved to TimeZone in the next PR.
using TTimeZoneDatabase = std::vector<std::unique_ptr<std::string>>;
using TTimeZoneIndex = folly::F14FastMap<std::string, int16_t>;

namespace {

// Translates the ID of an offset-based time zone entry into its offset,
// expressed in minutes. IDs up to 840 yield `tzID - 841` (the negative
// offsets), while larger IDs yield `tzID - 840` (the positive offsets). Must
// not be called for tzID 0 (UTC / "+00:00").
inline std::chrono::minutes getTimeZoneOffset(int16_t tzID) {
  constexpr int16_t kLastNegativeOffsetID = 840;

  if (tzID > kLastNegativeOffsetID) {
    return std::chrono::minutes{tzID - kLastNegativeOffsetID};
  }
  return std::chrono::minutes{tzID - (kLastNegativeOffsetID + 1)};
}

// Flattens the input vector of pairs into a vector, assuming that the
// timezoneIDs are (mostly) sequential. Note that since they are "mostly"
// sequential, the vector can have holes. But it is still more efficient than
Expand All @@ -44,7 +49,23 @@ TTimeZoneDatabase buildTimeZoneDatabase(
tzDatabase.resize(dbInput.back().first + 1);

for (const auto& entry : dbInput) {
tzDatabase[entry.first] = std::make_unique<std::string>(entry.second);
std::unique_ptr<TimeZone> timeZonePtr;

if (entry.first == 0) {
timeZonePtr = std::make_unique<TimeZone>(
"UTC", entry.first, date::locate_zone("UTC"));
} else if (entry.first <= 1680) {
std::chrono::minutes offset = getTimeZoneOffset(entry.first);
timeZonePtr =
std::make_unique<TimeZone>(entry.second, entry.first, offset);
}
// Every single other time zone entry (outside of offsets) needs to be
// available in external/date or this will throw.
else {
timeZonePtr = std::make_unique<TimeZone>(
entry.second, entry.first, date::locate_zone(entry.second));
}
tzDatabase[entry.first] = std::move(timeZonePtr);
}
return tzDatabase;
}
Expand All @@ -59,14 +80,19 @@ const TTimeZoneDatabase& getTimeZoneDatabase() {
// reverse look ups.
TTimeZoneIndex buildTimeZoneIndex(const TTimeZoneDatabase& tzDatabase) {
TTimeZoneIndex reversed;
reversed.reserve(tzDatabase.size() + 1);
reversed.reserve(tzDatabase.size() + 2);

for (int16_t i = 0; i < tzDatabase.size(); ++i) {
if (tzDatabase[i] != nullptr) {
reversed.emplace(boost::algorithm::to_lower_copy(*tzDatabase[i]), i);
reversed.emplace(
boost::algorithm::to_lower_copy(tzDatabase[i]->name()),
tzDatabase[i].get());
}
}
reversed.emplace("utc", 0);

// Add aliases to UTC.
reversed.emplace("+00:00", tzDatabase.front().get());
reversed.emplace("-00:00", tzDatabase.front().get());
return reversed;
}

Expand Down Expand Up @@ -157,10 +183,10 @@ std::string getTimeZoneName(int64_t timeZoneID) {
timeZoneDatabase[timeZoneID],
"Unable to resolve timeZoneID '{}'",
timeZoneID);
return *timeZoneDatabase[timeZoneID];
return timeZoneDatabase[timeZoneID]->name();
}

int16_t getTimeZoneID(std::string_view timeZone, bool failOnError) {
const TimeZone* locateZone(std::string_view timeZone, bool failOnError) {
const auto& timeZoneIndex = getTimeZoneIndex();

std::string timeZoneLowered;
Expand All @@ -177,10 +203,16 @@ int16_t getTimeZoneID(std::string_view timeZone, bool failOnError) {
if (it != timeZoneIndex.end()) {
return it->second;
}

if (failOnError) {
VELOX_USER_FAIL("Unknown time zone: '{}'", timeZone);
}
return -1;
return nullptr;
}

int16_t getTimeZoneID(std::string_view timeZone, bool failOnError) {
const TimeZone* tz = locateZone(timeZone, failOnError);
return tz == nullptr ? -1 : tz->id();
}

int16_t getTimeZoneID(int32_t offsetMinutes) {
Expand Down Expand Up @@ -209,8 +241,37 @@ int16_t getTimeZoneID(int32_t offsetMinutes) {
}
}

const TimeZone* locateZone(std::string_view timeZone) {
return date::locate_zone(timeZone);
// Converts `timestamp`, interpreted as a local time in this time zone, into
// the corresponding system (GMT) time. `choose` selects how the underlying
// external/date conversion handles ambiguous local times.
TimeZone::seconds TimeZone::to_sys(
    TimeZone::seconds timestamp,
    TimeZone::TChoose choose) const {
  const date::local_seconds localTime{timestamp};

  // Offset-based time zones have no external/date zone attached. Their
  // conversion is a plain subtraction, so `choose` is irrelevant.
  if (tz_ == nullptr) {
    return (localTime - offset_).time_since_epoch();
  }

  switch (choose) {
    case TimeZone::TChoose::kFail:
      // Without an explicit choice, external/date throws.
      return date::zoned_time{tz_, localTime}
          .get_sys_time()
          .time_since_epoch();

    case TimeZone::TChoose::kEarliest:
      return date::zoned_time{tz_, localTime, date::choose::earliest}
          .get_sys_time()
          .time_since_epoch();

    case TimeZone::TChoose::kLatest:
    default:
      // Any value other than kFail / kEarliest resolves to the latest option.
      return date::zoned_time{tz_, localTime, date::choose::latest}
          .get_sys_time()
          .time_since_epoch();
  }
}

// Converts `timestamp`, interpreted as a system (GMT) time, into the local
// time observed in this time zone.
TimeZone::seconds TimeZone::to_local(TimeZone::seconds timestamp) const {
  const date::sys_seconds sysTime{timestamp};

  // Regular time zones delegate the conversion to external/date.
  if (tz_ != nullptr) {
    return date::zoned_time{tz_, sysTime}.get_local_time().time_since_epoch();
  }

  // Offset-based time zone: simply shift by the fixed offset.
  return (sysTime + offset_).time_since_epoch();
}

} // namespace facebook::velox::tz
119 changes: 100 additions & 19 deletions velox/type/tz/TimeZoneMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#pragma once

#include <chrono>
#include <string>

namespace facebook::velox::date {
Expand All @@ -24,28 +25,25 @@ class time_zone;

namespace facebook::velox::tz {

/// This library provides time zone lookup and mapping utilities, in addition to
/// functions to enable timestamp conversions across time zones. It leverages
/// the velox/external/date underneath to perform conversions.
/// This library provides time zone management primitives. It maintains an
/// internal static database which is constructed lazily on first access, based
/// on TimeZoneDatabase.cpp and the local tzdata installed in your system
/// (through velox/external/date).
///
/// This library provides a thin layer of functionality on top of
/// velox/external/date for timezone lookup and conversions, so don't use the
/// external library directly.

/// TimeZone is the object that allows conversions across timezones using the
/// .to_sys() and .to_local() methods, as documented in:
///
/// https://howardhinnant.github.io/date/tz.html
/// It provides functions to look up TimeZone pointers based on time zone
/// name or ID, and to perform timestamp conversions across time zones.
///
using TimeZone = date::time_zone;
/// This library provides a layer of functionality on top of
/// velox/external/date, so do not use the external library directly for
/// time zone routines.

/// TimeZone pointers can be found using `locateZone()`.
///
/// This function is mostly implemented by velox/external/date, and performs a
/// binary search in the internal time zone database. On the first call,
/// velox/external/date will initialize a static list of timezone, read from the
/// local tzdata database.
const TimeZone* locateZone(std::string_view timeZone);
class TimeZone;

/// Looks up a TimeZone pointer based on a time zone name. This performs a hash
/// map lookup, and will construct the index on the first access. `failOnError`
/// controls whether to throw or return nullptr in case the time zone was not
/// found.
const TimeZone* locateZone(std::string_view timeZone, bool failOnError = true);

/// Returns the timezone name associated with timeZoneID.
std::string getTimeZoneName(int64_t timeZoneID);
Expand All @@ -59,6 +57,89 @@ int16_t getTimeZoneID(std::string_view timeZone, bool failOnError = true);
/// [-14:00, +14:00] range.
int16_t getTimeZoneID(int32_t offsetMinutes);

/// TimeZone is the proxy object for time zone management. It provides access to
/// time zone names, their IDs (as defined in TimeZoneDatabase.cpp and
/// consistent with Presto), and utilities for timestamp conversion across
/// timezones by leveraging the .to_sys() and .to_local() methods as documented
/// in:
///
/// https://howardhinnant.github.io/date/tz.html
///
/// An instance is backed either by an external/date `date::time_zone` pointer
/// (regular, tzdata-based time zones) or by a fixed offset in minutes (time
/// zone offsets such as "+01:00"), depending on which constructor was used.
///
/// Do not create your own objects; rather, look up a pointer by using
/// `locateZone()`.
class TimeZone {
public:
// Constructor for regular time zones with a name and a pointer to
// external/date time zone database (from tzdata). The offset is set to zero.
TimeZone(
std::string_view timeZoneName,
int16_t timeZoneID,
const date::time_zone* tz)
: tz_(tz),
offset_(0),
timeZoneName_(timeZoneName),
timeZoneID_(timeZoneID) {}

// Constructor for time zone offsets ("+00:00"). No external/date time zone is
// attached in this case, so `tz()` returns nullptr.
TimeZone(
std::string_view timeZoneName,
int16_t timeZoneID,
std::chrono::minutes offset)
: tz_(nullptr),
offset_(offset),
timeZoneName_(timeZoneName),
timeZoneID_(timeZoneID) {}

// Do not copy it.
TimeZone(const TimeZone&) = delete;
TimeZone& operator=(const TimeZone&) = delete;

// Unit used by the conversion methods for both their input and their output.
using seconds = std::chrono::seconds;

/// Controls how `to_sys()` resolves ambiguous local times:
///  - kFail: do not resolve; let the conversion throw.
///  - kEarliest: resolved through `date::choose::earliest`.
///  - kLatest: resolved through `date::choose::latest`.
enum class TChoose {
kFail = 0,
kEarliest = 1,
kLatest = 2,
};

/// Converts a local time (the time as perceived in the user time zone
/// represented by this object) to a system time (the corresponding time in
/// GMT at the same instant).
///
/// Conversions from local time to GMT are non-linear and may be ambiguous
/// during daylight saving time transitions, or nonexistent. By default
/// (kFail), `to_sys()` will throw `date::ambiguous_local_time` and
/// `date::nonexistent_local_time` in these cases.
///
/// You can override the behavior in ambiguous conversions by setting the
/// TChoose flag, but it will still throw in case of nonexistent conversions.
/// For time zone offsets the conversion is linear and `choose` is ignored.
seconds to_sys(seconds timestamp, TChoose choose = TChoose::kFail) const;

/// Does the opposite conversion: takes a system time (the time as perceived
/// in GMT) and converts it to the same instant in time as observed in the user
/// local time represented by this object. Note that this conversion is not
/// susceptible to the ambiguous / nonexistent time errors described for
/// `to_sys()`.
seconds to_local(seconds timestamp) const;

/// Returns the time zone name this object was constructed with.
const std::string& name() const {
return timeZoneName_;
}

/// Returns the time zone ID this object was constructed with.
int16_t id() const {
return timeZoneID_;
}

/// Returns the underlying external/date time zone, or nullptr if this object
/// was constructed as a time zone offset.
const date::time_zone* tz() const {
return tz_;
}

private:
// External/date time zone; nullptr for time zone offsets.
const date::time_zone* tz_{nullptr};
// Fixed offset for time zone offsets; zero for regular time zones.
const std::chrono::minutes offset_{0};
const std::string timeZoneName_;
const int16_t timeZoneID_;
};

} // namespace facebook::velox::tz

#ifdef VELOX_ENABLE_BACKWARD_COMPATIBILITY
Expand Down
Loading

0 comments on commit 595cabd

Please sign in to comment.