Skip to content

Commit

Permalink
Clean up timezoneId from DateTimeFormatter (facebookincubator#11259)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: facebookincubator#11259

We want to capture a tz::TimeZone* instead of relying on timezoneId inside dateTimeFormatter. The former is more flexible and provides conversion capabilities.

Reviewed By: kevinwilfong

Differential Revision: D64348245

fbshipit-source-id: d10888e4ade9bbff2aea6c2b25800ced41a2daf0
  • Loading branch information
Daniel Hunte authored and facebook-github-bot committed Oct 17, 2024
1 parent 9d4cfb2 commit d90038d
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 76 deletions.
93 changes: 39 additions & 54 deletions velox/functions/lib/DateTimeFormatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ struct Date {
int32_t second = 0;
int32_t microsecond = 0;
bool isAm = true; // AM -> true, PM -> false
int64_t timezoneId = -1;
const tz::TimeZone* timezone = nullptr;

bool isClockHour = false; // Whether most recent hour specifier is clockhour
bool isHourOfHalfDay =
Expand Down Expand Up @@ -318,28 +318,29 @@ int64_t parseTimezone(const char* cur, const char* end, Date& date) {
if (cur < end) {
// If there are at least 3 letters left.
if (end - cur >= 3) {
static std::unordered_map<std::string_view, int64_t> defaultTzNames{
{"UTC", 0},
{"GMT", 0},
{"EST", tz::getTimeZoneID("America/New_York")},
{"EDT", tz::getTimeZoneID("America/New_York")},
{"CST", tz::getTimeZoneID("America/Chicago")},
{"CDT", tz::getTimeZoneID("America/Chicago")},
{"MST", tz::getTimeZoneID("America/Denver")},
{"MDT", tz::getTimeZoneID("America/Denver")},
{"PST", tz::getTimeZoneID("America/Los_Angeles")},
{"PDT", tz::getTimeZoneID("America/Los_Angeles")},
};
static std::unordered_map<std::string_view, const tz::TimeZone*>
defaultTzNames{
{"UTC", tz::locateZone("UTC")},
{"GMT", tz::locateZone("GMT")},
{"EST", tz::locateZone("America/New_York")},
{"EDT", tz::locateZone("America/New_York")},
{"CST", tz::locateZone("America/Chicago")},
{"CDT", tz::locateZone("America/Chicago")},
{"MST", tz::locateZone("America/Denver")},
{"MDT", tz::locateZone("America/Denver")},
{"PST", tz::locateZone("America/Los_Angeles")},
{"PDT", tz::locateZone("America/Los_Angeles")},
};

auto it = defaultTzNames.find(std::string_view(cur, 3));
if (it != defaultTzNames.end()) {
date.timezoneId = it->second;
date.timezone = it->second;
return 3;
}
}
// The format 'UT' is also accepted for UTC.
else if ((end - cur == 2) && (*cur == 'U') && (*(cur + 1) == 'T')) {
date.timezoneId = 0;
date.timezone = tz::locateZone("UTC");
return 2;
}
}
Expand All @@ -358,68 +359,52 @@ int64_t parseTimezoneOffset(const char* cur, const char* end, Date& date) {
if (*cur == '-' || *cur == '+') {
// Long format: "+00:00"
if ((end - cur) >= 6 && *(cur + 3) == ':') {
// Fast path for the common case ("+00:00" or "-00:00"), to prevent
// calling getTimeZoneID(), which does a map lookup.
if (std::strncmp(cur + 1, "00:00", 5) == 0) {
date.timezoneId = 0;
} else {
date.timezoneId = tz::getTimeZoneID(std::string_view(cur, 6), false);
if (date.timezoneId == -1) {
return -1;
}
date.timezone = tz::locateZone(std::string_view(cur, 6), false);
if (!date.timezone) {
return -1;
}
return 6;
}
// Long format without colon: "+0000"
else if ((end - cur) >= 5 && *(cur + 3) != ':') {
// Same fast path described above.
if (std::strncmp(cur + 1, "0000", 4) == 0) {
date.timezoneId = 0;
} else {
// We need to concatenate the 3 first chars with ":" followed by the
// last 2 chars before calling getTimeZoneID, so we use a static
// thread_local buffer to prevent extra allocations.
std::memcpy(&timezoneBuffer[0], cur, 3);
std::memcpy(&timezoneBuffer[4], cur + 3, 2);
date.timezoneId = tz::getTimeZoneID(timezoneBuffer, false);
if (date.timezoneId == -1) {
return -1;
}
// We need to concatenate the 3 first chars with ":" followed by the
// last 2 chars before calling locateZone, so we use a static
// thread_local buffer to prevent extra allocations.
std::memcpy(&timezoneBuffer[0], cur, 3);
std::memcpy(&timezoneBuffer[4], cur + 3, 2);
date.timezone = tz::locateZone(timezoneBuffer, false);
if (!date.timezone) {
return -1;
}
return 5;
}
// Short format: "+00"
else if ((end - cur) >= 3) {
// Same fast path described above.
if (std::strncmp(cur + 1, "00", 2) == 0) {
date.timezoneId = 0;
} else {
// We need to concatenate the 3 first chars with a trailing ":00"
// before calling getTimeZoneID, so we use a static thread_local
// buffer to prevent extra allocations.
std::memcpy(&timezoneBuffer[0], cur, 3);
std::memcpy(&timezoneBuffer[4], defaultTrailingOffset, 2);
date.timezoneId = tz::getTimeZoneID(timezoneBuffer, false);
if (date.timezoneId == -1) {
return -1;
}
// We need to concatenate the 3 first chars with a trailing ":00"
// before calling getTimeZoneID, so we use a static thread_local
// buffer to prevent extra allocations.
std::memcpy(&timezoneBuffer[0], cur, 3);
std::memcpy(&timezoneBuffer[4], defaultTrailingOffset, 2);
date.timezone = tz::locateZone(timezoneBuffer, false);
if (!date.timezone) {
return -1;
}
return 3;
}
}
// Single 'Z' character maps to GMT.
else if (*cur == 'Z') {
date.timezoneId = 0;
date.timezone = tz::locateZone("GMT");
return 1;
}
// "UTC", "UCT", "GMT" and "GMT0" are also acceptable by joda.
else if ((end - cur) >= 3) {
if (std::strncmp(cur, "UTC", 3) == 0 ||
std::strncmp(cur, "UCT", 3) == 0) {
date.timezoneId = 0;
date.timezone = tz::locateZone("UTC");
return 3;
} else if (std::strncmp(cur, "GMT", 3) == 0) {
date.timezoneId = 0;
date.timezone = tz::locateZone("GMT");
if ((end - cur) >= 4 && *(cur + 3) == '0') {
return 4;
}
Expand Down Expand Up @@ -1443,7 +1428,7 @@ Expected<DateTimeResult> DateTimeFormatter::parse(
util::fromTime(date.hour, date.minute, date.second, date.microsecond);
return DateTimeResult{
util::fromDatetime(daysSinceEpoch.value(), microsSinceMidnight),
date.timezoneId};
date.timezone};
}

Expected<std::shared_ptr<DateTimeFormatter>> buildMysqlDateTimeFormatter(
Expand Down
2 changes: 1 addition & 1 deletion velox/functions/lib/DateTimeFormatter.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ struct DateTimeToken {

struct DateTimeResult {
Timestamp timestamp;
int64_t timezoneId{-1};
const tz::TimeZone* timezone = nullptr;
};

/// A user defined formatter that formats/parses time to/from user provided
Expand Down
22 changes: 11 additions & 11 deletions velox/functions/lib/tests/DateTimeFormatterTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,10 @@ class DateTimeFormatterTest : public testing::Test {
auto dateTimeResultExpected =
getJodaDateTimeFormatter(format)->parse(input);
auto result = dateTimeResult(dateTimeResultExpected);
if (result.timezoneId == 0) {
if (result.timezone->id() == 0) {
return "+00:00";
}
return tz::getTimeZoneName(result.timezoneId);
return result.timezone->name();
}

std::string formatMysqlDateTime(
Expand Down Expand Up @@ -1096,17 +1096,17 @@ TEST_F(JodaDateTimeFormatterTest, parseMixedYMDFormat) {
// Include timezone.
auto result = parseJoda("2021-11-05+01:00+09:00", "YYYY-MM-dd+HH:mmZZ");
EXPECT_EQ(fromTimestampString("2021-11-05 01:00:00"), result.timestamp);
EXPECT_EQ("+09:00", tz::getTimeZoneName(result.timezoneId));
EXPECT_EQ("+09:00", result.timezone->name());

// Timezone offset in -hh:mm format.
result = parseJoda("-07:232021-11-05+01:00", "ZZYYYY-MM-dd+HH:mm");
EXPECT_EQ(fromTimestampString("2021-11-05 01:00:00"), result.timestamp);
EXPECT_EQ("-07:23", tz::getTimeZoneName(result.timezoneId));
EXPECT_EQ("-07:23", result.timezone->name());

// Timezone offset in +hhmm format.
result = parseJoda("+01332022-03-08+13:00", "ZZYYYY-MM-dd+HH:mm");
EXPECT_EQ(fromTimestampString("2022-03-08 13:00:00"), result.timestamp);
EXPECT_EQ("+01:33", tz::getTimeZoneName(result.timezoneId));
EXPECT_EQ("+01:33", result.timezone->name());

// Z in the input means GMT in Joda.
EXPECT_EQ(
Expand All @@ -1117,7 +1117,7 @@ TEST_F(JodaDateTimeFormatterTest, parseMixedYMDFormat) {
// Timezone in string format.
result = parseJoda("2021-11-05+01:00 PST", "YYYY-MM-dd+HH:mm zz");
EXPECT_EQ(fromTimestampString("2021-11-05 01:00:00"), result.timestamp);
EXPECT_EQ("America/Los_Angeles", tz::getTimeZoneName(result.timezoneId));
EXPECT_EQ("America/Los_Angeles", result.timezone->name());
}

TEST_F(JodaDateTimeFormatterTest, parseMixedWeekFormat) {
Expand Down Expand Up @@ -1239,31 +1239,31 @@ TEST_F(JodaDateTimeFormatterTest, parseMixedWeekFormat) {
auto result =
parseJoda("2021 22 1 13:29:21.213+09:00", "x w e HH:mm:ss.SSSZZ");
EXPECT_EQ(fromTimestampString("2021-05-31 13:29:21.213"), result.timestamp);
EXPECT_EQ("+09:00", tz::getTimeZoneName(result.timezoneId));
EXPECT_EQ("+09:00", result.timezone->name());

// Timezone offset in -hh:mm format.
result = parseJoda("-07:232021 22 1 13:29:21.213", "ZZx w e HH:mm:ss.SSS");
EXPECT_EQ(fromTimestampString("2021-05-31 13:29:21.213"), result.timestamp);
EXPECT_EQ("-07:23", tz::getTimeZoneName(result.timezoneId));
EXPECT_EQ("-07:23", result.timezone->name());

// Timezone offset in +hhmm format.
result = parseJoda("+01332021 22 1 13:29:21.213", "ZZx w e HH:mm:ss.SSS");
EXPECT_EQ(fromTimestampString("2021-05-31 13:29:21.213"), result.timestamp);
EXPECT_EQ("+01:33", tz::getTimeZoneName(result.timezoneId));
EXPECT_EQ("+01:33", result.timezone->name());
}

TEST_F(JodaDateTimeFormatterTest, parseFractionOfSecond) {
// Valid milliseconds and timezone with positive offset.
auto result =
parseJoda("2022-02-23T12:15:00.364+04:00", "yyyy-MM-dd'T'HH:mm:ss.SSSZ");
EXPECT_EQ(fromTimestampString("2022-02-23 12:15:00.364"), result.timestamp);
EXPECT_EQ("+04:00", tz::getTimeZoneName(result.timezoneId));
EXPECT_EQ("+04:00", result.timezone->name());

// Valid milliseconds and timezone with negative offset.
result =
parseJoda("2022-02-23T12:15:00.776-14:00", "yyyy-MM-dd'T'HH:mm:ss.SSSZ");
EXPECT_EQ(fromTimestampString("2022-02-23 12:15:00.776"), result.timestamp);
EXPECT_EQ("-14:00", tz::getTimeZoneName(result.timezoneId));
EXPECT_EQ("-14:00", result.timezone->name());

// Valid milliseconds.
EXPECT_EQ(
Expand Down
9 changes: 4 additions & 5 deletions velox/functions/prestosql/DateTimeFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -1501,7 +1501,7 @@ struct FromIso8601Timestamp {
auto [ts, timeZone] = castResult.value();
// Input string may not contain a timezone - if so, it is interpreted in
// session timezone.
if (timeZone == nullptr) {
if (!timeZone) {
timeZone = sessionTimeZone_;
}
ts.toGMT(*timeZone);
Expand Down Expand Up @@ -1687,9 +1687,8 @@ struct ParseDateTimeFunction {

// If timezone was not parsed, fallback to the session timezone. If there's
// no session timezone, fallback to 0 (GMT).
const auto* timeZone = dateTimeResult->timezoneId != -1
? tz::locateZone(dateTimeResult->timezoneId)
: sessionTimeZone_;
const auto* timeZone =
dateTimeResult->timezone ? dateTimeResult->timezone : sessionTimeZone_;
dateTimeResult->timestamp.toGMT(*timeZone);
result = pack(dateTimeResult->timestamp, timeZone->id());
return Status::OK();
Expand Down Expand Up @@ -1819,7 +1818,7 @@ struct AtTimezoneFunction : public TimestampWithTimezoneSupport<T> {
const core::QueryConfig& config,
const arg_type<TimestampWithTimezone>* /*tsWithTz*/,
const arg_type<Varchar>* timezone) {
if (timezone != nullptr) {
if (timezone) {
targetTimezoneID_ = tz::getTimeZoneID(
std::string_view(timezone->data(), timezone->size()));
}
Expand Down
6 changes: 2 additions & 4 deletions velox/functions/sparksql/DateTimeFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,7 @@ struct UnixTimestampParseFunction {

const tz::TimeZone* getTimeZone(const DateTimeResult& result) {
// If timezone was not parsed, fallback to the session timezone.
return result.timezoneId != -1 ? tz::locateZone(result.timezoneId)
: sessionTimeZone_;
return result.timezone ? result.timezone : sessionTimeZone_;
}

// Default if format is not specified, as per Spark documentation.
Expand Down Expand Up @@ -400,8 +399,7 @@ struct GetTimestampFunction {
const tz::TimeZone* getTimeZone(const DateTimeResult& result) const {
// If timezone was not parsed, fallback to the session timezone. If there's
// no session timezone, fallback to 0 (GMT).
return result.timezoneId != -1 ? tz::locateZone(result.timezoneId)
: sessionTimeZone_;
return result.timezone ? result.timezone : sessionTimeZone_;
}

std::shared_ptr<DateTimeFormatter> formatter_{nullptr};
Expand Down
2 changes: 1 addition & 1 deletion velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -739,7 +739,7 @@ TEST_F(DateTimeFunctionsTest, getTimestamp) {
const auto getTimestampString =
[&](const std::optional<StringView>& dateString,
const std::string& format) {
return getTimestamp(dateString, format).value().toString();
return getTimestamp(dateString, format)->toString();
};

EXPECT_EQ(getTimestamp("1970-01-01", "yyyy-MM-dd"), Timestamp(0, 0));
Expand Down

0 comments on commit d90038d

Please sign in to comment.