From 27450699d7f2705a575c487d2aa001714f328f96 Mon Sep 17 00:00:00 2001 From: Pedro Eugenio Rocha Pedreira Date: Thu, 29 Feb 2024 14:51:44 -0800 Subject: [PATCH] parse_datetime() should allow UTC|UCT|GMT|GMT0 as 'Z' Summary: The Joda library accepts UTC|UCT|GMT and GMT0 as input for the 'Z' (uppercase) identifier, even though capital Z means timezone offset. Adding support for compatibility with Joda, which is used in Presto Java. Reviewed By: mbasmanova Differential Revision: D54346046 fbshipit-source-id: 82af2fea7eee47bd0eaf854b16c693e3d43d059a --- velox/docs/functions/presto/datetime.rst | 7 ++++++- velox/functions/lib/DateTimeFormatter.cpp | 16 +++++++++++++++- .../prestosql/tests/DateTimeFunctionsTest.cpp | 15 +++++++++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/velox/docs/functions/presto/datetime.rst b/velox/docs/functions/presto/datetime.rst index be66ebb51e70..a46c9d2b1b87 100644 --- a/velox/docs/functions/presto/datetime.rst +++ b/velox/docs/functions/presto/datetime.rst @@ -227,7 +227,12 @@ The functions in this section leverage a native cpp implementation that follows a format string compatible with JodaTime’s `DateTimeFormat `_ pattern format. The symbols currently supported are ``y``, ``Y``, ``M`` , ``d``, -``H``, ``m``, ``s``, ``S``, and ``Z``. +``H``, ``m``, ``s``, ``S``, ``z`` and ``Z``. + +``z`` represents a timezone name (3-letter format), and ``Z`` a timezone offset +specified using the format ``+00``, ``+00:00`` or ``+0000`` (or ``-``). ``Z`` +also accepts ``UTC``, ``UCT``, ``GMT``, and ``GMT0`` as valid representations +of GMT. .. 
function:: parse_datetime(string, format) -> timestamp with time zone diff --git a/velox/functions/lib/DateTimeFormatter.cpp b/velox/functions/lib/DateTimeFormatter.cpp index e732862ad306..e15b4228b4d8 100644 --- a/velox/functions/lib/DateTimeFormatter.cpp +++ b/velox/functions/lib/DateTimeFormatter.cpp @@ -410,11 +410,25 @@ int64_t parseTimezoneOffset(const char* cur, const char* end, Date& date) { return 3; } } - // Single 'Z' character maps to GMT + // Single 'Z' character maps to GMT. else if (*cur == 'Z') { date.timezoneId = 0; return 1; } + // "UTC", "UCT", "GMT" and "GMT0" are also accepted by Joda. + else if ((end - cur) >= 3) { + if (std::strncmp(cur, "UTC", 3) == 0 || + std::strncmp(cur, "UCT", 3) == 0) { + date.timezoneId = 0; + return 3; + } else if (std::strncmp(cur, "GMT", 3) == 0) { + date.timezoneId = 0; + if ((end - cur) >= 4 && *(cur + 3) == '0') { + return 4; + } + return 3; + } + } } return -1; } diff --git a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp index f8e0581c8728..7c0f6f97955a 100644 --- a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp @@ -2638,6 +2638,21 @@ TEST_F(DateTimeFunctionsTest, parseDatetime) { EXPECT_EQ( TimestampWithTimezone(-66600000, util::getTimeZoneID("+02:00")), parseDatetime("1969-12-31+07:30+02:00", "YYYY-MM-dd+HH:mmZZ")); + + // Joda also lets 'Z' be UTC|UCT|GMT|GMT0. 
+ auto ts = TimestampWithTimezone(1708840800000, util::getTimeZoneID("GMT")); + EXPECT_EQ( + ts, parseDatetime("2024-02-25+06:00:99 GMT", "yyyy-MM-dd+HH:mm:99 ZZZ")); + EXPECT_EQ( + ts, parseDatetime("2024-02-25+06:00:99 GMT0", "yyyy-MM-dd+HH:mm:99 ZZZ")); + EXPECT_EQ( + ts, parseDatetime("2024-02-25+06:00:99 UTC", "yyyy-MM-dd+HH:mm:99 ZZZ")); + EXPECT_EQ( + ts, parseDatetime("2024-02-25+06:00:99 UTC", "yyyy-MM-dd+HH:mm:99 ZZZ")); + + VELOX_ASSERT_THROW( + parseDatetime("2024-02-25+06:00:99 PST", "yyyy-MM-dd+HH:mm:99 ZZZ"), + "Invalid format: \"2024-02-25+06:00:99 PST\" is malformed at \"PST\""); } TEST_F(DateTimeFunctionsTest, formatDateTime) {