diff --git a/velox/functions/lib/DateTimeFormatter.cpp b/velox/functions/lib/DateTimeFormatter.cpp index 32e1ab68f060..598146f28812 100644 --- a/velox/functions/lib/DateTimeFormatter.cpp +++ b/velox/functions/lib/DateTimeFormatter.cpp @@ -27,6 +27,11 @@ #include "velox/type/TimestampConversion.h" #include "velox/type/tz/TimeZoneMap.h" +namespace facebook::velox::tz { +// Defined in TimeZoneLinks.cpp +extern const std::unordered_map& getTimeZoneLinks(); +} // namespace facebook::velox::tz + namespace facebook::velox::functions { static thread_local std::string timezoneBuffer = "+00:00"; @@ -1234,7 +1239,9 @@ uint32_t DateTimeFormatter::maxResultSize(const tz::TimeZone* timezone) const { if (timezone == nullptr) { VELOX_USER_FAIL("Timezone unknown"); } - size += timezone->name().length(); + + // The longest time zone ID is 32, America/Argentina/ComodRivadavia. + size += 32; } break; // Not supported. @@ -1494,6 +1501,16 @@ int32_t DateTimeFormatter::format( if (token.pattern.minRepresentDigits >= 3) { // Append the time zone ID. const auto& piece = timezone->name(); + + static const auto& timeZoneLinks = tz::getTimeZoneLinks(); + auto timeZoneLinksIter = timeZoneLinks.find(piece); + if (timeZoneLinksIter != timeZoneLinks.end()) { + const auto& timeZoneLink = timeZoneLinksIter->second; + std::memcpy(result, timeZoneLink.data(), timeZoneLink.length()); + result += timeZoneLink.length(); + break; + } + std::memcpy(result, piece.data(), piece.length()); result += piece.length(); break; diff --git a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp index 9cce58653252..20265004aafa 100644 --- a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp @@ -3340,6 +3340,17 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) { "Australian Central Western Standard Time", formatDatetime(parseTimestamp("1970-10-01"), "zzzz")); + // Test a time zone name that is linked to another (that gets replaced when + // converted to a string). + setQueryTimeZone("US/Pacific"); + EXPECT_EQ("PST", formatDatetime(parseTimestamp("1970-01-01"), "zzz")); + EXPECT_EQ( + "Pacific Standard Time", + formatDatetime(parseTimestamp("1970-01-01"), "zzzz")); + EXPECT_EQ( + "America/Los_Angeles", + formatDatetime(parseTimestamp("1970-01-01"), "ZZZ")); + setQueryTimeZone("Asia/Kolkata"); // Literal test cases. EXPECT_EQ("hello", formatDatetime(parseTimestamp("1970-01-01"), "'hello'")); diff --git a/velox/functions/prestosql/tests/TimestampWithTimeZoneCastTest.cpp b/velox/functions/prestosql/tests/TimestampWithTimeZoneCastTest.cpp index 6b9990ada924..ddd100c6b46a 100644 --- a/velox/functions/prestosql/tests/TimestampWithTimeZoneCastTest.cpp +++ b/velox/functions/prestosql/tests/TimestampWithTimeZoneCastTest.cpp @@ -125,7 +125,8 @@ TEST_F(TimestampWithTimeZoneCastTest, toVarchar) { "1970-01-01 01:11:37.123 America/New_York", "1969-12-31 22:11:37.123 America/Los_Angeles", "1970-01-01 14:11:37.123 Asia/Shanghai", - "1970-01-01 11:41:37.123 Asia/Calcutta", + "1970-01-01 11:41:37.123 Asia/Kolkata", // Asia/Calcutta is linked to + // Asia/Kolkata. }); auto result = evaluate("cast(c0 as varchar)", makeRowVector({input})); diff --git a/velox/type/tz/CMakeLists.txt b/velox/type/tz/CMakeLists.txt index 614b594000a7..8f7c2381b823 100644 --- a/velox/type/tz/CMakeLists.txt +++ b/velox/type/tz/CMakeLists.txt @@ -14,8 +14,12 @@ if(${VELOX_BUILD_TESTING}) add_subdirectory(tests) endif() -velox_add_library(velox_type_tz TimeZoneMap.h TimeZoneDatabase.cpp - TimeZoneMap.cpp) +velox_add_library( + velox_type_tz + TimeZoneMap.h + TimeZoneDatabase.cpp + TimeZoneLinks.cpp + TimeZoneMap.cpp) velox_link_libraries( velox_type_tz diff --git a/velox/type/tz/TimeZoneLinks.cpp b/velox/type/tz/TimeZoneLinks.cpp new file mode 100644 index 000000000000..de1e381ceb91 --- /dev/null +++ b/velox/type/tz/TimeZoneLinks.cpp @@ -0,0 +1,186 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// This file is generated. Do not modify it manually. To re-generate it, run: +// +// ./velox/type/tz/gen_timezone_links.py -f /tmp/backward \ +// > velox/type/tz/TimeZoneLinks.cpp +// +// The backward file should be the same one used in JODA, +// The latest is available here : +// https://github.com/JodaOrg/global-tz/blob/global-tz/backward. +// Presto Java currently uses the one found here: +// https://github.com/JodaOrg/global-tz/blob/2024agtz/backward +// To find the current version in Presto Java, check this file at the version of +// JODA Presto Java is using: +// https://github.com/JodaOrg/joda-time/blob/v2.12.7/src/changes/changes.xml +// @generated + +#include +#include + +namespace facebook::velox::tz { + +const std::unordered_map& getTimeZoneLinks() { + static auto* tzLinks = new std::unordered_map([] { + // Work around clang compiler bug causing multi-hour compilation + // with -fsanitize=fuzzer + // https://github.com/llvm/llvm-project/issues/75666 + return std::unordered_map{ + {"Australia/ACT", "Australia/Sydney"}, + {"Australia/LHI", "Australia/Lord_Howe"}, + {"Australia/NSW", "Australia/Sydney"}, + {"Australia/North", "Australia/Darwin"}, + {"Australia/Queensland", "Australia/Brisbane"}, + {"Australia/South", "Australia/Adelaide"}, + {"Australia/Tasmania", "Australia/Hobart"}, + {"Australia/Victoria", "Australia/Melbourne"}, + {"Australia/West", "Australia/Perth"}, + {"Australia/Yancowinna", "Australia/Broken_Hill"}, + {"Brazil/Acre", "America/Rio_Branco"}, + {"Brazil/DeNoronha", "America/Noronha"}, + {"Brazil/East", "America/Sao_Paulo"}, + {"Brazil/West", "America/Manaus"}, + {"Canada/Atlantic", "America/Halifax"}, + {"Canada/Central", "America/Winnipeg"}, + {"Canada/Eastern", "America/Toronto"}, + {"Canada/Mountain", "America/Edmonton"}, + {"Canada/Newfoundland", "America/St_Johns"}, + {"Canada/Pacific", "America/Vancouver"}, + {"Canada/Saskatchewan", "America/Regina"}, + {"Canada/Yukon", "America/Whitehorse"}, + {"Chile/Continental", "America/Santiago"}, + {"Chile/EasterIsland", "Pacific/Easter"}, + {"Cuba", "America/Havana"}, + {"Egypt", "Africa/Cairo"}, + {"Eire", "Europe/Dublin"}, + {"Etc/GMT+0", "Etc/GMT"}, + {"Etc/GMT-0", "Etc/GMT"}, + {"Etc/GMT0", "Etc/GMT"}, + {"Etc/Greenwich", "Etc/GMT"}, + {"Etc/UCT", "Etc/UTC"}, + {"Etc/Universal", "Etc/UTC"}, + {"Etc/Zulu", "Etc/UTC"}, + {"GB", "Europe/London"}, + {"GB-Eire", "Europe/London"}, + {"GMT+0", "Etc/GMT"}, + {"GMT-0", "Etc/GMT"}, + {"GMT0", "Etc/GMT"}, + {"Greenwich", "Etc/GMT"}, + {"Hongkong", "Asia/Hong_Kong"}, + {"Iceland", "Atlantic/Reykjavik"}, + {"Iran", "Asia/Tehran"}, + {"Israel", "Asia/Jerusalem"}, + {"Jamaica", "America/Jamaica"}, + {"Japan", "Asia/Tokyo"}, + {"Kwajalein", "Pacific/Kwajalein"}, + {"Libya", "Africa/Tripoli"}, + {"Mexico/BajaNorte", "America/Tijuana"}, + {"Mexico/BajaSur", "America/Mazatlan"}, + {"Mexico/General", "America/Mexico_City"}, + {"NZ", "Pacific/Auckland"}, + {"NZ-CHAT", "Pacific/Chatham"}, + {"Navajo", "America/Denver"}, + {"PRC", "Asia/Shanghai"}, + {"Poland", "Europe/Warsaw"}, + {"Portugal", "Europe/Lisbon"}, + {"ROC", "Asia/Taipei"}, + {"ROK", "Asia/Seoul"}, + {"Singapore", "Asia/Singapore"}, + {"Turkey", "Europe/Istanbul"}, + {"UCT", "Etc/UTC"}, + {"US/Alaska", "America/Anchorage"}, + {"US/Aleutian", "America/Adak"}, + {"US/Arizona", "America/Phoenix"}, + {"US/Central", "America/Chicago"}, + {"US/East-Indiana", "America/Indiana/Indianapolis"}, + {"US/Eastern", "America/New_York"}, + {"US/Hawaii", "Pacific/Honolulu"}, + {"US/Indiana-Starke", "America/Indiana/Knox"}, + {"US/Michigan", "America/Detroit"}, + {"US/Mountain", "America/Denver"}, + {"US/Pacific", "America/Los_Angeles"}, + {"US/Samoa", "Pacific/Pago_Pago"}, + {"UTC", "Etc/UTC"}, + {"Universal", "Etc/UTC"}, + {"W-SU", "Europe/Moscow"}, + {"Zulu", "Etc/UTC"}, + {"America/Buenos_Aires", "America/Argentina/Buenos_Aires"}, + {"America/Catamarca", "America/Argentina/Catamarca"}, + {"America/Cordoba", "America/Argentina/Cordoba"}, + {"America/Indianapolis", "America/Indiana/Indianapolis"}, + {"America/Jujuy", "America/Argentina/Jujuy"}, + {"America/Knox_IN", "America/Indiana/Knox"}, + {"America/Louisville", "America/Kentucky/Louisville"}, + {"America/Mendoza", "America/Argentina/Mendoza"}, + {"America/Virgin", "America/St_Thomas"}, + {"Pacific/Samoa", "Pacific/Pago_Pago"}, + {"Africa/Timbuktu", "Africa/Bamako"}, + {"America/Argentina/ComodRivadavia", "America/Argentina/Catamarca"}, + {"America/Atka", "America/Adak"}, + {"America/Coral_Harbour", "America/Atikokan"}, + {"America/Ensenada", "America/Tijuana"}, + {"America/Fort_Wayne", "America/Indiana/Indianapolis"}, + {"America/Montreal", "America/Toronto"}, + {"America/Nipigon", "America/Toronto"}, + {"America/Pangnirtung", "America/Iqaluit"}, + {"America/Porto_Acre", "America/Rio_Branco"}, + {"America/Rainy_River", "America/Winnipeg"}, + {"America/Rosario", "America/Argentina/Cordoba"}, + {"America/Santa_Isabel", "America/Tijuana"}, + {"America/Shiprock", "America/Denver"}, + {"America/Thunder_Bay", "America/Toronto"}, + {"America/Yellowknife", "America/Edmonton"}, + {"Antarctica/South_Pole", "Antarctica/McMurdo"}, + {"Asia/Chongqing", "Asia/Shanghai"}, + {"Asia/Harbin", "Asia/Shanghai"}, + {"Asia/Kashgar", "Asia/Urumqi"}, + {"Asia/Tel_Aviv", "Asia/Jerusalem"}, + {"Atlantic/Jan_Mayen", "Europe/Oslo"}, + {"Australia/Canberra", "Australia/Sydney"}, + {"Australia/Currie", "Australia/Hobart"}, + {"Europe/Belfast", "Europe/London"}, + {"Europe/Tiraspol", "Europe/Chisinau"}, + {"Europe/Uzhgorod", "Europe/Kyiv"}, + {"Europe/Zaporozhye", "Europe/Kyiv"}, + {"Pacific/Enderbury", "Pacific/Kanton"}, + {"Pacific/Johnston", "Pacific/Honolulu"}, + {"Pacific/Yap", "Pacific/Chuuk"}, + {"Africa/Asmera", "Africa/Asmara"}, + {"America/Godthab", "America/Nuuk"}, + {"Asia/Ashkhabad", "Asia/Ashgabat"}, + {"Asia/Calcutta", "Asia/Kolkata"}, + {"Asia/Chungking", "Asia/Shanghai"}, + {"Asia/Dacca", "Asia/Dhaka"}, + {"Asia/Istanbul", "Europe/Istanbul"}, + {"Asia/Katmandu", "Asia/Kathmandu"}, + {"Asia/Macao", "Asia/Macau"}, + {"Asia/Rangoon", "Asia/Yangon"}, + {"Asia/Saigon", "Asia/Ho_Chi_Minh"}, + {"Asia/Thimbu", "Asia/Thimphu"}, + {"Asia/Ujung_Pandang", "Asia/Makassar"}, + {"Asia/Ulan_Bator", "Asia/Ulaanbaatar"}, + {"Atlantic/Faeroe", "Atlantic/Faroe"}, + {"Europe/Kiev", "Europe/Kyiv"}, + {"Europe/Nicosia", "Asia/Nicosia"}, + {"Pacific/Ponape", "Pacific/Pohnpei"}, + {"Pacific/Truk", "Pacific/Chuuk"}, + }; + }()); + return *tzLinks; +} + +} // namespace facebook::velox::tz diff --git a/velox/type/tz/gen_timezone_links.py b/velox/type/tz/gen_timezone_links.py new file mode 100755 index 000000000000..6b414b247c01 --- /dev/null +++ b/velox/type/tz/gen_timezone_links.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python3 +# +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import sys +from string import Template + +cpp_template = Template( + """\ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// This file is generated. Do not modify it manually. To re-generate it, run: +// +// ./velox/type/tz/gen_timezone_links.py -f /tmp/backward \\ +// > velox/type/tz/TimeZoneLinks.cpp +// +// The backward file should be the same one used in JODA, +// The latest is available here : +// https://github.com/JodaOrg/global-tz/blob/global-tz/backward. +// Presto Java currently uses the one found here: +// https://github.com/JodaOrg/global-tz/blob/2024agtz/backward +// To find the current version in Presto Java, check this file at the version of +// JODA Presto Java is using: +// https://github.com/JodaOrg/joda-time/blob/v2.12.7/src/changes/changes.xml +// @generated + +#include +#include + +namespace facebook::velox::tz { + +const std::unordered_map& getTimeZoneLinks() { + static auto* tzLinks = new std::unordered_map([] { + // Work around clang compiler bug causing multi-hour compilation + // with -fsanitize=fuzzer + // https://github.com/llvm/llvm-project/issues/75666 + return std::unordered_map{ +$entries + }; + }()); + return *tzLinks; +} + +} // namespace facebook::velox::tz\ +""" +) + +entry_template = Template(' {"$tz_key", "$tz_value"},') + + +def parse_arguments(): + parser = argparse.ArgumentParser( + description="Reads an input file (specified by -f) containing mappings from " + "time zone names to the names that should replace them and prints the " + "corresponding `TimeZoneLinks.cpp` file.", + epilog="(c) Facebook 2004-present", + ) + parser.add_argument( + "-f", + "--file-input", + required=True, + help="Input timezone links file.", + ) + return parser.parse_args() + + +def main(): + args = parse_arguments() + entries = [] + + # Read an input file with links following the format: + # + # > Link Australia/Sydney Australia/NSW + # > Link Australia/Darwin Australia/North + # > Link Australia/Brisbane Australia/Queensland + # > ... + # + # Possible containing comments (everything in a line after # is ignored) + + with open(args.file_input) as file_in: + for line in file_in: + line = line.strip() + + columns = line.split() + if len(columns) == 0: + continue + + if columns[0] != "Link": + continue + + entries.append( + entry_template.substitute(tz_key=columns[2], tz_value=columns[1]) + ) + + print(cpp_template.substitute(entries="\n".join(entries))) + return 0 + + +if __name__ == "__main__": + sys.exit(main())