From a8bd2dea2f019baea6f25ac9158c76a879bcc668 Mon Sep 17 00:00:00 2001 From: Hongze Zhang Date: Tue, 17 Sep 2024 20:05:11 -0700 Subject: [PATCH] Ignore time zones that are not recognizable by OS when building time zone database (#10654) Summary: Related to discussion https://github.com/facebookincubator/velox/pull/10577#discussion_r1694501747 The patch fixes fatal error ` not found in timezone database` when at least one of the timezone IDs in [file](https://github.com/facebookincubator/velox/blob/main/velox/type/tz/TimeZoneDatabase.cpp) don't exist in OS's supported timezone list. Pull Request resolved: https://github.com/facebookincubator/velox/pull/10654 Reviewed By: kgpai Differential Revision: D62785133 Pulled By: pedroerp fbshipit-source-id: c8454454750040f8fdcbe053084f505d679f3142 --- ...-add_error_type_for_invalid_timezone.patch | 51 +++++++++++++++++ velox/external/date/tz.cpp | 2 +- velox/external/date/tz.h | 27 +++++++++ velox/type/tz/TimeZoneMap.cpp | 31 ++++++++-- velox/type/tz/tests/CMakeLists.txt | 14 ++++- .../tests/TimeZoneMapExternalInvalidTest.cpp | 56 +++++++++++++++++++ 6 files changed, 175 insertions(+), 6 deletions(-) create mode 100644 velox/external/date/patches/0005-add_error_type_for_invalid_timezone.patch create mode 100644 velox/type/tz/tests/TimeZoneMapExternalInvalidTest.cpp diff --git a/velox/external/date/patches/0005-add_error_type_for_invalid_timezone.patch b/velox/external/date/patches/0005-add_error_type_for_invalid_timezone.patch new file mode 100644 index 000000000000..90d20ed37a61 --- /dev/null +++ b/velox/external/date/patches/0005-add_error_type_for_invalid_timezone.patch @@ -0,0 +1,51 @@ +diff --git a/velox/external/date/tz.cpp b/velox/external/date/tz.cpp +index c3a3a8f7e..69513d7d3 100644 +--- a/velox/external/date/tz.cpp ++++ b/velox/external/date/tz.cpp +@@ -3577,7 +3577,7 @@ tzdb::locate_zone(const std::string& tz_name) const + return &*zi; + } + #endif // !USE_OS_TZDB +- throw std::runtime_error(std::string(tz_name) + " not found in timezone database"); ++ throw invalid_timezone(std::string(tz_name)); + } + return &*zi; + } +diff --git a/velox/external/date/tz.h b/velox/external/date/tz.h +index a70bda4ad..c85b30eb7 100644 +--- a/velox/external/date/tz.h ++++ b/velox/external/date/tz.h +@@ -284,6 +284,33 @@ ambiguous_local_time::make_msg(local_time tp, const local_info& i) + return os.str(); + } + ++class invalid_timezone ++ : public std::runtime_error ++{ ++public: ++ invalid_timezone(const std::string tz_name); ++ ++private: ++ static ++ std::string ++ make_msg(const std::string tz_name); ++}; ++ ++inline ++invalid_timezone::invalid_timezone(const std::string tz_name) ++ : std::runtime_error(make_msg(tz_name)) ++{ ++} ++ ++inline ++std::string ++invalid_timezone::make_msg(const std::string tz_name) ++{ ++ std::ostringstream os; ++ os << tz_name << " not found in timezone database"; ++ return os.str(); ++} ++ + class time_zone; + + #if HAS_STRING_VIEW diff --git a/velox/external/date/tz.cpp b/velox/external/date/tz.cpp index c3a3a8f7e7d5..69513d7d3145 100644 --- a/velox/external/date/tz.cpp +++ b/velox/external/date/tz.cpp @@ -3577,7 +3577,7 @@ tzdb::locate_zone(const std::string& tz_name) const return &*zi; } #endif // !USE_OS_TZDB - throw std::runtime_error(std::string(tz_name) + " not found in timezone database"); + throw invalid_timezone(std::string(tz_name)); } return &*zi; } diff --git a/velox/external/date/tz.h b/velox/external/date/tz.h index a70bda4ad258..4ec0dbb44cfd 100644 --- a/velox/external/date/tz.h +++ b/velox/external/date/tz.h @@ -284,6 +284,33 @@ ambiguous_local_time::make_msg(local_time tp, const local_info& i) return os.str(); } +class invalid_timezone + : public std::runtime_error +{ +public: + invalid_timezone(const std::string& tz_name); + +private: + static + std::string + make_msg(const std::string& tz_name); +}; + +inline +invalid_timezone::invalid_timezone(const std::string& tz_name) + : std::runtime_error(make_msg(tz_name)) +{ +} + +inline +std::string +invalid_timezone::make_msg(const std::string& tz_name) +{ + std::ostringstream os; + os << tz_name << " not found in timezone database"; + return os.str(); +} + class time_zone; #if HAS_STRING_VIEW diff --git a/velox/type/tz/TimeZoneMap.cpp b/velox/type/tz/TimeZoneMap.cpp index 1b57faa144ef..4fc8762d1bfa 100644 --- a/velox/type/tz/TimeZoneMap.cpp +++ b/velox/type/tz/TimeZoneMap.cpp @@ -21,8 +21,11 @@ #include #include #include "velox/common/base/Exceptions.h" +#include "velox/common/testutil/TestValue.h" #include "velox/external/date/tz.h" +using facebook::velox::common::testutil::TestValue; + namespace facebook::velox::tz { using TTimeZoneDatabase = std::vector>; @@ -39,6 +42,12 @@ inline std::chrono::minutes getTimeZoneOffset(int16_t tzID) { return std::chrono::minutes{(tzID <= 840) ? (tzID - 841) : (tzID - 840)}; } +const date::time_zone* locateZoneImpl(std::string_view tz_name) { + TestValue::adjust("facebook::velox::tz::locateZoneImpl", &tz_name); + const date::time_zone* zone = date::locate_zone(tz_name); + return zone; +} + // Flattens the input vector of pairs into a vector, assuming that the // timezoneIDs are (mostly) sequential. Note that since they are "mostly" // senquential, the vector can have holes. But it is still more efficient than @@ -52,8 +61,8 @@ TTimeZoneDatabase buildTimeZoneDatabase( std::unique_ptr timeZonePtr; if (entry.first == 0) { - timeZonePtr = std::make_unique( - "UTC", entry.first, date::locate_zone("UTC")); + timeZonePtr = + std::make_unique("UTC", entry.first, locateZoneImpl("UTC")); } else if (entry.first <= 1680) { std::chrono::minutes offset = getTimeZoneOffset(entry.first); timeZonePtr = @@ -62,8 +71,22 @@ TTimeZoneDatabase buildTimeZoneDatabase( // Every single other time zone entry (outside of offsets) needs to be // available in external/date or this will throw. else { - timeZonePtr = std::make_unique( - entry.second, entry.first, date::locate_zone(entry.second)); + const date::time_zone* zone; + try { + zone = locateZoneImpl(entry.second); + } catch (date::invalid_timezone& err) { + // When this exception is thrown, it typically means the time zone name + // we are trying to locate cannot be found from OS's time zone database. + // Thus, we just command a "continue;" to skip the creation of the + // corresponding TimeZone object. + // + // Then, once this time zone is requested at runtime, a runtime error + // will be thrown and caller is expected to handle that error in code. + LOG(WARNING) << "Unable to load [" << entry.second + << "] from local timezone database: " << err.what(); + continue; + } + timeZonePtr = std::make_unique(entry.second, entry.first, zone); } tzDatabase[entry.first] = std::move(timeZonePtr); } diff --git a/velox/type/tz/tests/CMakeLists.txt b/velox/type/tz/tests/CMakeLists.txt index 0844419aea4a..5c17513afaad 100644 --- a/velox/type/tz/tests/CMakeLists.txt +++ b/velox/type/tz/tests/CMakeLists.txt @@ -13,7 +13,6 @@ # limitations under the License. add_executable(velox_type_tz_test TimeZoneMapTest.cpp) - add_test(velox_type_tz_test velox_type_tz_test) target_link_libraries( @@ -23,3 +22,16 @@ target_link_libraries( GTest::gtest GTest::gtest_main pthread) + +add_executable(velox_type_tz_ext_invalid_test + TimeZoneMapExternalInvalidTest.cpp) +add_test(velox_type_tz_ext_invalid_test velox_type_tz_ext_invalid_test) + +target_link_libraries( + velox_type_tz_ext_invalid_test + velox_type_tz + velox_test_util + velox_exception + GTest::gtest + GTest::gtest_main + pthread) diff --git a/velox/type/tz/tests/TimeZoneMapExternalInvalidTest.cpp b/velox/type/tz/tests/TimeZoneMapExternalInvalidTest.cpp new file mode 100644 index 000000000000..b5668f929d72 --- /dev/null +++ b/velox/type/tz/tests/TimeZoneMapExternalInvalidTest.cpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "velox/common/base/Exceptions.h" +#include "velox/common/base/tests/GTestUtils.h" +#include "velox/common/testutil/TestValue.h" +#include "velox/external/date/tz.h" +#include "velox/type/tz/TimeZoneMap.h" + +namespace facebook::velox::tz { +namespace { + +using namespace std::chrono; + +DEBUG_ONLY_TEST(TimeZoneMapExternalInvalidTest, externalInvalid) { + const int16_t testZoneId = 1681; + const std::string testZone = "Africa/Abidjan"; + common::testutil::TestValue::enable(); + SCOPED_TESTVALUE_SET( + "facebook::velox::tz::locateZoneImpl", + std::function( + [&](std::string_view* tz_name) -> void { + if (*tz_name == testZone) { + // Emulates an invalid_timezone error thrown from external API + // date::locate_zone. In real scenarios, this could happen when + // the timezone is not found in operating system's timezone list. + throw date::invalid_timezone(std::string(*tz_name)); + } + })); + + VELOX_ASSERT_THROW( + getTimeZoneName(testZoneId), "Unable to resolve timeZoneID"); + VELOX_ASSERT_THROW(getTimeZoneID(testZone), "Unknown time zone"); + + EXPECT_EQ("UTC", getTimeZoneName(0)); + EXPECT_EQ(0, getTimeZoneID("+00:00")); + EXPECT_EQ("Africa/Accra", getTimeZoneName(1682)); + EXPECT_EQ(1682, getTimeZoneID("Africa/Accra")); +} +} // namespace +} // namespace facebook::velox::tz