From ac81eed117d4bac3551eeb82a42bdf2b941b3a15 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 23 Oct 2024 09:15:06 +0900 Subject: [PATCH] GH-44455: [C++] Update vendored date to 3.0.3 (#44482) ### Rationale for this change IANA tzdata changed its data format. So we need to update vendored date to parse it. ### What changes are included in this PR? Update vendored date to 3.0.3. Update script is added. So all our changes are automated now. ### Are these changes tested? Yes. ### Are there any user-facing changes? Yes. * GitHub Issue: #44455 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- ci/appveyor-cpp-build.bat | 2 +- cpp/src/arrow/CMakeLists.txt | 4 +- cpp/src/arrow/vendored/datetime.cpp | 19 + cpp/src/arrow/vendored/datetime.h | 7 +- cpp/src/arrow/vendored/datetime/README.md | 12 +- cpp/src/arrow/vendored/datetime/date.h | 7 +- cpp/src/arrow/vendored/datetime/ios.h | 7 +- cpp/src/arrow/vendored/datetime/ios.mm | 7 +- cpp/src/arrow/vendored/datetime/tz.cpp | 401 ++++++++++++++----- cpp/src/arrow/vendored/datetime/tz.h | 43 +- cpp/src/arrow/vendored/datetime/tz_private.h | 7 +- cpp/src/arrow/vendored/datetime/update.sh | 53 +++ cpp/src/arrow/vendored/datetime/visibility.h | 8 +- cpp/src/gandiva/precompiled/CMakeLists.txt | 2 +- 14 files changed, 437 insertions(+), 142 deletions(-) create mode 100644 cpp/src/arrow/vendored/datetime.cpp create mode 100755 cpp/src/arrow/vendored/datetime/update.sh diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat index 08a052e82f24d..084117f38778a 100644 --- a/ci/appveyor-cpp-build.bat +++ b/ci/appveyor-cpp-build.bat @@ -139,7 +139,7 @@ set PARQUET_HOME=%CONDA_PREFIX%\Library @rem Download IANA Timezone Database to a non-standard location to @rem test the configurability of the timezone database path -curl https://data.iana.org/time-zones/releases/tzdata2021e.tar.gz --output tzdata.tar.gz || exit /B +curl https://data.iana.org/time-zones/releases/tzdata2024b.tar.gz --output tzdata.tar.gz || exit /B mkdir %USERPROFILE%\Downloads\test\tzdata tar --extract --file tzdata.tar.gz --directory %USERPROFILE%\Downloads\test\tzdata curl https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml ^ diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index c911f0f4e9481..5f6b568460afe 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -460,7 +460,7 @@ endif() set(ARROW_VENDORED_SRCS vendored/base64.cpp - vendored/datetime/tz.cpp + vendored/datetime.cpp vendored/double-conversion/bignum-dtoa.cc vendored/double-conversion/bignum.cc vendored/double-conversion/cached-powers.cc @@ -488,7 +488,7 @@ set(ARROW_VENDORED_SRCS if(APPLE) list(APPEND ARROW_VENDORED_SRCS vendored/datetime/ios.mm) endif() -set_source_files_properties(vendored/datetime/tz.cpp +set_source_files_properties(vendored/datetime.cpp PROPERTIES SKIP_PRECOMPILE_HEADERS ON SKIP_UNITY_BUILD_INCLUSION ON) arrow_add_object_library(ARROW_VENDORED ${ARROW_VENDORED_SRCS}) diff --git a/cpp/src/arrow/vendored/datetime.cpp b/cpp/src/arrow/vendored/datetime.cpp new file mode 100644 index 0000000000000..0f0bd12c7e160 --- /dev/null +++ b/cpp/src/arrow/vendored/datetime.cpp @@ -0,0 +1,19 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "datetime/visibility.h" +#include "datetime/tz.cpp" diff --git a/cpp/src/arrow/vendored/datetime.h b/cpp/src/arrow/vendored/datetime.h index e437cdcbc2dae..aea31ebe77f9e 100644 --- a/cpp/src/arrow/vendored/datetime.h +++ b/cpp/src/arrow/vendored/datetime.h @@ -17,10 +17,11 @@ #pragma once -#include "arrow/vendored/datetime/date.h" // IWYU pragma: export -#include "arrow/vendored/datetime/tz.h" // IWYU pragma: export +#include "arrow/vendored/datetime/visibility.h" // IWYU pragma: export +#include "arrow/vendored/datetime/date.h" // IWYU pragma: export +#include "arrow/vendored/datetime/tz.h" // IWYU pragma: export // Can be defined by date.h. #ifdef NOEXCEPT -#undef NOEXCEPT +# undef NOEXCEPT #endif diff --git a/cpp/src/arrow/vendored/datetime/README.md b/cpp/src/arrow/vendored/datetime/README.md index 5a0993b7b4336..89132d9cba0f5 100644 --- a/cpp/src/arrow/vendored/datetime/README.md +++ b/cpp/src/arrow/vendored/datetime/README.md @@ -17,12 +17,16 @@ copies or substantial portions of the Software. Sources for datetime are adapted from Howard Hinnant's date library (https://github.com/HowardHinnant/date). -Sources are taken from changeset 1ead6715dec030d340a316c927c877a3c4e5a00c +Sources are taken from changeset 5bdb7e6f31fac909c090a46dbd9fea27b6e609a4 of the above project. The following changes are made: - fix internal inclusion paths (from "date/xxx.h" to simply "xxx.h") - enclose the `date` namespace inside the `arrow_vendored` namespace -- include a custom "visibility.h" header from "tz.cpp" for proper DLL - exports on Windows -- disable curl-based database downloading in "tz.h" + +## How to update + +```console +$ cd cpp/src/arrow/vendored/datetime +$ ./update.sh 3.0.3 +``` diff --git a/cpp/src/arrow/vendored/datetime/date.h b/cpp/src/arrow/vendored/datetime/date.h index 75e2624296672..c17d6f3f7aa54 100644 --- a/cpp/src/arrow/vendored/datetime/date.h +++ b/cpp/src/arrow/vendored/datetime/date.h @@ -84,9 +84,7 @@ # pragma warning(disable : 4127) #endif -namespace arrow_vendored -{ -namespace date +namespace arrow_vendored::date { //---------------+ @@ -8234,8 +8232,7 @@ operator<<(std::basic_ostream& os, detail::get_units(typename Period::type{}); } -} // namespace date -} // namespace arrow_vendored +} // namespace arrow_vendored::date #ifdef _MSC_VER # pragma warning(pop) diff --git a/cpp/src/arrow/vendored/datetime/ios.h b/cpp/src/arrow/vendored/datetime/ios.h index acad28d13b558..d018e799a833e 100644 --- a/cpp/src/arrow/vendored/datetime/ios.h +++ b/cpp/src/arrow/vendored/datetime/ios.h @@ -32,9 +32,7 @@ # if TARGET_OS_IPHONE # include - namespace arrow_vendored - { - namespace date + namespace arrow_vendored::date { namespace iOSUtils { @@ -43,8 +41,7 @@ std::string get_current_timezone(); } // namespace iOSUtils - } // namespace date - } // namespace arrow_vendored + } // namespace arrow_vendored::date # endif // TARGET_OS_IPHONE #else // !__APPLE__ diff --git a/cpp/src/arrow/vendored/datetime/ios.mm b/cpp/src/arrow/vendored/datetime/ios.mm index 22b7ce6c30bc2..70ba2adf0ed58 100644 --- a/cpp/src/arrow/vendored/datetime/ios.mm +++ b/cpp/src/arrow/vendored/datetime/ios.mm @@ -47,9 +47,7 @@ #define TAR_SIZE_POSITION 124 #define TAR_SIZE_SIZE 12 -namespace arrow_vendored -{ -namespace date +namespace arrow_vendored::date { namespace iOSUtils { @@ -334,7 +332,6 @@ bool writeFile(const std::string &tzdataPath, const std::string &fileName, } } // namespace iOSUtils -} // namespace date -} // namespace arrow_vendored +} // namespace arrow_vendored::date #endif // TARGET_OS_IPHONE diff --git a/cpp/src/arrow/vendored/datetime/tz.cpp b/cpp/src/arrow/vendored/datetime/tz.cpp index 44c627775f3d7..2cf6c62a84d47 100644 --- a/cpp/src/arrow/vendored/datetime/tz.cpp +++ b/cpp/src/arrow/vendored/datetime/tz.cpp @@ -30,10 +30,6 @@ // been invented (that would involve another several millennia of evolution). // We did not mean to shout. -// NOTE(ARROW): This is required so that symbols are properly exported from the DLL -#include "visibility.h" - - #ifdef _WIN32 // windows.h will be included directly and indirectly (e.g. by curl). // We need to define these macros to prevent windows.h bringing in @@ -97,8 +93,25 @@ #endif #if defined(ANDROID) || defined(__ANDROID__) -#include -#endif +# include +# if USE_OS_TZDB +# define MISSING_LEAP_SECONDS 1 +// from https://android.googlesource.com/platform/bionic/+/master/libc/tzcode/bionic.cpp +static constexpr size_t ANDROID_TIMEZONE_NAME_LENGTH = 40; +struct bionic_tzdata_header_t { + char tzdata_version[12]; + std::int32_t index_offset; + std::int32_t data_offset; + std::int32_t final_offset; +}; +struct index_entry_t { + char buf[ANDROID_TIMEZONE_NAME_LENGTH]; + std::int32_t start; + std::int32_t length; + std::int32_t unused; // Was raw GMT offset; always 0 since tzdata2014f (L). +}; +# endif // USE_OS_TZDB +#endif // defined(ANDROID) || defined(__ANDROID__) #if USE_OS_TZDB # include @@ -122,10 +135,13 @@ #include #include -// unistd.h is used on some platforms as part of the means to get +// unistd.h is used on some platforms as part of the the means to get // the current time zone. On Win32 windows.h provides a means to do it. // gcc/mingw supports unistd.h on Win32 but MSVC does not. +#ifdef __ANDROID__ +# define INSTALL . +#endif #ifdef _WIN32 # ifdef WINAPI_FAMILY # include @@ -178,9 +194,9 @@ #ifdef _WIN32 static CONSTDATA char folder_delimiter = '\\'; -#else // !_WIN32 +#elif !defined(ANDROID) && !defined(__ANDROID__) static CONSTDATA char folder_delimiter = '/'; -#endif // !_WIN32 +#endif // !defined(WIN32) && !defined(ANDROID) && !defined(__ANDROID__) #if defined(__GNUC__) && __GNUC__ < 5 // GCC 4.9 Bug 61489 Wrong warning with -Wmissing-field-initializers @@ -191,20 +207,6 @@ static CONSTDATA char folder_delimiter = '/'; #if !USE_OS_TZDB # ifdef _WIN32 -# ifndef WINRT - -namespace -{ - struct task_mem_deleter - { - void operator()(wchar_t buf[]) - { - if (buf != nullptr) - CoTaskMemFree(buf); - } - }; - using co_task_mem_ptr = std::unique_ptr; -} static std::wstring @@ -235,6 +237,21 @@ convert_utf8_to_utf16(const std::string& s) return out; } +# ifndef WINRT + +namespace +{ + struct task_mem_deleter + { + void operator()(wchar_t buf[]) + { + if (buf != nullptr) + CoTaskMemFree(buf); + } + }; + using co_task_mem_ptr = std::unique_ptr; +} + // We might need to know certain locations even if not using the remote API, // so keep these routines out of that block for now. static @@ -372,7 +389,7 @@ class file_streambuf { # ifdef _WIN32 std::wstring wfilename = convert_utf8_to_utf16(filename); - FILE* file = ::_wfopen(wfilename.c_str(), L"rb"); + FILE* file = ::_wfopen(wfilename.c_str(), L"r"); # else // !_WIN32 FILE* file = ::fopen(filename.c_str(), "rb"); # endif // _WIN32 @@ -388,9 +405,8 @@ class file_streambuf }; #endif // !USE_OS_TZDB -namespace arrow_vendored -{ -namespace date + +namespace arrow_vendored::date { // +---------------------+ // | Begin Configuration | @@ -470,7 +486,18 @@ discover_tz_dir() { struct stat sb; using namespace std; -# ifndef __APPLE__ +# if defined(ANDROID) || defined(__ANDROID__) + CONSTDATA auto tz_dir_default = "/apex/com.android.tzdata/etc/tz"; + CONSTDATA auto tz_dir_fallback = "/system/usr/share/zoneinfo"; + + // Check updatable path first + if(stat(tz_dir_default, &sb) == 0 && S_ISDIR(sb.st_mode)) + return tz_dir_default; + else if(stat(tz_dir_fallback, &sb) == 0 && S_ISDIR(sb.st_mode)) + return tz_dir_fallback; + else + throw runtime_error("discover_tz_dir failed to find zoneinfo\n"); +# elif !defined(__APPLE__) CONSTDATA auto tz_dir_default = "/usr/share/zoneinfo"; CONSTDATA auto tz_dir_buildroot = "/usr/share/zoneinfo/uclibc"; @@ -493,9 +520,10 @@ discover_tz_dir() if (!(lstat(timezone, &sb) == 0 && S_ISLNK(sb.st_mode) && sb.st_size > 0)) throw runtime_error("discover_tz_dir failed\n"); string result; - char rp[PATH_MAX+1] = {}; - if (readlink(timezone, rp, sizeof(rp)-1) > 0) - result = string(rp); + unique_ptr rp(new char[sb.st_size]); + const auto rp_length = readlink(timezone, rp.get(), sb.st_size); + if (rp_length > 0) + result = string(rp.get(), rp_length); // readlink doesn't null-terminate else throw system_error(errno, system_category(), "readlink() failed"); auto i = result.find("zoneinfo"); @@ -527,7 +555,9 @@ get_tz_dir() static_assert(min_year <= max_year, "Configuration error"); #endif +#if !defined(ANDROID) && !defined(__ANDROID__) static std::unique_ptr init_tzdb(); +#endif // !defined(ANDROID) && !defined(__ANDROID__) tzdb_list::~tzdb_list() { @@ -586,31 +616,67 @@ get_tzdb_list() return tz_db; } +#if !defined(ANDROID) && !defined(__ANDROID__) +inline +static +char +tolower(char c) +{ + return static_cast(std::tolower(c)); +} + +inline +static +void +tolower(std::string& s) +{ + for (auto& c : s) + c = tolower(c); +} + +inline static std::string -parse3(std::istream& in) +get_alpha_word(std::istream& in) { - std::string r(3, ' '); ws(in); - r[0] = static_cast(in.get()); - r[1] = static_cast(in.get()); - r[2] = static_cast(in.get()); - return r; + std::string s; + while (!in.eof() && std::isalpha(in.peek())) + s.push_back(static_cast(in.get())); + return s; } +#endif // !defined(ANDROID) && !defined(__ANDROID__) +inline +static +bool +is_prefix_of(std::string const& key, std::string const& value) +{ + const size_t size = std::min(key.size(), value.size()); + return key.compare(0, size, value, 0, size) == 0; +} + +#if !defined(ANDROID) && !defined(__ANDROID__) static unsigned parse_month(std::istream& in) { - CONSTDATA char*const month_names[] = - {"Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; - auto s = parse3(in); - auto m = std::find(std::begin(month_names), std::end(month_names), s) - month_names; + static std::string const month_names[] = + {"january", "february", "march", "april", "may", "june", + "july", "august", "september", "october", "november", "december"}; + auto s = get_alpha_word(in); + tolower(s); + auto m = std::find_if(std::begin(month_names), std::end(month_names), + [&s](std::string const& m) + { + return is_prefix_of(s, m); + }) + - month_names; if (m >= std::end(month_names) - std::begin(month_names)) throw std::runtime_error("oops: bad month name: " + s); return static_cast(++m); } +#endif // !defined(ANDROID) && !defined(__ANDROID__) #if !USE_OS_TZDB @@ -822,10 +888,16 @@ static unsigned parse_dow(std::istream& in) { - CONSTDATA char*const dow_names[] = - {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"}; - auto s = parse3(in); - auto dow = std::find(std::begin(dow_names), std::end(dow_names), s) - dow_names; + static std::string const dow_names[] = + {"sunday", "monday", "tuesday", "wednesday", "thursday", "friday", "saturday"}; + auto s = get_alpha_word(in); + tolower(s); + auto dow = std::find_if(std::begin(dow_names), std::end(dow_names), + [&s](std::string const& dow) + { + return is_prefix_of(s, dow); + }) + - dow_names; if (dow >= std::end(dow_names) - std::begin(dow_names)) throw std::runtime_error("oops: bad dow name: " + s); return static_cast(dow); @@ -875,7 +947,7 @@ parse_signed_time(std::istream& in) detail::MonthDayTime::MonthDayTime(local_seconds tp, tz timezone) : zone_(timezone) { - using namespace date; + using namespace arrow_vendored::date; const auto dp = date::floor(tp); const auto hms = make_time(tp - dp); const auto ymd = year_month_day(dp); @@ -969,7 +1041,7 @@ sys_seconds detail::MonthDayTime::to_sys(date::year y, std::chrono::seconds offset, std::chrono::seconds save) const { - using namespace date; + using namespace arrow_vendored::date; using namespace std::chrono; auto until_utc = to_time_point(y); if (zone_ == tz::standard) @@ -1004,7 +1076,7 @@ date::sys_days detail::MonthDayTime::to_sys_days(date::year y) const { using namespace std::chrono; - using namespace date; + using namespace arrow_vendored::date; switch (type_) { case month_day: @@ -1038,7 +1110,7 @@ void detail::MonthDayTime::canonicalize(date::year y) { using namespace std::chrono; - using namespace date; + using namespace arrow_vendored::date; switch (type_) { case month_day: @@ -1076,7 +1148,7 @@ detail::MonthDayTime::canonicalize(date::year y) std::istream& detail::operator>>(std::istream& is, MonthDayTime& x) { - using namespace date; + using namespace arrow_vendored::date; using namespace std::chrono; assert(((std::ios::failbit | std::ios::badbit) & is.exceptions()) == (std::ios::failbit | std::ios::badbit)); @@ -1086,7 +1158,7 @@ detail::operator>>(std::istream& is, MonthDayTime& x) auto m = parse_month(is); if (!is.eof() && ws(is) && !is.eof() && is.peek() != '#') { - if (is.peek() == 'l') + if (tolower(is.peek()) == 'l') { for (int i = 0; i < 4; ++i) is.get(); @@ -1212,7 +1284,7 @@ detail::Rule::Rule(const std::string& s) { try { - using namespace date; + using namespace arrow_vendored::date; using namespace std::chrono; std::istringstream in(s); in.exceptions(std::ios::failbit | std::ios::badbit); @@ -1357,7 +1429,7 @@ detail::operator<(const std::string& x, const Rule& y) std::ostream& detail::operator<<(std::ostream& os, const Rule& r) { - using namespace date; + using namespace arrow_vendored::date; using namespace std::chrono; detail::save_ostream _(os); os.fill(' '); @@ -1416,7 +1488,7 @@ detail::Rule::overlaps(const Rule& x, const Rule& y) void detail::Rule::split(std::vector& rules, std::size_t i, std::size_t k, std::size_t& e) { - using namespace date; + using namespace arrow_vendored::date; using difference_type = std::vector::iterator::difference_type; // rules[i].starting_year_ <= rules[k].starting_year_ && // rules[i].ending_year_ >= rules[k].starting_year_ && @@ -1555,7 +1627,7 @@ static std::pair find_previous_rule(const Rule* r, date::year y) { - using namespace date; + using namespace arrow_vendored::date; auto const& rules = get_tzdb().rules; if (y == r->starting_year()) { @@ -1591,7 +1663,7 @@ static std::pair find_next_rule(const Rule* first_rule, const Rule* last_rule, const Rule* r, date::year y) { - using namespace date; + using namespace arrow_vendored::date; if (y == r->ending_year()) { if (r == last_rule-1) @@ -1622,7 +1694,7 @@ static std::pair find_next_rule(const Rule* r, date::year y) { - using namespace date; + using namespace arrow_vendored::date; auto const& rules = get_tzdb().rules; if (y == r->ending_year()) { @@ -1671,7 +1743,7 @@ find_rule_for_zone(const std::pair& eqr, assert(eqr.second != nullptr); using namespace std::chrono; - using namespace date; + using namespace arrow_vendored::date; auto r = eqr.first; auto ry = r->starting_year(); auto prev_save = minutes{0}; @@ -1697,7 +1769,7 @@ find_rule_for_zone(const std::pair& eqr, const local_seconds& tp_loc) { using namespace std::chrono; - using namespace date; + using namespace arrow_vendored::date; auto r = eqr.first; auto ry = r->starting_year(); auto prev_save = minutes{0}; @@ -1737,7 +1809,7 @@ find_rule(const std::pair& first_rule, const std::string& initial_abbrev) { using namespace std::chrono; - using namespace date; + using namespace arrow_vendored::date; auto r = first_rule.first; auto ry = first_rule.second; sys_info x{sys_days(year::min()/min_day), sys_days(year::max()/max_day), @@ -2152,6 +2224,9 @@ time_zone::load_data(std::istream& inf, void time_zone::init_impl() { +#if defined(ANDROID) || defined(__ANDROID__) + return; +#endif // defined(ANDROID) || defined(__ANDROID__) using namespace std; using namespace std::chrono; auto name = get_tz_dir() + ('/' + name_); @@ -2313,6 +2388,86 @@ time_zone::get_info_impl(local_seconds tp) const return i; } +#if defined(ANDROID) || defined(__ANDROID__) +void +time_zone::parse_from_android_tzdata(std::ifstream& inf, const std::size_t off) +{ + using namespace std; + using namespace std::chrono; + if (!inf.is_open()) + throw std::runtime_error{"Unable to open tzdata"}; + std::size_t restorepos = inf.tellg(); + inf.seekg(off, inf.beg); + load_header(inf); + auto v = load_version(inf); + std::int32_t tzh_ttisgmtcnt, tzh_ttisstdcnt, tzh_leapcnt, + tzh_timecnt, tzh_typecnt, tzh_charcnt; + skip_reserve(inf); + load_counts(inf, tzh_ttisgmtcnt, tzh_ttisstdcnt, tzh_leapcnt, + tzh_timecnt, tzh_typecnt, tzh_charcnt); + if (v == 0) + { + load_data(inf, tzh_leapcnt, tzh_timecnt, tzh_typecnt, tzh_charcnt); + } + else + { +#if !defined(NDEBUG) + inf.ignore((4+1)*tzh_timecnt + 6*tzh_typecnt + tzh_charcnt + 8*tzh_leapcnt + + tzh_ttisstdcnt + tzh_ttisgmtcnt); + load_header(inf); + auto v2 = load_version(inf); + assert(v == v2); + skip_reserve(inf); +#else // defined(NDEBUG) + inf.ignore((4+1)*tzh_timecnt + 6*tzh_typecnt + tzh_charcnt + 8*tzh_leapcnt + + tzh_ttisstdcnt + tzh_ttisgmtcnt + (4+1+15)); +#endif // defined(NDEBUG) + load_counts(inf, tzh_ttisgmtcnt, tzh_ttisstdcnt, tzh_leapcnt, + tzh_timecnt, tzh_typecnt, tzh_charcnt); + load_data(inf, tzh_leapcnt, tzh_timecnt, tzh_typecnt, tzh_charcnt); + } +#if !MISSING_LEAP_SECONDS + if (tzh_leapcnt > 0) + { + auto& leap_seconds = get_tzdb_list().front().leap_seconds; + auto itr = leap_seconds.begin(); + auto l = itr->date(); + seconds leap_count{0}; + for (auto t = std::upper_bound(transitions_.begin(), transitions_.end(), l, + [](const sys_seconds& x, const transition& ct) + { + return x < ct.timepoint; + }); + t != transitions_.end(); ++t) + { + while (t->timepoint >= l) + { + ++leap_count; + if (++itr == leap_seconds.end()) + l = sys_days(max_year/max_day); + else + l = itr->date() + leap_count; + } + t->timepoint -= leap_count; + } + } +#endif // !MISSING_LEAP_SECONDS + auto b = transitions_.begin(); + auto i = transitions_.end(); + if (i != b) + { + for (--i; i != b; --i) + { + if (i->info->offset == i[-1].info->offset && + i->info->abbrev == i[-1].info->abbrev && + i->info->is_dst == i[-1].info->is_dst) + i = transitions_.erase(i); + } + } + inf.seekg(restorepos, inf.beg); +} +#endif // defined(ANDROID) || defined(__ANDROID__) + std::ostream& operator<<(std::ostream& os, const time_zone& z) { @@ -2346,7 +2501,7 @@ time_zone::time_zone(const std::string& s, detail::undocumented) { try { - using namespace date; + using namespace arrow_vendored::date; std::istringstream in(s); in.exceptions(std::ios::failbit | std::ios::badbit); std::string word; @@ -2416,7 +2571,7 @@ time_zone::add(const std::string& s) void time_zone::parse_info(std::istream& in) { - using namespace date; + using namespace arrow_vendored::date; using namespace std::chrono; zonelets_.emplace_back(); auto& zonelet = zonelets_.back(); @@ -2449,7 +2604,7 @@ void time_zone::adjust_infos(const std::vector& rules) { using namespace std::chrono; - using namespace date; + using namespace arrow_vendored::date; const zonelet* prev_zonelet = nullptr; for (auto& z : zonelets_) { @@ -2620,7 +2775,7 @@ sys_info time_zone::get_info_impl(sys_seconds tp, int tz_int) const { using namespace std::chrono; - using namespace date; + using namespace arrow_vendored::date; tz timezone = static_cast(tz_int); assert(timezone != tz::standard); auto y = year_month_day(floor(tp)).year(); @@ -2682,7 +2837,7 @@ time_zone::get_info_impl(sys_seconds tp, int tz_int) const std::ostream& operator<<(std::ostream& os, const time_zone& z) { - using namespace date; + using namespace arrow_vendored::date; using namespace std::chrono; detail::save_ostream _(os); os.fill(' '); @@ -2744,14 +2899,14 @@ operator<<(std::ostream& os, const leap_second& x) #if USE_OS_TZDB +#if !defined(ANDROID) && !defined(__ANDROID__) static std::string get_version() { - using namespace std; - auto path = get_tz_dir() + string("/+VERSION"); - ifstream in{path}; - string version; + auto path = get_tz_dir() + std::string("/+VERSION"); + std::ifstream in{path}; + std::string version; if (in) { in >> version; @@ -2786,7 +2941,8 @@ find_read_and_leap_seconds() iss.exceptions(std::ios::failbit | std::ios::badbit); std::string word; iss >> word; - if (word == "Leap") + tolower(word); + if (is_prefix_of(word, "leap")) { int y, m, d; iss >> y; @@ -2847,6 +3003,7 @@ find_read_and_leap_seconds() #endif return {}; } +#endif // !defined(ANDROID) && !defined(__ANDROID__) static std::unique_ptr @@ -2854,6 +3011,38 @@ init_tzdb() { std::unique_ptr db(new tzdb); +#if defined(ANDROID) || defined(__ANDROID__) + auto path = get_tz_dir() + std::string("/tzdata"); + std::ifstream in{path}; + if (!in) + throw std::runtime_error("Can not open " + path); + bionic_tzdata_header_t hdr{}; + in.read(reinterpret_cast(&hdr), sizeof(bionic_tzdata_header_t)); + if (!is_prefix_of(hdr.tzdata_version, "tzdata") || hdr.tzdata_version[11] != 0) + throw std::runtime_error("Malformed tzdata - invalid magic!"); + maybe_reverse_bytes(hdr.index_offset); + maybe_reverse_bytes(hdr.data_offset); + maybe_reverse_bytes(hdr.final_offset); + if (hdr.index_offset > hdr.data_offset) + throw std::runtime_error("Malformed tzdata - hdr.index_offset > hdr.data_offset!"); + const size_t index_size = hdr.data_offset - hdr.index_offset; + if ((index_size % sizeof(index_entry_t)) != 0) + throw std::runtime_error("Malformed tzdata - index size malformed!"); + //Iterate through zone index + index_entry_t index_entry{}; + for (size_t idx = 0; idx < index_size; idx += sizeof(index_entry_t)) { + in.read(reinterpret_cast(&index_entry), sizeof(index_entry_t)); + maybe_reverse_bytes(index_entry.start); + maybe_reverse_bytes(index_entry.length); + time_zone timezone{std::string(index_entry.buf), + detail::undocumented{}}; + timezone.parse_from_android_tzdata(in, hdr.data_offset + index_entry.start); + db->zones.emplace_back(std::move(timezone)); + } + db->zones.shrink_to_fit(); + std::sort(db->zones.begin(), db->zones.end()); + db->version = std::string(hdr.tzdata_version).replace(0, 6, ""); +#else //Iterate through folders std::queue subfolders; subfolders.emplace(get_tz_dir()); @@ -2878,6 +3067,7 @@ init_tzdb() strcmp(d->d_name, "version") == 0 || strcmp(d->d_name, "zone.tab") == 0 || strcmp(d->d_name, "zone1970.tab") == 0 || + strcmp(d->d_name, "zonenow.tab") == 0 || strcmp(d->d_name, "tzdata.zi") == 0 || strcmp(d->d_name, "leapseconds") == 0 || strcmp(d->d_name, "leap-seconds.list") == 0 ) @@ -2905,6 +3095,7 @@ init_tzdb() std::sort(db->zones.begin(), db->zones.end()); db->leap_seconds = find_read_and_leap_seconds(); db->version = get_version(); +#endif // defined(ANDROID) || defined(__ANDROID__) return db; } @@ -2914,7 +3105,7 @@ init_tzdb() time_zone_link::time_zone_link(const std::string& s) { - using namespace date; + using namespace arrow_vendored::date; std::istringstream in(s); in.exceptions(std::ios::failbit | std::ios::badbit); std::string word; @@ -2924,7 +3115,7 @@ time_zone_link::time_zone_link(const std::string& s) std::ostream& operator<<(std::ostream& os, const time_zone_link& x) { - using namespace date; + using namespace arrow_vendored::date; detail::save_ostream _(os); os.fill(' '); os.flags(std::ios::dec | std::ios::left); @@ -2936,7 +3127,7 @@ operator<<(std::ostream& os, const time_zone_link& x) leap_second::leap_second(const std::string& s, detail::undocumented) { - using namespace date; + using namespace arrow_vendored::date; std::istringstream in(s); in.exceptions(std::ios::failbit | std::ios::badbit); std::string word; @@ -3568,7 +3759,7 @@ static std::unique_ptr init_tzdb() { - using namespace date; + using namespace arrow_vendored::date; const std::string install = get_install(); const std::string path = install + folder_delimiter; std::string line; @@ -3647,22 +3838,23 @@ init_tzdb() std::istringstream in(line); std::string word; in >> word; - if (word == "Rule") + tolower(word); + if (is_prefix_of(word, "rule")) { db->rules.push_back(Rule(line)); continue_zone = false; } - else if (word == "Link") + else if (is_prefix_of(word, "link")) { db->links.push_back(time_zone_link(line)); continue_zone = false; } - else if (word == "Leap") + else if (is_prefix_of(word, "leap")) { db->leap_seconds.push_back(leap_second(line, detail::undocumented{})); continue_zone = false; } - else if (word == "Zone") + else if (is_prefix_of(word, "zone")) { db->zones.push_back(time_zone(line, detail::undocumented{})); continue_zone = true; @@ -3991,10 +4183,12 @@ bool sniff_realpath(const char* timezone) { using namespace std; - char rp[PATH_MAX+1] = {}; - if (realpath(timezone, rp) == nullptr) + unique_ptr rp(realpath(timezone, nullptr), free); + if (rp.get() == nullptr) throw system_error(errno, system_category(), "realpath() failed"); - auto result = extract_tz_name(rp); + auto result = extract_tz_name(rp.get()); + if (result.find("posix") == 0) + return false; return result != "posixrules"; } @@ -4021,18 +4215,24 @@ tzdb::current_zone() const { using namespace std; static const bool use_realpath = sniff_realpath(timezone); - char rp[PATH_MAX+1] = {}; if (use_realpath) { - if (realpath(timezone, rp) == nullptr) + unique_ptr rp(realpath(timezone, nullptr), free); + if (rp.get() == nullptr) throw system_error(errno, system_category(), "realpath() failed"); + return locate_zone(extract_tz_name(rp.get())); } else { - if (readlink(timezone, rp, sizeof(rp)-1) <= 0) + // +1 because st_size doesn't include the '\0' terminator + const auto rp_size = sb.st_size + 1; + unique_ptr rp(new char[rp_size]); + const auto rp_length = readlink(timezone, rp.get(), rp_size); + if (rp_length <= 0) throw system_error(errno, system_category(), "readlink() failed"); + rp.get()[rp_length] = '\0'; // readlink doesn't null-terminate + return locate_zone(extract_tz_name(rp.get())); } - return locate_zone(extract_tz_name(rp)); } } // On embedded systems e.g. buildroot with uclibc the timezone is linked @@ -4051,9 +4251,10 @@ tzdb::current_zone() const if (lstat(timezone, &sb) == 0 && S_ISLNK(sb.st_mode) && sb.st_size > 0) { using namespace std; string result; - char rp[PATH_MAX+1] = {}; - if (readlink(timezone, rp, sizeof(rp)-1) > 0) - result = string(rp); + unique_ptr rp(new char[sb.st_size]); + const auto rp_length = readlink(timezone, rp.get(), sb.st_size); + if (rp_length > 0) + result = string(rp.get(), rp_length); // readlink doesn't null-terminate else throw system_error(errno, system_category(), "readlink() failed"); @@ -4135,6 +4336,25 @@ tzdb::current_zone() const } // Fall through to try other means. } + // On OpenWRT we need to check /etc/config/system + // It will have a line with the following structure + // ... + // option zoneName 'Europe/Berlin' + // ... + { + std::ifstream timezone_file("/etc/config/system"); + if (timezone_file.is_open()) + { + for(std::string result; std::getline(timezone_file, result);) { + std::string findStr = "option zoneName '"; + size_t startPos = result.find(findStr); + if (startPos != std::string::npos) { + size_t endPos = result.find("'", startPos + findStr.size()); + return locate_zone(result.substr(startPos + findStr.size(), endPos - startPos - findStr.size())); + } + } + } + } throw std::runtime_error("Could not get current timezone"); } @@ -4146,8 +4366,7 @@ current_zone() return get_tzdb().current_zone(); } -} // namespace date -} // namespace arrow_vendored +} // namespace arrow_vendored::date #if defined(__GNUC__) && __GNUC__ < 5 # pragma GCC diagnostic pop diff --git a/cpp/src/arrow/vendored/datetime/tz.h b/cpp/src/arrow/vendored/datetime/tz.h index df6d1a851ac9d..61ab3df106db0 100644 --- a/cpp/src/arrow/vendored/datetime/tz.h +++ b/cpp/src/arrow/vendored/datetime/tz.h @@ -43,19 +43,13 @@ // required. On Windows, the names are never "Standard" so mapping is always required. // Technically any OS may use the mapping process but currently only Windows does use it. -// NOTE(ARROW): If this is not set, then the library will attempt to -// use libcurl to obtain a timezone database, and we probably do not want this. -#ifndef _WIN32 -#define USE_OS_TZDB 1 -#endif - #ifndef USE_OS_TZDB # define USE_OS_TZDB 0 #endif #ifndef HAS_REMOTE_API # if USE_OS_TZDB == 0 -# ifdef _WIN32 +# if defined _WIN32 || defined __ANDROID__ # define HAS_REMOTE_API 0 # else # define HAS_REMOTE_API 1 @@ -140,13 +134,18 @@ static_assert(HAS_REMOTE_API == 0 ? AUTO_DOWNLOAD == 0 : true, # endif #endif -namespace arrow_vendored -{ -namespace date +namespace arrow_vendored::date { enum class choose {earliest, latest}; +#if defined(BUILD_TZ_LIB) +# if defined(ANDROID) || defined(__ANDROID__) +struct tzdb; +static std::unique_ptr init_tzdb(); +# endif // defined(ANDROID) || defined(__ANDROID__) +#endif // defined(BUILD_TZ_LIB) + namespace detail { struct undocumented; @@ -829,6 +828,12 @@ class time_zone #if !USE_OS_TZDB DATE_API void add(const std::string& s); +#else +# if defined(BUILD_TZ_LIB) +# if defined(ANDROID) || defined(__ANDROID__) + friend std::unique_ptr init_tzdb(); +# endif // defined(ANDROID) || defined(__ANDROID__) +# endif // defined(BUILD_TZ_LIB) #endif // !USE_OS_TZDB private: @@ -852,6 +857,9 @@ class time_zone DATE_API void load_data(std::istream& inf, std::int32_t tzh_leapcnt, std::int32_t tzh_timecnt, std::int32_t tzh_typecnt, std::int32_t tzh_charcnt); +# if defined(ANDROID) || defined(__ANDROID__) + void parse_from_android_tzdata(std::ifstream& inf, const std::size_t off); +# endif // defined(ANDROID) || defined(__ANDROID__) #else // !USE_OS_TZDB DATE_API sys_info get_info_impl(sys_seconds tp, int tz_int) const; DATE_API void adjust_infos(const std::vector& rules); @@ -1198,11 +1206,11 @@ struct tzdb #endif // defined(_MSC_VER) && (_MSC_VER < 1900) #if HAS_STRING_VIEW - const time_zone* locate_zone(std::string_view tz_name) const; + DATE_API const time_zone* locate_zone(std::string_view tz_name) const; #else - const time_zone* locate_zone(const std::string& tz_name) const; + DATE_API const time_zone* locate_zone(const std::string& tz_name) const; #endif - const time_zone* current_zone() const; + DATE_API const time_zone* current_zone() const; }; using TZ_DB = tzdb; @@ -1217,9 +1225,9 @@ class tzdb_list std::atomic head_{nullptr}; public: - ~tzdb_list(); + DATE_API ~tzdb_list(); tzdb_list() = default; - tzdb_list(tzdb_list&& x) NOEXCEPT; + DATE_API tzdb_list(tzdb_list&& x) NOEXCEPT; const tzdb& front() const NOEXCEPT {return *head_;} tzdb& front() NOEXCEPT {return *head_;} @@ -1232,7 +1240,7 @@ class tzdb_list const_iterator cbegin() const NOEXCEPT; const_iterator cend() const NOEXCEPT; - const_iterator erase_after(const_iterator p) NOEXCEPT; + DATE_API const_iterator erase_after(const_iterator p) NOEXCEPT; struct undocumented_helper; private: @@ -2795,7 +2803,6 @@ to_gps_time(const tai_time& t) return gps_clock::from_utc(tai_clock::to_utc(t)); } -} // namespace date -} // namespace arrow_vendored +} // namespace arrow_vendored::date #endif // TZ_H diff --git a/cpp/src/arrow/vendored/datetime/tz_private.h b/cpp/src/arrow/vendored/datetime/tz_private.h index a6bb8fd30a0c7..1d7f858971106 100644 --- a/cpp/src/arrow/vendored/datetime/tz_private.h +++ b/cpp/src/arrow/vendored/datetime/tz_private.h @@ -34,9 +34,7 @@ #include #endif -namespace arrow_vendored -{ -namespace date +namespace arrow_vendored::date { namespace detail @@ -308,8 +306,7 @@ struct transition } // namespace detail -} // namespace date -} // namespace arrow_vendored +} // namespace arrow_vendored::date #if defined(_MSC_VER) && (_MSC_VER < 1900) #include "tz.h" diff --git a/cpp/src/arrow/vendored/datetime/update.sh b/cpp/src/arrow/vendored/datetime/update.sh new file mode 100755 index 0000000000000..b4580c042608e --- /dev/null +++ b/cpp/src/arrow/vendored/datetime/update.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -eux + +source_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +if [ "$#" -ne 1 ]; then + echo "Usage: $0 VERSION" + echo " e.g.: $0 3.0.3" + exit 1 +fi + +version="$1" + +pushd "${source_dir}" +rm -rf date +git clone \ + --branch "v${version}" \ + --depth 1 \ + https://github.com/HowardHinnant/date.git +commit_id=$(git -C date log -1 --format=format:%H) +mv date/include/date/date.h ./ +mv date/include/date/ios.h ./ +mv date/include/date/tz.h ./ +mv date/include/date/tz_private.h ./ +mv date/src/* ./ +rm -rf date +sed -i.bak -E \ + -e 's/namespace date/namespace arrow_vendored::date/g' \ + -e 's,include "date/,include ",g' \ + *.{cpp,h,mm} +sed -i.bak -E \ + -e "s/changeset [0-9a-f]+/changeset ${commit_id}/g" \ + README.md +rm *.bak +popd diff --git a/cpp/src/arrow/vendored/datetime/visibility.h b/cpp/src/arrow/vendored/datetime/visibility.h index ae031238d85ac..780c00d70bd9f 100644 --- a/cpp/src/arrow/vendored/datetime/visibility.h +++ b/cpp/src/arrow/vendored/datetime/visibility.h @@ -17,10 +17,14 @@ #pragma once +#ifndef _WIN32 +# define USE_OS_TZDB 1 +#endif + #if defined(ARROW_STATIC) // intentially empty #elif defined(ARROW_EXPORTING) -#define DATE_BUILD_DLL +# define DATE_BUILD_DLL #else -#define DATE_USE_DLL +# define DATE_USE_DLL #endif diff --git a/cpp/src/gandiva/precompiled/CMakeLists.txt b/cpp/src/gandiva/precompiled/CMakeLists.txt index c2bc7fc02797e..e1427e25fb666 100644 --- a/cpp/src/gandiva/precompiled/CMakeLists.txt +++ b/cpp/src/gandiva/precompiled/CMakeLists.txt @@ -63,7 +63,7 @@ add_gandiva_test(precompiled-test time.cc timestamp_arithmetic.cc ../cast_time.cc - ../../arrow/vendored/datetime/tz.cpp + ../../arrow/vendored/datetime.cpp hash_test.cc hash.cc string_ops_test.cc