diff --git a/CMakeLists.txt b/CMakeLists.txt index 064e78eab2730..0ab006c5aed87 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -472,6 +472,18 @@ add_compile_definitions(FOLLY_HAVE_INT128_T=1) set_source(folly) resolve_dependency(folly) +if(CMAKE_SYSTEM_NAME MATCHES "Darwin") + if(ON_APPLE_M1) + list(APPEND CMAKE_PREFIX_PATH "/opt/homebrew/opt/icu4c") + else() + list(APPEND CMAKE_PREFIX_PATH "/usr/local/opt/icu4c") + endif() +endif() + +# ICU is only needed with Boost build from source +set_source(ICU) +resolve_dependency(ICU COMPONENTS i18n uc) + if(${VELOX_BUILD_TESTING}) # Spark qury runner depends on absl, gRPC. set_source(absl) diff --git a/velox/functions/lib/CMakeLists.txt b/velox/functions/lib/CMakeLists.txt index bdff97bba2d95..5cee3af311d13 100644 --- a/velox/functions/lib/CMakeLists.txt +++ b/velox/functions/lib/CMakeLists.txt @@ -24,7 +24,8 @@ velox_link_libraries(velox_functions_util velox_vector velox_common_base) velox_add_library(velox_functions_lib_date_time_formatter DateTimeFormatter.cpp DateTimeFormatterBuilder.cpp) -velox_link_libraries(velox_functions_lib_date_time_formatter velox_type_tz) +velox_link_libraries(velox_functions_lib_date_time_formatter velox_type_tz + ICU::i18n ICU::uc) velox_add_library( velox_functions_lib diff --git a/velox/functions/lib/DateTimeFormatter.cpp b/velox/functions/lib/DateTimeFormatter.cpp index 2175ba10924ad..dcb9b62e1fccf 100644 --- a/velox/functions/lib/DateTimeFormatter.cpp +++ b/velox/functions/lib/DateTimeFormatter.cpp @@ -1173,10 +1173,10 @@ uint32_t DateTimeFormatter::maxResultSize(const tz::TimeZone* timezone) const { // https://en.wikipedia.org/wiki/List_of_tz_database_time_zones size += 5; } else { - VELOX_NYI( - "Date format specifier is not yet implemented: {} ({})", - getSpecifierName(token.pattern.specifier), - token.pattern.minRepresentDigits); + // The longest time zone long name is 40, Australian Central Western + // Standard Time. + // https://www.timeanddate.com/time/zones/ + size += 50; } break; @@ -1432,8 +1432,11 @@ int32_t DateTimeFormatter::format( std::memcpy(result, abbrev.data(), abbrev.length()); result += abbrev.length(); } else { - // TODO: implement full name time zone - VELOX_NYI("full time zone name is not yet supported"); + std::string longName = timezone->getLongName( + std::chrono::milliseconds(timestamp.toMillis()), + tz::TimeZone::TChoose::kEarliest); + std::memcpy(result, longName.data(), longName.length()); + result += longName.length(); } } break; diff --git a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp index afa4f5a8ac919..30090173d8f85 100644 --- a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp @@ -3272,6 +3272,12 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) { EXPECT_EQ("IST", formatDatetime(parseTimestamp("1970-01-01"), "zzz")); EXPECT_EQ("IST", formatDatetime(parseTimestamp("1970-01-01"), "zz")); EXPECT_EQ("IST", formatDatetime(parseTimestamp("1970-01-01"), "z")); + EXPECT_EQ( + "India Standard Time", + formatDatetime(parseTimestamp("1970-01-01"), "zzzz")); + EXPECT_EQ( + "India Standard Time", + formatDatetime(parseTimestamp("1970-01-01"), "zzzzzzzzzzzzzzzzzzzzzz")); // Test daylight savings. setQueryTimeZone("America/Los_Angeles"); @@ -3281,16 +3287,47 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) { EXPECT_EQ("PDT", formatDatetime(parseTimestamp("2024-03-10 03:00"), "z")); EXPECT_EQ("PDT", formatDatetime(parseTimestamp("2024-11-03 01:00"), "z")); EXPECT_EQ("PST", formatDatetime(parseTimestamp("2024-11-03 02:00"), "z")); + EXPECT_EQ( + "Pacific Standard Time", + formatDatetime(parseTimestamp("1970-01-01"), "zzzz")); + EXPECT_EQ( + "Pacific Daylight Time", + formatDatetime(parseTimestamp("1970-10-01"), "zzzz")); + EXPECT_EQ( + "Pacific Standard Time", + formatDatetime(parseTimestamp("2024-03-10 01:00"), "zzzz")); + EXPECT_EQ( + "Pacific Daylight Time", + formatDatetime(parseTimestamp("2024-03-10 03:00"), "zzzz")); + EXPECT_EQ( + "Pacific Daylight Time", + formatDatetime(parseTimestamp("2024-11-03 01:00"), "zzzz")); + EXPECT_EQ( + "Pacific Standard Time", + formatDatetime(parseTimestamp("2024-11-03 02:00"), "zzzz")); + + // Test ambiguous time. + EXPECT_EQ( + "PDT", formatDatetime(parseTimestamp("2024-11-03 01:30:00"), "zzz")); + EXPECT_EQ( + "Pacific Daylight Time", + formatDatetime(parseTimestamp("2024-11-03 01:30:00"), "zzzz")); // Test a long abbreviation. setQueryTimeZone("Asia/Colombo"); EXPECT_EQ("+0530", formatDatetime(parseTimestamp("1970-10-01"), "z")); + EXPECT_EQ( + "India Standard Time", + formatDatetime(parseTimestamp("1970-10-01"), "zzzz")); - setQueryTimeZone("Asia/Kolkata"); - // We don't support more than 3 'z's yet. - EXPECT_THROW( - formatDatetime(parseTimestamp("1970-01-01"), "zzzz"), VeloxRuntimeError); + // Test a long long name. + setQueryTimeZone("Australia/Eucla"); + EXPECT_EQ("+0845", formatDatetime(parseTimestamp("1970-10-01"), "z")); + EXPECT_EQ( + "Australian Central Western Standard Time", + formatDatetime(parseTimestamp("1970-10-01"), "zzzz")); + setQueryTimeZone("Asia/Kolkata"); // Literal test cases. EXPECT_EQ("hello", formatDatetime(parseTimestamp("1970-01-01"), "'hello'")); EXPECT_EQ("'", formatDatetime(parseTimestamp("1970-01-01"), "''")); diff --git a/velox/type/tz/CMakeLists.txt b/velox/type/tz/CMakeLists.txt index b7d96feb12626..614b594000a77 100644 --- a/velox/type/tz/CMakeLists.txt +++ b/velox/type/tz/CMakeLists.txt @@ -23,4 +23,6 @@ velox_link_libraries( velox_external_date Boost::regex fmt::fmt - Folly::folly) + Folly::folly + ICU::i18n + ICU::uc) diff --git a/velox/type/tz/TimeZoneMap.h b/velox/type/tz/TimeZoneMap.h index 0e036df1a6b96..227e1fc49ed6d 100644 --- a/velox/type/tz/TimeZoneMap.h +++ b/velox/type/tz/TimeZoneMap.h @@ -16,6 +16,15 @@ #pragma once +#include +#include +#include + +// The ICU libraries define TRUE/FALSE macros which frequently conflict with +// other libraries that use these as enum/variable names. +#undef TRUE +#undef FALSE + #include #include #include "velox/common/base/Exceptions.h" @@ -178,6 +187,37 @@ class TimeZone { return getZonedTime(timePoint, choose).get_info().abbrev; } + template + std::string getLongName(TDuration timestamp, TChoose choose = TChoose::kFail) + const { + static const icu::Locale locale("en", "US"); + + // Get the ICU TimeZone by name + std::unique_ptr tz(icu::TimeZone::createTimeZone( + icu::UnicodeString(timeZoneName_.data(), timeZoneName_.length()))); + VELOX_USER_CHECK_NOT_NULL(tz); + + // According to the documentation this is how to determine if DST applies to + // a given timestamp in a given time zone. + // https://howardhinnant.github.io/date/tz.html#sys_info + date::local_time timePoint{timestamp}; + bool isDst = getZonedTime(timePoint, choose).get_info().save != + std::chrono::minutes(0); + + // Construct the long name for the time zone. + // Note that ICU does not have DST information for many time zones prior to + // 1970, so it's important to specify it explicitly. + icu::UnicodeString longName; + tz->getDisplayName( + isDst, icu_53::TimeZone::EDisplayType::LONG, locale, longName); + + // Convert the UnicodeString back to a string and write it out + std::string longNameStr; + longName.toUTF8String(longNameStr); + + return longNameStr; + } + private: template date::zoned_time getZonedTime(