From e931e76426507220f2eead89a0afe238fe855dfd Mon Sep 17 00:00:00 2001 From: rui-mo Date: Mon, 29 Apr 2024 14:20:56 -0700 Subject: [PATCH] Throw user error for invalid time zone in make_timestamp Spark function (#9631) Summary: getTimeZoneID fails with VELOX_FAIL on unknown time zone, which causes unexpected failure in the fuzzer test. This PR changes getTimeZoneID to throw user error. Pull Request resolved: https://github.com/facebookincubator/velox/pull/9631 Reviewed By: pedroerp Differential Revision: D56710785 Pulled By: kagamiori fbshipit-source-id: 629038e5664a4bbfc06119bc533bb5c17ff02e1e --- velox/functions/sparksql/MakeTimestamp.cpp | 14 ++++-- .../sparksql/tests/MakeTimestampTest.cpp | 50 +++++++++++++++++++ velox/type/tz/TimeZoneMap.cpp | 2 +- 3 files changed, 61 insertions(+), 5 deletions(-) diff --git a/velox/functions/sparksql/MakeTimestamp.cpp b/velox/functions/sparksql/MakeTimestamp.cpp index 4466482195a1..febab1036f9e 100644 --- a/velox/functions/sparksql/MakeTimestamp.cpp +++ b/velox/functions/sparksql/MakeTimestamp.cpp @@ -72,9 +72,9 @@ void setTimestampOrNull( std::optional timestamp, DecodedVector* timeZoneVector, FlatVector* result) { + const auto timeZone = timeZoneVector->valueAt(row); + const auto tzID = util::getTimeZoneID(std::string_view(timeZone)); if (timestamp.has_value()) { - auto timeZone = timeZoneVector->valueAt(row); - auto tzID = util::getTimeZoneID(std::string_view(timeZone)); (*timestamp).toGMT(tzID); result->set(row, *timestamp); } else { @@ -122,7 +122,13 @@ class MakeTimestampFunction : public exec::VectorFunction { if (args[6]->isConstantEncoding()) { auto tz = args[6]->asUnchecked>()->valueAt(0); - auto constantTzID = util::getTimeZoneID(std::string_view(tz)); + int64_t constantTzID; + try { + constantTzID = util::getTimeZoneID(std::string_view(tz)); + } catch (const VeloxException& e) { + context.setErrors(rows, std::current_exception()); + return; + } rows.applyToSelected([&](vector_size_t row) { auto timestamp = makeTimeStampFromDecodedArgs( row, year, month, day, hour, minute, micros); @@ -130,7 +136,7 @@ class MakeTimestampFunction : public exec::VectorFunction { }); } else { auto* timeZone = decodedArgs.at(6); - rows.applyToSelected([&](vector_size_t row) { + context.applyToSelectedNoThrow(rows, [&](auto row) { auto timestamp = makeTimeStampFromDecodedArgs( row, year, month, day, hour, minute, micros); setTimestampOrNull(row, timestamp, timeZone, resultFlatVector); diff --git a/velox/functions/sparksql/tests/MakeTimestampTest.cpp b/velox/functions/sparksql/tests/MakeTimestampTest.cpp index e12a8a728b3b..b53bfe32bdea 100644 --- a/velox/functions/sparksql/tests/MakeTimestampTest.cpp +++ b/velox/functions/sparksql/tests/MakeTimestampTest.cpp @@ -173,5 +173,55 @@ TEST_F(MakeTimestampTest, errors) { "make_timestamp requires session time zone to be set."); } +TEST_F(MakeTimestampTest, invalidTimezone) { + const auto microsType = DECIMAL(16, 6); + const auto year = makeFlatVector({2021, 2021, 2021, 2021, 2021}); + const auto month = makeFlatVector({7, 7, 7, 7, 7}); + const auto day = makeFlatVector({11, 11, 11, 11, 11}); + const auto hour = makeFlatVector({6, 6, 6, -6, 6}); + const auto minute = makeFlatVector({30, 30, 30, 30, 30}); + const auto micros = makeNullableFlatVector( + {45678000, 1e6, 6e7, 59999999, std::nullopt}, microsType); + auto data = makeRowVector({year, month, day, hour, minute, micros}); + + // Invalid time zone from query config. + setQueryTimeZone("Invalid"); + VELOX_ASSERT_USER_THROW( + evaluate("make_timestamp(c0, c1, c2, c3, c4, c5)", data), + "Unknown time zone: 'Invalid'"); + + setQueryTimeZone(""); + VELOX_ASSERT_USER_THROW( + evaluate("make_timestamp(c0, c1, c2, c3, c4, c5)", data), + "make_timestamp requires session time zone to be set."); + + // Invalid constant time zone. + setQueryTimeZone("GMT"); + for (auto timeZone : {"Invalid", ""}) { + SCOPED_TRACE(fmt::format("timezone: {}", timeZone)); + VELOX_ASSERT_USER_THROW( + evaluate( + fmt::format( + "make_timestamp(c0, c1, c2, c3, c4, c5, '{}')", timeZone), + data), + fmt::format("Unknown time zone: '{}'", timeZone)); + } + + // Invalid timezone from vector. + auto timeZones = makeFlatVector( + {"GMT", "CET", "Asia/Shanghai", "Invalid", "GMT"}); + data = makeRowVector({year, month, day, hour, minute, micros, timeZones}); + VELOX_ASSERT_USER_THROW( + evaluate("make_timestamp(c0, c1, c2, c3, c4, c5, c6)", data), + "Unknown time zone: 'Invalid'"); + + timeZones = + makeFlatVector({"GMT", "CET", "Asia/Shanghai", "", "GMT"}); + data = makeRowVector({year, month, day, hour, minute, micros, timeZones}); + VELOX_ASSERT_USER_THROW( + evaluate("make_timestamp(c0, c1, c2, c3, c4, c5, c6)", data), + "Unknown time zone: ''"); +} + } // namespace } // namespace facebook::velox::functions::sparksql::test diff --git a/velox/type/tz/TimeZoneMap.cpp b/velox/type/tz/TimeZoneMap.cpp index a3203ee97859..d3657a5a4788 100644 --- a/velox/type/tz/TimeZoneMap.cpp +++ b/velox/type/tz/TimeZoneMap.cpp @@ -135,7 +135,7 @@ int64_t getTimeZoneID(std::string_view timeZone, bool failOnError) { return it->second; } if (failOnError) { - VELOX_FAIL("Unknown time zone: '{}'", timeZone); + VELOX_USER_FAIL("Unknown time zone: '{}'", timeZone); } return -1; }