From da48a0880418929f4c531f1ce549912eaff8c858 Mon Sep 17 00:00:00 2001 From: Wei He Date: Tue, 10 Sep 2024 07:53:11 -0700 Subject: [PATCH] Fix TRY_CAST(VARCHAR as TIMESTAMP) incorrectly suppress errors of input outside supported ranges (#10928) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/10928 Similar to https://github.com/facebookincubator/velox/pull/10097, force CAST(VARCHAR as TIMESTAMP) to throw VeloxRuntimeErrors on inputs outside the supported ranges, so that TRY_CAST doesn't suppress them. Reviewed By: amitkdutta, kgpai Differential Revision: D62196915 fbshipit-source-id: ef6237a4b69fee38a9425710b0102aeab1dfbe87 --- .../fuzzer/ExpressionFuzzerTest.cpp | 7 ++++ .../fuzzer/SparkExpressionFuzzerTest.cpp | 9 +++- velox/expression/tests/CastExprTest.cpp | 17 ++++++++ .../lib/tests/DateTimeFormatterTest.cpp | 4 +- velox/type/Timestamp.cpp | 5 +++ velox/type/tz/TimeZoneMap.cpp | 2 +- velox/vector/fuzzer/VectorFuzzer.cpp | 41 +++++++++++++++++-- 7 files changed, 77 insertions(+), 8 deletions(-) diff --git a/velox/expression/fuzzer/ExpressionFuzzerTest.cpp b/velox/expression/fuzzer/ExpressionFuzzerTest.cpp index 90e2c7222681..83c9003d4b23 100644 --- a/velox/expression/fuzzer/ExpressionFuzzerTest.cpp +++ b/velox/expression/fuzzer/ExpressionFuzzerTest.cpp @@ -76,6 +76,13 @@ int main(int argc, char** argv) { "regexp_like", "regexp_replace", "regexp_split", + // date_format and format_datetime throw VeloxRuntimeError when input + // timestamp is out of the supported range. + "date_format", + "format_datetime", + // from_unixtime can generate timestamps out of the supported range that + // make other functions throw VeloxRuntimeErrors. + "from_unixtime", }; size_t initialSeed = FLAGS_seed == 0 ? std::time(nullptr) : FLAGS_seed; diff --git a/velox/expression/fuzzer/SparkExpressionFuzzerTest.cpp b/velox/expression/fuzzer/SparkExpressionFuzzerTest.cpp index ba5abd02bca3..18a5453b1760 100644 --- a/velox/expression/fuzzer/SparkExpressionFuzzerTest.cpp +++ b/velox/expression/fuzzer/SparkExpressionFuzzerTest.cpp @@ -62,7 +62,14 @@ int main(int argc, char** argv) { "chr", "replace", "might_contain", - "unix_timestamp"}; + "unix_timestamp", + // from_unixtime throws VeloxRuntimeError when the timestamp is out of the + // supported range. + "from_unixtime", + // timestamp_millis(bigint) can generate timestamps out of the supported + // range that make other functions throw VeloxRuntimeErrors. + "timestamp_millis(bigint) -> timestamp", + }; // Required by spark_partition_id function. std::unordered_map queryConfigs = { diff --git a/velox/expression/tests/CastExprTest.cpp b/velox/expression/tests/CastExprTest.cpp index 40827f1c8eed..3ad743e31d21 100644 --- a/velox/expression/tests/CastExprTest.cpp +++ b/velox/expression/tests/CastExprTest.cpp @@ -581,10 +581,27 @@ TEST_F(CastExprTest, stringToTimestamp) { }; testCast("timestamp", input, expected); + // Test invalid inputs. VELOX_ASSERT_THROW( (evaluateOnce( "cast(c0 as timestamp)", "1970-01-01T00:00")), "Cannot cast VARCHAR '1970-01-01T00:00' to TIMESTAMP. Unable to parse timestamp value"); + VELOX_ASSERT_THROW( + (evaluateOnce( + "cast(c0 as timestamp)", "201915-04-23 11:46:00.000")), + "Timepoint is outside of supported year range"); + VELOX_ASSERT_THROW( + (evaluateOnce( + "try_cast(c0 as timestamp)", "201915-04-23 11:46:00.000")), + "Timepoint is outside of supported year range"); + VELOX_ASSERT_THROW( + (evaluateOnce( + "cast(c0 as timestamp)", "2045-12-31 18:00:00")), + "Unable to convert timezone 'America/Los_Angeles' past 2037-11-01 09:00:00"); + VELOX_ASSERT_THROW( + (evaluateOnce( + "try_cast(c0 as timestamp)", "2045-12-31 18:00:00")), + "Unable to convert timezone 'America/Los_Angeles' past 2037-11-01 09:00:00"); setLegacyCast(true); input = { diff --git a/velox/functions/lib/tests/DateTimeFormatterTest.cpp b/velox/functions/lib/tests/DateTimeFormatterTest.cpp index 74514ea4ca67..1066bf6cbeba 100644 --- a/velox/functions/lib/tests/DateTimeFormatterTest.cpp +++ b/velox/functions/lib/tests/DateTimeFormatterTest.cpp @@ -1447,10 +1447,10 @@ TEST_F(MysqlDateTimeTest, formatYear) { "-0001"); EXPECT_THROW( formatMysqlDateTime("%Y", fromTimestampString("-99999-01-01"), timezone), - VeloxUserError); + VeloxRuntimeError); EXPECT_THROW( formatMysqlDateTime("%Y", fromTimestampString("99999-01-01"), timezone), - VeloxUserError); + VeloxRuntimeError); } TEST_F(MysqlDateTimeTest, formatMonthDay) { diff --git a/velox/type/Timestamp.cpp b/velox/type/Timestamp.cpp index deb229fbd73e..324a41f8f658 100644 --- a/velox/type/Timestamp.cpp +++ b/velox/type/Timestamp.cpp @@ -56,6 +56,11 @@ void Timestamp::toGMT(const tz::TimeZone& zone) { } catch (const date::nonexistent_local_time& error) { // If the time does not exist, fail the conversion. VELOX_USER_FAIL(error.what()); + } catch (const std::invalid_argument& e) { + // Invalid argument means we hit a conversion not supported by + // external/date. Need to throw a RuntimeError so that try() statements do + // not suppress it. + VELOX_FAIL(e.what()); } seconds_ = sysSeconds.count(); } diff --git a/velox/type/tz/TimeZoneMap.cpp b/velox/type/tz/TimeZoneMap.cpp index 8f1ee5ff68e3..1b57faa144ef 100644 --- a/velox/type/tz/TimeZoneMap.cpp +++ b/velox/type/tz/TimeZoneMap.cpp @@ -209,7 +209,7 @@ void validateRangeImpl(time_point timePoint) { auto year = year_month_day(floor(timePoint)).year(); if (year < kMinYear || year > kMaxYear) { - VELOX_USER_FAIL( + VELOX_FAIL( "Timepoint is outside of supported year range: [{}, {}], got {}", (int)kMinYear, (int)kMaxYear, diff --git a/velox/vector/fuzzer/VectorFuzzer.cpp b/velox/vector/fuzzer/VectorFuzzer.cpp index d8fcd38d180c..075e905e638b 100644 --- a/velox/vector/fuzzer/VectorFuzzer.cpp +++ b/velox/vector/fuzzer/VectorFuzzer.cpp @@ -118,20 +118,44 @@ int128_t rand(FuzzerGenerator& rng) { return HugeInt::build(rand(rng), rand(rng)); } +template , int> = 0> +T rand(FuzzerGenerator& rng, T min, T max) { + return boost::random::uniform_int_distribution(min, max)(rng); +} + Timestamp randTimestamp(FuzzerGenerator& rng, VectorFuzzer::Options opts) { + // Generate timestamps only in the valid range to avoid datetime functions, + // such as try_cast(varchar as timestamp), throwing VeloxRuntimeError in + // fuzzers. + constexpr int64_t min = -2'140'671'600; + constexpr int64_t max = 2'140'671'600; + constexpr int64_t microInSecond = 1'000'000; + constexpr int64_t millisInSecond = 1'000; + switch (opts.timestampPrecision) { case VectorFuzzer::Options::TimestampPrecision::kNanoSeconds: - return Timestamp(rand(rng), (rand(rng) % MAX_NANOS)); + return Timestamp( + rand(rng, min, max), (rand(rng) % MAX_NANOS)); case VectorFuzzer::Options::TimestampPrecision::kMicroSeconds: - return Timestamp::fromMicros(rand(rng)); + return Timestamp::fromMicros( + rand(rng, min, max) * microInSecond + + rand(rng, -microInSecond, microInSecond)); case VectorFuzzer::Options::TimestampPrecision::kMilliSeconds: - return Timestamp::fromMillis(rand(rng)); + return Timestamp::fromMillis( + rand(rng, min, max) * millisInSecond + + rand(rng, -millisInSecond, millisInSecond)); case VectorFuzzer::Options::TimestampPrecision::kSeconds: - return Timestamp(rand(rng), 0); + return Timestamp(rand(rng, min, max), 0); } return {}; // no-op. } +int32_t randDate(FuzzerGenerator& rng) { + constexpr int64_t min = -24'450; + constexpr int64_t max = 24'450; + return rand(rng, min, max); +} + size_t getElementsVectorLength( const VectorFuzzer::Options& opts, vector_size_t size) { @@ -251,6 +275,9 @@ VectorPtr fuzzConstantPrimitiveImpl( if constexpr (std::is_same_v) { return std::make_shared>( pool, size, false, type, randTimestamp(rng, opts)); + } else if (type->isDate()) { + return std::make_shared>( + pool, size, false, type, randDate(rng)); } else if (type->isShortDecimal()) { return std::make_shared>( pool, size, false, type, randShortDecimal(type, rng)); @@ -294,6 +321,12 @@ void fuzzFlatPrimitiveImpl( } else { VELOX_NYI(); } + } else if constexpr (std::is_same_v) { + if (vector->type()->isDate()) { + flatVector->set(i, randDate(rng)); + } else { + flatVector->set(i, rand(rng)); + } } else { flatVector->set(i, rand(rng)); }