Skip to content

Commit

Permalink
Fix TRY_CAST(VARCHAR as TIMESTAMP) incorrectly suppress errors of inp…
Browse files Browse the repository at this point in the history
…ut outside supported ranges (facebookincubator#10928)

Summary:
Pull Request resolved: facebookincubator#10928

Similar to facebookincubator#10097, force CAST(VARCHAR as TIMESTAMP) to throw VeloxRuntimeErrors on inputs outside the supported ranges, so that TRY_CAST doesn't suppress them.

Reviewed By: amitkdutta, kgpai

Differential Revision: D62196915

fbshipit-source-id: ef6237a4b69fee38a9425710b0102aeab1dfbe87
  • Loading branch information
kagamiori authored and facebook-github-bot committed Sep 10, 2024
1 parent 38f9a1f commit da48a08
Show file tree
Hide file tree
Showing 7 changed files with 77 additions and 8 deletions.
7 changes: 7 additions & 0 deletions velox/expression/fuzzer/ExpressionFuzzerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,13 @@ int main(int argc, char** argv) {
"regexp_like",
"regexp_replace",
"regexp_split",
// date_format and format_datetime throw VeloxRuntimeError when input
// timestamp is out of the supported range.
"date_format",
"format_datetime",
// from_unixtime can generate timestamps out of the supported range that
// make other functions throw VeloxRuntimeErrors.
"from_unixtime",
};
size_t initialSeed = FLAGS_seed == 0 ? std::time(nullptr) : FLAGS_seed;

Expand Down
9 changes: 8 additions & 1 deletion velox/expression/fuzzer/SparkExpressionFuzzerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,14 @@ int main(int argc, char** argv) {
"chr",
"replace",
"might_contain",
"unix_timestamp"};
"unix_timestamp",
// from_unixtime throws VeloxRuntimeError when the timestamp is out of the
// supported range.
"from_unixtime",
// timestamp_millis(bigint) can generate timestamps out of the supported
// range that make other functions throw VeloxRuntimeErrors.
"timestamp_millis(bigint) -> timestamp",
};

// Required by spark_partition_id function.
std::unordered_map<std::string, std::string> queryConfigs = {
Expand Down
17 changes: 17 additions & 0 deletions velox/expression/tests/CastExprTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -581,10 +581,27 @@ TEST_F(CastExprTest, stringToTimestamp) {
};
testCast<std::string, Timestamp>("timestamp", input, expected);

// Test invalid inputs.
VELOX_ASSERT_THROW(
(evaluateOnce<Timestamp, std::string>(
"cast(c0 as timestamp)", "1970-01-01T00:00")),
"Cannot cast VARCHAR '1970-01-01T00:00' to TIMESTAMP. Unable to parse timestamp value");
VELOX_ASSERT_THROW(
(evaluateOnce<Timestamp, std::string>(
"cast(c0 as timestamp)", "201915-04-23 11:46:00.000")),
"Timepoint is outside of supported year range");
VELOX_ASSERT_THROW(
(evaluateOnce<Timestamp, std::string>(
"try_cast(c0 as timestamp)", "201915-04-23 11:46:00.000")),
"Timepoint is outside of supported year range");
VELOX_ASSERT_THROW(
(evaluateOnce<Timestamp, std::string>(
"cast(c0 as timestamp)", "2045-12-31 18:00:00")),
"Unable to convert timezone 'America/Los_Angeles' past 2037-11-01 09:00:00");
VELOX_ASSERT_THROW(
(evaluateOnce<Timestamp, std::string>(
"try_cast(c0 as timestamp)", "2045-12-31 18:00:00")),
"Unable to convert timezone 'America/Los_Angeles' past 2037-11-01 09:00:00");

setLegacyCast(true);
input = {
Expand Down
4 changes: 2 additions & 2 deletions velox/functions/lib/tests/DateTimeFormatterTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1447,10 +1447,10 @@ TEST_F(MysqlDateTimeTest, formatYear) {
"-0001");
EXPECT_THROW(
formatMysqlDateTime("%Y", fromTimestampString("-99999-01-01"), timezone),
VeloxUserError);
VeloxRuntimeError);
EXPECT_THROW(
formatMysqlDateTime("%Y", fromTimestampString("99999-01-01"), timezone),
VeloxUserError);
VeloxRuntimeError);
}

TEST_F(MysqlDateTimeTest, formatMonthDay) {
Expand Down
5 changes: 5 additions & 0 deletions velox/type/Timestamp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ void Timestamp::toGMT(const tz::TimeZone& zone) {
} catch (const date::nonexistent_local_time& error) {
// If the time does not exist, fail the conversion.
VELOX_USER_FAIL(error.what());
} catch (const std::invalid_argument& e) {
// Invalid argument means we hit a conversion not supported by
// external/date. Need to throw a RuntimeError so that try() statements do
// not suppress it.
VELOX_FAIL(e.what());
}
seconds_ = sysSeconds.count();
}
Expand Down
2 changes: 1 addition & 1 deletion velox/type/tz/TimeZoneMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ void validateRangeImpl(time_point<TDuration> timePoint) {
auto year = year_month_day(floor<days>(timePoint)).year();

if (year < kMinYear || year > kMaxYear) {
VELOX_USER_FAIL(
VELOX_FAIL(
"Timepoint is outside of supported year range: [{}, {}], got {}",
(int)kMinYear,
(int)kMaxYear,
Expand Down
41 changes: 37 additions & 4 deletions velox/vector/fuzzer/VectorFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,20 +118,44 @@ int128_t rand(FuzzerGenerator& rng) {
return HugeInt::build(rand<int64_t>(rng), rand<uint64_t>(rng));
}

template <typename T, typename std::enable_if_t<std::is_integral_v<T>, int> = 0>
T rand(FuzzerGenerator& rng, T min, T max) {
return boost::random::uniform_int_distribution<T>(min, max)(rng);
}

Timestamp randTimestamp(FuzzerGenerator& rng, VectorFuzzer::Options opts) {
// Generate timestamps only in the valid range to avoid datetime functions,
// such as try_cast(varchar as timestamp), throwing VeloxRuntimeError in
// fuzzers.
constexpr int64_t min = -2'140'671'600;
constexpr int64_t max = 2'140'671'600;
constexpr int64_t microInSecond = 1'000'000;
constexpr int64_t millisInSecond = 1'000;

switch (opts.timestampPrecision) {
case VectorFuzzer::Options::TimestampPrecision::kNanoSeconds:
return Timestamp(rand<int32_t>(rng), (rand<int64_t>(rng) % MAX_NANOS));
return Timestamp(
rand<int64_t>(rng, min, max), (rand<int64_t>(rng) % MAX_NANOS));
case VectorFuzzer::Options::TimestampPrecision::kMicroSeconds:
return Timestamp::fromMicros(rand<int64_t>(rng));
return Timestamp::fromMicros(
rand<int64_t>(rng, min, max) * microInSecond +
rand<int64_t>(rng, -microInSecond, microInSecond));
case VectorFuzzer::Options::TimestampPrecision::kMilliSeconds:
return Timestamp::fromMillis(rand<int64_t>(rng));
return Timestamp::fromMillis(
rand<int64_t>(rng, min, max) * millisInSecond +
rand<int64_t>(rng, -millisInSecond, millisInSecond));
case VectorFuzzer::Options::TimestampPrecision::kSeconds:
return Timestamp(rand<int32_t>(rng), 0);
return Timestamp(rand<int64_t>(rng, min, max), 0);
}
return {}; // no-op.
}

int32_t randDate(FuzzerGenerator& rng) {
constexpr int64_t min = -24'450;
constexpr int64_t max = 24'450;
return rand<int32_t>(rng, min, max);
}

size_t getElementsVectorLength(
const VectorFuzzer::Options& opts,
vector_size_t size) {
Expand Down Expand Up @@ -251,6 +275,9 @@ VectorPtr fuzzConstantPrimitiveImpl(
if constexpr (std::is_same_v<TCpp, Timestamp>) {
return std::make_shared<ConstantVector<TCpp>>(
pool, size, false, type, randTimestamp(rng, opts));
} else if (type->isDate()) {
return std::make_shared<ConstantVector<int32_t>>(
pool, size, false, type, randDate(rng));
} else if (type->isShortDecimal()) {
return std::make_shared<ConstantVector<int64_t>>(
pool, size, false, type, randShortDecimal(type, rng));
Expand Down Expand Up @@ -294,6 +321,12 @@ void fuzzFlatPrimitiveImpl(
} else {
VELOX_NYI();
}
} else if constexpr (std::is_same_v<TCpp, int32_t>) {
if (vector->type()->isDate()) {
flatVector->set(i, randDate(rng));
} else {
flatVector->set(i, rand<TCpp>(rng));
}
} else {
flatVector->set(i, rand<TCpp>(rng));
}
Expand Down

0 comments on commit da48a08

Please sign in to comment.