Skip to content

Commit

Permalink
fix: Casting Varchar to Timestamp should handle offsets that are not …
Browse files Browse the repository at this point in the history
…recognized time zones (#11849)

Summary:
Pull Request resolved: #11849

When casting a Varchar to Timestamp, Presto Java allows an offset timestamp in place of a time zone.  This
string is of the form +/-HH:MM:SS.mmm where all units except the hour are optional, the colons are optional,
the . is optional and may also be a , and this string must be separated by the date/time by a single space.

This is not interpreted as a time zone but rather simply a milliseconds from UTC and is applied in addition to the
time zone conversion if a session time zone is present. Note that strings that fit this pattern but are valid time
zones e.g. +02:00 are still treated as time zones.

Since this is not a true time zone it is not allowed when casting from Varchar to TimestampWithTimeZone or any
other string to time conversions (at least AFAICT).

This change updates fromTimestampWithTimezoneString to handle this case and return an offsetMillis in place
of a time zone if one is present.  Casting from Varchar to Timestamp has been updated to apply this offset to
the result, while all other locations where this function is called have been updated to throw if offsetMillis is
present.

Reviewed By: kgpai

Differential Revision: D67182709

fbshipit-source-id: cb2680da7244cab3104453dafb93ba516ce3a7a2
  • Loading branch information
Kevin Wilfong authored and facebook-github-bot committed Dec 20, 2024
1 parent 8dfd2e2 commit f7931d2
Show file tree
Hide file tree
Showing 9 changed files with 364 additions and 45 deletions.
36 changes: 27 additions & 9 deletions velox/expression/PrestoCastHooks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,17 +54,35 @@ Expected<Timestamp> PrestoCastHooks::castStringToTimestamp(
// If the parsed string has timezone information, convert the timestamp at
// GMT at that time. For example, "1970-01-01 00:00:00 -00:01" is 60 seconds
// at GMT.
if (result.second != nullptr) {
result.first.toGMT(*result.second);
if (result.timeZone != nullptr) {
result.timestamp.toGMT(*result.timeZone);
} else if (result.offsetMillis.has_value()) {
auto seconds = result.timestamp.getSeconds();
// use int128_t to avoid overflow.
int128_t nanos = result.timestamp.getNanos();

seconds -= result.offsetMillis.value() / util::kMillisPerSecond;
nanos -= (result.offsetMillis.value() % util::kMillisPerSecond) *
util::kNanosPerMicro * util::kMicrosPerMsec;

if (nanos < 0) {
seconds -= 1;
nanos += Timestamp::kNanosInSecond;
} else if (nanos > Timestamp::kMaxNanos) {
seconds += 1;
nanos -= Timestamp::kNanosInSecond;
}

result.timestamp = Timestamp(seconds, nanos);
} else {
// If no timezone information is available in the input string, check if we
// should understand it as being at the session timezone, and if so, convert
// to GMT.
if (options_.timeZone != nullptr) {
result.timestamp.toGMT(*options_.timeZone);
}
}
// If no timezone information is available in the input string, check if we
// should understand it as being at the session timezone, and if so, convert
// to GMT.
else if (options_.timeZone != nullptr) {
result.first.toGMT(*options_.timeZone);
}
return result.first;
return result.timestamp;
}

Expected<Timestamp> PrestoCastHooks::castIntToTimestamp(int64_t seconds) const {
Expand Down
87 changes: 87 additions & 0 deletions velox/expression/tests/CastExprTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -548,8 +548,40 @@ TEST_F(CastExprTest, stringToTimestamp) {
"1970-01-01 00:00:00-02:00",
"1970-01-01 00:00:00 +02",
"1970-01-01 00:00:00 -0101",
// Fully specified offset.
"1970-01-02 00:00:00 +01:01:01.001",
"1970-01-01 00:00:00 -01:01:01.001",
// Offset with two digit milliseconds.
"1970-01-02 00:00:00 +01:01:01.01",
"1970-01-01 00:00:00 -01:01:01.01",
// Offset with one digit milliseconds.
"1970-01-02 00:00:00 +01:01:01.1",
"1970-01-01 00:00:00 -01:01:01.1",
// Offset without milliseconds.
"1970-01-02 00:00:00 +01:01:01",
"1970-01-01 00:00:00 -01:01:01",
// Offset without seconds.
"1970-01-02 00:00:00 +23:01",
"1970-01-01 00:00:00 -23:01",
// Offset without minutes.
"1970-01-02 00:00:00 +23",
"1970-01-01 00:00:00 -23",
// Upper and lower limits of offsets.
"2000-01-01 12:13:14.123+23:59:59.999",
"2000-01-01 12:13:14.123-23:59:59.999",
// Comma instead of period for decimal in offset.
"1970-01-01 00:00:00 -01:01:01,001",
// Trailing spaces after offset.
"1970-01-02 00:00:00 +01:01:01.001 ",
// Overflow of nanoseconds in offset.
"1970-01-01 00:00:00.999 -01:01:01.002",
// Underflow of nanoseconds in offset.
"1970-01-02 00:00:00.001 +01:01:01.002",
// No optional separators
"1970-01-01 00:00:00 -010101001",
std::nullopt,
};

std::vector<std::optional<Timestamp>> expected{
Timestamp(0, 0),
Timestamp(10800, 0),
Expand All @@ -559,6 +591,25 @@ TEST_F(CastExprTest, stringToTimestamp) {
Timestamp(7200, 0),
Timestamp(-7200, 0),
Timestamp(3660, 0),
Timestamp(82738, 999000000),
Timestamp(3661, 1000000),
Timestamp(82738, 990000000),
Timestamp(3661, 10000000),
Timestamp(82738, 900000000),
Timestamp(3661, 100000000),
Timestamp(82739, 0),
Timestamp(3661, 0),
Timestamp(3540, 0),
Timestamp(82860, 0),
Timestamp(3600, 0),
Timestamp(82800, 0),
Timestamp(946642394, 124000000),
Timestamp(946815194, 122000000),
Timestamp(3661, 1000000),
Timestamp(82738, 999000000),
Timestamp(3662, 1000000),
Timestamp(82738, 999000000),
Timestamp(3661, 1000000),
std::nullopt,
};
testCast<std::string, Timestamp>("timestamp", input, expected);
Expand All @@ -571,13 +622,24 @@ TEST_F(CastExprTest, stringToTimestamp) {
"1970-01-01 00:00 +01:00",
"1970-01-01 00:00 America/Sao_Paulo",
"2000-01-01 12:21:56Z",
"2000-01-01 12:21:56+01:01:01",
// Test going back and forth across DST boundaries.
"2024-03-10 09:59:59 -00:00:02",
"2024-03-10 10:00:01 +00:00:02",
"2024-11-03 08:59:59 -00:00:02",
"2024-11-03 09:00:01 +00:00:02",
};
expected = {
Timestamp(28800, 0),
Timestamp(0, 0),
Timestamp(-3600, 0),
Timestamp(10800, 0),
Timestamp(946729316, 0),
Timestamp(946725655, 0),
Timestamp(1710064801, 0),
Timestamp(1710064799, 0),
Timestamp(1730624401, 0),
Timestamp(1730624399, 0),
};
testCast<std::string, Timestamp>("timestamp", input, expected);

Expand All @@ -602,6 +664,31 @@ TEST_F(CastExprTest, stringToTimestamp) {
(evaluateOnce<Timestamp, std::string>(
"try_cast(c0 as timestamp)", "2045-12-31 18:00:00")),
"Unable to convert timezone 'America/Los_Angeles' past 2037-11-01 09:00:00");
// Only one white space is allowed before the offset string.
VELOX_ASSERT_THROW(
(evaluateOnce<Timestamp, std::string>(
"cast(c0 as timestamp)", "2000-01-01 00:00:00 +01:01:01")),
"Cannot cast VARCHAR '2000-01-01 00:00:00 +01:01:01' to TIMESTAMP. Unknown timezone value: \"\"");
// Hour must be in the ragne [0, 23].
VELOX_ASSERT_THROW(
(evaluateOnce<Timestamp, std::string>(
"cast(c0 as timestamp)", "2000-01-01 00:00:00 +24")),
"Cannot cast VARCHAR '2000-01-01 00:00:00 +24' to TIMESTAMP. Unknown timezone value: \"+24\"");
// Minute must be in the range [0, 59].
VELOX_ASSERT_THROW(
(evaluateOnce<Timestamp, std::string>(
"cast(c0 as timestamp)", "2000-01-01 00:00:00 +01:60")),
"Cannot cast VARCHAR '2000-01-01 00:00:00 +01:60' to TIMESTAMP. Unknown timezone value: \"+01:60\"");
// Second must be in the range [0, 59].
VELOX_ASSERT_THROW(
(evaluateOnce<Timestamp, std::string>(
"cast(c0 as timestamp)", "2000-01-01 00:00:00 +01:01:60")),
"Cannot cast VARCHAR '2000-01-01 00:00:00 +01:01:60' to TIMESTAMP. Unknown timezone value: \"+01:01:60\"");
// Millisecond must be in the range [0, 999].
VELOX_ASSERT_THROW(
(evaluateOnce<Timestamp, std::string>(
"cast(c0 as timestamp)", "2000-01-01 00:00:00 +01:01:01.1000")),
"Cannot cast VARCHAR '2000-01-01 00:00:00 +01:01:01.1000' to TIMESTAMP. Unknown timezone value: \"+01:01:01.1000\"");

setLegacyCast(true);
input = {
Expand Down
3 changes: 2 additions & 1 deletion velox/functions/prestosql/DateTimeFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -1525,7 +1525,8 @@ struct FromIso8601Timestamp {
return castResult.error();
}

auto [ts, timeZone] = castResult.value();
auto [ts, timeZone, offsetMillis] = castResult.value();
VELOX_DCHECK(!offsetMillis.has_value());
// Input string may not contain a timezone - if so, it is interpreted in
// session timezone.
if (!timeZone) {
Expand Down
3 changes: 3 additions & 0 deletions velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4387,6 +4387,9 @@ TEST_F(DateTimeFunctionsTest, fromIso8601Timestamp) {
VELOX_ASSERT_THROW(
fromIso("1970-01-02T11:38:56.123 America/New_York"),
R"(Unable to parse timestamp value: "1970-01-02T11:38:56.123 America/New_York")");
VELOX_ASSERT_THROW(
fromIso("1970-01-02T11:38:56+16:00:01"),
"Unknown timezone value: \"+16:00:01\"");

VELOX_ASSERT_THROW(fromIso("T"), R"(Unable to parse timestamp value: "T")");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,9 @@ TEST_F(TimestampWithTimeZoneCastTest, fromVarcharInvalidInput) {
const auto invalidStringVector6 =
makeNullableFlatVector<StringView>({"2012-10 America/Los_Angeles"});

const auto invalidStringVector7 =
makeNullableFlatVector<StringView>({"2012-10-01 +16:00"});

auto millis = parseTimestamp("2012-10-31 07:00:47").toMillis();
auto timestamps = std::vector<int64_t>{millis};

Expand Down Expand Up @@ -204,6 +207,9 @@ TEST_F(TimestampWithTimeZoneCastTest, fromVarcharInvalidInput) {
VELOX_ASSERT_THROW(
testCast(invalidStringVector6, expected),
"Unable to parse timestamp value: \"2012-10 America/Los_Angeles\"");
VELOX_ASSERT_THROW(
testCast(invalidStringVector7, expected),
"Unknown timezone value in: \"2012-10-01 +16:00\"");
}

TEST_F(TimestampWithTimeZoneCastTest, toTimestamp) {
Expand Down
10 changes: 9 additions & 1 deletion velox/functions/prestosql/types/TimestampWithTimeZoneType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,18 @@ void castFromString(
if (castResult.hasError()) {
context.setStatus(row, castResult.error());
} else {
auto [ts, timeZone] = castResult.value();
auto [ts, timeZone, millisOffset] = castResult.value();
// Input string may not contain a timezone - if so, it is interpreted in
// session timezone.
if (timeZone == nullptr) {
if (millisOffset.has_value()) {
context.setStatus(
row,
Status::UserError(
"Unknown timezone value in: \"{}\"",
inputVector.valueAt(row)));
return;
}
const auto& config = context.execCtx()->queryCtx()->queryConfig();
timeZone = getTimeZoneFromConfig(config);
}
Expand Down
Loading

0 comments on commit f7931d2

Please sign in to comment.