Skip to content

Commit

Permalink
Add timestamp_micros, timestamp_millis, unix_micros, unix_millis Spar…
Browse files Browse the repository at this point in the history
  • Loading branch information
zhli1142015 authored and facebook-github-bot committed Apr 30, 2024
1 parent 20a3a04 commit 6e253f7
Show file tree
Hide file tree
Showing 6 changed files with 215 additions and 0 deletions.
27 changes: 27 additions & 0 deletions velox/docs/functions/spark/datetime.rst
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,20 @@ These functions support TIMESTAMP and DATE input types.

SELECT second('2009-07-30 12:58:59'); -- 59

.. spark:function:: timestamp_micros(x) -> timestamp
Returns the timestamp that is the given number of microseconds after the UTC epoch (1970-01-01 00:00:00 UTC).
Supported types are: TINYINT, SMALLINT, INTEGER and BIGINT.::

SELECT timestamp_micros(1230219000123123); -- '2008-12-25 15:30:00.123123'

.. spark:function:: timestamp_millis(x) -> timestamp
Returns the timestamp that is the given number of milliseconds after the UTC epoch (1970-01-01 00:00:00 UTC).
Supported types are: TINYINT, SMALLINT, INTEGER and BIGINT.::

SELECT timestamp_millis(1230219000123); -- '2008-12-25 15:30:00.123'

.. spark:function:: to_unix_timestamp(string) -> integer
Alias for ``unix_timestamp(string) -> integer``.
Expand All @@ -244,6 +258,19 @@ These functions support TIMESTAMP and DATE input types.
SELECT unix_date('1970-01-02'); -- '1'
SELECT unix_date('1969-12-31'); -- '-1'

.. spark:function:: unix_micros(timestamp) -> bigint
Returns the number of microseconds since 1970-01-01 00:00:00 UTC.::

SELECT unix_micros('1970-01-01 00:00:01'); -- 1000000

.. spark:function:: unix_millis(timestamp) -> bigint
Returns the number of milliseconds since 1970-01-01 00:00:00 UTC. Truncates
higher levels of precision.::

SELECT unix_millis('1970-01-01 00:00:01'); -- 1000

.. spark:function:: unix_timestamp() -> integer
Returns the current UNIX timestamp in seconds.
Expand Down
8 changes: 8 additions & 0 deletions velox/functions/lib/RegistrationHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,14 @@ void registerUnaryIntegral(const std::vector<std::string>& aliases) {
registerFunction<T, int64_t, int64_t>(aliases);
}

template <template <class> class T, typename TReturn>
void registerUnaryIntegralWithTReturn(const std::vector<std::string>& aliases) {
registerFunction<T, TReturn, int8_t>(aliases);
registerFunction<T, TReturn, int16_t>(aliases);
registerFunction<T, TReturn, int32_t>(aliases);
registerFunction<T, TReturn, int64_t>(aliases);
}

template <template <class> class T>
void registerUnaryFloatingPoint(const std::vector<std::string>& aliases) {
registerFunction<T, double, double>(aliases);
Expand Down
43 changes: 43 additions & 0 deletions velox/functions/sparksql/DateTimeFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -760,4 +760,47 @@ struct MakeYMIntervalFunction {
result = totalMonths;
}
};

/// Scalar function registered as Spark's unix_micros: converts a timestamp
/// into the number of microseconds since 1970-01-01 00:00:00 UTC.
template <typename TExec>
struct TimestampToMicrosFunction {
  VELOX_DEFINE_FUNCTION_TYPES(TExec);

  /// Writes the value of timestamp.toMicros() into 'result'.
  FOLLY_ALWAYS_INLINE void call(
      int64_t& result,
      const arg_type<Timestamp>& timestamp) {
    result = timestamp.toMicros();
  }
};

/// Scalar function registered as Spark's timestamp_micros: builds a
/// timestamp from a number of microseconds since the UTC epoch.
template <typename TExec>
struct MicrosToTimestampFunction {
  VELOX_DEFINE_FUNCTION_TYPES(TExec);

  /// Accepts any input type 'TInput' that converts to the int64_t parameter
  /// of Timestamp::fromMicrosNoError and stores that function's result in
  /// 'result'.
  template <typename TInput>
  FOLLY_ALWAYS_INLINE void call(
      out_type<Timestamp>& result,
      const TInput& micros) {
    result = Timestamp::fromMicrosNoError(micros);
  }
};

/// Scalar function registered as Spark's unix_millis: converts a timestamp
/// into the number of milliseconds since 1970-01-01 00:00:00 UTC.
template <typename TExec>
struct TimestampToMillisFunction {
  VELOX_DEFINE_FUNCTION_TYPES(TExec);

  /// Writes the value of timestamp.toMillis() into 'result'.
  FOLLY_ALWAYS_INLINE void call(
      int64_t& result,
      const arg_type<Timestamp>& timestamp) {
    result = timestamp.toMillis();
  }
};

/// Scalar function registered as Spark's timestamp_millis: builds a
/// timestamp from a number of milliseconds since the UTC epoch.
template <typename TExec>
struct MillisToTimestampFunction {
  VELOX_DEFINE_FUNCTION_TYPES(TExec);

  /// Forwards 'millis' to Timestamp::fromMillisNoError and stores that
  /// function's result in 'result'. 'TInput' is whichever integral type the
  /// function was registered with.
  template <typename TInput>
  FOLLY_ALWAYS_INLINE void call(
      out_type<Timestamp>& result,
      const TInput& millis) {
    result = Timestamp::fromMillisNoError(millis);
  }
};

} // namespace facebook::velox::functions::sparksql
9 changes: 9 additions & 0 deletions velox/functions/sparksql/Register.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,15 @@ void registerFunctions(const std::string& prefix) {

VELOX_REGISTER_VECTOR_FUNCTION(udf_make_timestamp, prefix + "make_timestamp");

// Register conversions between timestamps and epoch microseconds or
// milliseconds. The timestamp_* functions are registered once per integral
// input type.
registerFunction<TimestampToMicrosFunction, int64_t, Timestamp>(
{prefix + "unix_micros"});
registerUnaryIntegralWithTReturn<MicrosToTimestampFunction, Timestamp>(
{prefix + "timestamp_micros"});
registerFunction<TimestampToMillisFunction, int64_t, Timestamp>(
{prefix + "unix_millis"});
registerUnaryIntegralWithTReturn<MillisToTimestampFunction, Timestamp>(
{prefix + "timestamp_millis"});

// Register bloom filter function
registerFunction<BloomFilterMightContainFunction, bool, Varbinary, int64_t>(
{prefix + "might_contain"});
Expand Down
113 changes: 113 additions & 0 deletions velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ class DateTimeFunctionsTest : public SparkFunctionBaseTest {
static constexpr int16_t kMaxSmallint = std::numeric_limits<int16_t>::max();
static constexpr int8_t kMinTinyint = std::numeric_limits<int8_t>::min();
static constexpr int8_t kMaxTinyint = std::numeric_limits<int8_t>::max();
static constexpr int64_t kMinBigint = std::numeric_limits<int64_t>::min();
static constexpr int64_t kMaxBigint = std::numeric_limits<int64_t>::max();

protected:
void setQueryTimeZone(const std::string& timeZone) {
Expand Down Expand Up @@ -983,5 +985,116 @@ TEST_F(DateTimeFunctionsTest, yearOfWeek) {
EXPECT_EQ(2006, yearOfWeek(parseDate("2006-01-02")));
}

// Checks timestamp_micros against hand-computed timestamps, including the
// extreme values of each integer width.
TEST_F(DateTimeFunctionsTest, microsToTimestamp) {
  // Evaluates timestamp_micros(micros) and expects the timestamp parsed from
  // 'expected'.
  const auto expectTimestamp = [&](int64_t micros, const StringView expected) {
    EXPECT_EQ(
        evaluateOnce<Timestamp, int64_t>("timestamp_micros(c0)", micros),
        util::fromTimestampString(expected));
  };

  // Ordinary positive inputs.
  expectTimestamp(1000000, "1970-01-01 00:00:01");
  expectTimestamp(1230219000123123, "2008-12-25 15:30:00.123123");

  // Largest and smallest values of each integer width.
  expectTimestamp(kMaxTinyint, "1970-01-01 00:00:00.000127");
  expectTimestamp(kMinTinyint, "1969-12-31 23:59:59.999872");
  expectTimestamp(kMaxSmallint, "1970-01-01 00:00:00.032767");
  expectTimestamp(kMinSmallint, "1969-12-31 23:59:59.967232");
  expectTimestamp(kMax, "1970-01-01 00:35:47.483647");
  expectTimestamp(kMin, "1969-12-31 23:24:12.516352");
  expectTimestamp(kMaxBigint, "294247-01-10 04:00:54.775807");
  expectTimestamp(kMinBigint, "-290308-12-21 19:59:05.224192");
}

// Checks timestamp_millis against hand-computed timestamps, including the
// extreme values of each integer width.
TEST_F(DateTimeFunctionsTest, millisToTimestamp) {
  // Evaluates timestamp_millis(millis) and expects the timestamp parsed from
  // 'expected'.
  const auto expectTimestamp = [&](int64_t millis, const StringView expected) {
    EXPECT_EQ(
        evaluateOnce<Timestamp, int64_t>("timestamp_millis(c0)", millis),
        util::fromTimestampString(expected));
  };

  // Ordinary positive inputs.
  expectTimestamp(1000, "1970-01-01 00:00:01");
  expectTimestamp(1230219000123, "2008-12-25 15:30:00.123");

  // Largest and smallest values of each integer width.
  expectTimestamp(kMaxTinyint, "1970-01-01 00:00:00.127");
  expectTimestamp(kMinTinyint, "1969-12-31 23:59:59.872");
  expectTimestamp(kMaxSmallint, "1970-01-01 00:00:32.767");
  expectTimestamp(kMinSmallint, "1969-12-31 23:59:27.232");
  expectTimestamp(kMax, "1970-01-25 20:31:23.647");
  expectTimestamp(kMin, "1969-12-07 03:28:36.352");
  expectTimestamp(kMaxBigint, "292278994-08-17T07:12:55.807");
  expectTimestamp(kMinBigint, "-292275055-05-16T16:47:04.192");
}

// Checks unix_micros on timestamps whose microsecond counts are known,
// mirroring the inputs used for timestamp_micros.
TEST_F(DateTimeFunctionsTest, timestampToMicros) {
  // Parses 'time', evaluates unix_micros on it and expects 'expected'.
  const auto expectMicros = [&](const StringView time, int64_t expected) {
    EXPECT_EQ(
        evaluateOnce<int64_t, Timestamp>(
            "unix_micros(c0)", util::fromTimestampString(time)),
        expected);
  };

  // Ordinary positive inputs.
  expectMicros("1970-01-01 00:00:01", 1000000);
  expectMicros("2008-12-25 15:30:00.123123", 1230219000123123);

  // Timestamps corresponding to the bounds of each integer width.
  expectMicros("1970-01-01 00:00:00.000127", kMaxTinyint);
  expectMicros("1969-12-31 23:59:59.999872", kMinTinyint);
  expectMicros("1970-01-01 00:00:00.032767", kMaxSmallint);
  expectMicros("1969-12-31 23:59:59.967232", kMinSmallint);
  expectMicros("1970-01-01 00:35:47.483647", kMax);
  expectMicros("1969-12-31 23:24:12.516352", kMin);
  expectMicros("294247-01-10 04:00:54.775807", kMaxBigint);
  // This input is one second later than the timestamp expected for
  // kMinBigint in the microsToTimestamp test, hence the 1000000 offset.
  // NOTE(review): presumably this avoids overflow at the exact minimum;
  // confirm against Timestamp::toMicros.
  expectMicros("-290308-12-21 19:59:06.224192", kMinBigint + 1000000);
}

// Checks unix_millis on timestamps whose millisecond counts are known,
// mirroring the inputs used for timestamp_millis.
TEST_F(DateTimeFunctionsTest, timestampToMillis) {
  // Parses 'time', evaluates unix_millis on it and expects 'expected'.
  const auto expectMillis = [&](const StringView time, int64_t expected) {
    EXPECT_EQ(
        evaluateOnce<int64_t, Timestamp>(
            "unix_millis(c0)", util::fromTimestampString(time)),
        expected);
  };

  // Ordinary positive inputs.
  expectMillis("1970-01-01 00:00:01", 1000);
  expectMillis("2008-12-25 15:30:00.123", 1230219000123);

  // Timestamps corresponding to the bounds of each integer width.
  expectMillis("1970-01-01 00:00:00.127", kMaxTinyint);
  expectMillis("1969-12-31 23:59:59.872", kMinTinyint);
  expectMillis("1970-01-01 00:00:32.767", kMaxSmallint);
  expectMillis("1969-12-31 23:59:27.232", kMinSmallint);
  expectMillis("1970-01-25 20:31:23.647", kMax);
  expectMillis("1969-12-07 03:28:36.352", kMin);
  expectMillis("292278994-08-17T07:12:55.807", kMaxBigint);
  expectMillis("-292275055-05-16T16:47:04.192", kMinBigint);
}

} // namespace
} // namespace facebook::velox::functions::sparksql::test
15 changes: 15 additions & 0 deletions velox/type/Timestamp.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,21 @@ struct Timestamp {
return Timestamp(second, nano);
}

/// Builds a Timestamp from a count of microseconds since the epoch.
///
/// The count is split into whole seconds, rounded toward negative infinity,
/// and a non-negative sub-second remainder converted to nanoseconds. The
/// nanosecond component is therefore always within [0, 999'999'000], also
/// for negative inputs.
///
/// The remainder is taken directly from 'micros % 1'000'000' instead of
/// multiplying the floored second count back by 1'000'000. For inputs close
/// to INT64_MIN that product is smaller than INT64_MIN, which is signed
/// integer overflow and thus undefined behavior. Every intermediate value
/// below fits in int64_t for any input, so no sanitizer suppression
/// attribute is required.
///
/// @param micros Microseconds since the epoch; may be negative.
/// @return Timestamp constructed from the computed seconds and nanoseconds.
static Timestamp fromMicrosNoError(int64_t micros) {
  constexpr int64_t kMicrosPerSecond = 1'000'000;
  constexpr int64_t kNanosPerMicro = 1'000;

  // Integer division truncates toward zero, so the remainder carries the
  // sign of 'micros'.
  auto second = micros / kMicrosPerSecond;
  auto remainder = micros % kMicrosPerSecond;
  if (remainder < 0) {
    // Borrow one second so that the fractional part becomes non-negative.
    --second;
    remainder += kMicrosPerSecond;
  }
  return Timestamp(second, remainder * kNanosPerMicro);
}

static Timestamp fromNanos(int64_t nanos) {
if (nanos >= 0 || nanos % 1'000'000'000 == 0) {
return Timestamp(nanos / 1'000'000'000, nanos % 1'000'000'000);
Expand Down

0 comments on commit 6e253f7

Please sign in to comment.