Skip to content

Commit

Permalink
Add next_day Spark function
Browse files Browse the repository at this point in the history
  • Loading branch information
kerwin-zk committed Dec 7, 2023
1 parent debe03c commit e14a435
Show file tree
Hide file tree
Showing 4 changed files with 140 additions and 1 deletion.
16 changes: 15 additions & 1 deletion velox/docs/functions/spark/datetime.rst
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,21 @@ These functions support TIMESTAMP and DATE input types.
Returns the month of ``date``. ::

SELECT month('2009-07-30'); -- 7

.. spark:function:: next_day(startDate, dayOfWeek) -> date
Returns the first date that is later than ``startDate`` and falls on the day of the week given by ``dayOfWeek``.
``dayOfWeek`` is case insensitive and must be one of the following:
``SU``, ``SUN``, ``SUNDAY``, ``MO``, ``MON``, ``MONDAY``, ``TU``, ``TUE``, ``TUESDAY``,
``WE``, ``WED``, ``WEDNESDAY``, ``TH``, ``THU``, ``THURSDAY``, ``FR``, ``FRI``, ``FRIDAY``,
``SA``, ``SAT``, ``SATURDAY``. ::

SELECT next_day('2015-07-23', "Mon"); -- '2015-07-27'
SELECT next_day('2015-07-23', "mo"); -- '2015-07-27'
SELECT next_day('2015-07-23', "Tue"); -- '2015-07-28'
SELECT next_day('2015-07-23', "tu"); -- '2015-07-28'
SELECT next_day('2015-07-23', "we"); -- '2015-07-29'

.. spark:function:: to_unix_timestamp(string) -> integer
Alias for ``unix_timestamp(string) -> integer``.
Expand Down
62 changes: 62 additions & 0 deletions velox/functions/sparksql/DateTimeFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -408,4 +408,66 @@ struct DayOfYearFunction {
result = getDayOfYear(getDateTime(date));
}
};

/// Maps a lower-case day-of-week name (two-letter, three-letter or full
/// spelling) to a day index in the range [0, 6]. Index 0 is Thursday and the
/// indices advance one day at a time up to 6 for Wednesday.
/// NOTE(review): the numbering presumably starts at Thursday because DATE
/// value 0 (1970-01-01) is a Thursday, so a day number modulo 7 can be
/// compared with the index directly - confirm against getNextDate().
static const folly::F14FastMap<std::string, int8_t> kDayOfWeekNames{
    // Thursday
    {"th", 0},
    {"thu", 0},
    {"thursday", 0},
    // Friday
    {"fr", 1},
    {"fri", 1},
    {"friday", 1},
    // Saturday
    {"sa", 2},
    {"sat", 2},
    {"saturday", 2},
    // Sunday
    {"su", 3},
    {"sun", 3},
    {"sunday", 3},
    // Monday
    {"mo", 4},
    {"mon", 4},
    {"monday", 4},
    // Tuesday
    {"tu", 5},
    {"tue", 5},
    {"tuesday", 5},
    // Wednesday
    {"we", 6},
    {"wed", 6},
    {"wednesday", 6}};

/// Implements Spark's next_day(startDate, dayOfWeek): returns the first date
/// strictly later than 'startDate' that falls on the requested day of week.
///
/// 'dayOfWeek' is matched case-insensitively against the names in
/// kDayOfWeekNames. An unrecognized name raises a user error, as does a result
/// that does not fit into a 32-bit DATE value.
template <typename T>
struct NextDayFunction {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  /// Pre-parses 'dayOfWeek' when it is available at initialization time.
  /// @param dayOfWeek Pointer to the constant day-of-week argument, or nullptr
  /// when the argument is not constant and has to be parsed for every row.
  FOLLY_ALWAYS_INLINE void initialize(
      const core::QueryConfig& /*config*/,
      const arg_type<Date>* /*startDate*/,
      const arg_type<Varchar>* dayOfWeek) {
    // Non-constant arguments arrive as nullptr; dereferencing them would crash.
    if (dayOfWeek != nullptr) {
      weekDay_ = getDayOfWeekFromString(*dayOfWeek);
      invalidFormat_ = !weekDay_.has_value();
    }
  }

  /// Computes the next date for a single row.
  /// @param result Receives the resulting date (days since epoch).
  /// @param startDate Date to start searching from (exclusive).
  /// @param dayOfWeek Name of the requested day of week.
  /// @throws VeloxUserError if 'dayOfWeek' is not a recognized name or the
  /// result overflows int32_t.
  FOLLY_ALWAYS_INLINE void call(
      out_type<Date>& result,
      const arg_type<Date>& startDate,
      const arg_type<Varchar>& dayOfWeek) {
    // A constant argument that failed to parse can never succeed.
    VELOX_USER_CHECK(
        !invalidFormat_, "Illegal input for day of week: {}", dayOfWeek);

    // Reuse the value parsed in initialize() for constant input; otherwise
    // parse the per-row value.
    const std::optional<int8_t> weekDay =
        weekDay_.has_value() ? weekDay_ : getDayOfWeekFromString(dayOfWeek);
    VELOX_USER_CHECK(
        weekDay.has_value(), "Illegal input for day of week: {}", dayOfWeek);

    // The arithmetic is done in 64 bits so that an out-of-range result can be
    // detected instead of silently wrapping around.
    const int64_t nextDay = getNextDate(startDate, weekDay.value());
    if (nextDay != static_cast<int32_t>(nextDay)) {
      auto dateTime = getDateTime(startDate);
      VELOX_USER_FAIL(
          "Integer overflow in next_day({}-{}-{}, {})",
          getYear(dateTime),
          getMonth(dateTime),
          getDay(dateTime),
          dayOfWeek);
    }
    result = static_cast<int32_t>(nextDay);
  }

 private:
  /// Returns the day index registered in kDayOfWeekNames for 'dayOfWeek'
  /// (compared case-insensitively), or std::nullopt for an unknown name.
  static FOLLY_ALWAYS_INLINE std::optional<int8_t> getDayOfWeekFromString(
      StringView dayOfWeek) {
    const std::string lowerDayOfWeek =
        boost::algorithm::to_lower_copy(std::string(dayOfWeek));
    const auto it = kDayOfWeekNames.find(lowerDayOfWeek);
    if (it != kDayOfWeekNames.end()) {
      return it->second;
    }
    return std::nullopt;
  }

  /// Returns the first day number greater than 'startDay' whose day of week
  /// index equals 'dayOfWeek'. The inner "% 7 + 7) % 7" keeps the offset in
  /// [0, 6] even when the left operand of % is negative.
  static FOLLY_ALWAYS_INLINE int64_t
  getNextDate(int64_t startDay, int8_t dayOfWeek) {
    return startDay + 1 + ((dayOfWeek - 1 - startDay) % 7 + 7) % 7;
  }

  // Day index parsed from a constant 'dayOfWeek' argument, if any.
  std::optional<int8_t> weekDay_;
  // True when a constant 'dayOfWeek' argument was supplied but is not valid.
  bool invalidFormat_{false};
};

} // namespace facebook::velox::functions::sparksql
2 changes: 2 additions & 0 deletions velox/functions/sparksql/Register.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,8 @@ void registerFunctions(const std::string& prefix) {

registerFunction<MonthFunction, int32_t, Date>({prefix + "month"});

registerFunction<NextDayFunction, Date, Date, Varchar>({prefix + "next_day"});

// Register bloom filter function
registerFunction<BloomFilterMightContainFunction, bool, Varbinary, int64_t>(
{prefix + "might_contain"});
Expand Down
61 changes: 61 additions & 0 deletions velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
* limitations under the License.
*/

#include <functions/lib/TimeUtils.h>

#include "velox/common/base/tests/GTestUtils.h"
#include "velox/functions/sparksql/tests/SparkFunctionBaseTest.h"
#include "velox/type/tz/TimeZoneMap.h"
Expand Down Expand Up @@ -456,5 +458,64 @@ TEST_F(DateTimeFunctionsTest, quarterDate) {
EXPECT_EQ(3, quarter("1954-08-08"));
}

TEST_F(DateTimeFunctionsTest, nextDay) {
  // Evaluates next_day() twice - once with the day of week inlined as a
  // literal and once with it supplied as a column - checks that both
  // evaluations agree, and returns the result formatted as yyyy-MM-dd.
  const auto nextDay = [&](const std::string& date,
                           const std::string& dayOfWeek) {
    auto dateVector =
        makeNullableFlatVector<int32_t>({parseDate(date)}, DATE());
    auto dayOfWeekVector = makeNullableFlatVector<std::string>({dayOfWeek});

    const auto literalResult = evaluateOnce<int32_t>(
        fmt::format("next_day(c0, '{}')", dayOfWeek),
        makeRowVector({dateVector}));
    const auto columnResult = evaluateOnce<int32_t>(
        "next_day(c0, c1)", makeRowVector({dateVector, dayOfWeekVector}));
    EXPECT_EQ(literalResult, columnResult);

    const auto dateTime = getDateTime(literalResult.value());
    // Month and day are zero-padded to two digits; the year is printed as is.
    return fmt::format(
        "{}-{:02}-{:02}",
        getYear(dateTime),
        getMonth(dateTime),
        getDay(dateTime));
  };

  // 2015-07-23 is a Thursday.
  EXPECT_EQ("2015-07-27", nextDay("2015-07-23", "Mon"));
  EXPECT_EQ("2015-07-27", nextDay("2015-07-23", "mo"));
  EXPECT_EQ("2015-07-27", nextDay("2015-07-23", "monday"));
  EXPECT_EQ("2015-07-28", nextDay("2015-07-23", "Tue"));
  EXPECT_EQ("2015-07-28", nextDay("2015-07-23", "tu"));
  EXPECT_EQ("2015-07-28", nextDay("2015-07-23", "tuesday"));
  EXPECT_EQ("2015-07-29", nextDay("2015-07-23", "we"));
  EXPECT_EQ("2015-07-29", nextDay("2015-07-23", "wed"));
  EXPECT_EQ("2015-07-29", nextDay("2015-07-23", "wednesday"));
  EXPECT_EQ("2015-07-30", nextDay("2015-07-23", "Thu"));
  EXPECT_EQ("2015-07-30", nextDay("2015-07-23", "TH"));
  EXPECT_EQ("2015-07-30", nextDay("2015-07-23", "thursday"));
  EXPECT_EQ("2015-07-24", nextDay("2015-07-23", "Fri"));
  EXPECT_EQ("2015-07-24", nextDay("2015-07-23", "fr"));
  EXPECT_EQ("2015-07-24", nextDay("2015-07-23", "friday"));
  EXPECT_EQ("2015-08-05", nextDay("2015-07-31", "wed"));
  EXPECT_EQ("2015-07-25", nextDay("2015-07-23", "saturday"));
  EXPECT_EQ("2015-07-26", nextDay("2015-07-23", "sunday"));
  EXPECT_EQ("2016-01-01", nextDay("2015-12-31", "Fri"));

  // Unrecognized day-of-week names are rejected with a user error.
  VELOX_ASSERT_THROW(
      nextDay("2015-07-23", "xx"), "Illegal input for day of week: xx");
  VELOX_ASSERT_THROW(
      nextDay("2015-07-23", "\"quote"),
      "Illegal input for day of week: \"quote");
  VELOX_ASSERT_THROW(
      nextDay("2015-07-23", ""), "Illegal input for day of week: ");
}

} // namespace
} // namespace facebook::velox::functions::sparksql::test

0 comments on commit e14a435

Please sign in to comment.