Skip to content

Commit

Permalink
[improvement](function) improve date_trunc function performance when …
Browse files Browse the repository at this point in the history
…timeunit is const (apache#25824)

PR apache#22602 already added a check function.
Only date_trunc(column, const) is supported, so the second argument must be a constant literal
and there is no need to check the time unit for every row.
  • Loading branch information
zclllyybb authored and xiaokang committed Nov 1, 2023
1 parent 29515a6 commit 4e78a3d
Show file tree
Hide file tree
Showing 5 changed files with 133 additions and 23 deletions.
51 changes: 28 additions & 23 deletions be/src/vec/functions/function_convert_tz.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <stddef.h>
#include <stdint.h>

#include <cstdint>
#include <map>
#include <memory>
#include <string>
Expand Down Expand Up @@ -53,18 +54,7 @@
#include "vec/data_types/data_type_time_v2.h"
#include "vec/functions/function.h"
#include "vec/io/io_helper.h"

namespace doris {
namespace vectorized {
class DataTypeDate;
class VecDateTimeValue;
struct DateTimeV2ValueType;
struct DateV2ValueType;
template <typename T>
class DateV2Value;
} // namespace vectorized
} // namespace doris

#include "vec/runtime/vdatetime_value.h"
namespace doris::vectorized {

template <typename DateValueType, typename ArgType>
Expand Down Expand Up @@ -119,8 +109,8 @@ struct ConvertTZImpl {
NullMap& result_null_map, const size_t index_now) {
DateValueType ts_value =
binary_cast<NativeType, DateValueType>(date_column->get_element(index_now));
int64_t timestamp;
cctz::time_zone from_tz {}, to_tz {};
ReturnDateType ts_value2;

if (!TimezoneUtils::find_cctz_time_zone(from_tz_name, from_tz)) {
result_null_map[index_now] = true;
Expand All @@ -134,17 +124,32 @@ struct ConvertTZImpl {
return;
}

if (!ts_value.unix_timestamp(&timestamp, from_tz)) {
result_null_map[index_now] = true;
result_column->insert_default();
return;
}
if constexpr (std::is_same_v<DateValueType, DateV2Value<DateTimeV2ValueType>>) {
std::pair<int64_t, int64_t> timestamp;
if (!ts_value.unix_timestamp(&timestamp, from_tz)) {
result_null_map[index_now] = true;
result_column->insert_default();
return;
}

ReturnDateType ts_value2;
if (!ts_value2.from_unixtime(timestamp, to_tz)) {
result_null_map[index_now] = true;
result_column->insert_default();
return;
if (!ts_value2.from_unixtime(timestamp, to_tz)) {
result_null_map[index_now] = true;
result_column->insert_default();
return;
}
} else {
int64_t timestamp;
if (!ts_value.unix_timestamp(&timestamp, from_tz)) {
result_null_map[index_now] = true;
result_column->insert_default();
return;
}

if (!ts_value2.from_unixtime(timestamp, to_tz)) {
result_null_map[index_now] = true;
result_column->insert_default();
return;
}
}

result_column->insert(binary_cast<ReturnDateType, ReturnNativeType>(ts_value2));
Expand Down
53 changes: 53 additions & 0 deletions be/src/vec/runtime/vdatetime_value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3125,6 +3125,34 @@ bool DateV2Value<T>::unix_timestamp(int64_t* timestamp, const cctz::time_zone& c
}
}

// Name-based convenience overload: looks up `timezone` and forwards to the
// cctz::time_zone overload. Returns false without touching `*timestamp` when
// the zone name cannot be resolved.
template <typename T>
bool DateV2Value<T>::unix_timestamp(std::pair<int64_t, int64_t>* timestamp,
                                    const std::string& timezone) const {
    cctz::time_zone resolved_tz;
    const bool zone_found = TimezoneUtils::find_cctz_time_zone(timezone, resolved_tz);
    // Short-circuit keeps the original contract: no conversion on lookup failure.
    return zone_found && unix_timestamp(timestamp, resolved_tz);
}

// Converts this value, read as wall-clock time in `ctz`, into a
// (seconds, microseconds) pair.
//
// On success `timestamp->first` receives the epoch count of the time point
// built from the year..second fields, and `timestamp->second` receives the
// microsecond field unchanged.
//
// Only supported when `is_datetime` is true. For any other instantiation the
// DCHECK reports the misuse; if that check is compiled out, the function
// returns false and leaves `*timestamp` untouched instead of reporting success
// with an output that was never written.
template <typename T>
bool DateV2Value<T>::unix_timestamp(std::pair<int64_t, int64_t>* timestamp,
                                    const cctz::time_zone& ctz) const {
    DCHECK(is_datetime) << "Function unix_timestamp with double_t timestamp only support "
                           "datetimev2 value type.";
    if constexpr (is_datetime) {
        // Whole-second part: resolve the civil time in the requested zone.
        const auto tp =
                cctz::convert(cctz::civil_second(date_v2_value_.year_, date_v2_value_.month_,
                                                 date_v2_value_.day_, date_v2_value_.hour_,
                                                 date_v2_value_.minute_, date_v2_value_.second_),
                              ctz);
        timestamp->first = tp.time_since_epoch().count();
        // Sub-second part is independent of the time zone, so copy it through.
        timestamp->second = date_v2_value_.microsecond_;
        return true;
    } else {
        // Nothing was written to *timestamp; do not claim success.
        return false;
    }
}

template <typename T>
bool DateV2Value<T>::from_unixtime(int64_t timestamp, const std::string& timezone) {
cctz::time_zone ctz;
Expand All @@ -3147,6 +3175,31 @@ bool DateV2Value<T>::from_unixtime(int64_t timestamp, const cctz::time_zone& ctz
return true;
}

// Name-based convenience overload: looks up `timezone` and forwards to the
// cctz::time_zone overload. Returns false, leaving this value unmodified,
// when the zone name cannot be resolved.
template <typename T>
bool DateV2Value<T>::from_unixtime(std::pair<int64_t, int64_t> timestamp,
                                   const std::string& timezone) {
    cctz::time_zone resolved_tz;
    const bool zone_found = TimezoneUtils::find_cctz_time_zone(timezone, resolved_tz);
    // Short-circuit keeps the original contract: no conversion on lookup failure.
    return zone_found && from_unixtime(timestamp, resolved_tz);
}

// Rebuilds this value from a (seconds, sub-second) pair rendered in `ctz`.
//
// `timestamp.first` is added, as seconds, to the time point for time_t 0 and
// the result is converted to civil time in `ctz`. `timestamp.second` is
// forwarded unchanged as the last argument of set_time (presumably the
// microsecond field, mirroring the pair produced by unix_timestamp).
// Always returns true.
template <typename T>
bool DateV2Value<T>::from_unixtime(std::pair<int64_t, int64_t> timestamp,
                                   const cctz::time_zone& ctz) {
    // Epoch expressed at second resolution; computed once and reused.
    static const cctz::time_point<cctz::sys_seconds> unix_epoch =
            std::chrono::time_point_cast<cctz::sys_seconds>(
                    std::chrono::system_clock::from_time_t(0));

    const int64_t epoch_seconds = timestamp.first;
    const int64_t sub_second = timestamp.second;

    const cctz::time_point<cctz::sys_seconds> instant = unix_epoch + cctz::seconds(epoch_seconds);
    const auto civil = cctz::convert(instant, ctz);

    set_time(civil.year(), civil.month(), civil.day(), civil.hour(), civil.minute(),
             civil.second(), sub_second);
    return true;
}

template <typename T>
bool DateV2Value<T>::from_unixtime(int64_t timestamp, int32_t nano_seconds,
const std::string& timezone, const int scale) {
Expand Down
4 changes: 4 additions & 0 deletions be/src/vec/runtime/vdatetime_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -960,11 +960,15 @@ class DateV2Value {
// Returns the seconds of this date/datetime value since '1970-01-01 00:00:00' UTC,
// interpreting the value as local time in the given time zone.
bool unix_timestamp(int64_t* timestamp, const std::string& timezone) const;
bool unix_timestamp(int64_t* timestamp, const cctz::time_zone& ctz) const;
// Pair overloads: `timestamp->first` receives the seconds part and
// `timestamp->second` receives the value's microsecond field.
// The std::string overload returns false when the zone name cannot be resolved.
bool unix_timestamp(std::pair<int64_t, int64_t>* timestamp, const std::string& timezone) const;
bool unix_timestamp(std::pair<int64_t, int64_t>* timestamp, const cctz::time_zone& ctz) const;

// Construct the datetime value from a timestamp and a time zone.
// The timestamp is an internal value representing seconds since '1970-01-01 00:00:00' UTC.
bool from_unixtime(int64_t, const std::string& timezone);
bool from_unixtime(int64_t, const cctz::time_zone& ctz);
// Pair overloads: `first` is the seconds part; `second` is forwarded unchanged as the
// sub-second component of the resulting value.
// The std::string overload returns false when the zone name cannot be resolved.
bool from_unixtime(std::pair<int64_t, int64_t>, const std::string& timezone);
bool from_unixtime(std::pair<int64_t, int64_t>, const cctz::time_zone& ctz);

bool from_unixtime(int64_t, int32_t, const std::string& timezone, const int scale);
bool from_unixtime(int64_t, int32_t, const cctz::time_zone& ctz, int scale);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,29 @@
-- !sql --
\N

-- !sql --
2019-08-01T13:21:03.000123 2019-07-31T22:21:03.000123
2019-08-01T13:21:03.123 2019-07-31T22:21:03.123

-- !sql --
2019-08-01T13:21:03.000123 2019-07-31T22:21:03.000123
2019-08-01T13:21:03.123 2019-07-31T22:21:03.123

-- !sql --
2019-08-01T13:21:03.000123 2019-08-01T06:21:03.000123
2019-08-01T13:21:03.123 2019-08-01T06:21:03.123

-- !sql --
2019-08-01T13:21:03.000123 2019-08-01T06:21:03.000123
2019-08-01T13:21:03.123 2019-08-01T06:21:03.123

-- !sql --
2019-08-01T13:21:03.000123 \N
2019-08-01T13:21:03.123 \N

-- !sql --
2019-08-01T08:01:02.123

-- !sql1 --
1 2019-08-01T13:21:03 Asia/Shanghai Asia/Shanghai 2019-08-01T13:21:03
2 2019-08-01T13:21:03 Asia/Singapore Asia/Shanghai 2019-08-01T13:21:03
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,31 @@ suite("test_date_function") {

sql """ truncate table ${tableName} """

// test convert_tz for datetimev2
def tableScale6 = "dtv2s6"
sql """ DROP TABLE IF EXISTS ${tableScale6} """
sql """
CREATE TABLE IF NOT EXISTS ${tableScale6} (
k1 datetimev2(6) NULL COMMENT ""
) ENGINE=OLAP
DUPLICATE KEY(k1)
COMMENT "OLAP"
DISTRIBUTED BY HASH(k1) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"in_memory" = "false",
"storage_format" = "V2"
)
"""
sql """ insert into ${tableScale6} values ("2019-08-01 13:21:03.000123"),("2019-08-01 13:21:03.123") """
// convert_tz on a datetimev2(6) column: the fractional seconds of k1 must be
// carried through to the converted value. Named zones and fixed offsets are
// both exercised as the source zone.
qt_sql """ SELECT k1, convert_tz(k1, 'Asia/Shanghai', 'America/Los_Angeles') result from ${tableScale6} order by k1 """
qt_sql """ SELECT k1, convert_tz(k1, '+08:00', 'America/Los_Angeles') result from ${tableScale6} order by k1 """
qt_sql """ SELECT k1, convert_tz(k1, 'Asia/Shanghai', 'Europe/London') result from ${tableScale6} order by k1 """
qt_sql """ SELECT k1, convert_tz(k1, '+08:00', 'Europe/London') result from ${tableScale6} order by k1 """
// 'America/London' is expected to yield NULL (\N) for every row in the recorded
// output; presumably it is a deliberately unknown zone name.
qt_sql """ SELECT k1, convert_tz(k1, '+08:00', 'America/London') result from ${tableScale6} order by k1 """
// Literal input with millisecond precision and fixed offsets on both sides.
qt_sql """ SELECT convert_tz('2019-08-01 01:01:02.123' , '+00:00', '+07:00') """

def timezoneCachedTableName = "test_convert_tz_with_timezone_cache"
sql """ DROP TABLE IF EXISTS ${timezoneCachedTableName} """
sql """
Expand Down

0 comments on commit 4e78a3d

Please sign in to comment.