From 4e78a3db4aae889e2a04cc4bec77ffe8d97350e5 Mon Sep 17 00:00:00 2001 From: zclllyybb Date: Thu, 26 Oct 2023 10:46:47 +0800 Subject: [PATCH] [improvement](function) improve date_trunc function performance when timeunit is const (#25824) PR #22602 already checks this function and only supports date_trunc(column, const), so the second argument must be a const literal and there is no need to check the time unit for every row. --- be/src/vec/functions/function_convert_tz.h | 51 ++++++++++-------- be/src/vec/runtime/vdatetime_value.cpp | 53 +++++++++++++++++++ be/src/vec/runtime/vdatetime_value.h | 4 ++ .../datetime_functions/test_date_function.out | 23 ++++++++ .../test_date_function.groovy | 25 +++++++++ 5 files changed, 133 insertions(+), 23 deletions(-) diff --git a/be/src/vec/functions/function_convert_tz.h b/be/src/vec/functions/function_convert_tz.h index 12b8c04913a284..9d226b70646244 100644 --- a/be/src/vec/functions/function_convert_tz.h +++ b/be/src/vec/functions/function_convert_tz.h @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -53,18 +54,7 @@ #include "vec/data_types/data_type_time_v2.h" #include "vec/functions/function.h" #include "vec/io/io_helper.h" - -namespace doris { -namespace vectorized { -class DataTypeDate; -class VecDateTimeValue; -struct DateTimeV2ValueType; -struct DateV2ValueType; -template -class DateV2Value; -} // namespace vectorized -} // namespace doris - +#include "vec/runtime/vdatetime_value.h" namespace doris::vectorized { template @@ -119,8 +109,8 @@ struct ConvertTZImpl { NullMap& result_null_map, const size_t index_now) { DateValueType ts_value = binary_cast(date_column->get_element(index_now)); - int64_t timestamp; cctz::time_zone from_tz {}, to_tz {}; + ReturnDateType ts_value2; if (!TimezoneUtils::find_cctz_time_zone(from_tz_name, from_tz)) { result_null_map[index_now] = true; @@ -134,17 +124,32 @@ struct ConvertTZImpl { return; } - if (!ts_value.unix_timestamp(×tamp, from_tz)) { - result_null_map[index_now] = true; - 
result_column->insert_default(); - return; - } + if constexpr (std::is_same_v>) { + std::pair timestamp; + if (!ts_value.unix_timestamp(×tamp, from_tz)) { + result_null_map[index_now] = true; + result_column->insert_default(); + return; + } - ReturnDateType ts_value2; - if (!ts_value2.from_unixtime(timestamp, to_tz)) { - result_null_map[index_now] = true; - result_column->insert_default(); - return; + if (!ts_value2.from_unixtime(timestamp, to_tz)) { + result_null_map[index_now] = true; + result_column->insert_default(); + return; + } + } else { + int64_t timestamp; + if (!ts_value.unix_timestamp(×tamp, from_tz)) { + result_null_map[index_now] = true; + result_column->insert_default(); + return; + } + + if (!ts_value2.from_unixtime(timestamp, to_tz)) { + result_null_map[index_now] = true; + result_column->insert_default(); + return; + } } result_column->insert(binary_cast(ts_value2)); diff --git a/be/src/vec/runtime/vdatetime_value.cpp b/be/src/vec/runtime/vdatetime_value.cpp index e04382532ffed5..fbfa28560e793d 100644 --- a/be/src/vec/runtime/vdatetime_value.cpp +++ b/be/src/vec/runtime/vdatetime_value.cpp @@ -3125,6 +3125,34 @@ bool DateV2Value::unix_timestamp(int64_t* timestamp, const cctz::time_zone& c } } +template +bool DateV2Value::unix_timestamp(std::pair* timestamp, + const std::string& timezone) const { + cctz::time_zone ctz; + if (!TimezoneUtils::find_cctz_time_zone(timezone, ctz)) { + return false; + } + return unix_timestamp(timestamp, ctz); +} + +template +bool DateV2Value::unix_timestamp(std::pair* timestamp, + const cctz::time_zone& ctz) const { + DCHECK(is_datetime) << "Function unix_timestamp with double_t timestamp only support " + "datetimev2 value type."; + if constexpr (is_datetime) { + const auto tp = + cctz::convert(cctz::civil_second(date_v2_value_.year_, date_v2_value_.month_, + date_v2_value_.day_, date_v2_value_.hour_, + date_v2_value_.minute_, date_v2_value_.second_), + ctz); + timestamp->first = tp.time_since_epoch().count(); + 
timestamp->second = date_v2_value_.microsecond_; + } else { + } + return true; +} + template bool DateV2Value::from_unixtime(int64_t timestamp, const std::string& timezone) { cctz::time_zone ctz; @@ -3147,6 +3175,31 @@ bool DateV2Value::from_unixtime(int64_t timestamp, const cctz::time_zone& ctz return true; } +template +bool DateV2Value::from_unixtime(std::pair timestamp, + const std::string& timezone) { + cctz::time_zone ctz; + if (!TimezoneUtils::find_cctz_time_zone(timezone, ctz)) { + return false; + } + return from_unixtime(timestamp, ctz); +} + +template +bool DateV2Value::from_unixtime(std::pair timestamp, + const cctz::time_zone& ctz) { + static const cctz::time_point epoch = + std::chrono::time_point_cast( + std::chrono::system_clock::from_time_t(0)); + cctz::time_point t = epoch + cctz::seconds(timestamp.first); + + const auto tp = cctz::convert(t, ctz); + + set_time(tp.year(), tp.month(), tp.day(), tp.hour(), tp.minute(), tp.second(), + timestamp.second); + return true; +} + template bool DateV2Value::from_unixtime(int64_t timestamp, int32_t nano_seconds, const std::string& timezone, const int scale) { diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h index c1d2c5bbadd432..4a5387e5a70467 100644 --- a/be/src/vec/runtime/vdatetime_value.h +++ b/be/src/vec/runtime/vdatetime_value.h @@ -960,11 +960,15 @@ class DateV2Value { //it returns seconds of the value of date literal since '1970-01-01 00:00:00' UTC bool unix_timestamp(int64_t* timestamp, const std::string& timezone) const; bool unix_timestamp(int64_t* timestamp, const cctz::time_zone& ctz) const; + bool unix_timestamp(std::pair* timestamp, const std::string& timezone) const; + bool unix_timestamp(std::pair* timestamp, const cctz::time_zone& ctz) const; //construct datetime_value from timestamp and timezone //timestamp is an internal timestamp value representing seconds since '1970-01-01 00:00:00' UTC bool from_unixtime(int64_t, const std::string& timezone); bool 
from_unixtime(int64_t, const cctz::time_zone& ctz); + bool from_unixtime(std::pair, const std::string& timezone); + bool from_unixtime(std::pair, const cctz::time_zone& ctz); bool from_unixtime(int64_t, int32_t, const std::string& timezone, const int scale); bool from_unixtime(int64_t, int32_t, const cctz::time_zone& ctz, int scale); diff --git a/regression-test/data/nereids_p0/sql_functions/datetime_functions/test_date_function.out b/regression-test/data/nereids_p0/sql_functions/datetime_functions/test_date_function.out index 34c466cb71bdbb..ad3b6bd5893eeb 100644 --- a/regression-test/data/nereids_p0/sql_functions/datetime_functions/test_date_function.out +++ b/regression-test/data/nereids_p0/sql_functions/datetime_functions/test_date_function.out @@ -23,6 +23,29 @@ -- !sql -- \N +-- !sql -- +2019-08-01T13:21:03.000123 2019-07-31T22:21:03.000123 +2019-08-01T13:21:03.123 2019-07-31T22:21:03.123 + +-- !sql -- +2019-08-01T13:21:03.000123 2019-07-31T22:21:03.000123 +2019-08-01T13:21:03.123 2019-07-31T22:21:03.123 + +-- !sql -- +2019-08-01T13:21:03.000123 2019-08-01T06:21:03.000123 +2019-08-01T13:21:03.123 2019-08-01T06:21:03.123 + +-- !sql -- +2019-08-01T13:21:03.000123 2019-08-01T06:21:03.000123 +2019-08-01T13:21:03.123 2019-08-01T06:21:03.123 + +-- !sql -- +2019-08-01T13:21:03.000123 \N +2019-08-01T13:21:03.123 \N + +-- !sql -- +2019-08-01T08:01:02.123 + -- !sql1 -- 1 2019-08-01T13:21:03 Asia/Shanghai Asia/Shanghai 2019-08-01T13:21:03 2 2019-08-01T13:21:03 Asia/Singapore Asia/Shanghai 2019-08-01T13:21:03 diff --git a/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_date_function.groovy b/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_date_function.groovy index 13add305b65cba..f95eea2b285de6 100644 --- a/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_date_function.groovy +++ b/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_date_function.groovy @@ -53,6 +53,31 @@ 
suite("test_date_function") { sql """ truncate table ${tableName} """ + // test convert_tz for datetimev2 + def tableScale6 = "dtv2s6" + sql """ DROP TABLE IF EXISTS ${tableScale6} """ + sql """ + CREATE TABLE IF NOT EXISTS ${tableScale6} ( + k1 datetimev2(6) NULL COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(k1) + COMMENT "OLAP" + DISTRIBUTED BY HASH(k1) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "in_memory" = "false", + "storage_format" = "V2" + ) + """ + sql """ insert into ${tableScale6} values ("2019-08-01 13:21:03.000123"),("2019-08-01 13:21:03.123") """ + // convert_tz + qt_sql """ SELECT k1, convert_tz(k1, 'Asia/Shanghai', 'America/Los_Angeles') result from ${tableScale6} order by k1 """ + qt_sql """ SELECT k1, convert_tz(k1, '+08:00', 'America/Los_Angeles') result from ${tableScale6} order by k1 """ + qt_sql """ SELECT k1, convert_tz(k1, 'Asia/Shanghai', 'Europe/London') result from ${tableScale6} order by k1 """ + qt_sql """ SELECT k1, convert_tz(k1, '+08:00', 'Europe/London') result from ${tableScale6} order by k1 """ + qt_sql """ SELECT k1, convert_tz(k1, '+08:00', 'America/London') result from ${tableScale6} order by k1 """ + qt_sql """ SELECT convert_tz('2019-08-01 01:01:02.123' , '+00:00', '+07:00') """ + def timezoneCachedTableName = "test_convert_tz_with_timezone_cache" sql """ DROP TABLE IF EXISTS ${timezoneCachedTableName} """ sql """