From 227815af88aa76b1c2c9750ec5784a42add159ad Mon Sep 17 00:00:00 2001 From: HappenLee Date: Sun, 30 Jul 2023 23:26:50 +0800 Subject: [PATCH] [Opt](exec) opt the performance of date parquet convert by date dict --- be/src/service/doris_main.cpp | 1 + be/src/vec/exec/format/parquet/decoder.cpp | 2 +- .../format/parquet/fix_length_dict_decoder.hpp | 10 +++++++--- .../format/parquet/fix_length_plain_decoder.cpp | 9 +++++++-- be/src/vec/runtime/vdatetime_value.cpp | 15 +++++++++++++++ be/src/vec/runtime/vdatetime_value.h | 3 +++ 6 files changed, 34 insertions(+), 6 deletions(-) diff --git a/be/src/service/doris_main.cpp b/be/src/service/doris_main.cpp index 57dec228208f56b..6271b3456431b82 100644 --- a/be/src/service/doris_main.cpp +++ b/be/src/service/doris_main.cpp @@ -433,6 +433,7 @@ int main(int argc, char** argv) { auto exec_env = doris::ExecEnv::GetInstance(); doris::ExecEnv::init(exec_env, paths); doris::TabletSchemaCache::create_global_schema_cache(); + doris::vectorized::init_date_day_offset_dict(); // init s3 write buffer pool doris::io::S3FileBufferPool* s3_buffer_pool = doris::io::S3FileBufferPool::GetInstance(); diff --git a/be/src/vec/exec/format/parquet/decoder.cpp b/be/src/vec/exec/format/parquet/decoder.cpp index 539fc04a10a8e4d..ff7753538752d8f 100644 --- a/be/src/vec/exec/format/parquet/decoder.cpp +++ b/be/src/vec/exec/format/parquet/decoder.cpp @@ -181,7 +181,7 @@ void Decoder::init(FieldSchema* field_schema, cctz::time_zone* ctz) { if (_decode_params->ctz) { VecDateTimeValue t; t.from_unixtime(0, *_decode_params->ctz); - _decode_params->offset_days = doris::calc_daynr(t.year(), t.month(), t.day()); + _decode_params->offset_days = t.day() - 1; } } } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp b/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp index 817b5e7f968e016..bb95fb426f14408 100644 --- a/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp +++ 
b/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp @@ -216,7 +216,7 @@ class FixLengthDictDecoder final : public BaseDictDecoder { size_t data_index = column_data.size(); column_data.resize(data_index + select_vector.num_values() - select_vector.num_filtered()); size_t dict_index = 0; - + auto* __restrict date_day_offset_dict = get_date_day_offset_dict(); ColumnSelectVector::DataReadType read_type; while (size_t run_length = select_vector.get_next_run(&read_type)) { switch (read_type) { @@ -224,11 +224,15 @@ class FixLengthDictDecoder final : public BaseDictDecoder { for (size_t i = 0; i < run_length; ++i) { int64_t date_value = _dict_items[_indexes[dict_index++]] + _decode_params->offset_days; - auto& v = reinterpret_cast(column_data[data_index++]); - v.get_date_from_daynr(date_value); + DCHECK_LT(date_value, 25500); if constexpr (std::is_same_v) { + auto& v = reinterpret_cast(column_data[data_index++]); + v.create_from_date_v2(date_day_offset_dict[date_value], TIME_DATE); // we should cast to date if using date v1. 
v.cast_to_date(); + } else { + reinterpret_cast(column_data[data_index++]) = + date_day_offset_dict[date_value]; } } break; diff --git a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp index 940e70db79581e5..d7b7d9591ab033f 100644 --- a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp +++ b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp @@ -248,6 +248,7 @@ Status FixLengthPlainDecoder::_decode_date(MutableColumnPtr& doris_column, size_t data_index = column_data.size(); column_data.resize(data_index + select_vector.num_values() - select_vector.num_filtered()); ColumnSelectVector::DataReadType read_type; + auto* __restrict date_day_offset_dict = get_date_day_offset_dict(); while (size_t run_length = select_vector.get_next_run(&read_type)) { switch (read_type) { @@ -256,11 +257,15 @@ Status FixLengthPlainDecoder::_decode_date(MutableColumnPtr& doris_column, char* buf_start = _data->data + _offset; int64_t date_value = static_cast(*reinterpret_cast(buf_start)) + _decode_params->offset_days; - auto& v = reinterpret_cast(column_data[data_index++]); - v.get_date_from_daynr(date_value); + DCHECK_LT(date_value, 25500); if constexpr (std::is_same_v) { + auto& v = reinterpret_cast(column_data[data_index++]); + v.create_from_date_v2(date_day_offset_dict[date_value], TIME_DATE); // we should cast to date if using date v1. 
v.cast_to_date(); + } else { + reinterpret_cast(column_data[data_index++]) = + date_day_offset_dict[date_value]; } _offset += _type_length; } diff --git a/be/src/vec/runtime/vdatetime_value.cpp b/be/src/vec/runtime/vdatetime_value.cpp index 8aaa51a3766d959..2154a088106d707 100644 --- a/be/src/vec/runtime/vdatetime_value.cpp +++ b/be/src/vec/runtime/vdatetime_value.cpp @@ -2482,6 +2482,21 @@ typename DateV2Value::underlying_value DateV2Value::to_date_int_val() cons return int_val_; } +static std::array, 25500> DATE_DAY_OFFSET_DICT; /* Lookup table with 25500 slots; slot 0 is 1970-01-01 and each following slot is the previous value after `+= 1`. Valid indexes are [0, 25500). */ + +void init_date_day_offset_dict() { /* Fills DATE_DAY_OFFSET_DICT; it has to run before the pointer from get_date_day_offset_dict() is used for lookups. */ + DateV2Value d; + d.set_time(1970, 1, 1, 0, 0, 0, 0); + for (auto& slot : DATE_DAY_OFFSET_DICT) { /* range-for: no signed index compared against the unsigned size() */ + slot = d; + d += 1; /* presumably advances the date by one day - confirm against DateV2Value::operator+= */ + } +} + +DateV2Value* get_date_day_offset_dict() { /* Returns a pointer to slot 0 of the table; no bounds checking is done here, so callers must keep their index inside [0, 25500). */ + return DATE_DAY_OFFSET_DICT.data(); +} + template uint32_t DateV2Value::set_date_uint32(uint32_t int_val) { union DateV2UInt32Union { diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h index 0abc314842ee4a7..a0b29ff5b3b91dc 100644 --- a/be/src/vec/runtime/vdatetime_value.h +++ b/be/src/vec/runtime/vdatetime_value.h @@ -1470,6 +1470,9 @@ class DataTypeDateTime; class DataTypeDateV2; class DataTypeDateTimeV2; +[[maybe_unused]] void init_date_day_offset_dict(); +[[maybe_unused]] DateV2Value* get_date_day_offset_dict(); + template struct DateTraits {};