Skip to content

Commit

Permalink
[Opt](exec) opt the performance of date parquet convert by date dict
Browse files Browse the repository at this point in the history
  • Loading branch information
HappenLee committed Jul 30, 2023
1 parent 2b9a95f commit 227815a
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 6 deletions.
1 change: 1 addition & 0 deletions be/src/service/doris_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,7 @@ int main(int argc, char** argv) {
auto exec_env = doris::ExecEnv::GetInstance();
doris::ExecEnv::init(exec_env, paths);
doris::TabletSchemaCache::create_global_schema_cache();
doris::vectorized::init_date_day_offset_dict();

// init s3 write buffer pool
doris::io::S3FileBufferPool* s3_buffer_pool = doris::io::S3FileBufferPool::GetInstance();
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/exec/format/parquet/decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ void Decoder::init(FieldSchema* field_schema, cctz::time_zone* ctz) {
if (_decode_params->ctz) {
VecDateTimeValue t;
t.from_unixtime(0, *_decode_params->ctz);
_decode_params->offset_days = doris::calc_daynr(t.year(), t.month(), t.day());
_decode_params->offset_days = t.day() - 1;
}
}
} // namespace doris::vectorized
10 changes: 7 additions & 3 deletions be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,19 +216,23 @@ class FixLengthDictDecoder final : public BaseDictDecoder {
size_t data_index = column_data.size();
column_data.resize(data_index + select_vector.num_values() - select_vector.num_filtered());
size_t dict_index = 0;

auto* __restrict date_day_offset_dict = get_date_day_offset_dict();
ColumnSelectVector::DataReadType read_type;
while (size_t run_length = select_vector.get_next_run<has_filter>(&read_type)) {
switch (read_type) {
case ColumnSelectVector::CONTENT: {
for (size_t i = 0; i < run_length; ++i) {
int64_t date_value =
_dict_items[_indexes[dict_index++]] + _decode_params->offset_days;
auto& v = reinterpret_cast<CppType&>(column_data[data_index++]);
v.get_date_from_daynr(date_value);
DCHECK_LT(date_value, 25500);
if constexpr (std::is_same_v<CppType, VecDateTimeValue>) {
auto& v = reinterpret_cast<CppType&>(column_data[data_index++]);
v.create_from_date_v2(date_day_offset_dict[date_value], TIME_DATE);
// we should cast to date if using date v1.
v.cast_to_date();
} else {
reinterpret_cast<CppType&>(column_data[data_index++]) =
date_day_offset_dict[date_value];
}
}
break;
Expand Down
9 changes: 7 additions & 2 deletions be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ Status FixLengthPlainDecoder::_decode_date(MutableColumnPtr& doris_column,
size_t data_index = column_data.size();
column_data.resize(data_index + select_vector.num_values() - select_vector.num_filtered());
ColumnSelectVector::DataReadType read_type;
auto* __restrict date_day_offset_dict = get_date_day_offset_dict();

while (size_t run_length = select_vector.get_next_run<has_filter>(&read_type)) {
switch (read_type) {
Expand All @@ -256,11 +257,15 @@ Status FixLengthPlainDecoder::_decode_date(MutableColumnPtr& doris_column,
char* buf_start = _data->data + _offset;
int64_t date_value = static_cast<int64_t>(*reinterpret_cast<int32_t*>(buf_start)) +
_decode_params->offset_days;
auto& v = reinterpret_cast<CppType&>(column_data[data_index++]);
v.get_date_from_daynr(date_value);
DCHECK_LT(date_value, 25500);
if constexpr (std::is_same_v<CppType, VecDateTimeValue>) {
auto& v = reinterpret_cast<CppType&>(column_data[data_index++]);
v.create_from_date_v2(date_day_offset_dict[date_value], TIME_DATE);
// we should cast to date if using date v1.
v.cast_to_date();
} else {
reinterpret_cast<CppType&>(column_data[data_index++]) =
date_day_offset_dict[date_value];
}
_offset += _type_length;
}
Expand Down
15 changes: 15 additions & 0 deletions be/src/vec/runtime/vdatetime_value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2482,6 +2482,21 @@ typename DateV2Value<T>::underlying_value DateV2Value<T>::to_date_int_val() cons
return int_val_;
}

// File-local lookup table with 25500 DateV2 entries. It is filled by
// init_date_day_offset_dict(), starting at 1970-01-01 and applying `+= 1`
// once per slot (presumably one calendar day per step, so roughly 69 years
// of dates). Readers obtain it through get_date_day_offset_dict() and index
// it directly; any index outside [0, 25500) is out of bounds.
static std::array<DateV2Value<DateV2ValueType>, 25500> DATE_DAY_OFFSET_DICT;

// Populates DATE_DAY_OFFSET_DICT so that slot i holds the DateV2 value reached
// by starting from 1970-01-01 00:00:00 and applying `+= 1` i times
// (presumably i days after the Unix epoch -- the step unit is defined by
// DateV2Value::operator+=, which is not visible here).
//
// Must run before any caller reads the table through
// get_date_day_offset_dict(); the table is only default-constructed until then.
// NOTE(review): no synchronization is done here -- confirm it is only invoked
// during single-threaded startup.
void init_date_day_offset_dict() {
    DateV2Value<DateV2ValueType> d;
    d.set_time(1970, 1, 1, 0, 0, 0, 0);
    // Range-for avoids the signed/unsigned comparison that an `int` index
    // against std::array::size() would introduce.
    for (auto& entry : DATE_DAY_OFFSET_DICT) {
        entry = d;
        d += 1;
    }
}

// Hands out a raw pointer to the first slot of DATE_DAY_OFFSET_DICT.
// No bounds checking is performed on behalf of the caller: valid indices are
// [0, DATE_DAY_OFFSET_DICT.size()), and the contents are only meaningful once
// init_date_day_offset_dict() has filled the table.
DateV2Value<DateV2ValueType>* get_date_day_offset_dict() {
    auto* first_slot = DATE_DAY_OFFSET_DICT.data();
    return first_slot;
}

template <typename T>
uint32_t DateV2Value<T>::set_date_uint32(uint32_t int_val) {
union DateV2UInt32Union {
Expand Down
3 changes: 3 additions & 0 deletions be/src/vec/runtime/vdatetime_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -1470,6 +1470,9 @@ class DataTypeDateTime;
class DataTypeDateV2;
class DataTypeDateTimeV2;

// Fills the process-wide day-offset -> DateV2 lookup table, starting at
// 1970-01-01. Call before the first get_date_day_offset_dict() lookup.
[[maybe_unused]] void init_date_day_offset_dict();
// Returns a pointer to the first entry of that lookup table. The table has a
// fixed size (25500 entries in the definition); the caller is responsible for
// keeping the index in range.
[[maybe_unused]] DateV2Value<DateV2ValueType>* get_date_day_offset_dict();

// Primary template of DateTraits; it declares no members.
// NOTE(review): per-type specializations are presumably provided elsewhere in
// this header -- confirm before relying on any member of DateTraits<T>.
template <typename T>
struct DateTraits {};

Expand Down

0 comments on commit 227815a

Please sign in to comment.