Skip to content

Commit

Permalink
[Opt](exec) Optimize the performance of converting Parquet and ORC date values to date
Browse files Browse the repository at this point in the history
  • Loading branch information
HappenLee committed Jul 29, 2023
1 parent 18cf982 commit 2f5be21
Show file tree
Hide file tree
Showing 5 changed files with 13 additions and 8 deletions.
7 changes: 7 additions & 0 deletions be/src/vec/exec/format/parquet/decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,5 +177,12 @@ void Decoder::init(FieldSchema* field_schema, cctz::time_zone* ctz) {
_decode_params->scale_to_nano_factor = 1000;
}
}

if (_decode_params->ctz) {
VecDateTimeValue t;
t.from_unixtime(0, *_decode_params->ctz);
_decode_params->offset_days =
doris::calc_daynr(t.year(), t.month(), t.day());
}
}
} // namespace doris::vectorized
1 change: 1 addition & 0 deletions be/src/vec/exec/format/parquet/decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ struct DecodeParams {
static const cctz::time_zone utc0;
// When schema.logicalType.TIMESTAMP.isAdjustedToUTC == true, the time zone should be set.
cctz::time_zone* ctz = nullptr;
size_t offset_days = 0;
int64_t second_mask = 1;
int64_t scale_to_nano_factor = 1;
DecimalScaleParams decimal_scale;
Expand Down
6 changes: 2 additions & 4 deletions be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,16 +216,14 @@ class FixLengthDictDecoder final : public BaseDictDecoder {
size_t data_index = column_data.size();
column_data.resize(data_index + select_vector.num_values() - select_vector.num_filtered());
size_t dict_index = 0;
CppType t;
t.from_unixtime(0, *_decode_params->ctz);
auto days = doris::calc_daynr(t.year(), t.month(), t.day());

ColumnSelectVector::DataReadType read_type;
while (size_t run_length = select_vector.get_next_run<has_filter>(&read_type)) {
switch (read_type) {
case ColumnSelectVector::CONTENT: {
for (size_t i = 0; i < run_length; ++i) {
int64_t date_value = _dict_items[_indexes[dict_index++]] + days;
int64_t date_value =
_dict_items[_indexes[dict_index++]] + _decode_params->offset_days;
auto& v = reinterpret_cast<CppType&>(column_data[data_index++]);
v.get_date_from_daynr(date_value);
if constexpr (std::is_same_v<CppType, VecDateTimeValue>) {
Expand Down
6 changes: 2 additions & 4 deletions be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,16 +248,14 @@ Status FixLengthPlainDecoder::_decode_date(MutableColumnPtr& doris_column,
size_t data_index = column_data.size();
column_data.resize(data_index + select_vector.num_values() - select_vector.num_filtered());
ColumnSelectVector::DataReadType read_type;
CppType t;
t.from_unixtime(0, *_decode_params->ctz);
auto days = doris::calc_daynr(t.year(), t.month(), t.day());

while (size_t run_length = select_vector.get_next_run<has_filter>(&read_type)) {
switch (read_type) {
case ColumnSelectVector::CONTENT: {
for (size_t i = 0; i < run_length; ++i) {
char* buf_start = _data->data + _offset;
int64_t date_value = static_cast<int64_t>(*reinterpret_cast<int32_t*>(buf_start)) + days;
int64_t date_value =
static_cast<int64_t>(*reinterpret_cast<int32_t*>(buf_start)) + _decode_params->offset_days;
auto& v = reinterpret_cast<CppType&>(column_data[data_index++]);
v.get_date_from_daynr(date_value);
if constexpr (std::is_same_v<CppType, VecDateTimeValue>) {
Expand Down
1 change: 1 addition & 0 deletions be/src/vec/runtime/vdatetime_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,7 @@ class VecDateTimeValue { // Now this type is a temp solution with little changes
}

bool get_date_from_daynr(uint64_t);

private:
// Used to make sure sizeof VecDateTimeValue
friend class UnusedClass;
Expand Down

0 comments on commit 2f5be21

Please sign in to comment.