Skip to content

Commit

Permalink
[Opt](exec) Optimize the performance of converting Parquet and ORC date values to date
Browse files Browse the repository at this point in the history
  • Loading branch information
HappenLee committed Jul 28, 2023
1 parent 0cc3232 commit 9397e4f
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 8 deletions.
9 changes: 6 additions & 3 deletions be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,15 +216,18 @@ class FixLengthDictDecoder final : public BaseDictDecoder {
size_t data_index = column_data.size();
column_data.resize(data_index + select_vector.num_values() - select_vector.num_filtered());
size_t dict_index = 0;
CppType t;
t.from_unixtime(0, *_decode_params->ctz);
auto days = doris::calc_daynr(t.year(), t.month(), t.day());

ColumnSelectVector::DataReadType read_type;
while (size_t run_length = select_vector.get_next_run<has_filter>(&read_type)) {
switch (read_type) {
case ColumnSelectVector::CONTENT: {
for (size_t i = 0; i < run_length; ++i) {
int64_t date_value = _dict_items[_indexes[dict_index++]];
int64_t date_value = _dict_items[_indexes[dict_index++]] + days;
auto& v = reinterpret_cast<CppType&>(column_data[data_index++]);
v.from_unixtime(date_value * 24 * 60 * 60,
*_decode_params->ctz); // day to seconds
v.get_date_from_daynr(date_value);
if constexpr (std::is_same_v<CppType, VecDateTimeValue>) {
// we should cast to date if using date v1.
v.cast_to_date();
Expand Down
8 changes: 6 additions & 2 deletions be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,14 +248,18 @@ Status FixLengthPlainDecoder::_decode_date(MutableColumnPtr& doris_column,
size_t data_index = column_data.size();
column_data.resize(data_index + select_vector.num_values() - select_vector.num_filtered());
ColumnSelectVector::DataReadType read_type;
CppType t;
t.from_unixtime(0, *_decode_params->ctz);
auto days = doris::calc_daynr(t.year(), t.month(), t.day());

while (size_t run_length = select_vector.get_next_run<has_filter>(&read_type)) {
switch (read_type) {
case ColumnSelectVector::CONTENT: {
for (size_t i = 0; i < run_length; ++i) {
char* buf_start = _data->data + _offset;
int64_t date_value = static_cast<int64_t>(*reinterpret_cast<int32_t*>(buf_start));
int64_t date_value = static_cast<int64_t>(*reinterpret_cast<int32_t*>(buf_start)) + days;
auto& v = reinterpret_cast<CppType&>(column_data[data_index++]);
v.from_unixtime(date_value * 24 * 60 * 60, *_decode_params->ctz); // day to seconds
v.get_date_from_daynr(date_value);
if constexpr (std::is_same_v<CppType, VecDateTimeValue>) {
// we should cast to date if using date v1.
v.cast_to_date();
Expand Down
4 changes: 1 addition & 3 deletions be/src/vec/runtime/vdatetime_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,7 @@ class VecDateTimeValue { // Now this type is a temp solution with little changes
_type = TIME_DATETIME;
}

bool get_date_from_daynr(uint64_t);
private:
// Used to make sure sizeof VecDateTimeValue
friend class UnusedClass;
Expand Down Expand Up @@ -685,9 +686,6 @@ class VecDateTimeValue { // Now this type is a temp solution with little changes
static uint8_t calc_week(const VecDateTimeValue& value, uint8_t mode, uint32_t* year,
bool disable_lut = false);

// This is private function which modify date but modify `_type`
bool get_date_from_daynr(uint64_t);

// Helper to set max, min, zero
void set_zero(int type);
void set_max_time(bool neg);
Expand Down

0 comments on commit 9397e4f

Please sign in to comment.