From e739b11a396b153e9d2e1934805033946d4e8eda Mon Sep 17 00:00:00 2001 From: HappenLee Date: Sat, 15 Jul 2023 18:19:57 +0800 Subject: [PATCH] test code --- be/src/vec/columns/column_vector.cpp | 14 +++---- be/src/vec/common/hash_table/ph_hash_map.h | 40 +++++++++++++++++++ .../exec/join/process_hash_table_probe_impl.h | 26 +++++++----- be/src/vec/exec/join/vhash_join_node.h | 3 +- 4 files changed, 66 insertions(+), 17 deletions(-) diff --git a/be/src/vec/columns/column_vector.cpp b/be/src/vec/columns/column_vector.cpp index 388b436bc5dc40..49ee2818f40c35 100644 --- a/be/src/vec/columns/column_vector.cpp +++ b/be/src/vec/columns/column_vector.cpp @@ -547,16 +547,16 @@ ColumnPtr ColumnVector::replicate(const IColumn::Offsets& offsets) const { template void ColumnVector::replicate(const uint32_t* counts, size_t target_size, IColumn& column, size_t begin, int count_sz) const { - size_t size = count_sz < 0 ? data.size() : count_sz; - if (size == 0) return; - auto& res = reinterpret_cast&>(column); typename Self::Container& res_data = res.get_data(); - res_data.reserve(target_size); + res_data.resize(target_size); - size_t end = begin + size; - for (size_t i = begin; i < end; ++i) { - res_data.add_num_element_without_reserve(data[i], counts[i]); + auto* __restrict left = res_data.data(); + auto* __restrict right = data.data(); + auto* __restrict index = counts; + + for (size_t i = 0; i < target_size; ++i) { + left[i] = right[index[i]]; } } diff --git a/be/src/vec/common/hash_table/ph_hash_map.h b/be/src/vec/common/hash_table/ph_hash_map.h index da51f31cf93fee..cf59408bbe7a94 100644 --- a/be/src/vec/common/hash_table/ph_hash_map.h +++ b/be/src/vec/common/hash_table/ph_hash_map.h @@ -206,10 +206,50 @@ class PHHashMap : private boost::noncopyable { size_t hash(const Key& x) const { return _hash_map.hash(x); } + template void ALWAYS_INLINE prefetch_by_hash(size_t hash_value) { _hash_map.prefetch_hash(hash_value); } + template void ALWAYS_INLINE prefetch_by_key(Key key) { _hash_map.prefetch(key); } + template + void ALWAYS_INLINE prefetch(KeyHolder& key_holder) { + const auto& key = key_holder_get_key(key_holder); + prefetch_by_key(key); + } + + size_t get_size() { + return _hash_map.size(); + } + + std::vector sizes() const { + std::vector sizes {_hash_map.size()}; + return sizes; + } + + int64_t get_resize_timer_value() const { + return 0; + } + + int64_t get_convert_timer_value() const { + return 0; + } + + int64_t get_collisions() const { + return 0; + } + + void reset_resize_timer() { + } + + void expanse_for_add_elem(size_t num_elem) { + _hash_map.reserve(num_elem); + } + + std::vector get_buffer_sizes_in_cells() const { + return sizes(); + } + /// Call func(const Key &, Mapped &) for each hash map element. template void for_each_value(Func&& func) { diff --git a/be/src/vec/exec/join/process_hash_table_probe_impl.h b/be/src/vec/exec/join/process_hash_table_probe_impl.h index 14181b160aaad9..8acd9d375e7f1a 100644 --- a/be/src/vec/exec/join/process_hash_table_probe_impl.h +++ b/be/src/vec/exec/join/process_hash_table_probe_impl.h @@ -215,11 +215,12 @@ Status ProcessHashTableProbe::do_process(HashTableType& hash_table_c bool is_mark_join) { auto& probe_index = _join_node->_probe_index; auto& probe_raw_ptrs = _join_node->_probe_columns; - if (probe_index == 0 && _items_counts.size() < probe_rows) { - _items_counts.resize(probe_rows); - } +// if (probe_index == 0 && _items_counts.size() < probe_rows) { +// _items_counts.resize(probe_rows); +// } if (_build_block_rows.size() < probe_rows * PROBE_SIDE_EXPLODE_RATE) { + _items_counts.resize(probe_rows * PROBE_SIDE_EXPLODE_RATE); _build_block_rows.resize(probe_rows * PROBE_SIDE_EXPLODE_RATE); _build_block_offsets.resize(probe_rows * PROBE_SIDE_EXPLODE_RATE); } @@ -263,13 +264,14 @@ Status ProcessHashTableProbe::do_process(HashTableType& hash_table_c if (LIKELY(current_offset < _build_block_rows.size())) { _build_block_offsets[current_offset] = probe_row_match_iter->block_offset; _build_block_rows[current_offset] = probe_row_match_iter->row_num; + _items_counts[current_offset] = probe_index; } else { _build_block_offsets.emplace_back(probe_row_match_iter->block_offset); _build_block_rows.emplace_back(probe_row_match_iter->row_num); + _items_counts.template emplace_back(probe_index); } ++current_offset; } - _items_counts[probe_index] = current_offset; all_match_one &= (current_offset == 1); if (!probe_row_match_iter.ok()) { ++probe_index; @@ -283,19 +285,19 @@ Status ProcessHashTableProbe::do_process(HashTableType& hash_table_c if constexpr (ignore_null && need_null_map_for_probe) { if ((*null_map)[probe_index]) { if constexpr (probe_all) { - _items_counts[probe_index++] = (uint32_t)1; // only full outer / left outer need insert the data of right table if (LIKELY(current_offset < _build_block_rows.size())) { _build_block_offsets[current_offset] = -1; _build_block_rows[current_offset] = -1; + _items_counts[current_offset] = probe_index; } else { _build_block_offsets.emplace_back(-1); _build_block_rows.emplace_back(-1); + _items_counts.template emplace_back(probe_index); } ++current_offset; - } else { - _items_counts[probe_index++] = (uint32_t)0; } + probe_index++; all_match_one = false; if constexpr (probe_all) { if (current_offset >= _batch_size) { @@ -320,7 +322,7 @@ Status ProcessHashTableProbe::do_process(HashTableType& hash_table_c probe_index + PREFETCH_STEP, *_arena); } - auto current_probe_index = probe_index; +// auto current_probe_index = probe_index; if constexpr (JoinOpType == TJoinOp::LEFT_ANTI_JOIN || JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) { if (is_mark_join) { @@ -361,9 +363,11 @@ Status ProcessHashTableProbe::do_process(HashTableType& hash_table_c if (LIKELY(current_offset < _build_block_rows.size())) { _build_block_offsets[current_offset] = mapped.block_offset; _build_block_rows[current_offset] = mapped.row_num; + _items_counts[current_offset] = probe_index; } else { _build_block_offsets.emplace_back(mapped.block_offset); _build_block_rows.emplace_back(mapped.row_num); + _items_counts.template emplace_back(probe_index); } ++current_offset; } @@ -375,9 +379,11 @@ Status ProcessHashTableProbe::do_process(HashTableType& hash_table_c if (LIKELY(current_offset < _build_block_rows.size())) { _build_block_offsets[current_offset] = it->block_offset; _build_block_rows[current_offset] = it->row_num; + _items_counts[current_offset] = probe_index; } else { _build_block_offsets.emplace_back(it->block_offset); _build_block_rows.emplace_back(it->row_num); + _items_counts.template emplace_back(probe_index); } ++current_offset; } @@ -403,9 +409,11 @@ Status ProcessHashTableProbe::do_process(HashTableType& hash_table_c if (LIKELY(current_offset < _build_block_rows.size())) { _build_block_offsets[current_offset] = -1; _build_block_rows[current_offset] = -1; + _items_counts[current_offset] = probe_index; } else { _build_block_offsets.emplace_back(-1); _build_block_rows.emplace_back(-1); + _items_counts.template emplace_back(probe_index); } ++current_offset; } @@ -414,7 +422,7 @@ Status ProcessHashTableProbe::do_process(HashTableType& hash_table_c } uint32_t count = (uint32_t)(current_offset - last_offset); - _items_counts[current_probe_index] = count; +// _items_counts[current_probe_index] = count; all_match_one &= (count == 1); if (current_offset >= _batch_size) { break; diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h index 3d85d6b227e034..091c3649b76c85 100644 --- a/be/src/vec/exec/join/vhash_join_node.h +++ b/be/src/vec/exec/join/vhash_join_node.h @@ -109,7 +109,8 @@ struct SerializedHashTableContext { template struct PrimaryTypeHashTableContext { using Mapped = RowRefListType; - using HashTable = PartitionedHashMap>; +// using HashTable = PHPartitionedHashMap>; + using HashTable = PHHashMap>; using State = ColumnsHashing::HashMethodOneNumber; using Iter = typename HashTable::iterator;