From b869b2f579a21be7e87bf8b5112bbf83be1d8938 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Mon, 25 Nov 2024 21:16:01 +0800 Subject: [PATCH 01/24] new hash Signed-off-by: guo-shaoge --- dbms/src/Common/HashTable/Hash.h | 125 +++++++++++++++++++++++++++++ dbms/src/Interpreters/Aggregator.h | 20 ++--- 2 files changed, 135 insertions(+), 10 deletions(-) diff --git a/dbms/src/Common/HashTable/Hash.h b/dbms/src/Common/HashTable/Hash.h index b4f5d2c0a04..3f25f64bc74 100644 --- a/dbms/src/Common/HashTable/Hash.h +++ b/dbms/src/Common/HashTable/Hash.h @@ -416,3 +416,128 @@ struct IntHash32, void>> } } }; + +inline uint64_t umul128(uint64_t v, uint64_t kmul, uint64_t * high) +{ + DB::Int128 res = static_cast(v) * static_cast(kmul); + *high = static_cast(res >> 64); + return static_cast(res); +} + +template +inline void hash_combine(uint64_t & seed, const T & val) +{ + // from: https://github.com/HowardHinnant/hash_append/issues/7#issuecomment-629414712 + seed ^= std::hash{}(val) + 0x9e3779b97f4a7c15LLU + (seed << 12) + (seed >> 4); +} + +inline uint64_t hash_int128(uint64_t seed, const DB::Int128 & v) +{ + auto low = static_cast(v); + auto high = static_cast(v >> 64); + hash_combine(seed, low); + hash_combine(seed, high); + return seed; +} + +inline uint64_t hash_uint128(uint64_t seed, const DB::UInt128 & v) +{ + hash_combine(seed, v.low); + hash_combine(seed, v.high); + return seed; +} + +inline uint64_t hash_int256(uint64_t seed, const DB::Int256 & v) +{ + const auto & backend_value = v.backend(); + for (size_t i = 0; i < backend_value.size(); ++i) + { + hash_combine(seed, backend_value.limbs()[i]); + } + return seed; +} + +inline uint64_t hash_uint256(uint64_t seed, const DB::UInt256 & v) +{ + hash_combine(seed, v.a); + hash_combine(seed, v.b); + hash_combine(seed, v.c); + hash_combine(seed, v.d); + return seed; +} + +template +struct HashWithMixSeedHelper +{ + inline size_t operator()(size_t) const; +}; + +template <> +struct HashWithMixSeedHelper<4> +{ + inline 
size_t operator()(size_t v) const + { + // from: https://github.com/aappleby/smhasher/blob/0ff96f7835817a27d0487325b6c16033e2992eb5/src/MurmurHash3.cpp#L102 + static constexpr uint64_t kmul = 0xcc9e2d51UL; + uint64_t mul = v * kmul; + return static_cast(mul ^ (mul >> 32u)); + } +}; + +template <> +struct HashWithMixSeedHelper<8> +{ + inline size_t operator()(size_t v) const + { + // from: https://github.com/martinus/robin-hood-hashing/blob/b21730713f4b5296bec411917c46919f7b38b178/src/include/robin_hood.h#L735 + static constexpr uint64_t kmul = 0xde5fb9d2630458e9ULL; + uint64_t high = 0; + uint64_t low = umul128(v, kmul, &high); + return static_cast(high + low); + } +}; + +template +struct HashWithMixSeed +{ + inline size_t operator()(const T & v) const + { + return HashWithMixSeedHelper()(std::hash()(v)); + } +}; + +template <> +struct HashWithMixSeed +{ + inline size_t operator()(const DB::Int128 & v) const + { + return HashWithMixSeedHelper()(hash_int128(0, v)); + } +}; + +template <> +struct HashWithMixSeed +{ + inline size_t operator()(const DB::UInt128 & v) const + { + return HashWithMixSeedHelper()(hash_uint128(0, v)); + } +}; + +template <> +struct HashWithMixSeed +{ + inline size_t operator()(const DB::Int256 & v) const + { + return HashWithMixSeedHelper()(hash_int256(0, v)); + } +}; + +template <> +struct HashWithMixSeed +{ + inline size_t operator()(const DB::UInt256 & v) const + { + return HashWithMixSeedHelper()(hash_uint256(0, v)); + } +}; diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index 381bfba8462..9515782793a 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -77,27 +77,27 @@ using AggregatedDataWithoutKey = AggregateDataPtr; using AggregatedDataWithUInt8Key = FixedImplicitZeroHashMapWithCalculatedSize; using AggregatedDataWithUInt16Key = FixedImplicitZeroHashMap; -using AggregatedDataWithUInt32Key = HashMap>; -using AggregatedDataWithUInt64Key = HashMap>; +using 
AggregatedDataWithUInt32Key = HashMap>; +using AggregatedDataWithUInt64Key = HashMap>; using AggregatedDataWithShortStringKey = StringHashMap; using AggregatedDataWithStringKey = HashMapWithSavedHash; -using AggregatedDataWithInt256Key = HashMap>; +using AggregatedDataWithInt256Key = HashMap>; -using AggregatedDataWithKeys128 = HashMap>; -using AggregatedDataWithKeys256 = HashMap>; +using AggregatedDataWithKeys128 = HashMap>; +using AggregatedDataWithKeys256 = HashMap>; -using AggregatedDataWithUInt32KeyTwoLevel = TwoLevelHashMap>; -using AggregatedDataWithUInt64KeyTwoLevel = TwoLevelHashMap>; +using AggregatedDataWithUInt32KeyTwoLevel = TwoLevelHashMap>; +using AggregatedDataWithUInt64KeyTwoLevel = TwoLevelHashMap>; -using AggregatedDataWithInt256KeyTwoLevel = TwoLevelHashMap>; +using AggregatedDataWithInt256KeyTwoLevel = TwoLevelHashMap>; using AggregatedDataWithShortStringKeyTwoLevel = TwoLevelStringHashMap; using AggregatedDataWithStringKeyTwoLevel = TwoLevelHashMapWithSavedHash; -using AggregatedDataWithKeys128TwoLevel = TwoLevelHashMap>; -using AggregatedDataWithKeys256TwoLevel = TwoLevelHashMap>; +using AggregatedDataWithKeys128TwoLevel = TwoLevelHashMap>; +using AggregatedDataWithKeys256TwoLevel = TwoLevelHashMap>; /** Variants with better hash function, using more than 32 bits for hash. 
* Using for merging phase of external aggregation, where number of keys may be far greater than 4 billion, From e8a2df81cb2bfc4a5eb3b2660f5b2aa4c5de4d97 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Tue, 26 Nov 2024 17:31:24 +0800 Subject: [PATCH 02/24] prefetch done Signed-off-by: guo-shaoge --- dbms/src/Common/ColumnsHashing.h | 19 ++++ dbms/src/Common/ColumnsHashingImpl.h | 62 ++++++++++--- dbms/src/Common/HashTable/FixedHashTable.h | 3 +- dbms/src/Common/HashTable/Hash.h | 28 +++--- dbms/src/Common/HashTable/HashTable.h | 11 +++ dbms/src/Common/HashTable/SmallTable.h | 1 + dbms/src/Common/HashTable/StringHashMap.h | 9 +- dbms/src/Common/HashTable/StringHashTable.h | 92 ++++++++++++------- dbms/src/Common/HashTable/TwoLevelHashTable.h | 13 +++ .../HashTable/TwoLevelStringHashTable.h | 53 +++++++++-- dbms/src/Interpreters/Aggregator.cpp | 63 +++++++++++-- dbms/src/Interpreters/Aggregator.h | 7 +- 12 files changed, 275 insertions(+), 86 deletions(-) diff --git a/dbms/src/Common/ColumnsHashing.h b/dbms/src/Common/ColumnsHashing.h index 398d6605e60..e14a793567c 100644 --- a/dbms/src/Common/ColumnsHashing.h +++ b/dbms/src/Common/ColumnsHashing.h @@ -49,14 +49,17 @@ struct HashMethodOneNumber using Base = columns_hashing_impl::HashMethodBase; const FieldType * vec; + const size_t total_rows; /// If the keys of a fixed length then key_sizes contains their lengths, empty otherwise. 
HashMethodOneNumber(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const TiDB::TiDBCollators &) + : total_rows(key_columns[0]->size()) { vec = &static_cast *>(key_columns[0])->getData()[0]; } explicit HashMethodOneNumber(const IColumn * column) + : total_rows(column->size()) { vec = &static_cast *>(column)->getData()[0]; } @@ -82,6 +85,8 @@ struct HashMethodOneNumber } const FieldType * getKeyData() const { return vec; } + + size_t getTotalRows() const { return total_rows; } }; @@ -97,11 +102,13 @@ struct HashMethodString const IColumn::Offset * offsets; const UInt8 * chars; TiDB::TiDBCollatorPtr collator = nullptr; + const size_t total_rows; HashMethodString( const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const TiDB::TiDBCollators & collators) + : total_rows(key_columns[0]->size()) { const IColumn & column = *key_columns[0]; const auto & column_string = assert_cast(column); @@ -149,8 +156,10 @@ struct HashMethodStringBin const IColumn::Offset * offsets; const UInt8 * chars; + const size_t total_rows; HashMethodStringBin(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const TiDB::TiDBCollators &) + : total_rows(key_columns[0]->size()) { const IColumn & column = *key_columns[0]; const auto & column_string = assert_cast(column); @@ -346,10 +355,12 @@ struct HashMethodFastPathTwoKeysSerialized Key1Desc key_1_desc; Key2Desc key_2_desc; + const size_t total_rows; HashMethodFastPathTwoKeysSerialized(const ColumnRawPtrs & key_columns, const Sizes &, const TiDB::TiDBCollators &) : key_1_desc(key_columns[0]) , key_2_desc(key_columns[1]) + , total_rows(key_columns[0]->size()) {} ALWAYS_INLINE inline auto getKeyHolder(ssize_t row, Arena * pool, std::vector &) const @@ -384,11 +395,13 @@ struct HashMethodFixedString size_t n; const ColumnFixedString::Chars_t * chars; TiDB::TiDBCollatorPtr collator = nullptr; + const size_t total_rows; HashMethodFixedString( const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const 
TiDB::TiDBCollators & collators) + : total_rows(key_columns[0]->size()) { const IColumn & column = *key_columns[0]; const auto & column_string = assert_cast(column); @@ -442,6 +455,7 @@ struct HashMethodKeysFixed Sizes key_sizes; size_t keys_size; + const size_t total_rows; /// SSSE3 shuffle method can be used. Shuffle masks will be calculated and stored here. #if defined(__SSSE3__) && !defined(MEMORY_SANITIZER) @@ -467,6 +481,7 @@ struct HashMethodKeysFixed : Base(key_columns) , key_sizes(std::move(key_sizes_)) , keys_size(key_columns.size()) + , total_rows(key_columns[0]->size()) { if (usePreparedKeys(key_sizes)) { @@ -596,6 +611,7 @@ struct HashMethodSerialized ColumnRawPtrs key_columns; size_t keys_size; TiDB::TiDBCollators collators; + const size_t total_rows; HashMethodSerialized( const ColumnRawPtrs & key_columns_, @@ -604,6 +620,7 @@ struct HashMethodSerialized : key_columns(key_columns_) , keys_size(key_columns_.size()) , collators(collators_) + , total_rows(key_columns_[0]->size()) {} ALWAYS_INLINE inline SerializedKeyHolder getKeyHolder( @@ -631,10 +648,12 @@ struct HashMethodHashed ColumnRawPtrs key_columns; TiDB::TiDBCollators collators; + const size_t total_rows; HashMethodHashed(ColumnRawPtrs key_columns_, const Sizes &, const TiDB::TiDBCollators & collators_) : key_columns(std::move(key_columns_)) , collators(collators_) + , total_rows(key_columns[0]->size()) {} ALWAYS_INLINE inline Key getKeyHolder(size_t row, Arena *, std::vector & sort_key_containers) const diff --git a/dbms/src/Common/ColumnsHashingImpl.h b/dbms/src/Common/ColumnsHashingImpl.h index d4f4143015d..24574ed40a4 100644 --- a/dbms/src/Common/ColumnsHashingImpl.h +++ b/dbms/src/Common/ColumnsHashingImpl.h @@ -127,27 +127,53 @@ class HashMethodBase using FindResult = FindResultImpl; static constexpr bool has_mapped = !std::is_same::value; using Cache = LastElementCache; + static constexpr size_t prefetch_step = 16; - template + template ALWAYS_INLINE inline EmplaceResult emplaceKey( 
Data & data, size_t row, Arena & pool, - std::vector & sort_key_containers) + std::vector & sort_key_containers, + const std::vector & hashvals = {}) { auto key_holder = static_cast(*this).getKeyHolder(row, &pool, sort_key_containers); - return emplaceImpl(key_holder, data); + if constexpr (enable_prefetch) + { + const auto idx = row + prefetch_step; + if (idx < hashvals.size()) + data.prefetch(hashvals[idx]); + + return emplaceImpl(key_holder, data, hashvals[row]); + } + else + { + return emplaceImpl(key_holder, data, 0); + } } - template + template ALWAYS_INLINE inline FindResult findKey( Data & data, size_t row, Arena & pool, - std::vector & sort_key_containers) + std::vector & sort_key_containers, + const std::vector & hashvals = {}) { auto key_holder = static_cast(*this).getKeyHolder(row, &pool, sort_key_containers); - return findKeyImpl(keyHolderGetKey(key_holder), data); + if constexpr (enable_prefetch) + { + const auto idx = row + prefetch_step; + if (idx < hashvals.size()) + data.prefetch(hashvals[idx]); + + return findKeyImpl(keyHolderGetKey(key_holder), data, hashvals[row]); + } + else + { + return findKeyImpl(keyHolderGetKey(key_holder), data, 0); + } + } template @@ -155,9 +181,9 @@ class HashMethodBase const Data & data, size_t row, Arena & pool, - std::vector & sort_key_containers) + std::vector & sort_key_containers) const { - auto key_holder = static_cast(*this).getKeyHolder(row, &pool, sort_key_containers); + auto key_holder = static_cast(*this).getKeyHolder(row, &pool, sort_key_containers); return data.hash(keyHolderGetKey(key_holder)); } @@ -179,8 +205,8 @@ class HashMethodBase } } - template - ALWAYS_INLINE inline EmplaceResult emplaceImpl(KeyHolder & key_holder, Data & data) + template + ALWAYS_INLINE inline EmplaceResult emplaceImpl(KeyHolder & key_holder, Data & data, size_t hashval) { if constexpr (Cache::consecutive_keys_optimization) { @@ -195,7 +221,11 @@ class HashMethodBase typename Data::LookupResult it; bool inserted = false; - 
data.emplace(key_holder, it, inserted); + + if constexpr (enable_prefetch) + data.emplace(key_holder, it, inserted, hashval); + else + data.emplace(key_holder, it, inserted); [[maybe_unused]] Mapped * cached = nullptr; if constexpr (has_mapped) @@ -232,8 +262,8 @@ class HashMethodBase return EmplaceResult(inserted); } - template - ALWAYS_INLINE inline FindResult findKeyImpl(Key key, Data & data) + template + ALWAYS_INLINE inline FindResult findKeyImpl(Key key, Data & data, size_t hashval) { if constexpr (Cache::consecutive_keys_optimization) { @@ -246,7 +276,11 @@ class HashMethodBase } } - auto it = data.find(key); + typename Data::LookupResult it; + if constexpr (enable_prefetch) + it = data.find(key, hashval); + else + it = data.find(key); if constexpr (consecutive_keys_optimization) { diff --git a/dbms/src/Common/HashTable/FixedHashTable.h b/dbms/src/Common/HashTable/FixedHashTable.h index 259e90684fc..cfa562667dc 100644 --- a/dbms/src/Common/HashTable/FixedHashTable.h +++ b/dbms/src/Common/HashTable/FixedHashTable.h @@ -212,7 +212,6 @@ class FixedHashTable typename cell_type::CellExt cell; }; - public: using key_type = Key; using mapped_type = typename Cell::mapped_type; @@ -352,6 +351,8 @@ class FixedHashTable iterator end() { return iterator(this, buf ? buf + NUM_CELLS : buf); } + inline void prefetch(size_t) {} + /// The last parameter is unused but exists for compatibility with HashTable interface. 
void ALWAYS_INLINE emplace(const Key & x, LookupResult & it, bool & inserted, size_t /* hash */ = 0) { diff --git a/dbms/src/Common/HashTable/Hash.h b/dbms/src/Common/HashTable/Hash.h index 3f25f64bc74..883ec8ab6ff 100644 --- a/dbms/src/Common/HashTable/Hash.h +++ b/dbms/src/Common/HashTable/Hash.h @@ -469,13 +469,13 @@ inline uint64_t hash_uint256(uint64_t seed, const DB::UInt256 & v) template struct HashWithMixSeedHelper { - inline size_t operator()(size_t) const; + static inline size_t operator()(size_t); }; template <> struct HashWithMixSeedHelper<4> { - inline size_t operator()(size_t v) const + static inline size_t operator()(size_t v) { // from: https://github.com/aappleby/smhasher/blob/0ff96f7835817a27d0487325b6c16033e2992eb5/src/MurmurHash3.cpp#L102 static constexpr uint64_t kmul = 0xcc9e2d51UL; @@ -487,7 +487,7 @@ struct HashWithMixSeedHelper<4> template <> struct HashWithMixSeedHelper<8> { - inline size_t operator()(size_t v) const + static inline size_t operator()(size_t v) { // from: https://github.com/martinus/robin-hood-hashing/blob/b21730713f4b5296bec411917c46919f7b38b178/src/include/robin_hood.h#L735 static constexpr uint64_t kmul = 0xde5fb9d2630458e9ULL; @@ -500,44 +500,44 @@ struct HashWithMixSeedHelper<8> template struct HashWithMixSeed { - inline size_t operator()(const T & v) const + static size_t operator()(const T & v) { - return HashWithMixSeedHelper()(std::hash()(v)); + return HashWithMixSeedHelper::operator()(std::hash()(v)); } }; template <> struct HashWithMixSeed { - inline size_t operator()(const DB::Int128 & v) const + static size_t operator()(const DB::Int128 & v) { - return HashWithMixSeedHelper()(hash_int128(0, v)); + return HashWithMixSeedHelper::operator()(hash_int128(0, v)); } }; template <> struct HashWithMixSeed { - inline size_t operator()(const DB::UInt128 & v) const + static inline size_t operator()(const DB::UInt128 & v) { - return HashWithMixSeedHelper()(hash_uint128(0, v)); + return 
HashWithMixSeedHelper::operator()(hash_uint128(0, v)); } }; template <> struct HashWithMixSeed { - inline size_t operator()(const DB::Int256 & v) const + static inline size_t operator()(const DB::Int256 & v) { - return HashWithMixSeedHelper()(hash_int256(0, v)); + return HashWithMixSeedHelper::operator()(hash_int256(0, v)); } }; template <> struct HashWithMixSeed -{ - inline size_t operator()(const DB::UInt256 & v) const +{ + static inline size_t operator()(const DB::UInt256 & v) { - return HashWithMixSeedHelper()(hash_uint256(0, v)); + return HashWithMixSeedHelper::operator()(hash_uint256(0, v)); } }; diff --git a/dbms/src/Common/HashTable/HashTable.h b/dbms/src/Common/HashTable/HashTable.h index a4f0fe3be03..4f037f60019 100644 --- a/dbms/src/Common/HashTable/HashTable.h +++ b/dbms/src/Common/HashTable/HashTable.h @@ -851,6 +851,17 @@ class HashTable iterator end() { return iterator(this, buf ? buf + grower.bufSize() : buf); } + void ALWAYS_INLINE prefetch(size_t hashval) const + { + (void)hashval; +#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) + size_t place_value = grower.place(hashval); + __mm_prefetch((const char*)(&buf[place_value]), _MM_HINT_NTA); +#elif defined(__GNUC__) + size_t place_value = grower.place(hashval); + __builtin_prefetch(static_cast(&buf[place_value])); +#endif + } protected: const_iterator iteratorTo(const Cell * ptr) const { return const_iterator(this, ptr); } diff --git a/dbms/src/Common/HashTable/SmallTable.h b/dbms/src/Common/HashTable/SmallTable.h index fa40b479430..a032ae76cff 100644 --- a/dbms/src/Common/HashTable/SmallTable.h +++ b/dbms/src/Common/HashTable/SmallTable.h @@ -296,6 +296,7 @@ class SmallTable iterator ALWAYS_INLINE find(Key x) { return iteratorTo(findCell(x)); } const_iterator ALWAYS_INLINE find(Key x) const { return iteratorTo(findCell(x)); } + void ALWAYS_INLINE prefetch(size_t) {} void write(DB::WriteBuffer & wb) const { diff --git a/dbms/src/Common/HashTable/StringHashMap.h 
b/dbms/src/Common/HashTable/StringHashMap.h index 6f7e668e1d9..cad653907fa 100644 --- a/dbms/src/Common/HashTable/StringHashMap.h +++ b/dbms/src/Common/HashTable/StringHashMap.h @@ -90,29 +90,30 @@ struct StringHashMapCell template struct StringHashMapSubMaps { + using Hash = StringHashTableHash; using T0 = StringHashTableEmpty>; using T1 = HashMapTable< StringKey8, StringHashMapCell, - StringHashTableHash, + Hash, StringHashTableGrower<>, Allocator>; using T2 = HashMapTable< StringKey16, StringHashMapCell, - StringHashTableHash, + Hash, StringHashTableGrower<>, Allocator>; using T3 = HashMapTable< StringKey24, StringHashMapCell, - StringHashTableHash, + Hash, StringHashTableGrower<>, Allocator>; using Ts = HashMapTable< StringRef, StringHashMapCell, - StringHashTableHash, + Hash, StringHashTableGrower<>, Allocator>; }; diff --git a/dbms/src/Common/HashTable/StringHashTable.h b/dbms/src/Common/HashTable/StringHashTable.h index aa4825f171a..e11972d0795 100644 --- a/dbms/src/Common/HashTable/StringHashTable.h +++ b/dbms/src/Common/HashTable/StringHashTable.h @@ -20,7 +20,6 @@ #include #include - using StringKey8 = UInt64; using StringKey16 = DB::UInt128; struct StringKey24 @@ -48,45 +47,38 @@ inline StringRef ALWAYS_INLINE toStringRef(const StringKey24 & n) return {reinterpret_cast(&n), 24ul - (__builtin_clzll(n.c) >> 3)}; } -struct StringHashTableHash +inline size_t hash_string_key_24(uint64_t seed, const StringKey24 & v) { -#if defined(__SSE4_2__) - size_t ALWAYS_INLINE operator()(StringKey8 key) const - { - size_t res = -1ULL; - res = _mm_crc32_u64(res, key); - return res; - } - size_t ALWAYS_INLINE operator()(const StringKey16 & key) const - { - size_t res = -1ULL; - res = _mm_crc32_u64(res, key.low); - res = _mm_crc32_u64(res, key.high); - return res; - } - size_t ALWAYS_INLINE operator()(const StringKey24 & key) const + hash_combine(seed, v.a); + hash_combine(seed, v.b); + hash_combine(seed, v.c); + return seed; +} + +template <> +struct HashWithMixSeed +{ + 
static inline size_t operator()(const StringKey24 & v) { - size_t res = -1ULL; - res = _mm_crc32_u64(res, key.a); - res = _mm_crc32_u64(res, key.b); - res = _mm_crc32_u64(res, key.c); - return res; + return HashWithMixSeedHelper::operator()(hash_string_key_24(0, v)); } -#else - size_t ALWAYS_INLINE operator()(StringKey8 key) const +}; + +struct StringHashTableHash +{ + static size_t ALWAYS_INLINE operator()(StringKey8 key) { - return CityHash_v1_0_2::CityHash64(reinterpret_cast(&key), 8); + return HashWithMixSeed::operator()(key); } - size_t ALWAYS_INLINE operator()(const StringKey16 & key) const + static size_t ALWAYS_INLINE operator()(const StringKey16 & key) { - return CityHash_v1_0_2::CityHash64(reinterpret_cast(&key), 16); + return HashWithMixSeed::operator()(key); } - size_t ALWAYS_INLINE operator()(const StringKey24 & key) const + static size_t ALWAYS_INLINE operator()(const StringKey24 & key) { - return CityHash_v1_0_2::CityHash64(reinterpret_cast(&key), 24); + return HashWithMixSeed::operator()(key); } -#endif - size_t ALWAYS_INLINE operator()(StringRef key) const { return StringRefHash()(key); } + static size_t ALWAYS_INLINE operator()(const StringRef & key) { return StringRefHash()(key); } }; template @@ -150,6 +142,8 @@ struct StringHashTableEmpty //-V730 return hasZero() ? zeroValue() : nullptr; } + void ALWAYS_INLINE prefetch(size_t) {} + void write(DB::WriteBuffer & wb) const { zeroValue()->write(wb); } void writeText(DB::WriteBuffer & wb) const { zeroValue()->writeText(wb); } void read(DB::ReadBuffer & rb) { zeroValue()->read(rb); } @@ -157,6 +151,7 @@ struct StringHashTableEmpty //-V730 size_t size() const { return hasZero() ? 
1 : 0; } bool empty() const { return !hasZero(); } size_t getBufferSizeInBytes() const { return sizeof(Cell); } + size_t getBufferSizeInCells() const { return 1; } void setResizeCallback(const ResizeCallback &) {} size_t getCollisions() const { return 0; } }; @@ -364,6 +359,13 @@ class StringHashTable : private boost::noncopyable this->dispatch(*this, key_holder, EmplaceCallable(it, inserted)); } + // TODO del + template + void ALWAYS_INLINE emplace(KeyHolder &&, LookupResult &, bool &, size_t) + { + RUNTIME_CHECK_MSG(false, "shouldn't reach here, you should use submap::emplace instead"); + } + struct FindCallable { // find() doesn't need any key memory management, so we don't work with @@ -380,12 +382,35 @@ class StringHashTable : private boost::noncopyable } }; + // We will not prefetch StringHashTable directly, instead caller should call specific submap's prefetch. + // Because StringHashTable doesn't know which submap to prefetch. + void prefetch(size_t) const + { + RUNTIME_CHECK_MSG(false, "shouldn't reach here, you should use submap::prefetch instead"); + } + LookupResult ALWAYS_INLINE find(const Key & x) { return dispatch(*this, x, FindCallable{}); } ConstLookupResult ALWAYS_INLINE find(const Key & x) const { return dispatch(*this, x, FindCallable{}); } + // TODO del + LookupResult ALWAYS_INLINE find(const Key &, size_t) + { + RUNTIME_CHECK_MSG(false, "shouldn't reach here, you should use submap::find instead"); + } + ConstLookupResult ALWAYS_INLINE find(const Key &, size_t) const + { + RUNTIME_CHECK_MSG(false, "shouldn't reach here, you should use submap::find instead"); + } + bool ALWAYS_INLINE has(const Key & x, size_t = 0) const { return dispatch(*this, x, FindCallable{}) != nullptr; } + template + size_t ALWAYS_INLINE hash(const HashKeyType & key) const + { + return SubMaps::Hash::operator()(key); + } + void write(DB::WriteBuffer & wb) const { m0.write(wb); @@ -434,6 +459,11 @@ class StringHashTable : private boost::noncopyable bool empty() const { 
return m0.empty() && m1.empty() && m2.empty() && m3.empty() && ms.empty(); } + size_t getBufferSizeInCells() const + { + return m0.getBufferSizeInCells() + m1.getBufferSizeInCells() + m2.getBufferSizeInCells() + + m3.getBufferSizeInCells() + ms.getBufferSizeInCells(); + } size_t getBufferSizeInBytes() const { return m0.getBufferSizeInBytes() + m1.getBufferSizeInBytes() + m2.getBufferSizeInBytes() diff --git a/dbms/src/Common/HashTable/TwoLevelHashTable.h b/dbms/src/Common/HashTable/TwoLevelHashTable.h index 6778cd4a3e8..01c14dd07c2 100644 --- a/dbms/src/Common/HashTable/TwoLevelHashTable.h +++ b/dbms/src/Common/HashTable/TwoLevelHashTable.h @@ -285,6 +285,12 @@ class TwoLevelHashTable : private boost::noncopyable impls[buck].emplace(key_holder, it, inserted, hash_value); } + void ALWAYS_INLINE prefetch(size_t hashval) const + { + size_t buck = getBucketFromHash(hashval); + impls[buck].prefetch(hashval); + } + LookupResult ALWAYS_INLINE find(Key x, size_t hash_value) { size_t buck = getBucketFromHash(hash_value); @@ -352,6 +358,13 @@ class TwoLevelHashTable : private boost::noncopyable return true; } + size_t getBufferSizeInCells() const + { + size_t res = 0; + for (const auto & impl : impls) + res += impl.getBufferSizeInCells(); + return res; + } size_t getBufferSizeInBytes() const { size_t res = 0; diff --git a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h index 5bdb24a3d13..5608d0fd0f8 100644 --- a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h @@ -30,8 +30,20 @@ class TwoLevelStringHashTable : private boost::noncopyable static constexpr size_t NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET; static constexpr size_t MAX_BUCKET = NUM_BUCKETS - 1; + template + size_t ALWAYS_INLINE hash(const HashKeyType & key) const + { + return SubMaps::Hash::operator()(key); + } + + // Same reason as StringHashTable::prefetch. 
+ void prefetch(size_t) const + { + RUNTIME_CHECK_MSG(false, "shouldn't reach here, you should use submap::prefetch instead"); + } + // TODO: currently hashing contains redundant computations when doing distributed or external aggregations - size_t hash(const Key & x) const + size_t hashStringRef(const Key & x) const { return const_cast(*this).dispatch(*this, x, [&](const auto &, const auto &, size_t hash) { return hash; @@ -44,7 +56,7 @@ class TwoLevelStringHashTable : private boost::noncopyable impl.setResizeCallback(resize_callback); } - size_t operator()(const Key & x) const { return hash(x); } + size_t operator()(const Key & x) const { return hashStringRef(x); } /// NOTE Bad for hash tables with more than 2^32 cells. static size_t getBucketFromHash(size_t hash_value) { return (hash_value >> (32 - BITS_FOR_BUCKET)) & MAX_BUCKET; } @@ -104,7 +116,6 @@ class TwoLevelStringHashTable : private boost::noncopyable #endif dispatch(Self & self, KeyHolder && key_holder, Func && func) { - StringHashTableHash hash; const StringRef & x = keyHolderGetKey(key_holder); const size_t sz = x.size; if (sz == 0) @@ -117,7 +128,7 @@ class TwoLevelStringHashTable : private boost::noncopyable { // Strings with trailing zeros are not representable as fixed-size // string keys. Put them to the generic table. 
- auto res = hash(x); + auto res = SubMaps::Hash::operator()(x); auto buck = getBucketFromHash(res); return func(self.impls[buck].ms, std::forward(key_holder), res); } @@ -154,7 +165,7 @@ class TwoLevelStringHashTable : private boost::noncopyable else n[0] <<= s; } - auto res = hash(k8); + auto res = SubMaps::Hash::operator()(k8); auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); return func(self.impls[buck].m1, k8, res); @@ -168,7 +179,7 @@ class TwoLevelStringHashTable : private boost::noncopyable n[1] >>= s; else n[1] <<= s; - auto res = hash(k16); + auto res = SubMaps::Hash::operator()(k16); auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); return func(self.impls[buck].m2, k16, res); @@ -182,14 +193,14 @@ class TwoLevelStringHashTable : private boost::noncopyable n[2] >>= s; else n[2] <<= s; - auto res = hash(k24); + auto res = SubMaps::Hash::operator()(k24); auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); return func(self.impls[buck].m3, k24, res); } default: { - auto res = hash(x); + auto res = SubMaps::Hash::operator()(x); auto buck = getBucketFromHash(res); return func(self.impls[buck].ms, std::forward(key_holder), res); } @@ -202,12 +213,27 @@ class TwoLevelStringHashTable : private boost::noncopyable dispatch(*this, key_holder, typename Impl::EmplaceCallable{it, inserted}); } - LookupResult ALWAYS_INLINE find(const Key x) { return dispatch(*this, x, typename Impl::FindCallable{}); } + template + void ALWAYS_INLINE emplace(KeyHolder &&, LookupResult &, bool &, size_t) + { + RUNTIME_CHECK_MSG(false, "shouldn't reach here, you should use submap::emplace instead"); + } + + LookupResult ALWAYS_INLINE find(const Key & x) { return dispatch(*this, x, typename Impl::FindCallable{}); } - ConstLookupResult ALWAYS_INLINE find(const Key x) const + ConstLookupResult ALWAYS_INLINE find(const Key & x) const { return dispatch(*this, x, typename Impl::FindCallable{}); } + LookupResult ALWAYS_INLINE find(const Key 
&, size_t) + { + RUNTIME_CHECK_MSG(false, "shouldn't reach here, you should use submap::find instead"); + } + + ConstLookupResult ALWAYS_INLINE find(const Key &, size_t) const + { + RUNTIME_CHECK_MSG(false, "shouldn't reach here, you should use submap::find instead"); + } void write(DB::WriteBuffer & wb) const { @@ -259,6 +285,13 @@ class TwoLevelStringHashTable : private boost::noncopyable return true; } + size_t getBufferSizeInCells() const + { + size_t res = 0; + for (const auto & impl : impls) + res = impl.getBufferSizeInCells(); + return res; + } size_t getBufferSizeInBytes() const { size_t res = 0; diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index f25c22717e8..180799bd7ed 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -665,23 +665,43 @@ void NO_INLINE Aggregator::executeImpl( { typename Method::State state(agg_process_info.key_columns, key_sizes, collators); - executeImplBatch(method, state, aggregates_pool, agg_process_info); + if (method.data.getBufferSizeInCells() < 8192) + executeImplBatch(method, state, aggregates_pool, agg_process_info); + else + executeImplBatch(method, state, aggregates_pool, agg_process_info); +} + +template +std::vector getHashVals(size_t start_row, size_t end_row, const Data & data, const State & state, + std::vector & sort_key_containers, Arena * pool) +{ + std::vector hashvals(state.total_rows, 0); + for (size_t i = start_row; i < end_row; ++i) + { + hashvals[i] = state.getHash(data, i, *pool, sort_key_containers); + } + return hashvals; } -template +template std::optional::ResultType> Aggregator::emplaceOrFindKey( Method & method, typename Method::State & state, size_t index, Arena & aggregates_pool, - std::vector & sort_key_containers) const + std::vector & sort_key_containers, + const std::vector & hashvals) const { try { if constexpr (only_lookup) - return state.findKey(method.data, index, aggregates_pool, sort_key_containers); + { + 
return state.template findKey(method.data, index, aggregates_pool, sort_key_containers, hashvals); + } else - return state.emplaceKey(method.data, index, aggregates_pool, sort_key_containers); + { + return state.template emplaceKey(method.data, index, aggregates_pool, sort_key_containers, hashvals); + } } catch (ResizeException &) { @@ -689,7 +709,7 @@ std::optional::Res } } -template +template ALWAYS_INLINE void Aggregator::executeImplBatch( Method & method, typename Method::State & state, @@ -712,14 +732,28 @@ ALWAYS_INLINE void Aggregator::executeImplBatch( { /// For all rows. AggregateDataPtr place = aggregates_pool->alloc(0); + std::vector hashvals; + if constexpr (enable_prefetch) + { + hashvals = getHashVals( + agg_process_info.start_row, + agg_process_info.end_row, + method.data, + state, + sort_key_containers, + aggregates_pool); + + } + for (size_t i = 0; i < agg_size; ++i) { - auto emplace_result_hold = emplaceOrFindKey( + auto emplace_result_hold = emplaceOrFindKey( method, state, agg_process_info.start_row, *aggregates_pool, - sort_key_containers); + sort_key_containers, + hashvals); if likely (emplace_result_hold.has_value()) { if constexpr (collect_hit_rate) @@ -784,13 +818,24 @@ ALWAYS_INLINE void Aggregator::executeImplBatch( std::unique_ptr places(new AggregateDataPtr[agg_size]); std::optional processed_rows; + std::vector hashvals; + if constexpr (enable_prefetch) + { + hashvals = getHashVals( + agg_process_info.start_row, + agg_process_info.end_row, + method.data, + state, + sort_key_containers, + aggregates_pool); + } for (size_t i = agg_process_info.start_row; i < agg_process_info.start_row + agg_size; ++i) { AggregateDataPtr aggregate_data = nullptr; auto emplace_result_holder - = emplaceOrFindKey(method, state, i, *aggregates_pool, sort_key_containers); + = emplaceOrFindKey(method, state, i, *aggregates_pool, sort_key_containers, hashvals); if unlikely (!emplace_result_holder.has_value()) { LOG_INFO(log, "HashTable resize throw 
ResizeException since the data is already marked for spill"); diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index 9515782793a..0f1365694ac 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -1454,20 +1454,21 @@ class Aggregator AggProcessInfo & agg_process_info, TiDB::TiDBCollators & collators) const; - template + template void executeImplBatch( Method & method, typename Method::State & state, Arena * aggregates_pool, AggProcessInfo & agg_process_info) const; - template + template std::optional::ResultType> emplaceOrFindKey( Method & method, typename Method::State & state, size_t index, Arena & aggregates_pool, - std::vector & sort_key_containers) const; + std::vector & sort_key_containers, + const std::vector & hashvals) const; /// For case when there are no keys (all aggregate into one row). static void executeWithoutKeyImpl(AggregatedDataWithoutKey & res, AggProcessInfo & agg_process_info, Arena * arena); From b3141662d11d44633e2fa9ac8dbf040b674718b5 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Wed, 27 Nov 2024 10:57:36 +0800 Subject: [PATCH 03/24] executeImplBatchStringHashMap done Signed-off-by: guo-shaoge --- dbms/src/Common/ColumnsHashingImpl.h | 54 +++- dbms/src/Common/HashTable/FixedHashTable.h | 2 + dbms/src/Common/HashTable/HashTable.h | 3 + dbms/src/Common/HashTable/SmallTable.h | 3 + dbms/src/Common/HashTable/StringHashTable.h | 164 +++++++++++- dbms/src/Common/HashTable/TwoLevelHashTable.h | 3 + .../HashTable/TwoLevelStringHashTable.h | 66 +++++ dbms/src/Interpreters/Aggregator.cpp | 237 ++++++++++++++++-- dbms/src/Interpreters/Aggregator.h | 17 ++ libs/libcommon/include/common/StringRef.h | 2 +- 10 files changed, 524 insertions(+), 27 deletions(-) diff --git a/dbms/src/Common/ColumnsHashingImpl.h b/dbms/src/Common/ColumnsHashingImpl.h index 24574ed40a4..0c8d0bc1a49 100644 --- a/dbms/src/Common/ColumnsHashingImpl.h +++ b/dbms/src/Common/ColumnsHashingImpl.h @@ -16,6 
+16,7 @@ #include #include +#include #include #include #include @@ -144,11 +145,11 @@ class HashMethodBase if (idx < hashvals.size()) data.prefetch(hashvals[idx]); - return emplaceImpl(key_holder, data, hashvals[row]); + return emplaceImpl(key_holder, data, hashvals[row]); } else { - return emplaceImpl(key_holder, data, 0); + return emplaceImpl(key_holder, data, 0); } } @@ -167,15 +168,52 @@ class HashMethodBase if (idx < hashvals.size()) data.prefetch(hashvals[idx]); - return findKeyImpl(keyHolderGetKey(key_holder), data, hashvals[row]); + return findKeyImpl(keyHolderGetKey(key_holder), data, hashvals[row]); } else { - return findKeyImpl(keyHolderGetKey(key_holder), data, 0); + return findKeyImpl(keyHolderGetKey(key_holder), data, 0); } } + template + ALWAYS_INLINE inline EmplaceResult emplaceStringKey( + Data & data, + size_t idx, + const std::vector & datas, + const std::vector & hashvals) + { + auto & submap = typename StringHashTableSubMapSelector>::getSubMap(data); + if constexpr (enable_prefetch) + { + const auto prefetch_idx = idx + prefetch_step; + if (prefetch_idx < hashvals.size()) + submap.prefetch(hashvals[prefetch_idx]); + } + + return emplaceImpl(datas[idx], submap, hashvals[idx]); + } + + // TODO Macro with emplaceStringKey + template + ALWAYS_INLINE inline FindResult findStringKey( + Data & data, + size_t idx, + const std::vector & datas, + const std::vector & hashvals) + { + auto & submap = typename StringHashTableSubMapSelector>::getSubMap(data); + if constexpr (enable_prefetch) + { + const auto prefetch_idx = idx + prefetch_step; + if (prefetch_idx < hashvals.size()) + submap.prefetch(hashvals[prefetch_idx]); + } + + return findKeyImpl(datas[idx], submap, hashvals[idx]); + } + template ALWAYS_INLINE inline size_t getHash( const Data & data, @@ -205,7 +243,7 @@ class HashMethodBase } } - template + template ALWAYS_INLINE inline EmplaceResult emplaceImpl(KeyHolder & key_holder, Data & data, size_t hashval) { if constexpr 
(Cache::consecutive_keys_optimization) @@ -222,7 +260,7 @@ class HashMethodBase typename Data::LookupResult it; bool inserted = false; - if constexpr (enable_prefetch) + if constexpr (use_hashval) data.emplace(key_holder, it, inserted, hashval); else data.emplace(key_holder, it, inserted); @@ -262,7 +300,7 @@ class HashMethodBase return EmplaceResult(inserted); } - template + template ALWAYS_INLINE inline FindResult findKeyImpl(Key key, Data & data, size_t hashval) { if constexpr (Cache::consecutive_keys_optimization) @@ -277,7 +315,7 @@ class HashMethodBase } typename Data::LookupResult it; - if constexpr (enable_prefetch) + if constexpr (use_hashval) it = data.find(key, hashval); else it = data.find(key); diff --git a/dbms/src/Common/HashTable/FixedHashTable.h b/dbms/src/Common/HashTable/FixedHashTable.h index cfa562667dc..8b0b721aa8c 100644 --- a/dbms/src/Common/HashTable/FixedHashTable.h +++ b/dbms/src/Common/HashTable/FixedHashTable.h @@ -221,6 +221,8 @@ class FixedHashTable using LookupResult = Cell *; using ConstLookupResult = const Cell *; + static constexpr bool is_string_hash_map = false; + static constexpr bool is_two_level = false; size_t hash(const Key & x) const { return x; } diff --git a/dbms/src/Common/HashTable/HashTable.h b/dbms/src/Common/HashTable/HashTable.h index 4f037f60019..12ebc49756c 100644 --- a/dbms/src/Common/HashTable/HashTable.h +++ b/dbms/src/Common/HashTable/HashTable.h @@ -402,6 +402,9 @@ class HashTable using Grower = GrowerType; using Allocator = AllocatorType; + static constexpr bool is_string_hash_map = false; + static constexpr bool is_two_level = false; + protected: friend class const_iterator; friend class iterator; diff --git a/dbms/src/Common/HashTable/SmallTable.h b/dbms/src/Common/HashTable/SmallTable.h index a032ae76cff..1292a4205da 100644 --- a/dbms/src/Common/HashTable/SmallTable.h +++ b/dbms/src/Common/HashTable/SmallTable.h @@ -85,6 +85,9 @@ class SmallTable using value_type = typename Cell::value_type; using 
cell_type = Cell; + static constexpr bool is_string_hash_map = false; + static constexpr bool is_two_level = false; + class Reader final : private Cell::State { public: diff --git a/dbms/src/Common/HashTable/StringHashTable.h b/dbms/src/Common/HashTable/StringHashTable.h index e11972d0795..f906b043a9e 100644 --- a/dbms/src/Common/HashTable/StringHashTable.h +++ b/dbms/src/Common/HashTable/StringHashTable.h @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -66,19 +67,24 @@ struct HashWithMixSeed struct StringHashTableHash { + using StringKey8Hasher = HashWithMixSeed; + using StringKey16Hasher = HashWithMixSeed; + using StringKey24Hasher = HashWithMixSeed; + using StringRefHasher = StringRefHash; + static size_t ALWAYS_INLINE operator()(StringKey8 key) { - return HashWithMixSeed::operator()(key); + return StringKey8Hasher::operator()(key); } static size_t ALWAYS_INLINE operator()(const StringKey16 & key) { - return HashWithMixSeed::operator()(key); + return StringKey16Hasher::operator()(key); } static size_t ALWAYS_INLINE operator()(const StringKey24 & key) { - return HashWithMixSeed::operator()(key); + return StringKey24Hasher::operator()(key); } - static size_t ALWAYS_INLINE operator()(const StringRef & key) { return StringRefHash()(key); } + static size_t ALWAYS_INLINE operator()(const StringRef & key) { return StringRefHasher::operator()(key); } }; template @@ -185,6 +191,92 @@ struct StringHashTableLookupResult friend bool operator!=(const std::nullptr_t &, const StringHashTableLookupResult & b) { return b.mapped_ptr; } }; + template + static auto +#if defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) + NO_INLINE NO_SANITIZE_ADDRESS NO_SANITIZE_THREAD +#else + ALWAYS_INLINE +#endif + dispatchStringHashTable(size_t row, KeyHolder && key_holder, Func0 && func0, Func8 && func8, Func16 && func16, Func24 && func24, FuncStr && func_str) + { + const StringRef & x = keyHolderGetKey(key_holder); + const size_t sz = x.size; + if (sz == 0) + { + 
return func0(x, row); + } + + if (x.data[sz - 1] == 0) + { + // Strings with trailing zeros are not representable as fixed-size + // string keys. Put them to the generic table. + return func_str(key_holder, row); + } + + const char * p = x.data; + // pending bits that needs to be shifted out + const char s = (-sz & 7) * 8; + union + { + StringKey8 k8; + StringKey16 k16; + StringKey24 k24; + UInt64 n[3]; + }; + switch ((sz - 1) >> 3) + { + case 0: // 1..8 bytes + { + // first half page + if ((reinterpret_cast(p) & 2048) == 0) + { + memcpy(&n[0], p, 8); + if constexpr (DB::isLittleEndian()) + n[0] &= (-1ULL >> s); + else + n[0] &= (-1ULL << s); + } + else + { + const char * lp = x.data + x.size - 8; + memcpy(&n[0], lp, 8); + if constexpr (DB::isLittleEndian()) + n[0] >>= s; + else + n[0] <<= s; + } + return func8(k8, row); + } + case 1: // 9..16 bytes + { + memcpy(&n[0], p, 8); + const char * lp = x.data + x.size - 8; + memcpy(&n[1], lp, 8); + if constexpr (DB::isLittleEndian()) + n[1] >>= s; + else + n[1] <<= s; + return func16(k16, row); + } + case 2: // 17..24 bytes + { + memcpy(&n[0], p, 16); + const char * lp = x.data + x.size - 8; + memcpy(&n[2], lp, 8); + if constexpr (DB::isLittleEndian()) + n[2] >>= s; + else + n[2] <<= s; + return func24(k24, row); + } + default: // >= 25 bytes + { + return func_str(key_holder, row); + } + } + } + template class StringHashTable : private boost::noncopyable { @@ -221,6 +313,9 @@ class StringHashTable : private boost::noncopyable using LookupResult = StringHashTableLookupResult; using ConstLookupResult = StringHashTableLookupResult; + static constexpr bool is_string_hash_map = true; + static constexpr bool is_two_level = false; + StringHashTable() = default; explicit StringHashTable(size_t reserve_for_num_elements) @@ -488,3 +583,64 @@ class StringHashTable : private boost::noncopyable ms.clearAndShrink(); } }; + +template +struct StringHashTableSubMapSelector; + +template +struct StringHashTableSubMapSelector<0, false, Data> 
+{ + struct Hash + { + static ALWAYS_INLINE size_t operator()(const StringRef & ) { return 0; } + }; + + typename Data::T0 & getSubMap(size_t, Data & data) + { + return data.m0; + } +}; + +template +struct StringHashTableSubMapSelector<1, false, Data> +{ + using Hash = StringHashTableHash::StringKey8Hasher; + + typename Data::T1 & getSubMap(size_t, Data & data) + { + return data.m1; + } +}; + +template +struct StringHashTableSubMapSelector<2, false, Data> +{ + using Hash = StringHashTableHash::StringKey16Hasher; + + typename Data::T2 & getSubMap(size_t, Data & data) + { + return data.m2; + } +}; + +template +struct StringHashTableSubMapSelector<3, false, Data> +{ + using Hash = StringHashTableHash::StringKey24Hasher; + + typename Data::T3 & getSubMap(size_t, Data & data) + { + return data.m3; + } +}; + +template +struct StringHashTableSubMapSelector<4, false, Data> +{ + using Hash = StringHashTableHash::StringRefHasher; + + typename Data::Ts & getSubMap(size_t, Data & data) + { + return data.ms; + } +}; diff --git a/dbms/src/Common/HashTable/TwoLevelHashTable.h b/dbms/src/Common/HashTable/TwoLevelHashTable.h index 01c14dd07c2..75a5402363d 100644 --- a/dbms/src/Common/HashTable/TwoLevelHashTable.h +++ b/dbms/src/Common/HashTable/TwoLevelHashTable.h @@ -60,6 +60,9 @@ class TwoLevelHashTable : private boost::noncopyable static constexpr size_t NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET; static constexpr size_t MAX_BUCKET = NUM_BUCKETS - 1; + static constexpr bool is_string_hash_map = false; + static constexpr bool is_two_level = true; + size_t hash(const Key & x) const { return Hash::operator()(x); } /// NOTE Bad for hash tables with more than 2^32 cells. 
diff --git a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h index 5608d0fd0f8..d217e0c0260 100644 --- a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h @@ -30,6 +30,9 @@ class TwoLevelStringHashTable : private boost::noncopyable static constexpr size_t NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET; static constexpr size_t MAX_BUCKET = NUM_BUCKETS - 1; + static constexpr bool is_string_hash_map = true; + static constexpr bool is_two_level = true; + template size_t ALWAYS_INLINE hash(const HashKeyType & key) const { @@ -301,3 +304,66 @@ class TwoLevelStringHashTable : private boost::noncopyable return res; } }; + +template +struct StringHashTableSubMapSelector<0, true, Data> +{ + struct Hash + { + static ALWAYS_INLINE size_t operator()(const StringRef & ) { return 0; } + }; + + typename Data::T0 & getSubMap(size_t hashval, Data & data) + { + const auto bucket = Data::getBucketFromHash(hashval); + return data.impls[bucket].m0; + } +}; + +template +struct StringHashTableSubMapSelector<1, true, Data> +{ + using Hash = StringHashTableHash::StringKey8Hasher; + + typename Data::T1 & getSubMap(size_t hashval, Data & data) + { + const auto bucket = Data::getBucketFromHash(hashval); + return data.impls[bucket].m1; + } +}; + +template +struct StringHashTableSubMapSelector<2, true, Data> +{ + using Hash = StringHashTableHash::StringKey16Hasher; + + typename Data::T2 & getSubMap(size_t hashval, Data & data) + { + const auto bucket = Data::getBucketFromHash(hashval); + return data.impls[bucket].m2; + } +}; + +template +struct StringHashTableSubMapSelector<3, true, Data> +{ + using Hash = StringHashTableHash::StringKey24Hasher; + + typename Data::T3 & getSubMap(size_t hashval, Data & data) + { + const auto bucket = Data::getBucketFromHash(hashval); + return data.impls[bucket].m3; + } +}; + +template +struct StringHashTableSubMapSelector<4, true, Data> +{ + using Hash = 
StringHashTableHash::StringRefHasher; + + typename Data::Ts & getSubMap(size_t hashval, Data & data) + { + const auto bucket = Data::getBucketFromHash(hashval); + return data.impls[bucket].ms; + } +}; diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 180799bd7ed..54cf52c673d 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -695,13 +695,9 @@ std::optional::Res try { if constexpr (only_lookup) - { return state.template findKey(method.data, index, aggregates_pool, sort_key_containers, hashvals); - } else - { return state.template emplaceKey(method.data, index, aggregates_pool, sort_key_containers, hashvals); - } } catch (ResizeException &) { @@ -709,6 +705,73 @@ std::optional::Res } } +// StringKeyType can be StringRef/StringKey8/StringKey16/StringKey24/ArenaKeyHolder. +// return true when resize exception happens. +template +bool Aggregator::emplaceOrFindStringKey( + typename Method::Data & data, + typename Method::State & state, + const std::vector & key_infos, + const std::vector & key_datas, + Arena & aggregates_pool, + std::vector & places, + AggProcessInfo & agg_process_info) const +{ + RUNTIME_CHECK(key_infos.size() == key_datas.size()); + + using Hash = typename StringHashTableSubMapSelector>::Hash; + std::vector hashvals(key_infos.size(), 0); + for (size_t i = 0; i < key_infos.size(); ++i) + { + hashvals[i] = Hash::operator()(keyHolderGetKey(key_datas[0])); + } + + AggregateDataPtr agg_state = nullptr; + for (size_t i = 0; i < key_infos.size(); ++i) + { + try + { + if constexpr (only_lookup) + { + auto find_result = state.template findStringKey(data, i, key_datas, hashvals); + if (find_result.isFound()) + { + agg_state = find_result.getMapped(); + } + else + { + agg_process_info.not_found_rows.push_back(key_infos[i]); + } + } + else + { + auto emplace_result = state.template emplaceStringKey(data, i, key_datas, hashvals); + if (emplace_result.isInserted()) + { + 
emplace_result.setMapped(nullptr); + + agg_state = aggregates_pool.alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + createAggregateStates(agg_state); + + emplace_result.setMapped(agg_state); + } + else + { + agg_state = emplace_result.getMapped(); + } + places.push_back(agg_state); + } + } + catch (ResizeException &) + { + // agg_process_info.set + // TODO handle exception + return true; + } + } + return false; +} + template ALWAYS_INLINE void Aggregator::executeImplBatch( Method & method, @@ -721,10 +784,10 @@ ALWAYS_INLINE void Aggregator::executeImplBatch( std::vector sort_key_containers; sort_key_containers.resize(params.keys_size, ""); - size_t agg_size = agg_process_info.end_row - agg_process_info.start_row; + size_t rows = agg_process_info.end_row - agg_process_info.start_row; fiu_do_on(FailPoints::force_agg_on_partial_block, { - if (agg_size > 0 && agg_process_info.start_row == 0) - agg_size = std::max(agg_size / 2, 1); + if (rows > 0 && agg_process_info.start_row == 0) + rows = std::max(rows / 2, 1); }); /// Optimization for special case when there are no aggregate functions. @@ -745,7 +808,7 @@ ALWAYS_INLINE void Aggregator::executeImplBatch( } - for (size_t i = 0; i < agg_size; ++i) + for (size_t i = 0; i < rows; ++i) { auto emplace_result_hold = emplaceOrFindKey( method, @@ -789,7 +852,7 @@ ALWAYS_INLINE void Aggregator::executeImplBatch( { inst->batch_that->addBatchLookupTable8( agg_process_info.start_row, - agg_size, + rows, reinterpret_cast(method.data.data()), inst->state_offset, [&](AggregateDataPtr & aggregate_data) { @@ -801,12 +864,12 @@ ALWAYS_INLINE void Aggregator::executeImplBatch( inst->batch_arguments, aggregates_pool); } - agg_process_info.start_row += agg_size; + agg_process_info.start_row += rows; // For key8, assume all rows are hit. No need to do state switch for auto pass through hashagg. // Because HashMap of key8 is basically a vector of size 256. 
if constexpr (collect_hit_rate) - agg_process_info.hit_row_cnt = agg_size; + agg_process_info.hit_row_cnt = rows; // Because all rows are hit, so state will not switch to Selective. if constexpr (only_lookup) @@ -815,8 +878,7 @@ ALWAYS_INLINE void Aggregator::executeImplBatch( } /// Generic case. - - std::unique_ptr places(new AggregateDataPtr[agg_size]); + std::unique_ptr places(new AggregateDataPtr[rows]); std::optional processed_rows; std::vector hashvals; if constexpr (enable_prefetch) @@ -830,7 +892,7 @@ ALWAYS_INLINE void Aggregator::executeImplBatch( aggregates_pool); } - for (size_t i = agg_process_info.start_row; i < agg_process_info.start_row + agg_size; ++i) + for (size_t i = agg_process_info.start_row; i < agg_process_info.start_row + rows; ++i) { AggregateDataPtr aggregate_data = nullptr; @@ -899,6 +961,153 @@ ALWAYS_INLINE void Aggregator::executeImplBatch( } } +// Emplace key into StringHashMap/TwoLevelStringHashMap is seperated from other situations, +// because it's easy to implement prefetch submap directly. +// TODO not support resize execption +template +ALWAYS_INLINE void Aggregator::executeImplBatchStringHashMap( + Method & method, + typename Method::State & state, + Arena * aggregates_pool, + AggProcessInfo & agg_process_info) const +{ + // collect_hit_rate and only_lookup cannot be true at the same time. 
+ static_assert(!(collect_hit_rate && only_lookup)); + static_assert(Method::Data::isStringHashMap); + + std::vector sort_key_containers; + sort_key_containers.resize(params.keys_size, ""); + + const size_t rows = agg_process_info.end_row = agg_process_info.start_row; + RUNTIME_CHECK_MSG(rows == state.total_rows, "executeImplBatchStringHashMap only handle resize exception for each Block instead of row"); + const size_t reserve_size = rows / 4; + + std::vector key0_infos; + std::vector key0_datas; + key0_infos.reserve(reserve_size); + key0_datas.reserve(reserve_size); + + std::vector key8_infos; + std::vector key8_datas; + key8_infos.reserve(reserve_size); + key8_datas.reserve(reserve_size); + + std::vector key16_infos; + std::vector key16_datas; + key16_infos.reserve(reserve_size); + key16_datas.reserve(reserve_size); + + std::vector key24_infos; + std::vector key24_datas; + key24_infos.reserve(reserve_size); + key24_datas.reserve(reserve_size); + + std::vector key_str_infos; + std::vector key_str_datas; + key_str_infos.reserve(reserve_size); + key_str_datas.reserve(reserve_size); + + auto dispatch_callback_key0 = [&key0_infos, &key0_datas](const StringRef & key, size_t row) { + key0_infos.push_back(row); + key0_datas.push_back(key); + }; + auto dispatch_callback_key8 = [&key8_infos, &key8_datas](const StringKey8 & key, size_t row) { + key8_infos.push_back(row); + key8_datas.push_back(key); + }; + auto dispatch_callback_key16 = [&key16_infos, &key16_datas](const StringKey16 & key, size_t row) { + key16_infos.push_back(row); + key16_datas.push_back(key); + }; + auto dispatch_callback_key24 = [&key24_infos, &key24_datas](const StringKey24 & key, size_t row) { + key24_infos.push_back(row); + key24_datas.push_back(key); + }; + // Argument type is ArenaKeyHolder instead of StringRef, + // because it will only be persisted when insert into HashTable. 
+ auto dispatch_callback_key_str = [&key_str_infos, &key_str_datas](const ArenaKeyHolder & key, size_t row) { + key_str_infos.push_back(row); + key_str_datas.push_back(key); + }; + for (size_t i = 0; i < rows; ++i) + { + auto key_holder = state.getKeyHolder(i, aggregates_pool, sort_key_containers); + dispatchStringHashTable(key_holder, + dispatch_callback_key0, + dispatch_callback_key8, + dispatch_callback_key16, + dispatch_callback_key24, + dispatch_callback_key_str); + } + + std::vector key0_places; + key0_places.reserve(key0_infos.size()); + + std::vector key8_places; + key8_places.reserve(key8_infos.size()); + + std::vector key16_places; + key16_places.reserve(key16_infos.size()); + + std::vector key24_places; + key24_places.reserve(key24_infos.size()); + + std::vector key_str_places; + key_str_places.reserve(key_str_infos.size()); + + if (!key0_infos.empty()) + { + emplaceOrFindStringKey<0, false>(method.data, state, key0_infos, key0_datas, aggregates_pool, key0_places, agg_process_info); + } + +#define M(INDEX, INFO, DATA, PLACES) \ + if (!(INFO).empty()) \ + { \ + if constexpr (enable_prefetch) \ + emplaceOrFindStringKey(method.data, state, INFO, DATA, aggregates_pool, PLACES, agg_process_info); \ + else \ + emplaceOrFindStringKey(method.data, state, INFO, DATA, aggregates_pool, PLACES, agg_process_info); \ + } + + M(1, key8_infos, key8_datas, key8_places) + M(2, key16_infos, key16_datas, key16_places) + M(3, key24_infos, key24_datas, key24_places) + M(4, key_str_infos, key_str_datas, key_str_places) +#undef M + + RUNTIME_CHECK(rows == key0_places.size() + key8_places.size() + key16_places.size() + key24_places.size() + key_str_places.size()); + + std::vector places(rows, nullptr); + +#define M(INFO, PLACES) \ + for (size_t i = 0; i < (INFO).size(); ++i) \ + { \ + const auto row = (INFO)[i]; \ + places[row] = (PLACES)[i]; \ + } + + M(key0_infos, key0_places) + M(key8_infos, key8_places) + M(key16_infos, key16_places) + M(key24_infos, key24_places) + 
M(key_str_infos, key_str_places) +#undef M + + + for (AggregateFunctionInstruction * inst = agg_process_info.aggregate_functions_instructions.data(); inst->that; + ++inst) + { + inst->batch_that->addBatch( + agg_process_info.start_row, + rows, + &places[0], + inst->state_offset, + inst->batch_arguments, + aggregates_pool); + } + agg_process_info.start_row = rows; +} + void NO_INLINE Aggregator::executeWithoutKeyImpl(AggregatedDataWithoutKey & res, AggProcessInfo & agg_process_info, Arena * arena) { diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index 0f1365694ac..6cbbde71b41 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -1461,6 +1461,13 @@ class Aggregator Arena * aggregates_pool, AggProcessInfo & agg_process_info) const; + template + void executeImplBatchStringHashMap( + Method & method, + typename Method::State & state, + Arena * aggregates_pool, + AggProcessInfo & agg_process_info) const; + template std::optional::ResultType> emplaceOrFindKey( Method & method, @@ -1470,6 +1477,16 @@ class Aggregator std::vector & sort_key_containers, const std::vector & hashvals) const; + template + bool emplaceOrFindStringKey( + typename Method::Data & data, + typename Method::State & state, + const std::vector & key_infos, + const std::vector & key_datas, + Arena & aggregates_pool, + std::vector & places, + AggProcessInfo & agg_process_info) const; + /// For case when there are no keys (all aggregate into one row). 
static void executeWithoutKeyImpl(AggregatedDataWithoutKey & res, AggProcessInfo & agg_process_info, Arena * arena); diff --git a/libs/libcommon/include/common/StringRef.h b/libs/libcommon/include/common/StringRef.h index a87b54a7670..bf1ab026a49 100644 --- a/libs/libcommon/include/common/StringRef.h +++ b/libs/libcommon/include/common/StringRef.h @@ -180,7 +180,7 @@ inline size_t hashLessThan8(const char * data, size_t size) struct CRC32Hash { - size_t operator()(StringRef x) const + static size_t operator()(const StringRef & x) { const char * pos = x.data; size_t size = x.size; From ec6e89231d74bee245cecd6b46f1254bc45b28fe Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Wed, 27 Nov 2024 15:21:31 +0800 Subject: [PATCH 04/24] handle resize exception done Signed-off-by: guo-shaoge --- .../AggregateFunctionGroupUniqArray.h | 6 +- .../src/AggregateFunctions/KeyHolderHelpers.h | 2 +- dbms/src/Common/ColumnsHashing.h | 6 +- dbms/src/Common/ColumnsHashingImpl.h | 32 +- dbms/src/Common/HashTable/Hash.h | 40 +- dbms/src/Common/HashTable/HashTable.h | 2 +- .../src/Common/HashTable/HashTableKeyHolder.h | 8 +- dbms/src/Common/HashTable/StringHashMap.h | 31 +- dbms/src/Common/HashTable/StringHashTable.h | 190 +++++----- .../HashTable/TwoLevelStringHashTable.h | 12 +- dbms/src/Interpreters/Aggregator.cpp | 356 +++++++++++------- dbms/src/Interpreters/Aggregator.h | 52 ++- 12 files changed, 414 insertions(+), 323 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h b/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h index 06dd57edf66..d3cbea74195 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h @@ -182,18 +182,18 @@ class AggregateFunctionGroupUniqArrayGeneric { // We have to copy the keys to our arena. 
assert(arena != nullptr); - cur_set.emplace(ArenaKeyHolder{rhs_elem.getValue(), *arena}, it, inserted); + cur_set.emplace(ArenaKeyHolder{rhs_elem.getValue(), arena}, it, inserted); } } void insertResultInto(ConstAggregateDataPtr __restrict place, IColumn & to, Arena *) const override { - ColumnArray & arr_to = assert_cast(to); + auto & arr_to = assert_cast(to); ColumnArray::Offsets & offsets_to = arr_to.getOffsets(); IColumn & data_to = arr_to.getData(); auto & set = this->data(place).value; - offsets_to.push_back((offsets_to.size() == 0 ? 0 : offsets_to.back()) + set.size()); + offsets_to.push_back((offsets_to.empty() ? 0 : offsets_to.back()) + set.size()); for (auto & elem : set) deserializeAndInsert(elem.getValue(), data_to); diff --git a/dbms/src/AggregateFunctions/KeyHolderHelpers.h b/dbms/src/AggregateFunctions/KeyHolderHelpers.h index 6677866f0d3..b8a4ee0def3 100644 --- a/dbms/src/AggregateFunctions/KeyHolderHelpers.h +++ b/dbms/src/AggregateFunctions/KeyHolderHelpers.h @@ -24,7 +24,7 @@ inline auto getKeyHolder(const IColumn & column, size_t row_num, Arena & arena) { if constexpr (is_plain_column) { - return ArenaKeyHolder{column.getDataAt(row_num), arena}; + return ArenaKeyHolder{column.getDataAt(row_num), &arena}; } else { diff --git a/dbms/src/Common/ColumnsHashing.h b/dbms/src/Common/ColumnsHashing.h index e14a793567c..aabe0733f8c 100644 --- a/dbms/src/Common/ColumnsHashing.h +++ b/dbms/src/Common/ColumnsHashing.h @@ -135,7 +135,7 @@ struct HashMethodString { if (likely(collator)) key = collator->sortKey(key.data, key.size, sort_key_containers[0]); - return ArenaKeyHolder{key, *pool}; + return ArenaKeyHolder{key, pool}; } else { @@ -172,7 +172,7 @@ struct HashMethodStringBin auto last_offset = row == 0 ? 
0 : offsets[row - 1]; StringRef key(chars + last_offset, offsets[row] - last_offset - 1); key = BinCollatorSortKey(key.data, key.size); - return ArenaKeyHolder{key, *pool}; + return ArenaKeyHolder{key, pool}; } protected: @@ -425,7 +425,7 @@ struct HashMethodFixedString if constexpr (place_string_to_arena) { - return ArenaKeyHolder{key, *pool}; + return ArenaKeyHolder{key, pool}; } else { diff --git a/dbms/src/Common/ColumnsHashingImpl.h b/dbms/src/Common/ColumnsHashingImpl.h index 0c8d0bc1a49..aa583f1a722 100644 --- a/dbms/src/Common/ColumnsHashingImpl.h +++ b/dbms/src/Common/ColumnsHashingImpl.h @@ -16,8 +16,8 @@ #include #include -#include #include +#include #include #include #include @@ -174,17 +174,19 @@ class HashMethodBase { return findKeyImpl(keyHolderGetKey(key_holder), data, 0); } - } + // TODO emplaceStringKey merge with emplaceKey? template ALWAYS_INLINE inline EmplaceResult emplaceStringKey( - Data & data, - size_t idx, - const std::vector & datas, - const std::vector & hashvals) + Data & data, + size_t idx, + std::vector & datas, // TODO const + const std::vector & hashvals) { - auto & submap = typename StringHashTableSubMapSelector>::getSubMap(data); + auto & submap = StringHashTableSubMapSelector>::getSubMap( + hashvals[idx], + data); if constexpr (enable_prefetch) { const auto prefetch_idx = idx + prefetch_step; @@ -198,12 +200,14 @@ class HashMethodBase // TODO Macro with emplaceStringKey template ALWAYS_INLINE inline FindResult findStringKey( - Data & data, - size_t idx, - const std::vector & datas, - const std::vector & hashvals) + Data & data, + size_t idx, + std::vector & datas, // TODO const + const std::vector & hashvals) { - auto & submap = typename StringHashTableSubMapSelector>::getSubMap(data); + auto & submap = StringHashTableSubMapSelector>::getSubMap( + hashvals[idx], + data); if constexpr (enable_prefetch) { const auto prefetch_idx = idx + prefetch_step; @@ -211,7 +215,7 @@ class HashMethodBase 
submap.prefetch(hashvals[prefetch_idx]); } - return findKeyImpl(datas[idx], submap, hashvals[idx]); + return findKeyImpl(keyHolderGetKey(datas[idx]), submap, hashvals[idx]); } template @@ -301,7 +305,7 @@ class HashMethodBase } template - ALWAYS_INLINE inline FindResult findKeyImpl(Key key, Data & data, size_t hashval) + ALWAYS_INLINE inline FindResult findKeyImpl(Key & key, Data & data, size_t hashval) { if constexpr (Cache::consecutive_keys_optimization) { diff --git a/dbms/src/Common/HashTable/Hash.h b/dbms/src/Common/HashTable/Hash.h index 883ec8ab6ff..207919a347e 100644 --- a/dbms/src/Common/HashTable/Hash.h +++ b/dbms/src/Common/HashTable/Hash.h @@ -130,8 +130,8 @@ inline DB::UInt64 wideIntHashCRC32(const T & x, DB::UInt64 updated_value) return updated_value; } static_assert( - DB::IsDecimal< - T> || is_boost_number_v || std::is_same_v || std::is_same_v || std::is_same_v); + DB::IsDecimal || is_boost_number_v || std::is_same_v || std::is_same_v + || std::is_same_v); __builtin_unreachable(); } @@ -244,8 +244,8 @@ inline size_t defaultHash64(const std::enable_if_t, T> & key return boost::multiprecision::hash_value(key); } static_assert( - is_boost_number_v< - T> || std::is_same_v || std::is_same_v || std::is_same_v); + is_boost_number_v || std::is_same_v || std::is_same_v + || std::is_same_v); __builtin_unreachable(); } @@ -297,20 +297,26 @@ inline size_t hashCRC32(const std::enable_if_t, T> & key) template struct HashCRC32; -#define DEFINE_HASH(T) \ - template <> \ - struct HashCRC32 \ - { \ - static_assert(is_fit_register); \ - size_t operator()(T key) const { return hashCRC32(key); } \ +#define DEFINE_HASH(T) \ + template <> \ + struct HashCRC32 \ + { \ + static_assert(is_fit_register); \ + size_t operator()(T key) const \ + { \ + return hashCRC32(key); \ + } \ }; -#define DEFINE_HASH_WIDE(T) \ - template <> \ - struct HashCRC32 \ - { \ - static_assert(!is_fit_register); \ - size_t operator()(const T & key) const { return hashCRC32(key); } \ +#define 
DEFINE_HASH_WIDE(T) \ + template <> \ + struct HashCRC32 \ + { \ + static_assert(!is_fit_register); \ + size_t operator()(const T & key) const \ + { \ + return hashCRC32(key); \ + } \ }; DEFINE_HASH(DB::UInt8) @@ -535,7 +541,7 @@ struct HashWithMixSeed template <> struct HashWithMixSeed -{ +{ static inline size_t operator()(const DB::UInt256 & v) { return HashWithMixSeedHelper::operator()(hash_uint256(0, v)); diff --git a/dbms/src/Common/HashTable/HashTable.h b/dbms/src/Common/HashTable/HashTable.h index 12ebc49756c..f8d44e8c406 100644 --- a/dbms/src/Common/HashTable/HashTable.h +++ b/dbms/src/Common/HashTable/HashTable.h @@ -859,7 +859,7 @@ class HashTable (void)hashval; #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) size_t place_value = grower.place(hashval); - __mm_prefetch((const char*)(&buf[place_value]), _MM_HINT_NTA); + __mm_prefetch((const char *)(&buf[place_value]), _MM_HINT_NTA); #elif defined(__GNUC__) size_t place_value = grower.place(hashval); __builtin_prefetch(static_cast(&buf[place_value])); diff --git a/dbms/src/Common/HashTable/HashTableKeyHolder.h b/dbms/src/Common/HashTable/HashTableKeyHolder.h index 01b06dce87d..dd8a4b53376 100644 --- a/dbms/src/Common/HashTable/HashTableKeyHolder.h +++ b/dbms/src/Common/HashTable/HashTableKeyHolder.h @@ -91,8 +91,8 @@ namespace DB */ struct ArenaKeyHolder { - StringRef key; - Arena & pool; + StringRef key{}; + Arena * pool = nullptr; }; } // namespace DB @@ -111,14 +111,14 @@ inline void ALWAYS_INLINE keyHolderPersistKey(DB::ArenaKeyHolder & holder) { // Hash table shouldn't ask us to persist a zero key assert(holder.key.size > 0); - holder.key.data = holder.pool.insert(holder.key.data, holder.key.size); + holder.key.data = holder.pool->insert(holder.key.data, holder.key.size); } inline void ALWAYS_INLINE keyHolderPersistKey(DB::ArenaKeyHolder && holder) { // Hash table shouldn't ask us to persist a zero key assert(holder.key.size > 0); - holder.key.data = holder.pool.insert(holder.key.data, 
holder.key.size); + holder.key.data = holder.pool->insert(holder.key.data, holder.key.size); } inline void ALWAYS_INLINE keyHolderDiscardKey(DB::ArenaKeyHolder &) {} diff --git a/dbms/src/Common/HashTable/StringHashMap.h b/dbms/src/Common/HashTable/StringHashMap.h index cad653907fa..a070f0ef0a9 100644 --- a/dbms/src/Common/HashTable/StringHashMap.h +++ b/dbms/src/Common/HashTable/StringHashMap.h @@ -92,30 +92,13 @@ struct StringHashMapSubMaps { using Hash = StringHashTableHash; using T0 = StringHashTableEmpty>; - using T1 = HashMapTable< - StringKey8, - StringHashMapCell, - Hash, - StringHashTableGrower<>, - Allocator>; - using T2 = HashMapTable< - StringKey16, - StringHashMapCell, - Hash, - StringHashTableGrower<>, - Allocator>; - using T3 = HashMapTable< - StringKey24, - StringHashMapCell, - Hash, - StringHashTableGrower<>, - Allocator>; - using Ts = HashMapTable< - StringRef, - StringHashMapCell, - Hash, - StringHashTableGrower<>, - Allocator>; + using T1 + = HashMapTable, Hash, StringHashTableGrower<>, Allocator>; + using T2 + = HashMapTable, Hash, StringHashTableGrower<>, Allocator>; + using T3 + = HashMapTable, Hash, StringHashTableGrower<>, Allocator>; + using Ts = HashMapTable, Hash, StringHashTableGrower<>, Allocator>; }; template diff --git a/dbms/src/Common/HashTable/StringHashTable.h b/dbms/src/Common/HashTable/StringHashTable.h index f906b043a9e..a511ce47671 100644 --- a/dbms/src/Common/HashTable/StringHashTable.h +++ b/dbms/src/Common/HashTable/StringHashTable.h @@ -72,18 +72,9 @@ struct StringHashTableHash using StringKey24Hasher = HashWithMixSeed; using StringRefHasher = StringRefHash; - static size_t ALWAYS_INLINE operator()(StringKey8 key) - { - return StringKey8Hasher::operator()(key); - } - static size_t ALWAYS_INLINE operator()(const StringKey16 & key) - { - return StringKey16Hasher::operator()(key); - } - static size_t ALWAYS_INLINE operator()(const StringKey24 & key) - { - return StringKey24Hasher::operator()(key); - } + static size_t 
ALWAYS_INLINE operator()(StringKey8 key) { return StringKey8Hasher::operator()(key); } + static size_t ALWAYS_INLINE operator()(const StringKey16 & key) { return StringKey16Hasher::operator()(key); } + static size_t ALWAYS_INLINE operator()(const StringKey24 & key) { return StringKey24Hasher::operator()(key); } static size_t ALWAYS_INLINE operator()(const StringRef & key) { return StringRefHasher::operator()(key); } }; @@ -191,97 +182,106 @@ struct StringHashTableLookupResult friend bool operator!=(const std::nullptr_t &, const StringHashTableLookupResult & b) { return b.mapped_ptr; } }; - template - static auto +template +static auto #if defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) - NO_INLINE NO_SANITIZE_ADDRESS NO_SANITIZE_THREAD + NO_INLINE NO_SANITIZE_ADDRESS NO_SANITIZE_THREAD #else - ALWAYS_INLINE + ALWAYS_INLINE #endif - dispatchStringHashTable(size_t row, KeyHolder && key_holder, Func0 && func0, Func8 && func8, Func16 && func16, Func24 && func24, FuncStr && func_str) + dispatchStringHashTable( + size_t row, + KeyHolder && key_holder, + Func0 && func0, + Func8 && func8, + Func16 && func16, + Func24 && func24, + FuncStr && func_str) +{ + const StringRef & x = keyHolderGetKey(key_holder); + const size_t sz = x.size; + if (sz == 0) { - const StringRef & x = keyHolderGetKey(key_holder); - const size_t sz = x.size; - if (sz == 0) - { - return func0(x, row); - } + return func0(x, row); + } - if (x.data[sz - 1] == 0) - { - // Strings with trailing zeros are not representable as fixed-size - // string keys. Put them to the generic table. - return func_str(key_holder, row); - } + if (x.data[sz - 1] == 0) + { + // Strings with trailing zeros are not representable as fixed-size + // string keys. Put them to the generic table. 
+ return func_str(key_holder, row); + } - const char * p = x.data; - // pending bits that needs to be shifted out - const char s = (-sz & 7) * 8; - union - { - StringKey8 k8; - StringKey16 k16; - StringKey24 k24; - UInt64 n[3]; - }; - switch ((sz - 1) >> 3) - { - case 0: // 1..8 bytes - { - // first half page - if ((reinterpret_cast(p) & 2048) == 0) - { - memcpy(&n[0], p, 8); - if constexpr (DB::isLittleEndian()) - n[0] &= (-1ULL >> s); - else - n[0] &= (-1ULL << s); - } - else - { - const char * lp = x.data + x.size - 8; - memcpy(&n[0], lp, 8); - if constexpr (DB::isLittleEndian()) - n[0] >>= s; - else - n[0] <<= s; - } - return func8(k8, row); - } - case 1: // 9..16 bytes + const char * p = x.data; + // pending bits that needs to be shifted out + const char s = (-sz & 7) * 8; + union + { + StringKey8 k8; + StringKey16 k16; + StringKey24 k24; + UInt64 n[3]; + }; + switch ((sz - 1) >> 3) + { + case 0: // 1..8 bytes + { + // first half page + if ((reinterpret_cast(p) & 2048) == 0) { memcpy(&n[0], p, 8); - const char * lp = x.data + x.size - 8; - memcpy(&n[1], lp, 8); if constexpr (DB::isLittleEndian()) - n[1] >>= s; + n[0] &= (-1ULL >> s); else - n[1] <<= s; - return func16(k16, row); + n[0] &= (-1ULL << s); } - case 2: // 17..24 bytes + else { - memcpy(&n[0], p, 16); const char * lp = x.data + x.size - 8; - memcpy(&n[2], lp, 8); + memcpy(&n[0], lp, 8); if constexpr (DB::isLittleEndian()) - n[2] >>= s; + n[0] >>= s; else - n[2] <<= s; - return func24(k24, row); - } - default: // >= 25 bytes - { - return func_str(key_holder, row); - } + n[0] <<= s; } + return func8(k8, row); + } + case 1: // 9..16 bytes + { + memcpy(&n[0], p, 8); + const char * lp = x.data + x.size - 8; + memcpy(&n[1], lp, 8); + if constexpr (DB::isLittleEndian()) + n[1] >>= s; + else + n[1] <<= s; + return func16(k16, row); + } + case 2: // 17..24 bytes + { + memcpy(&n[0], p, 16); + const char * lp = x.data + x.size - 8; + memcpy(&n[2], lp, 8); + if constexpr (DB::isLittleEndian()) + n[2] >>= s; + 
else + n[2] <<= s; + return func24(k24, row); + } + default: // >= 25 bytes + { + return func_str(key_holder, row); } + } +} template class StringHashTable : private boost::noncopyable { protected: static constexpr size_t NUM_MAPS = 5; + using Self = StringHashTable; + // Map for storing empty string using T0 = typename SubMaps::T0; @@ -292,10 +292,11 @@ class StringHashTable : private boost::noncopyable // Long strings are stored as StringRef along with saved hash using Ts = typename SubMaps::Ts; - using Self = StringHashTable; template friend class TwoLevelStringHashTable; + template + friend struct StringHashTableSubMapSelector; T0 m0; T1 m1; @@ -592,13 +593,10 @@ struct StringHashTableSubMapSelector<0, false, Data> { struct Hash { - static ALWAYS_INLINE size_t operator()(const StringRef & ) { return 0; } + static ALWAYS_INLINE size_t operator()(const StringRef &) { return 0; } }; - typename Data::T0 & getSubMap(size_t, Data & data) - { - return data.m0; - } + static typename Data::T0 & getSubMap(size_t, Data & data) { return data.m0; } }; template @@ -606,10 +604,7 @@ struct StringHashTableSubMapSelector<1, false, Data> { using Hash = StringHashTableHash::StringKey8Hasher; - typename Data::T1 & getSubMap(size_t, Data & data) - { - return data.m1; - } + static typename Data::T1 & getSubMap(size_t, Data & data) { return data.m1; } }; template @@ -617,10 +612,7 @@ struct StringHashTableSubMapSelector<2, false, Data> { using Hash = StringHashTableHash::StringKey16Hasher; - typename Data::T2 & getSubMap(size_t, Data & data) - { - return data.m2; - } + static typename Data::T2 & getSubMap(size_t, Data & data) { return data.m2; } }; template @@ -628,10 +620,7 @@ struct StringHashTableSubMapSelector<3, false, Data> { using Hash = StringHashTableHash::StringKey24Hasher; - typename Data::T3 & getSubMap(size_t, Data & data) - { - return data.m3; - } + static typename Data::T3 & getSubMap(size_t, Data & data) { return data.m3; } }; template @@ -639,8 +628,5 @@ struct 
StringHashTableSubMapSelector<4, false, Data> { using Hash = StringHashTableHash::StringRefHasher; - typename Data::Ts & getSubMap(size_t, Data & data) - { - return data.ms; - } + static typename Data::Ts & getSubMap(size_t, Data & data) { return data.ms; } }; diff --git a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h index d217e0c0260..e7ea1bb8fce 100644 --- a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h @@ -310,10 +310,10 @@ struct StringHashTableSubMapSelector<0, true, Data> { struct Hash { - static ALWAYS_INLINE size_t operator()(const StringRef & ) { return 0; } + static ALWAYS_INLINE size_t operator()(const StringRef &) { return 0; } }; - typename Data::T0 & getSubMap(size_t hashval, Data & data) + static typename Data::Impl::T0 & getSubMap(size_t hashval, Data & data) { const auto bucket = Data::getBucketFromHash(hashval); return data.impls[bucket].m0; @@ -325,7 +325,7 @@ struct StringHashTableSubMapSelector<1, true, Data> { using Hash = StringHashTableHash::StringKey8Hasher; - typename Data::T1 & getSubMap(size_t hashval, Data & data) + static typename Data::Impl::T1 & getSubMap(size_t hashval, Data & data) { const auto bucket = Data::getBucketFromHash(hashval); return data.impls[bucket].m1; @@ -337,7 +337,7 @@ struct StringHashTableSubMapSelector<2, true, Data> { using Hash = StringHashTableHash::StringKey16Hasher; - typename Data::T2 & getSubMap(size_t hashval, Data & data) + static typename Data::Impl::T2 & getSubMap(size_t hashval, Data & data) { const auto bucket = Data::getBucketFromHash(hashval); return data.impls[bucket].m2; @@ -349,7 +349,7 @@ struct StringHashTableSubMapSelector<3, true, Data> { using Hash = StringHashTableHash::StringKey24Hasher; - typename Data::T3 & getSubMap(size_t hashval, Data & data) + static typename Data::Impl::T3 & getSubMap(size_t hashval, Data & data) { const auto bucket = 
Data::getBucketFromHash(hashval); return data.impls[bucket].m3; @@ -361,7 +361,7 @@ struct StringHashTableSubMapSelector<4, true, Data> { using Hash = StringHashTableHash::StringRefHasher; - typename Data::Ts & getSubMap(size_t hashval, Data & data) + static typename Data::Impl::Ts & getSubMap(size_t hashval, Data & data) { const auto bucket = Data::getBucketFromHash(hashval); return data.impls[bucket].ms; diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 54cf52c673d..4faec37ce9d 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -666,14 +666,37 @@ void NO_INLINE Aggregator::executeImpl( typename Method::State state(agg_process_info.key_columns, key_sizes, collators); if (method.data.getBufferSizeInCells() < 8192) - executeImplBatch(method, state, aggregates_pool, agg_process_info); + { + if constexpr (Method::Data::is_string_hash_map) + executeImplBatchStringHashMap( + method, + state, + aggregates_pool, + agg_process_info); + else + executeImplBatch(method, state, aggregates_pool, agg_process_info); + } else - executeImplBatch(method, state, aggregates_pool, agg_process_info); + { + if constexpr (Method::Data::is_string_hash_map) + executeImplBatchStringHashMap( + method, + state, + aggregates_pool, + agg_process_info); + else + executeImplBatch(method, state, aggregates_pool, agg_process_info); + } } template -std::vector getHashVals(size_t start_row, size_t end_row, const Data & data, const State & state, - std::vector & sort_key_containers, Arena * pool) +std::vector getHashVals( + size_t start_row, + size_t end_row, + const Data & data, + const State & state, + std::vector & sort_key_containers, + Arena * pool) { std::vector hashvals(state.total_rows, 0); for (size_t i = start_row; i < end_row; ++i) @@ -695,9 +718,15 @@ std::optional::Res try { if constexpr (only_lookup) - return state.template findKey(method.data, index, aggregates_pool, sort_key_containers, hashvals); 
+ return state + .template findKey(method.data, index, aggregates_pool, sort_key_containers, hashvals); else - return state.template emplaceKey(method.data, index, aggregates_pool, sort_key_containers, hashvals); + return state.template emplaceKey( + method.data, + index, + aggregates_pool, + sort_key_containers, + hashvals); } catch (ResizeException &) { @@ -707,19 +736,27 @@ std::optional::Res // StringKeyType can be StringRef/StringKey8/StringKey16/StringKey24/ArenaKeyHolder. // return true when resize exception happens. -template -bool Aggregator::emplaceOrFindStringKey( - typename Method::Data & data, - typename Method::State & state, - const std::vector & key_infos, - const std::vector & key_datas, - Arena & aggregates_pool, - std::vector & places, - AggProcessInfo & agg_process_info) const +template < + size_t SubMapIndex, + bool collect_hit_rate, + bool only_lookup, + bool enable_prefetch, + typename Data, + typename State, + typename StringKeyType> +size_t Aggregator::emplaceOrFindStringKey( + Data & data, + State & state, + const std::vector & key_infos, + std::vector & key_datas, // TODO const + Arena & aggregates_pool, + std::vector & places, + AggProcessInfo & agg_process_info) const { + static_assert(!(collect_hit_rate && only_lookup)); RUNTIME_CHECK(key_infos.size() == key_datas.size()); - using Hash = typename StringHashTableSubMapSelector>::Hash; + using Hash = typename StringHashTableSubMapSelector>::Hash; std::vector hashvals(key_infos.size(), 0); for (size_t i = 0; i < key_infos.size(); ++i) { @@ -733,7 +770,8 @@ bool Aggregator::emplaceOrFindStringKey( { if constexpr (only_lookup) { - auto find_result = state.template findStringKey(data, i, key_datas, hashvals); + auto find_result + = state.template findStringKey(data, i, key_datas, hashvals); if (find_result.isFound()) { agg_state = find_result.getMapped(); @@ -745,7 +783,8 @@ bool Aggregator::emplaceOrFindStringKey( } else { - auto emplace_result = state.template emplaceStringKey(data, i, 
key_datas, hashvals); + auto emplace_result + = state.template emplaceStringKey(data, i, key_datas, hashvals); if (emplace_result.isInserted()) { emplace_result.setMapped(nullptr); @@ -758,18 +797,19 @@ bool Aggregator::emplaceOrFindStringKey( else { agg_state = emplace_result.getMapped(); + + if constexpr (collect_hit_rate) + ++agg_process_info.hit_row_cnt; } - places.push_back(agg_state); + places[i] = agg_state; } } catch (ResizeException &) { - // agg_process_info.set - // TODO handle exception - return true; + return i; } } - return false; + return key_infos.size(); } template @@ -799,13 +839,12 @@ ALWAYS_INLINE void Aggregator::executeImplBatch( if constexpr (enable_prefetch) { hashvals = getHashVals( - agg_process_info.start_row, - agg_process_info.end_row, - method.data, - state, - sort_key_containers, - aggregates_pool); - + agg_process_info.start_row, + agg_process_info.end_row, + method.data, + state, + sort_key_containers, + aggregates_pool); } for (size_t i = 0; i < rows; ++i) @@ -884,20 +923,25 @@ ALWAYS_INLINE void Aggregator::executeImplBatch( if constexpr (enable_prefetch) { hashvals = getHashVals( - agg_process_info.start_row, - agg_process_info.end_row, - method.data, - state, - sort_key_containers, - aggregates_pool); + agg_process_info.start_row, + agg_process_info.end_row, + method.data, + state, + sort_key_containers, + aggregates_pool); } for (size_t i = agg_process_info.start_row; i < agg_process_info.start_row + rows; ++i) { AggregateDataPtr aggregate_data = nullptr; - auto emplace_result_holder - = emplaceOrFindKey(method, state, i, *aggregates_pool, sort_key_containers, hashvals); + auto emplace_result_holder = emplaceOrFindKey( + method, + state, + i, + *aggregates_pool, + sort_key_containers, + hashvals); if unlikely (!emplace_result_holder.has_value()) { LOG_INFO(log, "HashTable resize throw ResizeException since the data is already marked for spill"); @@ -961,129 +1005,174 @@ ALWAYS_INLINE void Aggregator::executeImplBatch( } } 
+#define M(SUBMAPINDEX) \ + template \ + void setupExceptionRecoveryInfoForStringHashTable( \ + Aggregator::AggProcessInfo & agg_process_info, \ + size_t row, \ + const std::vector & key_infos, \ + const std::vector & key_datas, \ + std::integral_constant) \ + { \ + agg_process_info.submap_m##SUBMAPINDEX##_infos \ + = std::vector(key_infos.begin() + row, key_infos.end()); \ + agg_process_info.submap_m##SUBMAPINDEX##_datas \ + = std::vector(key_datas.begin() + row, key_datas.end()); \ + } + +M(0) +M(1) +M(2) +M(3) +M(4) + +#undef M + // Emplace key into StringHashMap/TwoLevelStringHashMap is seperated from other situations, // because it's easy to implement prefetch submap directly. -// TODO not support resize execption template ALWAYS_INLINE void Aggregator::executeImplBatchStringHashMap( - Method & method, - typename Method::State & state, - Arena * aggregates_pool, - AggProcessInfo & agg_process_info) const + Method & method, + typename Method::State & state, + Arena * aggregates_pool, + AggProcessInfo & agg_process_info) const { // collect_hit_rate and only_lookup cannot be true at the same time. 
static_assert(!(collect_hit_rate && only_lookup)); - static_assert(Method::Data::isStringHashMap); + static_assert(Method::Data::is_string_hash_map); + +#define M(SUBMAPINDEX) \ + RUNTIME_CHECK( \ + agg_process_info.submap_m##SUBMAPINDEX##_infos.size() \ + == agg_process_info.submap_m##SUBMAPINDEX##_datas.size()); + + M(0) + M(1) + M(2) + M(3) + M(4) +#undef M std::vector sort_key_containers; sort_key_containers.resize(params.keys_size, ""); - const size_t rows = agg_process_info.end_row = agg_process_info.start_row; - RUNTIME_CHECK_MSG(rows == state.total_rows, "executeImplBatchStringHashMap only handle resize exception for each Block instead of row"); - const size_t reserve_size = rows / 4; - - std::vector key0_infos; - std::vector key0_datas; - key0_infos.reserve(reserve_size); - key0_datas.reserve(reserve_size); - - std::vector key8_infos; - std::vector key8_datas; - key8_infos.reserve(reserve_size); - key8_datas.reserve(reserve_size); - - std::vector key16_infos; - std::vector key16_datas; - key16_infos.reserve(reserve_size); - key16_datas.reserve(reserve_size); - - std::vector key24_infos; - std::vector key24_datas; - key24_infos.reserve(reserve_size); - key24_datas.reserve(reserve_size); - - std::vector key_str_infos; - std::vector key_str_datas; - key_str_infos.reserve(reserve_size); - key_str_datas.reserve(reserve_size); - - auto dispatch_callback_key0 = [&key0_infos, &key0_datas](const StringRef & key, size_t row) { - key0_infos.push_back(row); - key0_datas.push_back(key); - }; - auto dispatch_callback_key8 = [&key8_infos, &key8_datas](const StringKey8 & key, size_t row) { - key8_infos.push_back(row); - key8_datas.push_back(key); - }; - auto dispatch_callback_key16 = [&key16_infos, &key16_datas](const StringKey16 & key, size_t row) { - key16_infos.push_back(row); - key16_datas.push_back(key); - }; - auto dispatch_callback_key24 = [&key24_infos, &key24_datas](const StringKey24 & key, size_t row) { - key24_infos.push_back(row); - key24_datas.push_back(key); 
- }; - // Argument type is ArenaKeyHolder instead of StringRef, - // because it will only be persisted when insert into HashTable. - auto dispatch_callback_key_str = [&key_str_infos, &key_str_datas](const ArenaKeyHolder & key, size_t row) { - key_str_infos.push_back(row); - key_str_datas.push_back(key); - }; - for (size_t i = 0; i < rows; ++i) +#define M(INFO, DATA, KEYTYPE) \ + std::vector(INFO); \ + std::vector(DATA); + + M(key0_infos, key0_datas, StringRef) + M(key8_infos, key8_datas, StringKey8) + M(key16_infos, key16_datas, StringKey16) + M(key24_infos, key24_datas, StringKey24) + M(key_str_infos, key_str_datas, ArenaKeyHolder) +#undef M + + const size_t rows = agg_process_info.end_row - agg_process_info.start_row; + + if likely (agg_process_info.allBlockDataHandled()) { - auto key_holder = state.getKeyHolder(i, aggregates_pool, sort_key_containers); - dispatchStringHashTable(key_holder, + // No resize exception happens, so this is a new Block. + RUNTIME_CHECK(agg_process_info.start_row == 0); + RUNTIME_CHECK_MSG( + rows == state.total_rows, + "executeImplBatchStringHashMap only handle resize exception for each Block instead of row"); + const size_t reserve_size = rows / 4; + +#define M(INFO, DATA, SUBMAPINDEX, KEYTYPE) \ + (INFO).reserve(reserve_size); \ + (DATA).reserve(reserve_size); \ + auto dispatch_callback_key##SUBMAPINDEX = [&INFO, &DATA](const KEYTYPE & key, size_t row) { \ + (INFO).push_back(row); \ + (DATA).push_back(key); \ + }; + + M(key0_infos, key0_datas, 0, StringRef) + M(key8_infos, key8_datas, 8, StringKey8) + M(key16_infos, key16_datas, 16, StringKey16) + M(key24_infos, key24_datas, 24, StringKey24) + // Argument type is ArenaKeyHolder instead of StringRef, + // because it will only be persisted when insert into HashTable. 
+ M(key_str_infos, key_str_datas, str, ArenaKeyHolder) +#undef M + + for (size_t i = 0; i < rows; ++i) + { + auto key_holder = state.getKeyHolder(i, aggregates_pool, sort_key_containers); + dispatchStringHashTable( + i, + key_holder, dispatch_callback_key0, dispatch_callback_key8, dispatch_callback_key16, dispatch_callback_key24, - dispatch_callback_key_str); + dispatch_callback_keystr); + } } - - std::vector key0_places; - key0_places.reserve(key0_infos.size()); - - std::vector key8_places; - key8_places.reserve(key8_infos.size()); - - std::vector key16_places; - key16_places.reserve(key16_infos.size()); - - std::vector key24_places; - key24_places.reserve(key24_infos.size()); - - std::vector key_str_places; - key_str_places.reserve(key_str_infos.size()); - - if (!key0_infos.empty()) + else { - emplaceOrFindStringKey<0, false>(method.data, state, key0_infos, key0_datas, aggregates_pool, key0_places, agg_process_info); - } +#define M(INFO, DATA, SUBMAPINDEX) \ + (INFO) = agg_process_info.submap_m##SUBMAPINDEX##_infos; \ + (DATA) = agg_process_info.submap_m##SUBMAPINDEX##_datas; -#define M(INDEX, INFO, DATA, PLACES) \ - if (!(INFO).empty()) \ - { \ - if constexpr (enable_prefetch) \ - emplaceOrFindStringKey(method.data, state, INFO, DATA, aggregates_pool, PLACES, agg_process_info); \ - else \ - emplaceOrFindStringKey(method.data, state, INFO, DATA, aggregates_pool, PLACES, agg_process_info); \ + M(key0_infos, key0_datas, 0) + M(key8_infos, key8_datas, 1) + M(key16_infos, key16_datas, 2) + M(key24_infos, key24_datas, 3) + M(key_str_infos, key_str_datas, 4) +#undef M } + std::vector key0_places(key0_infos.size(), nullptr); + std::vector key8_places(key8_infos.size(), nullptr); + std::vector key16_places(key16_infos.size(), nullptr); + std::vector key24_places(key24_infos.size(), nullptr); + std::vector key_str_places(key_str_infos.size(), nullptr); + + bool got_resize_exception = false; + size_t emplaced_index = 0; + +#define M(INDEX, INFO, DATA, PLACES) \ + if 
unlikely (got_resize_exception) \ + { \ + emplaced_index = 0; \ + } \ + else if (!(INFO).empty()) \ + { \ + emplaced_index = emplaceOrFindStringKey( \ + method.data, \ + state, \ + (INFO), \ + (DATA), \ + *aggregates_pool, \ + (PLACES), \ + agg_process_info); \ + if unlikely (emplaced_index != (INFO).size()) \ + got_resize_exception = true; \ + } \ + setupExceptionRecoveryInfoForStringHashTable( \ + agg_process_info, \ + emplaced_index, \ + INFO, \ + DATA, \ + std::integral_constant{}); + + M(0, key0_infos, key0_datas, key0_places) M(1, key8_infos, key8_datas, key8_places) M(2, key16_infos, key16_datas, key16_places) M(3, key24_infos, key24_datas, key24_places) M(4, key_str_infos, key_str_datas, key_str_places) #undef M - RUNTIME_CHECK(rows == key0_places.size() + key8_places.size() + key16_places.size() + key24_places.size() + key_str_places.size()); + RUNTIME_CHECK( + rows + == key0_places.size() + key8_places.size() + key16_places.size() + key24_places.size() + key_str_places.size()); std::vector places(rows, nullptr); - -#define M(INFO, PLACES) \ +#define M(INFO, PLACES) \ for (size_t i = 0; i < (INFO).size(); ++i) \ - { \ - const auto row = (INFO)[i]; \ - places[row] = (PLACES)[i]; \ + { \ + const auto row = (INFO)[i]; \ + places[row] = (PLACES)[i]; \ } M(key0_infos, key0_places) @@ -1093,7 +1182,6 @@ ALWAYS_INLINE void Aggregator::executeImplBatchStringHashMap( M(key_str_infos, key_str_places) #undef M - for (AggregateFunctionInstruction * inst = agg_process_info.aggregate_functions_instructions.data(); inst->that; ++inst) { @@ -1105,7 +1193,8 @@ ALWAYS_INLINE void Aggregator::executeImplBatchStringHashMap( inst->batch_arguments, aggregates_pool); } - agg_process_info.start_row = rows; + // For StringHashTable, start_row is meanless, instead submap_mx_infos/submap_mx_datas are used. + agg_process_info.start_row = got_resize_exception ? 
0 : rows; } void NO_INLINE @@ -1130,7 +1219,6 @@ Aggregator::executeWithoutKeyImpl(AggregatedDataWithoutKey & res, AggProcessInfo agg_process_info.start_row += agg_size; } - void Aggregator::prepareAggregateInstructions( Columns columns, AggregateColumns & aggregate_columns, diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index 6cbbde71b41..7142077c8ea 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -1319,11 +1319,28 @@ class Aggregator size_t hit_row_cnt = 0; std::vector not_found_rows; + // For StringHashTable resize exception. + std::vector submap_m0_infos{}; + std::vector submap_m1_infos{}; + std::vector submap_m2_infos{}; + std::vector submap_m3_infos{}; + std::vector submap_m4_infos{}; + + std::vector submap_m0_datas{}; + std::vector submap_m1_datas{}; + std::vector submap_m2_datas{}; + std::vector submap_m3_datas{}; + std::vector submap_m4_datas{}; + void prepareForAgg(); bool allBlockDataHandled() const { assert(start_row <= end_row); - return start_row == end_row || aggregator->isCancelled(); + // submap_mx_infos.size() and submap_mx_datas.size() are always equal. + // So only need to check submap_m0_infos is enough. 
+ return (start_row == end_row && !submap_m0_infos.empty() && !submap_m1_infos.empty() + && !submap_m3_infos.empty() && !submap_m4_infos.empty()) + || aggregator->isCancelled(); } void resetBlock(const Block & block_) { @@ -1463,10 +1480,10 @@ class Aggregator template void executeImplBatchStringHashMap( - Method & method, - typename Method::State & state, - Arena * aggregates_pool, - AggProcessInfo & agg_process_info) const; + Method & method, + typename Method::State & state, + Arena * aggregates_pool, + AggProcessInfo & agg_process_info) const; template std::optional::ResultType> emplaceOrFindKey( @@ -1477,15 +1494,22 @@ class Aggregator std::vector & sort_key_containers, const std::vector & hashvals) const; - template - bool emplaceOrFindStringKey( - typename Method::Data & data, - typename Method::State & state, - const std::vector & key_infos, - const std::vector & key_datas, - Arena & aggregates_pool, - std::vector & places, - AggProcessInfo & agg_process_info) const; + template < + size_t SubMapIndex, + bool collect_hit_rate, + bool only_lookup, + bool enable_prefetch, + typename Data, + typename State, + typename StringKeyType> + size_t emplaceOrFindStringKey( + Data & data, + State & state, + const std::vector & key_infos, + std::vector & key_datas, + Arena & aggregates_pool, + std::vector & places, + AggProcessInfo & agg_process_info) const; /// For case when there are no keys (all aggregate into one row). 
static void executeWithoutKeyImpl(AggregatedDataWithoutKey & res, AggProcessInfo & agg_process_info, Arena * arena); From 9dc702dac5e4d8baba90e127607d5e015562fac3 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Wed, 27 Nov 2024 22:37:37 +0800 Subject: [PATCH 05/24] tmp save Signed-off-by: guo-shaoge --- dbms/src/Common/Arena.h | 5 ++ dbms/src/Common/ColumnsHashing.h | 80 +++++++++++++++++++------ dbms/src/Interpreters/Aggregator.cpp | 88 ++++++++++++++++++---------- dbms/src/Interpreters/Aggregator.h | 1 + 4 files changed, 126 insertions(+), 48 deletions(-) diff --git a/dbms/src/Common/Arena.h b/dbms/src/Common/Arena.h index b9999f6b179..eb86e1c283c 100644 --- a/dbms/src/Common/Arena.h +++ b/dbms/src/Common/Arena.h @@ -212,5 +212,10 @@ class Arena : private boost::noncopyable using ArenaPtr = std::shared_ptr; using Arenas = std::vector; +size_t alignOf16(size_t l) +{ + return (l + 15) & ~15; +} + } // namespace DB diff --git a/dbms/src/Common/ColumnsHashing.h b/dbms/src/Common/ColumnsHashing.h index aabe0733f8c..0d8e8d60ef7 100644 --- a/dbms/src/Common/ColumnsHashing.h +++ b/dbms/src/Common/ColumnsHashing.h @@ -52,7 +52,7 @@ struct HashMethodOneNumber const size_t total_rows; /// If the keys of a fixed length then key_sizes contains their lengths, empty otherwise. 
- HashMethodOneNumber(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const TiDB::TiDBCollators &) + HashMethodOneNumber(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const TiDB::TiDBCollators &, Arena *) : total_rows(key_columns[0]->size()) { vec = &static_cast *>(key_columns[0])->getData()[0]; @@ -107,7 +107,8 @@ struct HashMethodString HashMethodString( const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, - const TiDB::TiDBCollators & collators) + const TiDB::TiDBCollators & collators, + Arena *) : total_rows(key_columns[0]->size()) { const IColumn & column = *key_columns[0]; @@ -158,7 +159,7 @@ struct HashMethodStringBin const UInt8 * chars; const size_t total_rows; - HashMethodStringBin(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const TiDB::TiDBCollators &) + HashMethodStringBin(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const TiDB::TiDBCollators &, Arena *) : total_rows(key_columns[0]->size()) { const IColumn & column = *key_columns[0]; @@ -344,6 +345,43 @@ struct KeyDescStringBinPadding : KeyDescStringBin } }; +void serializeColumnToBuffer(Arena * pool, + const ColumnRawPtrs & key_columns, + PaddedPODArray & pos, + PaddedPODArray & sizes) +{ + RUNTIME_CHECK(!key_columns.empty()); + RUNTIME_CHECK(pos.empty() && sizes.empty()); + + const auto rows = key_columns[0]->size(); + pos.resize(rows, nullptr); + sizes.resize(rows, 0); + + for (const auto * col_ptr : key_columns) + col_ptr->countSerializeByteSize(sizes); + + std::vector aligned_sizes; + aligned_sizes.reserve(sizes.size()); + + size_t total_byte_size = 0; + for (auto size : sizes) + { + auto aligned = alignOf16(size); + total_byte_size += aligned; + aligned_sizes.push_back(aligned); + } + + auto * buffer = pool->alloc(total_byte_size); + for (size_t i = 0; i < aligned_sizes.size(); ++i) + { + pos[i] = buffer; + buffer += aligned_sizes[i]; + } + + for (const auto * col_ptr : key_columns) + col_ptr->serializeToPos(pos, 
0, rows, col_ptr->isColumnNullable()); +} + /// For the case when there are 2 keys. template struct HashMethodFastPathTwoKeysSerialized @@ -356,12 +394,16 @@ struct HashMethodFastPathTwoKeysSerialized Key1Desc key_1_desc; Key2Desc key_2_desc; const size_t total_rows; + PaddedPODArray pos; + PaddedPODArray sizes; - HashMethodFastPathTwoKeysSerialized(const ColumnRawPtrs & key_columns, const Sizes &, const TiDB::TiDBCollators &) + HashMethodFastPathTwoKeysSerialized(const ColumnRawPtrs & key_columns, const Sizes &, const TiDB::TiDBCollators &, Arena * pool) : key_1_desc(key_columns[0]) , key_2_desc(key_columns[1]) , total_rows(key_columns[0]->size()) - {} + { + serializeColumnToBuffer(pool, key_columns, pos, sizes); + } ALWAYS_INLINE inline auto getKeyHolder(ssize_t row, Arena * pool, std::vector &) const { @@ -400,7 +442,8 @@ struct HashMethodFixedString HashMethodFixedString( const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, - const TiDB::TiDBCollators & collators) + const TiDB::TiDBCollators & collators, + Arena *) : total_rows(key_columns[0]->size()) { const IColumn & column = *key_columns[0]; @@ -477,7 +520,7 @@ struct HashMethodKeysFixed return true; } - HashMethodKeysFixed(const ColumnRawPtrs & key_columns, const Sizes & key_sizes_, const TiDB::TiDBCollators &) + HashMethodKeysFixed(const ColumnRawPtrs & key_columns, const Sizes & key_sizes_, const TiDB::TiDBCollators &, Arena *) : Base(key_columns) , key_sizes(std::move(key_sizes_)) , keys_size(key_columns.size()) @@ -612,25 +655,28 @@ struct HashMethodSerialized size_t keys_size; TiDB::TiDBCollators collators; const size_t total_rows; + PaddedPODArray pos; + PaddedPODArray sizes; HashMethodSerialized( const ColumnRawPtrs & key_columns_, const Sizes & /*key_sizes*/, - const TiDB::TiDBCollators & collators_) + const TiDB::TiDBCollators & collators_, + Arena * pool) : key_columns(key_columns_) , keys_size(key_columns_.size()) , collators(collators_) , total_rows(key_columns_[0]->size()) - {} + { + 
serializeColumnToBuffer(pool, key_columns_, pos, sizes); + } - ALWAYS_INLINE inline SerializedKeyHolder getKeyHolder( - size_t row, - Arena * pool, - std::vector & sort_key_containers) const + ALWAYS_INLINE inline StringRef getKeyHolder( + size_t row, + Arena *, + std::vector &) const { - return SerializedKeyHolder{ - serializeKeysToPoolContiguous(row, keys_size, key_columns, collators, sort_key_containers, *pool), - *pool}; + return StringRef(pos[row], sizes[row]); } protected: @@ -650,7 +696,7 @@ struct HashMethodHashed TiDB::TiDBCollators collators; const size_t total_rows; - HashMethodHashed(ColumnRawPtrs key_columns_, const Sizes &, const TiDB::TiDBCollators & collators_) + HashMethodHashed(ColumnRawPtrs key_columns_, const Sizes &, const TiDB::TiDBCollators & collators_, Arena *) : key_columns(std::move(key_columns_)) , collators(collators_) , total_rows(key_columns[0]->size()) diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 4faec37ce9d..1738121b665 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -741,6 +741,7 @@ template < bool collect_hit_rate, bool only_lookup, bool enable_prefetch, + bool zero_agg_func_size, typename Data, typename State, typename StringKeyType> @@ -763,7 +764,8 @@ size_t Aggregator::emplaceOrFindStringKey( hashvals[i] = Hash::operator()(keyHolderGetKey(key_datas[0])); } - AggregateDataPtr agg_state = nullptr; + // alloc 0 bytes is useful when agg func size is zero. 
+ AggregateDataPtr agg_state = aggregates_pool.alloc(0); for (size_t i = 0; i < key_infos.size(); ++i) { try @@ -787,21 +789,31 @@ size_t Aggregator::emplaceOrFindStringKey( = state.template emplaceStringKey(data, i, key_datas, hashvals); if (emplace_result.isInserted()) { - emplace_result.setMapped(nullptr); + if constexpr (zero_agg_func_size) + { + emplace_result.setMapped(agg_state); + } + else + { + emplace_result.setMapped(nullptr); - agg_state = aggregates_pool.alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - createAggregateStates(agg_state); + agg_state + = aggregates_pool.alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + createAggregateStates(agg_state); - emplace_result.setMapped(agg_state); + emplace_result.setMapped(agg_state); + } } else { - agg_state = emplace_result.getMapped(); + if constexpr (!zero_agg_func_size) + agg_state = emplace_result.getMapped(); if constexpr (collect_hit_rate) ++agg_process_info.hit_row_cnt; } - places[i] = agg_state; + if constexpr (!zero_agg_func_size) + places[i] = agg_state; } } catch (ResizeException &) @@ -1130,30 +1142,41 @@ ALWAYS_INLINE void Aggregator::executeImplBatchStringHashMap( bool got_resize_exception = false; size_t emplaced_index = 0; - -#define M(INDEX, INFO, DATA, PLACES) \ - if unlikely (got_resize_exception) \ - { \ - emplaced_index = 0; \ - } \ - else if (!(INFO).empty()) \ - { \ - emplaced_index = emplaceOrFindStringKey( \ - method.data, \ - state, \ - (INFO), \ - (DATA), \ - *aggregates_pool, \ - (PLACES), \ - agg_process_info); \ - if unlikely (emplaced_index != (INFO).size()) \ - got_resize_exception = true; \ - } \ - setupExceptionRecoveryInfoForStringHashTable( \ - agg_process_info, \ - emplaced_index, \ - INFO, \ - DATA, \ + bool zero_agg_func_size = (params.aggregates_size == 0); + +#define M(INDEX, INFO, DATA, PLACES) \ + if unlikely (got_resize_exception) \ + { \ + emplaced_index = 0; \ + } \ + else if (!(INFO).empty()) \ + { \ + if 
(zero_agg_func_size) \ + emplaced_index = emplaceOrFindStringKey( \ + method.data, \ + state, \ + (INFO), \ + (DATA), \ + *aggregates_pool, \ + (PLACES), \ + agg_process_info); \ + else \ + emplaced_index = emplaceOrFindStringKey( \ + method.data, \ + state, \ + (INFO), \ + (DATA), \ + *aggregates_pool, \ + (PLACES), \ + agg_process_info); \ + if unlikely (emplaced_index != (INFO).size()) \ + got_resize_exception = true; \ + } \ + setupExceptionRecoveryInfoForStringHashTable( \ + agg_process_info, \ + emplaced_index, \ + INFO, \ + DATA, \ std::integral_constant{}); M(0, key0_infos, key0_datas, key0_places) @@ -1163,6 +1186,9 @@ ALWAYS_INLINE void Aggregator::executeImplBatchStringHashMap( M(4, key_str_infos, key_str_datas, key_str_places) #undef M + if (zero_agg_func_size) + return; + RUNTIME_CHECK( rows == key0_places.size() + key8_places.size() + key16_places.size() + key24_places.size() + key_str_places.size()); diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index 7142077c8ea..cc2dcd2a408 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -1499,6 +1499,7 @@ class Aggregator bool collect_hit_rate, bool only_lookup, bool enable_prefetch, + bool zero_agg_func_size, typename Data, typename State, typename StringKeyType> From ce1f76754e7a454d380c98c9330273481d170556 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Thu, 28 Nov 2024 16:29:05 +0800 Subject: [PATCH 06/24] revert Serialized Key changes Signed-off-by: guo-shaoge --- dbms/src/Common/Arena.h | 5 -- dbms/src/Common/ColumnsHashing.h | 80 +++++++------------------------- 2 files changed, 17 insertions(+), 68 deletions(-) diff --git a/dbms/src/Common/Arena.h b/dbms/src/Common/Arena.h index eb86e1c283c..b9999f6b179 100644 --- a/dbms/src/Common/Arena.h +++ b/dbms/src/Common/Arena.h @@ -212,10 +212,5 @@ class Arena : private boost::noncopyable using ArenaPtr = std::shared_ptr; using Arenas = std::vector; -size_t alignOf16(size_t l) -{ - 
return (l + 15) & ~15; -} - } // namespace DB diff --git a/dbms/src/Common/ColumnsHashing.h b/dbms/src/Common/ColumnsHashing.h index 0d8e8d60ef7..aabe0733f8c 100644 --- a/dbms/src/Common/ColumnsHashing.h +++ b/dbms/src/Common/ColumnsHashing.h @@ -52,7 +52,7 @@ struct HashMethodOneNumber const size_t total_rows; /// If the keys of a fixed length then key_sizes contains their lengths, empty otherwise. - HashMethodOneNumber(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const TiDB::TiDBCollators &, Arena *) + HashMethodOneNumber(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const TiDB::TiDBCollators &) : total_rows(key_columns[0]->size()) { vec = &static_cast *>(key_columns[0])->getData()[0]; @@ -107,8 +107,7 @@ struct HashMethodString HashMethodString( const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, - const TiDB::TiDBCollators & collators, - Arena *) + const TiDB::TiDBCollators & collators) : total_rows(key_columns[0]->size()) { const IColumn & column = *key_columns[0]; @@ -159,7 +158,7 @@ struct HashMethodStringBin const UInt8 * chars; const size_t total_rows; - HashMethodStringBin(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const TiDB::TiDBCollators &, Arena *) + HashMethodStringBin(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const TiDB::TiDBCollators &) : total_rows(key_columns[0]->size()) { const IColumn & column = *key_columns[0]; @@ -345,43 +344,6 @@ struct KeyDescStringBinPadding : KeyDescStringBin } }; -void serializeColumnToBuffer(Arena * pool, - const ColumnRawPtrs & key_columns, - PaddedPODArray & pos, - PaddedPODArray & sizes) -{ - RUNTIME_CHECK(!key_columns.empty()); - RUNTIME_CHECK(pos.empty() && sizes.empty()); - - const auto rows = key_columns[0]->size(); - pos.resize(rows, nullptr); - sizes.resize(rows, 0); - - for (const auto * col_ptr : key_columns) - col_ptr->countSerializeByteSize(sizes); - - std::vector aligned_sizes; - aligned_sizes.reserve(sizes.size()); 
- - size_t total_byte_size = 0; - for (auto size : sizes) - { - auto aligned = alignOf16(size); - total_byte_size += aligned; - aligned_sizes.push_back(aligned); - } - - auto * buffer = pool->alloc(total_byte_size); - for (size_t i = 0; i < aligned_sizes.size(); ++i) - { - pos[i] = buffer; - buffer += aligned_sizes[i]; - } - - for (const auto * col_ptr : key_columns) - col_ptr->serializeToPos(pos, 0, rows, col_ptr->isColumnNullable()); -} - /// For the case when there are 2 keys. template struct HashMethodFastPathTwoKeysSerialized @@ -394,16 +356,12 @@ struct HashMethodFastPathTwoKeysSerialized Key1Desc key_1_desc; Key2Desc key_2_desc; const size_t total_rows; - PaddedPODArray pos; - PaddedPODArray sizes; - HashMethodFastPathTwoKeysSerialized(const ColumnRawPtrs & key_columns, const Sizes &, const TiDB::TiDBCollators &, Arena * pool) + HashMethodFastPathTwoKeysSerialized(const ColumnRawPtrs & key_columns, const Sizes &, const TiDB::TiDBCollators &) : key_1_desc(key_columns[0]) , key_2_desc(key_columns[1]) , total_rows(key_columns[0]->size()) - { - serializeColumnToBuffer(pool, key_columns, pos, sizes); - } + {} ALWAYS_INLINE inline auto getKeyHolder(ssize_t row, Arena * pool, std::vector &) const { @@ -442,8 +400,7 @@ struct HashMethodFixedString HashMethodFixedString( const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, - const TiDB::TiDBCollators & collators, - Arena *) + const TiDB::TiDBCollators & collators) : total_rows(key_columns[0]->size()) { const IColumn & column = *key_columns[0]; @@ -520,7 +477,7 @@ struct HashMethodKeysFixed return true; } - HashMethodKeysFixed(const ColumnRawPtrs & key_columns, const Sizes & key_sizes_, const TiDB::TiDBCollators &, Arena *) + HashMethodKeysFixed(const ColumnRawPtrs & key_columns, const Sizes & key_sizes_, const TiDB::TiDBCollators &) : Base(key_columns) , key_sizes(std::move(key_sizes_)) , keys_size(key_columns.size()) @@ -655,28 +612,25 @@ struct HashMethodSerialized size_t keys_size; TiDB::TiDBCollators 
collators; const size_t total_rows; - PaddedPODArray pos; - PaddedPODArray sizes; HashMethodSerialized( const ColumnRawPtrs & key_columns_, const Sizes & /*key_sizes*/, - const TiDB::TiDBCollators & collators_, - Arena * pool) + const TiDB::TiDBCollators & collators_) : key_columns(key_columns_) , keys_size(key_columns_.size()) , collators(collators_) , total_rows(key_columns_[0]->size()) - { - serializeColumnToBuffer(pool, key_columns_, pos, sizes); - } + {} - ALWAYS_INLINE inline StringRef getKeyHolder( - size_t row, - Arena *, - std::vector &) const + ALWAYS_INLINE inline SerializedKeyHolder getKeyHolder( + size_t row, + Arena * pool, + std::vector & sort_key_containers) const { - return StringRef(pos[row], sizes[row]); + return SerializedKeyHolder{ + serializeKeysToPoolContiguous(row, keys_size, key_columns, collators, sort_key_containers, *pool), + *pool}; } protected: @@ -696,7 +650,7 @@ struct HashMethodHashed TiDB::TiDBCollators collators; const size_t total_rows; - HashMethodHashed(ColumnRawPtrs key_columns_, const Sizes &, const TiDB::TiDBCollators & collators_, Arena *) + HashMethodHashed(ColumnRawPtrs key_columns_, const Sizes &, const TiDB::TiDBCollators & collators_) : key_columns(std::move(key_columns_)) , collators(collators_) , total_rows(key_columns[0]->size()) From 8ac8bebe1280ba49a570fb20cef9da78c4d0454f Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Thu, 28 Nov 2024 17:50:30 +0800 Subject: [PATCH 07/24] refine Signed-off-by: guo-shaoge --- dbms/src/Common/ColumnsHashingImpl.h | 38 ++++++++-------- dbms/src/Common/HashTable/StringHashTable.h | 44 +++---------------- .../HashTable/TwoLevelStringHashTable.h | 31 +------------ dbms/src/Interpreters/Aggregator.h | 9 ++-- 4 files changed, 31 insertions(+), 91 deletions(-) diff --git a/dbms/src/Common/ColumnsHashingImpl.h b/dbms/src/Common/ColumnsHashingImpl.h index aa583f1a722..ffaffdcd758 100644 --- a/dbms/src/Common/ColumnsHashingImpl.h +++ b/dbms/src/Common/ColumnsHashingImpl.h @@ -130,6 +130,14 
@@ class HashMethodBase using Cache = LastElementCache; static constexpr size_t prefetch_step = 16; + template + static ALWAYS_INLINE inline void prefetch(Map & map, size_t idx, const std::vector & hashvals) + { + const auto prefetch_idx = idx + prefetch_step; + if likely (prefetch_idx < hashvals.size()) + map.prefetch(hashvals[prefetch_idx]); + } + template ALWAYS_INLINE inline EmplaceResult emplaceKey( Data & data, @@ -141,10 +149,8 @@ class HashMethodBase auto key_holder = static_cast(*this).getKeyHolder(row, &pool, sort_key_containers); if constexpr (enable_prefetch) { - const auto idx = row + prefetch_step; - if (idx < hashvals.size()) - data.prefetch(hashvals[idx]); - + assert(hashvals.size() == static_cast(*this).total_rows); + prefetch(data, row, hashvals); return emplaceImpl(key_holder, data, hashvals[row]); } else @@ -164,10 +170,8 @@ class HashMethodBase auto key_holder = static_cast(*this).getKeyHolder(row, &pool, sort_key_containers); if constexpr (enable_prefetch) { - const auto idx = row + prefetch_step; - if (idx < hashvals.size()) - data.prefetch(hashvals[idx]); - + assert(hashvals.size() == static_cast(*this).total_rows); + prefetch(data, row, hashvals); return findKeyImpl(keyHolderGetKey(key_holder), data, hashvals[row]); } else @@ -176,7 +180,6 @@ class HashMethodBase } } - // TODO emplaceStringKey merge with emplaceKey? 
template ALWAYS_INLINE inline EmplaceResult emplaceStringKey( Data & data, @@ -184,20 +187,17 @@ class HashMethodBase std::vector & datas, // TODO const const std::vector & hashvals) { + assert(hashvals.size() == static_cast(*this).total_rows); + auto & submap = StringHashTableSubMapSelector>::getSubMap( hashvals[idx], data); if constexpr (enable_prefetch) - { - const auto prefetch_idx = idx + prefetch_step; - if (prefetch_idx < hashvals.size()) - submap.prefetch(hashvals[prefetch_idx]); - } + prefetch(submap, idx, hashvals); return emplaceImpl(datas[idx], submap, hashvals[idx]); } - // TODO Macro with emplaceStringKey template ALWAYS_INLINE inline FindResult findStringKey( Data & data, @@ -205,15 +205,13 @@ class HashMethodBase std::vector & datas, // TODO const const std::vector & hashvals) { + assert(hashvals.size() == static_cast(*this).total_rows); + auto & submap = StringHashTableSubMapSelector>::getSubMap( hashvals[idx], data); if constexpr (enable_prefetch) - { - const auto prefetch_idx = idx + prefetch_step; - if (prefetch_idx < hashvals.size()) - submap.prefetch(hashvals[prefetch_idx]); - } + prefetch(submap, idx, hashvals); return findKeyImpl(keyHolderGetKey(datas[idx]), submap, hashvals[idx]); } diff --git a/dbms/src/Common/HashTable/StringHashTable.h b/dbms/src/Common/HashTable/StringHashTable.h index a511ce47671..ef668864120 100644 --- a/dbms/src/Common/HashTable/StringHashTable.h +++ b/dbms/src/Common/HashTable/StringHashTable.h @@ -139,8 +139,7 @@ struct StringHashTableEmpty //-V730 return hasZero() ? 
zeroValue() : nullptr; } - void ALWAYS_INLINE prefetch(size_t) {} - + ALWAYS_INLINE inline void prefetch() {} void write(DB::WriteBuffer & wb) const { zeroValue()->write(wb); } void writeText(DB::WriteBuffer & wb) const { zeroValue()->writeText(wb); } void read(DB::ReadBuffer & rb) { zeroValue()->read(rb); } @@ -348,7 +347,6 @@ class StringHashTable : private boost::noncopyable #endif dispatch(Self & self, KeyHolder && key_holder, Func && func) { - StringHashTableHash hash; const StringRef & x = keyHolderGetKey(key_holder); const size_t sz = x.size; if (sz == 0) @@ -361,7 +359,7 @@ class StringHashTable : private boost::noncopyable { // Strings with trailing zeros are not representable as fixed-size // string keys. Put them to the generic table. - return func(self.ms, std::forward(key_holder), hash(x)); + return func(self.ms, std::forward(key_holder), StringHashTableHash::operator()(x)); } const char * p = x.data; @@ -397,7 +395,7 @@ class StringHashTable : private boost::noncopyable n[0] <<= s; } keyHolderDiscardKey(key_holder); - return func(self.m1, k8, hash(k8)); + return func(self.m1, k8, StringHashTableHash::operator()(k8)); } case 1: // 9..16 bytes { @@ -409,7 +407,7 @@ class StringHashTable : private boost::noncopyable else n[1] <<= s; keyHolderDiscardKey(key_holder); - return func(self.m2, k16, hash(k16)); + return func(self.m2, k16, StringHashTableHash::operator()(k16)); } case 2: // 17..24 bytes { @@ -421,11 +419,11 @@ class StringHashTable : private boost::noncopyable else n[2] <<= s; keyHolderDiscardKey(key_holder); - return func(self.m3, k24, hash(k24)); + return func(self.m3, k24, StringHashTableHash::operator()(k24)); } default: // >= 25 bytes { - return func(self.ms, std::forward(key_holder), hash(x)); + return func(self.ms, std::forward(key_holder), StringHashTableHash::operator()(x)); } } } @@ -455,13 +453,6 @@ class StringHashTable : private boost::noncopyable this->dispatch(*this, key_holder, EmplaceCallable(it, inserted)); } - // TODO del - 
template - void ALWAYS_INLINE emplace(KeyHolder &&, LookupResult &, bool &, size_t) - { - RUNTIME_CHECK_MSG(false, "shouldn't reach here, you should use submap::emplace instead"); - } - struct FindCallable { // find() doesn't need any key memory management, so we don't work with @@ -478,35 +469,12 @@ class StringHashTable : private boost::noncopyable } }; - // We will not prefetch StringHashTable directly, instead caller should call specific submap's prefetch. - // Because StringHashTable doesn't know which submap to prefetch. - void prefetch(size_t) const - { - RUNTIME_CHECK_MSG(false, "shouldn't reach here, you should use submap::prefetch instead"); - } - LookupResult ALWAYS_INLINE find(const Key & x) { return dispatch(*this, x, FindCallable{}); } ConstLookupResult ALWAYS_INLINE find(const Key & x) const { return dispatch(*this, x, FindCallable{}); } - // TODO del - LookupResult ALWAYS_INLINE find(const Key &, size_t) - { - RUNTIME_CHECK_MSG(false, "shouldn't reach here, you should use submap::find instead"); - } - ConstLookupResult ALWAYS_INLINE find(const Key &, size_t) const - { - RUNTIME_CHECK_MSG(false, "shouldn't reach here, you should use submap::find instead"); - } - bool ALWAYS_INLINE has(const Key & x, size_t = 0) const { return dispatch(*this, x, FindCallable{}) != nullptr; } - template - size_t ALWAYS_INLINE hash(const HashKeyType & key) const - { - return SubMaps::Hash::operator()(key); - } - void write(DB::WriteBuffer & wb) const { m0.write(wb); diff --git a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h index e7ea1bb8fce..5ea460769ab 100644 --- a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h @@ -33,20 +33,8 @@ class TwoLevelStringHashTable : private boost::noncopyable static constexpr bool is_string_hash_map = true; static constexpr bool is_two_level = true; - template - size_t ALWAYS_INLINE hash(const HashKeyType & key) const 
- { - return SubMaps::Hash::operator()(key); - } - - // Same reason as StringHashTable::prefetch. - void prefetch(size_t) const - { - RUNTIME_CHECK_MSG(false, "shouldn't reach here, you should use submap::prefetch instead"); - } - // TODO: currently hashing contains redundant computations when doing distributed or external aggregations - size_t hashStringRef(const Key & x) const + size_t hash(const Key & x) const { return const_cast(*this).dispatch(*this, x, [&](const auto &, const auto &, size_t hash) { return hash; @@ -59,7 +47,7 @@ class TwoLevelStringHashTable : private boost::noncopyable impl.setResizeCallback(resize_callback); } - size_t operator()(const Key & x) const { return hashStringRef(x); } + size_t operator()(const Key & x) const { return hash(x); } /// NOTE Bad for hash tables with more than 2^32 cells. static size_t getBucketFromHash(size_t hash_value) { return (hash_value >> (32 - BITS_FOR_BUCKET)) & MAX_BUCKET; } @@ -216,27 +204,12 @@ class TwoLevelStringHashTable : private boost::noncopyable dispatch(*this, key_holder, typename Impl::EmplaceCallable{it, inserted}); } - template - void ALWAYS_INLINE emplace(KeyHolder &&, LookupResult &, bool &, size_t) - { - RUNTIME_CHECK_MSG(false, "shouldn't reach here, you should use submap::emplace instead"); - } - LookupResult ALWAYS_INLINE find(const Key & x) { return dispatch(*this, x, typename Impl::FindCallable{}); } ConstLookupResult ALWAYS_INLINE find(const Key & x) const { return dispatch(*this, x, typename Impl::FindCallable{}); } - LookupResult ALWAYS_INLINE find(const Key &, size_t) - { - RUNTIME_CHECK_MSG(false, "shouldn't reach here, you should use submap::find instead"); - } - - ConstLookupResult ALWAYS_INLINE find(const Key &, size_t) const - { - RUNTIME_CHECK_MSG(false, "shouldn't reach here, you should use submap::find instead"); - } void write(DB::WriteBuffer & wb) const { diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index cc2dcd2a408..53bc989dcbc 
100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -1319,7 +1319,8 @@ class Aggregator size_t hit_row_cnt = 0; std::vector not_found_rows; - // For StringHashTable resize exception. + // For StringHashTable, when resize exception happens, the process will be interrupted. + // So we need these infos to continue. std::vector submap_m0_infos{}; std::vector submap_m1_infos{}; std::vector submap_m2_infos{}; @@ -1337,9 +1338,9 @@ class Aggregator { assert(start_row <= end_row); // submap_mx_infos.size() and submap_mx_datas.size() are always equal. - // So only need to check submap_m0_infos is enough. - return (start_row == end_row && !submap_m0_infos.empty() && !submap_m1_infos.empty() - && !submap_m3_infos.empty() && !submap_m4_infos.empty()) + // So only need to check submap_mx_infos is enough. + return (start_row == end_row && submap_m0_infos.empty() && submap_m1_infos.empty() + && submap_m3_infos.empty() && submap_m4_infos.empty()) || aggregator->isCancelled(); } void resetBlock(const Block & block_) From 0053ce8a82f19cd8772ce5f00cea160b20934250 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Thu, 28 Nov 2024 18:03:50 +0800 Subject: [PATCH 08/24] refine Signed-off-by: guo-shaoge --- dbms/src/Common/HashTable/StringHashTable.h | 2 +- dbms/src/Interpreters/Aggregator.cpp | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/dbms/src/Common/HashTable/StringHashTable.h b/dbms/src/Common/HashTable/StringHashTable.h index ef668864120..fde0f810ae6 100644 --- a/dbms/src/Common/HashTable/StringHashTable.h +++ b/dbms/src/Common/HashTable/StringHashTable.h @@ -139,7 +139,7 @@ struct StringHashTableEmpty //-V730 return hasZero() ? 
zeroValue() : nullptr; } - ALWAYS_INLINE inline void prefetch() {} + ALWAYS_INLINE inline void prefetch(size_t) {} void write(DB::WriteBuffer & wb) const { zeroValue()->write(wb); } void writeText(DB::WriteBuffer & wb) const { zeroValue()->writeText(wb); } void read(DB::ReadBuffer & rb) { zeroValue()->read(rb); } diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 1738121b665..d8210fc9a9c 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -1090,13 +1090,14 @@ ALWAYS_INLINE void Aggregator::executeImplBatchStringHashMap( "executeImplBatchStringHashMap only handle resize exception for each Block instead of row"); const size_t reserve_size = rows / 4; -#define M(INFO, DATA, SUBMAPINDEX, KEYTYPE) \ - (INFO).reserve(reserve_size); \ - (DATA).reserve(reserve_size); \ - auto dispatch_callback_key##SUBMAPINDEX = [&INFO, &DATA](const KEYTYPE & key, size_t row) { \ - (INFO).push_back(row); \ - (DATA).push_back(key); \ - }; +#define M(INFO, DATA, SUBMAPINDEX, KEYTYPE) \ + (INFO).reserve(reserve_size); \ + (DATA).reserve(reserve_size); \ + auto dispatch_callback_key##SUBMAPINDEX \ + = [&INFO, &DATA](const KEYTYPE & key, size_t row) { /* NOLINT(bugprone-macro-parentheses) */ \ + (INFO).push_back(row); \ + (DATA).push_back(key); \ + }; M(key0_infos, key0_datas, 0, StringRef) M(key8_infos, key8_datas, 8, StringKey8) From fcf8ed2b49ebc8e7d3c8a0c44e035bc4477c5663 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Fri, 29 Nov 2024 17:18:00 +0800 Subject: [PATCH 09/24] fix unit test Signed-off-by: guo-shaoge --- dbms/src/Common/ColumnsHashingImpl.h | 6 ++++-- dbms/src/Interpreters/Aggregator.cpp | 30 ++++++++++++---------------- dbms/src/Interpreters/Aggregator.h | 9 ++++++--- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/dbms/src/Common/ColumnsHashingImpl.h b/dbms/src/Common/ColumnsHashingImpl.h index ffaffdcd758..1f4e3dbaedf 100644 --- a/dbms/src/Common/ColumnsHashingImpl.h +++ 
b/dbms/src/Common/ColumnsHashingImpl.h @@ -187,7 +187,9 @@ class HashMethodBase std::vector & datas, // TODO const const std::vector & hashvals) { - assert(hashvals.size() == static_cast(*this).total_rows); + // For spill, hashvals.size() will be le to total_rows. + // Because only remaining rows that didn't insert into HashMap will be handled here. + assert(hashvals.size() <= static_cast(*this).total_rows); auto & submap = StringHashTableSubMapSelector>::getSubMap( hashvals[idx], @@ -205,7 +207,7 @@ class HashMethodBase std::vector & datas, // TODO const const std::vector & hashvals) { - assert(hashvals.size() == static_cast(*this).total_rows); + assert(hashvals.size() <= static_cast(*this).total_rows); auto & submap = StringHashTableSubMapSelector>::getSubMap( hashvals[idx], diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index d8210fc9a9c..041f77fc155 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -760,9 +760,7 @@ size_t Aggregator::emplaceOrFindStringKey( using Hash = typename StringHashTableSubMapSelector>::Hash; std::vector hashvals(key_infos.size(), 0); for (size_t i = 0; i < key_infos.size(); ++i) - { - hashvals[i] = Hash::operator()(keyHolderGetKey(key_datas[0])); - } + hashvals[i] = Hash::operator()(keyHolderGetKey(key_datas[i])); // alloc 0 bytes is useful when agg func size is zero. AggregateDataPtr agg_state = aggregates_pool.alloc(0); @@ -1080,14 +1078,12 @@ ALWAYS_INLINE void Aggregator::executeImplBatchStringHashMap( #undef M const size_t rows = agg_process_info.end_row - agg_process_info.start_row; + // If no resize exception happens, so this is a new Block. + // If resize exception happens, start_row also set as zero. + RUNTIME_CHECK(agg_process_info.start_row == 0); - if likely (agg_process_info.allBlockDataHandled()) + if likely (agg_process_info.stringHashTableRecoveryInfoEmpty()) { - // No resize exception happens, so this is a new Block. 
- RUNTIME_CHECK(agg_process_info.start_row == 0); - RUNTIME_CHECK_MSG( - rows == state.total_rows, - "executeImplBatchStringHashMap only handle resize exception for each Block instead of row"); const size_t reserve_size = rows / 4; #define M(INFO, DATA, SUBMAPINDEX, KEYTYPE) \ @@ -1146,11 +1142,7 @@ ALWAYS_INLINE void Aggregator::executeImplBatchStringHashMap( bool zero_agg_func_size = (params.aggregates_size == 0); #define M(INDEX, INFO, DATA, PLACES) \ - if unlikely (got_resize_exception) \ - { \ - emplaced_index = 0; \ - } \ - else if (!(INFO).empty()) \ + if (!(INFO).empty()) \ { \ if (zero_agg_func_size) \ emplaced_index = emplaceOrFindStringKey( \ @@ -1173,11 +1165,15 @@ ALWAYS_INLINE void Aggregator::executeImplBatchStringHashMap( if unlikely (emplaced_index != (INFO).size()) \ got_resize_exception = true; \ } \ + else \ + { \ + emplaced_index = 0; \ + } \ setupExceptionRecoveryInfoForStringHashTable( \ agg_process_info, \ emplaced_index, \ - INFO, \ - DATA, \ + (INFO), \ + (DATA), \ std::integral_constant{}); M(0, key0_infos, key0_datas, key0_places) @@ -1221,7 +1217,7 @@ ALWAYS_INLINE void Aggregator::executeImplBatchStringHashMap( aggregates_pool); } // For StringHashTable, start_row is meanless, instead submap_mx_infos/submap_mx_datas are used. - agg_process_info.start_row = got_resize_exception ? 0 : rows; + agg_process_info.start_row = got_resize_exception ? 0 : agg_process_info.end_row; } void NO_INLINE diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index 53bc989dcbc..f5217058ff8 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -1339,9 +1339,12 @@ class Aggregator assert(start_row <= end_row); // submap_mx_infos.size() and submap_mx_datas.size() are always equal. // So only need to check submap_mx_infos is enough. 
- return (start_row == end_row && submap_m0_infos.empty() && submap_m1_infos.empty() - && submap_m3_infos.empty() && submap_m4_infos.empty()) - || aggregator->isCancelled(); + return (start_row == end_row && stringHashTableRecoveryInfoEmpty()) || aggregator->isCancelled(); + } + bool stringHashTableRecoveryInfoEmpty() const + { + return submap_m0_infos.empty() && submap_m1_infos.empty() && + submap_m3_infos.empty() && submap_m4_infos.empty(); } void resetBlock(const Block & block_) { From ae7b969f3f83b9e6dd88f99b0bb478926895be56 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Mon, 2 Dec 2024 15:13:55 +0800 Subject: [PATCH 10/24] refine Signed-off-by: guo-shaoge --- dbms/src/Common/ColumnsHashing.h | 90 ++++++++-------- dbms/src/Common/HashTable/StringHashTable.h | 2 + dbms/src/Interpreters/Aggregator.cpp | 45 ++++---- dbms/src/Interpreters/Aggregator.h | 13 +-- dbms/src/Interpreters/JoinPartition.cpp | 28 ++--- dbms/src/Interpreters/SetVariants.h | 4 +- dbms/src/TiDB/Collation/Collator.cpp | 112 ++++++++++++++++++++ dbms/src/TiDB/Collation/Collator.h | 10 ++ 8 files changed, 221 insertions(+), 83 deletions(-) diff --git a/dbms/src/Common/ColumnsHashing.h b/dbms/src/Common/ColumnsHashing.h index aabe0733f8c..94526714250 100644 --- a/dbms/src/Common/ColumnsHashing.h +++ b/dbms/src/Common/ColumnsHashing.h @@ -91,12 +91,11 @@ struct HashMethodOneNumber /// For the case when there is one string key. 
-template +template struct HashMethodString - : public columns_hashing_impl:: - HashMethodBase, Value, Mapped, use_cache> + : public columns_hashing_impl::HashMethodBase, Value, Mapped, use_cache> { - using Self = HashMethodString; + using Self = HashMethodString; using Base = columns_hashing_impl::HashMethodBase; const IColumn::Offset * offsets; @@ -115,36 +114,40 @@ struct HashMethodString offsets = column_string.getOffsets().data(); chars = column_string.getChars().data(); if (!collators.empty()) - { - if constexpr (!place_string_to_arena) - throw Exception("String with collator must be placed on arena.", ErrorCodes::LOGICAL_ERROR); collator = collators[0]; - } } - ALWAYS_INLINE inline auto getKeyHolder( + ALWAYS_INLINE inline ArenaKeyHolder getKeyHolder( ssize_t row, [[maybe_unused]] Arena * pool, - std::vector & sort_key_containers) const + [[maybe_unused]] std::vector & sort_key_containers) const { - auto last_offset = row == 0 ? 0 : offsets[row - 1]; - // Remove last zero byte. - StringRef key(chars + last_offset, offsets[row] - last_offset - 1); + auto key = getKey(row); + if (likely(collator)) + key = collator->sortKey(key.data, key.size, sort_key_containers[0]); - if constexpr (place_string_to_arena) - { - if (likely(collator)) - key = collator->sortKey(key.data, key.size, sort_key_containers[0]); - return ArenaKeyHolder{key, pool}; - } - else - { - return key; - } + return ArenaKeyHolder{key, pool}; + } + + ALWAYS_INLINE inline ArenaKeyHolder getKeyHolder(ssize_t row, Arena * pool, Arena * sort_key_pool) const + { + auto key = getKey(row); + if (likely(collator)) + key = collator->sortKey(key.data, key.size, *sort_key_pool); + + return ArenaKeyHolder{key, pool}; } protected: friend class columns_hashing_impl::HashMethodBase; + +private: + ALWAYS_INLINE inline StringRef getKey(size_t row) const + { + auto last_offset = row == 0 ? 0 : offsets[row - 1]; + // Remove last zero byte. 
+ return StringRef(chars + last_offset, offsets[row] - last_offset - 1); + } }; template @@ -168,6 +171,11 @@ struct HashMethodStringBin } ALWAYS_INLINE inline auto getKeyHolder(ssize_t row, Arena * pool, std::vector &) const + { + return getKeyHolder(row, pool, nullptr); + } + + ALWAYS_INLINE inline auto getKeyHolder(ssize_t row, Arena * pool, Arena *) const { auto last_offset = row == 0 ? 0 : offsets[row - 1]; StringRef key(chars + last_offset, offsets[row] - last_offset - 1); @@ -381,15 +389,12 @@ struct HashMethodFastPathTwoKeysSerialized /// For the case when there is one fixed-length string key. -template +template struct HashMethodFixedString - : public columns_hashing_impl::HashMethodBase< - HashMethodFixedString, - Value, - Mapped, - use_cache> + : public columns_hashing_impl:: + HashMethodBase, Value, Mapped, use_cache> { - using Self = HashMethodFixedString; + using Self = HashMethodFixedString; using Base = columns_hashing_impl::HashMethodBase; size_t n; @@ -411,26 +416,25 @@ struct HashMethodFixedString collator = collators[0]; } - ALWAYS_INLINE inline auto getKeyHolder( + ALWAYS_INLINE inline ArenaKeyHolder getKeyHolder( size_t row, - [[maybe_unused]] Arena * pool, + Arena * pool, std::vector & sort_key_containers) const { StringRef key(&(*chars)[row * n], n); - if (collator) - { key = collator->sortKeyFastPath(key.data, key.size, sort_key_containers[0]); - } - if constexpr (place_string_to_arena) - { - return ArenaKeyHolder{key, pool}; - } - else - { - return key; - } + return ArenaKeyHolder{key, pool}; + } + + ALWAYS_INLINE inline ArenaKeyHolder getKeyHolder(size_t row, Arena * pool, Arena * sort_key_pool) const + { + StringRef key(&(*chars)[row * n], n); + if (collator) + key = collator->sortKeyFastPath(key.data, key.size, *sort_key_pool); + + return ArenaKeyHolder{key, pool}; } protected: diff --git a/dbms/src/Common/HashTable/StringHashTable.h b/dbms/src/Common/HashTable/StringHashTable.h index fde0f810ae6..a43f35fdbbf 100644 --- 
a/dbms/src/Common/HashTable/StringHashTable.h +++ b/dbms/src/Common/HashTable/StringHashTable.h @@ -16,7 +16,9 @@ #include #include +#include #include +#include #include #include diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 041f77fc155..543885b6248 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -755,7 +755,7 @@ size_t Aggregator::emplaceOrFindStringKey( AggProcessInfo & agg_process_info) const { static_assert(!(collect_hit_rate && only_lookup)); - RUNTIME_CHECK(key_infos.size() == key_datas.size()); + assert(key_infos.size() == key_datas.size()); using Hash = typename StringHashTableSubMapSelector>::Hash; std::vector hashvals(key_infos.size(), 0); @@ -1017,7 +1017,7 @@ ALWAYS_INLINE void Aggregator::executeImplBatch( #define M(SUBMAPINDEX) \ template \ - void setupExceptionRecoveryInfoForStringHashTable( \ + ALWAYS_INLINE inline void setupExceptionRecoveryInfoForStringHashTable( \ Aggregator::AggProcessInfo & agg_process_info, \ size_t row, \ const std::vector & key_infos, \ @@ -1038,8 +1038,10 @@ M(4) #undef M -// Emplace key into StringHashMap/TwoLevelStringHashMap is seperated from other situations, -// because it's easy to implement prefetch submap directly. +// In this function, we will prefetch/empalce each specifix submap directly instead of accessing StringHashMap interface, +// which is good for performance. +// NOTE: this function is column-wise, which means sort key buffer cannot be reused. +// This buffer will not be release until this block is processed done. 
template ALWAYS_INLINE void Aggregator::executeImplBatchStringHashMap( Method & method, @@ -1063,8 +1065,9 @@ ALWAYS_INLINE void Aggregator::executeImplBatchStringHashMap( M(4) #undef M + const size_t rows = agg_process_info.end_row - agg_process_info.start_row; + auto sort_key_pool = std::make_unique(); std::vector sort_key_containers; - sort_key_containers.resize(params.keys_size, ""); #define M(INFO, DATA, KEYTYPE) \ std::vector(INFO); \ @@ -1077,13 +1080,15 @@ ALWAYS_INLINE void Aggregator::executeImplBatchStringHashMap( M(key_str_infos, key_str_datas, ArenaKeyHolder) #undef M - const size_t rows = agg_process_info.end_row - agg_process_info.start_row; // If no resize exception happens, so this is a new Block. // If resize exception happens, start_row also set as zero. RUNTIME_CHECK(agg_process_info.start_row == 0); if likely (agg_process_info.stringHashTableRecoveryInfoEmpty()) { + // sort_key_pool should already been reset by AggProcessInfo::restBlock() + RUNTIME_CHECK(!agg_process_info.sort_key_pool); + const size_t reserve_size = rows / 4; #define M(INFO, DATA, SUBMAPINDEX, KEYTYPE) \ @@ -1106,7 +1111,9 @@ ALWAYS_INLINE void Aggregator::executeImplBatchStringHashMap( for (size_t i = 0; i < rows; ++i) { - auto key_holder = state.getKeyHolder(i, aggregates_pool, sort_key_containers); + // Use Arena for collation sort key, because we are doing agg in column-wise way. + // So a big arena is needed to store decoded key, and we can avoid resize std::string by using Arena. 
+ auto key_holder = state.getKeyHolder(i, aggregates_pool, sort_key_pool.get()); dispatchStringHashTable( i, key_holder, @@ -1142,7 +1149,7 @@ ALWAYS_INLINE void Aggregator::executeImplBatchStringHashMap( bool zero_agg_func_size = (params.aggregates_size == 0); #define M(INDEX, INFO, DATA, PLACES) \ - if (!(INFO).empty()) \ + if (!got_resize_exception && !(INFO).empty()) \ { \ if (zero_agg_func_size) \ emplaced_index = emplaceOrFindStringKey( \ @@ -1165,15 +1172,15 @@ ALWAYS_INLINE void Aggregator::executeImplBatchStringHashMap( if unlikely (emplaced_index != (INFO).size()) \ got_resize_exception = true; \ } \ - else \ - { \ - emplaced_index = 0; \ - } \ + else \ + { \ + emplaced_index = 0; \ + } \ setupExceptionRecoveryInfoForStringHashTable( \ agg_process_info, \ emplaced_index, \ - (INFO), \ - (DATA), \ + (INFO), \ + (DATA), \ std::integral_constant{}); M(0, key0_infos, key0_datas, key0_places) @@ -1186,10 +1193,6 @@ ALWAYS_INLINE void Aggregator::executeImplBatchStringHashMap( if (zero_agg_func_size) return; - RUNTIME_CHECK( - rows - == key0_places.size() + key8_places.size() + key16_places.size() + key24_places.size() + key_str_places.size()); - std::vector places(rows, nullptr); #define M(INFO, PLACES) \ for (size_t i = 0; i < (INFO).size(); ++i) \ @@ -1218,6 +1221,12 @@ ALWAYS_INLINE void Aggregator::executeImplBatchStringHashMap( } // For StringHashTable, start_row is meanless, instead submap_mx_infos/submap_mx_datas are used. agg_process_info.start_row = got_resize_exception ? 
0 : agg_process_info.end_row; + + if unlikely (got_resize_exception) + { + RUNTIME_CHECK(!agg_process_info.stringHashTableRecoveryInfoEmpty()); + agg_process_info.sort_key_pool = std::move(sort_key_pool); + } } void NO_INLINE diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index f5217058ff8..c6e78fb5618 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -231,8 +231,7 @@ struct AggregationMethodStringNoCache : data(other.data) {} - using State = ColumnsHashing:: - HashMethodString; + using State = ColumnsHashing::HashMethodString; template struct EmplaceOrFindKeyResult { @@ -528,7 +527,7 @@ struct AggregationMethodFixedStringNoCache : data(other.data) {} - using State = ColumnsHashing::HashMethodFixedString; + using State = ColumnsHashing::HashMethodFixedString; template struct EmplaceOrFindKeyResult { @@ -1326,12 +1325,12 @@ class Aggregator std::vector submap_m2_infos{}; std::vector submap_m3_infos{}; std::vector submap_m4_infos{}; - std::vector submap_m0_datas{}; std::vector submap_m1_datas{}; std::vector submap_m2_datas{}; std::vector submap_m3_datas{}; std::vector submap_m4_datas{}; + std::unique_ptr sort_key_pool; void prepareForAgg(); bool allBlockDataHandled() const @@ -1343,8 +1342,8 @@ class Aggregator } bool stringHashTableRecoveryInfoEmpty() const { - return submap_m0_infos.empty() && submap_m1_infos.empty() && - submap_m3_infos.empty() && submap_m4_infos.empty(); + return submap_m0_infos.empty() && submap_m1_infos.empty() && submap_m3_infos.empty() + && submap_m4_infos.empty(); } void resetBlock(const Block & block_) { @@ -1358,6 +1357,8 @@ class Aggregator hit_row_cnt = 0; not_found_rows.clear(); not_found_rows.reserve(block_.rows() / 2); + + sort_key_pool.reset(); } }; diff --git a/dbms/src/Interpreters/JoinPartition.cpp b/dbms/src/Interpreters/JoinPartition.cpp index a060878c4f7..294c72c19a3 100644 --- a/dbms/src/Interpreters/JoinPartition.cpp +++ 
b/dbms/src/Interpreters/JoinPartition.cpp @@ -412,7 +412,7 @@ struct KeyGetterForTypeImpl template struct KeyGetterForTypeImpl { - using Type = ColumnsHashing::HashMethodString; + using Type = ColumnsHashing::HashMethodString; }; template struct KeyGetterForTypeImpl @@ -427,7 +427,7 @@ struct KeyGetterForTypeImpl template struct KeyGetterForTypeImpl { - using Type = ColumnsHashing::HashMethodFixedString; + using Type = ColumnsHashing::HashMethodFixedString; }; template struct KeyGetterForTypeImpl @@ -652,18 +652,18 @@ void NO_INLINE insertBlockIntoMapsTypeCase( insert_indexes.emplace_back(insert_index); } -#define INSERT_TO_MAP(join_partition, segment_index) \ - auto & current_map = (join_partition)->getHashMap(); \ - for (auto & s_i : (segment_index)) \ - { \ - Inserter::insert( \ - current_map, \ - key_getter, \ - stored_block, \ - s_i, \ - pool, \ - sort_key_containers, \ - probe_cache_column_threshold); \ +#define INSERT_TO_MAP(join_partition, segment_index) \ + auto & current_map = (join_partition) -> getHashMap(); \ + for (auto & s_i : (segment_index)) \ + { \ + Inserter::insert( \ + current_map, \ + key_getter, \ + stored_block, \ + s_i, \ + pool, \ + sort_key_containers, \ + probe_cache_column_threshold); \ } #define INSERT_TO_NOT_INSERTED_MAP \ diff --git a/dbms/src/Interpreters/SetVariants.h b/dbms/src/Interpreters/SetVariants.h index a1591f8c13a..5c503240b7b 100644 --- a/dbms/src/Interpreters/SetVariants.h +++ b/dbms/src/Interpreters/SetVariants.h @@ -54,7 +54,7 @@ struct SetMethodString Data data; - using State = ColumnsHashing::HashMethodString; + using State = ColumnsHashing::HashMethodString; }; template @@ -77,7 +77,7 @@ struct SetMethodFixedString Data data; - using State = ColumnsHashing::HashMethodFixedString; + using State = ColumnsHashing::HashMethodFixedString; }; namespace set_impl diff --git a/dbms/src/TiDB/Collation/Collator.cpp b/dbms/src/TiDB/Collation/Collator.cpp index bf27400f8c4..4365f1f0988 100644 --- 
a/dbms/src/TiDB/Collation/Collator.cpp +++ b/dbms/src/TiDB/Collation/Collator.cpp @@ -192,6 +192,11 @@ class BinCollator final : public ITiDBCollator return DB::BinCollatorSortKey(s, length); } + StringRef sortKey(const char * s, size_t length, DB::Arena &) const override + { + return DB::BinCollatorSortKey(s, length); + } + StringRef sortKeyNoTrim(const char * s, size_t length, std::string &) const override { return convertForBinCollator(s, length, nullptr); @@ -273,11 +278,54 @@ class GeneralCICollator final : public ITiDBCollator return convertImpl(s, length, container, nullptr); } + StringRef sortKey(const char * s, size_t length, DB::Arena & pool) const override + { + return convertImpl(s, length, pool, nullptr); + } + StringRef sortKeyNoTrim(const char * s, size_t length, std::string & container) const override { return convertImpl(s, length, container, nullptr); } + template + StringRef convertImpl(const char * s, size_t length, DB::Arena & pool, std::vector * lens) const + { + std::string_view v; + + if constexpr (need_trim) + v = rtrim(s, length); + else + v = std::string_view(s, length); + + const size_t size = length * sizeof(WeightType); + auto * buffer = pool.alignedAlloc(size, 16); + + size_t offset = 0; + size_t total_size = 0; + size_t v_length = v.length(); + + if constexpr (need_len) + { + if (lens->capacity() < v_length) + lens->reserve(v_length); + lens->resize(0); + } + + while (offset < v_length) + { + auto c = decodeChar(s, offset); + auto sk = weight(c); + buffer[total_size++] = static_cast(sk >> 8); + buffer[total_size++] = static_cast(sk); + + if constexpr (need_len) + lens->push_back(2); + } + + return StringRef(buffer, total_size); + } + template StringRef convertImpl(const char * s, size_t length, std::string & container, std::vector * lens) const { @@ -479,11 +527,65 @@ class UCACICollator final : public ITiDBCollator return convertImpl(s, length, container, nullptr); } + StringRef sortKey(const char * s, size_t length, DB::Arena & 
pool) const override + { + return convertImpl(s, length, pool, nullptr); + } + StringRef sortKeyNoTrim(const char * s, size_t length, std::string & container) const override { return convertImpl(s, length, container, nullptr); } + // Use Arena to store decoded string. Normally it's used by column-wise Agg/Join, + // because column-wise process cannot reuse string container. + template + StringRef convertImpl(const char * s, size_t length, DB::Arena & pool, std::vector * lens) const + { + std::string_view v; + + if constexpr (need_trim) + v = preprocess(s, length); + else + v = std::string_view(s, length); + + // every char have 8 uint16 at most. + const auto size = 8 * length * sizeof(uint16_t); + auto * buffer = pool.alignedAlloc(size, 16); + + size_t offset = 0; + size_t total_size = 0; + size_t v_length = v.length(); + + uint64_t first = 0, second = 0; + + if constexpr (need_len) + { + if (lens->capacity() < v_length) + lens->reserve(v_length); + lens->resize(0); + } + + while (offset < v_length) + { + weight(first, second, offset, v_length, s); + + if constexpr (need_len) + lens->push_back(total_size); + + writeResult(first, buffer, total_size); + writeResult(second, buffer, total_size); + + if constexpr (need_len) + { + size_t end_idx = lens->size() - 1; + (*lens)[end_idx] = total_size - (*lens)[end_idx]; + } + } + + return StringRef(buffer, total_size); + } + template StringRef convertImpl(const char * s, size_t length, std::string & container, std::vector * lens) const { @@ -550,6 +652,16 @@ class UCACICollator final : public ITiDBCollator } } + static inline void writeResult(uint64_t & w, char * buffer, size_t & total_size) + { + while (w != 0) + { + buffer[total_size++] = static_cast(w >> 8); + buffer[total_size++] = static_cast(w); + w >>= 16; + } + } + static inline bool regexEq(CharType a, CharType b) { return T::regexEq(a, b); } static inline void weight(uint64_t & first, uint64_t & second, size_t & offset, size_t length, const char * s) diff --git 
a/dbms/src/TiDB/Collation/Collator.h b/dbms/src/TiDB/Collation/Collator.h index 6bb87883ef1..08c017ba57d 100644 --- a/dbms/src/TiDB/Collation/Collator.h +++ b/dbms/src/TiDB/Collation/Collator.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -101,6 +102,7 @@ class ITiDBCollator = 0; virtual StringRef sortKeyNoTrim(const char * s, size_t length, std::string & container) const = 0; virtual StringRef sortKey(const char * s, size_t length, std::string & container) const = 0; + virtual StringRef sortKey(const char * s, size_t length, DB::Arena &) const = 0; virtual std::unique_ptr pattern() const = 0; int32_t getCollatorId() const { return collator_id; } CollatorType getCollatorType() const { return collator_type; } @@ -135,6 +137,14 @@ class ITiDBCollator } return sortKey(s, length, container); } + ALWAYS_INLINE inline StringRef sortKeyFastPath(const char * s, size_t length, DB::Arena & pool) const + { + if (likely(isPaddingBinary())) + { + return DB::BinCollatorSortKey(s, length); + } + return sortKey(s, length, pool); + } protected: explicit ITiDBCollator(int32_t collator_id_); From 3a86617a9c870be81bf28247d5b6d033cc9b6a1d Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Mon, 2 Dec 2024 17:16:26 +0800 Subject: [PATCH 11/24] unit test Signed-off-by: guo-shaoge --- dbms/src/Common/FailPoint.cpp | 1 + .../tests/gtest_aggregation_executor.cpp | 111 +++++++++++------- dbms/src/Flash/tests/gtest_compute_server.cpp | 4 + dbms/src/Interpreters/Aggregator.cpp | 9 +- 4 files changed, 79 insertions(+), 46 deletions(-) diff --git a/dbms/src/Common/FailPoint.cpp b/dbms/src/Common/FailPoint.cpp index f6025741325..f73f273dd48 100644 --- a/dbms/src/Common/FailPoint.cpp +++ b/dbms/src/Common/FailPoint.cpp @@ -114,6 +114,7 @@ namespace DB M(force_set_parallel_prehandle_threshold) \ M(force_raise_prehandle_exception) \ M(force_agg_on_partial_block) \ + M(force_agg_prefetch) \ M(force_set_fap_candidate_store_id) \ M(force_not_clean_fap_on_destroy) \ 
M(force_fap_worker_throw) \ diff --git a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp index 7193f24eddb..8c7f5277916 100644 --- a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp +++ b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp @@ -24,6 +24,7 @@ namespace DB namespace FailPoints { extern const char force_agg_on_partial_block[]; +extern const char force_agg_prefetch[]; extern const char force_agg_two_level_hash_table_before_merge[]; } // namespace FailPoints namespace tests @@ -238,16 +239,22 @@ class AggExecutorTestRunner : public ExecutorTest ColumnWithUInt64 col_pr{1, 2, 0, 3290124, 968933, 3125, 31236, 4327, 80000}; }; -#define WRAP_FOR_AGG_PARTIAL_BLOCK_START \ - std::vector partial_blocks{true, false}; \ - for (auto partial_block : partial_blocks) \ - { \ - if (partial_block) \ - FailPointHelper::enableFailPoint(FailPoints::force_agg_on_partial_block); \ - else \ - FailPointHelper::disableFailPoint(FailPoints::force_agg_on_partial_block); +#define WRAP_FOR_AGG_FAILPOINTS_START \ + std::vector enables{true, false}; \ + for (auto enable : enables) \ + { \ + if (enable) \ + { \ + FailPointHelper::enableFailPoint(FailPoints::force_agg_on_partial_block); \ + FailPointHelper::enableFailPoint(FailPoints::force_agg_prefetch); \ + } \ + else \ + { \ + FailPointHelper::disableFailPoint(FailPoints::force_agg_on_partial_block); \ + FailPointHelper::disableFailPoint(FailPoints::force_agg_prefetch); \ + } -#define WRAP_FOR_AGG_PARTIAL_BLOCK_END } +#define WRAP_FOR_AGG_FAILPOINTS_END } /// Guarantee the correctness of group by TEST_F(AggExecutorTestRunner, GroupBy) @@ -363,9 +370,9 @@ try FailPointHelper::enableFailPoint(FailPoints::force_agg_two_level_hash_table_before_merge); else FailPointHelper::disableFailPoint(FailPoints::force_agg_two_level_hash_table_before_merge); - WRAP_FOR_AGG_PARTIAL_BLOCK_START + WRAP_FOR_AGG_FAILPOINTS_START executeAndAssertColumnsEqual(request, expect_cols[i]); - 
WRAP_FOR_AGG_PARTIAL_BLOCK_END + WRAP_FOR_AGG_FAILPOINTS_END } } } @@ -429,9 +436,9 @@ try FailPointHelper::enableFailPoint(FailPoints::force_agg_two_level_hash_table_before_merge); else FailPointHelper::disableFailPoint(FailPoints::force_agg_two_level_hash_table_before_merge); - WRAP_FOR_AGG_PARTIAL_BLOCK_START + WRAP_FOR_AGG_FAILPOINTS_START executeAndAssertColumnsEqual(request, expect_cols[i]); - WRAP_FOR_AGG_PARTIAL_BLOCK_END + WRAP_FOR_AGG_FAILPOINTS_END } } } @@ -464,9 +471,9 @@ try for (size_t i = 0; i < test_num; ++i) { request = buildDAGRequest(std::make_pair(db_name, table_name), agg_funcs[i], group_by_exprs[i], projections[i]); - WRAP_FOR_AGG_PARTIAL_BLOCK_START + WRAP_FOR_AGG_FAILPOINTS_START executeAndAssertColumnsEqual(request, expect_cols[i]); - WRAP_FOR_AGG_PARTIAL_BLOCK_END + WRAP_FOR_AGG_FAILPOINTS_END } /// Min function tests @@ -485,9 +492,9 @@ try for (size_t i = 0; i < test_num; ++i) { request = buildDAGRequest(std::make_pair(db_name, table_name), agg_funcs[i], group_by_exprs[i], projections[i]); - WRAP_FOR_AGG_PARTIAL_BLOCK_START + WRAP_FOR_AGG_FAILPOINTS_START executeAndAssertColumnsEqual(request, expect_cols[i]); - WRAP_FOR_AGG_PARTIAL_BLOCK_END + WRAP_FOR_AGG_FAILPOINTS_END } } CATCH @@ -545,9 +552,9 @@ try { request = buildDAGRequest(std::make_pair(db_name, table_name), {agg_funcs[i]}, group_by_exprs[i], projections[i]); - WRAP_FOR_AGG_PARTIAL_BLOCK_START + WRAP_FOR_AGG_FAILPOINTS_START executeAndAssertColumnsEqual(request, expect_cols[i]); - WRAP_FOR_AGG_PARTIAL_BLOCK_END + WRAP_FOR_AGG_FAILPOINTS_END } } CATCH @@ -615,9 +622,9 @@ try {agg_func}, group_by_exprs[i], projections[i]); - WRAP_FOR_AGG_PARTIAL_BLOCK_START + WRAP_FOR_AGG_FAILPOINTS_START executeAndAssertColumnsEqual(request, expect_cols[i]); - WRAP_FOR_AGG_PARTIAL_BLOCK_END + WRAP_FOR_AGG_FAILPOINTS_END } } { @@ -629,9 +636,9 @@ try {agg_func}, group_by_exprs[i], projections[i]); - WRAP_FOR_AGG_PARTIAL_BLOCK_START + WRAP_FOR_AGG_FAILPOINTS_START 
executeAndAssertColumnsEqual(request, expect_cols[i]); - WRAP_FOR_AGG_PARTIAL_BLOCK_END + WRAP_FOR_AGG_FAILPOINTS_END } } for (auto collation_id : {0, static_cast(TiDB::ITiDBCollator::BINARY)}) @@ -668,9 +675,9 @@ try {agg_func}, group_by_exprs[i], projections[i]); - WRAP_FOR_AGG_PARTIAL_BLOCK_START + WRAP_FOR_AGG_FAILPOINTS_START executeAndAssertColumnsEqual(request, expect_cols[i]); - WRAP_FOR_AGG_PARTIAL_BLOCK_END + WRAP_FOR_AGG_FAILPOINTS_END } } } @@ -683,9 +690,9 @@ try executeAndAssertColumnsEqual(request, {{toNullableVec({"banana"})}}); request = context.scan("aggnull_test", "t1").aggregation({}, {col("s1")}).build(context); - WRAP_FOR_AGG_PARTIAL_BLOCK_START + WRAP_FOR_AGG_FAILPOINTS_START executeAndAssertColumnsEqual(request, {{toNullableVec("s1", {{}, "banana"})}}); - WRAP_FOR_AGG_PARTIAL_BLOCK_END + WRAP_FOR_AGG_FAILPOINTS_END } CATCH @@ -697,9 +704,9 @@ try = {toNullableVec({3}), toNullableVec({1}), toVec({6})}; auto test_single_function = [&](size_t index) { auto request = context.scan("test_db", "test_table").aggregation({functions[index]}, {}).build(context); - WRAP_FOR_AGG_PARTIAL_BLOCK_START + WRAP_FOR_AGG_FAILPOINTS_START executeAndAssertColumnsEqual(request, {functions_result[index]}); - WRAP_FOR_AGG_PARTIAL_BLOCK_END + WRAP_FOR_AGG_FAILPOINTS_END }; for (size_t i = 0; i < functions.size(); ++i) test_single_function(i); @@ -720,9 +727,9 @@ try results.push_back(functions_result[k]); auto request = context.scan("test_db", "test_table").aggregation(funcs, {}).build(context); - WRAP_FOR_AGG_PARTIAL_BLOCK_START + WRAP_FOR_AGG_FAILPOINTS_START executeAndAssertColumnsEqual(request, results); - WRAP_FOR_AGG_PARTIAL_BLOCK_END + WRAP_FOR_AGG_FAILPOINTS_END funcs.pop_back(); results.pop_back(); @@ -758,9 +765,9 @@ try context.context->setSetting( "group_by_two_level_threshold", Field(static_cast(two_level_threshold))); - WRAP_FOR_AGG_PARTIAL_BLOCK_START + WRAP_FOR_AGG_FAILPOINTS_START executeAndAssertColumnsEqual(request, expect); - 
WRAP_FOR_AGG_PARTIAL_BLOCK_END + WRAP_FOR_AGG_FAILPOINTS_END } } } @@ -791,7 +798,7 @@ try "group_by_two_level_threshold", Field(static_cast(two_level_threshold))); context.context->setSetting("max_block_size", Field(static_cast(block_size))); - WRAP_FOR_AGG_PARTIAL_BLOCK_START + WRAP_FOR_AGG_FAILPOINTS_START auto blocks = getExecuteStreamsReturnBlocks(request, concurrency); size_t actual_row = 0; for (auto & block : blocks) @@ -800,7 +807,7 @@ try actual_row += block.rows(); } ASSERT_EQ(actual_row, expect_rows[i]); - WRAP_FOR_AGG_PARTIAL_BLOCK_END + WRAP_FOR_AGG_FAILPOINTS_END } } } @@ -914,7 +921,7 @@ try "group_by_two_level_threshold", Field(static_cast(two_level_threshold))); context.context->setSetting("max_block_size", Field(static_cast(block_size))); - WRAP_FOR_AGG_PARTIAL_BLOCK_START + WRAP_FOR_AGG_FAILPOINTS_START auto blocks = getExecuteStreamsReturnBlocks(request, concurrency); for (auto & block : blocks) { @@ -939,7 +946,7 @@ try vstackBlocks(std::move(blocks)).getColumnsWithTypeAndName(), false)); } - WRAP_FOR_AGG_PARTIAL_BLOCK_END + WRAP_FOR_AGG_FAILPOINTS_END } } } @@ -967,18 +974,18 @@ try request = context.receive("empty_recv", 5).aggregation({Max(col("s1"))}, {col("s2")}, 5).build(context); { - WRAP_FOR_AGG_PARTIAL_BLOCK_START + WRAP_FOR_AGG_FAILPOINTS_START executeAndAssertColumnsEqual(request, {}); - WRAP_FOR_AGG_PARTIAL_BLOCK_END + WRAP_FOR_AGG_FAILPOINTS_END } request = context.scan("test_db", "empty_table") .aggregation({Count(lit(Field(static_cast(1))))}, {}) .build(context); { - WRAP_FOR_AGG_PARTIAL_BLOCK_START + WRAP_FOR_AGG_FAILPOINTS_START executeAndAssertColumnsEqual(request, {toVec({0})}); - WRAP_FOR_AGG_PARTIAL_BLOCK_END + WRAP_FOR_AGG_FAILPOINTS_END } } CATCH @@ -1049,7 +1056,9 @@ try toNullableVec("first_row(col_tinyint)", ColumnWithNullableInt8{0, 1, 2, 3}), toVec("col_int", ColumnWithInt32{0, 1, 2, 3}), toVec("col_tinyint", ColumnWithInt8{0, 1, 2, 3})}; + WRAP_FOR_AGG_FAILPOINTS_START executeAndAssertColumnsEqual(request, 
expected); + WRAP_FOR_AGG_FAILPOINTS_END } { @@ -1065,7 +1074,9 @@ try = {toVec("count(1)", ColumnWithUInt64{rows_per_type, rows_per_type, rows_per_type, rows_per_type}), toNullableVec("first_row(col_int)", ColumnWithNullableInt32{0, 1, 2, 3}), toVec("col_int", ColumnWithInt32{0, 1, 2, 3})}; + WRAP_FOR_AGG_FAILPOINTS_START executeAndAssertColumnsEqual(request, expected); + WRAP_FOR_AGG_FAILPOINTS_END } { @@ -1082,7 +1093,9 @@ try toNullableVec("first_row(col_string_no_collator)", ColumnWithNullableString{"a", "b", "c", "d"}), toVec("col_string_no_collator", ColumnWithString{"a", "b", "c", "d"}), }; + WRAP_FOR_AGG_FAILPOINTS_START executeAndAssertColumnsEqual(request, expected); + WRAP_FOR_AGG_FAILPOINTS_END } { @@ -1099,7 +1112,9 @@ try toNullableVec("first_row(col_string_with_collator)", ColumnWithNullableString{"a", "b", "c", "d"}), toVec("col_string_with_collator", ColumnWithString{"a", "b", "c", "d"}), }; + WRAP_FOR_AGG_FAILPOINTS_START executeAndAssertColumnsEqual(request, expected); + WRAP_FOR_AGG_FAILPOINTS_END } { @@ -1116,7 +1131,9 @@ try toVec("count(1)", ColumnWithUInt64{rows_per_type, rows_per_type, rows_per_type, rows_per_type}), toVec("first_row(col_string_with_collator)", ColumnWithString{"a", "b", "c", "d"}), }; + WRAP_FOR_AGG_FAILPOINTS_START executeAndAssertColumnsEqual(request, expected); + WRAP_FOR_AGG_FAILPOINTS_END } // case-5: none @@ -1138,7 +1155,9 @@ try toVec("col_int", ColumnWithInt32{0, 1, 2, 3}), toVec("col_string_no_collator", ColumnWithString{"a", "b", "c", "d"}), }; + WRAP_FOR_AGG_FAILPOINTS_START executeAndAssertColumnsEqual(request, expected); + WRAP_FOR_AGG_FAILPOINTS_END } { @@ -1155,7 +1174,9 @@ try toNullableVec("first_row(col_string_with_collator)", ColumnWithNullableString{"a", "b", "c", "d"}), toVec("col_string_with_collator", ColumnWithString{"a", "b", "c", "d"}), toVec("col_int", ColumnWithInt32{0, 1, 2, 3})}; + WRAP_FOR_AGG_FAILPOINTS_START executeAndAssertColumnsEqual(request, expected); + WRAP_FOR_AGG_FAILPOINTS_END } 
} CATCH @@ -1205,15 +1226,15 @@ try auto baseline = executeStreams(gen_request(1), 1); for (size_t exchange_concurrency : exchange_receiver_concurrency) { - WRAP_FOR_AGG_PARTIAL_BLOCK_START + WRAP_FOR_AGG_FAILPOINTS_START executeAndAssertColumnsEqual(gen_request(exchange_concurrency), baseline); - WRAP_FOR_AGG_PARTIAL_BLOCK_END + WRAP_FOR_AGG_FAILPOINTS_END } } CATCH -#undef WRAP_FOR_AGG_PARTIAL_BLOCK_START -#undef WRAP_FOR_AGG_PARTIAL_BLOCK_END +#undef WRAP_FOR_AGG_FAILPOINTS_START +#undef WRAP_FOR_AGG_FAILPOINTS_END } // namespace tests } // namespace DB diff --git a/dbms/src/Flash/tests/gtest_compute_server.cpp b/dbms/src/Flash/tests/gtest_compute_server.cpp index 69b2242df3d..3c4020db45e 100644 --- a/dbms/src/Flash/tests/gtest_compute_server.cpp +++ b/dbms/src/Flash/tests/gtest_compute_server.cpp @@ -39,6 +39,7 @@ extern const char exception_before_mpp_root_task_run[]; extern const char exception_during_mpp_non_root_task_run[]; extern const char exception_during_mpp_root_task_run[]; extern const char exception_during_query_run[]; +extern const char force_agg_prefetch[]; } // namespace FailPoints namespace tests @@ -1369,6 +1370,7 @@ try FailPoints::exception_during_mpp_non_root_task_run, FailPoints::exception_during_mpp_root_task_run, FailPoints::exception_during_query_run, + FailPoints::force_agg_prefetch, }; size_t query_index = 0; for (const auto & failpoint : failpoint_names) @@ -1843,6 +1845,7 @@ try auto_pass_through_test_data.nullable_high_ndv_tbl_name, auto_pass_through_test_data.nullable_medium_ndv_tbl_name, }; + FailPointHelper::enableFailPoint(FailPoints::force_agg_prefetch); for (const auto & tbl_name : workloads) { const String db_name = auto_pass_through_test_data.db_name; @@ -1868,6 +1871,7 @@ try res_no_pass_through); WRAP_FOR_SERVER_TEST_END } + FailPointHelper::disableFailPoint(FailPoints::force_agg_prefetch); } CATCH diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 543885b6248..6e3262c15d4 100644 
--- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -43,6 +43,7 @@ extern const char random_aggregate_create_state_failpoint[]; extern const char random_aggregate_merge_failpoint[]; extern const char force_agg_on_partial_block[]; extern const char random_fail_in_resize_callback[]; +extern const char force_agg_prefetch[]; } // namespace FailPoints #define AggregationMethodName(NAME) AggregatedDataVariants::AggregationMethod_##NAME @@ -665,7 +666,13 @@ void NO_INLINE Aggregator::executeImpl( { typename Method::State state(agg_process_info.key_columns, key_sizes, collators); - if (method.data.getBufferSizeInCells() < 8192) +#ifndef NDEBUG + bool disable_prefetch = (method.data.getBufferSizeInCells() < 8192); + fiu_do_on(FailPoints::force_agg_prefetch, { disable_prefetch = false; }); +#else + const bool disable_prefetch = (method.data.getBufferSizeInCells() < 8192); +#endif + if (disable_prefetch) { if constexpr (Method::Data::is_string_hash_map) executeImplBatchStringHashMap( From 623fef57f5a160e201e5f765f22430eef88f8300 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Mon, 2 Dec 2024 20:14:41 +0800 Subject: [PATCH 12/24] prefetch Signed-off-by: guo-shaoge --- dbms/src/Common/HashTable/HashTable.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/dbms/src/Common/HashTable/HashTable.h b/dbms/src/Common/HashTable/HashTable.h index f8d44e8c406..c0f066edbb0 100644 --- a/dbms/src/Common/HashTable/HashTable.h +++ b/dbms/src/Common/HashTable/HashTable.h @@ -856,14 +856,8 @@ class HashTable void ALWAYS_INLINE prefetch(size_t hashval) const { - (void)hashval; -#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) - size_t place_value = grower.place(hashval); - __mm_prefetch((const char *)(&buf[place_value]), _MM_HINT_NTA); -#elif defined(__GNUC__) - size_t place_value = grower.place(hashval); + const size_t place_value = grower.place(hashval); __builtin_prefetch(static_cast(&buf[place_value])); -#endif } 
protected: From 19f320daa2b5250ee45456ee0d1401f1cd32fa53 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Mon, 2 Dec 2024 20:28:40 +0800 Subject: [PATCH 13/24] fix Signed-off-by: guo-shaoge --- dbms/src/Common/ColumnsHashing.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/dbms/src/Common/ColumnsHashing.h b/dbms/src/Common/ColumnsHashing.h index 94526714250..a9817308616 100644 --- a/dbms/src/Common/ColumnsHashing.h +++ b/dbms/src/Common/ColumnsHashing.h @@ -85,8 +85,6 @@ struct HashMethodOneNumber } const FieldType * getKeyData() const { return vec; } - - size_t getTotalRows() const { return total_rows; } }; From 3a226dfb518caf0afa1769617d7b800f2c97ca12 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Tue, 3 Dec 2024 14:14:14 +0800 Subject: [PATCH 14/24] refine Signed-off-by: guo-shaoge --- dbms/src/Common/ColumnsHashingImpl.h | 4 +- .../HashTable/TwoLevelStringHashTable.h | 2 +- .../tests/gtest_aggregation_executor.cpp | 53 ++++++++++++------- dbms/src/Interpreters/Aggregator.cpp | 21 +++++--- 4 files changed, 50 insertions(+), 30 deletions(-) diff --git a/dbms/src/Common/ColumnsHashingImpl.h b/dbms/src/Common/ColumnsHashingImpl.h index 1f4e3dbaedf..b5b61fb8630 100644 --- a/dbms/src/Common/ColumnsHashingImpl.h +++ b/dbms/src/Common/ColumnsHashingImpl.h @@ -184,7 +184,7 @@ class HashMethodBase ALWAYS_INLINE inline EmplaceResult emplaceStringKey( Data & data, size_t idx, - std::vector & datas, // TODO const + std::vector & datas, const std::vector & hashvals) { // For spill, hashvals.size() will be le to total_rows. 
@@ -204,7 +204,7 @@ class HashMethodBase ALWAYS_INLINE inline FindResult findStringKey( Data & data, size_t idx, - std::vector & datas, // TODO const + std::vector & datas, const std::vector & hashvals) { assert(hashvals.size() <= static_cast(*this).total_rows); diff --git a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h index 5ea460769ab..ac2ab483e46 100644 --- a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h @@ -265,7 +265,7 @@ class TwoLevelStringHashTable : private boost::noncopyable { size_t res = 0; for (const auto & impl : impls) - res = impl.getBufferSizeInCells(); + res += impl.getBufferSizeInCells(); return res; } size_t getBufferSizeInBytes() const diff --git a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp index 8c7f5277916..3a79025f244 100644 --- a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp +++ b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp @@ -1042,6 +1042,24 @@ try toVec("col_tinyint", col_data_tinyint), }); + std::vector max_block_sizes{1, 2, DEFAULT_BLOCK_SIZE}; + std::vector two_level_thresholds{0, 1}; + + context.context->setSetting("group_by_two_level_threshold_bytes", Field(static_cast(0))); +#define WRAP_FOR_AGG_STRING_TEST_BEGIN \ + for (const auto & max_block_size : max_block_sizes) \ + { \ + for (const auto & two_level_threshold : two_level_thresholds) \ + { \ + context.context->setSetting( \ + "group_by_two_level_threshold", \ + Field(static_cast(two_level_threshold))); \ + context.context->setSetting("max_block_size", Field(static_cast(max_block_size))); +#define WRAP_FOR_AGG_STRING_TEST_END \ + } \ + } + + FailPointHelper::enableFailPoint(FailPoints::force_agg_prefetch); { // case-1: select count(1), col_tinyint from t group by col_int, col_tinyint // agg method: keys64(AggregationMethodKeysFixed) @@ -1056,9 +1074,9 @@ try 
toNullableVec("first_row(col_tinyint)", ColumnWithNullableInt8{0, 1, 2, 3}), toVec("col_int", ColumnWithInt32{0, 1, 2, 3}), toVec("col_tinyint", ColumnWithInt8{0, 1, 2, 3})}; - WRAP_FOR_AGG_FAILPOINTS_START + WRAP_FOR_AGG_STRING_TEST_BEGIN executeAndAssertColumnsEqual(request, expected); - WRAP_FOR_AGG_FAILPOINTS_END + WRAP_FOR_AGG_STRING_TEST_END } { @@ -1074,9 +1092,9 @@ try = {toVec("count(1)", ColumnWithUInt64{rows_per_type, rows_per_type, rows_per_type, rows_per_type}), toNullableVec("first_row(col_int)", ColumnWithNullableInt32{0, 1, 2, 3}), toVec("col_int", ColumnWithInt32{0, 1, 2, 3})}; - WRAP_FOR_AGG_FAILPOINTS_START + WRAP_FOR_AGG_STRING_TEST_BEGIN executeAndAssertColumnsEqual(request, expected); - WRAP_FOR_AGG_FAILPOINTS_END + WRAP_FOR_AGG_STRING_TEST_END } { @@ -1093,9 +1111,7 @@ try toNullableVec("first_row(col_string_no_collator)", ColumnWithNullableString{"a", "b", "c", "d"}), toVec("col_string_no_collator", ColumnWithString{"a", "b", "c", "d"}), }; - WRAP_FOR_AGG_FAILPOINTS_START executeAndAssertColumnsEqual(request, expected); - WRAP_FOR_AGG_FAILPOINTS_END } { @@ -1112,9 +1128,9 @@ try toNullableVec("first_row(col_string_with_collator)", ColumnWithNullableString{"a", "b", "c", "d"}), toVec("col_string_with_collator", ColumnWithString{"a", "b", "c", "d"}), }; - WRAP_FOR_AGG_FAILPOINTS_START + WRAP_FOR_AGG_STRING_TEST_BEGIN executeAndAssertColumnsEqual(request, expected); - WRAP_FOR_AGG_FAILPOINTS_END + WRAP_FOR_AGG_STRING_TEST_END } { @@ -1131,9 +1147,9 @@ try toVec("count(1)", ColumnWithUInt64{rows_per_type, rows_per_type, rows_per_type, rows_per_type}), toVec("first_row(col_string_with_collator)", ColumnWithString{"a", "b", "c", "d"}), }; - WRAP_FOR_AGG_FAILPOINTS_START + WRAP_FOR_AGG_STRING_TEST_BEGIN executeAndAssertColumnsEqual(request, expected); - WRAP_FOR_AGG_FAILPOINTS_END + WRAP_FOR_AGG_STRING_TEST_END } // case-5: none @@ -1155,9 +1171,9 @@ try toVec("col_int", ColumnWithInt32{0, 1, 2, 3}), toVec("col_string_no_collator", 
ColumnWithString{"a", "b", "c", "d"}), }; - WRAP_FOR_AGG_FAILPOINTS_START + WRAP_FOR_AGG_STRING_TEST_BEGIN executeAndAssertColumnsEqual(request, expected); - WRAP_FOR_AGG_FAILPOINTS_END + WRAP_FOR_AGG_STRING_TEST_END } { @@ -1174,10 +1190,13 @@ try toNullableVec("first_row(col_string_with_collator)", ColumnWithNullableString{"a", "b", "c", "d"}), toVec("col_string_with_collator", ColumnWithString{"a", "b", "c", "d"}), toVec("col_int", ColumnWithInt32{0, 1, 2, 3})}; - WRAP_FOR_AGG_FAILPOINTS_START + WRAP_FOR_AGG_STRING_TEST_BEGIN executeAndAssertColumnsEqual(request, expected); - WRAP_FOR_AGG_FAILPOINTS_END + WRAP_FOR_AGG_STRING_TEST_END } + FailPointHelper::disableFailPoint(FailPoints::force_agg_prefetch); +#undef WRAP_FOR_AGG_STRING_TEST_BEGIN +#undef WRAP_FOR_AGG_STRING_TEST_END } CATCH @@ -1208,13 +1227,9 @@ try context .addExchangeReceiver("exchange_receiver_1_concurrency", column_infos, column_data, 1, partition_column_infos); - context - .addExchangeReceiver("exchange_receiver_3_concurrency", column_infos, column_data, 3, partition_column_infos); - context - .addExchangeReceiver("exchange_receiver_5_concurrency", column_infos, column_data, 5, partition_column_infos); context .addExchangeReceiver("exchange_receiver_10_concurrency", column_infos, column_data, 10, partition_column_infos); - std::vector exchange_receiver_concurrency = {1, 3, 5, 10}; + std::vector exchange_receiver_concurrency = {1, 10}; auto gen_request = [&](size_t exchange_concurrency) { return context diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 6e3262c15d4..2c1aba6b2af 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -741,8 +741,9 @@ std::optional::Res } } +// This is only used by executeImplBatchStringHashMap. +// It will choose specifix submap of StringHashMap then do emplace/find. // StringKeyType can be StringRef/StringKey8/StringKey16/StringKey24/ArenaKeyHolder. 
-// return true when resize exception happens. template < size_t SubMapIndex, bool collect_hit_rate, @@ -756,7 +757,7 @@ size_t Aggregator::emplaceOrFindStringKey( Data & data, State & state, const std::vector & key_infos, - std::vector & key_datas, // TODO const + std::vector & key_datas, Arena & aggregates_pool, std::vector & places, AggProcessInfo & agg_process_info) const @@ -1045,8 +1046,8 @@ M(4) #undef M -// In this function, we will prefetch/empalce each specifix submap directly instead of accessing StringHashMap interface, -// which is good for performance. +// prefetch/empalce each specifix submap directly instead of accessing StringHashMap interface, +// which is better for performance. // NOTE: this function is column-wise, which means sort key buffer cannot be reused. // This buffer will not be release until this block is processed done. template @@ -1088,7 +1089,7 @@ ALWAYS_INLINE void Aggregator::executeImplBatchStringHashMap( #undef M // If no resize exception happens, so this is a new Block. - // If resize exception happens, start_row also set as zero. + // If resize exception happens, start_row has already been set to zero at the end of this function. RUNTIME_CHECK(agg_process_info.start_row == 0); if likely (agg_process_info.stringHashTableRecoveryInfoEmpty()) @@ -1226,13 +1227,17 @@ ALWAYS_INLINE void Aggregator::executeImplBatchStringHashMap( inst->batch_arguments, aggregates_pool); } - // For StringHashTable, start_row is meanless, instead submap_mx_infos/submap_mx_datas are used. - agg_process_info.start_row = got_resize_exception ? 0 : agg_process_info.end_row; - if unlikely (got_resize_exception) { RUNTIME_CHECK(!agg_process_info.stringHashTableRecoveryInfoEmpty()); agg_process_info.sort_key_pool = std::move(sort_key_pool); + // For StringHashTable, start_row is meanless, instead submap_mx_infos/submap_mx_datas are used. + // So set it to zero when got_resize_exception. 
+ agg_process_info.start_row = 0; + } + else + { + agg_process_info.start_row = agg_process_info.end_row; } } From c44ace7030558dff7952fde6aae9aedb6c2bd400 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Tue, 3 Dec 2024 15:50:56 +0800 Subject: [PATCH 15/24] refine Signed-off-by: guo-shaoge --- dbms/src/Interpreters/Aggregator.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 2c1aba6b2af..e5e214c7791 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -674,13 +674,13 @@ void NO_INLINE Aggregator::executeImpl( #endif if (disable_prefetch) { - if constexpr (Method::Data::is_string_hash_map) - executeImplBatchStringHashMap( - method, - state, - aggregates_pool, - agg_process_info); - else + // if constexpr (Method::Data::is_string_hash_map) + // executeImplBatchStringHashMap( + // method, + // state, + // aggregates_pool, + // agg_process_info); + // else executeImplBatch(method, state, aggregates_pool, agg_process_info); } else From 3e30f9561e38d09c39d1b0dc73735b6dcffb7991 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Tue, 3 Dec 2024 16:08:34 +0800 Subject: [PATCH 16/24] revert new hasher Signed-off-by: guo-shaoge --- dbms/src/Common/HashTable/StringHashTable.h | 66 +++++++++++++++---- .../HashTable/TwoLevelStringHashTable.h | 8 +-- dbms/src/Interpreters/Aggregator.h | 20 +++--- 3 files changed, 67 insertions(+), 27 deletions(-) diff --git a/dbms/src/Common/HashTable/StringHashTable.h b/dbms/src/Common/HashTable/StringHashTable.h index a43f35fdbbf..322523388cc 100644 --- a/dbms/src/Common/HashTable/StringHashTable.h +++ b/dbms/src/Common/HashTable/StringHashTable.h @@ -67,17 +67,57 @@ struct HashWithMixSeed } }; +// struct StringHashTableHash +// { +// using StringKey8Hasher = HashWithMixSeed; +// using StringKey16Hasher = HashWithMixSeed; +// using StringKey24Hasher = HashWithMixSeed; +// using 
StringRefHasher = StringRefHash; +// +// static size_t ALWAYS_INLINE operator()(StringKey8 key) { return StringKey8Hasher::operator()(key); } +// static size_t ALWAYS_INLINE operator()(const StringKey16 & key) { return StringKey16Hasher::operator()(key); } +// static size_t ALWAYS_INLINE operator()(const StringKey24 & key) { return StringKey24Hasher::operator()(key); } +// static size_t ALWAYS_INLINE operator()(const StringRef & key) { return StringRefHasher::operator()(key); } +// }; struct StringHashTableHash { - using StringKey8Hasher = HashWithMixSeed; - using StringKey16Hasher = HashWithMixSeed; - using StringKey24Hasher = HashWithMixSeed; - using StringRefHasher = StringRefHash; - - static size_t ALWAYS_INLINE operator()(StringKey8 key) { return StringKey8Hasher::operator()(key); } - static size_t ALWAYS_INLINE operator()(const StringKey16 & key) { return StringKey16Hasher::operator()(key); } - static size_t ALWAYS_INLINE operator()(const StringKey24 & key) { return StringKey24Hasher::operator()(key); } - static size_t ALWAYS_INLINE operator()(const StringRef & key) { return StringRefHasher::operator()(key); } +#if defined(__SSE4_2__) + static size_t ALWAYS_INLINE operator()(StringKey8 key) + { + size_t res = -1ULL; + res = _mm_crc32_u64(res, key); + return res; + } + static size_t ALWAYS_INLINE operator()(const StringKey16 & key) + { + size_t res = -1ULL; + res = _mm_crc32_u64(res, key.low); + res = _mm_crc32_u64(res, key.high); + return res; + } + static size_t ALWAYS_INLINE operator()(const StringKey24 & key) + { + size_t res = -1ULL; + res = _mm_crc32_u64(res, key.a); + res = _mm_crc32_u64(res, key.b); + res = _mm_crc32_u64(res, key.c); + return res; + } +#else + static size_t ALWAYS_INLINE operator()(StringKey8 key) + { + return CityHash_v1_0_2::CityHash64(reinterpret_cast(&key), 8); + } + static size_t ALWAYS_INLINE operator()(const StringKey16 & key) + { + return CityHash_v1_0_2::CityHash64(reinterpret_cast(&key), 16); + } + static size_t ALWAYS_INLINE 
operator()(const StringKey24 & key) + { + return CityHash_v1_0_2::CityHash64(reinterpret_cast(&key), 24); + } +#endif + static size_t ALWAYS_INLINE operator()(StringRef key){ return StringRefHash()(key); } }; template @@ -572,7 +612,7 @@ struct StringHashTableSubMapSelector<0, false, Data> template struct StringHashTableSubMapSelector<1, false, Data> { - using Hash = StringHashTableHash::StringKey8Hasher; + using Hash = StringHashTableHash; static typename Data::T1 & getSubMap(size_t, Data & data) { return data.m1; } }; @@ -580,7 +620,7 @@ struct StringHashTableSubMapSelector<1, false, Data> template struct StringHashTableSubMapSelector<2, false, Data> { - using Hash = StringHashTableHash::StringKey16Hasher; + using Hash = StringHashTableHash; static typename Data::T2 & getSubMap(size_t, Data & data) { return data.m2; } }; @@ -588,7 +628,7 @@ struct StringHashTableSubMapSelector<2, false, Data> template struct StringHashTableSubMapSelector<3, false, Data> { - using Hash = StringHashTableHash::StringKey24Hasher; + using Hash = StringHashTableHash; static typename Data::T3 & getSubMap(size_t, Data & data) { return data.m3; } }; @@ -596,7 +636,7 @@ struct StringHashTableSubMapSelector<3, false, Data> template struct StringHashTableSubMapSelector<4, false, Data> { - using Hash = StringHashTableHash::StringRefHasher; + using Hash = StringHashTableHash; static typename Data::Ts & getSubMap(size_t, Data & data) { return data.ms; } }; diff --git a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h index ac2ab483e46..403b8d3941c 100644 --- a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h @@ -296,7 +296,7 @@ struct StringHashTableSubMapSelector<0, true, Data> template struct StringHashTableSubMapSelector<1, true, Data> { - using Hash = StringHashTableHash::StringKey8Hasher; + using Hash = StringHashTableHash; static typename Data::Impl::T1 & getSubMap(size_t 
hashval, Data & data) { @@ -308,7 +308,7 @@ struct StringHashTableSubMapSelector<1, true, Data> template struct StringHashTableSubMapSelector<2, true, Data> { - using Hash = StringHashTableHash::StringKey16Hasher; + using Hash = StringHashTableHash; static typename Data::Impl::T2 & getSubMap(size_t hashval, Data & data) { @@ -320,7 +320,7 @@ struct StringHashTableSubMapSelector<2, true, Data> template struct StringHashTableSubMapSelector<3, true, Data> { - using Hash = StringHashTableHash::StringKey24Hasher; + using Hash = StringHashTableHash; static typename Data::Impl::T3 & getSubMap(size_t hashval, Data & data) { @@ -332,7 +332,7 @@ struct StringHashTableSubMapSelector<3, true, Data> template struct StringHashTableSubMapSelector<4, true, Data> { - using Hash = StringHashTableHash::StringRefHasher; + using Hash = StringHashTableHash; static typename Data::Impl::Ts & getSubMap(size_t hashval, Data & data) { diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index c6e78fb5618..eb68bc50ae9 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -77,27 +77,27 @@ using AggregatedDataWithoutKey = AggregateDataPtr; using AggregatedDataWithUInt8Key = FixedImplicitZeroHashMapWithCalculatedSize; using AggregatedDataWithUInt16Key = FixedImplicitZeroHashMap; -using AggregatedDataWithUInt32Key = HashMap>; -using AggregatedDataWithUInt64Key = HashMap>; +using AggregatedDataWithUInt32Key = HashMap>; +using AggregatedDataWithUInt64Key = HashMap>; using AggregatedDataWithShortStringKey = StringHashMap; using AggregatedDataWithStringKey = HashMapWithSavedHash; -using AggregatedDataWithInt256Key = HashMap>; +using AggregatedDataWithInt256Key = HashMap>; -using AggregatedDataWithKeys128 = HashMap>; -using AggregatedDataWithKeys256 = HashMap>; +using AggregatedDataWithKeys128 = HashMap>; +using AggregatedDataWithKeys256 = HashMap>; -using AggregatedDataWithUInt32KeyTwoLevel = TwoLevelHashMap>; -using 
AggregatedDataWithUInt64KeyTwoLevel = TwoLevelHashMap>; +using AggregatedDataWithUInt32KeyTwoLevel = TwoLevelHashMap>; +using AggregatedDataWithUInt64KeyTwoLevel = TwoLevelHashMap>; -using AggregatedDataWithInt256KeyTwoLevel = TwoLevelHashMap>; +using AggregatedDataWithInt256KeyTwoLevel = TwoLevelHashMap>; using AggregatedDataWithShortStringKeyTwoLevel = TwoLevelStringHashMap; using AggregatedDataWithStringKeyTwoLevel = TwoLevelHashMapWithSavedHash; -using AggregatedDataWithKeys128TwoLevel = TwoLevelHashMap>; -using AggregatedDataWithKeys256TwoLevel = TwoLevelHashMap>; +using AggregatedDataWithKeys128TwoLevel = TwoLevelHashMap>; +using AggregatedDataWithKeys256TwoLevel = TwoLevelHashMap>; /** Variants with better hash function, using more than 32 bits for hash. * Using for merging phase of external aggregation, where number of keys may be far greater than 4 billion, From ea85d19ff8dc4e97abbbf71bb05a424632b17684 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Tue, 3 Dec 2024 17:10:09 +0800 Subject: [PATCH 17/24] debug low distinct value Signed-off-by: guo-shaoge --- dbms/src/Common/ColumnsHashingImpl.h | 121 ++++++++++++++++++++- dbms/src/Interpreters/Aggregator.cpp | 154 ++++++++++++++++++++------- dbms/src/Interpreters/Aggregator.h | 8 ++ 3 files changed, 244 insertions(+), 39 deletions(-) diff --git a/dbms/src/Common/ColumnsHashingImpl.h b/dbms/src/Common/ColumnsHashingImpl.h index b5b61fb8630..f5cc03d82c8 100644 --- a/dbms/src/Common/ColumnsHashingImpl.h +++ b/dbms/src/Common/ColumnsHashingImpl.h @@ -138,13 +138,35 @@ class HashMethodBase map.prefetch(hashvals[prefetch_idx]); } + template + ALWAYS_INLINE inline EmplaceResult emplaceKey( + Data & data, + size_t row, + Arena & pool, + std::vector & sort_key_containers) + { + auto key_holder = static_cast(*this).getKeyHolder(row, &pool, sort_key_containers); + return emplaceImpl(key_holder, data); + } + + template + ALWAYS_INLINE inline FindResult findKey( + Data & data, + size_t row, + Arena & pool, + std::vector 
& sort_key_containers) + { + auto key_holder = static_cast(*this).getKeyHolder(row, &pool, sort_key_containers); + return findKeyImpl(keyHolderGetKey(key_holder), data, 0); + } + template ALWAYS_INLINE inline EmplaceResult emplaceKey( Data & data, size_t row, Arena & pool, std::vector & sort_key_containers, - const std::vector & hashvals = {}) + const std::vector & hashvals) { auto key_holder = static_cast(*this).getKeyHolder(row, &pool, sort_key_containers); if constexpr (enable_prefetch) @@ -165,7 +187,7 @@ class HashMethodBase size_t row, Arena & pool, std::vector & sort_key_containers, - const std::vector & hashvals = {}) + const std::vector & hashvals) { auto key_holder = static_cast(*this).getKeyHolder(row, &pool, sort_key_containers); if constexpr (enable_prefetch) @@ -247,6 +269,60 @@ class HashMethodBase } } + template + ALWAYS_INLINE inline EmplaceResult emplaceImpl(KeyHolder & key_holder, Data & data) + { + if constexpr (Cache::consecutive_keys_optimization) + { + if (cache.found && cache.check(keyHolderGetKey(key_holder))) + { + if constexpr (has_mapped) + return EmplaceResult(cache.value.second, cache.value.second, false); + else + return EmplaceResult(false); + } + } + + typename Data::LookupResult it; + bool inserted = false; + + data.emplace(key_holder, it, inserted); + + [[maybe_unused]] Mapped * cached = nullptr; + if constexpr (has_mapped) + cached = &it->getMapped(); + + if (inserted) + { + if constexpr (has_mapped) + { + new (&it->getMapped()) Mapped(); + } + } + + if constexpr (consecutive_keys_optimization) + { + cache.found = true; + cache.empty = false; + + if constexpr (has_mapped) + { + cache.value.first = it->getKey(); + cache.value.second = it->getMapped(); + cached = &cache.value.second; + } + else + { + cache.value = it->getKey(); + } + } + + if constexpr (has_mapped) + return EmplaceResult(it->getMapped(), *cached, inserted); + else + return EmplaceResult(inserted); + } + template ALWAYS_INLINE inline EmplaceResult 
emplaceImpl(KeyHolder & key_holder, Data & data, size_t hashval) { @@ -304,6 +380,47 @@ class HashMethodBase return EmplaceResult(inserted); } + template + ALWAYS_INLINE inline FindResult findKeyImpl(Key & key, Data & data) + { + if constexpr (Cache::consecutive_keys_optimization) + { + if (cache.check(key)) + { + if constexpr (has_mapped) + return FindResult(&cache.value.second, cache.found); + else + return FindResult(cache.found); + } + } + + typename Data::LookupResult it; + it = data.find(key); + + if constexpr (consecutive_keys_optimization) + { + cache.found = it != nullptr; + cache.empty = false; + + if constexpr (has_mapped) + { + cache.value.first = key; + if (it) + { + cache.value.second = it->getMapped(); + } + } + else + { + cache.value = key; + } + } + + if constexpr (has_mapped) + return FindResult(it ? &it->getMapped() : nullptr, it != nullptr); + else + return FindResult(it != nullptr); + } template ALWAYS_INLINE inline FindResult findKeyImpl(Key & key, Data & data, size_t hashval) { diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index e5e214c7791..3368a3e9bfe 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -741,6 +741,27 @@ std::optional::Res } } +template +std::optional::ResultType> Aggregator::emplaceOrFindKey( + Method & method, + typename Method::State & state, + size_t index, + Arena & aggregates_pool, + std::vector & sort_key_containers) const +{ + try + { + if constexpr (only_lookup) + return state.findKey(method.data, index, aggregates_pool, sort_key_containers); + else + return state.emplaceKey(method.data, index, aggregates_pool, sort_key_containers); + } + catch (ResizeException &) + { + return {}; + } +} + // This is only used by executeImplBatchStringHashMap. // It will choose specifix submap of StringHashMap then do emplace/find. // StringKeyType can be StringRef/StringKey8/StringKey16/StringKey24/ArenaKeyHolder. 
@@ -937,9 +958,9 @@ ALWAYS_INLINE void Aggregator::executeImplBatch( /// Generic case. std::unique_ptr places(new AggregateDataPtr[rows]); std::optional processed_rows; - std::vector hashvals; if constexpr (enable_prefetch) { + std::vector hashvals; hashvals = getHashVals( agg_process_info.start_row, agg_process_info.end_row, @@ -947,64 +968,123 @@ ALWAYS_INLINE void Aggregator::executeImplBatch( state, sort_key_containers, aggregates_pool); - } - - for (size_t i = agg_process_info.start_row; i < agg_process_info.start_row + rows; ++i) - { - AggregateDataPtr aggregate_data = nullptr; - auto emplace_result_holder = emplaceOrFindKey( - method, - state, - i, - *aggregates_pool, - sort_key_containers, - hashvals); - if unlikely (!emplace_result_holder.has_value()) + for (size_t i = agg_process_info.start_row; i < agg_process_info.start_row + rows; ++i) { - LOG_INFO(log, "HashTable resize throw ResizeException since the data is already marked for spill"); - break; - } + AggregateDataPtr aggregate_data = nullptr; - auto & emplace_result = emplace_result_holder.value(); + auto emplace_result_holder = emplaceOrFindKey( + method, + state, + i, + *aggregates_pool, + sort_key_containers, + hashvals); + if unlikely (!emplace_result_holder.has_value()) + { + LOG_INFO(log, "HashTable resize throw ResizeException since the data is already marked for spill"); + break; + } - if constexpr (only_lookup) - { - if (emplace_result.isFound()) + auto & emplace_result = emplace_result_holder.value(); + + if constexpr (only_lookup) { - aggregate_data = emplace_result.getMapped(); + if (emplace_result.isFound()) + { + aggregate_data = emplace_result.getMapped(); + } + else + { + agg_process_info.not_found_rows.push_back(i); + } } else { - agg_process_info.not_found_rows.push_back(i); + /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key. 
+ if (emplace_result.isInserted()) + { + /// exception-safety - if you can not allocate memory or create states, then destructors will not be called. + emplace_result.setMapped(nullptr); + + aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + createAggregateStates(aggregate_data); + + emplace_result.setMapped(aggregate_data); + } + else + { + aggregate_data = emplace_result.getMapped(); + + if constexpr (collect_hit_rate) + ++agg_process_info.hit_row_cnt; + } } + + places[i - agg_process_info.start_row] = aggregate_data; + processed_rows = i; } - else + } + else + { + LOG_DEBUG(log, "gjt debug original path"); + for (size_t i = agg_process_info.start_row; i < agg_process_info.start_row + rows; ++i) { - /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key. - if (emplace_result.isInserted()) + AggregateDataPtr aggregate_data = nullptr; + + auto emplace_result_holder = emplaceOrFindKey( + method, + state, + i, + *aggregates_pool, + sort_key_containers); + if unlikely (!emplace_result_holder.has_value()) { - /// exception-safety - if you can not allocate memory or create states, then destructors will not be called. 
- emplace_result.setMapped(nullptr); + LOG_INFO(log, "HashTable resize throw ResizeException since the data is already marked for spill"); + break; + } - aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - createAggregateStates(aggregate_data); + auto & emplace_result = emplace_result_holder.value(); - emplace_result.setMapped(aggregate_data); + if constexpr (only_lookup) + { + if (emplace_result.isFound()) + { + aggregate_data = emplace_result.getMapped(); + } + else + { + agg_process_info.not_found_rows.push_back(i); + } } else { - aggregate_data = emplace_result.getMapped(); + /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key. + if (emplace_result.isInserted()) + { + /// exception-safety - if you can not allocate memory or create states, then destructors will not be called. + emplace_result.setMapped(nullptr); - if constexpr (collect_hit_rate) - ++agg_process_info.hit_row_cnt; + aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + createAggregateStates(aggregate_data); + + emplace_result.setMapped(aggregate_data); + } + else + { + aggregate_data = emplace_result.getMapped(); + + if constexpr (collect_hit_rate) + ++agg_process_info.hit_row_cnt; + } } - } - places[i - agg_process_info.start_row] = aggregate_data; - processed_rows = i; + places[i - agg_process_info.start_row] = aggregate_data; + processed_rows = i; + } } + if (processed_rows) { /// Add values to the aggregate functions. 
diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index eb68bc50ae9..729ba863130 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -1499,6 +1499,14 @@ class Aggregator std::vector & sort_key_containers, const std::vector & hashvals) const; + template + std::optional::ResultType> emplaceOrFindKey( + Method & method, + typename Method::State & state, + size_t index, + Arena & aggregates_pool, + std::vector & sort_key_containers) const; + template < size_t SubMapIndex, bool collect_hit_rate, From 16937ff2cdd5e3fb7cde826887c0bfd252940bf3 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Tue, 3 Dec 2024 17:36:18 +0800 Subject: [PATCH 18/24] Revert "revert new hasher" This reverts commit 3e30f9561e38d09c39d1b0dc73735b6dcffb7991. --- dbms/src/Common/HashTable/StringHashTable.h | 66 ++++--------------- .../HashTable/TwoLevelStringHashTable.h | 8 +-- dbms/src/Interpreters/Aggregator.h | 20 +++--- 3 files changed, 27 insertions(+), 67 deletions(-) diff --git a/dbms/src/Common/HashTable/StringHashTable.h b/dbms/src/Common/HashTable/StringHashTable.h index 322523388cc..a43f35fdbbf 100644 --- a/dbms/src/Common/HashTable/StringHashTable.h +++ b/dbms/src/Common/HashTable/StringHashTable.h @@ -67,57 +67,17 @@ struct HashWithMixSeed } }; -// struct StringHashTableHash -// { -// using StringKey8Hasher = HashWithMixSeed; -// using StringKey16Hasher = HashWithMixSeed; -// using StringKey24Hasher = HashWithMixSeed; -// using StringRefHasher = StringRefHash; -// -// static size_t ALWAYS_INLINE operator()(StringKey8 key) { return StringKey8Hasher::operator()(key); } -// static size_t ALWAYS_INLINE operator()(const StringKey16 & key) { return StringKey16Hasher::operator()(key); } -// static size_t ALWAYS_INLINE operator()(const StringKey24 & key) { return StringKey24Hasher::operator()(key); } -// static size_t ALWAYS_INLINE operator()(const StringRef & key) { return StringRefHasher::operator()(key); } -// }; struct 
StringHashTableHash { -#if defined(__SSE4_2__) - static size_t ALWAYS_INLINE operator()(StringKey8 key) - { - size_t res = -1ULL; - res = _mm_crc32_u64(res, key); - return res; - } - static size_t ALWAYS_INLINE operator()(const StringKey16 & key) - { - size_t res = -1ULL; - res = _mm_crc32_u64(res, key.low); - res = _mm_crc32_u64(res, key.high); - return res; - } - static size_t ALWAYS_INLINE operator()(const StringKey24 & key) - { - size_t res = -1ULL; - res = _mm_crc32_u64(res, key.a); - res = _mm_crc32_u64(res, key.b); - res = _mm_crc32_u64(res, key.c); - return res; - } -#else - static size_t ALWAYS_INLINE operator()(StringKey8 key) - { - return CityHash_v1_0_2::CityHash64(reinterpret_cast(&key), 8); - } - static size_t ALWAYS_INLINE operator()(const StringKey16 & key) - { - return CityHash_v1_0_2::CityHash64(reinterpret_cast(&key), 16); - } - static size_t ALWAYS_INLINE operator()(const StringKey24 & key) - { - return CityHash_v1_0_2::CityHash64(reinterpret_cast(&key), 24); - } -#endif - static size_t ALWAYS_INLINE operator()(StringRef key){ return StringRefHash()(key); } + using StringKey8Hasher = HashWithMixSeed; + using StringKey16Hasher = HashWithMixSeed; + using StringKey24Hasher = HashWithMixSeed; + using StringRefHasher = StringRefHash; + + static size_t ALWAYS_INLINE operator()(StringKey8 key) { return StringKey8Hasher::operator()(key); } + static size_t ALWAYS_INLINE operator()(const StringKey16 & key) { return StringKey16Hasher::operator()(key); } + static size_t ALWAYS_INLINE operator()(const StringKey24 & key) { return StringKey24Hasher::operator()(key); } + static size_t ALWAYS_INLINE operator()(const StringRef & key) { return StringRefHasher::operator()(key); } }; template @@ -612,7 +572,7 @@ struct StringHashTableSubMapSelector<0, false, Data> template struct StringHashTableSubMapSelector<1, false, Data> { - using Hash = StringHashTableHash; + using Hash = StringHashTableHash::StringKey8Hasher; static typename Data::T1 & getSubMap(size_t, Data & 
data) { return data.m1; } }; @@ -620,7 +580,7 @@ struct StringHashTableSubMapSelector<1, false, Data> template struct StringHashTableSubMapSelector<2, false, Data> { - using Hash = StringHashTableHash; + using Hash = StringHashTableHash::StringKey16Hasher; static typename Data::T2 & getSubMap(size_t, Data & data) { return data.m2; } }; @@ -628,7 +588,7 @@ struct StringHashTableSubMapSelector<2, false, Data> template struct StringHashTableSubMapSelector<3, false, Data> { - using Hash = StringHashTableHash; + using Hash = StringHashTableHash::StringKey24Hasher; static typename Data::T3 & getSubMap(size_t, Data & data) { return data.m3; } }; @@ -636,7 +596,7 @@ struct StringHashTableSubMapSelector<3, false, Data> template struct StringHashTableSubMapSelector<4, false, Data> { - using Hash = StringHashTableHash; + using Hash = StringHashTableHash::StringRefHasher; static typename Data::Ts & getSubMap(size_t, Data & data) { return data.ms; } }; diff --git a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h index 403b8d3941c..ac2ab483e46 100644 --- a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h @@ -296,7 +296,7 @@ struct StringHashTableSubMapSelector<0, true, Data> template struct StringHashTableSubMapSelector<1, true, Data> { - using Hash = StringHashTableHash; + using Hash = StringHashTableHash::StringKey8Hasher; static typename Data::Impl::T1 & getSubMap(size_t hashval, Data & data) { @@ -308,7 +308,7 @@ struct StringHashTableSubMapSelector<1, true, Data> template struct StringHashTableSubMapSelector<2, true, Data> { - using Hash = StringHashTableHash; + using Hash = StringHashTableHash::StringKey16Hasher; static typename Data::Impl::T2 & getSubMap(size_t hashval, Data & data) { @@ -320,7 +320,7 @@ struct StringHashTableSubMapSelector<2, true, Data> template struct StringHashTableSubMapSelector<3, true, Data> { - using Hash = StringHashTableHash; + 
using Hash = StringHashTableHash::StringKey24Hasher; static typename Data::Impl::T3 & getSubMap(size_t hashval, Data & data) { @@ -332,7 +332,7 @@ struct StringHashTableSubMapSelector<3, true, Data> template struct StringHashTableSubMapSelector<4, true, Data> { - using Hash = StringHashTableHash; + using Hash = StringHashTableHash::StringRefHasher; static typename Data::Impl::Ts & getSubMap(size_t hashval, Data & data) { diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index 729ba863130..e3666b3d187 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -77,27 +77,27 @@ using AggregatedDataWithoutKey = AggregateDataPtr; using AggregatedDataWithUInt8Key = FixedImplicitZeroHashMapWithCalculatedSize; using AggregatedDataWithUInt16Key = FixedImplicitZeroHashMap; -using AggregatedDataWithUInt32Key = HashMap>; -using AggregatedDataWithUInt64Key = HashMap>; +using AggregatedDataWithUInt32Key = HashMap>; +using AggregatedDataWithUInt64Key = HashMap>; using AggregatedDataWithShortStringKey = StringHashMap; using AggregatedDataWithStringKey = HashMapWithSavedHash; -using AggregatedDataWithInt256Key = HashMap>; +using AggregatedDataWithInt256Key = HashMap>; -using AggregatedDataWithKeys128 = HashMap>; -using AggregatedDataWithKeys256 = HashMap>; +using AggregatedDataWithKeys128 = HashMap>; +using AggregatedDataWithKeys256 = HashMap>; -using AggregatedDataWithUInt32KeyTwoLevel = TwoLevelHashMap>; -using AggregatedDataWithUInt64KeyTwoLevel = TwoLevelHashMap>; +using AggregatedDataWithUInt32KeyTwoLevel = TwoLevelHashMap>; +using AggregatedDataWithUInt64KeyTwoLevel = TwoLevelHashMap>; -using AggregatedDataWithInt256KeyTwoLevel = TwoLevelHashMap>; +using AggregatedDataWithInt256KeyTwoLevel = TwoLevelHashMap>; using AggregatedDataWithShortStringKeyTwoLevel = TwoLevelStringHashMap; using AggregatedDataWithStringKeyTwoLevel = TwoLevelHashMapWithSavedHash; -using AggregatedDataWithKeys128TwoLevel = 
TwoLevelHashMap>; -using AggregatedDataWithKeys256TwoLevel = TwoLevelHashMap>; +using AggregatedDataWithKeys128TwoLevel = TwoLevelHashMap>; +using AggregatedDataWithKeys256TwoLevel = TwoLevelHashMap>; /** Variants with better hash function, using more than 32 bits for hash. * Using for merging phase of external aggregation, where number of keys may be far greater than 4 billion, From d2fba576ce82eaa6cd97620f1d4f17a173edc1d3 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Tue, 3 Dec 2024 20:07:18 +0800 Subject: [PATCH 19/24] refine original code path Signed-off-by: guo-shaoge --- dbms/src/Common/ColumnsHashingImpl.h | 302 ++++++++++----------------- dbms/src/Interpreters/Aggregator.cpp | 206 +++++++----------- dbms/src/Interpreters/Aggregator.h | 2 +- 3 files changed, 192 insertions(+), 318 deletions(-) diff --git a/dbms/src/Common/ColumnsHashingImpl.h b/dbms/src/Common/ColumnsHashingImpl.h index f5cc03d82c8..3c4fd601487 100644 --- a/dbms/src/Common/ColumnsHashingImpl.h +++ b/dbms/src/Common/ColumnsHashingImpl.h @@ -157,7 +157,7 @@ class HashMethodBase std::vector & sort_key_containers) { auto key_holder = static_cast(*this).getKeyHolder(row, &pool, sort_key_containers); - return findKeyImpl(keyHolderGetKey(key_holder), data, 0); + return findKeyImpl(keyHolderGetKey(key_holder), data); } template @@ -173,11 +173,11 @@ class HashMethodBase { assert(hashvals.size() == static_cast(*this).total_rows); prefetch(data, row, hashvals); - return emplaceImpl(key_holder, data, hashvals[row]); + return emplaceImpl(key_holder, data, hashvals[row]); } else { - return emplaceImpl(key_holder, data, 0); + return emplaceImpl(key_holder, data); } } @@ -194,11 +194,11 @@ class HashMethodBase { assert(hashvals.size() == static_cast(*this).total_rows); prefetch(data, row, hashvals); - return findKeyImpl(keyHolderGetKey(key_holder), data, hashvals[row]); + return findKeyImpl(keyHolderGetKey(key_holder), data, hashvals[row]); } else { - return findKeyImpl(keyHolderGetKey(key_holder), data, 
0); + return findKeyImpl(keyHolderGetKey(key_holder), data); } } @@ -219,7 +219,7 @@ class HashMethodBase if constexpr (enable_prefetch) prefetch(submap, idx, hashvals); - return emplaceImpl(datas[idx], submap, hashvals[idx]); + return emplaceImpl(datas[idx], submap, hashvals[idx]); } template @@ -237,7 +237,7 @@ class HashMethodBase if constexpr (enable_prefetch) prefetch(submap, idx, hashvals); - return findKeyImpl(keyHolderGetKey(datas[idx]), submap, hashvals[idx]); + return findKeyImpl(keyHolderGetKey(datas[idx]), submap, hashvals[idx]); } template @@ -269,202 +269,128 @@ class HashMethodBase } } +#define DEFINE_EMPLACE_IMPL_BEGIN \ + if constexpr (Cache::consecutive_keys_optimization) \ + { \ + if (cache.found && cache.check(keyHolderGetKey(key_holder))) \ + { \ + if constexpr (has_mapped) \ + return EmplaceResult(cache.value.second, cache.value.second, false); \ + else \ + return EmplaceResult(false); \ + } \ + } \ + typename Data::LookupResult it; \ + bool inserted = false; + +#define DEFINE_EMPLACE_IMPL_END \ + [[maybe_unused]] Mapped * cached = nullptr; \ + if constexpr (has_mapped) \ + cached = &it->getMapped(); \ + \ + if (inserted) \ + { \ + if constexpr (has_mapped) \ + { \ + new (&it->getMapped()) Mapped(); \ + } \ + } \ + \ + if constexpr (consecutive_keys_optimization) \ + { \ + cache.found = true; \ + cache.empty = false; \ + \ + if constexpr (has_mapped) \ + { \ + cache.value.first = it->getKey(); \ + cache.value.second = it->getMapped(); \ + cached = &cache.value.second; \ + } \ + else \ + { \ + cache.value = it->getKey(); \ + } \ + } \ + \ + if constexpr (has_mapped) \ + return EmplaceResult(it->getMapped(), *cached, inserted); \ + else \ + return EmplaceResult(inserted); + template - ALWAYS_INLINE inline EmplaceResult emplaceImpl(KeyHolder & key_holder, Data & data) + ALWAYS_INLINE inline EmplaceResult emplaceImpl(KeyHolder & key_holder, Data & data, size_t hashval) { - if constexpr (Cache::consecutive_keys_optimization) - { - if (cache.found 
&& cache.check(keyHolderGetKey(key_holder))) - { - if constexpr (has_mapped) - return EmplaceResult(cache.value.second, cache.value.second, false); - else - return EmplaceResult(false); - } - } - - typename Data::LookupResult it; - bool inserted = false; - - data.emplace(key_holder, it, inserted); - - [[maybe_unused]] Mapped * cached = nullptr; - if constexpr (has_mapped) - cached = &it->getMapped(); - - if (inserted) - { - if constexpr (has_mapped) - { - new (&it->getMapped()) Mapped(); - } - } - - if constexpr (consecutive_keys_optimization) - { - cache.found = true; - cache.empty = false; - - if constexpr (has_mapped) - { - cache.value.first = it->getKey(); - cache.value.second = it->getMapped(); - cached = &cache.value.second; - } - else - { - cache.value = it->getKey(); - } - } - - if constexpr (has_mapped) - return EmplaceResult(it->getMapped(), *cached, inserted); - else - return EmplaceResult(inserted); + DEFINE_EMPLACE_IMPL_BEGIN + data.emplace(key_holder, it, inserted, hashval); + DEFINE_EMPLACE_IMPL_END } - template - ALWAYS_INLINE inline EmplaceResult emplaceImpl(KeyHolder & key_holder, Data & data, size_t hashval) + template + ALWAYS_INLINE inline EmplaceResult emplaceImpl(KeyHolder & key_holder, Data & data) { - if constexpr (Cache::consecutive_keys_optimization) - { - if (cache.found && cache.check(keyHolderGetKey(key_holder))) - { - if constexpr (has_mapped) - return EmplaceResult(cache.value.second, cache.value.second, false); - else - return EmplaceResult(false); - } - } - - typename Data::LookupResult it; - bool inserted = false; - - if constexpr (use_hashval) - data.emplace(key_holder, it, inserted, hashval); - else - data.emplace(key_holder, it, inserted); - - [[maybe_unused]] Mapped * cached = nullptr; - if constexpr (has_mapped) - cached = &it->getMapped(); - - if (inserted) - { - if constexpr (has_mapped) - { - new (&it->getMapped()) Mapped(); - } - } - - if constexpr (consecutive_keys_optimization) - { - cache.found = true; - cache.empty = 
false; - - if constexpr (has_mapped) - { - cache.value.first = it->getKey(); - cache.value.second = it->getMapped(); - cached = &cache.value.second; - } - else - { - cache.value = it->getKey(); - } - } - - if constexpr (has_mapped) - return EmplaceResult(it->getMapped(), *cached, inserted); - else - return EmplaceResult(inserted); + DEFINE_EMPLACE_IMPL_BEGIN + data.emplace(key_holder, it, inserted); + DEFINE_EMPLACE_IMPL_END } +#undef DEFINE_EMPLACE_IMPL_BEGIN +#undef DEFINE_EMPLACE_IMPL_END + +#define DEFINE_FIND_IMPL_BEGIN \ + if constexpr (Cache::consecutive_keys_optimization) \ + { \ + if (cache.check(key)) \ + { \ + if constexpr (has_mapped) \ + return FindResult(&cache.value.second, cache.found); \ + else \ + return FindResult(cache.found); \ + } \ + } \ + typename Data::LookupResult it; + +#define DEFINE_FIND_IMPL_END \ + if constexpr (consecutive_keys_optimization) \ + { \ + cache.found = it != nullptr; \ + cache.empty = false; \ + \ + if constexpr (has_mapped) \ + { \ + cache.value.first = key; \ + if (it) \ + { \ + cache.value.second = it->getMapped(); \ + } \ + } \ + else \ + { \ + cache.value = key; \ + } \ + } \ + \ + if constexpr (has_mapped) \ + return FindResult(it ? &it->getMapped() : nullptr, it != nullptr); \ + else \ + return FindResult(it != nullptr); template ALWAYS_INLINE inline FindResult findKeyImpl(Key & key, Data & data) { - if constexpr (Cache::consecutive_keys_optimization) - { - if (cache.check(key)) - { - if constexpr (has_mapped) - return FindResult(&cache.value.second, cache.found); - else - return FindResult(cache.found); - } - } - - typename Data::LookupResult it; + DEFINE_FIND_IMPL_BEGIN it = data.find(key); - - if constexpr (consecutive_keys_optimization) - { - cache.found = it != nullptr; - cache.empty = false; - - if constexpr (has_mapped) - { - cache.value.first = key; - if (it) - { - cache.value.second = it->getMapped(); - } - } - else - { - cache.value = key; - } - } - - if constexpr (has_mapped) - return FindResult(it ? 
&it->getMapped() : nullptr, it != nullptr); - else - return FindResult(it != nullptr); + DEFINE_FIND_IMPL_END } - template + + template ALWAYS_INLINE inline FindResult findKeyImpl(Key & key, Data & data, size_t hashval) { - if constexpr (Cache::consecutive_keys_optimization) - { - if (cache.check(key)) - { - if constexpr (has_mapped) - return FindResult(&cache.value.second, cache.found); - else - return FindResult(cache.found); - } - } - - typename Data::LookupResult it; - if constexpr (use_hashval) - it = data.find(key, hashval); - else - it = data.find(key); - - if constexpr (consecutive_keys_optimization) - { - cache.found = it != nullptr; - cache.empty = false; - - if constexpr (has_mapped) - { - cache.value.first = key; - if (it) - { - cache.value.second = it->getMapped(); - } - } - else - { - cache.value = key; - } - } - - if constexpr (has_mapped) - return FindResult(it ? &it->getMapped() : nullptr, it != nullptr); - else - return FindResult(it != nullptr); + DEFINE_FIND_IMPL_BEGIN + it = data.find(key, hashval); + DEFINE_FIND_IMPL_END } +#undef DEFINE_FIND_IMPL_BEGIN +#undef DEFINE_FIND_IMPL_END }; diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 3368a3e9bfe..8e12e7383ab 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -681,7 +681,7 @@ void NO_INLINE Aggregator::executeImpl( // aggregates_pool, // agg_process_info); // else - executeImplBatch(method, state, aggregates_pool, agg_process_info); + executeImplBatch(method, state, aggregates_pool, agg_process_info); } else { @@ -713,7 +713,7 @@ std::vector getHashVals( return hashvals; } -template +template std::optional::ResultType> Aggregator::emplaceOrFindKey( Method & method, typename Method::State & state, @@ -725,10 +725,14 @@ std::optional::Res try { if constexpr (only_lookup) - return state - .template findKey(method.data, index, aggregates_pool, sort_key_containers, hashvals); + return state.template findKey( + 
method.data, + index, + aggregates_pool, + sort_key_containers, + hashvals); else - return state.template emplaceKey( + return state.template emplaceKey( method.data, index, aggregates_pool, @@ -875,26 +879,16 @@ ALWAYS_INLINE void Aggregator::executeImplBatch( /// For all rows. AggregateDataPtr place = aggregates_pool->alloc(0); std::vector hashvals; - if constexpr (enable_prefetch) - { - hashvals = getHashVals( - agg_process_info.start_row, - agg_process_info.end_row, - method.data, - state, - sort_key_containers, - aggregates_pool); - } for (size_t i = 0; i < rows; ++i) { - auto emplace_result_hold = emplaceOrFindKey( + // TODO prefetch + auto emplace_result_hold = emplaceOrFindKey( method, state, agg_process_info.start_row, *aggregates_pool, - sort_key_containers, - hashvals); + sort_key_containers); if likely (emplace_result_hold.has_value()) { if constexpr (collect_hit_rate) @@ -958,6 +952,56 @@ ALWAYS_INLINE void Aggregator::executeImplBatch( /// Generic case. std::unique_ptr places(new AggregateDataPtr[rows]); std::optional processed_rows; + +#define WRAP_EMPLACE_AGG_KEY_BEGIN \ + for (size_t i = agg_process_info.start_row; i < agg_process_info.start_row + rows; ++i) \ + { \ + AggregateDataPtr aggregate_data = nullptr; + +#define WRAP_EMPLACE_AGG_KEY_END \ + if unlikely (!emplace_result_holder.has_value()) \ + { \ + LOG_INFO(log, "HashTable resize throw ResizeException since the data is already marked for spill"); \ + break; \ + } \ + \ + auto & emplace_result = emplace_result_holder.value(); \ + \ + if constexpr (only_lookup) \ + { \ + if (emplace_result.isFound()) \ + { \ + aggregate_data = emplace_result.getMapped(); \ + } \ + else \ + { \ + agg_process_info.not_found_rows.push_back(i); \ + } \ + } \ + else \ + { \ + if (emplace_result.isInserted()) \ + { \ + emplace_result.setMapped(nullptr); \ + \ + aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); \ + createAggregateStates(aggregate_data); \ + \ + 
emplace_result.setMapped(aggregate_data); \ + } \ + else \ + { \ + aggregate_data = emplace_result.getMapped(); \ + \ + if constexpr (collect_hit_rate) \ + ++agg_process_info.hit_row_cnt; \ + } \ + } \ + \ + places[i - agg_process_info.start_row] = aggregate_data; \ + processed_rows = i; \ + } + if constexpr (enable_prefetch) { std::vector hashvals; @@ -969,121 +1013,25 @@ ALWAYS_INLINE void Aggregator::executeImplBatch( sort_key_containers, aggregates_pool); - for (size_t i = agg_process_info.start_row; i < agg_process_info.start_row + rows; ++i) - { - AggregateDataPtr aggregate_data = nullptr; - - auto emplace_result_holder = emplaceOrFindKey( - method, - state, - i, - *aggregates_pool, - sort_key_containers, - hashvals); - if unlikely (!emplace_result_holder.has_value()) - { - LOG_INFO(log, "HashTable resize throw ResizeException since the data is already marked for spill"); - break; - } - - auto & emplace_result = emplace_result_holder.value(); - - if constexpr (only_lookup) - { - if (emplace_result.isFound()) - { - aggregate_data = emplace_result.getMapped(); - } - else - { - agg_process_info.not_found_rows.push_back(i); - } - } - else - { - /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key. - if (emplace_result.isInserted()) - { - /// exception-safety - if you can not allocate memory or create states, then destructors will not be called. 
- emplace_result.setMapped(nullptr); - - aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - createAggregateStates(aggregate_data); - - emplace_result.setMapped(aggregate_data); - } - else - { - aggregate_data = emplace_result.getMapped(); - - if constexpr (collect_hit_rate) - ++agg_process_info.hit_row_cnt; - } - } - - places[i - agg_process_info.start_row] = aggregate_data; - processed_rows = i; - } + WRAP_EMPLACE_AGG_KEY_BEGIN + auto emplace_result_holder = emplaceOrFindKey( + method, + state, + i, + *aggregates_pool, + sort_key_containers, + hashvals); + WRAP_EMPLACE_AGG_KEY_END } else { - LOG_DEBUG(log, "gjt debug original path"); - for (size_t i = agg_process_info.start_row; i < agg_process_info.start_row + rows; ++i) - { - AggregateDataPtr aggregate_data = nullptr; - - auto emplace_result_holder = emplaceOrFindKey( - method, - state, - i, - *aggregates_pool, - sort_key_containers); - if unlikely (!emplace_result_holder.has_value()) - { - LOG_INFO(log, "HashTable resize throw ResizeException since the data is already marked for spill"); - break; - } - - auto & emplace_result = emplace_result_holder.value(); - - if constexpr (only_lookup) - { - if (emplace_result.isFound()) - { - aggregate_data = emplace_result.getMapped(); - } - else - { - agg_process_info.not_found_rows.push_back(i); - } - } - else - { - /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key. - if (emplace_result.isInserted()) - { - /// exception-safety - if you can not allocate memory or create states, then destructors will not be called. 
- emplace_result.setMapped(nullptr); - - aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); - createAggregateStates(aggregate_data); - - emplace_result.setMapped(aggregate_data); - } - else - { - aggregate_data = emplace_result.getMapped(); - - if constexpr (collect_hit_rate) - ++agg_process_info.hit_row_cnt; - } - } - - places[i - agg_process_info.start_row] = aggregate_data; - processed_rows = i; - } + WRAP_EMPLACE_AGG_KEY_BEGIN + auto emplace_result_holder + = emplaceOrFindKey(method, state, i, *aggregates_pool, sort_key_containers); + WRAP_EMPLACE_AGG_KEY_END } - +#undef WRAP_EMPLACE_AGG_KEY_BEGIN +#undef WRAP_EMPLACE_AGG_KEY_END if (processed_rows) { diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index e3666b3d187..eb6dfed68f9 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -1490,7 +1490,7 @@ class Aggregator Arena * aggregates_pool, AggProcessInfo & agg_process_info) const; - template + template std::optional::ResultType> emplaceOrFindKey( Method & method, typename Method::State & state, From 71b6ecd5fa3c664a32a47513683dd90c96c3c54e Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Tue, 3 Dec 2024 20:58:50 +0800 Subject: [PATCH 20/24] Reapply "revert new hasher" This reverts commit 16937ff2cdd5e3fb7cde826887c0bfd252940bf3. 
--- dbms/src/Common/HashTable/StringHashTable.h | 66 +++++++++++++++---- .../HashTable/TwoLevelStringHashTable.h | 8 +-- dbms/src/Interpreters/Aggregator.h | 20 +++--- 3 files changed, 67 insertions(+), 27 deletions(-) diff --git a/dbms/src/Common/HashTable/StringHashTable.h b/dbms/src/Common/HashTable/StringHashTable.h index a43f35fdbbf..322523388cc 100644 --- a/dbms/src/Common/HashTable/StringHashTable.h +++ b/dbms/src/Common/HashTable/StringHashTable.h @@ -67,17 +67,57 @@ struct HashWithMixSeed } }; +// struct StringHashTableHash +// { +// using StringKey8Hasher = HashWithMixSeed; +// using StringKey16Hasher = HashWithMixSeed; +// using StringKey24Hasher = HashWithMixSeed; +// using StringRefHasher = StringRefHash; +// +// static size_t ALWAYS_INLINE operator()(StringKey8 key) { return StringKey8Hasher::operator()(key); } +// static size_t ALWAYS_INLINE operator()(const StringKey16 & key) { return StringKey16Hasher::operator()(key); } +// static size_t ALWAYS_INLINE operator()(const StringKey24 & key) { return StringKey24Hasher::operator()(key); } +// static size_t ALWAYS_INLINE operator()(const StringRef & key) { return StringRefHasher::operator()(key); } +// }; struct StringHashTableHash { - using StringKey8Hasher = HashWithMixSeed; - using StringKey16Hasher = HashWithMixSeed; - using StringKey24Hasher = HashWithMixSeed; - using StringRefHasher = StringRefHash; - - static size_t ALWAYS_INLINE operator()(StringKey8 key) { return StringKey8Hasher::operator()(key); } - static size_t ALWAYS_INLINE operator()(const StringKey16 & key) { return StringKey16Hasher::operator()(key); } - static size_t ALWAYS_INLINE operator()(const StringKey24 & key) { return StringKey24Hasher::operator()(key); } - static size_t ALWAYS_INLINE operator()(const StringRef & key) { return StringRefHasher::operator()(key); } +#if defined(__SSE4_2__) + static size_t ALWAYS_INLINE operator()(StringKey8 key) + { + size_t res = -1ULL; + res = _mm_crc32_u64(res, key); + return res; + } + static 
size_t ALWAYS_INLINE operator()(const StringKey16 & key) + { + size_t res = -1ULL; + res = _mm_crc32_u64(res, key.low); + res = _mm_crc32_u64(res, key.high); + return res; + } + static size_t ALWAYS_INLINE operator()(const StringKey24 & key) + { + size_t res = -1ULL; + res = _mm_crc32_u64(res, key.a); + res = _mm_crc32_u64(res, key.b); + res = _mm_crc32_u64(res, key.c); + return res; + } +#else + static size_t ALWAYS_INLINE operator()(StringKey8 key) + { + return CityHash_v1_0_2::CityHash64(reinterpret_cast(&key), 8); + } + static size_t ALWAYS_INLINE operator()(const StringKey16 & key) + { + return CityHash_v1_0_2::CityHash64(reinterpret_cast(&key), 16); + } + static size_t ALWAYS_INLINE operator()(const StringKey24 & key) + { + return CityHash_v1_0_2::CityHash64(reinterpret_cast(&key), 24); + } +#endif + static size_t ALWAYS_INLINE operator()(StringRef key){ return StringRefHash()(key); } }; template @@ -572,7 +612,7 @@ struct StringHashTableSubMapSelector<0, false, Data> template struct StringHashTableSubMapSelector<1, false, Data> { - using Hash = StringHashTableHash::StringKey8Hasher; + using Hash = StringHashTableHash; static typename Data::T1 & getSubMap(size_t, Data & data) { return data.m1; } }; @@ -580,7 +620,7 @@ struct StringHashTableSubMapSelector<1, false, Data> template struct StringHashTableSubMapSelector<2, false, Data> { - using Hash = StringHashTableHash::StringKey16Hasher; + using Hash = StringHashTableHash; static typename Data::T2 & getSubMap(size_t, Data & data) { return data.m2; } }; @@ -588,7 +628,7 @@ struct StringHashTableSubMapSelector<2, false, Data> template struct StringHashTableSubMapSelector<3, false, Data> { - using Hash = StringHashTableHash::StringKey24Hasher; + using Hash = StringHashTableHash; static typename Data::T3 & getSubMap(size_t, Data & data) { return data.m3; } }; @@ -596,7 +636,7 @@ struct StringHashTableSubMapSelector<3, false, Data> template struct StringHashTableSubMapSelector<4, false, Data> { - using Hash = 
StringHashTableHash::StringRefHasher; + using Hash = StringHashTableHash; static typename Data::Ts & getSubMap(size_t, Data & data) { return data.ms; } }; diff --git a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h index ac2ab483e46..403b8d3941c 100644 --- a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h @@ -296,7 +296,7 @@ struct StringHashTableSubMapSelector<0, true, Data> template struct StringHashTableSubMapSelector<1, true, Data> { - using Hash = StringHashTableHash::StringKey8Hasher; + using Hash = StringHashTableHash; static typename Data::Impl::T1 & getSubMap(size_t hashval, Data & data) { @@ -308,7 +308,7 @@ struct StringHashTableSubMapSelector<1, true, Data> template struct StringHashTableSubMapSelector<2, true, Data> { - using Hash = StringHashTableHash::StringKey16Hasher; + using Hash = StringHashTableHash; static typename Data::Impl::T2 & getSubMap(size_t hashval, Data & data) { @@ -320,7 +320,7 @@ struct StringHashTableSubMapSelector<2, true, Data> template struct StringHashTableSubMapSelector<3, true, Data> { - using Hash = StringHashTableHash::StringKey24Hasher; + using Hash = StringHashTableHash; static typename Data::Impl::T3 & getSubMap(size_t hashval, Data & data) { @@ -332,7 +332,7 @@ struct StringHashTableSubMapSelector<3, true, Data> template struct StringHashTableSubMapSelector<4, true, Data> { - using Hash = StringHashTableHash::StringRefHasher; + using Hash = StringHashTableHash; static typename Data::Impl::Ts & getSubMap(size_t hashval, Data & data) { diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index eb6dfed68f9..81252b8b3c6 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -77,27 +77,27 @@ using AggregatedDataWithoutKey = AggregateDataPtr; using AggregatedDataWithUInt8Key = FixedImplicitZeroHashMapWithCalculatedSize; using 
AggregatedDataWithUInt16Key = FixedImplicitZeroHashMap; -using AggregatedDataWithUInt32Key = HashMap>; -using AggregatedDataWithUInt64Key = HashMap>; +using AggregatedDataWithUInt32Key = HashMap>; +using AggregatedDataWithUInt64Key = HashMap>; using AggregatedDataWithShortStringKey = StringHashMap; using AggregatedDataWithStringKey = HashMapWithSavedHash; -using AggregatedDataWithInt256Key = HashMap>; +using AggregatedDataWithInt256Key = HashMap>; -using AggregatedDataWithKeys128 = HashMap>; -using AggregatedDataWithKeys256 = HashMap>; +using AggregatedDataWithKeys128 = HashMap>; +using AggregatedDataWithKeys256 = HashMap>; -using AggregatedDataWithUInt32KeyTwoLevel = TwoLevelHashMap>; -using AggregatedDataWithUInt64KeyTwoLevel = TwoLevelHashMap>; +using AggregatedDataWithUInt32KeyTwoLevel = TwoLevelHashMap>; +using AggregatedDataWithUInt64KeyTwoLevel = TwoLevelHashMap>; -using AggregatedDataWithInt256KeyTwoLevel = TwoLevelHashMap>; +using AggregatedDataWithInt256KeyTwoLevel = TwoLevelHashMap>; using AggregatedDataWithShortStringKeyTwoLevel = TwoLevelStringHashMap; using AggregatedDataWithStringKeyTwoLevel = TwoLevelHashMapWithSavedHash; -using AggregatedDataWithKeys128TwoLevel = TwoLevelHashMap>; -using AggregatedDataWithKeys256TwoLevel = TwoLevelHashMap>; +using AggregatedDataWithKeys128TwoLevel = TwoLevelHashMap>; +using AggregatedDataWithKeys256TwoLevel = TwoLevelHashMap>; /** Variants with better hash function, using more than 32 bits for hash. 
* Using for merging phase of external aggregation, where number of keys may be far greater than 4 billion, From 40ceb089d4535895456461465279cdd6a09c975f Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Tue, 3 Dec 2024 21:23:36 +0800 Subject: [PATCH 21/24] one level old hash; two level new hash Signed-off-by: guo-shaoge --- dbms/src/Common/HashTable/HashTable.h | 3 +- dbms/src/Common/HashTable/TwoLevelHashTable.h | 9 +++--- .../HashTable/TwoLevelStringHashTable.h | 30 +++++++++++++------ dbms/src/Interpreters/Aggregator.h | 30 +++++++++---------- dbms/src/Interpreters/Settings.h | 2 +- 5 files changed, 44 insertions(+), 30 deletions(-) diff --git a/dbms/src/Common/HashTable/HashTable.h b/dbms/src/Common/HashTable/HashTable.h index c0f066edbb0..5496f263dde 100644 --- a/dbms/src/Common/HashTable/HashTable.h +++ b/dbms/src/Common/HashTable/HashTable.h @@ -1020,7 +1020,8 @@ class HashTable } /// Copy the cell from another hash table. It is assumed that the cell is not zero, and also that there was no such key in the table yet. - void ALWAYS_INLINE insertUniqueNonZero(const Cell * cell, size_t hash_value) + template + void ALWAYS_INLINE insertUniqueNonZero(const InsertCellType * cell, size_t hash_value) { size_t place_value = findEmptyCell(grower.place(hash_value)); diff --git a/dbms/src/Common/HashTable/TwoLevelHashTable.h b/dbms/src/Common/HashTable/TwoLevelHashTable.h index 75a5402363d..8eb22f851eb 100644 --- a/dbms/src/Common/HashTable/TwoLevelHashTable.h +++ b/dbms/src/Common/HashTable/TwoLevelHashTable.h @@ -115,9 +115,9 @@ class TwoLevelHashTable : private boost::noncopyable /// Copy the data from another (normal) hash table. It should have the same hash function. template - explicit TwoLevelHashTable(const Source & src) + explicit TwoLevelHashTable(Source & src) { - typename Source::const_iterator it = src.begin(); + typename Source::iterator it = src.begin(); /// It is assumed that the zero key (stored separately) is first in iteration order. 
if (it != src.end() && it.getPtr()->isZero(src)) @@ -128,8 +128,9 @@ class TwoLevelHashTable : private boost::noncopyable for (; it != src.end(); ++it) { - const Cell * cell = it.getPtr(); - size_t hash_value = cell->getHash(src); + auto * cell = it.getPtr(); + // size_t hash_value = cell->getHash(src); + size_t hash_value = Hash::operator()(cell->getKey()); size_t buck = getBucketFromHash(hash_value); impls[buck].insertUniqueNonZero(cell, hash_value); } diff --git a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h index 403b8d3941c..526de846fef 100644 --- a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h @@ -65,32 +65,40 @@ class TwoLevelStringHashTable : private boost::noncopyable TwoLevelStringHashTable() = default; template - explicit TwoLevelStringHashTable(const Source & src) + explicit TwoLevelStringHashTable(Source & src) { if (src.m0.hasZero()) impls[0].m0.setHasZero(*src.m0.zeroValue()); for (auto & v : src.m1) { - size_t hash_value = v.getHash(src.m1); + // size_t hash_value = v.getHash(src.m1); + const size_t hash_value = ImplTable::T1::Hash::operator()(v.getKey()); + v.setHash(hash_value); size_t buck = getBucketFromHash(hash_value); impls[buck].m1.insertUniqueNonZero(&v, hash_value); } for (auto & v : src.m2) { - size_t hash_value = v.getHash(src.m2); + // size_t hash_value = v.getHash(src.m2); + const size_t hash_value = ImplTable::T2::Hash::operator()(v.getKey()); + v.setHash(hash_value); size_t buck = getBucketFromHash(hash_value); impls[buck].m2.insertUniqueNonZero(&v, hash_value); } for (auto & v : src.m3) { - size_t hash_value = v.getHash(src.m3); + // size_t hash_value = v.getHash(src.m3); + const size_t hash_value = ImplTable::T3::Hash::operator()(v.getKey()); + v.setHash(hash_value); size_t buck = getBucketFromHash(hash_value); impls[buck].m3.insertUniqueNonZero(&v, hash_value); } for (auto & v : src.ms) { - size_t hash_value 
= v.getHash(src.ms); + // size_t hash_value = v.getHash(src.ms); + const size_t hash_value = ImplTable::Ts::Hash::operator()(v.getKey()); + v.setHash(hash_value); size_t buck = getBucketFromHash(hash_value); impls[buck].ms.insertUniqueNonZero(&v, hash_value); } @@ -296,7 +304,8 @@ struct StringHashTableSubMapSelector<0, true, Data> template struct StringHashTableSubMapSelector<1, true, Data> { - using Hash = StringHashTableHash; + // using Hash = StringHashTableHash; + using Hash = HashWithMixSeed; static typename Data::Impl::T1 & getSubMap(size_t hashval, Data & data) { @@ -308,7 +317,8 @@ struct StringHashTableSubMapSelector<1, true, Data> template struct StringHashTableSubMapSelector<2, true, Data> { - using Hash = StringHashTableHash; + // using Hash = StringHashTableHash; + using Hash = HashWithMixSeed; static typename Data::Impl::T2 & getSubMap(size_t hashval, Data & data) { @@ -320,7 +330,8 @@ struct StringHashTableSubMapSelector<2, true, Data> template struct StringHashTableSubMapSelector<3, true, Data> { - using Hash = StringHashTableHash; + // using Hash = StringHashTableHash; + using Hash = HashWithMixSeed; static typename Data::Impl::T3 & getSubMap(size_t hashval, Data & data) { @@ -332,7 +343,8 @@ struct StringHashTableSubMapSelector<3, true, Data> template struct StringHashTableSubMapSelector<4, true, Data> { - using Hash = StringHashTableHash; + // using Hash = StringHashTableHash; + using Hash = StringRefHash; static typename Data::Impl::Ts & getSubMap(size_t hashval, Data & data) { diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index 81252b8b3c6..d900cc3e231 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -88,16 +88,16 @@ using AggregatedDataWithInt256Key = HashMap>; using AggregatedDataWithKeys256 = HashMap>; -using AggregatedDataWithUInt32KeyTwoLevel = TwoLevelHashMap>; -using AggregatedDataWithUInt64KeyTwoLevel = TwoLevelHashMap>; +using 
AggregatedDataWithUInt32KeyTwoLevel = TwoLevelHashMap>; +using AggregatedDataWithUInt64KeyTwoLevel = TwoLevelHashMap>; -using AggregatedDataWithInt256KeyTwoLevel = TwoLevelHashMap>; +using AggregatedDataWithInt256KeyTwoLevel = TwoLevelHashMap>; using AggregatedDataWithShortStringKeyTwoLevel = TwoLevelStringHashMap; using AggregatedDataWithStringKeyTwoLevel = TwoLevelHashMapWithSavedHash; -using AggregatedDataWithKeys128TwoLevel = TwoLevelHashMap>; -using AggregatedDataWithKeys256TwoLevel = TwoLevelHashMap>; +using AggregatedDataWithKeys128TwoLevel = TwoLevelHashMap>; +using AggregatedDataWithKeys256TwoLevel = TwoLevelHashMap>; /** Variants with better hash function, using more than 32 bits for hash. * Using for merging phase of external aggregation, where number of keys may be far greater than 4 billion, @@ -125,7 +125,7 @@ struct AggregationMethodOneNumber AggregationMethodOneNumber() = default; template - explicit AggregationMethodOneNumber(const Other & other) + explicit AggregationMethodOneNumber(Other & other) : data(other.data) {} @@ -179,7 +179,7 @@ struct AggregationMethodString AggregationMethodString() = default; template - explicit AggregationMethodString(const Other & other) + explicit AggregationMethodString(Other & other) : data(other.data) {} @@ -227,7 +227,7 @@ struct AggregationMethodStringNoCache AggregationMethodStringNoCache() = default; template - explicit AggregationMethodStringNoCache(const Other & other) + explicit AggregationMethodStringNoCache(Other & other) : data(other.data) {} @@ -275,7 +275,7 @@ struct AggregationMethodOneKeyStringNoCache AggregationMethodOneKeyStringNoCache() = default; template - explicit AggregationMethodOneKeyStringNoCache(const Other & other) + explicit AggregationMethodOneKeyStringNoCache(Other & other) : data(other.data) {} @@ -325,7 +325,7 @@ struct AggregationMethodMultiStringNoCache AggregationMethodMultiStringNoCache() = default; template - explicit AggregationMethodMultiStringNoCache(const Other & other) + 
explicit AggregationMethodMultiStringNoCache(Other & other) : data(other.data) {} @@ -355,7 +355,7 @@ struct AggregationMethodFastPathTwoKeysNoCache AggregationMethodFastPathTwoKeysNoCache() = default; template - explicit AggregationMethodFastPathTwoKeysNoCache(const Other & other) + explicit AggregationMethodFastPathTwoKeysNoCache(Other & other) : data(other.data) {} @@ -475,7 +475,7 @@ struct AggregationMethodFixedString AggregationMethodFixedString() = default; template - explicit AggregationMethodFixedString(const Other & other) + explicit AggregationMethodFixedString(Other & other) : data(other.data) {} @@ -523,7 +523,7 @@ struct AggregationMethodFixedStringNoCache AggregationMethodFixedStringNoCache() = default; template - explicit AggregationMethodFixedStringNoCache(const Other & other) + explicit AggregationMethodFixedStringNoCache(Other & other) : data(other.data) {} @@ -572,7 +572,7 @@ struct AggregationMethodKeysFixed AggregationMethodKeysFixed() = default; template - explicit AggregationMethodKeysFixed(const Other & other) + explicit AggregationMethodKeysFixed(Other & other) : data(other.data) {} @@ -679,7 +679,7 @@ struct AggregationMethodSerialized AggregationMethodSerialized() = default; template - explicit AggregationMethodSerialized(const Other & other) + explicit AggregationMethodSerialized(Other & other) : data(other.data) {} diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index 4c2e5dbeca4..5f46d74fd30 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -84,7 +84,7 @@ struct Settings M(SettingLoadBalancing, load_balancing, LoadBalancing::RANDOM, "Which replicas (among healthy replicas) to preferably send a query to (on the first attempt) for distributed processing.") \ \ M(SettingUInt64, group_by_two_level_threshold, 100000, "From what number of keys, a two-level aggregation starts. 
0 - the threshold is not set.") \ - M(SettingUInt64, group_by_two_level_threshold_bytes, 100000000, "From what size of the aggregation state in bytes, a two-level aggregation begins to be used. 0 - the threshold is not set. " \ + M(SettingUInt64, group_by_two_level_threshold_bytes, 32000000, "From what size of the aggregation state in bytes, a two-level aggregation begins to be used. 0 - the threshold is not set. " \ "Two-level aggregation is used when at least one of the thresholds is triggered.") \ M(SettingUInt64, aggregation_memory_efficient_merge_threads, 0, "Number of threads to use for merge intermediate aggregation results in memory efficient mode. When bigger, then more memory is " \ "consumed. 0 means - same as 'max_threads'.") \ From 4cb24c21fcba3cb700cb6cbdd5d978e7fe214ad3 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Wed, 4 Dec 2024 10:40:11 +0800 Subject: [PATCH 22/24] Revert "one level old hash; two level new hash" This reverts commit 40ceb089d4535895456461465279cdd6a09c975f. --- dbms/src/Common/HashTable/HashTable.h | 3 +- dbms/src/Common/HashTable/TwoLevelHashTable.h | 9 +++--- .../HashTable/TwoLevelStringHashTable.h | 30 ++++++------------- dbms/src/Interpreters/Aggregator.h | 30 +++++++++---------- dbms/src/Interpreters/Settings.h | 2 +- 5 files changed, 30 insertions(+), 44 deletions(-) diff --git a/dbms/src/Common/HashTable/HashTable.h b/dbms/src/Common/HashTable/HashTable.h index 5496f263dde..c0f066edbb0 100644 --- a/dbms/src/Common/HashTable/HashTable.h +++ b/dbms/src/Common/HashTable/HashTable.h @@ -1020,8 +1020,7 @@ class HashTable } /// Copy the cell from another hash table. It is assumed that the cell is not zero, and also that there was no such key in the table yet. 
- template - void ALWAYS_INLINE insertUniqueNonZero(const InsertCellType * cell, size_t hash_value) + void ALWAYS_INLINE insertUniqueNonZero(const Cell * cell, size_t hash_value) { size_t place_value = findEmptyCell(grower.place(hash_value)); diff --git a/dbms/src/Common/HashTable/TwoLevelHashTable.h b/dbms/src/Common/HashTable/TwoLevelHashTable.h index 8eb22f851eb..75a5402363d 100644 --- a/dbms/src/Common/HashTable/TwoLevelHashTable.h +++ b/dbms/src/Common/HashTable/TwoLevelHashTable.h @@ -115,9 +115,9 @@ class TwoLevelHashTable : private boost::noncopyable /// Copy the data from another (normal) hash table. It should have the same hash function. template - explicit TwoLevelHashTable(Source & src) + explicit TwoLevelHashTable(const Source & src) { - typename Source::iterator it = src.begin(); + typename Source::const_iterator it = src.begin(); /// It is assumed that the zero key (stored separately) is first in iteration order. if (it != src.end() && it.getPtr()->isZero(src)) @@ -128,9 +128,8 @@ class TwoLevelHashTable : private boost::noncopyable for (; it != src.end(); ++it) { - auto * cell = it.getPtr(); - // size_t hash_value = cell->getHash(src); - size_t hash_value = Hash::operator()(cell->getKey()); + const Cell * cell = it.getPtr(); + size_t hash_value = cell->getHash(src); size_t buck = getBucketFromHash(hash_value); impls[buck].insertUniqueNonZero(cell, hash_value); } diff --git a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h index 526de846fef..403b8d3941c 100644 --- a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h @@ -65,40 +65,32 @@ class TwoLevelStringHashTable : private boost::noncopyable TwoLevelStringHashTable() = default; template - explicit TwoLevelStringHashTable(Source & src) + explicit TwoLevelStringHashTable(const Source & src) { if (src.m0.hasZero()) impls[0].m0.setHasZero(*src.m0.zeroValue()); for (auto & v : src.m1) { - 
// size_t hash_value = v.getHash(src.m1); - const size_t hash_value = ImplTable::T1::Hash::operator()(v.getKey()); - v.setHash(hash_value); + size_t hash_value = v.getHash(src.m1); size_t buck = getBucketFromHash(hash_value); impls[buck].m1.insertUniqueNonZero(&v, hash_value); } for (auto & v : src.m2) { - // size_t hash_value = v.getHash(src.m2); - const size_t hash_value = ImplTable::T2::Hash::operator()(v.getKey()); - v.setHash(hash_value); + size_t hash_value = v.getHash(src.m2); size_t buck = getBucketFromHash(hash_value); impls[buck].m2.insertUniqueNonZero(&v, hash_value); } for (auto & v : src.m3) { - // size_t hash_value = v.getHash(src.m3); - const size_t hash_value = ImplTable::T3::Hash::operator()(v.getKey()); - v.setHash(hash_value); + size_t hash_value = v.getHash(src.m3); size_t buck = getBucketFromHash(hash_value); impls[buck].m3.insertUniqueNonZero(&v, hash_value); } for (auto & v : src.ms) { - // size_t hash_value = v.getHash(src.ms); - const size_t hash_value = ImplTable::Ts::Hash::operator()(v.getKey()); - v.setHash(hash_value); + size_t hash_value = v.getHash(src.ms); size_t buck = getBucketFromHash(hash_value); impls[buck].ms.insertUniqueNonZero(&v, hash_value); } @@ -304,8 +296,7 @@ struct StringHashTableSubMapSelector<0, true, Data> template struct StringHashTableSubMapSelector<1, true, Data> { - // using Hash = StringHashTableHash; - using Hash = HashWithMixSeed; + using Hash = StringHashTableHash; static typename Data::Impl::T1 & getSubMap(size_t hashval, Data & data) { @@ -317,8 +308,7 @@ struct StringHashTableSubMapSelector<1, true, Data> template struct StringHashTableSubMapSelector<2, true, Data> { - // using Hash = StringHashTableHash; - using Hash = HashWithMixSeed; + using Hash = StringHashTableHash; static typename Data::Impl::T2 & getSubMap(size_t hashval, Data & data) { @@ -330,8 +320,7 @@ struct StringHashTableSubMapSelector<2, true, Data> template struct StringHashTableSubMapSelector<3, true, Data> { - // using Hash = 
StringHashTableHash; - using Hash = HashWithMixSeed; + using Hash = StringHashTableHash; static typename Data::Impl::T3 & getSubMap(size_t hashval, Data & data) { @@ -343,8 +332,7 @@ struct StringHashTableSubMapSelector<3, true, Data> template struct StringHashTableSubMapSelector<4, true, Data> { - // using Hash = StringHashTableHash; - using Hash = StringRefHash; + using Hash = StringHashTableHash; static typename Data::Impl::Ts & getSubMap(size_t hashval, Data & data) { diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index d900cc3e231..81252b8b3c6 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -88,16 +88,16 @@ using AggregatedDataWithInt256Key = HashMap>; using AggregatedDataWithKeys256 = HashMap>; -using AggregatedDataWithUInt32KeyTwoLevel = TwoLevelHashMap>; -using AggregatedDataWithUInt64KeyTwoLevel = TwoLevelHashMap>; +using AggregatedDataWithUInt32KeyTwoLevel = TwoLevelHashMap>; +using AggregatedDataWithUInt64KeyTwoLevel = TwoLevelHashMap>; -using AggregatedDataWithInt256KeyTwoLevel = TwoLevelHashMap>; +using AggregatedDataWithInt256KeyTwoLevel = TwoLevelHashMap>; using AggregatedDataWithShortStringKeyTwoLevel = TwoLevelStringHashMap; using AggregatedDataWithStringKeyTwoLevel = TwoLevelHashMapWithSavedHash; -using AggregatedDataWithKeys128TwoLevel = TwoLevelHashMap>; -using AggregatedDataWithKeys256TwoLevel = TwoLevelHashMap>; +using AggregatedDataWithKeys128TwoLevel = TwoLevelHashMap>; +using AggregatedDataWithKeys256TwoLevel = TwoLevelHashMap>; /** Variants with better hash function, using more than 32 bits for hash. 
* Using for merging phase of external aggregation, where number of keys may be far greater than 4 billion, @@ -125,7 +125,7 @@ struct AggregationMethodOneNumber AggregationMethodOneNumber() = default; template - explicit AggregationMethodOneNumber(Other & other) + explicit AggregationMethodOneNumber(const Other & other) : data(other.data) {} @@ -179,7 +179,7 @@ struct AggregationMethodString AggregationMethodString() = default; template - explicit AggregationMethodString(Other & other) + explicit AggregationMethodString(const Other & other) : data(other.data) {} @@ -227,7 +227,7 @@ struct AggregationMethodStringNoCache AggregationMethodStringNoCache() = default; template - explicit AggregationMethodStringNoCache(Other & other) + explicit AggregationMethodStringNoCache(const Other & other) : data(other.data) {} @@ -275,7 +275,7 @@ struct AggregationMethodOneKeyStringNoCache AggregationMethodOneKeyStringNoCache() = default; template - explicit AggregationMethodOneKeyStringNoCache(Other & other) + explicit AggregationMethodOneKeyStringNoCache(const Other & other) : data(other.data) {} @@ -325,7 +325,7 @@ struct AggregationMethodMultiStringNoCache AggregationMethodMultiStringNoCache() = default; template - explicit AggregationMethodMultiStringNoCache(Other & other) + explicit AggregationMethodMultiStringNoCache(const Other & other) : data(other.data) {} @@ -355,7 +355,7 @@ struct AggregationMethodFastPathTwoKeysNoCache AggregationMethodFastPathTwoKeysNoCache() = default; template - explicit AggregationMethodFastPathTwoKeysNoCache(Other & other) + explicit AggregationMethodFastPathTwoKeysNoCache(const Other & other) : data(other.data) {} @@ -475,7 +475,7 @@ struct AggregationMethodFixedString AggregationMethodFixedString() = default; template - explicit AggregationMethodFixedString(Other & other) + explicit AggregationMethodFixedString(const Other & other) : data(other.data) {} @@ -523,7 +523,7 @@ struct AggregationMethodFixedStringNoCache 
AggregationMethodFixedStringNoCache() = default; template - explicit AggregationMethodFixedStringNoCache(Other & other) + explicit AggregationMethodFixedStringNoCache(const Other & other) : data(other.data) {} @@ -572,7 +572,7 @@ struct AggregationMethodKeysFixed AggregationMethodKeysFixed() = default; template - explicit AggregationMethodKeysFixed(Other & other) + explicit AggregationMethodKeysFixed(const Other & other) : data(other.data) {} @@ -679,7 +679,7 @@ struct AggregationMethodSerialized AggregationMethodSerialized() = default; template - explicit AggregationMethodSerialized(Other & other) + explicit AggregationMethodSerialized(const Other & other) : data(other.data) {} diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index 5f46d74fd30..4c2e5dbeca4 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -84,7 +84,7 @@ struct Settings M(SettingLoadBalancing, load_balancing, LoadBalancing::RANDOM, "Which replicas (among healthy replicas) to preferably send a query to (on the first attempt) for distributed processing.") \ \ M(SettingUInt64, group_by_two_level_threshold, 100000, "From what number of keys, a two-level aggregation starts. 0 - the threshold is not set.") \ - M(SettingUInt64, group_by_two_level_threshold_bytes, 32000000, "From what size of the aggregation state in bytes, a two-level aggregation begins to be used. 0 - the threshold is not set. " \ + M(SettingUInt64, group_by_two_level_threshold_bytes, 100000000, "From what size of the aggregation state in bytes, a two-level aggregation begins to be used. 0 - the threshold is not set. " \ "Two-level aggregation is used when at least one of the thresholds is triggered.") \ M(SettingUInt64, aggregation_memory_efficient_merge_threads, 0, "Number of threads to use for merge intermediate aggregation results in memory efficient mode. When bigger, then more memory is " \ "consumed. 
0 means - same as 'max_threads'.") \ From c02cf71e90ea12a93dceaea8396e6f1daad49a37 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Wed, 4 Dec 2024 10:58:11 +0800 Subject: [PATCH 23/24] revert new hasher; refine original code path Signed-off-by: guo-shaoge --- dbms/src/Common/ColumnsHashingImpl.h | 3 + dbms/src/Common/HashTable/Hash.h | 125 ----------- dbms/src/Common/HashTable/StringHashTable.h | 31 +-- dbms/src/Interpreters/Aggregator.cpp | 227 ++++++++++---------- 4 files changed, 121 insertions(+), 265 deletions(-) diff --git a/dbms/src/Common/ColumnsHashingImpl.h b/dbms/src/Common/ColumnsHashingImpl.h index 3c4fd601487..fcbfc4bc358 100644 --- a/dbms/src/Common/ColumnsHashingImpl.h +++ b/dbms/src/Common/ColumnsHashingImpl.h @@ -138,6 +138,7 @@ class HashMethodBase map.prefetch(hashvals[prefetch_idx]); } + // Emplace key without hashval, and this method doesn't support prefetch. template ALWAYS_INLINE inline EmplaceResult emplaceKey( Data & data, @@ -160,6 +161,7 @@ class HashMethodBase return findKeyImpl(keyHolderGetKey(key_holder), data); } + // Emplace key using hashval, you can enable prefetch or not. template ALWAYS_INLINE inline EmplaceResult emplaceKey( Data & data, @@ -318,6 +320,7 @@ class HashMethodBase else \ return EmplaceResult(inserted); + // This method is performance critical, so there are two emplaceImpl to make sure caller can use the one they need. 
template ALWAYS_INLINE inline EmplaceResult emplaceImpl(KeyHolder & key_holder, Data & data, size_t hashval) { diff --git a/dbms/src/Common/HashTable/Hash.h b/dbms/src/Common/HashTable/Hash.h index 207919a347e..457b4b9f3c0 100644 --- a/dbms/src/Common/HashTable/Hash.h +++ b/dbms/src/Common/HashTable/Hash.h @@ -422,128 +422,3 @@ struct IntHash32, void>> } } }; - -inline uint64_t umul128(uint64_t v, uint64_t kmul, uint64_t * high) -{ - DB::Int128 res = static_cast(v) * static_cast(kmul); - *high = static_cast(res >> 64); - return static_cast(res); -} - -template -inline void hash_combine(uint64_t & seed, const T & val) -{ - // from: https://github.com/HowardHinnant/hash_append/issues/7#issuecomment-629414712 - seed ^= std::hash{}(val) + 0x9e3779b97f4a7c15LLU + (seed << 12) + (seed >> 4); -} - -inline uint64_t hash_int128(uint64_t seed, const DB::Int128 & v) -{ - auto low = static_cast(v); - auto high = static_cast(v >> 64); - hash_combine(seed, low); - hash_combine(seed, high); - return seed; -} - -inline uint64_t hash_uint128(uint64_t seed, const DB::UInt128 & v) -{ - hash_combine(seed, v.low); - hash_combine(seed, v.high); - return seed; -} - -inline uint64_t hash_int256(uint64_t seed, const DB::Int256 & v) -{ - const auto & backend_value = v.backend(); - for (size_t i = 0; i < backend_value.size(); ++i) - { - hash_combine(seed, backend_value.limbs()[i]); - } - return seed; -} - -inline uint64_t hash_uint256(uint64_t seed, const DB::UInt256 & v) -{ - hash_combine(seed, v.a); - hash_combine(seed, v.b); - hash_combine(seed, v.c); - hash_combine(seed, v.d); - return seed; -} - -template -struct HashWithMixSeedHelper -{ - static inline size_t operator()(size_t); -}; - -template <> -struct HashWithMixSeedHelper<4> -{ - static inline size_t operator()(size_t v) - { - // from: https://github.com/aappleby/smhasher/blob/0ff96f7835817a27d0487325b6c16033e2992eb5/src/MurmurHash3.cpp#L102 - static constexpr uint64_t kmul = 0xcc9e2d51UL; - uint64_t mul = v * kmul; - return 
static_cast(mul ^ (mul >> 32u)); - } -}; - -template <> -struct HashWithMixSeedHelper<8> -{ - static inline size_t operator()(size_t v) - { - // from: https://github.com/martinus/robin-hood-hashing/blob/b21730713f4b5296bec411917c46919f7b38b178/src/include/robin_hood.h#L735 - static constexpr uint64_t kmul = 0xde5fb9d2630458e9ULL; - uint64_t high = 0; - uint64_t low = umul128(v, kmul, &high); - return static_cast(high + low); - } -}; - -template -struct HashWithMixSeed -{ - static size_t operator()(const T & v) - { - return HashWithMixSeedHelper::operator()(std::hash()(v)); - } -}; - -template <> -struct HashWithMixSeed -{ - static size_t operator()(const DB::Int128 & v) - { - return HashWithMixSeedHelper::operator()(hash_int128(0, v)); - } -}; - -template <> -struct HashWithMixSeed -{ - static inline size_t operator()(const DB::UInt128 & v) - { - return HashWithMixSeedHelper::operator()(hash_uint128(0, v)); - } -}; - -template <> -struct HashWithMixSeed -{ - static inline size_t operator()(const DB::Int256 & v) - { - return HashWithMixSeedHelper::operator()(hash_int256(0, v)); - } -}; - -template <> -struct HashWithMixSeed -{ - static inline size_t operator()(const DB::UInt256 & v) - { - return HashWithMixSeedHelper::operator()(hash_uint256(0, v)); - } -}; diff --git a/dbms/src/Common/HashTable/StringHashTable.h b/dbms/src/Common/HashTable/StringHashTable.h index 322523388cc..9bbdabb91fa 100644 --- a/dbms/src/Common/HashTable/StringHashTable.h +++ b/dbms/src/Common/HashTable/StringHashTable.h @@ -50,35 +50,6 @@ inline StringRef ALWAYS_INLINE toStringRef(const StringKey24 & n) return {reinterpret_cast(&n), 24ul - (__builtin_clzll(n.c) >> 3)}; } -inline size_t hash_string_key_24(uint64_t seed, const StringKey24 & v) -{ - hash_combine(seed, v.a); - hash_combine(seed, v.b); - hash_combine(seed, v.c); - return seed; -} - -template <> -struct HashWithMixSeed -{ - static inline size_t operator()(const StringKey24 & v) - { - return 
HashWithMixSeedHelper::operator()(hash_string_key_24(0, v)); - } -}; - -// struct StringHashTableHash -// { -// using StringKey8Hasher = HashWithMixSeed; -// using StringKey16Hasher = HashWithMixSeed; -// using StringKey24Hasher = HashWithMixSeed; -// using StringRefHasher = StringRefHash; -// -// static size_t ALWAYS_INLINE operator()(StringKey8 key) { return StringKey8Hasher::operator()(key); } -// static size_t ALWAYS_INLINE operator()(const StringKey16 & key) { return StringKey16Hasher::operator()(key); } -// static size_t ALWAYS_INLINE operator()(const StringKey24 & key) { return StringKey24Hasher::operator()(key); } -// static size_t ALWAYS_INLINE operator()(const StringRef & key) { return StringRefHasher::operator()(key); } -// }; struct StringHashTableHash { #if defined(__SSE4_2__) @@ -117,7 +88,7 @@ struct StringHashTableHash return CityHash_v1_0_2::CityHash64(reinterpret_cast(&key), 24); } #endif - static size_t ALWAYS_INLINE operator()(StringRef key){ return StringRefHash()(key); } + static size_t ALWAYS_INLINE operator()(StringRef key) { return StringRefHash()(key); } }; template diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 8e12e7383ab..b714bacce04 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -674,13 +674,6 @@ void NO_INLINE Aggregator::executeImpl( #endif if (disable_prefetch) { - // if constexpr (Method::Data::is_string_hash_map) - // executeImplBatchStringHashMap( - // method, - // state, - // aggregates_pool, - // agg_process_info); - // else executeImplBatch(method, state, aggregates_pool, agg_process_info); } else @@ -725,14 +718,14 @@ std::optional::Res try { if constexpr (only_lookup) - return state.template findKey( + return state.template findKey( method.data, index, aggregates_pool, sort_key_containers, hashvals); else - return state.template emplaceKey( + return state.template emplaceKey( method.data, index, aggregates_pool, @@ -878,41 
+871,64 @@ ALWAYS_INLINE void Aggregator::executeImplBatch( { /// For all rows. AggregateDataPtr place = aggregates_pool->alloc(0); - std::vector hashvals; +#define HANDLE_AGG_EMPLACE_RESULT \ + if likely (emplace_result_hold.has_value()) \ + { \ + if constexpr (collect_hit_rate) \ + { \ + ++agg_process_info.hit_row_cnt; \ + } \ + \ + if constexpr (only_lookup) \ + { \ + if (!emplace_result_hold.value().isFound()) \ + agg_process_info.not_found_rows.push_back(i); \ + } \ + else \ + { \ + emplace_result_hold.value().setMapped(place); \ + } \ + ++agg_process_info.start_row; \ + } \ + else \ + { \ + LOG_INFO(log, "HashTable resize throw ResizeException since the data is already marked for spill"); \ + break; \ + } for (size_t i = 0; i < rows; ++i) { - // TODO prefetch - auto emplace_result_hold = emplaceOrFindKey( - method, - state, - agg_process_info.start_row, - *aggregates_pool, - sort_key_containers); - if likely (emplace_result_hold.has_value()) + if constexpr (enable_prefetch) { - if constexpr (collect_hit_rate) - { - ++agg_process_info.hit_row_cnt; - } - - if constexpr (only_lookup) - { - if (!emplace_result_hold.value().isFound()) - agg_process_info.not_found_rows.push_back(i); - } - else - { - emplace_result_hold.value().setMapped(place); - } - ++agg_process_info.start_row; + auto hashvals = getHashVals( + agg_process_info.start_row, + agg_process_info.end_row, + method.data, + state, + sort_key_containers, + aggregates_pool); + + auto emplace_result_hold = emplaceOrFindKey( + method, + state, + agg_process_info.start_row, + *aggregates_pool, + sort_key_containers, + hashvals); + HANDLE_AGG_EMPLACE_RESULT } else { - LOG_INFO(log, "HashTable resize throw ResizeException since the data is already marked for spill"); - break; + auto emplace_result_hold = emplaceOrFindKey( + method, + state, + agg_process_info.start_row, + *aggregates_pool, + sort_key_containers); + HANDLE_AGG_EMPLACE_RESULT } } +#undef HANDLE_AGG_EMPLACE_RESULT return; } @@ -953,85 +969,76 @@ 
ALWAYS_INLINE void Aggregator::executeImplBatch( std::unique_ptr places(new AggregateDataPtr[rows]); std::optional processed_rows; -#define WRAP_EMPLACE_AGG_KEY_BEGIN \ - for (size_t i = agg_process_info.start_row; i < agg_process_info.start_row + rows; ++i) \ - { \ +#define HANDLE_AGG_EMPLACE_RESULT \ + if unlikely (!emplace_result_holder.has_value()) \ + { \ + LOG_INFO(log, "HashTable resize throw ResizeException since the data is already marked for spill"); \ + break; \ + } \ + \ + auto & emplace_result = emplace_result_holder.value(); \ + \ + if constexpr (only_lookup) \ + { \ + if (emplace_result.isFound()) \ + { \ + aggregate_data = emplace_result.getMapped(); \ + } \ + else \ + { \ + agg_process_info.not_found_rows.push_back(i); \ + } \ + } \ + else \ + { \ + if (emplace_result.isInserted()) \ + { \ + emplace_result.setMapped(nullptr); \ + \ + aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); \ + createAggregateStates(aggregate_data); \ + \ + emplace_result.setMapped(aggregate_data); \ + } \ + else \ + { \ + aggregate_data = emplace_result.getMapped(); \ + \ + if constexpr (collect_hit_rate) \ + ++agg_process_info.hit_row_cnt; \ + } \ + } \ + \ + places[i - agg_process_info.start_row] = aggregate_data; \ + processed_rows = i; + + for (size_t i = agg_process_info.start_row; i < agg_process_info.start_row + rows; ++i) + { AggregateDataPtr aggregate_data = nullptr; + if constexpr (enable_prefetch) + { + auto hashvals = getHashVals( + agg_process_info.start_row, + agg_process_info.end_row, + method.data, + state, + sort_key_containers, + aggregates_pool); -#define WRAP_EMPLACE_AGG_KEY_END \ - if unlikely (!emplace_result_holder.has_value()) \ - { \ - LOG_INFO(log, "HashTable resize throw ResizeException since the data is already marked for spill"); \ - break; \ - } \ - \ - auto & emplace_result = emplace_result_holder.value(); \ - \ - if constexpr (only_lookup) \ - { \ - if (emplace_result.isFound()) \ - { \ 
- aggregate_data = emplace_result.getMapped(); \ - } \ - else \ - { \ - agg_process_info.not_found_rows.push_back(i); \ - } \ - } \ - else \ - { \ - if (emplace_result.isInserted()) \ - { \ - emplace_result.setMapped(nullptr); \ - \ - aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); \ - createAggregateStates(aggregate_data); \ - \ - emplace_result.setMapped(aggregate_data); \ - } \ - else \ - { \ - aggregate_data = emplace_result.getMapped(); \ - \ - if constexpr (collect_hit_rate) \ - ++agg_process_info.hit_row_cnt; \ - } \ - } \ - \ - places[i - agg_process_info.start_row] = aggregate_data; \ - processed_rows = i; \ - } - - if constexpr (enable_prefetch) - { - std::vector hashvals; - hashvals = getHashVals( - agg_process_info.start_row, - agg_process_info.end_row, - method.data, - state, - sort_key_containers, - aggregates_pool); + auto emplace_result_holder + = emplaceOrFindKey(method, state, i, *aggregates_pool, sort_key_containers, hashvals); - WRAP_EMPLACE_AGG_KEY_BEGIN - auto emplace_result_holder = emplaceOrFindKey( - method, - state, - i, - *aggregates_pool, - sort_key_containers, - hashvals); - WRAP_EMPLACE_AGG_KEY_END - } - else - { - WRAP_EMPLACE_AGG_KEY_BEGIN - auto emplace_result_holder - = emplaceOrFindKey(method, state, i, *aggregates_pool, sort_key_containers); - WRAP_EMPLACE_AGG_KEY_END + HANDLE_AGG_EMPLACE_RESULT + } + else + { + auto emplace_result_holder + = emplaceOrFindKey(method, state, i, *aggregates_pool, sort_key_containers); + + HANDLE_AGG_EMPLACE_RESULT + } } -#undef WRAP_EMPLACE_AGG_KEY_BEGIN -#undef WRAP_EMPLACE_AGG_KEY_END +#undef HANDLE_AGG_EMPLACE_RESULT if (processed_rows) { From 352b710bdebcdc803b2baca3001cc2b32ca4a85f Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Thu, 5 Dec 2024 11:51:24 +0800 Subject: [PATCH 24/24] fix case Signed-off-by: guo-shaoge --- .../Flash/tests/gtest_spill_aggregation.cpp | 46 +++++----- dbms/src/Interpreters/Aggregator.cpp | 84 
++++++++++++------- dbms/src/Interpreters/Aggregator.h | 4 +- 3 files changed, 82 insertions(+), 52 deletions(-) diff --git a/dbms/src/Flash/tests/gtest_spill_aggregation.cpp b/dbms/src/Flash/tests/gtest_spill_aggregation.cpp index b19aaf03c4c..583e6e038fa 100644 --- a/dbms/src/Flash/tests/gtest_spill_aggregation.cpp +++ b/dbms/src/Flash/tests/gtest_spill_aggregation.cpp @@ -23,6 +23,7 @@ namespace FailPoints { extern const char force_agg_on_partial_block[]; extern const char force_thread_0_no_agg_spill[]; +extern const char force_agg_prefetch[]; } // namespace FailPoints namespace tests @@ -37,16 +38,22 @@ class SpillAggregationTestRunner : public DB::tests::ExecutorTest } }; -#define WRAP_FOR_AGG_PARTIAL_BLOCK_START \ - std::vector partial_blocks{true, false}; \ - for (auto partial_block : partial_blocks) \ - { \ - if (partial_block) \ - FailPointHelper::enableFailPoint(FailPoints::force_agg_on_partial_block); \ - else \ - FailPointHelper::disableFailPoint(FailPoints::force_agg_on_partial_block); +#define WRAP_FOR_AGG_FAILPOINTS_START \ + std::vector enables{true, false}; \ + for (auto enable : enables) \ + { \ + if (enable) \ + { \ + FailPointHelper::enableFailPoint(FailPoints::force_agg_on_partial_block); \ + FailPointHelper::enableFailPoint(FailPoints::force_agg_prefetch); \ + } \ + else \ + { \ + FailPointHelper::disableFailPoint(FailPoints::force_agg_on_partial_block); \ + FailPointHelper::disableFailPoint(FailPoints::force_agg_prefetch); \ + } -#define WRAP_FOR_AGG_PARTIAL_BLOCK_END } +#define WRAP_FOR_AGG_FAILPOINTS_END } #define WRAP_FOR_AGG_THREAD_0_NO_SPILL_START \ for (auto thread_0_no_spill : {true, false}) \ @@ -114,13 +121,13 @@ try context.context->setSetting("group_by_two_level_threshold_bytes", Field(static_cast(1))); /// don't use `executeAndAssertColumnsEqual` since it takes too long to run /// test single thread aggregation - WRAP_FOR_AGG_PARTIAL_BLOCK_START + WRAP_FOR_AGG_FAILPOINTS_START WRAP_FOR_AGG_THREAD_0_NO_SPILL_START 
ASSERT_COLUMNS_EQ_UR(ref_columns, executeStreams(request, 1)); /// test parallel aggregation ASSERT_COLUMNS_EQ_UR(ref_columns, executeStreams(request, original_max_streams)); WRAP_FOR_AGG_THREAD_0_NO_SPILL_END - WRAP_FOR_AGG_PARTIAL_BLOCK_END + WRAP_FOR_AGG_FAILPOINTS_END /// enable spill and use small max_cached_data_bytes_in_spiller context.context->setSetting("max_cached_data_bytes_in_spiller", Field(static_cast(total_data_size / 200))); /// test single thread aggregation @@ -262,7 +269,7 @@ try Field(static_cast(max_bytes_before_external_agg))); context.context->setSetting("max_block_size", Field(static_cast(max_block_size))); WRAP_FOR_SPILL_TEST_BEGIN - WRAP_FOR_AGG_PARTIAL_BLOCK_START + WRAP_FOR_AGG_FAILPOINTS_START WRAP_FOR_AGG_THREAD_0_NO_SPILL_START auto blocks = getExecuteStreamsReturnBlocks(request, concurrency); for (auto & block : blocks) @@ -289,7 +296,7 @@ try false)); } WRAP_FOR_AGG_THREAD_0_NO_SPILL_END - WRAP_FOR_AGG_PARTIAL_BLOCK_END + WRAP_FOR_AGG_FAILPOINTS_END WRAP_FOR_SPILL_TEST_END } } @@ -369,6 +376,7 @@ try { for (const auto & agg_func : agg_funcs) { + FailPointHelper::disableFailPoint(FailPoints::force_agg_prefetch); context.setCollation(collator_id); const auto * current_collator = TiDB::ITiDBCollator::getCollator(collator_id); ASSERT_TRUE(current_collator != nullptr); @@ -417,7 +425,7 @@ try Field(static_cast(max_bytes_before_external_agg))); context.context->setSetting("max_block_size", Field(static_cast(max_block_size))); WRAP_FOR_SPILL_TEST_BEGIN - WRAP_FOR_AGG_PARTIAL_BLOCK_START + WRAP_FOR_AGG_FAILPOINTS_START WRAP_FOR_AGG_THREAD_0_NO_SPILL_START auto blocks = getExecuteStreamsReturnBlocks(request, concurrency); for (auto & block : blocks) @@ -444,7 +452,7 @@ try false)); } WRAP_FOR_AGG_THREAD_0_NO_SPILL_END - WRAP_FOR_AGG_PARTIAL_BLOCK_END + WRAP_FOR_AGG_FAILPOINTS_END WRAP_FOR_SPILL_TEST_END } } @@ -518,9 +526,9 @@ try /// don't use `executeAndAssertColumnsEqual` since it takes too long to run auto request = 
gen_request(exchange_concurrency); WRAP_FOR_SPILL_TEST_BEGIN - WRAP_FOR_AGG_PARTIAL_BLOCK_START + WRAP_FOR_AGG_FAILPOINTS_START ASSERT_COLUMNS_EQ_UR(baseline, executeStreams(request, exchange_concurrency)); - WRAP_FOR_AGG_PARTIAL_BLOCK_END + WRAP_FOR_AGG_FAILPOINTS_END WRAP_FOR_SPILL_TEST_END } } @@ -528,8 +536,8 @@ CATCH #undef WRAP_FOR_SPILL_TEST_BEGIN #undef WRAP_FOR_SPILL_TEST_END -#undef WRAP_FOR_AGG_PARTIAL_BLOCK_START -#undef WRAP_FOR_AGG_PARTIAL_BLOCK_END +#undef WRAP_FOR_AGG_FAILPOINTS_START +#undef WRAP_FOR_AGG_FAILPOINTS_END } // namespace tests } // namespace DB diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index b714bacce04..537d13d1441 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -666,26 +666,38 @@ void NO_INLINE Aggregator::executeImpl( { typename Method::State state(agg_process_info.key_columns, key_sizes, collators); + // start_row!=0 and stringHashTableRecoveryInfo not empty and cannot be true at the same time. + RUNTIME_CHECK(!(agg_process_info.start_row != 0 && !agg_process_info.stringHashTableRecoveryInfoEmpty())); + #ifndef NDEBUG bool disable_prefetch = (method.data.getBufferSizeInCells() < 8192); fiu_do_on(FailPoints::force_agg_prefetch, { disable_prefetch = false; }); #else const bool disable_prefetch = (method.data.getBufferSizeInCells() < 8192); #endif - if (disable_prefetch) - { - executeImplBatch(method, state, aggregates_pool, agg_process_info); - } - else + + if constexpr (Method::Data::is_string_hash_map) { - if constexpr (Method::Data::is_string_hash_map) - executeImplBatchStringHashMap( + // When will handled by column-wise(executeImplStringHashMapByCol): + // 1. For StringHashMap, which is composed by 5 submaps, needs be handled by column-wise when prefetch is enabled. + // 2. If agg_process_info.start_row != 0, it means the computation process of the current block was interrupted by resize exception in executeImplByRow. 
+        //    For clarity and simplicity of implementation, the processing functions for column-wise and row-wise methods handle the entire block independently.
+        //    A block will not be processed first by the row-wise method and then by the column-wise method, or vice-versa.
+        if (!disable_prefetch && likely(agg_process_info.start_row == 0))
+            executeImplStringHashMapByCol(
                 method,
                 state,
                 aggregates_pool,
                 agg_process_info);
         else
-            executeImplBatch(method, state, aggregates_pool, agg_process_info);
+            executeImplByRow(method, state, aggregates_pool, agg_process_info);
+    }
+    else
+    {
+        if (disable_prefetch)
+            executeImplByRow(method, state, aggregates_pool, agg_process_info);
+        else
+            executeImplByRow(method, state, aggregates_pool, agg_process_info);
     }
 }
@@ -759,7 +771,7 @@ std::optional::Res
     }
 }
 
-// This is only used by executeImplBatchStringHashMap.
+// This is only used by executeImplStringHashMapByCol.
 // It will choose specifix submap of StringHashMap then do emplace/find.
 // StringKeyType can be StringRef/StringKey8/StringKey16/StringKey24/ArenaKeyHolder.
 template <
@@ -849,7 +861,7 @@ size_t Aggregator::emplaceOrFindStringKey(
 }
 
 template 
-ALWAYS_INLINE void Aggregator::executeImplBatch(
+ALWAYS_INLINE void Aggregator::executeImplByRow(
     Method & method,
     typename Method::State & state,
     Arena * aggregates_pool,
@@ -857,6 +869,11 @@ ALWAYS_INLINE void Aggregator::executeImplBatch(
 {
     // collect_hit_rate and only_lookup cannot be true at the same time.
     static_assert(!(collect_hit_rate && only_lookup));
+    // If agg_process_info.stringHashTableRecoveryInfoEmpty() is false, it means the current block was
+    // handled by executeImplStringHashMapByCol(column-wise) before, and resize exception happened.
+    // This situation is unexpected because for the sake of clarity, we assume that a block will be **fully** processed
+    // either column-wise or row-wise and cannot be split for processing.
+ RUNTIME_CHECK(agg_process_info.stringHashTableRecoveryInfoEmpty()); std::vector sort_key_containers; sort_key_containers.resize(params.keys_size, ""); @@ -1086,7 +1103,7 @@ M(4) // NOTE: this function is column-wise, which means sort key buffer cannot be reused. // This buffer will not be release until this block is processed done. template -ALWAYS_INLINE void Aggregator::executeImplBatchStringHashMap( +ALWAYS_INLINE void Aggregator::executeImplStringHashMapByCol( Method & method, typename Method::State & state, Arena * aggregates_pool, @@ -1125,7 +1142,11 @@ ALWAYS_INLINE void Aggregator::executeImplBatchStringHashMap( // If no resize exception happens, so this is a new Block. // If resize exception happens, start_row has already been set to zero at the end of this function. - RUNTIME_CHECK(agg_process_info.start_row == 0); + RUNTIME_CHECK_MSG( + agg_process_info.start_row == 0, + "unexpected agg_process_info.start_row: {}, end_row: {}", + agg_process_info.start_row, + agg_process_info.end_row); if likely (agg_process_info.stringHashTableRecoveryInfoEmpty()) { @@ -1233,10 +1254,9 @@ ALWAYS_INLINE void Aggregator::executeImplBatchStringHashMap( M(4, key_str_infos, key_str_datas, key_str_places) #undef M - if (zero_agg_func_size) - return; - - std::vector places(rows, nullptr); + if (!zero_agg_func_size) + { + std::vector places(rows, nullptr); #define M(INFO, PLACES) \ for (size_t i = 0; i < (INFO).size(); ++i) \ { \ @@ -1244,24 +1264,26 @@ ALWAYS_INLINE void Aggregator::executeImplBatchStringHashMap( places[row] = (PLACES)[i]; \ } - M(key0_infos, key0_places) - M(key8_infos, key8_places) - M(key16_infos, key16_places) - M(key24_infos, key24_places) - M(key_str_infos, key_str_places) + M(key0_infos, key0_places) + M(key8_infos, key8_places) + M(key16_infos, key16_places) + M(key24_infos, key24_places) + M(key_str_infos, key_str_places) #undef M - for (AggregateFunctionInstruction * inst = agg_process_info.aggregate_functions_instructions.data(); inst->that; - 
++inst) - { - inst->batch_that->addBatch( - agg_process_info.start_row, - rows, - &places[0], - inst->state_offset, - inst->batch_arguments, - aggregates_pool); + for (AggregateFunctionInstruction * inst = agg_process_info.aggregate_functions_instructions.data(); inst->that; + ++inst) + { + inst->batch_that->addBatch( + agg_process_info.start_row, + rows, + &places[0], + inst->state_offset, + inst->batch_arguments, + aggregates_pool); + } } + if unlikely (got_resize_exception) { RUNTIME_CHECK(!agg_process_info.stringHashTableRecoveryInfoEmpty()); diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index 81252b8b3c6..d88a97278f9 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -1477,14 +1477,14 @@ class Aggregator TiDB::TiDBCollators & collators) const; template - void executeImplBatch( + void executeImplByRow( Method & method, typename Method::State & state, Arena * aggregates_pool, AggProcessInfo & agg_process_info) const; template - void executeImplBatchStringHashMap( + void executeImplStringHashMapByCol( Method & method, typename Method::State & state, Arena * aggregates_pool,