Skip to content

Commit

Permalink
Aggregator support prefetch (#9679)
Browse files (browse the repository at this point in the history)
close #9680

Signed-off-by: guo-shaoge <[email protected]>

Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com>
  • Loading branch information
guo-shaoge and ti-chi-bot[bot] authored Jan 17, 2025
1 parent 294d600 commit 999018c
Show file tree
Hide file tree
Showing 22 changed files with 655 additions and 328 deletions.
6 changes: 3 additions & 3 deletions dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,18 +182,18 @@ class AggregateFunctionGroupUniqArrayGeneric
{
// We have to copy the keys to our arena.
assert(arena != nullptr);
cur_set.emplace(ArenaKeyHolder{rhs_elem.getValue(), *arena}, it, inserted);
cur_set.emplace(ArenaKeyHolder{rhs_elem.getValue(), arena}, it, inserted);
}
}

void insertResultInto(ConstAggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
auto & arr_to = assert_cast<ColumnArray &>(to);
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
IColumn & data_to = arr_to.getData();

auto & set = this->data(place).value;
offsets_to.push_back((offsets_to.size() == 0 ? 0 : offsets_to.back()) + set.size());
offsets_to.push_back((offsets_to.empty() ? 0 : offsets_to.back()) + set.size());

for (auto & elem : set)
deserializeAndInsert<is_plain_column>(elem.getValue(), data_to);
Expand Down
4 changes: 2 additions & 2 deletions dbms/src/AggregateFunctions/KeyHolderHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@ inline auto getKeyHolder(const IColumn & column, size_t row_num, Arena & arena)
{
if constexpr (is_plain_column)
{
return ArenaKeyHolder{column.getDataAt(row_num), arena};
return ArenaKeyHolder{column.getDataAt(row_num), &arena};
}
else
{
const char * begin = nullptr;
StringRef serialized = column.serializeValueIntoArena(row_num, arena, begin);
assert(serialized.data != nullptr);
return SerializedKeyHolder{serialized, arena};
return SerializedKeyHolder{serialized, &arena};
}
}

Expand Down
101 changes: 48 additions & 53 deletions dbms/src/Common/ColumnsHashing.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ struct HashMethodOneNumber
{
using Self = HashMethodOneNumber<Value, Mapped, FieldType, use_cache>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
using KeyHolderType = FieldType;

static constexpr bool is_serialized_key = false;

const FieldType * vec;

Expand All @@ -73,7 +76,7 @@ struct HashMethodOneNumber
using Base::getHash; /// (const Data & data, size_t row, Arena & pool) -> size_t

/// Used for the default implementation in HashMethodBase.
ALWAYS_INLINE inline FieldType getKeyHolder(size_t row, Arena *, std::vector<String> &) const
ALWAYS_INLINE inline KeyHolderType getKeyHolder(size_t row, Arena *, std::vector<String> &) const
{
if constexpr (std::is_same_v<FieldType, Int256>)
return vec[row];
Expand All @@ -86,13 +89,15 @@ struct HashMethodOneNumber


/// For the case when there is one string key.
template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true>
template <typename Value, typename Mapped, bool use_cache = true>
struct HashMethodString
: public columns_hashing_impl::
HashMethodBase<HashMethodString<Value, Mapped, place_string_to_arena, use_cache>, Value, Mapped, use_cache>
: public columns_hashing_impl::HashMethodBase<HashMethodString<Value, Mapped, use_cache>, Value, Mapped, use_cache>
{
using Self = HashMethodString<Value, Mapped, place_string_to_arena, use_cache>;
using Self = HashMethodString<Value, Mapped, use_cache>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
using KeyHolderType = ArenaKeyHolder;

static constexpr bool is_serialized_key = false;

const IColumn::Offset * offsets;
const UInt8 * chars;
Expand All @@ -108,32 +113,21 @@ struct HashMethodString
offsets = column_string.getOffsets().data();
chars = column_string.getChars().data();
if (!collators.empty())
{
if constexpr (!place_string_to_arena)
throw Exception("String with collator must be placed on arena.", ErrorCodes::LOGICAL_ERROR);
collator = collators[0];
}
}

ALWAYS_INLINE inline auto getKeyHolder(
ALWAYS_INLINE inline KeyHolderType getKeyHolder(
ssize_t row,
[[maybe_unused]] Arena * pool,
std::vector<String> & sort_key_containers) const
{
auto last_offset = row == 0 ? 0 : offsets[row - 1];
// Remove last zero byte.
StringRef key(chars + last_offset, offsets[row] - last_offset - 1);
if (likely(collator))
key = collator->sortKey(key.data, key.size, sort_key_containers[0]);

if constexpr (place_string_to_arena)
{
if (likely(collator))
key = collator->sortKey(key.data, key.size, sort_key_containers[0]);
return ArenaKeyHolder{key, *pool};
}
else
{
return key;
}
return ArenaKeyHolder{key, pool};
}

protected:
Expand All @@ -146,6 +140,9 @@ struct HashMethodStringBin
{
using Self = HashMethodStringBin<Value, Mapped, padding>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>;
using KeyHolderType = ArenaKeyHolder;

static constexpr bool is_serialized_key = false;

const IColumn::Offset * offsets;
const UInt8 * chars;
Expand All @@ -158,12 +155,12 @@ struct HashMethodStringBin
chars = column_string.getChars().data();
}

ALWAYS_INLINE inline auto getKeyHolder(ssize_t row, Arena * pool, std::vector<String> &) const
ALWAYS_INLINE inline KeyHolderType getKeyHolder(ssize_t row, Arena * pool, std::vector<String> &) const
{
auto last_offset = row == 0 ? 0 : offsets[row - 1];
StringRef key(chars + last_offset, offsets[row] - last_offset - 1);
key = BinCollatorSortKey<padding>(key.data, key.size);
return ArenaKeyHolder{key, *pool};
return ArenaKeyHolder{key, pool};
}

protected:
Expand Down Expand Up @@ -343,6 +340,9 @@ struct HashMethodFastPathTwoKeysSerialized
{
using Self = HashMethodFastPathTwoKeysSerialized<Key1Desc, Key2Desc, Value, Mapped>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>;
using KeyHolderType = SerializedKeyHolder;

static constexpr bool is_serialized_key = true;

Key1Desc key_1_desc;
Key2Desc key_2_desc;
Expand All @@ -352,13 +352,13 @@ struct HashMethodFastPathTwoKeysSerialized
, key_2_desc(key_columns[1])
{}

ALWAYS_INLINE inline auto getKeyHolder(ssize_t row, Arena * pool, std::vector<String> &) const
ALWAYS_INLINE inline KeyHolderType getKeyHolder(ssize_t row, Arena * pool, std::vector<String> &) const
{
StringRef key1;
StringRef key2;
size_t alloc_size = key_1_desc.getKey(row, key1) + key_2_desc.getKey(row, key2);
char * start = pool->alloc(alloc_size);
SerializedKeyHolder ret{{start, alloc_size}, *pool};
SerializedKeyHolder ret{{start, alloc_size}, pool};
Key1Desc::serializeKey(start, key1);
Key2Desc::serializeKey(start, key2);
return ret;
Expand All @@ -370,16 +370,16 @@ struct HashMethodFastPathTwoKeysSerialized


/// For the case when there is one fixed-length string key.
template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true>
template <typename Value, typename Mapped, bool use_cache = true>
struct HashMethodFixedString
: public columns_hashing_impl::HashMethodBase<
HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache>,
Value,
Mapped,
use_cache>
: public columns_hashing_impl::
HashMethodBase<HashMethodFixedString<Value, Mapped, use_cache>, Value, Mapped, use_cache>
{
using Self = HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache>;
using Self = HashMethodFixedString<Value, Mapped, use_cache>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
using KeyHolderType = ArenaKeyHolder;

static constexpr bool is_serialized_key = false;

size_t n;
const ColumnFixedString::Chars_t * chars;
Expand All @@ -398,26 +398,14 @@ struct HashMethodFixedString
collator = collators[0];
}

ALWAYS_INLINE inline auto getKeyHolder(
size_t row,
[[maybe_unused]] Arena * pool,
std::vector<String> & sort_key_containers) const
ALWAYS_INLINE inline KeyHolderType getKeyHolder(size_t row, Arena * pool, std::vector<String> & sort_key_containers)
const
{
StringRef key(&(*chars)[row * n], n);

if (collator)
{
key = collator->sortKeyFastPath(key.data, key.size, sort_key_containers[0]);
}

if constexpr (place_string_to_arena)
{
return ArenaKeyHolder{key, *pool};
}
else
{
return key;
}
return ArenaKeyHolder{key, pool};
}

protected:
Expand All @@ -437,7 +425,9 @@ struct HashMethodKeysFixed
using Self = HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, use_cache>;
using BaseHashed = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
using Base = columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_>;
using KeyHolderType = Key;

static constexpr bool is_serialized_key = false;
static constexpr bool has_nullable_keys = has_nullable_keys_;

Sizes key_sizes;
Expand Down Expand Up @@ -526,7 +516,7 @@ struct HashMethodKeysFixed
#endif
}

ALWAYS_INLINE inline Key getKeyHolder(size_t row, Arena *, std::vector<String> &) const
ALWAYS_INLINE inline KeyHolderType getKeyHolder(size_t row, Arena *, std::vector<String> &) const
{
if constexpr (has_nullable_keys)
{
Expand Down Expand Up @@ -592,6 +582,9 @@ struct HashMethodSerialized
{
using Self = HashMethodSerialized<Value, Mapped>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>;
using KeyHolderType = SerializedKeyHolder;

static constexpr bool is_serialized_key = true;

ColumnRawPtrs key_columns;
size_t keys_size;
Expand All @@ -606,14 +599,12 @@ struct HashMethodSerialized
, collators(collators_)
{}

ALWAYS_INLINE inline SerializedKeyHolder getKeyHolder(
size_t row,
Arena * pool,
std::vector<String> & sort_key_containers) const
ALWAYS_INLINE inline KeyHolderType getKeyHolder(size_t row, Arena * pool, std::vector<String> & sort_key_containers)
const
{
return SerializedKeyHolder{
serializeKeysToPoolContiguous(row, keys_size, key_columns, collators, sort_key_containers, *pool),
*pool};
pool};
}

protected:
Expand All @@ -628,6 +619,9 @@ struct HashMethodHashed
using Key = UInt128;
using Self = HashMethodHashed<Value, Mapped, use_cache>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
using KeyHolderType = Key;

static constexpr bool is_serialized_key = false;

ColumnRawPtrs key_columns;
TiDB::TiDBCollators collators;
Expand All @@ -637,7 +631,8 @@ struct HashMethodHashed
, collators(collators_)
{}

ALWAYS_INLINE inline Key getKeyHolder(size_t row, Arena *, std::vector<String> & sort_key_containers) const
ALWAYS_INLINE inline KeyHolderType getKeyHolder(size_t row, Arena *, std::vector<String> & sort_key_containers)
const
{
return hash128(row, key_columns.size(), key_columns, collators, sort_key_containers);
}
Expand Down
Loading

0 comments on commit 999018c

Please sign in to comment.