Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Aggregator support prefetch #9679

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,18 +182,18 @@ class AggregateFunctionGroupUniqArrayGeneric
{
// We have to copy the keys to our arena.
assert(arena != nullptr);
cur_set.emplace(ArenaKeyHolder{rhs_elem.getValue(), *arena}, it, inserted);
cur_set.emplace(ArenaKeyHolder{rhs_elem.getValue(), arena}, it, inserted);
}
}

void insertResultInto(ConstAggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
auto & arr_to = assert_cast<ColumnArray &>(to);
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
IColumn & data_to = arr_to.getData();

auto & set = this->data(place).value;
offsets_to.push_back((offsets_to.size() == 0 ? 0 : offsets_to.back()) + set.size());
offsets_to.push_back((offsets_to.empty() ? 0 : offsets_to.back()) + set.size());

for (auto & elem : set)
deserializeAndInsert<is_plain_column>(elem.getValue(), data_to);
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/AggregateFunctions/KeyHolderHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ inline auto getKeyHolder(const IColumn & column, size_t row_num, Arena & arena)
{
if constexpr (is_plain_column)
{
return ArenaKeyHolder{column.getDataAt(row_num), arena};
return ArenaKeyHolder{column.getDataAt(row_num), &arena};
}
else
{
Expand Down
109 changes: 65 additions & 44 deletions dbms/src/Common/ColumnsHashing.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,17 @@ struct HashMethodOneNumber
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;

const FieldType * vec;
const size_t total_rows;

/// Builds the method from the first key column only.
/// If the keys are of a fixed length, key_sizes contains their lengths; it is empty otherwise.
/// Neither key_sizes nor the collators argument is used by this constructor.
/// key_columns[0] is accessed unconditionally, so key_columns must be non-empty.
HashMethodOneNumber(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const TiDB::TiDBCollators &)
: total_rows(key_columns[0]->size())
{
// Unchecked downcast to ColumnVector<FieldType>; keeps the address of element 0 of its data.
vec = &static_cast<const ColumnVector<FieldType> *>(key_columns[0])->getData()[0];
}

/// Builds the method directly from a single column.
/// `column` is dereferenced unconditionally, so it must not be null.
explicit HashMethodOneNumber(const IColumn * column)
: total_rows(column->size())
{
// Unchecked downcast to ColumnVector<FieldType>; keeps the address of element 0 of its data.
vec = &static_cast<const ColumnVector<FieldType> *>(column)->getData()[0];
}
Expand Down Expand Up @@ -86,58 +89,63 @@ struct HashMethodOneNumber


/// For the case when there is one string key.
template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true>
template <typename Value, typename Mapped, bool use_cache = true>
struct HashMethodString
: public columns_hashing_impl::
HashMethodBase<HashMethodString<Value, Mapped, place_string_to_arena, use_cache>, Value, Mapped, use_cache>
: public columns_hashing_impl::HashMethodBase<HashMethodString<Value, Mapped, use_cache>, Value, Mapped, use_cache>
{
using Self = HashMethodString<Value, Mapped, place_string_to_arena, use_cache>;
using Self = HashMethodString<Value, Mapped, use_cache>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;

const IColumn::Offset * offsets;
const UInt8 * chars;
TiDB::TiDBCollatorPtr collator = nullptr;
const size_t total_rows;

HashMethodString(
const ColumnRawPtrs & key_columns,
const Sizes & /*key_sizes*/,
const TiDB::TiDBCollators & collators)
: total_rows(key_columns[0]->size())
{
const IColumn & column = *key_columns[0];
const auto & column_string = assert_cast<const ColumnString &>(column);
offsets = column_string.getOffsets().data();
chars = column_string.getChars().data();
if (!collators.empty())
{
if constexpr (!place_string_to_arena)
throw Exception("String with collator must be placed on arena.", ErrorCodes::LOGICAL_ERROR);
collator = collators[0];
}
}

ALWAYS_INLINE inline auto getKeyHolder(
ALWAYS_INLINE inline ArenaKeyHolder getKeyHolder(
ssize_t row,
[[maybe_unused]] Arena * pool,
std::vector<String> & sort_key_containers) const
[[maybe_unused]] std::vector<String> & sort_key_containers) const
{
auto last_offset = row == 0 ? 0 : offsets[row - 1];
// Remove last zero byte.
StringRef key(chars + last_offset, offsets[row] - last_offset - 1);
auto key = getKey(row);
if (likely(collator))
key = collator->sortKey(key.data, key.size, sort_key_containers[0]);

if constexpr (place_string_to_arena)
{
if (likely(collator))
key = collator->sortKey(key.data, key.size, sort_key_containers[0]);
return ArenaKeyHolder{key, *pool};
}
else
{
return key;
}
return ArenaKeyHolder{key, pool};
}

/// Returns an ArenaKeyHolder made of the key for `row` and `pool`.
/// When a collator is set, the key is replaced by the result of collator->sortKey,
/// which receives `*sort_key_pool`; sort_key_pool must therefore be non-null in that case.
/// NOTE(review): presumably the sort key is allocated from sort_key_pool — confirm against the collator API.
ALWAYS_INLINE inline ArenaKeyHolder getKeyHolder(ssize_t row, Arena * pool, Arena * sort_key_pool) const
{
auto key = getKey(row);
// The collator branch is annotated as the expected case.
if (likely(collator))
key = collator->sortKey(key.data, key.size, *sort_key_pool);

return ArenaKeyHolder{key, pool};
}

protected:
friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;

private:
/// Returns a StringRef over the bytes of the string stored at `row`, located through the offsets array.
ALWAYS_INLINE inline StringRef getKey(size_t row) const
{
// Row 0 starts at offset 0; every other row starts at the previous row's end offset.
auto last_offset = row == 0 ? 0 : offsets[row - 1];
// Remove last zero byte.
return StringRef(chars + last_offset, offsets[row] - last_offset - 1);
}
};

template <typename Value, typename Mapped, bool padding>
Expand All @@ -149,8 +157,10 @@ struct HashMethodStringBin

const IColumn::Offset * offsets;
const UInt8 * chars;
const size_t total_rows;

HashMethodStringBin(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const TiDB::TiDBCollators &)
: total_rows(key_columns[0]->size())
{
const IColumn & column = *key_columns[0];
const auto & column_string = assert_cast<const ColumnString &>(column);
Expand All @@ -159,11 +169,16 @@ struct HashMethodStringBin
}

/// Overload that accepts a vector of sort-key containers; the vector is ignored.
/// Forwards to the (row, pool, Arena *) overload, passing nullptr as the third argument.
ALWAYS_INLINE inline auto getKeyHolder(ssize_t row, Arena * pool, std::vector<String> &) const
{
return getKeyHolder(row, pool, nullptr);
}

ALWAYS_INLINE inline auto getKeyHolder(ssize_t row, Arena * pool, Arena *) const
{
auto last_offset = row == 0 ? 0 : offsets[row - 1];
StringRef key(chars + last_offset, offsets[row] - last_offset - 1);
key = BinCollatorSortKey<padding>(key.data, key.size);
return ArenaKeyHolder{key, *pool};
return ArenaKeyHolder{key, pool};
}

protected:
Expand Down Expand Up @@ -346,10 +361,12 @@ struct HashMethodFastPathTwoKeysSerialized

Key1Desc key_1_desc;
Key2Desc key_2_desc;
const size_t total_rows;

/// Builds the two key descriptors from key_columns[0] and key_columns[1].
/// The Sizes and collators arguments are unused.
/// total_rows is taken from the size of the first key column.
/// Both indices are accessed unconditionally, so key_columns must hold at least two columns.
HashMethodFastPathTwoKeysSerialized(const ColumnRawPtrs & key_columns, const Sizes &, const TiDB::TiDBCollators &)
: key_1_desc(key_columns[0])
, key_2_desc(key_columns[1])
, total_rows(key_columns[0]->size())
{}

ALWAYS_INLINE inline auto getKeyHolder(ssize_t row, Arena * pool, std::vector<String> &) const
Expand All @@ -370,25 +387,24 @@ struct HashMethodFastPathTwoKeysSerialized


/// For the case when there is one fixed-length string key.
template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true>
template <typename Value, typename Mapped, bool use_cache = true>
struct HashMethodFixedString
: public columns_hashing_impl::HashMethodBase<
HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache>,
Value,
Mapped,
use_cache>
: public columns_hashing_impl::
HashMethodBase<HashMethodFixedString<Value, Mapped, use_cache>, Value, Mapped, use_cache>
{
using Self = HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache>;
using Self = HashMethodFixedString<Value, Mapped, use_cache>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;

size_t n;
const ColumnFixedString::Chars_t * chars;
TiDB::TiDBCollatorPtr collator = nullptr;
const size_t total_rows;

HashMethodFixedString(
const ColumnRawPtrs & key_columns,
const Sizes & /*key_sizes*/,
const TiDB::TiDBCollators & collators)
: total_rows(key_columns[0]->size())
{
const IColumn & column = *key_columns[0];
const auto & column_string = assert_cast<const ColumnFixedString &>(column);
Expand All @@ -398,26 +414,25 @@ struct HashMethodFixedString
collator = collators[0];
}

ALWAYS_INLINE inline auto getKeyHolder(
ALWAYS_INLINE inline ArenaKeyHolder getKeyHolder(
size_t row,
[[maybe_unused]] Arena * pool,
Arena * pool,
std::vector<String> & sort_key_containers) const
{
StringRef key(&(*chars)[row * n], n);

if (collator)
{
key = collator->sortKeyFastPath(key.data, key.size, sort_key_containers[0]);
}

if constexpr (place_string_to_arena)
{
return ArenaKeyHolder{key, *pool};
}
else
{
return key;
}
return ArenaKeyHolder{key, pool};
}

/// Returns an ArenaKeyHolder made of the fixed-length key for `row` and `pool`.
/// When a collator is set, the key is replaced by the result of collator->sortKeyFastPath,
/// which receives `*sort_key_pool`; sort_key_pool must therefore be non-null in that case.
ALWAYS_INLINE inline ArenaKeyHolder getKeyHolder(size_t row, Arena * pool, Arena * sort_key_pool) const
{
// The key is the n bytes starting at byte offset row * n in the chars buffer.
StringRef key(&(*chars)[row * n], n);
if (collator)
key = collator->sortKeyFastPath(key.data, key.size, *sort_key_pool);

return ArenaKeyHolder{key, pool};
}

protected:
Expand All @@ -442,6 +457,7 @@ struct HashMethodKeysFixed

Sizes key_sizes;
size_t keys_size;
const size_t total_rows;

/// SSSE3 shuffle method can be used. Shuffle masks will be calculated and stored here.
#if defined(__SSSE3__) && !defined(MEMORY_SANITIZER)
Expand All @@ -467,6 +483,7 @@ struct HashMethodKeysFixed
: Base(key_columns)
, key_sizes(std::move(key_sizes_))
, keys_size(key_columns.size())
, total_rows(key_columns[0]->size())
{
if (usePreparedKeys(key_sizes))
{
Expand Down Expand Up @@ -596,6 +613,7 @@ struct HashMethodSerialized
ColumnRawPtrs key_columns;
size_t keys_size;
TiDB::TiDBCollators collators;
const size_t total_rows;

HashMethodSerialized(
const ColumnRawPtrs & key_columns_,
Expand All @@ -604,6 +622,7 @@ struct HashMethodSerialized
: key_columns(key_columns_)
, keys_size(key_columns_.size())
, collators(collators_)
, total_rows(key_columns_[0]->size())
{}

ALWAYS_INLINE inline SerializedKeyHolder getKeyHolder(
Expand Down Expand Up @@ -631,10 +650,12 @@ struct HashMethodHashed

ColumnRawPtrs key_columns;
TiDB::TiDBCollators collators;
const size_t total_rows;

/// Takes the key columns by value and moves them into the key_columns member; copies the collators.
/// The Sizes argument is unused.
/// total_rows reads the key_columns member (declared before total_rows, so already initialized),
/// not the moved-from parameter key_columns_. The first column is accessed unconditionally.
HashMethodHashed(ColumnRawPtrs key_columns_, const Sizes &, const TiDB::TiDBCollators & collators_)
: key_columns(std::move(key_columns_))
, collators(collators_)
, total_rows(key_columns[0]->size())
{}

ALWAYS_INLINE inline Key getKeyHolder(size_t row, Arena *, std::vector<String> & sort_key_containers) const
Expand Down
Loading