Skip to content

Commit

Permalink
Merge pull request #341 from 1261385937/column-construction
Browse files Browse the repository at this point in the history
add Reserve for column. Optimize large block insertion
  • Loading branch information
Enmk authored Nov 2, 2023
2 parents c5225bd + d343428 commit 0f8b396
Show file tree
Hide file tree
Showing 31 changed files with 161 additions and 55 deletions.
5 changes: 5 additions & 0 deletions clickhouse/columns/array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ ColumnRef ColumnArray::CloneEmpty() const {
return std::make_shared<ColumnArray>(data_->CloneEmpty());
}

/// Pre-allocates capacity ahead of a large block insertion.
/// The requested value is passed unchanged to both the nested data column
/// and the offsets column.
/// NOTE(review): presumably one array row can hold several nested elements,
/// so `new_cap` is likely only a lower bound for the nested column -- confirm.
void ColumnArray::Reserve(size_t new_cap)
{
    data_->Reserve(new_cap);
    offsets_->Reserve(new_cap);
}

void ColumnArray::Append(ColumnRef column) {
if (auto col = column->As<ColumnArray>()) {
for (size_t i = 0; i < col->Size(); ++i) {
Expand Down
3 changes: 3 additions & 0 deletions clickhouse/columns/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ class ColumnArray : public Column {
}

public:
/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;

/// Appends content of given column to the end of current one.
void Append(ColumnRef column) override;

Expand Down
3 changes: 3 additions & 0 deletions clickhouse/columns/column.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ class Column : public std::enable_shared_from_this<Column> {
/// Appends content of given column to the end of current one.
virtual void Append(ColumnRef column) = 0;

/// Increase the capacity of the column for large block insertion.
virtual void Reserve(size_t new_cap) = 0;

/// Template method to load column data from input stream. It'll call LoadPrefix and LoadBody.
/// Should be called only once from the client. Derived classes should not call it.
bool Load(InputStream* input, size_t rows);
Expand Down
5 changes: 5 additions & 0 deletions clickhouse/columns/date.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,11 @@ std::string ColumnDateTime64::Timezone() const {
return type_->As<DateTime64Type>()->Timezone();
}

/// Pre-allocates capacity ahead of a large block insertion by delegating
/// the request to the wrapped data column.
void ColumnDateTime64::Reserve(size_t new_cap) {
    data_->Reserve(new_cap);
}

void ColumnDateTime64::Append(ColumnRef column) {
if (auto col = column->As<ColumnDateTime64>()) {
data_->Append(col->data_);
Expand Down
26 changes: 15 additions & 11 deletions clickhouse/columns/date.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ class ColumnDate : public Column {
/// Get Raw Vector Contents
std::vector<uint16_t>& GetWritableData();

/// Increase the capacity of the column
void Reserve(size_t new_cap);
/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;

/// Returns the capacity of the column
size_t Capacity() const;
Expand Down Expand Up @@ -79,9 +79,6 @@ class ColumnDate32 : public Column {
/// The implementation is fundamentally wrong, ignores timezones, leap years and daylight saving.
std::time_t At(size_t n) const;

/// Appends content of given column to the end of current one.
void Append(ColumnRef column) override;

inline std::time_t operator [] (size_t n) const { return At(n); }

/// Do append data as is -- number of day in Unix epoch (32bit signed), no conversions performed.
Expand All @@ -91,12 +88,16 @@ class ColumnDate32 : public Column {
/// Get Raw Vector Contents
std::vector<int32_t>& GetWritableData();

/// Increase the capacity of the column
void Reserve(size_t new_cap);

/// Returns the capacity of the column
size_t Capacity() const;

public:
/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;

/// Appends content of given column to the end of current one.
void Append(ColumnRef column) override;

/// Loads column data from input stream.
bool LoadBody(InputStream* input, size_t rows) override;

Expand Down Expand Up @@ -148,13 +149,13 @@ class ColumnDateTime : public Column {
/// Get Raw Vector Contents
std::vector<uint32_t>& GetWritableData();

/// Increase the capacity of the column
void Reserve(size_t new_cap);

/// Returns the capacity of the column
size_t Capacity() const;

public:
/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;

/// Appends content of given column to the end of current one.
void Append(ColumnRef column) override;

Expand Down Expand Up @@ -205,6 +206,9 @@ class ColumnDateTime64 : public Column {
std::string Timezone() const;

public:
/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;

/// Appends content of given column to the end of current one.
void Append(ColumnRef column) override;

Expand Down
4 changes: 4 additions & 0 deletions clickhouse/columns/decimal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,10 @@ Int128 ColumnDecimal::At(size_t i) const {
}
}

/// Pre-allocates capacity ahead of a large block insertion by delegating
/// the request to the wrapped data column.
void ColumnDecimal::Reserve(size_t new_cap)
{
    data_->Reserve(new_cap);
}

void ColumnDecimal::Append(ColumnRef column) {
if (auto col = column->As<ColumnDecimal>()) {
data_->Append(col->data_);
Expand Down
2 changes: 2 additions & 0 deletions clickhouse/columns/decimal.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ class ColumnDecimal : public Column {
inline auto operator[](size_t i) const { return At(i); }

public:
/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;
void Append(ColumnRef column) override;
bool LoadBody(InputStream* input, size_t rows) override;
void SaveBody(OutputStream* output) override;
Expand Down
5 changes: 5 additions & 0 deletions clickhouse/columns/enum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,11 @@ void ColumnEnum<T>::SetNameAt(size_t n, const std::string& name) {
data_.at(n) = static_cast<T>(type_->As<EnumType>()->GetEnumValue(name));
}

/// Pre-allocates capacity ahead of a large block insertion.
/// Calls reserve() on the value container held directly by this column.
template <typename T>
void ColumnEnum<T>::Reserve(size_t new_cap)
{
    data_.reserve(new_cap);
}

template <typename T>
void ColumnEnum<T>::Append(ColumnRef column) {
if (auto col = column->As<ColumnEnum<T>>()) {
Expand Down
3 changes: 3 additions & 0 deletions clickhouse/columns/enum.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ class ColumnEnum : public Column {
void SetNameAt(size_t n, const std::string& name);

public:
/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;

/// Appends content of given column to the end of current one.
void Append(ColumnRef column) override;

Expand Down
5 changes: 5 additions & 0 deletions clickhouse/columns/geo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ const typename ColumnGeo<NestedColumnType, type_code>::ValueType ColumnGeo<Neste
return data_->At(n);
}

/// Pre-allocates capacity ahead of a large block insertion by delegating
/// the request to the nested column.
template <typename NestedColumnType, Type::Code type_code>
void ColumnGeo<NestedColumnType, type_code>::Reserve(size_t new_cap)
{
    data_->Reserve(new_cap);
}

template <typename NestedColumnType, Type::Code type_code>
void ColumnGeo<NestedColumnType, type_code>::Append(ColumnRef column) {
if (auto col = column->template As<ColumnGeo>()) {
Expand Down
3 changes: 3 additions & 0 deletions clickhouse/columns/geo.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ class ColumnGeo : public Column {
inline const ValueType operator[](size_t n) const { return At(n); }

public:
/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;

/// Appends content of given column to the end of current one.
void Append(ColumnRef column) override;

Expand Down
4 changes: 4 additions & 0 deletions clickhouse/columns/ip4.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ std::string ColumnIPv4::AsString(size_t n) const {
return ip_str;
}

/// Pre-allocates capacity ahead of a large block insertion by delegating
/// the request to the wrapped data column.
void ColumnIPv4::Reserve(size_t new_cap)
{
    data_->Reserve(new_cap);
}

void ColumnIPv4::Append(ColumnRef column) {
if (auto col = column->As<ColumnIPv4>()) {
data_->Append(col->data_);
Expand Down
3 changes: 3 additions & 0 deletions clickhouse/columns/ip4.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ class ColumnIPv4 : public Column {
std::string AsString(size_t n) const;

public:
/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;

/// Appends content of given column to the end of current one.
void Append(ColumnRef column) override;

Expand Down
4 changes: 4 additions & 0 deletions clickhouse/columns/ip6.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ in6_addr ColumnIPv6::operator [] (size_t n) const {
return *reinterpret_cast<const in6_addr*>(data_->At(n).data());
}

/// Pre-allocates capacity ahead of a large block insertion by delegating
/// the request to the wrapped data column.
void ColumnIPv6::Reserve(size_t new_cap)
{
    data_->Reserve(new_cap);
}

void ColumnIPv6::Append(ColumnRef column) {
if (auto col = column->As<ColumnIPv6>()) {
data_->Append(col->data_);
Expand Down
3 changes: 3 additions & 0 deletions clickhouse/columns/ip6.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ class ColumnIPv6 : public Column {
std::string AsString(size_t n) const;

public:
/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;

/// Appends content of given column to the end of current one.
void Append(ColumnRef column) override;

Expand Down
5 changes: 5 additions & 0 deletions clickhouse/columns/lowcardinality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,11 @@ ColumnLowCardinality::ColumnLowCardinality(std::shared_ptr<ColumnNullable> dicti
/// Out-of-line destructor with no custom teardown logic; kept in the .cpp
/// (rather than implicitly declared) exactly as before.
ColumnLowCardinality::~ColumnLowCardinality() = default;

/// Pre-allocates capacity ahead of a large block insertion.
/// The same value is passed to the dictionary column and to the index column.
/// NOTE(review): the dictionary presumably holds only distinct values, so
/// `new_cap` may over-reserve there -- confirm before tuning.
void ColumnLowCardinality::Reserve(size_t new_cap)
{
    dictionary_column_->Reserve(new_cap);
    index_column_->Reserve(new_cap);
}

void ColumnLowCardinality::Setup(ColumnRef dictionary_column) {
AppendDefaultItem();

Expand Down
3 changes: 3 additions & 0 deletions clickhouse/columns/lowcardinality.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ class ColumnLowCardinality : public Column {

~ColumnLowCardinality();

/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;

/// Appends another LowCardinality column to the end of this one, updating dictionary.
void Append(ColumnRef /*column*/) override;

Expand Down
4 changes: 4 additions & 0 deletions clickhouse/columns/map.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ ColumnMap::ColumnMap(ColumnRef data)
: Column(GetMapType(data->GetType())), data_(data->As<ColumnArray>()) {
}

/// Pre-allocates capacity ahead of a large block insertion by delegating
/// the request to the underlying array column.
void ColumnMap::Reserve(size_t new_cap)
{
    data_->Reserve(new_cap);
}

/// Clears the column by delegating to the underlying array column.
void ColumnMap::Clear()
{
    data_->Clear();
}
Expand Down
3 changes: 3 additions & 0 deletions clickhouse/columns/map.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ class ColumnMap : public Column {
*/
explicit ColumnMap(ColumnRef data);

/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;

/// Appends content of given column to the end of current one.
void Append(ColumnRef column) override;

Expand Down
3 changes: 3 additions & 0 deletions clickhouse/columns/nothing.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ class ColumnNothing : public Column {
{
}

/// Increase the capacity of the column for large block insertion.
/// Intentionally a no-op here: the requested capacity is ignored.
void Reserve(size_t /*new_cap*/) override {}

/// Appends one element to the column.
/// The argument itself is not inspected; only the element counter grows by one.
void Append(std::unique_ptr<void*> /*unused*/) {
    ++size_;
}

Expand Down
5 changes: 5 additions & 0 deletions clickhouse/columns/nullable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ ColumnRef ColumnNullable::Nulls() const
return nulls_;
}

/// Pre-allocates capacity ahead of a large block insertion.
/// The same value is passed to the nested column and to the nulls column.
void ColumnNullable::Reserve(size_t new_cap)
{
    nested_->Reserve(new_cap);
    nulls_->Reserve(new_cap);
}

void ColumnNullable::Append(ColumnRef column) {
if (auto col = column->As<ColumnNullable>()) {
if (!col->nested_->Type()->IsEqual(nested_->Type())) {
Expand Down
3 changes: 3 additions & 0 deletions clickhouse/columns/nullable.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ class ColumnNullable : public Column {
ColumnRef Nulls() const;

public:
/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;

/// Appends content of given column to the end of current one.
void Append(ColumnRef column) override;

Expand Down
6 changes: 3 additions & 3 deletions clickhouse/columns/numeric.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ class ColumnVector : public Column {
explicit ColumnVector(const std::vector<T>& data);
explicit ColumnVector(std::vector<T> && data);

/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;

/// Appends one element to the end of column.
void Append(const T& value);

Expand All @@ -33,9 +36,6 @@ class ColumnVector : public Column {
/// Get Raw Vector Contents
std::vector<T>& GetWritableData();

/// Increase the capacity of the column
void Reserve(size_t new_cap);

/// Returns the capacity of the column
size_t Capacity() const;

Expand Down
20 changes: 16 additions & 4 deletions clickhouse/columns/string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ ColumnFixedString::ColumnFixedString(size_t n)
{
}

/// Pre-allocates capacity ahead of a large block insertion.
/// `new_cap` is multiplied by the fixed string size to get the amount
/// requested from the backing buffer.
void ColumnFixedString::Reserve(size_t new_cap)
{
    const auto total_chars = string_size_ * new_cap;
    data_.reserve(total_chars);
}

void ColumnFixedString::Append(std::string_view str) {
if (str.size() > string_size_) {
throw ValidationError("Expected string of length not greater than "
Expand All @@ -45,8 +49,10 @@ void ColumnFixedString::Append(std::string_view str) {

data_.insert(data_.size(), str);
// Pad up to string_size_ with zeroes.
const auto padding_size = string_size_ - str.size();
data_.resize(data_.size() + padding_size, char(0));
if (str.size() < string_size_) {
const auto padding_size = string_size_ - str.size();
data_.resize(data_.size() + padding_size, char(0));
}
}

void ColumnFixedString::Clear() {
Expand Down Expand Up @@ -160,8 +166,8 @@ ColumnString::ColumnString(size_t element_count)
: Column(Type::CreateString())
{
items_.reserve(element_count);
// 100 is arbitrary number, assumption that string values are about ~40 bytes long.
blocks_.reserve(std::max<size_t>(1, element_count / 100));
// 16 is arbitrary number, assumption that string values are about ~256 bytes long.
blocks_.reserve(std::max<size_t>(1, element_count / 16));
}

ColumnString::ColumnString(const std::vector<std::string>& data)
Expand Down Expand Up @@ -190,6 +196,12 @@ ColumnString::ColumnString(std::vector<std::string>&& data)
/// Out-of-line destructor with no custom teardown logic; kept in the .cpp
/// (rather than implicitly declared) exactly as before.
ColumnString::~ColumnString() = default;

/// Pre-allocates capacity ahead of a large block insertion.
/// Reserves `new_cap` entries in the item index and a proportionally smaller
/// number of slots in the block list (never fewer than one).
void ColumnString::Reserve(size_t new_cap)
{
    // The divisor 16 is an arbitrary choice, based on the assumption that
    // string values are about ~256 bytes long.
    const size_t block_slots = std::max<size_t>(1, new_cap / 16);

    items_.reserve(new_cap);
    blocks_.reserve(block_slots);
}

void ColumnString::Append(std::string_view str) {
if (blocks_.size() == 0 || blocks_.back().GetAvailable() < str.length()) {
blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, str.size()));
Expand Down
6 changes: 6 additions & 0 deletions clickhouse/columns/string.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ class ColumnFixedString : public Column {
Append(v);
}

/// Increase the capacity of the column for large block insertion.
void Reserve(size_t) override;

/// Appends one element to the column.
void Append(std::string_view str);

Expand Down Expand Up @@ -84,6 +87,9 @@ class ColumnString : public Column {
ColumnString& operator=(const ColumnString&) = delete;
ColumnString(const ColumnString&) = delete;

/// Increase the capacity of the column for large block insertion.
void Reserve(size_t new_cap) override;

/// Appends one element to the column.
void Append(std::string_view str);

Expand Down
6 changes: 6 additions & 0 deletions clickhouse/columns/tuple.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ size_t ColumnTuple::TupleSize() const {
return columns_.size();
}

void ColumnTuple::Reserve(size_t new_cap) {
for (auto& column : columns_) {
column->Reserve(new_cap);
}
}

void ColumnTuple::Append(ColumnRef column) {
if (!this->Type()->IsEqual(column->Type())) {
throw ValidationError(
Expand Down
Loading

0 comments on commit 0f8b396

Please sign in to comment.