diff --git a/clickhouse/columns/array.cpp b/clickhouse/columns/array.cpp index 5e5e72e7..1867d778 100644 --- a/clickhouse/columns/array.cpp +++ b/clickhouse/columns/array.cpp @@ -52,6 +52,11 @@ ColumnRef ColumnArray::CloneEmpty() const { return std::make_shared(data_->CloneEmpty()); } +void ColumnArray::Reserve(size_t new_cap) { + data_->Reserve(new_cap); + offsets_->Reserve(new_cap); +} + void ColumnArray::Append(ColumnRef column) { if (auto col = column->As()) { for (size_t i = 0; i < col->Size(); ++i) { diff --git a/clickhouse/columns/array.h b/clickhouse/columns/array.h index 0ea33d5a..ea51c778 100644 --- a/clickhouse/columns/array.h +++ b/clickhouse/columns/array.h @@ -47,6 +47,9 @@ class ColumnArray : public Column { } public: + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; diff --git a/clickhouse/columns/column.h b/clickhouse/columns/column.h index b54cbdee..475df89a 100644 --- a/clickhouse/columns/column.h +++ b/clickhouse/columns/column.h @@ -52,6 +52,9 @@ class Column : public std::enable_shared_from_this { /// Appends content of given column to the end of current one. virtual void Append(ColumnRef column) = 0; + /// Increase the capacity of the column for large block insertion. + virtual void Reserve(size_t new_cap) = 0; + /// Template method to load column data from input stream. It'll call LoadPrefix and LoadBody. /// Should be called only once from the client. Derived classes should not call it. bool Load(InputStream* input, size_t rows); diff --git a/clickhouse/columns/date.cpp b/clickhouse/columns/date.cpp index c3476c7f..132c6fc7 100644 --- a/clickhouse/columns/date.cpp +++ b/clickhouse/columns/date.cpp @@ -303,6 +303,11 @@ std::string ColumnDateTime64::Timezone() const { return type_->As()->Timezone(); } +void ColumnDateTime64::Reserve(size_t new_cap) +{ + data_->Reserve(new_cap); +} + void ColumnDateTime64::Append(ColumnRef column) { if (auto col = column->As()) { data_->Append(col->data_); diff --git a/clickhouse/columns/date.h b/clickhouse/columns/date.h index c6e32234..bf501723 100644 --- a/clickhouse/columns/date.h +++ b/clickhouse/columns/date.h @@ -34,8 +34,8 @@ class ColumnDate : public Column { /// Get Raw Vector Contents std::vector& GetWritableData(); - /// Increase the capacity of the column - void Reserve(size_t new_cap); + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; /// Returns the capacity of the column size_t Capacity() const; @@ -79,9 +79,6 @@ class ColumnDate32 : public Column { /// The implementation is fundamentally wrong, ignores timezones, leap years and daylight saving. std::time_t At(size_t n) const; - /// Appends content of given column to the end of current one. - void Append(ColumnRef column) override; - inline std::time_t operator [] (size_t n) const { return At(n); } /// Do append data as is -- number of day in Unix epoch (32bit signed), no conversions performed. @@ -91,12 +88,16 @@ class ColumnDate32 : public Column { /// Get Raw Vector Contents std::vector& GetWritableData(); - /// Increase the capacity of the column - void Reserve(size_t new_cap); - /// Returns the capacity of the column size_t Capacity() const; +public: + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + + /// Appends content of given column to the end of current one. + void Append(ColumnRef column) override; + /// Loads column data from input stream. bool LoadBody(InputStream* input, size_t rows) override; @@ -148,13 +149,13 @@ class ColumnDateTime : public Column { /// Get Raw Vector Contents std::vector& GetWritableData(); - /// Increase the capacity of the column - void Reserve(size_t new_cap); - /// Returns the capacity of the column size_t Capacity() const; public: + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; @@ -205,6 +206,9 @@ class ColumnDateTime64 : public Column { std::string Timezone() const; public: + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; diff --git a/clickhouse/columns/decimal.cpp b/clickhouse/columns/decimal.cpp index d44dc0c0..2d214ecf 100644 --- a/clickhouse/columns/decimal.cpp +++ b/clickhouse/columns/decimal.cpp @@ -191,6 +191,10 @@ Int128 ColumnDecimal::At(size_t i) const { } } +void ColumnDecimal::Reserve(size_t new_cap) { + data_->Reserve(new_cap); +} + void ColumnDecimal::Append(ColumnRef column) { if (auto col = column->As()) { data_->Append(col->data_); diff --git a/clickhouse/columns/decimal.h b/clickhouse/columns/decimal.h index 4b09553a..aa499a12 100644 --- a/clickhouse/columns/decimal.h +++ b/clickhouse/columns/decimal.h @@ -21,6 +21,8 @@ class ColumnDecimal : public Column { inline auto operator[](size_t i) const { return At(i); } public: + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; void Append(ColumnRef column) override; bool LoadBody(InputStream* input, size_t rows) override; void SaveBody(OutputStream* output) override; diff --git a/clickhouse/columns/enum.cpp b/clickhouse/columns/enum.cpp index c84d9847..43fab893 100644 --- a/clickhouse/columns/enum.cpp +++ b/clickhouse/columns/enum.cpp @@ -68,6 +68,11 @@ void ColumnEnum::SetNameAt(size_t n, const std::string& name) { data_.at(n) = static_cast(type_->As()->GetEnumValue(name)); } +template +void ColumnEnum::Reserve(size_t new_cap) { + data_.reserve(new_cap); +} + template void ColumnEnum::Append(ColumnRef column) { if (auto col = column->As>()) { diff --git a/clickhouse/columns/enum.h b/clickhouse/columns/enum.h index 1d962751..43900f6c 100644 --- a/clickhouse/columns/enum.h +++ b/clickhouse/columns/enum.h @@ -30,6 +30,9 @@ class ColumnEnum : public Column { void SetNameAt(size_t n, const std::string& name); public: + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; diff --git a/clickhouse/columns/geo.cpp b/clickhouse/columns/geo.cpp index e618fbe5..fa987732 100644 --- a/clickhouse/columns/geo.cpp +++ b/clickhouse/columns/geo.cpp @@ -54,6 +54,11 @@ const typename ColumnGeo::ValueType ColumnGeoAt(n); } +template +void ColumnGeo::Reserve(size_t new_cap) { + data_->Reserve(new_cap); +} + template void ColumnGeo::Append(ColumnRef column) { if (auto col = column->template As()) { diff --git a/clickhouse/columns/geo.h b/clickhouse/columns/geo.h index c3757f8a..1b129739 100644 --- a/clickhouse/columns/geo.h +++ b/clickhouse/columns/geo.h @@ -29,6 +29,9 @@ class ColumnGeo : public Column { inline const ValueType operator[](size_t n) const { return At(n); } public: + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; diff --git a/clickhouse/columns/ip4.cpp b/clickhouse/columns/ip4.cpp index 5e7ca892..8790afb6 100644 --- a/clickhouse/columns/ip4.cpp +++ b/clickhouse/columns/ip4.cpp @@ -74,6 +74,10 @@ std::string ColumnIPv4::AsString(size_t n) const { return ip_str; } +void ColumnIPv4::Reserve(size_t new_cap) { + data_->Reserve(new_cap); +} + void ColumnIPv4::Append(ColumnRef column) { if (auto col = column->As()) { data_->Append(col->data_); diff --git a/clickhouse/columns/ip4.h b/clickhouse/columns/ip4.h index 103be527..2253e305 100644 --- a/clickhouse/columns/ip4.h +++ b/clickhouse/columns/ip4.h @@ -39,6 +39,9 @@ class ColumnIPv4 : public Column { std::string AsString(size_t n) const; public: + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; diff --git a/clickhouse/columns/ip6.cpp b/clickhouse/columns/ip6.cpp index 838bcce0..0d47b5e8 100644 --- a/clickhouse/columns/ip6.cpp +++ b/clickhouse/columns/ip6.cpp @@ -65,6 +65,10 @@ in6_addr ColumnIPv6::operator [] (size_t n) const { return *reinterpret_cast(data_->At(n).data()); } +void ColumnIPv6::Reserve(size_t new_cap) { + data_->Reserve(new_cap); +} + void ColumnIPv6::Append(ColumnRef column) { if (auto col = column->As()) { data_->Append(col->data_); diff --git a/clickhouse/columns/ip6.h b/clickhouse/columns/ip6.h index 74d8c1e1..41af0d58 100644 --- a/clickhouse/columns/ip6.h +++ b/clickhouse/columns/ip6.h @@ -35,6 +35,9 @@ class ColumnIPv6 : public Column { std::string AsString(size_t n) const; public: + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; diff --git a/clickhouse/columns/lowcardinality.cpp b/clickhouse/columns/lowcardinality.cpp index c0c12319..19369d33 100644 --- a/clickhouse/columns/lowcardinality.cpp +++ b/clickhouse/columns/lowcardinality.cpp @@ -174,6 +174,11 @@ ColumnLowCardinality::ColumnLowCardinality(std::shared_ptr dicti ColumnLowCardinality::~ColumnLowCardinality() {} +void ColumnLowCardinality::Reserve(size_t new_cap) { + dictionary_column_->Reserve(new_cap); + index_column_->Reserve(new_cap); +} + void ColumnLowCardinality::Setup(ColumnRef dictionary_column) { AppendDefaultItem(); diff --git a/clickhouse/columns/lowcardinality.h b/clickhouse/columns/lowcardinality.h index afadae22..17e3ce99 100644 --- a/clickhouse/columns/lowcardinality.h +++ b/clickhouse/columns/lowcardinality.h @@ -65,6 +65,9 @@ class ColumnLowCardinality : public Column { ~ColumnLowCardinality(); + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends another LowCardinality column to the end of this one, updating dictionary. void Append(ColumnRef /*column*/) override; diff --git a/clickhouse/columns/map.cpp b/clickhouse/columns/map.cpp index 3f5616df..839b0668 100644 --- a/clickhouse/columns/map.cpp +++ b/clickhouse/columns/map.cpp @@ -33,6 +33,10 @@ ColumnMap::ColumnMap(ColumnRef data) : Column(GetMapType(data->GetType())), data_(data->As()) { } +void ColumnMap::Reserve(size_t new_cap) { + data_->Reserve(new_cap); +} + void ColumnMap::Clear() { data_->Clear(); } diff --git a/clickhouse/columns/map.h b/clickhouse/columns/map.h index ac5dc0a7..4d644802 100644 --- a/clickhouse/columns/map.h +++ b/clickhouse/columns/map.h @@ -25,6 +25,9 @@ class ColumnMap : public Column { */ explicit ColumnMap(ColumnRef data); + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; diff --git a/clickhouse/columns/nothing.h b/clickhouse/columns/nothing.h index 0b28d572..8e1a4e30 100644 --- a/clickhouse/columns/nothing.h +++ b/clickhouse/columns/nothing.h @@ -26,6 +26,9 @@ class ColumnNothing : public Column { { } + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t) override {}; + /// Appends one element to the column. void Append(std::unique_ptr) { ++size_; } diff --git a/clickhouse/columns/nullable.cpp b/clickhouse/columns/nullable.cpp index dd863545..23940c12 100644 --- a/clickhouse/columns/nullable.cpp +++ b/clickhouse/columns/nullable.cpp @@ -34,6 +34,11 @@ ColumnRef ColumnNullable::Nulls() const return nulls_; } +void ColumnNullable::Reserve(size_t new_cap) { + nested_->Reserve(new_cap); + nulls_->Reserve(new_cap); +} + void ColumnNullable::Append(ColumnRef column) { if (auto col = column->As()) { if (!col->nested_->Type()->IsEqual(nested_->Type())) { diff --git a/clickhouse/columns/nullable.h b/clickhouse/columns/nullable.h index c1924af0..1946e8b9 100644 --- a/clickhouse/columns/nullable.h +++ b/clickhouse/columns/nullable.h @@ -27,6 +27,9 @@ class ColumnNullable : public Column { ColumnRef Nulls() const; public: + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; diff --git a/clickhouse/columns/numeric.h b/clickhouse/columns/numeric.h index 1cbcca9c..e2a7675e 100644 --- a/clickhouse/columns/numeric.h +++ b/clickhouse/columns/numeric.h @@ -19,6 +19,9 @@ class ColumnVector : public Column { explicit ColumnVector(const std::vector& data); explicit ColumnVector(std::vector && data); + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends one element to the end of column. void Append(const T& value); @@ -33,9 +36,6 @@ class ColumnVector : public Column { /// Get Raw Vector Contents std::vector& GetWritableData(); - /// Increase the capacity of the column - void Reserve(size_t new_cap); - /// Returns the capacity of the column size_t Capacity() const; diff --git a/clickhouse/columns/string.cpp b/clickhouse/columns/string.cpp index 62ec464b..791c2c6c 100644 --- a/clickhouse/columns/string.cpp +++ b/clickhouse/columns/string.cpp @@ -30,6 +30,10 @@ ColumnFixedString::ColumnFixedString(size_t n) { } +void ColumnFixedString::Reserve(size_t new_cap) { + data_.reserve(string_size_ * new_cap); +} + void ColumnFixedString::Append(std::string_view str) { if (str.size() > string_size_) { throw ValidationError("Expected string of length not greater than " @@ -45,8 +49,10 @@ void ColumnFixedString::Append(std::string_view str) { data_.insert(data_.size(), str); // Pad up to string_size_ with zeroes. - const auto padding_size = string_size_ - str.size(); - data_.resize(data_.size() + padding_size, char(0)); + if (str.size() < string_size_) { + const auto padding_size = string_size_ - str.size(); + data_.resize(data_.size() + padding_size, char(0)); + } } void ColumnFixedString::Clear() { @@ -160,8 +166,8 @@ ColumnString::ColumnString(size_t element_count) : Column(Type::CreateString()) { items_.reserve(element_count); - // 100 is arbitrary number, assumption that string values are about ~40 bytes long. - blocks_.reserve(std::max(1, element_count / 100)); + // 16 is arbitrary number, assumption that string values are about ~256 bytes long. + blocks_.reserve(std::max(1, element_count / 16)); } ColumnString::ColumnString(const std::vector& data) @@ -190,6 +196,12 @@ ColumnString::ColumnString(std::vector&& data) ColumnString::~ColumnString() {} +void ColumnString::Reserve(size_t new_cap) { + items_.reserve(new_cap); + // 16 is arbitrary number, assumption that string values are about ~256 bytes long. + blocks_.reserve(std::max(1, new_cap / 16)); +} + void ColumnString::Append(std::string_view str) { if (blocks_.size() == 0 || blocks_.back().GetAvailable() < str.length()) { blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, str.size())); diff --git a/clickhouse/columns/string.h b/clickhouse/columns/string.h index aa78270e..d6006556 100644 --- a/clickhouse/columns/string.h +++ b/clickhouse/columns/string.h @@ -27,6 +27,9 @@ class ColumnFixedString : public Column { Append(v); } + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t) override; + /// Appends one element to the column. void Append(std::string_view str); @@ -84,6 +87,9 @@ class ColumnString : public Column { ColumnString& operator=(const ColumnString&) = delete; ColumnString(const ColumnString&) = delete; + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends one element to the column. void Append(std::string_view str); diff --git a/clickhouse/columns/tuple.cpp b/clickhouse/columns/tuple.cpp index 42dc6e63..56858590 100644 --- a/clickhouse/columns/tuple.cpp +++ b/clickhouse/columns/tuple.cpp @@ -20,6 +20,12 @@ size_t ColumnTuple::TupleSize() const { return columns_.size(); } +void ColumnTuple::Reserve(size_t new_cap) { + for (auto& column : columns_) { + column->Reserve(new_cap); + } +} + void ColumnTuple::Append(ColumnRef column) { if (!this->Type()->IsEqual(column->Type())) { throw ValidationError( diff --git a/clickhouse/columns/tuple.h b/clickhouse/columns/tuple.h index c9795565..ebc1b895 100644 --- a/clickhouse/columns/tuple.h +++ b/clickhouse/columns/tuple.h @@ -26,6 +26,9 @@ class ColumnTuple : public Column { } public: + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; diff --git a/clickhouse/columns/uuid.cpp b/clickhouse/columns/uuid.cpp index 36a7229c..fbaff97d 100644 --- a/clickhouse/columns/uuid.cpp +++ b/clickhouse/columns/uuid.cpp @@ -34,6 +34,10 @@ const UUID ColumnUUID::At(size_t n) const { return UUID(data_->At(n * 2), data_->At(n * 2 + 1)); } +void ColumnUUID::Reserve(size_t new_cap) { + data_->Reserve(new_cap); +} + void ColumnUUID::Append(ColumnRef column) { if (auto col = column->As()) { data_->Append(col->data_); diff --git a/clickhouse/columns/uuid.h b/clickhouse/columns/uuid.h index 4f6c9192..ccd03f84 100644 --- a/clickhouse/columns/uuid.h +++ b/clickhouse/columns/uuid.h @@ -26,6 +26,9 @@ class ColumnUUID : public Column { inline const UUID operator [] (size_t n) const { return At(n); } public: + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; diff --git a/ut/Column_ut.cpp b/ut/Column_ut.cpp index 9eb3c7cf..bac666ec 100644 --- a/ut/Column_ut.cpp +++ b/ut/Column_ut.cpp @@ -376,25 +376,18 @@ TYPED_TEST(GenericColumnTest, Swap) { #endif TYPED_TEST(GenericColumnTest, ReserveAndCapacity) { - if constexpr ( - // TODO(venemkov): test that ColumnType has Reserve() and Capacity() methods - is_one_of_v) { - + using column_type = typename TestFixture::ColumnType; + auto [column0, values] = this->MakeColumnWithValues(2); + auto values_copy = values; + EXPECT_NO_THROW(column0->Reserve(0u)); + EXPECT_EQ(2u, column0->Size()); + EXPECT_TRUE(CompareRecursive(values, values_copy)); + + auto column1 = this->MakeColumn(); + column1->Reserve(10u); + EXPECT_EQ(0u, column1->Size()); + + if constexpr (has_method_Reserve_v && has_method_Capacity_v) { auto column = this->MakeColumn(); EXPECT_EQ(0u, column->Capacity()); EXPECT_NO_THROW(column->Reserve(100u)); @@ -408,24 +401,7 @@ TYPED_TEST(GenericColumnTest, ReserveAndCapacity) { TYPED_TEST(GenericColumnTest, GetWritableData) { - if constexpr ( - // TODO(venemkov): test that ColumnType has GetWritableData() method - is_one_of_v) { + if constexpr (has_method_GetWritableData_v) { auto [column, values] = this->MakeColumnWithValues(111); // Do conversion from time_t to internal representation, similar to what ColumnDate and ColumnDate32 do if constexpr (is_one_of_v { template inline constexpr bool is_one_of_v = is_one_of::value; + + +#define HAS_METHOD(FUN) \ +template \ +struct has_method_##FUN : std::false_type {}; \ +template \ +struct has_method_##FUN>> \ +: std::true_type {}; \ +template \ +constexpr bool has_method_##FUN##_v = has_method_##FUN::value; + +HAS_METHOD(Reserve); +HAS_METHOD(Capacity); +HAS_METHOD(GetWritableData);