diff --git a/clickhouse/types/types.cpp b/clickhouse/types/types.cpp index e926643e..f9ac8ca9 100644 --- a/clickhouse/types/types.cpp +++ b/clickhouse/types/types.cpp @@ -1,10 +1,15 @@ #include "types.h" +#include + #include namespace clickhouse { -Type::Type(const Code code) : code_(code) {} +Type::Type(const Code code) + : code_(code) + , type_unique_id_(0) +{} std::string Type::GetName() const { switch (code_) { @@ -72,6 +77,62 @@ std::string Type::GetName() const { return std::string(); } +uint64_t Type::GetTypeUniqueId() const { + // Helper method to optimize equality checks of types with Type::IsEqual(), + // base invariant: types with same names produce same unique id (and hence considered equal). + // As an optimization, full type name is constructed at most once, and only for complex types. + switch (code_) { + case Void: + case Int8: + case Int16: + case Int32: + case Int64: + case Int128: + case UInt8: + case UInt16: + case UInt32: + case UInt64: + case UUID: + case Float32: + case Float64: + case String: + case IPv4: + case IPv6: + case Date: + // For simple types, unique ID is the same as Type::Code + return code_; + + case FixedString: + case DateTime: + case DateTime64: + case Array: + case Nullable: + case Tuple: + case Enum8: + case Enum16: + case Decimal: + case Decimal32: + case Decimal64: + case Decimal128: + case LowCardinality: { + // For complex types, exact unique ID depends on nested types and/or parameters, + // the easiest way is to lazy-compute unique ID from name once. + // Here we do not care if multiple threads are computing value simultaneosly since it is both: + // 1. going to be the same + // 2. going to be stored atomically + + if (type_unique_id_ == 0) { + const auto name = GetName(); + type_unique_id_ = CityHash64WithSeed(name.c_str(), name.size(), code_); + } + + return type_unique_id_; + } + } + assert(false); + return 0; +} + TypeRef Type::CreateArray(TypeRef item_type) { return TypeRef(new ArrayType(item_type)); } diff --git a/clickhouse/types/types.h b/clickhouse/types/types.h index 4592d5e5..8f4e50e4 100644 --- a/clickhouse/types/types.h +++ b/clickhouse/types/types.h @@ -2,6 +2,7 @@ #include "absl/numeric/int128.h" +#include #include #include #include @@ -73,7 +74,12 @@ class Type { std::string GetName() const; /// Is given type same as current one. - bool IsEqual(const Type& other) const { return this->GetName() == other.GetName(); } + bool IsEqual(const Type& other) const { + return this == &other + // GetTypeUniqueId() is relatively heavy, so avoid calling it when comparing obviously different types. + || (this->GetCode() == other.GetCode() && this->GetTypeUniqueId() == other.GetTypeUniqueId()); + } + bool IsEqual(const TypeRef& other) const { return IsEqual(*other); } public: @@ -113,7 +119,10 @@ class Type { static TypeRef CreateLowCardinality(TypeRef item_type); private: + uint64_t GetTypeUniqueId() const; + const Code code_; + mutable std::atomic type_unique_id_; }; inline bool operator==(const Type & left, const Type & right) { diff --git a/ut/types_ut.cpp b/ut/types_ut.cpp index 98d9a800..11182314 100644 --- a/ut/types_ut.cpp +++ b/ut/types_ut.cpp @@ -1,4 +1,6 @@ #include +#include + #include using namespace clickhouse; @@ -70,3 +72,57 @@ TEST(TypesCase, EnumTypesEmpty) { TEST(TypesCase, DecimalTypes) { // TODO: implement this test. } + +TEST(TypesCase, IsEqual) { + const std::string type_names[] = { + "UInt8", + "Int8", + "UInt128", + "String", + "FixedString(0)", + "FixedString(10000)", + "DateTime('UTC')", + "DateTime64(3, 'UTC')", + "Decimal(9,3)", + "Decimal(18,3)", + "Enum8()", + "Enum16()", + "Enum8('ONE' = 1)", + "Enum8('ONE' = 1, 'TWO' = 2)", + "Enum16('ONE' = 1, 'TWO' = 2, 'THREE' = 3, 'FOUR' = 4)", + "Nullable(FixedString(10000))", + "Nullable(LowCardinality(FixedString(10000)))", + "Array(Int8)", + "Array(UInt8)", + "Array(String)", + "Array(Nullable(LowCardinality(FixedString(10000))))", + "Array(Enum8('ONE' = 1, 'TWO' = 2))" + "Tuple(String, Int8, Date, DateTime)", + "Nullable(Tuple(String, Int8, Date, DateTime))", + "Array(Nullable(Tuple(String, Int8, Date, DateTime)))", + "Array(Array(Nullable(Tuple(String, Int8, Date, DateTime))))", + "Array(Array(Array(Nullable(Tuple(String, Int8, Date, DateTime)))))", + "Array(Array(Array(Array(Nullable(Tuple(String, Int8, Date, DateTime('UTC')))))))" + "Array(Array(Array(Array(Nullable(Tuple(String, Int8, Date, DateTime('UTC'), Tuple(LowCardinality(String), Enum8('READ'=1, 'WRITE'=0))))))))", + }; + + // Check that Type::IsEqual returns true only if: + // - same Type instance + // - same Type layout (matching outer type with all nested types and/or parameters) + for (const auto & type_name : type_names) { + SCOPED_TRACE(type_name); + const auto type = clickhouse::CreateColumnByType(type_name)->Type(); + + // Should be equal to itself + EXPECT_TRUE(type->IsEqual(type)); + EXPECT_TRUE(type->IsEqual(*type)); + + for (const auto & other_type_name : type_names) { + const auto other_type = clickhouse::CreateColumnByType(other_type_name)->Type(); + + const auto should_be_equal = type_name == other_type_name; + EXPECT_EQ(should_be_equal, type->IsEqual(other_type)) + << "For types: " << type_name << " and " << other_type_name; + } + } +}