Skip to content

Commit

Permalink
[feature](decimal) support decimal256
Browse files Browse the repository at this point in the history
  • Loading branch information
jacktengg committed Oct 23, 2023
1 parent 13780e4 commit f5736e9
Show file tree
Hide file tree
Showing 160 changed files with 5,704 additions and 327 deletions.
12 changes: 12 additions & 0 deletions be/src/common/consts.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,20 @@ const std::string ROWID_COL = "__DORIS_ROWID_COL__";
const std::string ROW_STORE_COL = "__DORIS_ROW_STORE_COL__";
const std::string DYNAMIC_COLUMN_NAME = "__DORIS_DYNAMIC_COL__";

/// The maximum precision (9) representable by a 4-byte decimal (Decimal4Value)
constexpr int MAX_DECIMAL32_PRECISION = 9;
/// The maximum precision (18) representable by a 8-byte decimal (Decimal8Value)
constexpr int MAX_DECIMAL64_PRECISION = 18;
/// The maximum precision (38) representable by a 16-byte decimal
constexpr int MAX_DECIMAL128_PRECISION = 38;
/// The maximum precision (76) representable by a 32-byte decimal
constexpr int MAX_DECIMAL256_PRECISION = 76;

/// Must be kept in sync with FE's max precision/scale.
/// DECIMALV2 limits: precision is capped at the 16-byte decimal maximum,
/// and the maximum scale is defined to be equal to the maximum precision.
static constexpr int MAX_DECIMALV2_PRECISION = MAX_DECIMAL128_PRECISION;
static constexpr int MAX_DECIMALV2_SCALE = MAX_DECIMALV2_PRECISION;

/// DECIMALV3 limits: precision is capped at the 32-byte decimal maximum,
/// and the maximum scale is defined to be equal to the maximum precision.
/// NOTE(review): presumably these must also match the FE-side limits, like
/// the DECIMALV2 constants above -- confirm against FE.
static constexpr int MAX_DECIMALV3_PRECISION = MAX_DECIMAL256_PRECISION;
static constexpr int MAX_DECIMALV3_SCALE = MAX_DECIMALV3_PRECISION;
} // namespace BeConsts
} // namespace doris
21 changes: 11 additions & 10 deletions be/src/exec/olap_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ std::string cast_to_string(T value, int scale) {
return ((vectorized::Decimal<int64_t>)value).to_string(scale);
} else if constexpr (primitive_type == TYPE_DECIMAL128I) {
return ((vectorized::Decimal<int128_t>)value).to_string(scale);
} else if constexpr (primitive_type == TYPE_DECIMAL256) {
return ((vectorized::Decimal<Int256>)value).to_string(scale);
} else if constexpr (primitive_type == TYPE_TINYINT) {
return std::to_string(static_cast<int>(value));
} else if constexpr (primitive_type == TYPE_LARGEINT) {
Expand Down Expand Up @@ -501,16 +503,15 @@ class OlapScanKeys {
bool _is_convertible;
};

using ColumnValueRangeType =
std::variant<ColumnValueRange<TYPE_TINYINT>, ColumnValueRange<TYPE_SMALLINT>,
ColumnValueRange<TYPE_INT>, ColumnValueRange<TYPE_BIGINT>,
ColumnValueRange<TYPE_LARGEINT>, ColumnValueRange<TYPE_CHAR>,
ColumnValueRange<TYPE_VARCHAR>, ColumnValueRange<TYPE_STRING>,
ColumnValueRange<TYPE_DATE>, ColumnValueRange<TYPE_DATEV2>,
ColumnValueRange<TYPE_DATETIME>, ColumnValueRange<TYPE_DATETIMEV2>,
ColumnValueRange<TYPE_DECIMALV2>, ColumnValueRange<TYPE_BOOLEAN>,
ColumnValueRange<TYPE_HLL>, ColumnValueRange<TYPE_DECIMAL32>,
ColumnValueRange<TYPE_DECIMAL64>, ColumnValueRange<TYPE_DECIMAL128I>>;
// Variant that can hold a ColumnValueRange instantiated for any one of the
// primitive types listed below. TYPE_DECIMAL256 is the last alternative in
// the list.
using ColumnValueRangeType = std::variant<
ColumnValueRange<TYPE_TINYINT>, ColumnValueRange<TYPE_SMALLINT>, ColumnValueRange<TYPE_INT>,
ColumnValueRange<TYPE_BIGINT>, ColumnValueRange<TYPE_LARGEINT>, ColumnValueRange<TYPE_CHAR>,
ColumnValueRange<TYPE_VARCHAR>, ColumnValueRange<TYPE_STRING>, ColumnValueRange<TYPE_DATE>,
ColumnValueRange<TYPE_DATEV2>, ColumnValueRange<TYPE_DATETIME>,
ColumnValueRange<TYPE_DATETIMEV2>, ColumnValueRange<TYPE_DECIMALV2>,
ColumnValueRange<TYPE_BOOLEAN>, ColumnValueRange<TYPE_HLL>,
ColumnValueRange<TYPE_DECIMAL32>, ColumnValueRange<TYPE_DECIMAL64>,
ColumnValueRange<TYPE_DECIMAL128I>, ColumnValueRange<TYPE_DECIMAL256>>;

template <PrimitiveType primitive_type>
const typename ColumnValueRange<primitive_type>::CppType
Expand Down
6 changes: 6 additions & 0 deletions be/src/exec/schema_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,12 @@ Status SchemaScanner::fill_dest_column_for_range(vectorized::Block* block, size_
reinterpret_cast<const char*>(&num), 0);
break;
}
// case TYPE_DECIMAL256: {
// const vectorized::Int256 num = (reinterpret_cast<PackedInt256*>(data))->value;
// reinterpret_cast<vectorized::ColumnDecimal256*>(col_ptr)->insert_data(
// reinterpret_cast<const char*>(&num), 0);
// break;
// }

case TYPE_DECIMAL32: {
const int32_t num = *reinterpret_cast<int32_t*>(data);
Expand Down
4 changes: 3 additions & 1 deletion be/src/exec/schema_scanner/schema_columns_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ std::string SchemaColumnsScanner::_to_mysql_data_type_string(TColumnDesc& desc)
case TPrimitiveType::DECIMAL32:
case TPrimitiveType::DECIMAL64:
case TPrimitiveType::DECIMAL128I:
case TPrimitiveType::DECIMAL256:
case TPrimitiveType::DECIMALV2: {
return "decimal";
}
Expand Down Expand Up @@ -208,7 +209,8 @@ std::string SchemaColumnsScanner::_type_to_string(TColumnDesc& desc) {
}
case TPrimitiveType::DECIMAL32:
case TPrimitiveType::DECIMAL64:
case TPrimitiveType::DECIMAL128I: {
case TPrimitiveType::DECIMAL128I:
case TPrimitiveType::DECIMAL256: {
fmt::memory_buffer debug_string_buffer;
fmt::format_to(
debug_string_buffer, "decimalv3({}, {})",
Expand Down
3 changes: 2 additions & 1 deletion be/src/exec/table_connector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,8 @@ Status TableConnector::convert_column_data(const vectorized::ColumnPtr& column_p
}
case TYPE_DECIMAL32:
case TYPE_DECIMAL64:
case TYPE_DECIMAL128I: {
case TYPE_DECIMAL128I:
case TYPE_DECIMAL256: {
auto decimal_type = remove_nullable(type_ptr);
auto val = decimal_type->to_string(*column, row);
fmt::format_to(_insert_stmt_buffer, "{}", val);
Expand Down
13 changes: 13 additions & 0 deletions be/src/exec/text_converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "vec/columns/column_struct.h"
#include "vec/columns/column_vector.h"
#include "vec/core/types.h"
#include "vec/core/wide_integer.h"
#include "vec/runtime/vdatetime_value.h"

namespace doris {
Expand Down Expand Up @@ -290,6 +291,18 @@ bool TextConverter::_write_data(const TypeDescriptor& type_desc,
.resize_fill(origin_size + rows, value);
break;
}
case TYPE_DECIMAL256: {
StringParser::ParseResult result = StringParser::PARSE_SUCCESS;
wide::Int256 value = StringParser::string_to_decimal<TYPE_DECIMAL256>(
data, len, type_desc.precision, type_desc.scale, &result);
if (result != StringParser::PARSE_SUCCESS) {
parse_result = StringParser::PARSE_FAILURE;
break;
}
reinterpret_cast<vectorized::ColumnVector<wide::Int256>*>(col_ptr)->get_data().resize_fill(
origin_size + rows, value);
break;
}
case TYPE_ARRAY: {
auto col = reinterpret_cast<vectorized::ColumnArray*>(col_ptr);

Expand Down
8 changes: 3 additions & 5 deletions be/src/exprs/create_predicate_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,7 @@ class MinmaxFunctionTraits {
using BasePtr = MinMaxFuncBase*;
template <PrimitiveType type, size_t N>
static BasePtr get_function() {
return new MinMaxNumFunc<std::conditional_t<
type == TYPE_DECIMAL32 || type == TYPE_DECIMAL64 || type == TYPE_DECIMAL128I,
vectorized::Decimal<typename PrimitiveTypeTraits<type>::CppType>,
typename PrimitiveTypeTraits<type>::CppType>>();
return new MinMaxNumFunc<typename PrimitiveTypeTraits<type>::CppType>();
}
};

Expand Down Expand Up @@ -106,7 +103,8 @@ class PredicateFunctionCreator {
M(TYPE_STRING) \
M(TYPE_DECIMAL32) \
M(TYPE_DECIMAL64) \
M(TYPE_DECIMAL128I)
M(TYPE_DECIMAL128I) \
M(TYPE_DECIMAL256)

template <class Traits, size_t N = 0>
typename Traits::BasePtr create_predicate_function(PrimitiveType type) {
Expand Down
43 changes: 43 additions & 0 deletions be/src/exprs/runtime_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@
#include "vec/columns/column.h"
#include "vec/columns/column_complex.h"
#include "vec/common/assert_cast.h"
#include "vec/core/wide_integer.h"
#include "vec/core/wide_integer_to_string.h"
#include "vec/exprs/vbitmap_predicate.h"
#include "vec/exprs/vbloom_predicate.h"
#include "vec/exprs/vdirect_in_predicate.h"
Expand Down Expand Up @@ -99,6 +101,8 @@ PColumnType to_proto(PrimitiveType type) {
return PColumnType::COLUMN_TYPE_DECIMAL64;
case TYPE_DECIMAL128I:
return PColumnType::COLUMN_TYPE_DECIMAL128I;
case TYPE_DECIMAL256:
return PColumnType::COLUMN_TYPE_DECIMAL256;
case TYPE_CHAR:
return PColumnType::COLUMN_TYPE_CHAR;
case TYPE_VARCHAR:
Expand Down Expand Up @@ -148,6 +152,8 @@ PrimitiveType to_primitive_type(PColumnType type) {
return TYPE_DECIMAL64;
case PColumnType::COLUMN_TYPE_DECIMAL128I:
return TYPE_DECIMAL128I;
case PColumnType::COLUMN_TYPE_DECIMAL256:
return TYPE_DECIMAL256;
case PColumnType::COLUMN_TYPE_VARCHAR:
return TYPE_VARCHAR;
case PColumnType::COLUMN_TYPE_CHAR:
Expand Down Expand Up @@ -790,6 +796,18 @@ class RuntimePredicateWrapper {
});
break;
}
case TYPE_DECIMAL256: {
batch_assign(in_filter, [](std::shared_ptr<HybridSetBase>& set, PColumnValue& column,
ObjectPool* pool) {
auto string_val = column.stringval();
StringParser::ParseResult result;
auto int_val = StringParser::string_to_int<wide::Int256>(
string_val.c_str(), string_val.length(), &result);
DCHECK(result == StringParser::PARSE_SUCCESS);
set->insert(&int_val);
});
break;
}
case TYPE_VARCHAR:
case TYPE_CHAR:
case TYPE_STRING: {
Expand Down Expand Up @@ -923,6 +941,18 @@ class RuntimePredicateWrapper {
DCHECK(result == StringParser::PARSE_SUCCESS);
return _context.minmax_func->assign(&min_val, &max_val);
}
case TYPE_DECIMAL256: {
auto min_string_val = minmax_filter->min_val().stringval();
auto max_string_val = minmax_filter->max_val().stringval();
StringParser::ParseResult result;
auto min_val = StringParser::string_to_int<wide::Int256>(
min_string_val.c_str(), min_string_val.length(), &result);
DCHECK(result == StringParser::PARSE_SUCCESS);
auto max_val = StringParser::string_to_int<wide::Int256>(
max_string_val.c_str(), max_string_val.length(), &result);
DCHECK(result == StringParser::PARSE_SUCCESS);
return _context.minmax_func->assign(&min_val, &max_val);
}
case TYPE_VARCHAR:
case TYPE_CHAR:
case TYPE_STRING: {
Expand Down Expand Up @@ -1578,6 +1608,12 @@ void IRuntimeFilter::to_protobuf(PInFilter* filter) {
});
return;
}
case TYPE_DECIMAL256: {
batch_copy<wide::Int256>(filter, it, [](PColumnValue* column, const wide::Int256* value) {
column->set_stringval(wide::to_string(*value));
});
return;
}
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_STRING: {
Expand Down Expand Up @@ -1686,6 +1722,13 @@ void IRuntimeFilter::to_protobuf(PMinMaxFilter* filter) {
LargeIntValue::to_string(*reinterpret_cast<const int128_t*>(max_data)));
return;
}
case TYPE_DECIMAL256: {
filter->mutable_min_val()->set_stringval(
wide::to_string(*reinterpret_cast<const wide::Int256*>(min_data)));
filter->mutable_max_val()->set_stringval(
wide::to_string(*reinterpret_cast<const wide::Int256*>(max_data)));
return;
}
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_STRING: {
Expand Down
12 changes: 12 additions & 0 deletions be/src/gutil/endian.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "gutil/int128.h"
#include "gutil/integral_types.h"
#include "gutil/port.h"
#include "vec/core/wide_integer.h"

inline uint64 gbswap_64(uint64 host_int) {
#if defined(__GNUC__) && defined(__x86_64__) && !defined(__APPLE__)
Expand All @@ -59,6 +60,11 @@ inline unsigned __int128 gbswap_128(unsigned __int128 host_int) {
(static_cast<unsigned __int128>(bswap_64(static_cast<uint64>(host_int))) << 64);
}

// Reverses the byte order of a 256-bit unsigned integer.
//
// The value is handled as four 64-bit limbs exposed through `items`. A full
// 256-bit byte reversal has to do two things:
//   1. byte-swap every 64-bit limb, and
//   2. reverse the order of the limbs themselves,
// in the same way gbswap_128 moves the swapped low half into the high
// position. Swapping each limb in place without reordering them only yields
// four independently reversed 64-bit words, which is not a big-endian
// encoding of the whole value and does not preserve ordering when the result
// is compared bytewise.
//
// The limbs are written explicitly so the result does not depend on how the
// wide integer's initializer-list constructor orders its arguments.
inline wide::UInt256 gbswap_256(wide::UInt256 host_int) {
    wide::UInt256 result;
    result.items[0] = gbswap_64(host_int.items[3]);
    result.items[1] = gbswap_64(host_int.items[2]);
    result.items[2] = gbswap_64(host_int.items[1]);
    result.items[3] = gbswap_64(host_int.items[0]);
    return result;
}

// Swap bytes of a 24-bit value.
inline uint32_t bswap_24(uint32_t x) {
return ((x & 0x0000ffULL) << 16) | ((x & 0x00ff00ULL)) | ((x & 0xff0000ULL) >> 16);
Expand Down Expand Up @@ -252,6 +258,9 @@ class BigEndian {
static unsigned __int128 FromHost128(unsigned __int128 x) { return gbswap_128(x); }
static unsigned __int128 ToHost128(unsigned __int128 x) { return gbswap_128(x); }

// 256-bit conversions for the branch where IsLittleEndian() returns true:
// both directions delegate to gbswap_256, mirroring the 128-bit pair above.
static wide::UInt256 FromHost256(wide::UInt256 x) {
    return gbswap_256(x);
}
static wide::UInt256 ToHost256(wide::UInt256 x) {
    return gbswap_256(x);
}

static bool IsLittleEndian() { return true; }

#elif defined IS_BIG_ENDIAN
Expand All @@ -271,6 +280,9 @@ class BigEndian {
static uint128 FromHost128(uint128 x) { return x; }
static uint128 ToHost128(uint128 x) { return x; }

// 256-bit conversions for the IS_BIG_ENDIAN branch: the value is returned
// unchanged in both directions, mirroring the 128-bit pair above.
static wide::UInt256 FromHost256(wide::UInt256 x) {
    return x;
}
static wide::UInt256 ToHost256(wide::UInt256 x) {
    return x;
}

static bool IsLittleEndian() { return false; }

#endif /* ENDIAN */
Expand Down
2 changes: 2 additions & 0 deletions be/src/olap/delete_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,8 @@ bool DeleteHandler::is_condition_value_valid(const TabletColumn& column,
return valid_decimal(value_str, column.precision(), column.frac());
case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
return valid_decimal(value_str, column.precision(), column.frac());
case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
return valid_decimal(value_str, column.precision(), column.frac());
case FieldType::OLAP_FIELD_TYPE_CHAR:
case FieldType::OLAP_FIELD_TYPE_VARCHAR:
return value_str.size() <= column.length();
Expand Down
4 changes: 4 additions & 0 deletions be/src/olap/field.h
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,8 @@ class FieldFactory {
[[fallthrough]];
case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
[[fallthrough]];
case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
[[fallthrough]];
case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: {
Field* field = new Field(column);
field->set_precision(column.precision());
Expand Down Expand Up @@ -579,6 +581,8 @@ class FieldFactory {
[[fallthrough]];
case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
[[fallthrough]];
case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
[[fallthrough]];
case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: {
Field* field = new Field(column);
field->set_precision(column.precision());
Expand Down
2 changes: 1 addition & 1 deletion be/src/olap/in_list_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ class InListPredicateBase : public ColumnPredicate {
if constexpr (Type == TYPE_STRING || Type == TYPE_CHAR) {
tmp = convert(*col, condition, arena);
} else if constexpr (Type == TYPE_DECIMAL32 || Type == TYPE_DECIMAL64 ||
Type == TYPE_DECIMAL128I) {
Type == TYPE_DECIMAL128I || Type == TYPE_DECIMAL256) {
tmp = convert(*col, condition);
} else {
tmp = convert(condition);
Expand Down
1 change: 1 addition & 0 deletions be/src/olap/key_coder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ class KeyCoderResolver {
add_mapping<FieldType::OLAP_FIELD_TYPE_DECIMAL32>();
add_mapping<FieldType::OLAP_FIELD_TYPE_DECIMAL64>();
add_mapping<FieldType::OLAP_FIELD_TYPE_DECIMAL128I>();
add_mapping<FieldType::OLAP_FIELD_TYPE_DECIMAL256>();
}

template <FieldType field_type>
Expand Down
33 changes: 19 additions & 14 deletions be/src/olap/key_coder.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ class KeyCoderTraits<
field_type,
typename std::enable_if<
std::is_integral<typename CppTypeTraits<field_type>::CppType>::value ||
field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL256 ||
vectorized::IsDecimalNumber<typename CppTypeTraits<field_type>::CppType>>::type> {
public:
using CppType = typename CppTypeTraits<field_type>::CppType;
Expand All @@ -93,20 +94,24 @@ class KeyCoderTraits<
private:
// Swap value's endian from/to big endian
static UnsignedCppType swap_big_endian(UnsignedCppType val) {
switch (sizeof(UnsignedCppType)) {
case 1:
return val;
case 2:
return BigEndian::FromHost16(val);
case 4:
return BigEndian::FromHost32(val);
case 8:
return BigEndian::FromHost64(val);
case 16:
return BigEndian::FromHost128(val);
default:
LOG(FATAL) << "Invalid type to big endian, type=" << int(field_type)
<< ", size=" << sizeof(UnsignedCppType);
if constexpr (field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL256) {
return BigEndian::FromHost256(val);
} else {
switch (sizeof(UnsignedCppType)) {
case 1:
return val;
case 2:
return BigEndian::FromHost16(val);
case 4:
return BigEndian::FromHost32(val);
case 8:
return BigEndian::FromHost64(val);
case 16:
return BigEndian::FromHost128(val);
default:
LOG(FATAL) << "Invalid type to big endian, type=" << int(field_type)
<< ", size=" << sizeof(UnsignedCppType);
}
}
}

Expand Down
4 changes: 3 additions & 1 deletion be/src/olap/olap_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,8 @@ enum class FieldType {
OLAP_FIELD_TYPE_DECIMAL128I = 33,
OLAP_FIELD_TYPE_JSONB = 34,
OLAP_FIELD_TYPE_VARIANT = 35,
OLAP_FIELD_TYPE_AGG_STATE = 36
OLAP_FIELD_TYPE_AGG_STATE = 36,
OLAP_FIELD_TYPE_DECIMAL256 = 37,
};

// Define all aggregation methods supported by Field
Expand Down Expand Up @@ -197,6 +198,7 @@ constexpr bool field_is_numeric_type(const FieldType& field_type) {
field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL32 ||
field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL64 ||
field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL128I ||
field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL256 ||
field_type == FieldType::OLAP_FIELD_TYPE_BOOL;
}

Expand Down
Loading

0 comments on commit f5736e9

Please sign in to comment.