From ffcb52e14aafdd403fa3b1d027c3288351b6cc5d Mon Sep 17 00:00:00 2001 From: eldenmoon <15605149486@163.com> Date: Thu, 29 Aug 2024 19:37:02 +0800 Subject: [PATCH] modify fe modify be --- be/src/olap/rowset/segment_v2/segment.cpp | 5 +- .../segment_v2/vertical_segment_writer.cpp | 32 ++- be/src/olap/tablet_reader.cpp | 1 + be/src/olap/tablet_schema.cpp | 8 +- be/src/olap/tablet_schema.h | 7 + be/src/runtime/types.cpp | 16 ++ be/src/vec/columns/column_object.cpp | 67 ++++-- be/src/vec/columns/column_object.h | 10 +- be/src/vec/columns/subcolumn_tree.h | 1 + be/src/vec/common/schema_util.cpp | 23 +- be/src/vec/core/field.h | 34 ++- .../vec/data_types/convert_field_to_type.cpp | 12 +- be/src/vec/data_types/data_type.h | 9 + be/src/vec/data_types/data_type_array.cpp | 24 ++ be/src/vec/data_types/data_type_array.h | 2 + be/src/vec/data_types/data_type_bitmap.h | 5 + be/src/vec/data_types/data_type_date_time.h | 1 + be/src/vec/data_types/data_type_decimal.h | 7 + be/src/vec/data_types/data_type_factory.cpp | 28 ++- be/src/vec/data_types/data_type_factory.hpp | 6 +- be/src/vec/data_types/data_type_jsonb.h | 9 + be/src/vec/data_types/data_type_nullable.h | 8 + be/src/vec/data_types/data_type_number_base.h | 8 + be/src/vec/data_types/data_type_string.h | 7 + be/src/vec/data_types/data_type_time.h | 7 + be/src/vec/data_types/data_type_time_v2.h | 11 + be/src/vec/data_types/get_least_supertype.cpp | 4 + .../serde/data_type_array_serde.cpp | 38 +-- .../data_types/serde/data_type_array_serde.h | 5 +- .../data_types/serde/data_type_date64_serde.h | 7 + .../serde/data_type_datetimev2_serde.h | 7 + .../data_types/serde/data_type_datev2_serde.h | 7 + .../serde/data_type_decimal_serde.h | 7 + .../data_types/serde/data_type_ipv4_serde.h | 7 + .../data_types/serde/data_type_ipv6_serde.h | 7 + .../serde/data_type_jsonb_serde.cpp | 3 +- .../data_types/serde/data_type_jsonb_serde.h | 2 +- .../serde/data_type_nothing_serde.h | 8 + .../serde/data_type_nullable_serde.cpp | 12 +- .../serde/data_type_nullable_serde.h | 6 +- .../data_types/serde/data_type_number_serde.h | 6 +- .../serde/data_type_object_serde.cpp | 33 +++ .../data_types/serde/data_type_object_serde.h | 5 + .../vec/data_types/serde/data_type_serde.cpp | 86 +------ be/src/vec/data_types/serde/data_type_serde.h | 11 +- .../data_types/serde/data_type_string_serde.h | 2 +- .../data_types/serde/data_type_time_serde.h | 13 ++ be/src/vec/functions/function_cast.h | 31 ++- .../org/apache/doris/catalog/ScalarType.java | 6 +- .../org/apache/doris/catalog/StructField.java | 8 +- .../java/org/apache/doris/catalog/Type.java | 29 +++ .../org/apache/doris/catalog/VariantType.java | 79 ++++++- .../org/apache/doris/nereids/DorisParser.g4 | 8 + fe/fe-core/src/main/cup/sql_parser.cup | 22 ++ .../doris/alter/SchemaChangeHandler.java | 5 +- .../org/apache/doris/analysis/CastExpr.java | 3 + .../doris/analysis/InvertedIndexUtil.java | 4 +- .../java/org/apache/doris/catalog/Column.java | 36 ++- .../nereids/parser/LogicalPlanBuilder.java | 33 ++- .../plans/commands/info/ColumnDefinition.java | 14 ++ .../doris/nereids/types/StructField.java | 19 +- .../doris/nereids/types/VariantType.java | 54 ++++- regression-test/data/variant_p0/nested.out | 18 +- .../data/variant_p0/predefine/load.out | 125 ++++++++++ .../data/variant_p0/predefine/sql/q01.out | 34 +++ .../data/variant_p0/predefine/sql/q02.out | 103 +++++++++ regression-test/suites/variant_p0/load.groovy | 31 ++- .../suites/variant_p0/nested.groovy | 1 - .../suites/variant_p0/predefine/load.groovy | 217 ++++++++++++++++++ .../suites/variant_p0/predefine/sql/q01.sql | 2 + .../suites/variant_p0/predefine/sql/q02.sql | 24 ++ 71 files changed, 1289 insertions(+), 241 deletions(-) create mode 100644 regression-test/data/variant_p0/predefine/load.out create mode 100644 regression-test/data/variant_p0/predefine/sql/q01.out create mode 100644 regression-test/data/variant_p0/predefine/sql/q02.out create mode 100644 regression-test/suites/variant_p0/predefine/load.groovy create mode 100644 regression-test/suites/variant_p0/predefine/sql/q01.sql create mode 100644 regression-test/suites/variant_p0/predefine/sql/q02.sql diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index fe5a4c58ad232c0..558b23535085131 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -1006,10 +1006,7 @@ Status Segment::seek_and_read_by_rowid(const TabletSchema& schema, SlotDescripto } RETURN_IF_ERROR( iterator_hint->read_by_rowids(single_row_loc.data(), 1, file_storage_column)); - // iterator_hint.reset(nullptr); - // Get it's inner field, for JSONB case - vectorized::Field field = remove_nullable(storage_type)->get_default(); - file_storage_column->get(0, field); + vectorized::Field field = storage_type->get_type_field(*file_storage_column, 0); result->insert(field); } else { int index = (slot->col_unique_id() >= 0) ? schema.field_index(slot->col_unique_id()) diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp index 5d9275b7742ee8a..0e7e91603b3f9d7 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp @@ -17,6 +17,7 @@ #include "olap/rowset/segment_v2/vertical_segment_writer.h" +#include #include #include @@ -42,7 +43,8 @@ #include "olap/olap_common.h" #include "olap/partial_update_info.h" #include "olap/primary_key_index.h" -#include "olap/row_cursor.h" // RowCursor // IWYU pragma: keep +#include "olap/row_cursor.h" // RowCursor // IWYU pragma: keep +#include "olap/rowset/rowset_fwd.h" #include "olap/rowset/rowset_writer_context.h" // RowsetWriterContext #include "olap/rowset/segment_creator.h" #include "olap/rowset/segment_v2/column_writer.h" // ColumnWriter @@ -64,11 +66,15 @@ #include "vec/columns/column_nullable.h" #include "vec/columns/column_vector.h" #include "vec/columns/columns_number.h" +#include "vec/common/hash_table/hash_map_context_creator.h" #include "vec/common/schema_util.h" #include "vec/core/block.h" #include "vec/core/column_with_type_and_name.h" #include "vec/core/types.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_factory.hpp" #include "vec/io/reader_buffer.h" +#include "vec/json/path_in_data.h" #include "vec/jsonb/serialize.h" #include "vec/olap/olap_data_convertor.h" @@ -596,6 +602,10 @@ Status VerticalSegmentWriter::_append_block_with_variant_subcolumns(RowsInBlock& remove_nullable(column_ref)->assume_mutable_ref()); const TabletColumnPtr& parent_column = _tablet_schema->columns()[i]; + std::map typed_columns; + for (const auto& col : parent_column->get_sub_columns()) { + typed_columns[col->name()] = col; + } // generate column info by entry info auto generate_column_info = [&](const auto& entry) { const std::string& column_name = @@ -606,6 +616,12 @@ Status VerticalSegmentWriter::_append_block_with_variant_subcolumns(RowsInBlock& auto full_path = full_path_builder.append(parent_column->name_lower_case(), false) .append(entry->path.get_parts(), false) .build(); + if (typed_columns.contains(entry->path.get_path())) { + TabletColumn typed_column = *typed_columns[entry->path.get_path()]; + typed_column.set_path_info(full_path); + typed_column.set_parent_unique_id(parent_column->unique_id()); + return typed_column; + } return vectorized::schema_util::get_column_by_type( final_data_type_from_object, column_name, vectorized::schema_util::ExtraInfo { @@ -625,14 +641,22 @@ Status VerticalSegmentWriter::_append_block_with_variant_subcolumns(RowsInBlock& CHECK(entry->data.is_finalized()); int current_column_id = column_id++; TabletColumn tablet_column = generate_column_info(entry); + DataTypePtr storage_type = + vectorized::DataTypeFactory::instance().create_data_type(tablet_column); + DataTypePtr finalized_type = entry->data.get_least_common_type(); + vectorized::ColumnPtr current_column = + entry->data.get_finalized_column_ptr()->get_ptr(); + if (!storage_type->equals(*finalized_type)) { + RETURN_IF_ERROR(vectorized::schema_util::cast_column( + {current_column, finalized_type, ""}, storage_type, ¤t_column)); + } vectorized::schema_util::inherit_column_attributes(*parent_column, tablet_column, _flush_schema); RETURN_IF_ERROR(_create_column_writer(current_column_id /*unused*/, tablet_column, _flush_schema)); RETURN_IF_ERROR(_olap_data_convertor->set_source_content_with_specifid_column( - {entry->data.get_finalized_column_ptr()->get_ptr(), - entry->data.get_least_common_type(), tablet_column.name()}, - data.row_pos, data.num_rows, current_column_id)); + {current_column->get_ptr(), storage_type, tablet_column.name()}, data.row_pos, + data.num_rows, current_column_id)); // convert column data from engine format to storage layer format auto [status, column] = _olap_data_convertor->convert_column_data(current_column_id); if (!status.ok()) { diff --git a/be/src/olap/tablet_reader.cpp b/be/src/olap/tablet_reader.cpp index 9ab9e4b1b365f5d..61a18d04ff52d80 100644 --- a/be/src/olap/tablet_reader.cpp +++ b/be/src/olap/tablet_reader.cpp @@ -276,6 +276,7 @@ TabletColumn TabletReader::materialize_column(const TabletColumn& orig) { cast_type.type); } column_with_cast_type.set_type(filed_type); + column_with_cast_type.set_precision_frac(cast_type.precision, cast_type.scale); return column_with_cast_type; } diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 83b2bd4f7025719..c7789d0c8ed00ec 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -121,6 +121,10 @@ FieldType TabletColumn::get_field_type_by_type(PrimitiveType primitiveType) { return FieldType::OLAP_FIELD_TYPE_JSONB; case PrimitiveType::TYPE_VARIANT: return FieldType::OLAP_FIELD_TYPE_VARIANT; + case PrimitiveType::TYPE_IPV4: + return FieldType::OLAP_FIELD_TYPE_IPV4; + case PrimitiveType::TYPE_IPV6: + return FieldType::OLAP_FIELD_TYPE_IPV6; case PrimitiveType::TYPE_LAMBDA_FUNCTION: return FieldType::OLAP_FIELD_TYPE_UNKNOWN; // Not implemented case PrimitiveType::TYPE_AGG_STATE: @@ -608,8 +612,10 @@ void TabletColumn::to_schema_pb(ColumnPB* column) const { if (_has_default_value) { column->set_default_value(_default_value); } - if (_is_decimal) { + if (_precision >= 0) { column->set_precision(_precision); + } + if (_frac >= 0) { column->set_frac(_frac); } column->set_length(_length); diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index b7fe0e9310183dc..4f6d5a9d6ec6cd5 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -177,6 +178,12 @@ class TabletColumn { const std::vector& sparse_columns() const; size_t num_sparse_columns() const { return _num_sparse_columns; } + void set_precision_frac(int32_t precision, int32_t frac, bool is_decimal = true) { + _precision = precision; + _frac = frac; + _is_decimal = is_decimal; + } + Status check_valid() const { if (type() != FieldType::OLAP_FIELD_TYPE_ARRAY && type() != FieldType::OLAP_FIELD_TYPE_STRUCT && diff --git a/be/src/runtime/types.cpp b/be/src/runtime/types.cpp index 14ba4b2cebdece8..a18d73c0e9cc56e 100644 --- a/be/src/runtime/types.cpp +++ b/be/src/runtime/types.cpp @@ -28,6 +28,7 @@ #include #include "olap/olap_define.h" +#include "runtime/define_primitive_type.h" #include "runtime/primitive_type.h" namespace doris { @@ -108,6 +109,21 @@ TypeDescriptor::TypeDescriptor(const std::vector& types, int* idx) contains_nulls.push_back(node.contains_nulls[1]); break; } + case TTypeNodeType::VARIANT: { + // complex variant type + DCHECK(!node.__isset.scalar_type); + DCHECK_LT(*idx, types.size() - 1); + DCHECK(!node.__isset.contains_nulls); + type = TYPE_VARIANT; + contains_nulls.reserve(node.struct_fields.size()); + for (size_t i = 0; i < node.struct_fields.size(); i++) { + ++(*idx); + children.push_back(TypeDescriptor(types, idx)); + field_names.push_back(node.struct_fields[i].name); + contains_nulls.push_back(node.struct_fields[i].contains_null); + } + break; + } default: DCHECK(false) << node.type; } diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index 134b096d8badc17..7924789b5bc5a1e 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -45,6 +45,7 @@ #include "exprs/json_functions.h" #include "olap/olap_common.h" #include "util/defer_op.h" +#include "util/jsonb_utils.h" #include "util/simd/bits.h" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/aggregate_functions/helpers.h" @@ -73,6 +74,7 @@ #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_object.h" #include "vec/data_types/get_least_supertype.h" +#include "vec/functions/function_binary_arithmetic.h" #include "vec/json/path_in_data.h" #ifdef __AVX2__ @@ -84,14 +86,16 @@ namespace doris::vectorized { namespace { -DataTypePtr create_array_of_type(TypeIndex type, size_t num_dimensions, bool is_nullable) { +DataTypePtr create_array_of_type(TypeIndex type, size_t num_dimensions, bool is_nullable, + int precision = -1, int scale = -1) { if (type == ColumnObject::MOST_COMMON_TYPE_ID) { // JSONB type MUST NOT wrapped in ARRAY column, it should be top level. // So we ignored num_dimensions. return is_nullable ? make_nullable(std::make_shared()) : std::make_shared(); } - DataTypePtr result = DataTypeFactory::instance().create_data_type(type, is_nullable); + DataTypePtr result = + DataTypeFactory::instance().create_data_type(type, is_nullable, precision, scale); for (size_t i = 0; i < num_dimensions; ++i) { result = std::make_shared(result); if (is_nullable) { @@ -341,7 +345,44 @@ void get_field_info_impl(const Field& field, FieldInfo* info) { }; } +void get_base_field_info(const Field& field, FieldInfo* info) { + if (field.get_type_id() == TypeIndex::Array) { + if (field.safe_get().empty()) { + info->scalar_type_id = TypeIndex::Nothing; + ++info->num_dimensions; + info->have_nulls = true; + info->need_convert = false; + } else { + ++info->num_dimensions; + get_base_field_info(field.safe_get()[0], info); + } + return; + } + + // handle scalar types + info->scalar_type_id = field.get_type_id(); + info->have_nulls = true; + info->need_convert = false; + info->scale = field.get_scale(); + info->precision = field.get_precision(); + + // Currently the jsonb type should be the top level type, so we should not wrap it in array, + // see create_array_of_type. + // TODO we need to support array correctly + if (UNLIKELY(field.get_type_id() == TypeIndex::JSONB && info->num_dimensions > 0)) { + info->num_dimensions = 0; + info->need_convert = true; + } +} + void get_field_info(const Field& field, FieldInfo* info) { + if (field.get_type_id() != TypeIndex::Nothing) { + // Currently we support specify predefined schema for other types include decimal, datetime ...etc + // so we should set specified info to create correct types, and those predefined types are static and + // type no need to deduce + get_base_field_info(field, info); + return; + } if (field.is_complex_field()) { get_field_info_impl(field, info); } else { @@ -424,7 +465,11 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo info) { type_changed = true; } if (data.empty()) { - add_new_column_part(create_array_of_type(base_type.idx, value_dim, is_nullable)); + // Currently we support specify predefined schema for other types include decimal, datetime ...etc + // so we should set specified info to create correct types, and those predefined types are static and + // no conflict, so we can set them directly. + add_new_column_part(create_array_of_type(base_type.idx, value_dim, is_nullable, + info.precision, info.scale)); } else if (least_common_type.get_base_type_id() != base_type.idx && !base_type.is_nothing()) { if (schema_util::is_conversion_required_between_integers( base_type.idx, least_common_type.get_base_type_id())) { @@ -947,14 +992,9 @@ void ColumnObject::Subcolumn::get(size_t n, Field& res) const { return; } if (is_finalized()) { - if (least_common_type.get_base_type_id() == TypeIndex::JSONB) { - // JsonbFiled is special case - res = JsonbField(); - } - get_finalized_column().get(n, res); + res = get_least_common_type()->get_type_field(get_finalized_column(), n); return; } - size_t ind = n; if (ind < num_of_defaults_in_prefix) { res = least_common_type.get()->get_default(); @@ -1347,12 +1387,6 @@ Status find_and_set_leave_value(const IColumn* column, const PathInData& path, rapidjson::Value& root, rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, int row) { - // sanitize type and column - if (column->get_name() != type->create_column()->get_name()) { - return Status::InternalError( - "failed to set value for path {}, expected type {}, but got {} at row {}", - path.get_path(), type->get_name(), column->get_name(), row); - } const auto* nullable = check_and_get_column(column); if (skip_empty_json(nullable, type, row, path)) { return Status::OK(); @@ -1367,7 +1401,8 @@ Status find_and_set_leave_value(const IColumn* column, const PathInData& path, << ", root: " << std::string(buffer.GetString(), buffer.GetSize()); return Status::NotFound("Not found path {}", path.get_path()); } - RETURN_IF_ERROR(type_serde->write_one_cell_to_json(*column, *target, allocator, mem_pool, row)); + RETURN_IF_ERROR( + type_serde->write_one_cell_to_json(*column, *target, allocator, mem_pool, row, type)); return Status::OK(); } diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h index 96a27e44e92a2c9..a1d0848fed86710 100644 --- a/be/src/vec/columns/column_object.h +++ b/be/src/vec/columns/column_object.h @@ -72,7 +72,11 @@ struct FieldInfo { /// we need to convert scalars to the common type. bool need_convert; /// Number of dimension in array. 0 if field is scalar. - size_t num_dimensions; + size_t num_dimensions = 0; + + // decimal info + int scale = 0; + int precision = 0; }; void get_field_info(const Field& field, FieldInfo* info); @@ -120,6 +124,10 @@ class ColumnObject final : public COWHelper { const DataTypePtr& get_least_common_type() const { return least_common_type.get(); } + const TypeIndex& get_least_common_base_type_id() const { + return least_common_type.get_base_type_id(); + } + const DataTypePtr& get_least_common_typeBase() const { return least_common_type.get_base(); } diff --git a/be/src/vec/columns/subcolumn_tree.h b/be/src/vec/columns/subcolumn_tree.h index 81e835b58c45c1f..4768567ac7eaa0c 100644 --- a/be/src/vec/columns/subcolumn_tree.h +++ b/be/src/vec/columns/subcolumn_tree.h @@ -20,6 +20,7 @@ #pragma once #include +#include #include "runtime/exec_env.h" #include "runtime/thread_context.h" diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index c0b48e01307012d..69dcce366a90150 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -32,8 +32,10 @@ #include #include +#include #include #include +#include #include #include #include @@ -45,6 +47,7 @@ #include "olap/tablet_schema.h" #include "runtime/client_cache.h" #include "runtime/exec_env.h" +#include "runtime/runtime_state.h" #include "udf/udf.h" #include "util/defer_op.h" #include "vec/columns/column.h" @@ -157,7 +160,8 @@ Status cast_column(const ColumnWithTypeAndName& arg, const DataTypePtr& type, Co } Block tmp_block {arguments}; size_t result_column = tmp_block.columns(); - auto ctx = FunctionContext::create_context(nullptr, {}, {}); + RuntimeState state; + auto ctx = FunctionContext::create_context(&state, {}, {}); // To prevent from null info lost, we should not call function since the function framework will wrap // nullable to Variant instead of the root of Variant @@ -231,9 +235,14 @@ void get_column_by_type(const vectorized::DataTypePtr& data_type, const std::str column.set_length(data_type->get_size_of_value_in_memory()); return; } - // TODO handle more types like struct/date/datetime/decimal... - LOG(FATAL) << "__builtin_unreachable"; - __builtin_unreachable(); + if (WhichDataType(*data_type).is_decimal()) { + column.set_precision_frac(data_type->get_precision(), data_type->get_scale()); + return; + } + if (WhichDataType(*data_type).is_date_time_v2()) { + column.set_precision_frac(-1, data_type->get_scale(), false); + return; + } } TabletColumn get_column_by_type(const vectorized::DataTypePtr& data_type, const std::string& name, @@ -309,7 +318,7 @@ void update_least_common_schema(const std::vector& schemas, // Get subcolumns of this variant if (col->has_path_info() && col->parent_unique_id() > 0 && col->parent_unique_id() == variant_col_unique_id) { - subcolumns_types[*col->path_info_ptr()].push_back( + subcolumns_types[*col->path_info_ptr()].emplace_back( DataTypeFactory::instance().create_data_type(*col, col->is_nullable())); } } @@ -326,7 +335,7 @@ void update_least_common_schema(const std::vector& schemas, col->parent_unique_id() == variant_col_unique_id && // this column have been found in origin columns subcolumns_types.find(*col->path_info_ptr()) != subcolumns_types.end()) { - subcolumns_types[*col->path_info_ptr()].push_back( + subcolumns_types[*col->path_info_ptr()].emplace_back( DataTypeFactory::instance().create_data_type(*col, col->is_nullable())); } } @@ -351,7 +360,7 @@ void update_least_sparse_column(const std::vector& schemas, if (col->has_path_info() && col->parent_unique_id() > 0 && col->parent_unique_id() == variant_col_unique_id && path_set.find(*col->path_info_ptr()) == path_set.end()) { - subcolumns_types[*col->path_info_ptr()].push_back( + subcolumns_types[*col->path_info_ptr()].emplace_back( DataTypeFactory::instance().create_data_type(*col, col->is_nullable())); } } diff --git a/be/src/vec/core/field.h b/be/src/vec/core/field.h index 87459f19ce6b72f..1680b87068514bf 100644 --- a/be/src/vec/core/field.h +++ b/be/src/vec/core/field.h @@ -339,6 +339,7 @@ class DecimalField { * Used to represent a single value of one of several types in memory. * Warning! Prefer to use chunks of columns instead of single values. See Column.h */ + class Field { public: struct Types { @@ -448,9 +449,15 @@ class Field { /** Despite the presence of a template constructor, this constructor is still needed, * since, in its absence, the compiler will still generate the default constructor. */ - Field(const Field& rhs) { create(rhs); } + Field(const Field& rhs) { + copy_type_info(rhs); + create(rhs); + } - Field(Field&& rhs) { create(std::move(rhs)); } + Field(Field&& rhs) { + copy_type_info(rhs); + create(std::move(rhs)); + } template requires(!std::is_same_v, Field>) @@ -467,6 +474,16 @@ class Field { create(data, size); } + void set_type_info(TypeIndex type, int precision = -1, int scale = -1) { + this->type = type; + this->precision = precision; + this->scale = scale; + } + + int get_precision() const { return precision; } + int get_scale() const { return scale; } + TypeIndex get_type_id() const { return type; } + void assign_string(const unsigned char* data, size_t size) { destroy(); create(data, size); @@ -483,6 +500,7 @@ class Field { } Field& operator=(const Field& rhs) { + copy_type_info(rhs); if (this != &rhs) { if (which != rhs.which) { destroy(); @@ -499,6 +517,7 @@ class Field { } Field& operator=(Field&& rhs) { + copy_type_info(rhs); if (this != &rhs) { if (which != rhs.which) { destroy(); @@ -706,6 +725,11 @@ class Field { storage; Types::Which which; + // detailed_type_info is used to store the real type of the field, for example, the real type of a Int64 is DateTimeV2 + // or real type of a Decimal32 is Decimal(27, 9) + TypeIndex type = TypeIndex::Nothing; + int scale = -1; + int precision = -1; /// Assuming there was no allocated state or it was deallocated (see destroy). template @@ -762,6 +786,12 @@ class Field { which = Types::JSONB; } + void copy_type_info(const Field& rhs) { + this->type = rhs.type; + this->precision = rhs.precision; + this->scale = rhs.scale; + } + void create_jsonb(const unsigned char* data, size_t size) { new (&storage) JsonbField(reinterpret_cast(data), size); which = Types::JSONB; diff --git a/be/src/vec/data_types/convert_field_to_type.cpp b/be/src/vec/data_types/convert_field_to_type.cpp index ecbce03ba6b10a9..b30657070706eee 100644 --- a/be/src/vec/data_types/convert_field_to_type.cpp +++ b/be/src/vec/data_types/convert_field_to_type.cpp @@ -33,6 +33,7 @@ #include "common/exception.h" #include "common/status.h" #include "util/bitmap_value.h" +#include "util/jsonb_document.h" #include "util/jsonb_writer.h" #include "vec/common/field_visitors.h" #include "vec/common/typeid_cast.h" @@ -111,6 +112,14 @@ class FieldVisitorToJsonb : public StaticVisitor { writer->writeString(x); writer->writeEndString(); } + void operator()(const JsonbField& x, JsonbWriter* writer) const { + const JsonbValue* value = JsonbDocument::createValue(x.get_value(), x.get_size()); + if (value == nullptr) { + throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Failed to create JsonbValue"); + } + writer->writeValue(value); + } + void operator()(const Array& x, JsonbWriter* writer) const; void operator()(const Tuple& x, JsonbWriter* writer) const { @@ -146,9 +155,6 @@ class FieldVisitorToJsonb : public StaticVisitor { void operator()(const Map& x, JsonbWriter* writer) const { throw doris::Exception(doris::ErrorCode::NOT_IMPLEMENTED_ERROR, "Not implemeted"); } - void operator()(const JsonbField& x, JsonbWriter* writer) const { - throw doris::Exception(doris::ErrorCode::NOT_IMPLEMENTED_ERROR, "Not implemeted"); - } }; void FieldVisitorToJsonb::operator()(const Array& x, JsonbWriter* writer) const { diff --git a/be/src/vec/data_types/data_type.h b/be/src/vec/data_types/data_type.h index 48743c926a50ec0..8135e65d45088e4 100644 --- a/be/src/vec/data_types/data_type.h +++ b/be/src/vec/data_types/data_type.h @@ -35,6 +35,7 @@ #include "runtime/define_primitive_type.h" #include "vec/columns/column_string.h" #include "vec/common/cow.h" +#include "vec/core/field.h" #include "vec/core/types.h" #include "vec/data_types/serde/data_type_serde.h" @@ -115,6 +116,14 @@ class IDataType : private boost::noncopyable { virtual Field get_field(const TExprNode& node) const = 0; + // Return Field which wrapped with the real type. + virtual Field get_type_field(const IColumn& column, int row) const { + Field field; + column.get(row, field); + field.set_type_info(get_type_id()); + return field; + } + /// Checks that two instances belong to the same type virtual bool equals(const IDataType& rhs) const = 0; diff --git a/be/src/vec/data_types/data_type_array.cpp b/be/src/vec/data_types/data_type_array.cpp index a4303b74d39bc92..83907750d8308f8 100644 --- a/be/src/vec/data_types/data_type_array.cpp +++ b/be/src/vec/data_types/data_type_array.cpp @@ -38,6 +38,8 @@ #include "vec/common/string_buffer.hpp" #include "vec/common/string_ref.h" #include "vec/common/typeid_cast.h" +#include "vec/core/field.h" +#include "vec/core/types.h" #include "vec/data_types/data_type_nullable.h" #include "vec/io/reader_buffer.h" @@ -308,4 +310,26 @@ Status DataTypeArray::from_string(ReadBuffer& rb, IColumn* column) const { return Status::OK(); } +Field DataTypeArray::get_type_field(const IColumn& column, int row) const { + const auto& array = assert_cast(column); + size_t offset = array.offset_at(row); + size_t size = array.size_at(row); + + if (size > max_array_size_as_field) { + throw doris::Exception( + ErrorCode::INTERNAL_ERROR, + "Array of size {}, is too large to be manipulated as single field, maximum size {}", + size, max_array_size_as_field); + } + + Array res(size); + + for (size_t i = 0; i < size; ++i) { + res[i] = get_nested_type()->get_type_field(array.get_data(), offset + i); + } + Field typed_res(res); + typed_res.set_type_info(TypeIndex::Array); + return typed_res; +} + } // namespace doris::vectorized diff --git a/be/src/vec/data_types/data_type_array.h b/be/src/vec/data_types/data_type_array.h index ae58efc0ca1b217..d7e8f52781a51f4 100644 --- a/be/src/vec/data_types/data_type_array.h +++ b/be/src/vec/data_types/data_type_array.h @@ -73,6 +73,8 @@ class DataTypeArray final : public IDataType { const char* get_family_name() const override { return "Array"; } + Field get_type_field(const IColumn& column, int row) const override; + MutableColumnPtr create_column() const override; Field get_default() const override; diff --git a/be/src/vec/data_types/data_type_bitmap.h b/be/src/vec/data_types/data_type_bitmap.h index 14fc1128ff0d8dd..2e8673d49512049 100644 --- a/be/src/vec/data_types/data_type_bitmap.h +++ b/be/src/vec/data_types/data_type_bitmap.h @@ -110,6 +110,11 @@ class DataTypeBitMap : public IDataType { __builtin_unreachable(); } + Field get_type_field(const IColumn& column, int row) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "Unimplemented get_type_field for BitMap"); + } + static void serialize_as_stream(const BitmapValue& value, BufferWritable& buf); static void deserialize_as_stream(BitmapValue& value, BufferReadable& buf); diff --git a/be/src/vec/data_types/data_type_date_time.h b/be/src/vec/data_types/data_type_date_time.h index 99507a370391e18..43590b84f83314d 100644 --- a/be/src/vec/data_types/data_type_date_time.h +++ b/be/src/vec/data_types/data_type_date_time.h @@ -29,6 +29,7 @@ #include "common/status.h" #include "runtime/define_primitive_type.h" +#include "vec/core/field.h" #include "vec/core/types.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_number_base.h" diff --git a/be/src/vec/data_types/data_type_decimal.h b/be/src/vec/data_types/data_type_decimal.h index b18487d1fb09668..5843867c3e0defb 100644 --- a/be/src/vec/data_types/data_type_decimal.h +++ b/be/src/vec/data_types/data_type_decimal.h @@ -237,6 +237,13 @@ class DataTypeDecimal final : public IDataType { DataTypeSerDeSPtr get_serde(int nesting_level = 1) const override { return std::make_shared>(scale, precision, nesting_level); }; + Field get_type_field(const IColumn& column, int row) const override { + const auto& decimal_column = static_cast&>(column); + Field field; + decimal_column.get(row, field); + field.set_type_info(TypeId::value, static_cast(precision), static_cast(scale)); + return field; + } /// Decimal specific diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp index 2f5a4122f109c31..14cf622d6c0da8f 100644 --- a/be/src/vec/data_types/data_type_factory.cpp +++ b/be/src/vec/data_types/data_type_factory.cpp @@ -249,7 +249,8 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo return nested; } -DataTypePtr DataTypeFactory::create_data_type(const TypeIndex& type_index, bool is_nullable) { +DataTypePtr DataTypeFactory::create_data_type(const TypeIndex& type_index, bool is_nullable, + int precision, int scale) { DataTypePtr nested = nullptr; switch (type_index) { case TypeIndex::UInt8: @@ -298,7 +299,7 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeIndex& type_index, bool nested = std::make_shared(); break; case TypeIndex::DateTimeV2: - nested = std::make_shared(); + nested = std::make_shared(scale > 0 ? scale : 0); break; case TypeIndex::DateTime: nested = std::make_shared(); @@ -310,22 +311,29 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeIndex& type_index, bool nested = std::make_shared("", true); break; case TypeIndex::Decimal32: - nested = std::make_shared>(BeConsts::MAX_DECIMAL32_PRECISION, 0); + nested = std::make_shared>( + precision > 0 ? precision : BeConsts::MAX_DECIMAL32_PRECISION, + scale > 0 ? scale : 0); break; case TypeIndex::Decimal64: - nested = std::make_shared>(BeConsts::MAX_DECIMAL64_PRECISION, 0); + nested = std::make_shared>( + precision > 0 ? precision : BeConsts::MAX_DECIMAL64_PRECISION, + scale > 0 ? scale : 0); break; case TypeIndex::Decimal128V2: - nested = std::make_shared>(BeConsts::MAX_DECIMALV2_PRECISION, - 0); + nested = std::make_shared>( + precision > 0 ? precision : BeConsts::MAX_DECIMALV2_PRECISION, + scale > 0 ? scale : 0); break; case TypeIndex::Decimal128V3: - nested = std::make_shared>(BeConsts::MAX_DECIMAL128_PRECISION, - 0); + nested = std::make_shared>( + precision > 0 ? precision : BeConsts::MAX_DECIMAL128_PRECISION, + scale > 0 ? scale : 0); break; case TypeIndex::Decimal256: - nested = std::make_shared>(BeConsts::MAX_DECIMAL256_PRECISION, - 0); + nested = std::make_shared>( + precision > 0 ? precision : BeConsts::MAX_DECIMAL256_PRECISION, + scale > 0 ? scale : 0); break; case TypeIndex::JSONB: nested = std::make_shared(); diff --git a/be/src/vec/data_types/data_type_factory.hpp b/be/src/vec/data_types/data_type_factory.hpp index bf2a78d62420d1a..80b52a871f0017e 100644 --- a/be/src/vec/data_types/data_type_factory.hpp +++ b/be/src/vec/data_types/data_type_factory.hpp @@ -50,9 +50,6 @@ enum class TypeIndex; namespace doris::vectorized { class DataTypeFactory { - using DataTypeMap = std::unordered_map; - using InvertedDataTypeMap = std::vector>; - public: static DataTypeFactory& instance() { static DataTypeFactory instance; @@ -60,7 +57,8 @@ class DataTypeFactory { } DataTypePtr create_data_type(const doris::Field& col_desc); - DataTypePtr create_data_type(const TypeIndex& type_index, bool is_nullable = false); + DataTypePtr create_data_type(const TypeIndex& type_index, bool is_nullable = false, + int precision = -1, int scale = -1); DataTypePtr create_data_type(const TabletColumn& col_desc, bool is_nullable = false); DataTypePtr create_data_type(const TypeDescriptor& col_desc, bool is_nullable = true); diff --git a/be/src/vec/data_types/data_type_jsonb.h b/be/src/vec/data_types/data_type_jsonb.h index 02a8d8a13c03bd5..22a4260bb42bc5b 100644 --- a/be/src/vec/data_types/data_type_jsonb.h +++ b/be/src/vec/data_types/data_type_jsonb.h @@ -80,6 +80,15 @@ class DataTypeJsonb final : public IDataType { return String(value.value(), value.size()); } + // Return JsonbField. + Field get_type_field(const IColumn& column, int row) const override { + const auto& column_data = static_cast(column); + Field field = + JsonbField(column_data.get_data_at(row).data, column_data.get_data_at(row).size); + field.set_type_info(TypeIndex::JSONB); + return field; + } + bool equals(const IDataType& rhs) const override; bool get_is_parametric() const override { return false; } diff --git a/be/src/vec/data_types/data_type_nullable.h b/be/src/vec/data_types/data_type_nullable.h index 12410b70bd13223..f81c8f207f57204 100644 --- a/be/src/vec/data_types/data_type_nullable.h +++ b/be/src/vec/data_types/data_type_nullable.h @@ -86,6 +86,14 @@ class DataTypeNullable final : public IDataType { return nested_data_type->get_field(node); } + Field get_type_field(const IColumn& column, int row) const override { + const auto& nullable_column = assert_cast(column); + if (nullable_column.is_null_at(row)) { + return Null(); + } + return nested_data_type->get_type_field(nullable_column.get_nested_column(), row); + } + bool equals(const IDataType& rhs) const override; bool is_value_unambiguously_represented_in_contiguous_memory_region() const override { diff --git a/be/src/vec/data_types/data_type_number_base.h b/be/src/vec/data_types/data_type_number_base.h index 5859300b846fd7a..b3b791e00e85f8c 100644 --- a/be/src/vec/data_types/data_type_number_base.h +++ b/be/src/vec/data_types/data_type_number_base.h @@ -164,6 +164,14 @@ class DataTypeNumberBase : public IDataType { return std::make_shared>(nesting_level); }; + // Return Field which wrapped with the real type. + Field get_type_field(const IColumn& column, int row) const override { + const auto& column_data = static_cast&>(column); + Field field = column_data.get_data()[row]; + field.set_type_info(get_type_id()); + return field; + } + protected: template void to_string_batch_impl(const IColumn& column, ColumnString& column_to) const { diff --git a/be/src/vec/data_types/data_type_string.h b/be/src/vec/data_types/data_type_string.h index a03e32f787b4f7b..cb8572b096cb838 100644 --- a/be/src/vec/data_types/data_type_string.h +++ b/be/src/vec/data_types/data_type_string.h @@ -93,6 +93,13 @@ class DataTypeString : public IDataType { DataTypeSerDeSPtr get_serde(int nesting_level = 1) const override { return std::make_shared(nesting_level); }; + // Return Field. + Field get_type_field(const IColumn& column, int row) const override { + const auto& column_data = static_cast(column); + Field field(column_data.get_data_at(row).data, column_data.get_data_at(row).size); + field.set_type_info(TypeIndex::String); + return field; + } }; } // namespace doris::vectorized diff --git a/be/src/vec/data_types/data_type_time.h b/be/src/vec/data_types/data_type_time.h index 7575129cfc31722..4ef62d7a576f3a1 100644 --- a/be/src/vec/data_types/data_type_time.h +++ b/be/src/vec/data_types/data_type_time.h @@ -114,6 +114,13 @@ class DataTypeTimeV2 final : public DataTypeNumberBase { const char* get_family_name() const override { return "timev2"; } UInt32 get_scale() const override { return _scale; } + Field get_type_field(const IColumn& column, int row) const override { + Field field; + column.get(row, field); + field.set_type_info(get_type_id(), 0, static_cast(get_scale())); + return field; + } + private: UInt32 _scale; }; diff --git a/be/src/vec/data_types/data_type_time_v2.h b/be/src/vec/data_types/data_type_time_v2.h index 7688a04a9a86f72..b066894de17dc43 100644 --- a/be/src/vec/data_types/data_type_time_v2.h +++ b/be/src/vec/data_types/data_type_time_v2.h @@ -30,6 +30,7 @@ #include "common/compiler_util.h" // IWYU pragma: keep #include "common/status.h" #include "runtime/define_primitive_type.h" +#include "vec/common/assert_cast.h" #include "vec/core/types.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_number_base.h" @@ -159,12 +160,22 @@ class DataTypeDateTimeV2 final : public DataTypeNumberBase { node.date_literal.value); } } + MutableColumnPtr create_column() const override; UInt32 get_scale() const override { return _scale; } void to_pb_column_meta(PColumnMeta* col_meta) const override; + Field get_type_field(const IColumn& column, int row) const override { + const auto& column_data = + assert_cast(column); + Field field; + column_data.get(row, field); + field.set_type_info(get_type_id(), 0, static_cast(get_scale())); + return field; + } + static void cast_to_date(const UInt64 from, Int64& to); static void cast_to_date_time(const UInt64 from, Int64& to); static void cast_to_date_v2(const UInt64 from, UInt32& to); diff --git a/be/src/vec/data_types/get_least_supertype.cpp b/be/src/vec/data_types/get_least_supertype.cpp index 82bea452923ed5c..384c706b589f23f 100644 --- a/be/src/vec/data_types/get_least_supertype.cpp +++ b/be/src/vec/data_types/get_least_supertype.cpp @@ -252,6 +252,10 @@ void get_least_supertype_jsonb(const DataTypes& types, DataTypePtr* type) { for (const auto& type : types) { type_ids.insert(type->get_type_id()); } + if (type_ids.size() == 1) { + *type = types[0]; + return; + } get_least_supertype_jsonb(type_ids, type); } diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp b/be/src/vec/data_types/serde/data_type_array_serde.cpp index 0d5dea7fc3dcd7a..aa8cc8eff487c06 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp @@ -27,6 +27,9 @@ #include "vec/columns/column_const.h" #include "vec/common/assert_cast.h" #include "vec/common/string_ref.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_array.h" +#include "vec/functions/function_helpers.h" namespace doris { @@ -232,21 +235,28 @@ void DataTypeArraySerDe::write_one_cell_to_jsonb(const IColumn& column, JsonbWri Status DataTypeArraySerDe::write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, - Arena& mem_pool, int row_num) const { - // Use allocator instead of stack memory, since rapidjson hold the reference of String value - // otherwise causes stack use after free - auto& column_array = static_cast(column); - if (row_num > column_array.size()) { - return Status::InternalError("row num {} out of range {}!", row_num, column_array.size()); - } - // void* mem = allocator.Malloc(sizeof(vectorized::Field)); - void* mem = mem_pool.alloc(sizeof(vectorized::Field)); - if (!mem) { - return Status::InternalError("Malloc failed"); - } - vectorized::Field* array = new (mem) vectorized::Field(column_array[row_num]); + Arena& mem_pool, int row_num, + const DataTypePtr& type) const { + const DataTypePtr& nested_type = + check_and_get_data_type(type.get())->get_nested_type(); + auto res = check_column_const_set_readability(column, row_num); + ColumnPtr ptr = res.first; + row_num = res.second; + + const auto& data_column = assert_cast(*ptr); + const auto& offsets = data_column.get_offsets(); + + size_t offset = offsets[row_num - 1]; + size_t next_offset = offsets[row_num]; - convert_field_to_rapidjson(*array, result, allocator); + const IColumn& nested_column = data_column.get_data(); + result.SetArray(); + for (size_t i = offset; i < next_offset; ++i) { + rapidjson::Value val; + RETURN_IF_ERROR(nested_serde->write_one_cell_to_json(nested_column, val, allocator, + mem_pool, i, nested_type)); + result.PushBack(val, allocator); + } return Status::OK(); } diff --git a/be/src/vec/data_types/serde/data_type_array_serde.h b/be/src/vec/data_types/serde/data_type_array_serde.h index 13c40e607772589..1912bb52763f66d 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.h +++ b/be/src/vec/data_types/serde/data_type_array_serde.h @@ -20,6 +20,7 @@ #include #include +#include #include #include "common/status.h" @@ -33,6 +34,7 @@ class JsonbValue; namespace vectorized { class IColumn; class Arena; +class IDataType; class DataTypeArraySerDe : public DataTypeSerDe { public: @@ -73,7 +75,8 @@ class DataTypeArraySerDe : public DataTypeSerDe { Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, - int row_num) const override; + int row_num, + const std::shared_ptr& type) const override; Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override; void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override; diff --git a/be/src/vec/data_types/serde/data_type_date64_serde.h b/be/src/vec/data_types/serde/data_type_date64_serde.h index 497ac2aeff4d5fa..3e69de2b6c9cfa1 100644 --- a/be/src/vec/data_types/serde/data_type_date64_serde.h +++ b/be/src/vec/data_types/serde/data_type_date64_serde.h @@ -34,6 +34,7 @@ #include "vec/columns/column_vector.h" #include "vec/common/string_ref.h" #include "vec/core/types.h" +#include "vec/data_types/serde/data_type_serde.h" namespace doris { class JsonbOutStream; @@ -72,6 +73,12 @@ class DataTypeDate64SerDe : public DataTypeNumberSerDe { const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, int start, int end, std::vector& buffer_list) const override; + Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, + int row_num, const DataTypePtr& type) const override { + return DataTypeSerDe::write_one_cell_to_json(column, result, allocator, mem_pool, row_num, + type); + } private: template diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h index ef4aa6843a068c5..4b97620864aff3d 100644 --- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h +++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h @@ -82,6 +82,13 @@ class DataTypeDateTimeV2SerDe : public DataTypeNumberSerDe { const FormatOptions& options) const override; void insert_column_last_value_multiple_times(IColumn& column, int times) const override; + Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, + int row_num, const DataTypePtr& type) const override { + return DataTypeSerDe::write_one_cell_to_json(column, result, allocator, mem_pool, row_num, + type); + } + private: template Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer& result, diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.h b/be/src/vec/data_types/serde/data_type_datev2_serde.h index 52e4cec364ebb69..767cf0ce604d78d 100644 --- a/be/src/vec/data_types/serde/data_type_datev2_serde.h +++ b/be/src/vec/data_types/serde/data_type_datev2_serde.h @@ -80,6 +80,13 @@ class DataTypeDateV2SerDe : public DataTypeNumberSerDe { void insert_column_last_value_multiple_times(IColumn& column, int times) const override; + Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, + int row_num, const DataTypePtr& type) const override { + return DataTypeSerDe::write_one_cell_to_json(column, result, allocator, mem_pool, row_num, + type); + } + private: template Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer& result, diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.h b/be/src/vec/data_types/serde/data_type_decimal_serde.h index dd31c3321afcc92..15e0d8278def04c 100644 --- a/be/src/vec/data_types/serde/data_type_decimal_serde.h +++ b/be/src/vec/data_types/serde/data_type_decimal_serde.h @@ -115,6 +115,13 @@ class DataTypeDecimalSerDe : public DataTypeSerDe { int start, int end, std::vector& buffer_list) const override; + Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, + int row_num, const DataTypePtr& type) const override { + return DataTypeSerDe::write_one_cell_to_json(column, result, allocator, mem_pool, row_num, + type); + } + Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, int* num_deserialized, const FormatOptions& options) const override; diff --git a/be/src/vec/data_types/serde/data_type_ipv4_serde.h b/be/src/vec/data_types/serde/data_type_ipv4_serde.h index 65bd2469b5bbb24..8905e6c6091b600 100644 --- a/be/src/vec/data_types/serde/data_type_ipv4_serde.h +++ b/be/src/vec/data_types/serde/data_type_ipv4_serde.h @@ -61,6 +61,13 @@ class DataTypeIPv4SerDe : public DataTypeNumberSerDe { void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, int end, const cctz::time_zone& ctz) const override; + Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, + int row_num, const DataTypePtr& type) const override { + return DataTypeSerDe::write_one_cell_to_json(column, result, allocator, mem_pool, row_num, + type); + } + private: template Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer& result, diff --git a/be/src/vec/data_types/serde/data_type_ipv6_serde.h b/be/src/vec/data_types/serde/data_type_ipv6_serde.h index d308da418592ddd..bb3d3c3978f2982 100644 --- a/be/src/vec/data_types/serde/data_type_ipv6_serde.h +++ b/be/src/vec/data_types/serde/data_type_ipv6_serde.h @@ -71,6 +71,13 @@ class DataTypeIPv6SerDe : public DataTypeNumberSerDe { void write_one_cell_to_jsonb(const IColumn& column, JsonbWriterT& result, Arena* mem_pool, int unique_id, int row_num) const override; + Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, + int row_num, const DataTypePtr& type) const override { + return DataTypeSerDe::write_one_cell_to_json(column, result, allocator, mem_pool, row_num, + type); + } + private: template Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer& result, diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp index 08514a6eea764a1..b645f2bfd1dacc1 100644 --- a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp @@ -208,7 +208,8 @@ void convert_jsonb_to_rapidjson(const JsonbValue& val, rapidjson::Value& target, Status DataTypeJsonbSerDe::write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, - Arena& mem_pool, int row_num) const { + Arena& mem_pool, int row_num, + const DataTypePtr& type) const { const auto& data = assert_cast(column); const auto jsonb_val = data.get_data_at(row_num); if (jsonb_val.empty()) { diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.h b/be/src/vec/data_types/serde/data_type_jsonb_serde.h index f1c5969f4c0749e..99953787e3421ae 100644 --- a/be/src/vec/data_types/serde/data_type_jsonb_serde.h +++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.h @@ -65,7 +65,7 @@ class DataTypeJsonbSerDe : public DataTypeStringSerDe { std::vector& buffer_list) const override; Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, - int row_num) const override; + int row_num, const DataTypePtr& type) const override; Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override; Status write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const override; diff --git a/be/src/vec/data_types/serde/data_type_nothing_serde.h b/be/src/vec/data_types/serde/data_type_nothing_serde.h index b39c3b5776ece46..520a30314b00a9f 100644 --- a/be/src/vec/data_types/serde/data_type_nothing_serde.h +++ b/be/src/vec/data_types/serde/data_type_nothing_serde.h @@ -107,6 +107,14 @@ class DataTypeNothingSerde : public DataTypeSerDe { std::vector& buffer_list) const override { return Status::NotSupported("write_column_to_orc with type " + column.get_name()); } + + Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, + int row_num, + const std::shared_ptr& type) const override { + result.SetNull(); + return Status::OK(); + } }; } // namespace vectorized } // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp index 1af85bd040d1e22..e7167074551fd6d 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp @@ -364,14 +364,16 @@ Status DataTypeNullableSerDe::write_column_to_orc(const std::string& timezone, Status DataTypeNullableSerDe::write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, - Arena& mem_pool, int row_num) const { - auto& col = static_cast(column); - auto& nested_col = col.get_nested_column(); + Arena& mem_pool, int row_num, + const DataTypePtr& type) const { + const auto& col = static_cast(column); + const auto& nullable_type = static_cast(*type); + const auto& nested_col = col.get_nested_column(); if (col.is_null_at(row_num)) { result.SetNull(); } else { - RETURN_IF_ERROR(nested_serde->write_one_cell_to_json(nested_col, result, allocator, - mem_pool, row_num)); + RETURN_IF_ERROR(nested_serde->write_one_cell_to_json( + nested_col, result, allocator, mem_pool, row_num, nullable_type.get_nested_type())); } return Status::OK(); } diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h b/be/src/vec/data_types/serde/data_type_nullable_serde.h index e9af344fb65f75c..8fae553ea2c7c0a 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.h +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h @@ -19,6 +19,8 @@ #include +#include + #include "common/status.h" #include "data_type_serde.h" #include "util/jsonb_writer.h" @@ -30,6 +32,7 @@ class JsonbValue; namespace vectorized { class IColumn; class Arena; +class IDataType; class DataTypeNullableSerDe : public DataTypeSerDe { public: @@ -96,7 +99,8 @@ class DataTypeNullableSerDe : public DataTypeSerDe { Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, - int row_num) const override; + int row_num, + const std::shared_ptr& type) const override; Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override; private: diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h b/be/src/vec/data_types/serde/data_type_number_serde.h index 69fdd6e045e1f7f..1baac87999ce92a 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.h +++ b/be/src/vec/data_types/serde/data_type_number_serde.h @@ -35,6 +35,7 @@ #include "vec/columns/column_vector.h" #include "vec/common/string_ref.h" #include "vec/core/types.h" +#include "vec/data_types/data_type.h" namespace doris { class JsonbOutStream; @@ -104,7 +105,7 @@ class DataTypeNumberSerDe : public DataTypeSerDe { std::vector& buffer_list) const override; Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, - int row_num) const override; + int row_num, const DataTypePtr& type) const override; Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override; private: @@ -303,7 +304,8 @@ template Status DataTypeNumberSerDe::write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, - Arena& mem_pool, int row_num) const { + Arena& mem_pool, int row_num, + const DataTypePtr& type) const { const auto& data = reinterpret_cast(column).get_data(); if constexpr (std::is_same_v || std::is_same_v || std::is_same_v) { result.SetInt(data[row_num]); diff --git a/be/src/vec/data_types/serde/data_type_object_serde.cpp b/be/src/vec/data_types/serde/data_type_object_serde.cpp index 49efa8c829c370a..0a3177bc7819b06 100644 --- a/be/src/vec/data_types/serde/data_type_object_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_object_serde.cpp @@ -160,6 +160,39 @@ void DataTypeObjectSerDe::write_column_to_arrow(const IColumn& column, const Nul } } +Status DataTypeObjectSerDe::write_one_cell_to_json( + const IColumn& column, rapidjson::Value& result, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, int row_num, + const std::shared_ptr& type) const { + const auto& var = assert_cast(column); + if (!var.is_finalized()) { + var.assume_mutable()->finalize(); + } + result.SetObject(); + // sort to make output stable, todo add a config + auto subcolumns = schema_util::get_sorted_subcolumns(var.get_subcolumns()); + for (const auto& entry : subcolumns) { + const auto& subcolumn = entry->data.get_finalized_column(); + const auto& subtype_serde = entry->data.get_least_common_type_serde(); + const auto& subtype = entry->data.get_least_common_type(); + if (subcolumn.is_null_at(row_num)) { + continue; + } + rapidjson::Value key; + key.SetString(entry->path.get_path().data(), entry->path.get_path().size()); + rapidjson::Value val; + RETURN_IF_ERROR(subtype_serde->write_one_cell_to_json(subcolumn, val, allocator, mem_pool, + row_num, subtype)); + if (val.IsNull() && entry->path.empty()) { + // skip null value with empty key, indicate the null json value of root in variant map, + // usally padding in nested arrays + continue; + } + result.AddMember(key, val, allocator); + } + return Status::OK(); +} + } // namespace vectorized } // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_object_serde.h b/be/src/vec/data_types/serde/data_type_object_serde.h index 9351b200f5344e6..5c973895b2fbdd5 100644 --- a/be/src/vec/data_types/serde/data_type_object_serde.h +++ b/be/src/vec/data_types/serde/data_type_object_serde.h @@ -93,6 +93,11 @@ class DataTypeObjectSerDe : public DataTypeSerDe { return Status::NotSupported("write_column_to_orc with type " + column.get_name()); } + Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, + int row_num, + const std::shared_ptr& type) const override; + private: template Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer& result, diff --git a/be/src/vec/data_types/serde/data_type_serde.cpp b/be/src/vec/data_types/serde/data_type_serde.cpp index 0709df03bb733d8..5b2d35068b2a620 100644 --- a/be/src/vec/data_types/serde/data_type_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_serde.cpp @@ -46,88 +46,22 @@ DataTypeSerDeSPtrs create_data_type_serdes(const std::vector& s return serdes; } -void DataTypeSerDe::convert_variant_map_to_rapidjson( - const vectorized::VariantMap& map, rapidjson::Value& target, - rapidjson::Document::AllocatorType& allocator) { - target.SetObject(); - for (const auto& item : map) { - if (item.second.is_null()) { - continue; - } - rapidjson::Value key; - key.SetString(item.first.data(), item.first.size()); - rapidjson::Value val; - convert_field_to_rapidjson(item.second, val, allocator); - if (val.IsNull() && item.first.empty()) { - // skip null value with empty key, indicate the null json value of root in variant map, - // usally padding in nested arrays - continue; - } - target.AddMember(key, val, allocator); - } -} - -void DataTypeSerDe::convert_array_to_rapidjson(const vectorized::Array& array, - rapidjson::Value& target, - rapidjson::Document::AllocatorType& allocator) { - target.SetArray(); - for (const vectorized::Field& item : array) { - rapidjson::Value val; - convert_field_to_rapidjson(item, val, allocator); - target.PushBack(val, allocator); - } -} - -void DataTypeSerDe::convert_field_to_rapidjson(const vectorized::Field& field, - rapidjson::Value& target, - rapidjson::Document::AllocatorType& allocator) { - switch (field.get_type()) { - case vectorized::Field::Types::Null: - target.SetNull(); - break; - case vectorized::Field::Types::Int64: - target.SetInt64(field.get()); - break; - case vectorized::Field::Types::Float64: - target.SetDouble(field.get()); - break; - case vectorized::Field::Types::JSONB: { - const auto& val = field.get(); - JsonbValue* json_val = JsonbDocument::createValue(val.get_value(), val.get_size()); - convert_jsonb_to_rapidjson(*json_val, target, allocator); - break; - } - case vectorized::Field::Types::String: { - const String& val = field.get(); - target.SetString(val.data(), val.size()); - break; - } - case vectorized::Field::Types::Array: { - const vectorized::Array& array = field.get(); - convert_array_to_rapidjson(array, target, allocator); - break; - } - case vectorized::Field::Types::VariantMap: { - const vectorized::VariantMap& map = field.get(); - convert_variant_map_to_rapidjson(map, target, allocator); - break; - } - default: - throw doris::Exception(ErrorCode::INTERNAL_ERROR, "unkown field type: {}", - field.get_type_name()); - break; - } -} - Status DataTypeSerDe::write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, - Arena& mem_pool, int row_num) const { - return Status::InternalError("Not support write {} to rapidjson", column.get_name()); + Arena& mem_pool, int row_num, + const DataTypePtr& type) const { + const std::string str_rep = type->to_string(column, row_num); + // allocate memory to prevent from heap use after free + void* mem = allocator.Malloc(str_rep.size()); + memcpy(mem, str_rep.data(), str_rep.size()); + result.SetString((const char*)mem, str_rep.size()); + return Status::OK(); } Status DataTypeSerDe::read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const { - return Status::NotSupported("Not support read {} from rapidjson", column.get_name()); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "Not support read {} from rapidjson", + column.get_name()); } const std::string DataTypeSerDe::NULL_IN_COMPLEX_TYPE = "null"; diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h index 262f9cae6a8a62f..c6385468ec60907 100644 --- a/be/src/vec/data_types/serde/data_type_serde.h +++ b/be/src/vec/data_types/serde/data_type_serde.h @@ -320,7 +320,8 @@ class DataTypeSerDe { // rapidjson virtual Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, - Arena& mem_pool, int row_num) const; + Arena& mem_pool, int row_num, + const std::shared_ptr& type) const; virtual Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const; protected: @@ -331,14 +332,6 @@ class DataTypeSerDe { // The _nesting_level of StructSerde is 1 // The _nesting_level of StringSerde is 2 int _nesting_level = 1; - - static void convert_field_to_rapidjson(const vectorized::Field& field, rapidjson::Value& target, - rapidjson::Document::AllocatorType& allocator); - static void convert_array_to_rapidjson(const vectorized::Array& array, rapidjson::Value& target, - rapidjson::Document::AllocatorType& allocator); - static void convert_variant_map_to_rapidjson(const vectorized::VariantMap& array, - rapidjson::Value& target, - rapidjson::Document::AllocatorType& allocator); }; /// Invert values since Arrow interprets 1 as a non-null value, while doris as a null diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h b/be/src/vec/data_types/serde/data_type_string_serde.h index fe09ff615f47427..ecbf644ee67d8f1 100644 --- a/be/src/vec/data_types/serde/data_type_string_serde.h +++ b/be/src/vec/data_types/serde/data_type_string_serde.h @@ -339,7 +339,7 @@ class DataTypeStringSerDeBase : public DataTypeSerDe { } Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, - int row_num) const override { + int row_num, const DataTypePtr& type) const override { const auto& col = assert_cast(column); const auto& data_ref = col.get_data_at(row_num); result.SetString(data_ref.data, data_ref.size); diff --git a/be/src/vec/data_types/serde/data_type_time_serde.h b/be/src/vec/data_types/serde/data_type_time_serde.h index d9a373b2f5a53db..35359f7056c4e35 100644 --- a/be/src/vec/data_types/serde/data_type_time_serde.h +++ b/be/src/vec/data_types/serde/data_type_time_serde.h @@ -41,6 +41,13 @@ class DataTypeTimeSerDe : public DataTypeNumberSerDe { int row_idx, bool col_const, const FormatOptions& options) const override; + Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, + int row_num, const DataTypePtr& type) const override { + return DataTypeSerDe::write_one_cell_to_json(column, result, allocator, mem_pool, row_num, + type); + } + private: template Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer& result, @@ -56,6 +63,12 @@ class DataTypeTimeV2SerDe : public DataTypeNumberSerDe { Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int row_idx, bool col_const, const FormatOptions& options) const override; + Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, + int row_num, const DataTypePtr& type) const override { + return DataTypeSerDe::write_one_cell_to_json(column, result, allocator, mem_pool, row_num, + type); + } private: template diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h index 153c3ef0f2e3aba..721d31061ded43f 100644 --- a/be/src/vec/functions/function_cast.h +++ b/be/src/vec/functions/function_cast.h @@ -93,6 +93,7 @@ #include "vec/functions/function.h" #include "vec/functions/function_convert_tz.h" #include "vec/functions/function_helpers.h" +#include "vec/io/io_helper.h" #include "vec/io/reader_buffer.h" #include "vec/runtime/vdatetime_value.h" #include "vec/utils/util.hpp" @@ -836,7 +837,7 @@ struct ConvertNothingToJsonb { } }; -template +template struct ConvertImplFromJsonb { static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, const size_t result, size_t input_rows_count) { @@ -850,16 +851,12 @@ struct ConvertImplFromJsonb { auto& null_map = null_map_col->get_data(); auto col_to = ColumnType::create(); - //IColumn & col_to = *res; - // size_t size = col_from.size(); col_to->reserve(input_rows_count); auto& res = col_to->get_data(); res.resize(input_rows_count); for (size_t i = 0; i < input_rows_count; ++i) { const auto& val = column_string->get_data_at(i); - // ReadBuffer read_buffer((char*)(val.data), val.size); - // RETURN_IF_ERROR(data_type_to->from_string(read_buffer, col_to)); if (val.size == 0) { null_map[i] = 1; @@ -882,6 +879,15 @@ struct ConvertImplFromJsonb { res[i] = 0; continue; } + if (value->isString()) { + // convert by parse + const auto& data = static_cast(value)->getBlob(); + size_t len = static_cast(value)->getBlobLen(); + ReadBuffer rb((char*)(data), len); + bool parsed = try_parse_impl(res[i], rb, context); + null_map[i] = !parsed; + continue; + } if constexpr (type_index == TypeIndex::UInt8) { // cast from json value to boolean type if (value->isTrue()) { @@ -1976,19 +1982,20 @@ class FunctionCast final : public IFunctionBase { bool jsonb_string_as_string) const { switch (to_type->get_type_id()) { case TypeIndex::UInt8: - return &ConvertImplFromJsonb::execute; + return &ConvertImplFromJsonb::execute; case TypeIndex::Int8: - return &ConvertImplFromJsonb::execute; + return &ConvertImplFromJsonb::execute; case TypeIndex::Int16: - return &ConvertImplFromJsonb::execute; + return &ConvertImplFromJsonb::execute; case TypeIndex::Int32: - return &ConvertImplFromJsonb::execute; + return &ConvertImplFromJsonb::execute; case TypeIndex::Int64: - return &ConvertImplFromJsonb::execute; + return &ConvertImplFromJsonb::execute; case TypeIndex::Int128: - return &ConvertImplFromJsonb::execute; + return &ConvertImplFromJsonb::execute; case TypeIndex::Float64: - return &ConvertImplFromJsonb::execute; + return &ConvertImplFromJsonb::execute; case TypeIndex::String: if (!jsonb_string_as_string) { // Conversion from String through parsing. diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java index e9f1b50c0dfad87..45c91bec5bbd8e7 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java @@ -550,10 +550,8 @@ public static ScalarType createJsonbType() { } public static ScalarType createVariantType() { - // length checked in analysis - ScalarType type = new ScalarType(PrimitiveType.VARIANT); - type.len = MAX_STRING_LENGTH; - return type; + // Not return ScalarType return VariantType instead for compatibility reason + return new VariantType(); } public static ScalarType createVarchar(int len) { diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/StructField.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/StructField.java index ecbfd30ca23538e..c08c93135a8d3cb 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/StructField.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/StructField.java @@ -42,13 +42,17 @@ public class StructField { public static final String DEFAULT_FIELD_NAME = "col"; - public StructField(String name, Type type, String comment, boolean containsNull) { - this.name = name.toLowerCase(); + public StructField(String name, Type type, String comment, boolean containsNull, boolean nameCaseSensitive) { + this.name = (nameCaseSensitive ? name : name.toLowerCase()); this.type = type; this.comment = comment; this.containsNull = containsNull; } + public StructField(String name, Type type, String comment, boolean containsNull) { + this(name, type, comment, containsNull, false); + } + public StructField(String name, Type type) { this(name, type, null, true); } diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java index 7a8dda5aabedefc..82447eea72556f8 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java @@ -135,6 +135,8 @@ public abstract class Type { private static final ArrayList arraySubTypes; private static final ArrayList mapSubTypes; private static final ArrayList structSubTypes; + + private static final ArrayList variantSubTypes; private static final ArrayList trivialTypes; static { @@ -170,6 +172,8 @@ public abstract class Type { typeMap.put("MAP", Type.MAP); typeMap.put("OBJECT", Type.UNSUPPORTED); typeMap.put("ARRAY", Type.ARRAY); + typeMap.put("IPV4", Type.IPV4); + typeMap.put("IPV6", Type.IPV6); typeMap.put("QUANTILE_STATE", Type.QUANTILE_STATE); } @@ -306,6 +310,27 @@ public abstract class Type { structSubTypes.add(ARRAY); structSubTypes.add(MAP); structSubTypes.add(STRUCT); + + variantSubTypes = Lists.newArrayList(); + variantSubTypes.add(BOOLEAN); + variantSubTypes.addAll(integerTypes); + variantSubTypes.add(FLOAT); + variantSubTypes.add(DOUBLE); + variantSubTypes.add(DECIMAL32); // same DEFAULT_DECIMALV3 + variantSubTypes.add(DECIMAL64); + variantSubTypes.add(DECIMAL128); + variantSubTypes.add(DECIMAL256); + variantSubTypes.add(DATE); + variantSubTypes.add(DATETIME); + variantSubTypes.add(DATEV2); + variantSubTypes.add(DATETIMEV2); + variantSubTypes.add(IPV4); + variantSubTypes.add(IPV6); + variantSubTypes.add(CHAR); + variantSubTypes.add(VARCHAR); + variantSubTypes.add(STRING); + variantSubTypes.add(ARRAY); + variantSubTypes.add(NULL); } public static final Set DATE_SUPPORTED_JAVA_TYPE = Sets.newHashSet(LocalDate.class, java.util.Date.class, @@ -373,6 +398,10 @@ public static ArrayList getStructSubTypes() { return structSubTypes; } + public static ArrayList getVariantSubTypes() { + return variantSubTypes; + } + /** * Return true if this is complex type and support subType */ diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java index 924b197e4d739e4..ea0d4915ed9c32d 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java @@ -21,22 +21,95 @@ import org.apache.doris.thrift.TTypeNode; import org.apache.doris.thrift.TTypeNodeType; +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.gson.annotations.SerializedName; + +import java.util.ArrayList; +import java.util.HashMap; + public class VariantType extends ScalarType { + @SerializedName(value = "fieldMap") + private final HashMap fieldMap = Maps.newHashMap(); + + @SerializedName(value = "fields") + private final ArrayList predefinedFields; + public VariantType() { super(PrimitiveType.VARIANT); + this.predefinedFields = Lists.newArrayList(); + } + + public VariantType(ArrayList fields) { + super(PrimitiveType.VARIANT); + Preconditions.checkNotNull(fields); + this.predefinedFields = fields; + for (int i = 0; i < this.predefinedFields.size(); ++i) { + this.predefinedFields.get(i).setPosition(i); + fieldMap.put(this.predefinedFields.get(i).getName(), this.predefinedFields.get(i)); + } + } + + @Override + public String toSql(int depth) { + if (predefinedFields.isEmpty()) { + return "variant"; + } + if (depth >= MAX_NESTING_DEPTH) { + return "variant<...>"; + } + ArrayList fieldsSql = Lists.newArrayList(); + for (StructField f : predefinedFields) { + fieldsSql.add(f.toSql(depth + 1)); + } + return String.format("variant<%s>", Joiner.on(",").join(fieldsSql)); + } + + public ArrayList getPredefinedFields() { + return predefinedFields; } @Override public void toThrift(TTypeDesc container) { - // not use ScalarType's toThrift for compatibility, because VariantType is not extends ScalarType previously + // use ScalarType's toThrift for compatibility, because VariantType use ScalarType to thrift previously + if (predefinedFields.isEmpty()) { + super.toThrift(container); + return; + } TTypeNode node = new TTypeNode(); container.types.add(node); node.setType(TTypeNodeType.VARIANT); + // predefined fields + node.setStructFields(new ArrayList<>()); + for (StructField field : predefinedFields) { + field.toThrift(container, node); + } + } + + @Override + public boolean supportSubType(Type subType) { + for (Type supportedType : Type.getVariantSubTypes()) { + if (subType.getPrimitiveType() == supportedType.getPrimitiveType()) { + return true; + } + } + return false; + } + + @Override + public boolean equals(Object other) { + if (!(other instanceof VariantType)) { + return false; + } + VariantType otherVariantType = (VariantType) other; + return otherVariantType.getPredefinedFields().equals(predefinedFields); } @Override - public boolean matchesType(Type t) { - return t.isVariantType() || t.isStringType(); + public boolean matchesType(Type type) { + return type.isVariantType(); } } diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 index 7fcb43db48944ac..c8319a68e4fc1c5 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 @@ -1670,6 +1670,7 @@ dataType : complex=ARRAY LT dataType GT #complexDataType | complex=MAP LT dataType COMMA dataType GT #complexDataType | complex=STRUCT LT complexColTypeList GT #complexDataType + | VARIANT LT variantSubColTypeList GT #variantPredefinedFields | AGG_STATE LT functionNameIdentifier LEFT_PAREN dataTypes+=dataTypeWithNullable (COMMA dataTypes+=dataTypeWithNullable)* RIGHT_PAREN GT #aggStateDataType @@ -1720,6 +1721,13 @@ complexColType : identifier COLON dataType commentSpec? ; +variantSubColTypeList + : variantSubColType (COMMA variantSubColType)* + ; +variantSubColType + : qualifiedName COLON dataType commentSpec? + ; + commentSpec : COMMENT STRING_LITERAL ; diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index 0ffcaf0c488547f..b7a686f281916a6 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -43,6 +43,7 @@ import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.Type; import org.apache.doris.catalog.ArrayType; +import org.apache.doris.catalog.VariantType; import org.apache.doris.catalog.GeneratedColumnInfo; import org.apache.doris.catalog.MapType; import org.apache.doris.catalog.StructField; @@ -816,7 +817,9 @@ nonterminal Expr function_call_expr, array_expr, map_expr; nonterminal ArrayLiteral array_literal; nonterminal MapLiteral map_literal; nonterminal StructField struct_field; +nonterminal StructField variant_field; nonterminal ArrayList struct_field_list; +nonterminal ArrayList variant_field_list; nonterminal StructLiteral struct_literal; nonterminal AnalyticWindow opt_window_clause; nonterminal AnalyticWindow.Type window_type; @@ -6930,6 +6933,8 @@ type ::= {: RESULT = new MapType(key_type,value_type); :} | KW_STRUCT LESSTHAN struct_field_list:fields GREATERTHAN {: RESULT = new StructType(fields); :} + | KW_VARIANT LESSTHAN variant_field_list:fields GREATERTHAN + {: RESULT = new VariantType(fields); :} | KW_CHAR LPAREN INTEGER_LITERAL:len RPAREN {: ScalarType type = ScalarType.createCharType(len.intValue()); RESULT = type; @@ -7245,6 +7250,23 @@ struct_field_list ::= :} ; +variant_field ::= + ident:name COLON type:type opt_comment:comment + {: RESULT = new StructField(name, type, comment, true); :} + ; + +variant_field_list ::= + variant_field:field + {: + RESULT = Lists.newArrayList(field); + :} + | variant_field_list:fields COMMA struct_field:field + {: + fields.add(field); + RESULT = fields; + :} + ; + struct_literal ::= LBRACE expr_list:list RBRACE {: diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java index 91bc4c13695e899..e2c781ccb901f04 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java @@ -611,8 +611,9 @@ private boolean processModifyColumn(ModifyColumnClause alterClause, OlapTable ol if (!col.equals(modColumn)) { typeChanged = true; // TODO:the case where columnPos is not empty has not been considered - if (columnPos == null && col.getDataType() == PrimitiveType.VARCHAR - && modColumn.getDataType() == PrimitiveType.VARCHAR) { + if (columnPos == null && (col.getDataType() == PrimitiveType.VARCHAR + && modColumn.getDataType() == PrimitiveType.VARCHAR) + || (col.getDataType().isVariantType() && modColumn.getDataType().isVariantType())) { col.checkSchemaChangeAllowed(modColumn); lightSchemaChange = olapTable.getEnableLightSchemaChange(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java index de257991ca6ba46..e18a66099e130bd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java @@ -158,6 +158,9 @@ public CastExpr(Type targetType, Expr e, Void v) { if (from.isComplexType() && type.isJsonbType()) { nullableMode = Function.NullableMode.ALWAYS_NULLABLE; } + if (from.isVariantType() || to.isVariantType()) { + nullableMode = Function.NullableMode.ALWAYS_NULLABLE; + } Preconditions.checkState(nullableMode != null, "cannot find nullable node for cast from " + from + " to " + to); fn = new Function(new FunctionName(getFnName(type)), Lists.newArrayList(e.type), type, diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java index abba2762d5664df..c0cccd7ce53c8cb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java @@ -41,6 +41,7 @@ public class InvertedIndexUtil { public static String INVERTED_INDEX_PARSER_COARSE_GRANULARITY = "coarse_grained"; public static String INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE = "char_filter_type"; + public static String INVERTED_INDEX_SUB_COLUMN_PATH = "sub_column_path"; public static String INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN = "char_filter_pattern"; public static String INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT = "char_filter_replacement"; @@ -157,7 +158,8 @@ public static void checkInvertedIndexProperties(Map properties) INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT, INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY, INVERTED_INDEX_PARSER_LOWERCASE_KEY, - INVERTED_INDEX_PARSER_STOPWORDS_KEY + INVERTED_INDEX_PARSER_STOPWORDS_KEY, + INVERTED_INDEX_SUB_COLUMN_PATH )); for (String key : properties.keySet()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java index 7e0ab33aa7c14dc..577ce0f3a983095 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java @@ -332,11 +332,19 @@ public void createChildrenColumn(Type type, Column column) { column.addChildrenColumn(v); } else if (type.isStructType()) { ArrayList fields = ((StructType) type).getFields(); - for (StructField field : fields) { - Column c = new Column(field.getName(), field.getType()); - c.setIsAllowNull(field.getContainsNull()); - column.addChildrenColumn(c); - } + addChildren(column, fields); + } else if (type.isVariantType()) { + // variant may contain predefined structured fields + ArrayList fields = ((VariantType) type).getPredefinedFields(); + addChildren(column, fields); + } + } + + private void addChildren(Column column, ArrayList fields) { + for (StructField field : fields) { + Column c = new Column(field.getName(), field.getType()); + c.setIsAllowNull(field.getContainsNull()); + column.addChildrenColumn(c); } } @@ -657,7 +665,8 @@ private void toChildrenThrift(Column column, TColumn tColumn) { tColumn.setChildrenColumn(new ArrayList<>()); setChildrenTColumn(k, tColumn); setChildrenTColumn(v, tColumn); - } else if (column.type.isStructType()) { + } else if (column.type.isStructType() + || (column.type.isVariantType() && !((VariantType) (column.type)).getPredefinedFields().isEmpty())) { List childrenColumns = column.getChildren(); tColumn.setChildrenColumn(new ArrayList<>()); for (Column children : childrenColumns) { @@ -803,15 +812,22 @@ public OlapFile.ColumnPB toPb(Set bfColumns, List indexes) throws Column v = this.getChildren().get(1); builder.addChildrenColumns(v.toPb(Sets.newHashSet(), Lists.newArrayList())); } else if (this.type.isStructType()) { - List childrenColumns = this.getChildren(); - for (Column c : childrenColumns) { - builder.addChildrenColumns(c.toPb(Sets.newHashSet(), Lists.newArrayList())); - } + addChildren(builder); + } else if (this.type.isVariantType()) { + // variant may contain predefined structured fields + addChildren(builder); } OlapFile.ColumnPB col = builder.build(); return col; } + + private void addChildren(OlapFile.ColumnPB.Builder builder) throws DdlException { + List childrenColumns = this.getChildren(); + for (Column c : childrenColumns) { + builder.addChildrenColumns(c.toPb(Sets.newHashSet(), Lists.newArrayList())); + } + } // CLOUD_CODE_END public void checkSchemaChangeAllowed(Column other) throws DdlException { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index d5a48f8e1129c68..9618be0c58aec9b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -205,6 +205,9 @@ import org.apache.doris.nereids.DorisParser.UpdateContext; import org.apache.doris.nereids.DorisParser.UserIdentifyContext; import org.apache.doris.nereids.DorisParser.UserVariableContext; +import org.apache.doris.nereids.DorisParser.VariantPredefinedFieldsContext; +import org.apache.doris.nereids.DorisParser.VariantSubColTypeContext; +import org.apache.doris.nereids.DorisParser.VariantSubColTypeListContext; import org.apache.doris.nereids.DorisParser.WhereClauseContext; import org.apache.doris.nereids.DorisParser.WindowFrameContext; import org.apache.doris.nereids.DorisParser.WindowSpecContext; @@ -473,6 +476,7 @@ import org.apache.doris.nereids.types.StructField; import org.apache.doris.nereids.types.StructType; import org.apache.doris.nereids.types.VarcharType; +import org.apache.doris.nereids.types.VariantType; import org.apache.doris.nereids.types.coercion.CharacterType; import org.apache.doris.nereids.util.ExpressionUtils; import org.apache.doris.nereids.util.RelationUtil; @@ -2775,6 +2779,8 @@ public ColumnDefinition visitColumnDef(ColumnDefContext ctx) { ? visitPrimitiveDataType(((PrimitiveDataTypeContext) ctx.type)) : ctx.type instanceof ComplexDataTypeContext ? visitComplexDataType((ComplexDataTypeContext) ctx.type) + : ctx.type instanceof VariantPredefinedFieldsContext + ? visitVariantPredefinedFields((VariantPredefinedFieldsContext) ctx.type) : visitAggStateDataType((AggStateDataTypeContext) ctx.type); colType = colType.conversion(); boolean isKey = ctx.KEY() != null; @@ -3613,6 +3619,30 @@ public DataType visitPrimitiveDataType(PrimitiveDataTypeContext ctx) { }); } + @Override + public DataType visitVariantPredefinedFields(VariantPredefinedFieldsContext ctx) { + return new VariantType(visitVariantSubColTypeList(ctx.variantSubColTypeList())); + } + + @Override + public List visitVariantSubColTypeList(VariantSubColTypeListContext ctx) { + return ctx.variantSubColType().stream().map( + this::visitVariantSubColType).collect(ImmutableList.toImmutableList()); + } + + @Override + public StructField visitVariantSubColType(VariantSubColTypeContext ctx) { + String comment; + if (ctx.commentSpec() != null) { + comment = ctx.commentSpec().STRING_LITERAL().getText(); + comment = LogicalPlanBuilderAssistant.escapeBackSlash(comment.substring(1, comment.length() - 1)); + } else { + comment = ""; + } + return new StructField(ctx.qualifiedName().getText(), + typedVisit(ctx.dataType()), true, comment, true /*name case-sensitive*/); + } + @Override public DataType visitComplexDataType(ComplexDataTypeContext ctx) { return ParserUtils.withOrigin(ctx, () -> { @@ -3643,7 +3673,8 @@ public StructField visitComplexColType(ComplexColTypeContext ctx) { } else { comment = ""; } - return new StructField(ctx.identifier().getText(), typedVisit(ctx.dataType()), true, comment); + return new StructField(ctx.identifier().getText(), + typedVisit(ctx.dataType()), true, comment, false /*name case-insensitive*/); } private String parseConstant(ConstantContext context) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/ColumnDefinition.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/ColumnDefinition.java index 184b6d2fa555f61..195f39b79d3d4c9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/ColumnDefinition.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/ColumnDefinition.java @@ -25,6 +25,7 @@ import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.Type; +import org.apache.doris.catalog.VariantType; import org.apache.doris.common.FeNameFormat; import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.types.ArrayType; @@ -639,6 +640,19 @@ private void validateScalarType(ScalarType scalarType) { } break; } + case VARIANT: + ArrayList predefinedFields = + ((org.apache.doris.catalog.VariantType) scalarType).getPredefinedFields(); + Set fieldNames = new HashSet<>(); + for (org.apache.doris.catalog.StructField field : predefinedFields) { + Type fieldType = field.getType(); + validateNestedType(scalarType, fieldType); + if (!fieldNames.add(field.getName())) { + throw new AnalysisException("Duplicate field name " + field.getName() + + " in struct " + scalarType.toSql()); + } + } + break; case INVALID_TYPE: throw new AnalysisException("Invalid type."); default: diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/StructField.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/StructField.java index e095f25aa66bc82..7567d9295a0be91 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/StructField.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/StructField.java @@ -32,19 +32,28 @@ public class StructField { private final boolean nullable; private final String comment; + private final boolean nameCaseSensitive; + /** * StructField Constructor * @param name The name of this field * @param dataType The data type of this field * @param nullable Indicates if values of this field can be `null` values + * @param nameCaseSensitive Indicates if name is case-sensitive */ - public StructField(String name, DataType dataType, boolean nullable, String comment) { - this.name = Objects.requireNonNull(name, "name should not be null").toLowerCase(Locale.ROOT); + public StructField(String name, DataType dataType, boolean nullable, String comment, boolean nameCaseSensitive) { + this.nameCaseSensitive = nameCaseSensitive; + this.name = nameCaseSensitive ? Objects.requireNonNull(name, "name should not be null") + : Objects.requireNonNull(name, "name should not be null").toLowerCase(Locale.ROOT); this.dataType = Objects.requireNonNull(dataType, "dataType should not be null"); this.nullable = nullable; this.comment = Objects.requireNonNull(comment, "comment should not be null"); } + public StructField(String name, DataType dataType, boolean nullable, String comment) { + this(name, dataType, nullable, comment, false); + } + public String getName() { return name; } @@ -69,16 +78,16 @@ public StructField conversion() { } public StructField withDataType(DataType dataType) { - return new StructField(name, dataType, nullable, comment); + return new StructField(name, dataType, nullable, comment, nameCaseSensitive); } public StructField withDataTypeAndNullable(DataType dataType, boolean nullable) { - return new StructField(name, dataType, nullable, comment); + return new StructField(name, dataType, nullable, comment, nameCaseSensitive); } public org.apache.doris.catalog.StructField toCatalogDataType() { return new org.apache.doris.catalog.StructField( - name, dataType.toCatalogDataType(), comment, nullable); + name, dataType.toCatalogDataType(), comment, nullable, nameCaseSensitive); } public String toSql() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java index 63752594998b3a9..2bbaa7982bcf808 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java @@ -18,26 +18,62 @@ package org.apache.doris.nereids.types; import org.apache.doris.catalog.Type; -import org.apache.doris.nereids.annotation.Developing; +import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.types.coercion.PrimitiveType; +import com.google.common.base.Suppliers; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; import java.util.Objects; +import java.util.function.Supplier; +import java.util.stream.Collectors; /** * Variant type in Nereids. * Why Variant is not complex type? Since it's nested structure is not pre-defined, then using * primitive type will be easy to handle meta info in FE. + * Also, could predefine some fields of nested columns. + * Example: VARIANT <`a.b`:INT, a.c:DATETIMEV2> + * */ -@Developing public class VariantType extends PrimitiveType { public static final VariantType INSTANCE = new VariantType(); public static final int WIDTH = 24; + private final List predefinedFields; + private final Supplier> pathToFields; + + // No predefined fields + public VariantType() { + predefinedFields = Lists.newArrayList(); + pathToFields = Suppliers.memoize(Maps::newTreeMap); + } + + /** + * Contains predefined fields like struct + */ + public VariantType(List fields) { + this.predefinedFields = ImmutableList.copyOf(Objects.requireNonNull(fields, "fields should not be null")); + this.pathToFields = Suppliers.memoize(() -> this.predefinedFields.stream().collect(ImmutableMap.toImmutableMap( + StructField::getName, f -> f, (f1, f2) -> { + throw new AnalysisException("The name of the struct field cannot be repeated." + + " same name fields are " + f1 + " and " + f2); + }))); + } + @Override public Type toCatalogDataType() { - return Type.VARIANT; + return new org.apache.doris.catalog.VariantType(predefinedFields.stream() + .map(StructField::toCatalogDataType) + .collect(Collectors.toCollection(ArrayList::new))); } @Override @@ -46,8 +82,11 @@ public boolean acceptsType(DataType other) { } @Override - public String simpleString() { - return "variant"; + public String toSql() { + if (predefinedFields.isEmpty()) { + return "VARIANT"; + } + return "VARIANT<" + predefinedFields.stream().map(StructField::toSql).collect(Collectors.joining(",")) + ">"; } @Override @@ -71,11 +110,6 @@ public int width() { return WIDTH; } - @Override - public String toSql() { - return "VARIANT"; - } - @Override public String toString() { return toSql(); diff --git a/regression-test/data/variant_p0/nested.out b/regression-test/data/variant_p0/nested.out index 2c105a68778a0dc..8510505e757bc69 100644 --- a/regression-test/data/variant_p0/nested.out +++ b/regression-test/data/variant_p0/nested.out @@ -157,15 +157,15 @@ v.xx tinyint Yes false \N NONE 10 {"xx":10} -- !sql -- -[] -[{"ba":"11111"}, {"a":"1111"}, {"axxxb":100,"xxxy111":111}, {"aaa":"11","ddsss":1024}, {"xx":10}] -[] -[{"baaa":"11111"}, {"ax1111":"1111"}, {"axxxb":100,"xxxy111":111}, {"aaa":"11","ddsss":1024}, {"xx":10}] -[{"ba":"11111"}, {"a":"1111"}, {"axxxb":100,"xxxy111":111}, {"aaa":"11","ddsss":1024}, {"xx":10}] -[{"mmm":"11111"}, {"ax1111":"1111"}, {"axxxb":100,"xxxy111":111}, {"aaa":"11","ddsss":1024}, {"xx":10}] -[{"ba":"11111"}, {"a":"1111"}, {"axxxb":100,"xxxy111":111}, {"aaa":"11","ddsss":1024}, {"xx":10}] -[{"yyy":"11111"}, {"ax1111":"1111"}, {"axxxb":100,"xxxy111":111}, {"aaa":"11","ddsss":1024}, {"xx":10}] -[{"yyy":"11111"}, {"ax1111":"1111"}, {"axxxb":100,"xxxy111":111}, {"aaa":"11","ddsss":1024}, {"xx":10}] +{} +[{"ba":"11111"},{"a":"1111"},{"axxxb":100,"xxxy111":111},{"aaa":"11","ddsss":1024},{"xx":10}] +{} +[{"baaa":"11111"},{"ax1111":"1111"},{"axxxb":100,"xxxy111":111},{"aaa":"11","ddsss":1024},{"xx":10}] +[{"ba":"11111"},{"a":"1111"},{"axxxb":100,"xxxy111":111},{"aaa":"11","ddsss":1024},{"xx":10}] +[{"mmm":"11111"},{"ax1111":"1111"},{"axxxb":100,"xxxy111":111},{"aaa":"11","ddsss":1024},{"xx":10}] +[{"ba":"11111"},{"a":"1111"},{"axxxb":100,"xxxy111":111},{"aaa":"11","ddsss":1024},{"xx":10}] +[{"yyy":"11111"},{"ax1111":"1111"},{"axxxb":100,"xxxy111":111},{"aaa":"11","ddsss":1024},{"xx":10}] +[{"yyy":"11111"},{"ax1111":"1111"},{"axxxb":100,"xxxy111":111},{"aaa":"11","ddsss":1024},{"xx":10}] -- !explode_sql -- 19 10 diff --git a/regression-test/data/variant_p0/predefine/load.out b/regression-test/data/variant_p0/predefine/load.out new file mode 100644 index 000000000000000..7362407917a5903 --- /dev/null +++ b/regression-test/data/variant_p0/predefine/load.out @@ -0,0 +1,125 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +1 1 {"a":{"b":{"c":123456,"d":11.111}},"dcm":"123.456000000","dt":"2021-01-01 00:00:00","ip":"127.0.0.1","ss":"199991111"} +2 2 {"a":{"b":{"c":678910,"d":22.222}},"dcm":"456.123000000","dt":"2022-01-01 11:11:11","ip":"127.0.0.1","ss":"29999111"} +3 3 {"dcm":"789.123000000","dt":"2025-01-01 11:11:11","ip":"127.0.0.1"} +4 4 {"a":{"b":{"c":678910,"d":33.222}}} +5 5 {} +6 6 \N +7 7 {"xxx":12345} +8 8 {"yyy":111.111} +9 2 {"a":{"b":{"c":678910,"d":22.222}},"dcm":"456.123000000","dt":"2022-01-01 11:11:11","ip":"127.0.0.1","ss":"29999111"} +10 1 {"a":{"b":{"c":123456,"d":11.111}},"dcm":"123.456000000","dt":"2021-01-01 00:00:00","ip":"127.0.0.1","ss":"199991111"} +11 4 {"a":{"b":{"c":678910,"d":33.222}}} +12 3 {"dcm":"789.123000000","dt":"2025-01-01 11:11:11","ip":"127.0.0.1"} + +-- !sql -- +id bigint No true \N +type varchar(30) Yes false \N NONE +v1 variant Yes false \N NONE +v1.a.b.c int Yes false \N NONE +v1.a.b.d double Yes false \N NONE +v1.dcm decimal(38,0) Yes false \N NONE +v1.dt datetime Yes false \N NONE +v1.ip ipv4 Yes false \N NONE +v1.ss text Yes false \N NONE +v1.xxx smallint Yes false \N NONE +v1.yyy double Yes false \N NONE + +-- !sql -- +127.0.0.1 +127.0.0.1 +127.0.0.1 +127.0.0.1 +127.0.0.1 +127.0.0.1 + +-- !sql -- +123.456000000 +123.456000000 + +-- !sql -- +123.456000000 +456.123000000 +789.123000000 +\N +\N +\N +\N +\N +456.123000000 +123.456000000 +\N +789.123000000 + +-- !sql -- +2022-01-01 11:11:11 +2022-01-01 11:11:11 + +-- !sql -- +2022-01-01 11:11:11 +2022-01-01 11:11:11 + +-- !sql -- +2 2 {"a":{"b":{"c":678910,"d":22.222}},"dcm":"456.123000000","dt":"2022-01-01 11:11:11","ip":"127.0.0.1","ss":"29999111"} +9 2 {"a":{"b":{"c":678910,"d":22.222}},"dcm":"456.123000000","dt":"2022-01-01 11:11:11","ip":"127.0.0.1","ss":"29999111"} + +-- !sql -- +1 1 {"a":{"b":{"c":123456,"d":11.111}},"dcm":"123.456000000","dt":"2021-01-01 00:00:00","ip":"127.0.0.1","ss":"199991111"} +2 2 {"a":{"b":{"c":678910,"d":22.222}},"dcm":"456.123000000","dt":"2022-01-01 11:11:11","ip":"127.0.0.1","ss":"29999111"} +3 3 {"dcm":"789.123000000","dt":"2025-01-01 11:11:11","ip":"127.0.0.1"} +9 2 {"a":{"b":{"c":678910,"d":22.222}},"dcm":"456.123000000","dt":"2022-01-01 11:11:11","ip":"127.0.0.1","ss":"29999111"} +10 1 {"a":{"b":{"c":123456,"d":11.111}},"dcm":"123.456000000","dt":"2021-01-01 00:00:00","ip":"127.0.0.1","ss":"199991111"} +12 3 {"dcm":"789.123000000","dt":"2025-01-01 11:11:11","ip":"127.0.0.1"} + +-- !sql -- +id bigint No true \N +v1 variant Yes false \N NONE +v1.PREDEFINE_COL1 smallint Yes false \N NONE +v1.PREDEFINE_COL2 double Yes false \N NONE +v1.PREDEFINE_COL3 text Yes false \N NONE +v1.PREDEFINE_COL4 text Yes false \N NONE +v1.predefine_col1 smallint Yes false \N NONE +v1.predefine_col2 double Yes false \N NONE +v1.predefine_col3 text Yes false \N NONE +v1.predefine_col4 text Yes false \N NONE + +-- !sql -- +1 {"predefine_col1":1024} +2 {"predefine_col2":1.11111} +3 {"predefine_col3":"11111.00000"} +4 {"predefine_col4":"2020-01-01-01"} +5 {"PREDEFINE_COL1":1024} +6 {"PREDEFINE_COL2":1.11111} +7 {"PREDEFINE_COL3":"11111.00000"} +8 {"PREDEFINE_COL4":"2020-01-01-01"} + +-- !sql -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} +3 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} +4 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} + +-- !sql -- +101 {"a":1} {"dcm":"1111111"} \N +102 {"a":1} {"dcm":"1111111"} {"dcm":1111111} +103 {"a":1} {"dcm":"1111111"} {"dt":"2021-01-01 11:11:11"} + +-- !sql -- +id bigint No true \N +v1 variant Yes false \N NONE +v2 variant Yes false \N NONE +v3 variant Yes false \N NONE +v1.PREDEFINE_COL1 smallint Yes false \N NONE +v1.PREDEFINE_COL2 double Yes false \N NONE +v1.PREDEFINE_COL3 text Yes false \N NONE +v1.PREDEFINE_COL4 text Yes false \N NONE +v1.a tinyint Yes false \N NONE +v1.predefine_col1 smallint Yes false \N NONE +v1.predefine_col2 double Yes false \N NONE +v1.predefine_col3 text Yes false \N NONE +v1.predefine_col4 text Yes false \N NONE +v2.dcm decimal(9,0) Yes false \N NONE +v3.dcm int Yes false \N NONE +v3.dt datetime Yes false \N NONE + diff --git a/regression-test/data/variant_p0/predefine/sql/q01.out b/regression-test/data/variant_p0/predefine/sql/q01.out new file mode 100644 index 000000000000000..2c5b015c7cd03df --- /dev/null +++ b/regression-test/data/variant_p0/predefine/sql/q01.out @@ -0,0 +1,34 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !q01 -- +0 + +-- !q01_2 -- +id bigint No true \N +v1 variant,array_string:array,array_decimal:array,array_datetime:array,array_datetimev2:array,array_date:array,array_datev2:array,array_ipv4:array,array_ipv6:array,array_float:array,array_boolean:array,int_:int,string_:text,decimal_:decimalv3(27,9),datetime_:datetime,datetimev2_:datetimev2(6),date_:date,datev2_:datev2,ipv4_:ipv4,ipv6_:ipv6,float_:float,boolean_:boolean,varchar_:varchar(65533)> Yes false \N NONE +v1.array_boolean array Yes false \N NONE +v1.array_date array Yes false \N NONE +v1.array_datetime array Yes false \N NONE +v1.array_datetimev2 array Yes false \N NONE +v1.array_datev2 array Yes false \N NONE +v1.array_decimal array Yes false \N NONE +v1.array_float array Yes false \N NONE +v1.array_int array Yes false \N NONE +v1.array_ipv4 array Yes false \N NONE +v1.array_ipv6 array Yes false \N NONE +v1.array_string array Yes false \N NONE +v1.boolean_ boolean Yes false \N NONE +v1.date_ date Yes false \N NONE +v1.datetime_ datetime Yes false \N NONE +v1.datetimev2_ datetime Yes false \N NONE +v1.datev2_ date Yes false \N NONE +v1.decimal_ decimal(38,0) Yes false \N NONE +v1.ext_1 double Yes false \N NONE +v1.ext_2 text Yes false \N NONE +v1.ext_3 array Yes false \N NONE +v1.float_ float Yes false \N NONE +v1.int_ int Yes false \N NONE +v1.ipv4_ ipv4 Yes false \N NONE +v1.ipv6_ ipv6 Yes false \N NONE +v1.string_ text Yes false \N NONE +v1.varchar_ text Yes false \N NONE + diff --git a/regression-test/data/variant_p0/predefine/sql/q02.out b/regression-test/data/variant_p0/predefine/sql/q02.out new file mode 100644 index 000000000000000..df34d03b16352f6 --- /dev/null +++ b/regression-test/data/variant_p0/predefine/sql/q02.out @@ -0,0 +1,103 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !q02 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} +3 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} +4 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} +10 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} +11 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} +12 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} +13 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} +14 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} +15 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} + +-- !q02_2 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} +3 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} +4 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} + +-- !q02_3 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} +3 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} +4 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} + +-- !q02_4 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} + +-- !q02_5 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} + +-- !q02_6 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} +3 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} +4 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} + +-- !q02_7 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} + +-- !q02_8 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} + +-- !q02_9 -- +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} + +-- !q02_10 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} + +-- !q02_11 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} + +-- !q02_12 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} + +-- !q02_13 -- +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} +3 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} + +-- !q02_14 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} + +-- !q02_15 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} + +-- !q02_16 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} +3 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} + +-- !q02_17 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} + +-- !q02_18 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} +3 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} +4 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"ext_1":1.111111,"ext_2":"this is an extra field","ext_3":[1,2,3],"float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} + +-- !q02_19 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} + +-- !q02_20 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} + +-- !q02_21 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} +2 {"array_boolean":[1,0,1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[2.222219944000244],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"world hello"} +3 {"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_int":[1,2,3],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","decimal_":"219911111111.011121933","float_":1.1111111640930176,"ipv4_":"127.0.0.1","varchar_":"world hello"} + +-- !q02_22 -- +1 {"array_boolean":[1,0,1],"array_date":["2021-01-01","2022-01-01","2023-01-01"],"array_datetime":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datetimev2":["2021-01-01 00:00:00","2022-01-01 00:00:00","2023-01-01 00:00:00"],"array_datev2":["2021-01-01","2022-01-01","2023-01-01"],"array_decimal":["1.100000000","2.200000000","3.300000000"],"array_float":[1.111109972000122],"array_int":[1,2,3],"array_ipv4":["127.0.0.1","172.0.1.1"],"array_ipv6":["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"],"array_string":["a","b","c"],"boolean_":1,"date_":"2022-01-01","datetime_":"2022-01-01 11:11:11","datetimev2_":"2022-01-01 11:11:11.999999","datev2_":"2022-01-01","decimal_":"188118222.011121920","float_":128.11099243164063,"int_":11111122,"ipv4_":"127.0.0.1","ipv6_":"ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe","string_":"12111222113","varchar_":"hello world"} + diff --git a/regression-test/suites/variant_p0/load.groovy b/regression-test/suites/variant_p0/load.groovy index 8f4b9a3cee53fdd..f009ed2fcb4d14e 100644 --- a/regression-test/suites/variant_p0/load.groovy +++ b/regression-test/suites/variant_p0/load.groovy @@ -442,24 +442,19 @@ suite("regression_test_variant", "p0"){ qt_sql "select * from var_as_key order by k" test { - sql """select * from ghdata where cast(v['actor']['url'] as ipv4) = '127.0.0.1'""" - exception("Invalid type for variant column: 36") - } - - test { - sql """ - create table var( - `key` int, - `content` variant - ) - DUPLICATE KEY(`key`) - distributed by hash(`key`) buckets 8 - properties( - "replication_allocation" = "tag.location.default: 1", - "light_schema_change" = "false" - ); - """ - exception("errCode = 2, detailMessage = Variant type rely on light schema change") + sql """ + create table var( + `key` int, + `content` variant + ) + DUPLICATE KEY(`key`) + distributed by hash(`key`) buckets 8 + properties( + "replication_allocation" = "tag.location.default: 1", + "light_schema_change" = "false" + ); + """ + exception("errCode = 2, detailMessage = Variant type rely on light schema change") } } finally { diff --git a/regression-test/suites/variant_p0/nested.groovy b/regression-test/suites/variant_p0/nested.groovy index 90728df25326686..31295c00605a26c 100644 --- a/regression-test/suites/variant_p0/nested.groovy +++ b/regression-test/suites/variant_p0/nested.groovy @@ -21,7 +21,6 @@ suite("regression_test_variant_nested", "p0"){ getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort); try { - def table_name = "var_nested" sql "DROP TABLE IF EXISTS ${table_name}" diff --git a/regression-test/suites/variant_p0/predefine/load.groovy b/regression-test/suites/variant_p0/predefine/load.groovy new file mode 100644 index 000000000000000..d895ed7d781979b --- /dev/null +++ b/regression-test/suites/variant_p0/predefine/load.groovy @@ -0,0 +1,217 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("regression_test_variant_predefine_schema", "p0"){ + sql """DROP TABLE IF EXISTS test_predefine""" + sql """ + CREATE TABLE `test_predefine` ( + `id` bigint NOT NULL, + `type` varchar(30) NULL, + `v1` variant NULL, + INDEX idx_var_sub(`v1`) USING INVERTED PROPERTIES("parser" = "english", "sub_column_path" = "a.b.c") ) + ENGINE=OLAP DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`) BUCKETS 3 + PROPERTIES ( "replication_allocation" = "tag.location.default: 1"); + """ + + sql """insert into test_predefine values(1, '1', '{"a" : {"b" : {"c" : "123456", "d" : "11.111"}}, "ss" : 199991111, "dcm" : 123.456, "dt" : "2021-01-01 00:00:00", "ip" : "127.0.0.1"}')""" + sql """insert into test_predefine values(2, '2', '{"a" : {"b" : {"c" : 678910, "d" : 22.222}}, "ss" : "29999111", "dcm" : "456.123", "dt" : "2022-01-01 11:11:11", "ip" : "127.0.0.1"}')""" + sql """insert into test_predefine values(3, '3', '{"dcm" : 789.123, "dt" : "2025-01-01 11:11:11.1", "ip" : "127.0.0.1"}')""" + sql """insert into test_predefine values(4, '4', '{"a" : {"b" : {"c" : "678910", "d" : "33.222"}}}')""" + sql """insert into test_predefine values(5, '5', 'null')""" + sql """insert into test_predefine values(6, '6', null)""" + sql """insert into test_predefine values(7, '7', '{"xxx" : 12345}')""" + sql """insert into test_predefine values(8, '8', '{"yyy" : 111.111}')""" + sql """insert into test_predefine values(9, '2', '{"a" : {"b" : {"c" : 678910, "d" : 22.222}}, "ss" : "29999111", "dcm" : "456.123", "dt" : "2022-01-01 11:11:11", "ip" : "127.0.0.1"}')""" + sql """insert into test_predefine values(10, '1', '{"a" : {"b" : {"c" : "123456", "d" : "11.111"}}, "ss" : 199991111, "dcm" : 123.456, "dt" : "2021-01-01 00:00:00", "ip" : "127.0.0.1"}')""" + sql """insert into test_predefine values(12, '3', '{"dcm" : 789.123, "dt" : "2025-01-01 11:11:11.1", "ip" : "127.0.0.1"}')""" + sql """insert into test_predefine values(11, '4', '{"a" : {"b" : {"c" : "678910", "d" : "33.222"}}}')""" + qt_sql """select * from test_predefine order by id""" + sql """set describe_extend_variant_column = true""" + qt_sql "desc test_predefine" + + qt_sql """select cast(v1['ip'] as ipv4) from test_predefine where cast(v1['ip'] as ipv4) = '127.0.0.1';""" + qt_sql """select cast(v1['dcm'] as decimal) from test_predefine where cast(v1['dcm'] as decimal) = '123.456';""" + qt_sql """select v1['dcm'] from test_predefine order by id;""" + qt_sql """select v1['dt'] from test_predefine where cast(v1['dt'] as datetime) = '2022-01-01 11:11:11';""" + qt_sql """select v1['dt'] from test_predefine where cast(v1['dt'] as datetime) = '2022-01-01 11:11:11' order by id limit 10""" + qt_sql """select * from test_predefine where cast(v1['dt'] as datetime) = '2022-01-01 11:11:11' order by id limit 10;""" + qt_sql """select * from test_predefine where v1['dt'] is not null order by id limit 10;""" + + sql """DROP TABLE IF EXISTS test_predefine1""" + sql """ + CREATE TABLE `test_predefine1` ( + `id` bigint NOT NULL, + `v1` variant NULL, + INDEX idx_var_sub(`v1`) USING INVERTED PROPERTIES("parser" = "english", "sub_column_path" = "a.b.c") ) + ENGINE=OLAP DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`) BUCKETS 2 + PROPERTIES ( "replication_allocation" = "tag.location.default: 1"); + """ + sql """insert into test_predefine1 values(1, '{"predefine_col1" : 1024}')""" + sql """insert into test_predefine1 values(2, '{"predefine_col2" : 1.11111}')""" + sql """insert into test_predefine1 values(3, '{"predefine_col3" : "11111.00000"}')""" + sql """insert into test_predefine1 values(4, '{"predefine_col4" : "2020-01-01-01"}')""" + + sql """insert into test_predefine1 values(5, '{"PREDEFINE_COL1" : 1024}')""" + sql """insert into test_predefine1 values(6, '{"PREDEFINE_COL2" : 1.11111}')""" + sql """insert into test_predefine1 values(7, '{"PREDEFINE_COL3" : "11111.00000"}')""" + sql """insert into test_predefine1 values(8, '{"PREDEFINE_COL4" : "2020-01-01-01"}')""" + sql """select * from test_predefine1 order by id limit 1""" + qt_sql """desc test_predefine1""" + qt_sql """select * from test_predefine1 order by id""" + + + // complex types with scalar types + sql "DROP TABLE IF EXISTS test_predefine2" + sql """ + CREATE TABLE `test_predefine2` ( + `id` bigint NOT NULL, + `v1` variant< + array_int:array, + array_string:array, + array_decimal:array, + array_datetime:array, + array_datetimev2:array, + array_date:array, + array_datev2:array, + array_ipv4:array, + array_ipv6:array, + array_float:array, + array_boolean:array, + int_:int, + string_:string, + decimal_:decimalv3(27,9), + datetime_:datetime, + datetimev2_:datetimev2(6), + date_:date, + datev2_:datev2, + ipv4_:ipv4, + ipv6_:ipv6, + float_:float, + boolean_:boolean, + varchar_:varchar + > NULL + ) ENGINE=OLAP DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`) BUCKETS 2 + PROPERTIES ( "replication_allocation" = "tag.location.default: 1"); + """ + def json1 = """ + { + "array_int" : [1, 2, 3], + "array_string" : ["a", "b", "c"], + "array_decimal" : [1.1, 2.2, 3.3], + "array_datetime" : ["2021-01-01 00:00:00", "2022-01-01 00:00:00", "2023-01-01 00:00:00"], + "array_datetimev2" : ["2021-01-01 00:00:00", "2022-01-01 00:00:00", "2023-01-01 00:00:00"], + "array_date" : ["2021-01-01", "2022-01-01", "2023-01-01"], + "array_datev2" : ["2021-01-01", "2022-01-01", "2023-01-01"], + "array_ipv4" : ["127.0.0.1", "172.0.1.1"], + "array_ipv6" : ["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"], + "array_float" : ["1.11111"], + "array_boolean" : [true, false, true], + "int_" : 11111122, + "string_" : 12111222113.0, + "decimal_" : 188118222.011121933, + "datetime_" : "2022-01-01 11:11:11", + "datetimev2_" : "2022-01-01 11:11:11.999999", + "date_" : "2022-01-01", + "datev2_" : "2022-01-01", + "ipv4_" : "127.0.0.1", + "ipv6_" : "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe", + "float_" : "128.111", + "boolean_" : true, + "varchar_" : "hello world" + } + """ + def json2 = """ + { + "array_int" : ["1", "2", 3], + "array_string" : ["a", "b", "c"], + "array_decimal" : [1.1, 2.2, 3.3], + "array_datetime" : ["2021-01-01 00:00:00", "2022-01-01 00:00:00", "2023-01-01 00:00:00"], + "array_datetimev2" : ["2021-01-01 00:00:00", "2022-01-01 00:00:00", "2023-01-01 00:00:00"], + "array_date" : ["2021-01-01", "2022-01-01", "2023-01-01"], + "array_datev2" : ["2021-01-01", "2022-01-01", "2023-01-01"], + "array_ipv4" : ["127.0.0.1", "172.0.1.1"], + "array_ipv6" : ["ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe"], + "array_float" : [2.22222], + "array_boolean" : [1, 0, 1, 0, 1], + "int_" : "3333333333", + "string_" : 12111222113.0, + "decimal_" : "219911111111.011121933", + "datetime_" : "2022-01-01 11:11:11", + "datetimev2_" : "2022-01-01 11:11:11.999999", + "date_" : "2022-01-01", + "datev2_" : "2022-01-01", + "ipv4_" : "127.0.0.1", + "ipv6_" : "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe", + "float_" : 1.111111111, + "boolean_" : true, + "varchar_" : "world hello" + } + """ + def json3 = """ + { + "array_int" : ["1", "2", 3], + "array_string" : ["a", "b", "c"], + "array_datetimev2" : ["2021-01-01 00:00:00", "2022-01-01 00:00:00", "2023-01-01 00:00:00"], + "int_" : "3333333333", + "decimal_" : "219911111111.011121933", + "date_" : "2022-01-01", + "ipv4_" : "127.0.0.1", + "float_" : 1.111111111, + "boolean_" : true, + "varchar_" : "world hello" + } + """ + def json4 = """ + { + "array_int" : ["1", "2", 3], + "array_string" : ["a", "b", "c"], + "array_datetimev2" : ["2021-01-01 00:00:00", "2022-01-01 00:00:00", "2023-01-01 00:00:00"], + "ipv4_" : "127.0.0.1", + "float_" : 1.111111111, + "varchar_" : "world hello", + "ext_1" : 1.111111, + "ext_2" : "this is an extra field", + "ext_3" : [1, 2, 3] + } + """ + sql "insert into test_predefine2 values(1, '${json1}')" + sql "insert into test_predefine2 values(2, '${json2}')" + sql "insert into test_predefine2 values(3, '${json3}')" + sql "insert into test_predefine2 values(4, '${json4}')" + + qt_sql """select * from test_predefine2 order by id""" + + for (int i = 10; i < 100; i++) { + sql "insert into test_predefine2 values(${i}, '${json4}')" + } + + // // schema change + // // 1. add column + sql "alter table test_predefine1 add column v2 variant default null" + sql """insert into test_predefine1 values(101, '{"a" :1}', '{"dcm": 1111111}')""" + sql "alter table test_predefine1 add column v3 variant default null" + sql """insert into test_predefine1 values(102, '{"a" :1}', '{"dcm": 1111111}', '{"dcm": 1111111}');""" + // 2. alter column type + sql "alter table test_predefine1 modify column v3 variant" + sql """insert into test_predefine1 values(103, '{"a" :1}', '{"dcm": 1111111}', '{"dt": "2021-01-01 11:11:11"}');""" + qt_sql """select * from test_predefine1 where id >= 100 order by id""" + // 3. drop column + qt_sql "desc test_predefine1" + sql "alter table test_predefine1 drop column v3" + + sql """insert into test_predefine1 select id, v1, v1 from test_predefine2""" +} \ No newline at end of file diff --git a/regression-test/suites/variant_p0/predefine/sql/q01.sql b/regression-test/suites/variant_p0/predefine/sql/q01.sql new file mode 100644 index 000000000000000..ccc510f2a08689c --- /dev/null +++ b/regression-test/suites/variant_p0/predefine/sql/q01.sql @@ -0,0 +1,2 @@ +set describe_extend_variant_column = true; +desc test_predefine2; \ No newline at end of file diff --git a/regression-test/suites/variant_p0/predefine/sql/q02.sql b/regression-test/suites/variant_p0/predefine/sql/q02.sql new file mode 100644 index 000000000000000..290af22ded6b562 --- /dev/null +++ b/regression-test/suites/variant_p0/predefine/sql/q02.sql @@ -0,0 +1,24 @@ +select * from test_predefine2 order by id limit 10; +select * from test_predefine2 where array_contains(cast(v1['array_int'] as array), 1) order by id limit 4; +select * from test_predefine2 where array_contains(cast(v1['array_string'] as array), 'b') order by id limit 4; +select * from test_predefine2 where array_contains(cast(v1['array_decimal'] as array), 1.1) order by id limit 4; +select * from test_predefine2 where array_contains(cast(v1['array_datetime'] as array), '2021-01-01 00:00:00') order by id limit 4; +select * from test_predefine2 where array_contains(cast(v1['array_datetimev2'] as array), '2021-01-01 00:00:00') order by id limit 4; +select * from test_predefine2 where array_contains(cast(v1['array_date'] as array), '2021-01-01') order by id limit 4; +select * from test_predefine2 where array_contains(cast(v1['array_datev2'] as array), '2021-01-01') order by id limit 4; +-- select * from test_predefine2 where array_contains(cast(v1['array_ipv4'] as array), '127.0.0.1') order by id limit 4; +-- select * from test_predefine2 where array_contains(cast(v1['array_ipv6'] as array), 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe') order by id limit 4; +select * from test_predefine2 where cast(v1['array_float'] as array)[1] >= 1.11111 order by id limit 4; +select * from test_predefine2 where array_contains(cast(v1['array_boolean'] as array), 1) order by id limit 4; +select * from test_predefine2 where cast(v1['int_'] as int) = 11111122 order by id limit 4; +select * from test_predefine2 where cast(v1['string_'] as string) = '12111222113' order by id limit 4; +select * from test_predefine2 where cast(v1['decimal_'] as decimal) >= 188118222.011121933 order by id limit 4; +select * from test_predefine2 where cast(v1['datetime_'] as datetime) = '2022-01-01 11:11:11' order by id limit 4; +select * from test_predefine2 where cast(v1['datetimev2_'] as datetimev2(6)) = '2022-01-01 11:11:11.999999' order by id limit 4; +select * from test_predefine2 where cast(v1['date_'] as date) = '2022-01-01' order by id limit 4; +select * from test_predefine2 where cast(v1['datev2_'] as datev2) = '2022-01-01' order by id limit 4; +select * from test_predefine2 where cast(v1['ipv4_'] as ipv4) = '127.0.0.1' order by id limit 4; +select * from test_predefine2 where cast(v1['ipv6_'] as ipv6) = 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe' order by id limit 4; +select * from test_predefine2 where cast(v1['float_'] as float) >= 128.11 order by id limit 4; +select * from test_predefine2 where cast(v1['boolean_'] as boolean) = 1 order by id limit 4; +select * from test_predefine2 where cast(v1['varchar_'] as varchar) = 'hello world' order by id limit 4; \ No newline at end of file