From 68f496d8da662bfb1062748b5bafb3cc2b6f4142 Mon Sep 17 00:00:00 2001 From: forwardxu Date: Sat, 20 Jul 2024 15:49:55 +0800 Subject: [PATCH 01/18] feat(spec): Add primitive data types --- crates/paimon/Cargo.toml | 2 + crates/paimon/src/spec/mod.rs | 3 + crates/paimon/src/spec/schema.rs | 34 +- crates/paimon/src/spec/types.rs | 979 +++++++++++++++++++++++++++++++ 4 files changed, 986 insertions(+), 32 deletions(-) create mode 100644 crates/paimon/src/spec/types.rs diff --git a/crates/paimon/Cargo.toml b/crates/paimon/Cargo.toml index 6d057d5..4d6cdee 100644 --- a/crates/paimon/Cargo.toml +++ b/crates/paimon/Cargo.toml @@ -27,8 +27,10 @@ license.workspace = true version.workspace = true [dependencies] +bitflags = "2.6.0" chrono = {version = "0.4.38", features = ["serde"]} serde = { version = "1", features = ["derive"] } +serde_json = "1.0" serde_with = "3.8.3" snafu = "0.8.3" typed-builder = "^0.18" diff --git a/crates/paimon/src/spec/mod.rs b/crates/paimon/src/spec/mod.rs index eb25755..fc09dcd 100644 --- a/crates/paimon/src/spec/mod.rs +++ b/crates/paimon/src/spec/mod.rs @@ -27,3 +27,6 @@ pub use schema::*; mod snapshot; pub use snapshot::*; + +mod types; +pub use types::*; diff --git a/crates/paimon/src/spec/schema.rs b/crates/paimon/src/spec/schema.rs index b6a5498..b90df9f 100644 --- a/crates/paimon/src/spec/schema.rs +++ b/crates/paimon/src/spec/schema.rs @@ -15,12 +15,11 @@ // specific language governing permissions and limitations // under the License. -use crate::error::Error; +use crate::spec::types::DataType; use serde::{Deserialize, Serialize}; use serde_with::{serde_as, DisplayFromStr}; use std::collections::HashMap; -use std::fmt::{Display, Formatter}; -use std::str::FromStr; + /// The table schema for paimon table. /// @@ -53,32 +52,3 @@ pub struct DataField { typ: DataType, description: Option, } - -/// Data type for paimon table. -/// -/// Impl Reference: -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct DataType { - is_nullable: bool, - type_root: DataTypeRoot, -} - -impl Display for DataType { - fn fmt(&self, _: &mut Formatter<'_>) -> std::fmt::Result { - todo!() - } -} - -impl FromStr for DataType { - type Err = Error; - - fn from_str(_: &str) -> Result { - todo!() - } -} - -/// The root of data type. -/// -/// Impl Reference: -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum DataTypeRoot {} diff --git a/crates/paimon/src/spec/types.rs b/crates/paimon/src/spec/types.rs new file mode 100644 index 0000000..e193aa2 --- /dev/null +++ b/crates/paimon/src/spec/types.rs @@ -0,0 +1,979 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::error::Error; +use bitflags::bitflags; +use serde::{Deserialize, Serialize}; +use std::fmt::{Display, Formatter}; +use std::str::FromStr; + +bitflags! { +/// An enumeration of Data type families for clustering {@link DataTypeRoot}s into categories. +/// +/// Impl Reference: +#[derive(Debug, Clone, PartialEq, Eq)] + pub struct DataTypeFamily: u32 { + const PREDEFINED = 1 << 0; + const CONSTRUCTED = 1 << 1; + const CHARACTER_STRING = 1 << 2; + const BINARY_STRING = 1 << 3; + const NUMERIC = 1 << 4; + const INTEGER_NUMERIC = 1 << 5; + const EXACT_NUMERIC = 1 << 6; + const APPROXIMATE_NUMERIC = 1 << 7; + const DATETIME = 1 << 8; + const TIME = 1 << 9; + const TIMESTAMP = 1 << 10; + const COLLECTION = 1 << 11; + const EXTENSION = 1 << 12; + } +} + +/// The root of data type. +/// +/// Impl Reference: +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub enum DataTypeRoot { + Char, + Varchar, + Boolean, + Binary, + Varbinary, + Decimal, + Tinyint, + Smallint, + Integer, + Bigint, + Float, + Double, + Date, + TimeWithoutTimeZone, + TimestampWithoutTimeZone, + TimestampWithLocalTimeZone, + Array, + Multiset, + Map, + Row, +} + +impl DataTypeRoot { + pub fn families(&self) -> DataTypeFamily { + match self { + Self::Char => DataTypeFamily::PREDEFINED | DataTypeFamily::CHARACTER_STRING, + Self::Varchar => DataTypeFamily::PREDEFINED | DataTypeFamily::CHARACTER_STRING, + Self::Boolean => DataTypeFamily::PREDEFINED, + Self::Binary => DataTypeFamily::PREDEFINED | DataTypeFamily::BINARY_STRING, + Self::Varbinary => DataTypeFamily::PREDEFINED | DataTypeFamily::BINARY_STRING, + Self::Decimal => { + DataTypeFamily::PREDEFINED | DataTypeFamily::NUMERIC | DataTypeFamily::EXACT_NUMERIC + } + Self::Tinyint => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Smallint => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Integer => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Bigint => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Float => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::APPROXIMATE_NUMERIC + } + Self::Double => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::APPROXIMATE_NUMERIC + } + Self::Date => DataTypeFamily::PREDEFINED | DataTypeFamily::DATETIME, + Self::TimeWithoutTimeZone => { + DataTypeFamily::PREDEFINED | DataTypeFamily::DATETIME | DataTypeFamily::TIME + } + Self::TimestampWithoutTimeZone => { + DataTypeFamily::PREDEFINED | DataTypeFamily::DATETIME | DataTypeFamily::TIMESTAMP + } + Self::TimestampWithLocalTimeZone => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::DATETIME + | DataTypeFamily::TIMESTAMP + | DataTypeFamily::EXTENSION + } + Self::Array => DataTypeFamily::CONSTRUCTED | DataTypeFamily::COLLECTION, + Self::Multiset => DataTypeFamily::CONSTRUCTED | DataTypeFamily::COLLECTION, + Self::Map => DataTypeFamily::CONSTRUCTED | DataTypeFamily::EXTENSION, + Self::Row => DataTypeFamily::CONSTRUCTED, + } + } +} + +/// A visitor that can visit different data types. +pub trait DataTypeVisitor { + fn visit(&mut self, data_type: &DataType) -> R; +} + +/// Data type for paimon table. +/// +/// Impl Reference: +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub struct DataType { + is_nullable: bool, + type_root: DataTypeRoot, +} + +impl Display for DataType { + fn fmt(&self, _: &mut Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl FromStr for DataType { + type Err = Error; + + fn from_str(_: &str) -> Result { + todo!() + } +} + +impl DataType { + fn new(is_nullable: bool, type_root: DataTypeRoot) -> Self { + Self { + is_nullable, + type_root, + } + } + + /// Returns true if the data type is nullable. + /// + /// Impl Reference: + fn is_nullable(&self) -> bool { + self.is_nullable + } + + /// Returns the root of the data type. + /// + /// Impl Reference: + fn get_type_root(&self) -> &DataTypeRoot { + &self.type_root + } + + /// Returns the family of the data type. + /// + /// Impl Reference: + fn is(&self, type_root: &DataTypeRoot) -> bool { + &self.type_root == type_root + } + + /// Returns true if the data type is with the family. + /// + /// Impl Reference: + fn is_with_family(&self, family: DataTypeFamily) -> bool { + self.type_root.families().contains(family) + } + + /// Returns true if the data type is with the family. + /// + /// Impl Reference: + fn is_any_of(&self, type_roots: &[DataTypeRoot]) -> bool { + type_roots.iter().any(|tr: &DataTypeRoot| self.is(tr)) + } + + /// Returns true if the data type is with the family. + /// Impl Reference: + fn is_any_of_family(&self, families: &[DataTypeFamily]) -> bool { + families + .iter() + .any(|f: &DataTypeFamily| self.is_with_family(f.clone())) + } + + /// Returns true if the data type is with the family. + /// Impl Reference: + fn copy(&self, is_nullable: bool) -> Self { + Self { + is_nullable, + type_root: self.type_root.clone(), + } + } + + /// Returns true if the data type is with the family. + /// Impl Reference: + fn copy_with_nullable(&self) -> Self { + self.copy(self.is_nullable) + } + + /// Returns true if the data type is with the family. + /// Impl Reference: + fn copy_ignore_nullable(&self) -> Self { + self.copy(false) + } + + fn as_sql_string(&self) -> String { + match self.type_root { + DataTypeRoot::Char => "CHAR".to_string(), + DataTypeRoot::Varchar => "VARCHAR".to_string(), + DataTypeRoot::Boolean => "BOOLEAN".to_string(), + DataTypeRoot::Binary => "BINARY".to_string(), + DataTypeRoot::Varbinary => "VARBINARY".to_string(), + DataTypeRoot::Decimal => "DECIMAL".to_string(), + DataTypeRoot::Tinyint => "TINYINT".to_string(), + DataTypeRoot::Smallint => "SMALLINT".to_string(), + DataTypeRoot::Integer => "INTEGER".to_string(), + DataTypeRoot::Bigint => "BIGINT".to_string(), + DataTypeRoot::Float => "FLOAT".to_string(), + DataTypeRoot::Double => "DOUBLE".to_string(), + DataTypeRoot::Date => "DATE".to_string(), + DataTypeRoot::TimeWithoutTimeZone => "TIME".to_string(), + DataTypeRoot::TimestampWithoutTimeZone => "TIMESTAMP".to_string(), + DataTypeRoot::TimestampWithLocalTimeZone => { + "TIMESTAMP WITH LOCAL TIME ZONE".to_string() + } + DataTypeRoot::Array => "ARRAY".to_string(), + DataTypeRoot::Multiset => "MULTISET".to_string(), + DataTypeRoot::Map => "MAP".to_string(), + DataTypeRoot::Row => "ROW".to_string(), + } + } + + fn serialize_json(&self) -> String { + serde_json::to_string(self).unwrap() + } + + fn with_nullability(&self, format: &str, params: &[&str]) -> String { + if !self.is_nullable() { + format!("{} NOT NULL", format!("{}{}", format, params.concat())) + } else { + format!("{}{}", format, params.concat()) + } + } + + fn to_string(&self) -> String { + self.as_sql_string() + } + + fn accept(&self, visitor: &mut T) + where + T: DataTypeVisitor, + { + visitor.visit(self); + } + + fn collect_field_ids(&self, field_ids: &mut Vec) { + } + + fn not_null(&self) -> Self { + self.copy(false) + } + + fn nullable(&self) -> Self { + self.copy(true) + } +} + +/// ArrayType for paimon. +/// +/// Impl Reference: . +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +#[serde(rename_all = "camelCase")] +pub struct ArrayType { + pub element_type: DataType, +} + +impl ArrayType { + pub const FORMAT: &str = "ARRAY<{}>"; + + pub fn new(is_nullable: bool) -> Self { + Self { + element_type: DataType::new(is_nullable, DataTypeRoot::Array), + } + } + + pub fn default() -> Self { + ArrayType::new(true) + } + + pub fn collect_field_ids(&self, field_ids: &mut Vec) { + self.element_type.collect_field_ids(field_ids); + } +} + +/// BigIntType for paimon. +/// +/// Impl Reference: . +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub struct BigIntType { + pub element_type: DataType, +} + +impl BigIntType { + pub fn new(is_nullable: bool) -> Self { + Self { + element_type: DataType::new(is_nullable, DataTypeRoot::Bigint), + } + } + + pub fn default() -> Self { + BigIntType::new(true) + } + + pub fn collect_field_ids(&self, field_ids: &mut Vec) { + self.element_type.collect_field_ids(field_ids); + } +} + +/// BinaryType for paimon. +/// +/// Impl Reference: . +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +#[serde(rename_all = "camelCase")] +pub struct BinaryType { + pub element_type: DataType, + length: usize, +} + +impl BinaryType { + pub const MIN_LENGTH: usize = 1; + + pub const MAX_LENGTH: usize = isize::MAX as usize; + + pub const DEFAULT_LENGTH: usize = 1; + + pub fn new(is_nullable: bool, length: usize) -> Self { + BinaryType::new_with_result(is_nullable, length).unwrap() + } + + pub fn new_with_result(is_nullable: bool, length: usize) -> Result { + if length < BinaryType::MIN_LENGTH { + Err("Binary string length must be at least 1.") + } else { + Ok(BinaryType { + element_type: DataType { + is_nullable, + type_root: DataTypeRoot::Binary, + }, + length, + }) + } + } + + pub fn with_length(length: usize) -> Self { + BinaryType::new(true, length) + } + + pub fn default() -> Self { + BinaryType::with_length(BinaryType::DEFAULT_LENGTH) + } + + pub fn get_length(&self) -> usize { + self.length + } + + pub fn collect_field_ids(&self, field_ids: &mut Vec) { + self.element_type.collect_field_ids(field_ids); + } +} + +/// BooleanType for paimon. +/// +/// Impl Reference: . +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub struct BooleanType { + pub element_type: DataType, +} + +impl BooleanType { + pub fn new(is_nullable: bool) -> Self { + Self { + element_type: DataType::new(is_nullable, DataTypeRoot::Boolean), + } + } + + pub fn default() -> Self { + BooleanType::new(true) + } + + pub fn collect_field_ids(&self, field_ids: &mut Vec) { + self.element_type.collect_field_ids(field_ids); + } +} + +/// CharType for paimon. +/// +/// Impl Reference: . +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct CharType { + element_type: DataType, + length: usize, +} + +impl CharType { + pub const DEFAULT_LENGTH: usize = 1; + + pub const MIN_LENGTH: usize = 1; + + pub const MAX_LENGTH: usize = 255; + + pub const FORMAT: &str = "CHAR(%d)"; + + pub fn new(is_nullable: bool, length: usize) -> Self { + CharType::new_with_result(is_nullable, length).unwrap() + } + + pub fn new_with_result(is_nullable: bool, length: usize) -> Result { + if length < Self::MIN_LENGTH || length > Self::MAX_LENGTH { + Err("Character string length must be between 1 and 255 (both inclusive).") + } else { + Ok(CharType { + element_type: DataType { + is_nullable, + type_root: DataTypeRoot::Char, + }, + length, + }) + } + } + + pub fn with_length(length: usize) -> Self { + CharType::new(true, length) + } + + pub fn default() -> Self { + CharType::with_length(CharType::DEFAULT_LENGTH) + } + + pub fn get_length(&self) -> usize { + self.length + } + + pub fn collect_field_ids(&self, field_ids: &mut Vec) { + self.element_type.collect_field_ids(field_ids); + } +} + +/// DateType for paimon. +/// +/// Impl Reference: . +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +pub struct DateType { + element_type: DataType, +} + +impl DateType { + pub fn new(is_nullable: bool) -> Self { + Self { + element_type: DataType::new(is_nullable, DataTypeRoot::Date), + } + } + + pub fn default() -> Self { + DateType::new(true) + } + + pub fn collect_field_ids(&self, field_ids: &mut Vec) { + self.element_type.collect_field_ids(field_ids); + } +} + +/// DecimalType for paimon. +/// +/// Impl Reference: . +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub struct DecimalType { + element_type: DataType, + precision: u32, + scale: u32, +} + +impl DecimalType { + pub const MIN_PRECISION: u32 = 1; + + pub const MAX_PRECISION: u32 = 38; + + pub const DEFAULT_PRECISION: u32 = 10; + + pub const MIN_SCALE: u32 = 0; + + pub const DEFAULT_SCALE: u32 = 0; + + pub fn new(is_nullable: bool, precision: u32, scale: u32) -> Self { + DecimalType::new_with_result(is_nullable, precision, scale).unwrap() + } + + pub fn new_with_result(is_nullable: bool, precision: u32, scale: u32) -> Result { + if precision < Self::MIN_PRECISION || precision > Self::MAX_PRECISION { + return Err(format!( + "Decimal precision must be between {} and {} (both inclusive).", + Self::MIN_PRECISION, + Self::MAX_PRECISION + )); + } + if scale < Self::MIN_SCALE || scale > precision { + return Err(format!( + "Decimal scale must be between {} and the precision {} (both inclusive).", + Self::MIN_SCALE, + precision + )); + } + + Ok(DecimalType { + element_type: DataType { + is_nullable, + type_root: DataTypeRoot::Decimal, + }, + precision, + scale, + }) + } + + pub fn with_precision_and_scale(precision: u32, scale: u32) -> Self { + DecimalType::new(true, precision, scale) + } + + pub fn default() -> Self { + DecimalType::with_precision_and_scale( + DecimalType::DEFAULT_PRECISION, + DecimalType::DEFAULT_SCALE, + ) + } + + pub fn get_precision(&self) -> u32 { + self.precision + } + + pub fn get_scale(&self) -> u32 { + self.scale + } + + pub fn collect_field_ids(&self, field_ids: &mut Vec) { + self.element_type.collect_field_ids(field_ids); + } +} + +/// DoubleType for paimon. +/// +/// Impl Reference: . +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub struct DoubleType { + element_type: DataType, +} + +impl DoubleType { + pub fn new(is_nullable: bool) -> Self { + Self { + element_type: DataType::new(is_nullable, DataTypeRoot::Double), + } + } + + pub fn default() -> Self { + DoubleType::new(true) + } + + pub fn collect_field_ids(&self, field_ids: &mut Vec) { + self.element_type.collect_field_ids(field_ids); + } +} + +/// FloatType for paimon. +/// +/// Impl Reference: . +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub struct FloatType { + element_type: DataType, +} + +impl FloatType { + pub fn new(is_nullable: bool) -> Self { + Self { + element_type: DataType::new(is_nullable, DataTypeRoot::Float), + } + } + + pub fn default() -> Self { + FloatType::new(true) + } + + pub fn collect_field_ids(&self, field_ids: &mut Vec) { + self.element_type.collect_field_ids(field_ids); + } +} + +/// IntType for paimon. +/// +/// Impl Reference: . +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub struct IntType { + element_type: DataType, +} + +impl IntType { + pub fn new(is_nullable: bool) -> Self { + Self { + element_type: DataType::new(is_nullable, DataTypeRoot::Integer), + } + } + + pub fn default() -> Self { + IntType::new(true) + } + + pub fn collect_field_ids(&self, field_ids: &mut Vec) { + self.element_type.collect_field_ids(field_ids); + } +} + +/// LocalZonedTimestampType for paimon. +/// +/// Impl Reference: . +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub struct LocalZonedTimestampType { + element_type: DataType, + precision: u32, +} + +impl LocalZonedTimestampType { + pub const MIN_PRECISION: u32 = TimestampType::MIN_PRECISION; + + pub const MAX_PRECISION: u32 = TimestampType::MAX_PRECISION; + + pub const DEFAULT_PRECISION: u32 = TimestampType::DEFAULT_PRECISION; + + pub fn new(is_nullable: bool, precision: u32) -> Self { + LocalZonedTimestampType::new_with_result(is_nullable, precision).unwrap() + } + + pub fn new_with_result(is_nullable: bool, precision: u32) -> Result { + if precision < Self::MIN_PRECISION || precision > Self::MAX_PRECISION { + return Err(format!( + "Timestamp precision must be between {} and {} (both inclusive).", + Self::MIN_PRECISION, + Self::MAX_PRECISION + )); + } + + Ok(LocalZonedTimestampType { + element_type: DataType { + is_nullable, + type_root: DataTypeRoot::TimestampWithLocalTimeZone, + }, + precision, + }) + } + + pub fn with_precision(precision: u32) -> Self { + LocalZonedTimestampType::new(true, precision) + } + + pub fn default() -> Self { + LocalZonedTimestampType::with_precision(LocalZonedTimestampType::DEFAULT_PRECISION) + } + + pub fn get_precision(&self) -> u32 { + self.precision + } + + pub fn collect_field_ids(&self, field_ids: &mut Vec) { + self.element_type.collect_field_ids(field_ids); + } +} + +/// Next TODO: MapType、MultisetType、RowType + +/// SmallIntType for paimon. +/// +/// Impl Reference: . +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub struct SmallIntType { + element_type: DataType, +} + +impl SmallIntType { + pub fn new(is_nullable: bool) -> Self { + Self { + element_type: DataType::new(is_nullable, DataTypeRoot::Smallint), + } + } + + pub fn default() -> Self { + SmallIntType::new(true) + } + + pub fn collect_field_ids(&self, field_ids: &mut Vec) { + self.element_type.collect_field_ids(field_ids); + } +} + +/// TimeType for paimon. +/// +/// Impl Reference: . +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub struct TimeType { + element_type: DataType, + precision: u32, +} + +impl TimeType { + pub const MIN_PRECISION: u32 = 0; + + pub const MAX_PRECISION: u32 = 9; + + pub const DEFAULT_PRECISION: u32 = 0; + + pub fn new(is_nullable: bool, precision: u32) -> Self { + TimeType::new_with_result(is_nullable, precision).unwrap() + } + + pub fn new_with_result(is_nullable: bool, precision: u32) -> Result { + if precision < Self::MIN_PRECISION || precision > Self::MAX_PRECISION { + return Err(format!( + "Time precision must be between {} and {} (both inclusive).", + Self::MIN_PRECISION, + Self::MAX_PRECISION + )); + } + + Ok(TimeType { + element_type: DataType { + is_nullable, + type_root: DataTypeRoot::TimeWithoutTimeZone, + }, + precision, + }) + } + + pub fn with_precision(precision: u32) -> Self { + TimeType::new(true, precision) + } + + pub fn default() -> Self { + TimeType::with_precision(TimeType::DEFAULT_PRECISION) + } + + pub fn get_precision(&self) -> u32 { + self.precision + } + + pub fn collect_field_ids(&self, field_ids: &mut Vec) { + self.element_type.collect_field_ids(field_ids); + } +} + +/// TimestampType for paimon. +/// +/// Impl Reference: . +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub struct TimestampType { + element_type: DataType, + precision: u32, +} + +impl TimestampType { + pub const MIN_PRECISION: u32 = 0; + + pub const MAX_PRECISION: u32 = 9; + + pub const DEFAULT_PRECISION: u32 = 6; + + pub fn new(is_nullable: bool, precision: u32) -> Self { + TimestampType::new_with_result(is_nullable, precision).unwrap() + } + + pub fn new_with_result(is_nullable: bool, precision: u32) -> Result { + if precision < Self::MIN_PRECISION || precision > Self::MAX_PRECISION { + return Err(format!( + "Timestamp precision must be between {} and {} (both inclusive).", + Self::MIN_PRECISION, + Self::MAX_PRECISION + )); + } + + Ok(TimestampType { + element_type: DataType { + is_nullable, + type_root: DataTypeRoot::TimestampWithoutTimeZone, + }, + precision, + }) + } + + pub fn with_precision(precision: u32) -> Self { + TimestampType::new(true, precision) + } + + pub fn default() -> Self { + TimestampType::with_precision(TimestampType::DEFAULT_PRECISION) + } + + pub fn get_precision(&self) -> u32 { + self.precision + } + + pub fn collect_field_ids(&self, field_ids: &mut Vec) { + self.element_type.collect_field_ids(field_ids); + } +} + +/// TinyIntType for paimon. +/// +/// Impl Reference: . +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub struct TinyIntType { + element_type: DataType, +} + +impl TinyIntType { + pub fn new(is_nullable: bool) -> Self { + Self { + element_type: DataType::new(is_nullable, DataTypeRoot::Tinyint), + } + } + + pub fn default() -> Self { + TinyIntType::new(true) + } + + pub fn collect_field_ids(&self, field_ids: &mut Vec) { + self.element_type.collect_field_ids(field_ids); + } +} + +/// VarBinaryType for paimon. +/// +/// Impl Reference: . +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub struct VarBinaryType { + element_type: DataType, + length: u32, +} + +impl VarBinaryType { + pub const MIN_LENGTH: u32 = 1; + + pub const MAX_LENGTH: u32 = isize::MAX as u32; + + pub const DEFAULT_LENGTH: u32 = 1; + + pub fn new(is_nullable: bool, length: u32) -> Self { + VarBinaryType::new_with_result(is_nullable, length).unwrap() + } + + pub fn new_with_result(is_nullable: bool, length: u32) -> Result { + if length < VarBinaryType::MIN_LENGTH { + return Err("Binary string length must be at least 1.".to_string()); + } + + Ok(VarBinaryType { + element_type: DataType { + is_nullable, + type_root: DataTypeRoot::Varbinary, + }, + length, + }) + } + + pub fn with_length(length: u32) -> Self { + VarBinaryType::new(true, length) + } + + pub fn default() -> Self { + VarBinaryType::with_length(VarBinaryType::DEFAULT_LENGTH) + } + + pub fn get_length(&self) -> u32 { + self.length + } + + pub fn collect_field_ids(&self, field_ids: &mut Vec) { + self.element_type.collect_field_ids(field_ids); + } +} + +/// VarCharType for paimon. +/// +/// Impl Reference: . +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub struct VarCharType { + element_type: DataType, + length: u32, +} + +impl VarCharType { + pub const MIN_LENGTH: u32 = 1; + + pub const MAX_LENGTH: u32 = isize::MAX as u32; + + pub const DEFAULT_LENGTH: u32 = 1; + + pub fn new(is_nullable: bool, length: u32) -> Self { + VarCharType::new_with_result(is_nullable, length).unwrap() + } + + pub fn new_with_result(is_nullable: bool, length: u32) -> Result { + if length < VarCharType::MIN_LENGTH || length > VarCharType::MAX_LENGTH { + return Err(format!( + "Character string length must be between {} and {} (both inclusive).", + VarCharType::MIN_LENGTH, + VarCharType::MAX_LENGTH + )); + } + + Ok(VarCharType { + element_type: DataType { + is_nullable, + type_root: DataTypeRoot::Varchar, + }, + length, + }) + } + + pub fn with_length(length: u32) -> Self { + VarCharType::new(true, length) + } + + pub fn default() -> Self { + VarCharType::with_length(VarCharType::DEFAULT_LENGTH) + } + + pub fn get_length(&self) -> u32 { + self.length + } + + pub fn collect_field_ids(&self, field_ids: &mut Vec) { + self.element_type.collect_field_ids(field_ids); + } +} + +fn main() { + let data_type: DataType = DataType::new(true, DataTypeRoot::Char); + println!("{:?}", data_type); + + let array_type: ArrayType = ArrayType::new(true); +} From 20942160ff94b76cf4bafd40b849d07e8a913a84 Mon Sep 17 00:00:00 2001 From: forwardxu Date: Sat, 20 Jul 2024 15:52:22 +0800 Subject: [PATCH 02/18] feat(spec): Add primitive data types --- crates/paimon/src/spec/schema.rs | 1 - crates/paimon/src/spec/types.rs | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/crates/paimon/src/spec/schema.rs b/crates/paimon/src/spec/schema.rs index b90df9f..7a9b0d0 100644 --- a/crates/paimon/src/spec/schema.rs +++ b/crates/paimon/src/spec/schema.rs @@ -20,7 +20,6 @@ use serde::{Deserialize, Serialize}; use serde_with::{serde_as, DisplayFromStr}; use std::collections::HashMap; - /// The table schema for paimon table. /// /// Impl References: diff --git a/crates/paimon/src/spec/types.rs b/crates/paimon/src/spec/types.rs index e193aa2..213e0f8 100644 --- a/crates/paimon/src/spec/types.rs +++ b/crates/paimon/src/spec/types.rs @@ -286,8 +286,7 @@ impl DataType { visitor.visit(self); } - fn collect_field_ids(&self, field_ids: &mut Vec) { - } + fn collect_field_ids(&self, field_ids: &mut Vec) {} fn not_null(&self) -> Self { self.copy(false) @@ -309,7 +308,7 @@ pub struct ArrayType { impl ArrayType { pub const FORMAT: &str = "ARRAY<{}>"; - + pub fn new(is_nullable: bool) -> Self { Self { element_type: DataType::new(is_nullable, DataTypeRoot::Array), From 06068d790a3499ff22843cd71e46474dd4f99cdc Mon Sep 17 00:00:00 2001 From: forwardxu Date: Sat, 20 Jul 2024 15:56:25 +0800 Subject: [PATCH 03/18] feat(spec): Add primitive data types --- crates/paimon/src/spec/types.rs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/crates/paimon/src/spec/types.rs b/crates/paimon/src/spec/types.rs index 213e0f8..5a58a8c 100644 --- a/crates/paimon/src/spec/types.rs +++ b/crates/paimon/src/spec/types.rs @@ -969,10 +969,3 @@ impl VarCharType { self.element_type.collect_field_ids(field_ids); } } - -fn main() { - let data_type: DataType = DataType::new(true, DataTypeRoot::Char); - println!("{:?}", data_type); - - let array_type: ArrayType = ArrayType::new(true); -} From 5c5db0b5f7f3124bd3a0987d088305b47225a984 Mon Sep 17 00:00:00 2001 From: forwardxu Date: Sat, 20 Jul 2024 15:59:58 +0800 Subject: [PATCH 04/18] feat(spec): Add primitive data types --- crates/paimon/src/spec/types.rs | 34 ++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/crates/paimon/src/spec/types.rs b/crates/paimon/src/spec/types.rs index 5a58a8c..3264cd5 100644 --- a/crates/paimon/src/spec/types.rs +++ b/crates/paimon/src/spec/types.rs @@ -315,7 +315,7 @@ impl ArrayType { } } - pub fn default() -> Self { + pub fn default_value() -> Self { ArrayType::new(true) } @@ -339,7 +339,7 @@ impl BigIntType { } } - pub fn default() -> Self { + pub fn default_value() -> Self { BigIntType::new(true) } @@ -387,7 +387,7 @@ impl BinaryType { BinaryType::new(true, length) } - pub fn default() -> Self { + pub fn default_value() -> Self { BinaryType::with_length(BinaryType::DEFAULT_LENGTH) } @@ -415,7 +415,7 @@ impl BooleanType { } } - pub fn default() -> Self { + pub fn default_value() -> Self { BooleanType::new(true) } @@ -465,7 +465,7 @@ impl CharType { CharType::new(true, length) } - pub fn default() -> Self { + pub fn default_value() -> Self { CharType::with_length(CharType::DEFAULT_LENGTH) } @@ -493,7 +493,7 @@ impl DateType { } } - pub fn default() -> Self { + pub fn default_value() -> Self { DateType::new(true) } @@ -557,7 +557,7 @@ impl DecimalType { DecimalType::new(true, precision, scale) } - pub fn default() -> Self { + pub fn default_value() -> Self { DecimalType::with_precision_and_scale( DecimalType::DEFAULT_PRECISION, DecimalType::DEFAULT_SCALE, @@ -592,7 +592,7 @@ impl DoubleType { } } - pub fn default() -> Self { + pub fn default_value() -> Self { DoubleType::new(true) } @@ -616,7 +616,7 @@ impl FloatType { } } - pub fn default() -> Self { + pub fn default_value() -> Self { FloatType::new(true) } @@ -640,7 +640,7 @@ impl IntType { } } - pub fn default() -> Self { + pub fn default_value() -> Self { IntType::new(true) } @@ -691,7 +691,7 @@ impl LocalZonedTimestampType { LocalZonedTimestampType::new(true, precision) } - pub fn default() -> Self { + pub fn default_value() -> Self { LocalZonedTimestampType::with_precision(LocalZonedTimestampType::DEFAULT_PRECISION) } @@ -721,7 +721,7 @@ impl SmallIntType { } } - pub fn default() -> Self { + pub fn default_value() -> Self { SmallIntType::new(true) } @@ -772,7 +772,7 @@ impl TimeType { TimeType::new(true, precision) } - pub fn default() -> Self { + pub fn default_value() -> Self { TimeType::with_precision(TimeType::DEFAULT_PRECISION) } @@ -827,7 +827,7 @@ impl TimestampType { TimestampType::new(true, precision) } - pub fn default() -> Self { + pub fn default_value() -> Self { TimestampType::with_precision(TimestampType::DEFAULT_PRECISION) } @@ -855,7 +855,7 @@ impl TinyIntType { } } - pub fn default() -> Self { + pub fn default_value() -> Self { TinyIntType::new(true) } @@ -902,7 +902,7 @@ impl VarBinaryType { VarBinaryType::new(true, length) } - pub fn default() -> Self { + pub fn default_value() -> Self { VarBinaryType::with_length(VarBinaryType::DEFAULT_LENGTH) } @@ -957,7 +957,7 @@ impl VarCharType { VarCharType::new(true, length) } - pub fn default() -> Self { + pub fn default_value() -> Self { VarCharType::with_length(VarCharType::DEFAULT_LENGTH) } From 79d15645366722bc4f19dddee23381dc11460d73 Mon Sep 17 00:00:00 2001 From: forwardxu Date: Sat, 20 Jul 2024 17:03:46 +0800 Subject: [PATCH 05/18] feat(spec): Add primitive data types --- crates/paimon/src/spec/types.rs | 103 +++++--------------------------- 1 file changed, 14 insertions(+), 89 deletions(-) diff --git a/crates/paimon/src/spec/types.rs b/crates/paimon/src/spec/types.rs index 3264cd5..0c1e5f9 100644 --- a/crates/paimon/src/spec/types.rs +++ b/crates/paimon/src/spec/types.rs @@ -151,8 +151,8 @@ pub struct DataType { } impl Display for DataType { - fn fmt(&self, _: &mut Formatter<'_>) -> std::fmt::Result { - todo!() + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_sql_string()) } } @@ -164,6 +164,7 @@ impl FromStr for DataType { } } +#[allow(dead_code)] impl DataType { fn new(is_nullable: bool, type_root: DataTypeRoot) -> Self { Self { @@ -267,18 +268,14 @@ impl DataType { serde_json::to_string(self).unwrap() } - fn with_nullability(&self, format: &str, params: &[&str]) -> String { + fn with_nullability(&self, _format: &str, params: &[&str]) -> String { if !self.is_nullable() { - format!("{} NOT NULL", format!("{}{}", format, params.concat())) + format!("{}{} NOT NULL", _format, params.concat()) } else { - format!("{}{}", format, params.concat()) + format!("{}{}", _format, params.concat()) } } - fn to_string(&self) -> String { - self.as_sql_string() - } - fn accept(&self, visitor: &mut T) where T: DataTypeVisitor, @@ -286,8 +283,6 @@ impl DataType { visitor.visit(self); } - fn collect_field_ids(&self, field_ids: &mut Vec) {} - fn not_null(&self) -> Self { self.copy(false) } @@ -307,7 +302,6 @@ pub struct ArrayType { } impl ArrayType { - pub const FORMAT: &str = "ARRAY<{}>"; pub fn new(is_nullable: bool) -> Self { Self { @@ -318,10 +312,6 @@ impl ArrayType { pub fn default_value() -> Self { ArrayType::new(true) } - - pub fn collect_field_ids(&self, field_ids: &mut Vec) { - self.element_type.collect_field_ids(field_ids); - } } /// BigIntType for paimon. @@ -342,10 +332,6 @@ impl BigIntType { pub fn default_value() -> Self { BigIntType::new(true) } - - pub fn collect_field_ids(&self, field_ids: &mut Vec) { - self.element_type.collect_field_ids(field_ids); - } } /// BinaryType for paimon. @@ -394,10 +380,6 @@ impl BinaryType { pub fn get_length(&self) -> usize { self.length } - - pub fn collect_field_ids(&self, field_ids: &mut Vec) { - self.element_type.collect_field_ids(field_ids); - } } /// BooleanType for paimon. @@ -418,10 +400,6 @@ impl BooleanType { pub fn default_value() -> Self { BooleanType::new(true) } - - pub fn collect_field_ids(&self, field_ids: &mut Vec) { - self.element_type.collect_field_ids(field_ids); - } } /// CharType for paimon. @@ -441,14 +419,12 @@ impl CharType { pub const MAX_LENGTH: usize = 255; - pub const FORMAT: &str = "CHAR(%d)"; - pub fn new(is_nullable: bool, length: usize) -> Self { CharType::new_with_result(is_nullable, length).unwrap() } pub fn new_with_result(is_nullable: bool, length: usize) -> Result { - if length < Self::MIN_LENGTH || length > Self::MAX_LENGTH { + if !(Self::MIN_LENGTH..=Self::MAX_LENGTH).contains(&length) { Err("Character string length must be between 1 and 255 (both inclusive).") } else { Ok(CharType { @@ -472,10 +448,6 @@ impl CharType { pub fn get_length(&self) -> usize { self.length } - - pub fn collect_field_ids(&self, field_ids: &mut Vec) { - self.element_type.collect_field_ids(field_ids); - } } /// DateType for paimon. @@ -496,10 +468,6 @@ impl DateType { pub fn default_value() -> Self { DateType::new(true) } - - pub fn collect_field_ids(&self, field_ids: &mut Vec) { - self.element_type.collect_field_ids(field_ids); - } } /// DecimalType for paimon. @@ -528,14 +496,15 @@ impl DecimalType { } pub fn new_with_result(is_nullable: bool, precision: u32, scale: u32) -> Result { - if precision < Self::MIN_PRECISION || precision > Self::MAX_PRECISION { + if !(Self::MIN_PRECISION..=Self::MAX_PRECISION).contains(&precision) { return Err(format!( "Decimal precision must be between {} and {} (both inclusive).", Self::MIN_PRECISION, Self::MAX_PRECISION )); } - if scale < Self::MIN_SCALE || scale > precision { + + if !(Self::MIN_SCALE..=precision).contains(&scale) { return Err(format!( "Decimal scale must be between {} and the precision {} (both inclusive).", Self::MIN_SCALE, @@ -571,10 +540,6 @@ impl DecimalType { pub fn get_scale(&self) -> u32 { self.scale } - - pub fn collect_field_ids(&self, field_ids: &mut Vec) { - self.element_type.collect_field_ids(field_ids); - } } /// DoubleType for paimon. @@ -595,10 +560,6 @@ impl DoubleType { pub fn default_value() -> Self { DoubleType::new(true) } - - pub fn collect_field_ids(&self, field_ids: &mut Vec) { - self.element_type.collect_field_ids(field_ids); - } } /// FloatType for paimon. @@ -619,10 +580,6 @@ impl FloatType { pub fn default_value() -> Self { FloatType::new(true) } - - pub fn collect_field_ids(&self, field_ids: &mut Vec) { - self.element_type.collect_field_ids(field_ids); - } } /// IntType for paimon. @@ -643,10 +600,6 @@ impl IntType { pub fn default_value() -> Self { IntType::new(true) } - - pub fn collect_field_ids(&self, field_ids: &mut Vec) { - self.element_type.collect_field_ids(field_ids); - } } /// LocalZonedTimestampType for paimon. @@ -670,7 +623,7 @@ impl LocalZonedTimestampType { } pub fn new_with_result(is_nullable: bool, precision: u32) -> Result { - if precision < Self::MIN_PRECISION || precision > Self::MAX_PRECISION { + if !(Self::MIN_PRECISION..=Self::MAX_PRECISION).contains(&precision) { return Err(format!( "Timestamp precision must be between {} and {} (both inclusive).", Self::MIN_PRECISION, @@ -698,10 +651,6 @@ impl LocalZonedTimestampType { pub fn get_precision(&self) -> u32 { self.precision } - - pub fn collect_field_ids(&self, field_ids: &mut Vec) { - self.element_type.collect_field_ids(field_ids); - } } /// Next TODO: MapType、MultisetType、RowType @@ -724,10 +673,6 @@ impl SmallIntType { pub fn default_value() -> Self { SmallIntType::new(true) } - - pub fn collect_field_ids(&self, field_ids: &mut Vec) { - self.element_type.collect_field_ids(field_ids); - } } /// TimeType for paimon. @@ -751,7 +696,7 @@ impl TimeType { } pub fn new_with_result(is_nullable: bool, precision: u32) -> Result { - if precision < Self::MIN_PRECISION || precision > Self::MAX_PRECISION { + if !(Self::MIN_PRECISION..=Self::MAX_PRECISION).contains(&precision) { return Err(format!( "Time precision must be between {} and {} (both inclusive).", Self::MIN_PRECISION, @@ -779,10 +724,6 @@ impl TimeType { pub fn get_precision(&self) -> u32 { self.precision } - - pub fn collect_field_ids(&self, field_ids: &mut Vec) { - self.element_type.collect_field_ids(field_ids); - } } /// TimestampType for paimon. @@ -806,7 +747,7 @@ impl TimestampType { } pub fn new_with_result(is_nullable: bool, precision: u32) -> Result { - if precision < Self::MIN_PRECISION || precision > Self::MAX_PRECISION { + if !(Self::MIN_PRECISION..=Self::MAX_PRECISION).contains(&precision) { return Err(format!( "Timestamp precision must be between {} and {} (both inclusive).", Self::MIN_PRECISION, @@ -834,10 +775,6 @@ impl TimestampType { pub fn get_precision(&self) -> u32 { self.precision } - - pub fn collect_field_ids(&self, field_ids: &mut Vec) { - self.element_type.collect_field_ids(field_ids); - } } /// TinyIntType for paimon. @@ -858,10 +795,6 @@ impl TinyIntType { pub fn default_value() -> Self { TinyIntType::new(true) } - - pub fn collect_field_ids(&self, field_ids: &mut Vec) { - self.element_type.collect_field_ids(field_ids); - } } /// VarBinaryType for paimon. @@ -909,10 +842,6 @@ impl VarBinaryType { pub fn get_length(&self) -> u32 { self.length } - - pub fn collect_field_ids(&self, field_ids: &mut Vec) { - self.element_type.collect_field_ids(field_ids); - } } /// VarCharType for paimon. @@ -936,7 +865,7 @@ impl VarCharType { } pub fn new_with_result(is_nullable: bool, length: u32) -> Result { - if length < VarCharType::MIN_LENGTH || length > VarCharType::MAX_LENGTH { + if !(Self::MIN_LENGTH..=Self::MAX_LENGTH).contains(&length) { return Err(format!( "Character string length must be between {} and {} (both inclusive).", VarCharType::MIN_LENGTH, @@ -964,8 +893,4 @@ impl VarCharType { pub fn get_length(&self) -> u32 { self.length } - - pub fn collect_field_ids(&self, field_ids: &mut Vec) { - self.element_type.collect_field_ids(field_ids); - } } From 3e1fed962bbf08867ed322c9c8d43a5ff0030ad0 Mon Sep 17 00:00:00 2001 From: forwardxu Date: Sat, 20 Jul 2024 17:05:11 +0800 Subject: [PATCH 06/18] feat(spec): Add primitive data types --- crates/paimon/src/spec/types.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/crates/paimon/src/spec/types.rs b/crates/paimon/src/spec/types.rs index 0c1e5f9..1ef6b70 100644 --- a/crates/paimon/src/spec/types.rs +++ b/crates/paimon/src/spec/types.rs @@ -270,7 +270,7 @@ impl DataType { fn with_nullability(&self, _format: &str, params: &[&str]) -> String { if !self.is_nullable() { - format!("{}{} NOT NULL", _format, params.concat()) + format!("{}{} NOT NULL", _format, params.concat()) } else { format!("{}{}", _format, params.concat()) } @@ -302,7 +302,6 @@ pub struct ArrayType { } impl ArrayType { - pub fn new(is_nullable: bool) -> Self { Self { element_type: DataType::new(is_nullable, DataTypeRoot::Array), @@ -503,8 +502,8 @@ impl DecimalType { Self::MAX_PRECISION )); } - - if !(Self::MIN_SCALE..=precision).contains(&scale) { + + if !(Self::MIN_SCALE..=precision).contains(&scale) { return Err(format!( "Decimal scale must be between {} and the precision {} (both inclusive).", Self::MIN_SCALE, @@ -623,7 +622,7 @@ impl LocalZonedTimestampType { } pub fn new_with_result(is_nullable: bool, precision: u32) -> Result { - if !(Self::MIN_PRECISION..=Self::MAX_PRECISION).contains(&precision) { + if !(Self::MIN_PRECISION..=Self::MAX_PRECISION).contains(&precision) { return Err(format!( "Timestamp precision must be between {} and {} (both inclusive).", Self::MIN_PRECISION, @@ -696,7 +695,7 @@ impl TimeType { } pub fn new_with_result(is_nullable: bool, precision: u32) -> Result { - if !(Self::MIN_PRECISION..=Self::MAX_PRECISION).contains(&precision) { + if !(Self::MIN_PRECISION..=Self::MAX_PRECISION).contains(&precision) { return Err(format!( "Time precision must be between {} and {} (both inclusive).", Self::MIN_PRECISION, @@ -747,7 +746,7 @@ impl TimestampType { } pub fn new_with_result(is_nullable: bool, precision: u32) -> Result { - if !(Self::MIN_PRECISION..=Self::MAX_PRECISION).contains(&precision) { + if !(Self::MIN_PRECISION..=Self::MAX_PRECISION).contains(&precision) { return Err(format!( "Timestamp precision must be between {} and {} (both inclusive).", Self::MIN_PRECISION, @@ -865,7 +864,7 @@ impl VarCharType { } pub fn new_with_result(is_nullable: bool, length: u32) -> Result { - if !(Self::MIN_LENGTH..=Self::MAX_LENGTH).contains(&length) { + if !(Self::MIN_LENGTH..=Self::MAX_LENGTH).contains(&length) { return Err(format!( "Character string length must be between {} and {} (both inclusive).", VarCharType::MIN_LENGTH, From bb78dfa2790d9dad1bde94a635091ec67446e31f Mon Sep 17 00:00:00 2001 From: forwardxu Date: Sat, 20 Jul 2024 17:49:47 +0800 Subject: [PATCH 07/18] feat(spec): Add primitive data types --- crates/paimon/src/spec/types.rs | 64 +++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/crates/paimon/src/spec/types.rs b/crates/paimon/src/spec/types.rs index 1ef6b70..8b1b9dd 100644 --- a/crates/paimon/src/spec/types.rs +++ b/crates/paimon/src/spec/types.rs @@ -311,6 +311,10 @@ impl ArrayType { pub fn default_value() -> Self { ArrayType::new(true) } + + pub fn as_sql_string(&self) -> String { + format!("ARRAY<{}>", self.element_type.as_sql_string()) + } } /// BigIntType for paimon. @@ -331,6 +335,10 @@ impl BigIntType { pub fn default_value() -> Self { BigIntType::new(true) } + + pub fn as_sql_string(&self) -> String { + "BIGINT".to_string() + } } /// BinaryType for paimon. @@ -379,6 +387,10 @@ impl BinaryType { pub fn get_length(&self) -> usize { self.length } + + pub fn as_sql_string(&self) -> String { + format!("BINARY({})", self.length) + } } /// BooleanType for paimon. @@ -447,6 +459,10 @@ impl CharType { pub fn get_length(&self) -> usize { self.length } + + pub fn as_sql_string(&self) -> String { + format!("CHAR({})", self.length) + } } /// DateType for paimon. @@ -467,6 +483,10 @@ impl DateType { pub fn default_value() -> Self { DateType::new(true) } + + pub fn as_sql_string(&self) -> String { + "DATE".to_string() + } } /// DecimalType for paimon. @@ -539,6 +559,10 @@ impl DecimalType { pub fn get_scale(&self) -> u32 { self.scale } + + pub fn as_sql_string(&self) -> String { + format!("DECIMAL({}, {})", self.precision, self.scale) + } } /// DoubleType for paimon. @@ -559,6 +583,10 @@ impl DoubleType { pub fn default_value() -> Self { DoubleType::new(true) } + + pub fn as_sql_string(&self) -> String { + "DOUBLE".to_string() + } } /// FloatType for paimon. @@ -579,6 +607,10 @@ impl FloatType { pub fn default_value() -> Self { FloatType::new(true) } + + pub fn as_sql_string(&self) -> String { + "FLOAT".to_string() + } } /// IntType for paimon. @@ -599,6 +631,10 @@ impl IntType { pub fn default_value() -> Self { IntType::new(true) } + + pub fn as_sql_string(&self) -> String { + "INTEGER".to_string() + } } /// LocalZonedTimestampType for paimon. @@ -650,6 +686,10 @@ impl LocalZonedTimestampType { pub fn get_precision(&self) -> u32 { self.precision } + + pub fn as_sql_string(&self) -> String { + format!("TIMESTAMP WITH LOCAL TIME ZONE({})", self.precision) + } } /// Next TODO: MapType、MultisetType、RowType @@ -672,6 +712,10 @@ impl SmallIntType { pub fn default_value() -> Self { SmallIntType::new(true) } + + pub fn as_sql_string(&self) -> String { + "SMALLINT".to_string() + } } /// TimeType for paimon. @@ -723,6 +767,10 @@ impl TimeType { pub fn get_precision(&self) -> u32 { self.precision } + + pub fn as_sql_string(&self) -> String { + format!("TIME({})", self.precision) + } } /// TimestampType for paimon. @@ -774,6 +822,10 @@ impl TimestampType { pub fn get_precision(&self) -> u32 { self.precision } + + pub fn as_sql_string(&self) -> String { + format!("TIMESTAMP({})", self.precision) + } } /// TinyIntType for paimon. @@ -794,6 +846,10 @@ impl TinyIntType { pub fn default_value() -> Self { TinyIntType::new(true) } + + pub fn as_sql_string(&self) -> String { + "TINYINT".to_string() + } } /// VarBinaryType for paimon. @@ -841,6 +897,10 @@ impl VarBinaryType { pub fn get_length(&self) -> u32 { self.length } + + pub fn as_sql_string(&self) -> String { + format!("VARBINARY({})", self.length) + } } /// VarCharType for paimon. @@ -892,4 +952,8 @@ impl VarCharType { pub fn get_length(&self) -> u32 { self.length } + + pub fn as_sql_string(&self) -> String { + format!("VARCHAR({})", self.length) + } } From 18a158a00ce8506fb82968ecfe2ef8a867703e76 Mon Sep 17 00:00:00 2001 From: forwardxu Date: Mon, 22 Jul 2024 10:37:23 +0800 Subject: [PATCH 08/18] feat(spec): Add primitive data types --- crates/paimon/src/spec/types.rs | 77 ++++++++++++++++----------------- 1 file changed, 37 insertions(+), 40 deletions(-) diff --git a/crates/paimon/src/spec/types.rs b/crates/paimon/src/spec/types.rs index 8b1b9dd..181528d 100644 --- a/crates/paimon/src/spec/types.rs +++ b/crates/paimon/src/spec/types.rs @@ -309,7 +309,7 @@ impl ArrayType { } pub fn default_value() -> Self { - ArrayType::new(true) + Self::new(true) } pub fn as_sql_string(&self) -> String { @@ -333,7 +333,7 @@ impl BigIntType { } pub fn default_value() -> Self { - BigIntType::new(true) + Self::new(true) } pub fn as_sql_string(&self) -> String { @@ -359,14 +359,14 @@ impl BinaryType { pub const DEFAULT_LENGTH: usize = 1; pub fn new(is_nullable: bool, length: usize) -> Self { - BinaryType::new_with_result(is_nullable, length).unwrap() + Self::new_with_result(is_nullable, length).unwrap() } pub fn new_with_result(is_nullable: bool, length: usize) -> Result { - if length < BinaryType::MIN_LENGTH { + if length < Self::MIN_LENGTH { Err("Binary string length must be at least 1.") } else { - Ok(BinaryType { + Ok(Self { element_type: DataType { is_nullable, type_root: DataTypeRoot::Binary, @@ -377,11 +377,11 @@ impl BinaryType { } pub fn with_length(length: usize) -> Self { - BinaryType::new(true, length) + Self::new(true, length) } pub fn default_value() -> Self { - BinaryType::with_length(BinaryType::DEFAULT_LENGTH) + Self::with_length(Self::DEFAULT_LENGTH) } pub fn get_length(&self) -> usize { @@ -409,7 +409,7 @@ impl BooleanType { } pub fn default_value() -> Self { - BooleanType::new(true) + Self::new(true) } } @@ -431,7 +431,7 @@ impl CharType { pub const MAX_LENGTH: usize = 255; pub fn new(is_nullable: bool, length: usize) -> Self { - CharType::new_with_result(is_nullable, length).unwrap() + Self::new_with_result(is_nullable, length).unwrap() } pub fn new_with_result(is_nullable: bool, length: usize) -> Result { @@ -449,11 +449,11 @@ impl CharType { } pub fn with_length(length: usize) -> Self { - CharType::new(true, length) + Self::new(true, length) } pub fn default_value() -> Self { - CharType::with_length(CharType::DEFAULT_LENGTH) + Self::with_length(Self::DEFAULT_LENGTH) } pub fn get_length(&self) -> usize { @@ -481,7 +481,7 @@ impl DateType { } pub fn default_value() -> Self { - DateType::new(true) + Self::new(true) } pub fn as_sql_string(&self) -> String { @@ -511,7 +511,7 @@ impl DecimalType { pub const DEFAULT_SCALE: u32 = 0; pub fn new(is_nullable: bool, precision: u32, scale: u32) -> Self { - DecimalType::new_with_result(is_nullable, precision, scale).unwrap() + Self::new_with_result(is_nullable, precision, scale).unwrap() } pub fn new_with_result(is_nullable: bool, precision: u32, scale: u32) -> Result { @@ -542,14 +542,11 @@ impl DecimalType { } pub fn with_precision_and_scale(precision: u32, scale: u32) -> Self { - DecimalType::new(true, precision, scale) + Self::new(true, precision, scale) } pub fn default_value() -> Self { - DecimalType::with_precision_and_scale( - DecimalType::DEFAULT_PRECISION, - DecimalType::DEFAULT_SCALE, - ) + Self::with_precision_and_scale(Self::DEFAULT_PRECISION, Self::DEFAULT_SCALE) } pub fn get_precision(&self) -> u32 { @@ -581,7 +578,7 @@ impl DoubleType { } pub fn default_value() -> Self { - DoubleType::new(true) + Self::new(true) } pub fn as_sql_string(&self) -> String { @@ -605,7 +602,7 @@ impl FloatType { } pub fn default_value() -> Self { - FloatType::new(true) + Self::new(true) } pub fn as_sql_string(&self) -> String { @@ -629,7 +626,7 @@ impl IntType { } pub fn default_value() -> Self { - IntType::new(true) + Self::new(true) } pub fn as_sql_string(&self) -> String { @@ -676,11 +673,11 @@ impl LocalZonedTimestampType { } pub fn with_precision(precision: u32) -> Self { - LocalZonedTimestampType::new(true, precision) + Self::new(true, precision) } pub fn default_value() -> Self { - LocalZonedTimestampType::with_precision(LocalZonedTimestampType::DEFAULT_PRECISION) + Self::with_precision(Self::DEFAULT_PRECISION) } pub fn get_precision(&self) -> u32 { @@ -710,7 +707,7 @@ impl SmallIntType { } pub fn default_value() -> Self { - SmallIntType::new(true) + Self::new(true) } pub fn as_sql_string(&self) -> String { @@ -735,7 +732,7 @@ impl TimeType { pub const DEFAULT_PRECISION: u32 = 0; pub fn new(is_nullable: bool, precision: u32) -> Self { - TimeType::new_with_result(is_nullable, precision).unwrap() + Self::new_with_result(is_nullable, precision).unwrap() } pub fn new_with_result(is_nullable: bool, precision: u32) -> Result { @@ -757,11 +754,11 @@ impl TimeType { } pub fn with_precision(precision: u32) -> Self { - TimeType::new(true, precision) + Self::new(true, precision) } pub fn default_value() -> Self { - TimeType::with_precision(TimeType::DEFAULT_PRECISION) + Self::with_precision(TimeType::DEFAULT_PRECISION) } pub fn get_precision(&self) -> u32 { @@ -790,7 +787,7 @@ impl TimestampType { pub const DEFAULT_PRECISION: u32 = 6; pub fn new(is_nullable: bool, precision: u32) -> Self { - TimestampType::new_with_result(is_nullable, precision).unwrap() + Self::new_with_result(is_nullable, precision).unwrap() } pub fn new_with_result(is_nullable: bool, precision: u32) -> Result { @@ -812,11 +809,11 @@ impl TimestampType { } pub fn with_precision(precision: u32) -> Self { - TimestampType::new(true, precision) + Self::new(true, precision) } pub fn default_value() -> Self { - TimestampType::with_precision(TimestampType::DEFAULT_PRECISION) + Self::with_precision(Self::DEFAULT_PRECISION) } pub fn get_precision(&self) -> u32 { @@ -844,7 +841,7 @@ impl TinyIntType { } pub fn default_value() -> Self { - TinyIntType::new(true) + Self::new(true) } pub fn as_sql_string(&self) -> String { @@ -869,11 +866,11 @@ impl VarBinaryType { pub const DEFAULT_LENGTH: u32 = 1; pub fn new(is_nullable: bool, length: u32) -> Self { - VarBinaryType::new_with_result(is_nullable, length).unwrap() + Self::new_with_result(is_nullable, length).unwrap() } pub fn new_with_result(is_nullable: bool, length: u32) -> Result { - if length < VarBinaryType::MIN_LENGTH { + if length < Self::MIN_LENGTH { return Err("Binary string length must be at least 1.".to_string()); } @@ -887,11 +884,11 @@ impl VarBinaryType { } pub fn with_length(length: u32) -> Self { - VarBinaryType::new(true, length) + Self::new(true, length) } pub fn default_value() -> Self { - VarBinaryType::with_length(VarBinaryType::DEFAULT_LENGTH) + Self::with_length(Self::DEFAULT_LENGTH) } pub fn get_length(&self) -> u32 { @@ -920,15 +917,15 @@ impl VarCharType { pub const DEFAULT_LENGTH: u32 = 1; pub fn new(is_nullable: bool, length: u32) -> Self { - VarCharType::new_with_result(is_nullable, length).unwrap() + Self::new_with_result(is_nullable, length).unwrap() } pub fn new_with_result(is_nullable: bool, length: u32) -> Result { if !(Self::MIN_LENGTH..=Self::MAX_LENGTH).contains(&length) { return Err(format!( "Character string length must be between {} and {} (both inclusive).", - VarCharType::MIN_LENGTH, - VarCharType::MAX_LENGTH + Self::MIN_LENGTH, + Self::MAX_LENGTH )); } @@ -942,11 +939,11 @@ impl VarCharType { } pub fn with_length(length: u32) -> Self { - VarCharType::new(true, length) + Self::new(true, length) } pub fn default_value() -> Self { - VarCharType::with_length(VarCharType::DEFAULT_LENGTH) + Self::with_length(Self::DEFAULT_LENGTH) } pub fn get_length(&self) -> u32 { From edc15f6f52a60a2da34a36a6075cdae0885e691a Mon Sep 17 00:00:00 2001 From: forwardxu Date: Mon, 22 Jul 2024 20:19:58 +0800 Subject: [PATCH 09/18] feat(spec): Add primitive data types --- crates/paimon/src/spec/types.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/paimon/src/spec/types.rs b/crates/paimon/src/spec/types.rs index 181528d..5d3227c 100644 --- a/crates/paimon/src/spec/types.rs +++ b/crates/paimon/src/spec/types.rs @@ -152,7 +152,11 @@ pub struct DataType { impl Display for DataType { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.as_sql_string()) + if !self.is_nullable() { + write!(f, "{} NOT NULL", self.as_sql_string()) + } else { + write!(f, "{}", self.as_sql_string()) + } } } From 6943c7b1d2562b362e2f562ee2501775e658cd77 Mon Sep 17 00:00:00 2001 From: forwardxu Date: Tue, 23 Jul 2024 13:49:49 +0800 Subject: [PATCH 10/18] feat(spec): Add primitive data types --- crates/paimon/src/spec/types.rs | 147 +++++++++++++++++++------------- 1 file changed, 86 insertions(+), 61 deletions(-) diff --git a/crates/paimon/src/spec/types.rs b/crates/paimon/src/spec/types.rs index 5d3227c..7cdcc1b 100644 --- a/crates/paimon/src/spec/types.rs +++ b/crates/paimon/src/spec/types.rs @@ -18,7 +18,7 @@ use crate::error::Error; use bitflags::bitflags; use serde::{Deserialize, Serialize}; -use std::fmt::{Display, Formatter}; +use std::fmt::{Arguments, Display, Formatter}; use std::str::FromStr; bitflags! { @@ -191,37 +191,37 @@ impl DataType { &self.type_root } - /// Returns the family of the data type. + /// Returns whether the root of the type equals to the type_root or not. /// - /// Impl Reference: + /// Impl Reference: fn is(&self, type_root: &DataTypeRoot) -> bool { &self.type_root == type_root } - /// Returns true if the data type is with the family. + /// Returns whether the family type of the type equals to the family or not. /// - /// Impl Reference: + /// Impl Reference: fn is_with_family(&self, family: DataTypeFamily) -> bool { self.type_root.families().contains(family) } - /// Returns true if the data type is with the family. + /// Returns whether the root of the type equals to at least on of the type_roots or not. /// - /// Impl Reference: + /// Impl Reference: fn is_any_of(&self, type_roots: &[DataTypeRoot]) -> bool { type_roots.iter().any(|tr: &DataTypeRoot| self.is(tr)) } - /// Returns true if the data type is with the family. - /// Impl Reference: + /// Returns whether the root of the type is part of at least one family of the families or not. + /// Impl Reference: fn is_any_of_family(&self, families: &[DataTypeFamily]) -> bool { families .iter() .any(|f: &DataTypeFamily| self.is_with_family(f.clone())) } - /// Returns true if the data type is with the family. - /// Impl Reference: + /// Returns a deep copy of this type with possibly different nullability. + /// Impl Reference: fn copy(&self, is_nullable: bool) -> Self { Self { is_nullable, @@ -229,54 +229,27 @@ impl DataType { } } - /// Returns true if the data type is with the family. - /// Impl Reference: + /// Returns a deep copy of this type. It requires an implementation of {@link #copy(boolean)}. + /// Impl Reference: fn copy_with_nullable(&self) -> Self { self.copy(self.is_nullable) } - /// Returns true if the data type is with the family. - /// Impl Reference: + /// Compare two data types without nullable. + /// Impl Reference: fn copy_ignore_nullable(&self) -> Self { self.copy(false) } - fn as_sql_string(&self) -> String { - match self.type_root { - DataTypeRoot::Char => "CHAR".to_string(), - DataTypeRoot::Varchar => "VARCHAR".to_string(), - DataTypeRoot::Boolean => "BOOLEAN".to_string(), - DataTypeRoot::Binary => "BINARY".to_string(), - DataTypeRoot::Varbinary => "VARBINARY".to_string(), - DataTypeRoot::Decimal => "DECIMAL".to_string(), - DataTypeRoot::Tinyint => "TINYINT".to_string(), - DataTypeRoot::Smallint => "SMALLINT".to_string(), - DataTypeRoot::Integer => "INTEGER".to_string(), - DataTypeRoot::Bigint => "BIGINT".to_string(), - DataTypeRoot::Float => "FLOAT".to_string(), - DataTypeRoot::Double => "DOUBLE".to_string(), - DataTypeRoot::Date => "DATE".to_string(), - DataTypeRoot::TimeWithoutTimeZone => "TIME".to_string(), - DataTypeRoot::TimestampWithoutTimeZone => "TIMESTAMP".to_string(), - DataTypeRoot::TimestampWithLocalTimeZone => { - "TIMESTAMP WITH LOCAL TIME ZONE".to_string() - } - DataTypeRoot::Array => "ARRAY".to_string(), - DataTypeRoot::Multiset => "MULTISET".to_string(), - DataTypeRoot::Map => "MAP".to_string(), - DataTypeRoot::Row => "ROW".to_string(), - } - } - fn serialize_json(&self) -> String { serde_json::to_string(self).unwrap() } - fn with_nullability(&self, _format: &str, params: &[&str]) -> String { + fn with_nullability(&self, args: Arguments) -> String { if !self.is_nullable() { - format!("{}{} NOT NULL", _format, params.concat()) + format!("{} NOT NULL", args) } else { - format!("{}{}", _format, params.concat()) + format!("{}", args) } } @@ -294,6 +267,35 @@ impl DataType { fn nullable(&self) -> Self { self.copy(true) } + + fn as_sql_string(&self) -> String { + match self.type_root { + DataTypeRoot::Char => CharType::default_value().as_sql_string(), + DataTypeRoot::Varchar => VarCharType::default_value().as_sql_string(), + DataTypeRoot::Boolean => BooleanType::default_value().as_sql_string(), + DataTypeRoot::Binary => BinaryType::default_value().as_sql_string(), + DataTypeRoot::Varbinary => VarBinaryType::default_value().as_sql_string(), + DataTypeRoot::Decimal => DecimalType::default_value().as_sql_string(), + DataTypeRoot::Tinyint => TinyIntType::default_value().as_sql_string(), + DataTypeRoot::Smallint => SmallIntType::default_value().as_sql_string(), + DataTypeRoot::Integer => IntType::default_value().as_sql_string(), + DataTypeRoot::Bigint => BigIntType::default_value().as_sql_string(), + DataTypeRoot::Float => FloatType::default_value().as_sql_string(), + DataTypeRoot::Double => DoubleType::default_value().as_sql_string(), + DataTypeRoot::Date => DateType::default_value().as_sql_string(), + DataTypeRoot::TimeWithoutTimeZone => TimeType::default_value().as_sql_string(), + DataTypeRoot::TimestampWithoutTimeZone => { + TimestampType::default_value().as_sql_string() + } + DataTypeRoot::TimestampWithLocalTimeZone => { + LocalZonedTimestampType::default_value().as_sql_string() + } + DataTypeRoot::Array => ArrayType::default_value().as_sql_string(), + DataTypeRoot::Multiset => todo!(), + DataTypeRoot::Map => todo!(), + DataTypeRoot::Row => todo!(), + } + } } /// ArrayType for paimon. @@ -317,7 +319,8 @@ impl ArrayType { } pub fn as_sql_string(&self) -> String { - format!("ARRAY<{}>", self.element_type.as_sql_string()) + self.element_type + .with_nullability(format_args!("ARRAY<{}>", self.element_type)) } } @@ -341,7 +344,8 @@ impl BigIntType { } pub fn as_sql_string(&self) -> String { - "BIGINT".to_string() + self.element_type + .with_nullability(format_args!("{}", "BIGINT")) } } @@ -393,7 +397,8 @@ impl BinaryType { } pub fn as_sql_string(&self) -> String { - format!("BINARY({})", self.length) + self.element_type + .with_nullability(format_args!("BINARY({})", self.length)) } } @@ -415,6 +420,11 @@ impl BooleanType { pub fn default_value() -> Self { Self::new(true) } + + pub fn as_sql_string(&self) -> String { + self.element_type + .with_nullability(format_args!("{}", "BOOLEAN")) + } } /// CharType for paimon. @@ -465,7 +475,8 @@ impl CharType { } pub fn as_sql_string(&self) -> String { - format!("CHAR({})", self.length) + self.element_type + .with_nullability(format_args!("CHAR({})", self.length)) } } @@ -489,7 +500,8 @@ impl DateType { } pub fn as_sql_string(&self) -> String { - "DATE".to_string() + self.element_type + .with_nullability(format_args!("{}", "DATE")) } } @@ -562,7 +574,8 @@ impl DecimalType { } pub fn as_sql_string(&self) -> String { - format!("DECIMAL({}, {})", self.precision, self.scale) + self.element_type + .with_nullability(format_args!("DECIMAL({}, {})", self.precision, self.scale)) } } @@ -586,7 +599,8 @@ impl DoubleType { } pub fn as_sql_string(&self) -> String { - "DOUBLE".to_string() + self.element_type + .with_nullability(format_args!("{}", "DOUBLE")) } } @@ -610,7 +624,8 @@ impl FloatType { } pub fn as_sql_string(&self) -> String { - "FLOAT".to_string() + self.element_type + .with_nullability(format_args!("{}", "FLOAT")) } } @@ -634,7 +649,8 @@ impl IntType { } pub fn as_sql_string(&self) -> String { - "INTEGER".to_string() + self.element_type + .with_nullability(format_args!("{}", "INTEGER")) } } @@ -689,7 +705,10 @@ impl LocalZonedTimestampType { } pub fn as_sql_string(&self) -> String { - format!("TIMESTAMP WITH LOCAL TIME ZONE({})", self.precision) + self.element_type.with_nullability(format_args!( + "TIMESTAMP WITH LOCAL TIME ZONE({})", + self.precision + )) } } @@ -715,7 +734,8 @@ impl SmallIntType { } pub fn as_sql_string(&self) -> String { - "SMALLINT".to_string() + self.element_type + .with_nullability(format_args!("{}", "SMALLINT")) } } @@ -770,7 +790,8 @@ impl TimeType { } pub fn as_sql_string(&self) -> String { - format!("TIME({})", self.precision) + self.element_type + .with_nullability(format_args!("TIME({})", self.precision)) } } @@ -825,7 +846,8 @@ impl TimestampType { } pub fn as_sql_string(&self) -> String { - format!("TIMESTAMP({})", self.precision) + self.element_type + .with_nullability(format_args!("TIMESTAMP({})", self.precision)) } } @@ -849,7 +871,8 @@ impl TinyIntType { } pub fn as_sql_string(&self) -> String { - "TINYINT".to_string() + self.element_type + .with_nullability(format_args!("{}", "TINYINT")) } } @@ -900,7 +923,8 @@ impl VarBinaryType { } pub fn as_sql_string(&self) -> String { - format!("VARBINARY({})", self.length) + self.element_type + .with_nullability(format_args!("VARBINARY({})", self.length)) } } @@ -955,6 +979,7 @@ impl VarCharType { } pub fn as_sql_string(&self) -> String { - format!("VARCHAR({})", self.length) + self.element_type + .with_nullability(format_args!("VARCHAR({})", self.length)) } } From e43f41bbb57a535d494d85bf54ca0c7a3719d4a7 Mon Sep 17 00:00:00 2001 From: forwardxu Date: Tue, 23 Jul 2024 22:12:26 +0800 Subject: [PATCH 11/18] feat(spec): Add primitive data types --- crates/paimon/src/spec/types.rs | 323 +++++++++++++++++++------------- 1 file changed, 192 insertions(+), 131 deletions(-) diff --git a/crates/paimon/src/spec/types.rs b/crates/paimon/src/spec/types.rs index 7cdcc1b..cb98d85 100644 --- a/crates/paimon/src/spec/types.rs +++ b/crates/paimon/src/spec/types.rs @@ -48,12 +48,12 @@ bitflags! { /// Impl Reference: #[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] pub enum DataTypeRoot { - Char, + Char { length: usize }, Varchar, Boolean, - Binary, + Binary { length: usize }, Varbinary, - Decimal, + Decimal { precision: u32, scale: u32 }, Tinyint, Smallint, Integer, @@ -73,12 +73,19 @@ pub enum DataTypeRoot { impl DataTypeRoot { pub fn families(&self) -> DataTypeFamily { match self { - Self::Char => DataTypeFamily::PREDEFINED | DataTypeFamily::CHARACTER_STRING, + Self::Char { length: _ } => { + DataTypeFamily::PREDEFINED | DataTypeFamily::CHARACTER_STRING + } Self::Varchar => DataTypeFamily::PREDEFINED | DataTypeFamily::CHARACTER_STRING, Self::Boolean => DataTypeFamily::PREDEFINED, - Self::Binary => DataTypeFamily::PREDEFINED | DataTypeFamily::BINARY_STRING, + Self::Binary { length: _ } => { + DataTypeFamily::PREDEFINED | DataTypeFamily::BINARY_STRING + } Self::Varbinary => DataTypeFamily::PREDEFINED | DataTypeFamily::BINARY_STRING, - Self::Decimal => { + Self::Decimal { + precision: _, + scale: _, + } => { DataTypeFamily::PREDEFINED | DataTypeFamily::NUMERIC | DataTypeFamily::EXACT_NUMERIC } Self::Tinyint => { @@ -151,12 +158,8 @@ pub struct DataType { } impl Display for DataType { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - if !self.is_nullable() { - write!(f, "{} NOT NULL", self.as_sql_string()) - } else { - write!(f, "{}", self.as_sql_string()) - } + fn fmt(&self, _f: &mut Formatter<'_>) -> std::fmt::Result { + todo!() } } @@ -267,35 +270,6 @@ impl DataType { fn nullable(&self) -> Self { self.copy(true) } - - fn as_sql_string(&self) -> String { - match self.type_root { - DataTypeRoot::Char => CharType::default_value().as_sql_string(), - DataTypeRoot::Varchar => VarCharType::default_value().as_sql_string(), - DataTypeRoot::Boolean => BooleanType::default_value().as_sql_string(), - DataTypeRoot::Binary => BinaryType::default_value().as_sql_string(), - DataTypeRoot::Varbinary => VarBinaryType::default_value().as_sql_string(), - DataTypeRoot::Decimal => DecimalType::default_value().as_sql_string(), - DataTypeRoot::Tinyint => TinyIntType::default_value().as_sql_string(), - DataTypeRoot::Smallint => SmallIntType::default_value().as_sql_string(), - DataTypeRoot::Integer => IntType::default_value().as_sql_string(), - DataTypeRoot::Bigint => BigIntType::default_value().as_sql_string(), - DataTypeRoot::Float => FloatType::default_value().as_sql_string(), - DataTypeRoot::Double => DoubleType::default_value().as_sql_string(), - DataTypeRoot::Date => DateType::default_value().as_sql_string(), - DataTypeRoot::TimeWithoutTimeZone => TimeType::default_value().as_sql_string(), - DataTypeRoot::TimestampWithoutTimeZone => { - TimestampType::default_value().as_sql_string() - } - DataTypeRoot::TimestampWithLocalTimeZone => { - LocalZonedTimestampType::default_value().as_sql_string() - } - DataTypeRoot::Array => ArrayType::default_value().as_sql_string(), - DataTypeRoot::Multiset => todo!(), - DataTypeRoot::Map => todo!(), - DataTypeRoot::Row => todo!(), - } - } } /// ArrayType for paimon. @@ -307,6 +281,16 @@ pub struct ArrayType { pub element_type: DataType, } +impl Display for ArrayType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if !self.element_type.is_nullable() { + write!(f, "ARRAY NOT NULL") + } else { + write!(f, "ARRAY") + } + } +} + impl ArrayType { pub fn new(is_nullable: bool) -> Self { Self { @@ -317,11 +301,6 @@ impl ArrayType { pub fn default_value() -> Self { Self::new(true) } - - pub fn as_sql_string(&self) -> String { - self.element_type - .with_nullability(format_args!("ARRAY<{}>", self.element_type)) - } } /// BigIntType for paimon. @@ -332,6 +311,16 @@ pub struct BigIntType { pub element_type: DataType, } +impl Display for BigIntType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if !self.element_type.is_nullable() { + write!(f, "BIGINT NOT NULL") + } else { + write!(f, "BIGINT") + } + } +} + impl BigIntType { pub fn new(is_nullable: bool) -> Self { Self { @@ -342,11 +331,6 @@ impl BigIntType { pub fn default_value() -> Self { Self::new(true) } - - pub fn as_sql_string(&self) -> String { - self.element_type - .with_nullability(format_args!("{}", "BIGINT")) - } } /// BinaryType for paimon. @@ -359,6 +343,16 @@ pub struct BinaryType { length: usize, } +impl Display for BinaryType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if !self.element_type.is_nullable() { + write!(f, "BINARY({}) NOT NULL", self.length) + } else { + write!(f, "BINARY({})", self.length) + } + } +} + impl BinaryType { pub const MIN_LENGTH: usize = 1; @@ -377,7 +371,7 @@ impl BinaryType { Ok(Self { element_type: DataType { is_nullable, - type_root: DataTypeRoot::Binary, + type_root: DataTypeRoot::Binary { length }, }, length, }) @@ -395,11 +389,6 @@ impl BinaryType { pub fn get_length(&self) -> usize { self.length } - - pub fn as_sql_string(&self) -> String { - self.element_type - .with_nullability(format_args!("BINARY({})", self.length)) - } } /// BooleanType for paimon. @@ -410,6 +399,16 @@ pub struct BooleanType { pub element_type: DataType, } +impl Display for BooleanType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if !self.element_type.is_nullable() { + write!(f, "BOOLEAN NOT NULL") + } else { + write!(f, "BOOLEAN") + } + } +} + impl BooleanType { pub fn new(is_nullable: bool) -> Self { Self { @@ -420,11 +419,6 @@ impl BooleanType { pub fn default_value() -> Self { Self::new(true) } - - pub fn as_sql_string(&self) -> String { - self.element_type - .with_nullability(format_args!("{}", "BOOLEAN")) - } } /// CharType for paimon. @@ -437,6 +431,16 @@ pub struct CharType { length: usize, } +impl Display for CharType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if !self.element_type.is_nullable() { + write!(f, "CHAR({}) NOT NULL", self.length) + } else { + write!(f, "CHAR({})", self.length) + } + } +} + impl CharType { pub const DEFAULT_LENGTH: usize = 1; @@ -455,7 +459,7 @@ impl CharType { Ok(CharType { element_type: DataType { is_nullable, - type_root: DataTypeRoot::Char, + type_root: DataTypeRoot::Char { length }, }, length, }) @@ -473,11 +477,6 @@ impl CharType { pub fn get_length(&self) -> usize { self.length } - - pub fn as_sql_string(&self) -> String { - self.element_type - .with_nullability(format_args!("CHAR({})", self.length)) - } } /// DateType for paimon. @@ -488,6 +487,16 @@ pub struct DateType { element_type: DataType, } +impl Display for DateType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if !self.element_type.is_nullable() { + write!(f, "DATE NOT NULL") + } else { + write!(f, "DATE") + } + } +} + impl DateType { pub fn new(is_nullable: bool) -> Self { Self { @@ -498,11 +507,6 @@ impl DateType { pub fn default_value() -> Self { Self::new(true) } - - pub fn as_sql_string(&self) -> String { - self.element_type - .with_nullability(format_args!("{}", "DATE")) - } } /// DecimalType for paimon. @@ -515,6 +519,16 @@ pub struct DecimalType { scale: u32, } +impl Display for DecimalType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if !self.element_type.is_nullable() { + write!(f, "DECIMAL({}, {}) NOT NULL", self.precision, self.scale) + } else { + write!(f, "DECIMAL({}, {})", self.precision, self.scale) + } + } +} + impl DecimalType { pub const MIN_PRECISION: u32 = 1; @@ -550,7 +564,7 @@ impl DecimalType { Ok(DecimalType { element_type: DataType { is_nullable, - type_root: DataTypeRoot::Decimal, + type_root: DataTypeRoot::Decimal { precision, scale }, }, precision, scale, @@ -572,11 +586,6 @@ impl DecimalType { pub fn get_scale(&self) -> u32 { self.scale } - - pub fn as_sql_string(&self) -> String { - self.element_type - .with_nullability(format_args!("DECIMAL({}, {})", self.precision, self.scale)) - } } /// DoubleType for paimon. @@ -587,6 +596,16 @@ pub struct DoubleType { element_type: DataType, } +impl Display for DoubleType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if !self.element_type.is_nullable() { + write!(f, "DOUBLE NOT NULL") + } else { + write!(f, "DOUBLE") + } + } +} + impl DoubleType { pub fn new(is_nullable: bool) -> Self { Self { @@ -597,11 +616,6 @@ impl DoubleType { pub fn default_value() -> Self { Self::new(true) } - - pub fn as_sql_string(&self) -> String { - self.element_type - .with_nullability(format_args!("{}", "DOUBLE")) - } } /// FloatType for paimon. @@ -612,6 +626,16 @@ pub struct FloatType { element_type: DataType, } +impl Display for FloatType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if !self.element_type.is_nullable() { + write!(f, "FLOAT NOT NULL") + } else { + write!(f, "FLOAT") + } + } +} + impl FloatType { pub fn new(is_nullable: bool) -> Self { Self { @@ -622,11 +646,6 @@ impl FloatType { pub fn default_value() -> Self { Self::new(true) } - - pub fn as_sql_string(&self) -> String { - self.element_type - .with_nullability(format_args!("{}", "FLOAT")) - } } /// IntType for paimon. @@ -637,6 +656,16 @@ pub struct IntType { element_type: DataType, } +impl Display for IntType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if !self.element_type.is_nullable() { + write!(f, "INTEGER NOT NULL") + } else { + write!(f, "INTEGER") + } + } +} + impl IntType { pub fn new(is_nullable: bool) -> Self { Self { @@ -647,11 +676,6 @@ impl IntType { pub fn default_value() -> Self { Self::new(true) } - - pub fn as_sql_string(&self) -> String { - self.element_type - .with_nullability(format_args!("{}", "INTEGER")) - } } /// LocalZonedTimestampType for paimon. @@ -663,6 +687,20 @@ pub struct LocalZonedTimestampType { precision: u32, } +impl Display for LocalZonedTimestampType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if !self.element_type.is_nullable() { + write!( + f, + "TIMESTAMP WITH LOCAL TIME ZONE({}) NOT NULL", + self.precision + ) + } else { + write!(f, "TIMESTAMP WITH LOCAL TIME ZONE({})", self.precision) + } + } +} + impl LocalZonedTimestampType { pub const MIN_PRECISION: u32 = TimestampType::MIN_PRECISION; @@ -703,13 +741,6 @@ impl LocalZonedTimestampType { pub fn get_precision(&self) -> u32 { self.precision } - - pub fn as_sql_string(&self) -> String { - self.element_type.with_nullability(format_args!( - "TIMESTAMP WITH LOCAL TIME ZONE({})", - self.precision - )) - } } /// Next TODO: MapType、MultisetType、RowType @@ -722,6 +753,16 @@ pub struct SmallIntType { element_type: DataType, } +impl Display for SmallIntType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if !self.element_type.is_nullable() { + write!(f, "SMALLINT NOT NULL") + } else { + write!(f, "SMALLINT") + } + } +} + impl SmallIntType { pub fn new(is_nullable: bool) -> Self { Self { @@ -732,11 +773,6 @@ impl SmallIntType { pub fn default_value() -> Self { Self::new(true) } - - pub fn as_sql_string(&self) -> String { - self.element_type - .with_nullability(format_args!("{}", "SMALLINT")) - } } /// TimeType for paimon. @@ -748,6 +784,16 @@ pub struct TimeType { precision: u32, } +impl Display for TimeType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if !self.element_type.is_nullable() { + write!(f, "TIME({}) NOT NULL", self.precision) + } else { + write!(f, "TIME({})", self.precision) + } + } +} + impl TimeType { pub const MIN_PRECISION: u32 = 0; @@ -788,11 +834,6 @@ impl TimeType { pub fn get_precision(&self) -> u32 { self.precision } - - pub fn as_sql_string(&self) -> String { - self.element_type - .with_nullability(format_args!("TIME({})", self.precision)) - } } /// TimestampType for paimon. @@ -804,6 +845,16 @@ pub struct TimestampType { precision: u32, } +impl Display for TimestampType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if !self.element_type.is_nullable() { + write!(f, "TIMESTAMP({}) NOT NULL", self.precision) + } else { + write!(f, "TIMESTAMP({})", self.precision) + } + } +} + impl TimestampType { pub const MIN_PRECISION: u32 = 0; @@ -844,11 +895,6 @@ impl TimestampType { pub fn get_precision(&self) -> u32 { self.precision } - - pub fn as_sql_string(&self) -> String { - self.element_type - .with_nullability(format_args!("TIMESTAMP({})", self.precision)) - } } /// TinyIntType for paimon. @@ -859,6 +905,16 @@ pub struct TinyIntType { element_type: DataType, } +impl Display for TinyIntType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if !self.element_type.is_nullable() { + write!(f, "TINYINT NOT NULL") + } else { + write!(f, "TINYINT") + } + } +} + impl TinyIntType { pub fn new(is_nullable: bool) -> Self { Self { @@ -869,11 +925,6 @@ impl TinyIntType { pub fn default_value() -> Self { Self::new(true) } - - pub fn as_sql_string(&self) -> String { - self.element_type - .with_nullability(format_args!("{}", "TINYINT")) - } } /// VarBinaryType for paimon. @@ -885,6 +936,16 @@ pub struct VarBinaryType { length: u32, } +impl Display for VarBinaryType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if !self.element_type.is_nullable() { + write!(f, "VARBINARY({}) NOT NULL", self.length) + } else { + write!(f, "VARBINARY({})", self.length) + } + } +} + impl VarBinaryType { pub const MIN_LENGTH: u32 = 1; @@ -921,11 +982,6 @@ impl VarBinaryType { pub fn get_length(&self) -> u32 { self.length } - - pub fn as_sql_string(&self) -> String { - self.element_type - .with_nullability(format_args!("VARBINARY({})", self.length)) - } } /// VarCharType for paimon. @@ -937,6 +993,16 @@ pub struct VarCharType { length: u32, } +impl Display for VarCharType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if !self.element_type.is_nullable() { + write!(f, "VARCHAR({}) NOT NULL", self.length) + } else { + write!(f, "VARCHAR({})", self.length) + } + } +} + impl VarCharType { pub const MIN_LENGTH: u32 = 1; @@ -977,9 +1043,4 @@ impl VarCharType { pub fn get_length(&self) -> u32 { self.length } - - pub fn as_sql_string(&self) -> String { - self.element_type - .with_nullability(format_args!("VARCHAR({})", self.length)) - } } From 3b66d34f8ba4754ed19ca87059c680c1258dd4db Mon Sep 17 00:00:00 2001 From: forwardxu Date: Wed, 24 Jul 2024 11:18:49 +0800 Subject: [PATCH 12/18] feat(spec): Add primitive data types --- crates/paimon/src/spec/types.rs | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/crates/paimon/src/spec/types.rs b/crates/paimon/src/spec/types.rs index cb98d85..8f933ae 100644 --- a/crates/paimon/src/spec/types.rs +++ b/crates/paimon/src/spec/types.rs @@ -48,12 +48,12 @@ bitflags! { /// Impl Reference: #[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] pub enum DataTypeRoot { - Char { length: usize }, + Char, Varchar, Boolean, - Binary { length: usize }, + Binary, Varbinary, - Decimal { precision: u32, scale: u32 }, + Decimal, Tinyint, Smallint, Integer, @@ -73,19 +73,12 @@ pub enum DataTypeRoot { impl DataTypeRoot { pub fn families(&self) -> DataTypeFamily { match self { - Self::Char { length: _ } => { - DataTypeFamily::PREDEFINED | DataTypeFamily::CHARACTER_STRING - } + Self::Char => DataTypeFamily::PREDEFINED | DataTypeFamily::CHARACTER_STRING, Self::Varchar => DataTypeFamily::PREDEFINED | DataTypeFamily::CHARACTER_STRING, Self::Boolean => DataTypeFamily::PREDEFINED, - Self::Binary { length: _ } => { - DataTypeFamily::PREDEFINED | DataTypeFamily::BINARY_STRING - } + Self::Binary => DataTypeFamily::PREDEFINED | DataTypeFamily::BINARY_STRING, Self::Varbinary => DataTypeFamily::PREDEFINED | DataTypeFamily::BINARY_STRING, - Self::Decimal { - precision: _, - scale: _, - } => { + Self::Decimal => { DataTypeFamily::PREDEFINED | DataTypeFamily::NUMERIC | DataTypeFamily::EXACT_NUMERIC } Self::Tinyint => { @@ -245,7 +238,7 @@ impl DataType { } fn serialize_json(&self) -> String { - serde_json::to_string(self).unwrap() + serde_json::to_string(&self.to_string()).unwrap() } fn with_nullability(&self, args: Arguments) -> String { @@ -371,7 +364,7 @@ impl BinaryType { Ok(Self { element_type: DataType { is_nullable, - type_root: DataTypeRoot::Binary { length }, + type_root: DataTypeRoot::Binary, }, length, }) @@ -459,7 +452,7 @@ impl CharType { Ok(CharType { element_type: DataType { is_nullable, - type_root: DataTypeRoot::Char { length }, + type_root: DataTypeRoot::Char, }, length, }) @@ -564,7 +557,7 @@ impl DecimalType { Ok(DecimalType { element_type: DataType { is_nullable, - type_root: DataTypeRoot::Decimal { precision, scale }, + type_root: DataTypeRoot::Decimal, }, precision, scale, From f6cb291db01f93e1ef69ba6340393597aa772dac Mon Sep 17 00:00:00 2001 From: forwardxu Date: Thu, 25 Jul 2024 10:17:17 +0800 Subject: [PATCH 13/18] feat(spec): Add primitive data types --- crates/paimon/src/spec/types.rs | 408 ++++++++++++++++---------------- 1 file changed, 202 insertions(+), 206 deletions(-) diff --git a/crates/paimon/src/spec/types.rs b/crates/paimon/src/spec/types.rs index 8f933ae..40883da 100644 --- a/crates/paimon/src/spec/types.rs +++ b/crates/paimon/src/spec/types.rs @@ -18,7 +18,7 @@ use crate::error::Error; use bitflags::bitflags; use serde::{Deserialize, Serialize}; -use std::fmt::{Arguments, Display, Formatter}; +use std::fmt::{Display, Formatter}; use std::str::FromStr; bitflags! { @@ -183,7 +183,7 @@ impl DataType { /// Returns the root of the data type. /// /// Impl Reference: - fn get_type_root(&self) -> &DataTypeRoot { + fn type_root(&self) -> &DataTypeRoot { &self.type_root } @@ -197,7 +197,7 @@ impl DataType { /// Returns whether the family type of the type equals to the family or not. /// /// Impl Reference: - fn is_with_family(&self, family: DataTypeFamily) -> bool { + fn with_family(&self, family: DataTypeFamily) -> bool { self.type_root.families().contains(family) } @@ -213,56 +213,24 @@ impl DataType { fn is_any_of_family(&self, families: &[DataTypeFamily]) -> bool { families .iter() - .any(|f: &DataTypeFamily| self.is_with_family(f.clone())) + .any(|f: &DataTypeFamily| self.with_family(f.clone())) } /// Returns a deep copy of this type with possibly different nullability. /// Impl Reference: - fn copy(&self, is_nullable: bool) -> Self { + fn with_nullable(&self, is_nullable: bool) -> Self { Self { is_nullable, type_root: self.type_root.clone(), } } - /// Returns a deep copy of this type. It requires an implementation of {@link #copy(boolean)}. - /// Impl Reference: - fn copy_with_nullable(&self) -> Self { - self.copy(self.is_nullable) - } - - /// Compare two data types without nullable. - /// Impl Reference: - fn copy_ignore_nullable(&self) -> Self { - self.copy(false) - } - - fn serialize_json(&self) -> String { - serde_json::to_string(&self.to_string()).unwrap() - } - - fn with_nullability(&self, args: Arguments) -> String { - if !self.is_nullable() { - format!("{} NOT NULL", args) - } else { - format!("{}", args) - } - } - fn accept(&self, visitor: &mut T) where T: DataTypeVisitor, { visitor.visit(self); } - - fn not_null(&self) -> Self { - self.copy(false) - } - - fn nullable(&self) -> Self { - self.copy(true) - } } /// ArrayType for paimon. @@ -271,29 +239,31 @@ impl DataType { #[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] #[serde(rename_all = "camelCase")] pub struct ArrayType { - pub element_type: DataType, + element_type: DataType, } impl Display for ArrayType { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - if !self.element_type.is_nullable() { - write!(f, "ARRAY NOT NULL") - } else { + if self.element_type.is_nullable() { write!(f, "ARRAY") + } else { + write!(f, "ARRAY NOT NULL") } } } +impl Default for ArrayType { + fn default() -> Self { + Self::new(true) + } +} + impl ArrayType { pub fn new(is_nullable: bool) -> Self { Self { element_type: DataType::new(is_nullable, DataTypeRoot::Array), } } - - pub fn default_value() -> Self { - Self::new(true) - } } /// BigIntType for paimon. @@ -301,16 +271,22 @@ impl ArrayType { /// Impl Reference: . #[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] pub struct BigIntType { - pub element_type: DataType, + element_type: DataType, } impl Display for BigIntType { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "BIGINT")?; if !self.element_type.is_nullable() { - write!(f, "BIGINT NOT NULL") - } else { - write!(f, "BIGINT") + write!(f, " NOT NULL")?; } + Ok(()) + } +} + +impl Default for BigIntType { + fn default() -> Self { + Self::new(true) } } @@ -320,10 +296,6 @@ impl BigIntType { element_type: DataType::new(is_nullable, DataTypeRoot::Bigint), } } - - pub fn default_value() -> Self { - Self::new(true) - } } /// BinaryType for paimon. @@ -332,53 +304,54 @@ impl BigIntType { #[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] #[serde(rename_all = "camelCase")] pub struct BinaryType { - pub element_type: DataType, + element_type: DataType, length: usize, } impl Display for BinaryType { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "BINARY({})", self.length)?; if !self.element_type.is_nullable() { - write!(f, "BINARY({}) NOT NULL", self.length) - } else { - write!(f, "BINARY({})", self.length) + write!(f, " NOT NULL")?; } + Ok(()) + } +} + +impl Default for BinaryType { + fn default() -> Self { + Self::new(true, Self::DEFAULT_LENGTH) } } impl BinaryType { pub const MIN_LENGTH: usize = 1; - pub const MAX_LENGTH: usize = isize::MAX as usize; + pub const MAX_LENGTH: usize = usize::MAX; pub const DEFAULT_LENGTH: usize = 1; pub fn new(is_nullable: bool, length: usize) -> Self { - Self::new_with_result(is_nullable, length).unwrap() + Self::try_new(is_nullable, length).unwrap() } - pub fn new_with_result(is_nullable: bool, length: usize) -> Result { + pub fn try_new(is_nullable: bool, length: usize) -> Result { if length < Self::MIN_LENGTH { - Err("Binary string length must be at least 1.") - } else { - Ok(Self { - element_type: DataType { - is_nullable, - type_root: DataTypeRoot::Binary, - }, - length, - }) + return Err("Binary string length must be at least 1."); } + Ok(Self { + element_type: DataType { + is_nullable, + type_root: DataTypeRoot::Binary, + }, + length, + }) } pub fn with_length(length: usize) -> Self { Self::new(true, length) } - pub fn default_value() -> Self { - Self::with_length(Self::DEFAULT_LENGTH) - } - pub fn get_length(&self) -> usize { self.length } @@ -394,11 +367,17 @@ pub struct BooleanType { impl Display for BooleanType { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "BOOLEAN")?; if !self.element_type.is_nullable() { - write!(f, "BOOLEAN NOT NULL") - } else { - write!(f, "BOOLEAN") + write!(f, " NOT NULL")?; } + Ok(()) + } +} + +impl Default for BooleanType { + fn default() -> Self { + Self::new(true) } } @@ -408,10 +387,6 @@ impl BooleanType { element_type: DataType::new(is_nullable, DataTypeRoot::Boolean), } } - - pub fn default_value() -> Self { - Self::new(true) - } } /// CharType for paimon. @@ -426,11 +401,17 @@ pub struct CharType { impl Display for CharType { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "CHAR({})", self.length)?; if !self.element_type.is_nullable() { - write!(f, "CHAR({}) NOT NULL", self.length) - } else { - write!(f, "CHAR({})", self.length) + write!(f, " NOT NULL")?; } + Ok(()) + } +} + +impl Default for CharType { + fn default() -> Self { + Self::with_length(Self::DEFAULT_LENGTH) } } @@ -442,32 +423,27 @@ impl CharType { pub const MAX_LENGTH: usize = 255; pub fn new(is_nullable: bool, length: usize) -> Self { - Self::new_with_result(is_nullable, length).unwrap() + Self::try_new(is_nullable, length).unwrap() } - pub fn new_with_result(is_nullable: bool, length: usize) -> Result { + pub fn try_new(is_nullable: bool, length: usize) -> Result { if !(Self::MIN_LENGTH..=Self::MAX_LENGTH).contains(&length) { - Err("Character string length must be between 1 and 255 (both inclusive).") - } else { - Ok(CharType { - element_type: DataType { - is_nullable, - type_root: DataTypeRoot::Char, - }, - length, - }) + return Err("Character string length must be between 1 and 255 (both inclusive)."); } + Ok(CharType { + element_type: DataType { + is_nullable, + type_root: DataTypeRoot::Char, + }, + length, + }) } pub fn with_length(length: usize) -> Self { Self::new(true, length) } - pub fn default_value() -> Self { - Self::with_length(Self::DEFAULT_LENGTH) - } - - pub fn get_length(&self) -> usize { + pub fn length(&self) -> usize { self.length } } @@ -482,11 +458,17 @@ pub struct DateType { impl Display for DateType { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "DATE")?; if !self.element_type.is_nullable() { - write!(f, "DATE NOT NULL") - } else { - write!(f, "DATE") + write!(f, " NOT NULL")?; } + Ok(()) + } +} + +impl Default for DateType { + fn default() -> Self { + Self::new(true) } } @@ -496,10 +478,6 @@ impl DateType { element_type: DataType::new(is_nullable, DataTypeRoot::Date), } } - - pub fn default_value() -> Self { - Self::new(true) - } } /// DecimalType for paimon. @@ -514,11 +492,17 @@ pub struct DecimalType { impl Display for DecimalType { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "DECIMAL({}, {})", self.precision, self.scale)?; if !self.element_type.is_nullable() { - write!(f, "DECIMAL({}, {}) NOT NULL", self.precision, self.scale) - } else { - write!(f, "DECIMAL({}, {})", self.precision, self.scale) + write!(f, " NOT NULL")?; } + Ok(()) + } +} + +impl Default for DecimalType { + fn default() -> Self { + Self::with_precision_and_scale(Self::DEFAULT_PRECISION, Self::DEFAULT_SCALE) } } @@ -534,10 +518,10 @@ impl DecimalType { pub const DEFAULT_SCALE: u32 = 0; pub fn new(is_nullable: bool, precision: u32, scale: u32) -> Self { - Self::new_with_result(is_nullable, precision, scale).unwrap() + Self::try_new(is_nullable, precision, scale).unwrap() } - pub fn new_with_result(is_nullable: bool, precision: u32, scale: u32) -> Result { + pub fn try_new(is_nullable: bool, precision: u32, scale: u32) -> Result { if !(Self::MIN_PRECISION..=Self::MAX_PRECISION).contains(&precision) { return Err(format!( "Decimal precision must be between {} and {} (both inclusive).", @@ -568,15 +552,11 @@ impl DecimalType { Self::new(true, precision, scale) } - pub fn default_value() -> Self { - Self::with_precision_and_scale(Self::DEFAULT_PRECISION, Self::DEFAULT_SCALE) - } - - pub fn get_precision(&self) -> u32 { + pub fn precision(&self) -> u32 { self.precision } - pub fn get_scale(&self) -> u32 { + pub fn scale(&self) -> u32 { self.scale } } @@ -591,11 +571,17 @@ pub struct DoubleType { impl Display for DoubleType { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "DOUBLE")?; if !self.element_type.is_nullable() { - write!(f, "DOUBLE NOT NULL") - } else { - write!(f, "DOUBLE") + write!(f, " NOT NULL")?; } + Ok(()) + } +} + +impl Default for DoubleType { + fn default() -> Self { + Self::new(true) } } @@ -605,10 +591,6 @@ impl DoubleType { element_type: DataType::new(is_nullable, DataTypeRoot::Double), } } - - pub fn default_value() -> Self { - Self::new(true) - } } /// FloatType for paimon. @@ -621,11 +603,17 @@ pub struct FloatType { impl Display for FloatType { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "FLOAT")?; if !self.element_type.is_nullable() { - write!(f, "FLOAT NOT NULL") - } else { - write!(f, "FLOAT") + write!(f, " NOT NULL")?; } + Ok(()) + } +} + +impl Default for FloatType { + fn default() -> Self { + Self::new(true) } } @@ -635,10 +623,6 @@ impl FloatType { element_type: DataType::new(is_nullable, DataTypeRoot::Float), } } - - pub fn default_value() -> Self { - Self::new(true) - } } /// IntType for paimon. @@ -651,11 +635,17 @@ pub struct IntType { impl Display for IntType { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "INTEGER")?; if !self.element_type.is_nullable() { - write!(f, "INTEGER NOT NULL") - } else { - write!(f, "INTEGER") + write!(f, " NOT NULL")?; } + Ok(()) + } +} + +impl Default for IntType { + fn default() -> Self { + Self::new(true) } } @@ -665,10 +655,6 @@ impl IntType { element_type: DataType::new(is_nullable, DataTypeRoot::Integer), } } - - pub fn default_value() -> Self { - Self::new(true) - } } /// LocalZonedTimestampType for paimon. @@ -682,15 +668,17 @@ pub struct LocalZonedTimestampType { impl Display for LocalZonedTimestampType { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "TIMESTAMP WITH LOCAL TIME ZONE({})", self.precision)?; if !self.element_type.is_nullable() { - write!( - f, - "TIMESTAMP WITH LOCAL TIME ZONE({}) NOT NULL", - self.precision - ) - } else { - write!(f, "TIMESTAMP WITH LOCAL TIME ZONE({})", self.precision) + write!(f, " NOT NULL")?; } + Ok(()) + } +} + +impl Default for LocalZonedTimestampType { + fn default() -> Self { + Self::with_precision(Self::DEFAULT_PRECISION) } } @@ -702,10 +690,10 @@ impl LocalZonedTimestampType { pub const DEFAULT_PRECISION: u32 = TimestampType::DEFAULT_PRECISION; pub fn new(is_nullable: bool, precision: u32) -> Self { - LocalZonedTimestampType::new_with_result(is_nullable, precision).unwrap() + LocalZonedTimestampType::try_new(is_nullable, precision).unwrap() } - pub fn new_with_result(is_nullable: bool, precision: u32) -> Result { + pub fn try_new(is_nullable: bool, precision: u32) -> Result { if !(Self::MIN_PRECISION..=Self::MAX_PRECISION).contains(&precision) { return Err(format!( "Timestamp precision must be between {} and {} (both inclusive).", @@ -727,11 +715,7 @@ impl LocalZonedTimestampType { Self::new(true, precision) } - pub fn default_value() -> Self { - Self::with_precision(Self::DEFAULT_PRECISION) - } - - pub fn get_precision(&self) -> u32 { + pub fn precision(&self) -> u32 { self.precision } } @@ -748,11 +732,17 @@ pub struct SmallIntType { impl Display for SmallIntType { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "SMALLINT")?; if !self.element_type.is_nullable() { - write!(f, "SMALLINT NOT NULL") - } else { - write!(f, "SMALLINT") + write!(f, " NOT NULL")?; } + Ok(()) + } +} + +impl Default for SmallIntType { + fn default() -> Self { + Self::new(true) } } @@ -762,10 +752,6 @@ impl SmallIntType { element_type: DataType::new(is_nullable, DataTypeRoot::Smallint), } } - - pub fn default_value() -> Self { - Self::new(true) - } } /// TimeType for paimon. @@ -779,11 +765,17 @@ pub struct TimeType { impl Display for TimeType { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "TIME({})", self.precision)?; if !self.element_type.is_nullable() { - write!(f, "TIME({}) NOT NULL", self.precision) - } else { - write!(f, "TIME({})", self.precision) + write!(f, " NOT NULL")?; } + Ok(()) + } +} + +impl Default for TimeType { + fn default() -> Self { + Self::with_precision(TimeType::DEFAULT_PRECISION) } } @@ -795,10 +787,10 @@ impl TimeType { pub const DEFAULT_PRECISION: u32 = 0; pub fn new(is_nullable: bool, precision: u32) -> Self { - Self::new_with_result(is_nullable, precision).unwrap() + Self::try_new(is_nullable, precision).unwrap() } - pub fn new_with_result(is_nullable: bool, precision: u32) -> Result { + pub fn try_new(is_nullable: bool, precision: u32) -> Result { if !(Self::MIN_PRECISION..=Self::MAX_PRECISION).contains(&precision) { return Err(format!( "Time precision must be between {} and {} (both inclusive).", @@ -820,11 +812,7 @@ impl TimeType { Self::new(true, precision) } - pub fn default_value() -> Self { - Self::with_precision(TimeType::DEFAULT_PRECISION) - } - - pub fn get_precision(&self) -> u32 { + pub fn precision(&self) -> u32 { self.precision } } @@ -840,11 +828,17 @@ pub struct TimestampType { impl Display for TimestampType { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "TIMESTAMP({})", self.precision)?; if !self.element_type.is_nullable() { - write!(f, "TIMESTAMP({}) NOT NULL", self.precision) - } else { - write!(f, "TIMESTAMP({})", self.precision) + write!(f, " NOT NULL")?; } + Ok(()) + } +} + +impl Default for TimestampType { + fn default() -> Self { + Self::with_precision(Self::DEFAULT_PRECISION) } } @@ -856,10 +850,10 @@ impl TimestampType { pub const DEFAULT_PRECISION: u32 = 6; pub fn new(is_nullable: bool, precision: u32) -> Self { - Self::new_with_result(is_nullable, precision).unwrap() + Self::try_new(is_nullable, precision).unwrap() } - pub fn new_with_result(is_nullable: bool, precision: u32) -> Result { + pub fn try_new(is_nullable: bool, precision: u32) -> Result { if !(Self::MIN_PRECISION..=Self::MAX_PRECISION).contains(&precision) { return Err(format!( "Timestamp precision must be between {} and {} (both inclusive).", @@ -881,11 +875,7 @@ impl TimestampType { Self::new(true, precision) } - pub fn default_value() -> Self { - Self::with_precision(Self::DEFAULT_PRECISION) - } - - pub fn get_precision(&self) -> u32 { + pub fn precision(&self) -> u32 { self.precision } } @@ -900,11 +890,17 @@ pub struct TinyIntType { impl Display for TinyIntType { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "TINYINT")?; if !self.element_type.is_nullable() { - write!(f, "TINYINT NOT NULL") - } else { - write!(f, "TINYINT") + write!(f, " NOT NULL")?; } + Ok(()) + } +} + +impl Default for TinyIntType { + fn default() -> Self { + Self::new(true) } } @@ -914,10 +910,6 @@ impl TinyIntType { element_type: DataType::new(is_nullable, DataTypeRoot::Tinyint), } } - - pub fn default_value() -> Self { - Self::new(true) - } } /// VarBinaryType for paimon. @@ -931,11 +923,17 @@ pub struct VarBinaryType { impl Display for VarBinaryType { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "VARBINARY({})", self.length)?; if !self.element_type.is_nullable() { - write!(f, "VARBINARY({}) NOT NULL", self.length) - } else { - write!(f, "VARBINARY({})", self.length) + write!(f, " NOT NULL")?; } + Ok(()) + } +} + +impl Default for VarBinaryType { + fn default() -> Self { + Self::with_length(Self::DEFAULT_LENGTH) } } @@ -947,10 +945,10 @@ impl VarBinaryType { pub const DEFAULT_LENGTH: u32 = 1; pub fn new(is_nullable: bool, length: u32) -> Self { - Self::new_with_result(is_nullable, length).unwrap() + Self::try_new(is_nullable, length).unwrap() } - pub fn new_with_result(is_nullable: bool, length: u32) -> Result { + pub fn try_new(is_nullable: bool, length: u32) -> Result { if length < Self::MIN_LENGTH { return Err("Binary string length must be at least 1.".to_string()); } @@ -968,11 +966,7 @@ impl VarBinaryType { Self::new(true, length) } - pub fn default_value() -> Self { - Self::with_length(Self::DEFAULT_LENGTH) - } - - pub fn get_length(&self) -> u32 { + pub fn length(&self) -> u32 { self.length } } @@ -988,11 +982,17 @@ pub struct VarCharType { impl Display for VarCharType { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "VARCHAR({})", self.length)?; if !self.element_type.is_nullable() { - write!(f, "VARCHAR({}) NOT NULL", self.length) - } else { - write!(f, "VARCHAR({})", self.length) + write!(f, " NOT NULL")?; } + Ok(()) + } +} + +impl Default for VarCharType { + fn default() -> Self { + Self::with_length(Self::DEFAULT_LENGTH) } } @@ -1004,10 +1004,10 @@ impl VarCharType { pub const DEFAULT_LENGTH: u32 = 1; pub fn new(is_nullable: bool, length: u32) -> Self { - Self::new_with_result(is_nullable, length).unwrap() + Self::try_new(is_nullable, length).unwrap() } - pub fn new_with_result(is_nullable: bool, length: u32) -> Result { + pub fn try_new(is_nullable: bool, length: u32) -> Result { if !(Self::MIN_LENGTH..=Self::MAX_LENGTH).contains(&length) { return Err(format!( "Character string length must be between {} and {} (both inclusive).", @@ -1029,11 +1029,7 @@ impl VarCharType { Self::new(true, length) } - pub fn default_value() -> Self { - Self::with_length(Self::DEFAULT_LENGTH) - } - - pub fn get_length(&self) -> u32 { + pub fn length(&self) -> u32 { self.length } } From 48e3b30934168ffe582c6b14b46330fcc89d5a2f Mon Sep 17 00:00:00 2001 From: forwardxu Date: Thu, 25 Jul 2024 10:19:50 +0800 Subject: [PATCH 14/18] feat(spec): Add primitive data types --- crates/paimon/src/spec/types.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/paimon/src/spec/types.rs b/crates/paimon/src/spec/types.rs index 40883da..e0045a5 100644 --- a/crates/paimon/src/spec/types.rs +++ b/crates/paimon/src/spec/types.rs @@ -352,7 +352,7 @@ impl BinaryType { Self::new(true, length) } - pub fn get_length(&self) -> usize { + pub fn length(&self) -> usize { self.length } } From 25479de637cf0c8fc8d7cdae5973d66cde861b16 Mon Sep 17 00:00:00 2001 From: forwardxu Date: Thu, 25 Jul 2024 10:30:27 +0800 Subject: [PATCH 15/18] feat(spec): Add primitive data types --- crates/paimon/src/spec/types.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/paimon/src/spec/types.rs b/crates/paimon/src/spec/types.rs index e0045a5..d97831c 100644 --- a/crates/paimon/src/spec/types.rs +++ b/crates/paimon/src/spec/types.rs @@ -210,7 +210,7 @@ impl DataType { /// Returns whether the root of the type is part of at least one family of the families or not. /// Impl Reference: - fn is_any_of_family(&self, families: &[DataTypeFamily]) -> bool { + fn is_any_with_family(&self, families: &[DataTypeFamily]) -> bool { families .iter() .any(|f: &DataTypeFamily| self.with_family(f.clone())) From b302655633f74e9f88478ec6d99ff5e28bbcd223 Mon Sep 17 00:00:00 2001 From: forwardxu Date: Thu, 25 Jul 2024 10:40:09 +0800 Subject: [PATCH 16/18] feat(spec): Add primitive data types --- crates/paimon/Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/paimon/Cargo.toml b/crates/paimon/Cargo.toml index 4d6cdee..1e33ce0 100644 --- a/crates/paimon/Cargo.toml +++ b/crates/paimon/Cargo.toml @@ -30,7 +30,6 @@ version.workspace = true bitflags = "2.6.0" chrono = {version = "0.4.38", features = ["serde"]} serde = { version = "1", features = ["derive"] } -serde_json = "1.0" serde_with = "3.8.3" snafu = "0.8.3" typed-builder = "^0.18" From 167a048ea502cab0ad587a6ecef86cf2d85067dc Mon Sep 17 00:00:00 2001 From: forwardxu Date: Thu, 25 Jul 2024 11:51:01 +0800 Subject: [PATCH 17/18] feat(spec): Add primitive data types --- crates/paimon/src/spec/types.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/paimon/src/spec/types.rs b/crates/paimon/src/spec/types.rs index d97831c..85a21cc 100644 --- a/crates/paimon/src/spec/types.rs +++ b/crates/paimon/src/spec/types.rs @@ -46,7 +46,7 @@ bitflags! { /// The root of data type. /// /// Impl Reference: -#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize, Hash)] pub enum DataTypeRoot { Char, Varchar, @@ -144,7 +144,7 @@ pub trait DataTypeVisitor { /// Data type for paimon table. /// /// Impl Reference: -#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize, Hash)] pub struct DataType { is_nullable: bool, type_root: DataTypeRoot, @@ -221,7 +221,7 @@ impl DataType { fn with_nullable(&self, is_nullable: bool) -> Self { Self { is_nullable, - type_root: self.type_root.clone(), + type_root: self.type_root, } } From f4f84743979461e749559a7a35d3eb4062e85ca8 Mon Sep 17 00:00:00 2001 From: forwardxu Date: Thu, 25 Jul 2024 14:14:41 +0800 Subject: [PATCH 18/18] feat(spec): Add primitive data types --- crates/paimon/src/spec/types.rs | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/crates/paimon/src/spec/types.rs b/crates/paimon/src/spec/types.rs index 85a21cc..b44dc2d 100644 --- a/crates/paimon/src/spec/types.rs +++ b/crates/paimon/src/spec/types.rs @@ -25,7 +25,7 @@ bitflags! { /// An enumeration of Data type families for clustering {@link DataTypeRoot}s into categories. /// /// Impl Reference: -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct DataTypeFamily: u32 { const PREDEFINED = 1 << 0; const CONSTRUCTED = 1 << 1; @@ -173,6 +173,15 @@ impl DataType { } } + /// Returns a deep copy of this type with possibly different nullability. + /// Impl Reference: + fn with_nullable(&self, is_nullable: bool) -> Self { + Self { + is_nullable, + type_root: self.type_root, + } + } + /// Returns true if the data type is nullable. /// /// Impl Reference: @@ -197,7 +206,7 @@ impl DataType { /// Returns whether the family type of the type equals to the family or not. /// /// Impl Reference: - fn with_family(&self, family: DataTypeFamily) -> bool { + fn is_family(&self, family: DataTypeFamily) -> bool { self.type_root.families().contains(family) } @@ -210,19 +219,8 @@ impl DataType { /// Returns whether the root of the type is part of at least one family of the families or not. /// Impl Reference: - fn is_any_with_family(&self, families: &[DataTypeFamily]) -> bool { - families - .iter() - .any(|f: &DataTypeFamily| self.with_family(f.clone())) - } - - /// Returns a deep copy of this type with possibly different nullability. - /// Impl Reference: - fn with_nullable(&self, is_nullable: bool) -> Self { - Self { - is_nullable, - type_root: self.type_root, - } + fn is_any_of_family(&self, families: &[DataTypeFamily]) -> bool { + families.iter().any(|f: &DataTypeFamily| self.is_family(*f)) } fn accept(&self, visitor: &mut T)