From 6248a6ccf50eb1f14b6f771ae979361e341cd013 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Mon, 11 Nov 2024 16:06:23 +0800 Subject: [PATCH] feat(index): support SQL to specify inverted index columns (#4929) * feat(index): support building inverted index for the field column Signed-off-by: Zhenchi * feat(index): support SQL to specify inverted index columns Signed-off-by: Zhenchi * test: fix sqlness Signed-off-by: Zhenchi * fix: consider compatibility Signed-off-by: Zhenchi * polish Signed-off-by: Zhenchi * compatibility Signed-off-by: Zhenchi * fix Signed-off-by: Zhenchi * fix: ignore case Signed-off-by: Zhenchi * refactor: reduce dup Signed-off-by: Zhenchi * fix: clippy Signed-off-by: Zhenchi --------- Signed-off-by: Zhenchi --- src/api/src/v1/column_def.rs | 40 +++- src/datatypes/src/schema.rs | 2 +- src/datatypes/src/schema/column_schema.rs | 21 +- src/mito2/src/read/scan_region.rs | 25 +-- src/mito2/src/sst/index.rs | 20 +- src/operator/src/expr_factory.rs | 84 ++++++-- src/query/src/sql/show_create_table.rs | 46 ++-- src/sql/src/lib.rs | 1 - src/sql/src/parsers/create_parser.rs | 204 ++++++++++-------- src/sql/src/statements.rs | 34 ++- src/sql/src/statements/create.rs | 49 +++-- src/store-api/src/metadata.rs | 30 +++ .../standalone/common/show/show_create.result | 70 ++++++ .../standalone/common/show/show_create.sql | 28 +++ 14 files changed, 442 insertions(+), 212 deletions(-) diff --git a/src/api/src/v1/column_def.rs b/src/api/src/v1/column_def.rs index b4d3425215c8..3b150e850234 100644 --- a/src/api/src/v1/column_def.rs +++ b/src/api/src/v1/column_def.rs @@ -16,6 +16,7 @@ use std::collections::HashMap; use datatypes::schema::{ ColumnDefaultConstraint, ColumnSchema, FulltextOptions, COMMENT_KEY, FULLTEXT_KEY, + INVERTED_INDEX_KEY, }; use snafu::ResultExt; @@ -25,6 +26,8 @@ use crate::v1::{ColumnDef, ColumnOptions, SemanticType}; /// Key used to store fulltext options in gRPC column options. const FULLTEXT_GRPC_KEY: &str = "fulltext"; +/// Key used to store inverted index options in gRPC column options. +const INVERTED_INDEX_GRPC_KEY: &str = "inverted_index"; /// Tries to construct a `ColumnSchema` from the given `ColumnDef`. pub fn try_as_column_schema(column_def: &ColumnDef) -> Result { @@ -49,10 +52,13 @@ pub fn try_as_column_schema(column_def: &ColumnDef) -> Result { if !column_def.comment.is_empty() { metadata.insert(COMMENT_KEY.to_string(), column_def.comment.clone()); } - if let Some(options) = column_def.options.as_ref() - && let Some(fulltext) = options.options.get(FULLTEXT_GRPC_KEY) - { - metadata.insert(FULLTEXT_KEY.to_string(), fulltext.to_string()); + if let Some(options) = column_def.options.as_ref() { + if let Some(fulltext) = options.options.get(FULLTEXT_GRPC_KEY) { + metadata.insert(FULLTEXT_KEY.to_string(), fulltext.clone()); + } + if let Some(inverted_index) = options.options.get(INVERTED_INDEX_GRPC_KEY) { + metadata.insert(INVERTED_INDEX_KEY.to_string(), inverted_index.clone()); + } } ColumnSchema::new(&column_def.name, data_type.into(), column_def.is_nullable) @@ -70,7 +76,12 @@ pub fn options_from_column_schema(column_schema: &ColumnSchema) -> Option Self { + let _ = self + .metadata + .insert(INVERTED_INDEX_KEY.to_string(), value.to_string()); + self + } + + pub fn is_inverted_indexed(&self) -> bool { + self.metadata + .get(INVERTED_INDEX_KEY) + .map(|v| v.eq_ignore_ascii_case("true")) + .unwrap_or(false) + } + + pub fn has_inverted_index_key(&self) -> bool { + self.metadata.contains_key(INVERTED_INDEX_KEY) + } + /// Set default constraint. /// /// If a default constraint exists for the column, this method will diff --git a/src/mito2/src/read/scan_region.rs b/src/mito2/src/read/scan_region.rs index 1a7fb29b2e56..04dadf924486 100644 --- a/src/mito2/src/read/scan_region.rs +++ b/src/mito2/src/read/scan_region.rs @@ -393,29 +393,20 @@ impl ScanRegion { .and_then(|c| c.index_cache()) .cloned(); - // TODO(zhongzc): currently we only index tag columns, need to support field columns. - let ignore_column_ids = &self - .version - .options - .index_options - .inverted_index - .ignore_column_ids; - let indexed_column_ids = self - .version - .metadata - .primary_key - .iter() - .filter(|id| !ignore_column_ids.contains(id)) - .copied() - .collect::>(); - InvertedIndexApplierBuilder::new( self.access_layer.region_dir().to_string(), self.access_layer.object_store().clone(), file_cache, index_cache, self.version.metadata.as_ref(), - indexed_column_ids, + self.version.metadata.inverted_indexed_column_ids( + self.version + .options + .index_options + .inverted_index + .ignore_column_ids + .iter(), + ), self.access_layer.puffin_manager_factory().clone(), ) .build(&self.request.filters) diff --git a/src/mito2/src/sst/index.rs b/src/mito2/src/sst/index.rs index f0ee66ab01c3..a4f4ab9e446b 100644 --- a/src/mito2/src/sst/index.rs +++ b/src/mito2/src/sst/index.rs @@ -20,7 +20,6 @@ pub(crate) mod puffin_manager; mod statistics; mod store; -use std::collections::HashSet; use std::num::NonZeroUsize; use common_telemetry::{debug, warn}; @@ -213,28 +212,15 @@ impl<'a> IndexerBuilder<'a> { segment_row_count = row_group_size; } - // TODO(zhongzc): currently we only index tag columns, need to support field columns. - let indexed_column_ids = self - .metadata - .primary_key - .iter() - .filter(|id| { - !self - .index_options - .inverted_index - .ignore_column_ids - .contains(id) - }) - .copied() - .collect::>(); - let indexer = InvertedIndexer::new( self.file_id, self.metadata, self.intermediate_manager.clone(), self.inverted_index_config.mem_threshold_on_create(), segment_row_count, - indexed_column_ids, + self.metadata.inverted_indexed_column_ids( + self.index_options.inverted_index.ignore_column_ids.iter(), + ), ); Some(indexer) diff --git a/src/operator/src/expr_factory.rs b/src/operator/src/expr_factory.rs index 499441603f1b..19f9648719bb 100644 --- a/src/operator/src/expr_factory.rs +++ b/src/operator/src/expr_factory.rs @@ -35,10 +35,10 @@ use query::sql::{ use session::context::QueryContextRef; use session::table_name::table_idents_to_full_name; use snafu::{ensure, OptionExt, ResultExt}; -use sql::ast::{ColumnOption, TableConstraint}; +use sql::ast::ColumnOption; use sql::statements::alter::{AlterTable, AlterTableOperation}; use sql::statements::create::{ - Column as SqlColumn, CreateExternalTable, CreateFlow, CreateTable, CreateView, TIME_INDEX, + Column as SqlColumn, CreateExternalTable, CreateFlow, CreateTable, CreateView, TableConstraint, }; use sql::statements::{ column_to_schema, sql_column_def_to_grpc_column_def, sql_data_type_to_concrete_data_type, @@ -130,8 +130,14 @@ pub(crate) async fn create_external_expr( // expanded form let time_index = find_time_index(&create.constraints)?; let primary_keys = find_primary_keys(&create.columns, &create.constraints)?; - let column_schemas = - columns_to_column_schemas(&create.columns, &time_index, Some(&query_ctx.timezone()))?; + let inverted_index_cols = find_inverted_index_cols(&create.columns, &create.constraints)?; + let column_schemas = columns_to_column_schemas( + &create.columns, + &time_index, + &inverted_index_cols, + &primary_keys, + Some(&query_ctx.timezone()), + )?; (time_index, primary_keys, column_schemas) } else { // inferred form @@ -186,6 +192,7 @@ pub fn create_to_expr( ); let primary_keys = find_primary_keys(&create.columns, &create.constraints)?; + let inverted_index_cols = find_inverted_index_cols(&create.columns, &create.constraints)?; let expr = CreateTableExpr { catalog_name, @@ -196,6 +203,7 @@ pub fn create_to_expr( &create.columns, &time_index, &primary_keys, + &inverted_index_cols, Some(&query_ctx.timezone()), )?, time_index, @@ -304,9 +312,9 @@ fn find_primary_keys( let constraints_pk = constraints .iter() .filter_map(|constraint| match constraint { - TableConstraint::PrimaryKey { - name: _, columns, .. - } => Some(columns.iter().map(|ident| ident.value.clone())), + TableConstraint::PrimaryKey { columns, .. } => { + Some(columns.iter().map(|ident| ident.value.clone())) + } _ => None, }) .flatten() @@ -329,20 +337,9 @@ pub fn find_time_index(constraints: &[TableConstraint]) -> Result { let time_index = constraints .iter() .filter_map(|constraint| match constraint { - TableConstraint::Unique { - name: Some(name), - columns, - .. - } => { - if name.value == TIME_INDEX { - Some(columns.iter().map(|ident| &ident.value)) - } else { - None - } - } + TableConstraint::TimeIndex { column, .. } => Some(&column.value), _ => None, }) - .flatten() .collect::>(); ensure!( time_index.len() == 1, @@ -353,25 +350,70 @@ pub fn find_time_index(constraints: &[TableConstraint]) -> Result { Ok(time_index.first().unwrap().to_string()) } +/// Finds the inverted index columns from the constraints. If no inverted index +/// columns are provided in the constraints, return `None`. +fn find_inverted_index_cols( + columns: &[SqlColumn], + constraints: &[TableConstraint], +) -> Result>> { + let inverted_index_cols = constraints.iter().find_map(|constraint| { + if let TableConstraint::InvertedIndex { columns } = constraint { + Some( + columns + .iter() + .map(|ident| ident.value.clone()) + .collect::>(), + ) + } else { + None + } + }); + + let Some(inverted_index_cols) = inverted_index_cols else { + return Ok(None); + }; + + for col in &inverted_index_cols { + if !columns.iter().any(|c| c.name().value == *col) { + return InvalidSqlSnafu { + err_msg: format!("inverted index column `{}` not found in column list", col), + } + .fail(); + } + } + + Ok(Some(inverted_index_cols)) +} + fn columns_to_expr( column_defs: &[SqlColumn], time_index: &str, primary_keys: &[String], + invereted_index_cols: &Option>, timezone: Option<&Timezone>, ) -> Result> { - let column_schemas = columns_to_column_schemas(column_defs, time_index, timezone)?; + let column_schemas = columns_to_column_schemas( + column_defs, + time_index, + invereted_index_cols, + primary_keys, + timezone, + )?; column_schemas_to_defs(column_schemas, primary_keys) } fn columns_to_column_schemas( columns: &[SqlColumn], time_index: &str, + invereted_index_cols: &Option>, + primary_keys: &[String], timezone: Option<&Timezone>, ) -> Result> { columns .iter() .map(|c| { - column_to_schema(c, c.name().to_string() == time_index, timezone).context(ParseSqlSnafu) + column_to_schema(c, time_index, invereted_index_cols, primary_keys, timezone) + .context(ParseSqlSnafu) }) .collect::>>() } diff --git a/src/query/src/sql/show_create_table.rs b/src/query/src/sql/show_create_table.rs index 5e6accc4e458..29cd94766611 100644 --- a/src/query/src/sql/show_create_table.rs +++ b/src/query/src/sql/show_create_table.rs @@ -20,15 +20,12 @@ use common_meta::SchemaOptions; use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, SchemaRef, COMMENT_KEY}; use humantime::format_duration; use snafu::ResultExt; -use sql::ast::{ - ColumnDef, ColumnOption, ColumnOptionDef, Expr, Ident, ObjectName, TableConstraint, -}; +use sql::ast::{ColumnDef, ColumnOption, ColumnOptionDef, Expr, Ident, ObjectName}; use sql::dialect::GreptimeDbDialect; use sql::parser::ParserContext; -use sql::statements::create::{Column, ColumnExtensions, CreateTable, TIME_INDEX}; +use sql::statements::create::{Column, ColumnExtensions, CreateTable, TableConstraint}; use sql::statements::{self, OptionMap}; use sql::{COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE}; -use sqlparser::ast::KeyOrIndexDisplay; use store_api::metric_engine_consts::{is_metric_engine, is_metric_engine_internal_column}; use table::metadata::{TableInfoRef, TableMeta}; use table::requests::{FILE_TABLE_META_KEY, TTL_KEY, WRITE_BUFFER_SIZE_KEY}; @@ -140,14 +137,8 @@ fn create_table_constraints( let mut constraints = Vec::with_capacity(2); if let Some(timestamp_column) = schema.timestamp_column() { let column_name = ×tamp_column.name; - constraints.push(TableConstraint::Unique { - name: Some(TIME_INDEX.into()), - columns: vec![Ident::with_quote(quote_style, column_name)], - characteristics: None, - index_name: None, - index_type_display: KeyOrIndexDisplay::None, - index_type: None, - index_options: vec![], + constraints.push(TableConstraint::TimeIndex { + column: Ident::with_quote(quote_style, column_name), }); } if !table_meta.primary_key_indices.is_empty() { @@ -162,13 +153,22 @@ fn create_table_constraints( } }) .collect(); - constraints.push(TableConstraint::PrimaryKey { - name: None, - columns, - characteristics: None, - index_name: None, - index_type: None, - index_options: vec![], + constraints.push(TableConstraint::PrimaryKey { columns }); + } + + let inverted_index_set = schema + .column_schemas() + .iter() + .any(|c| c.has_inverted_index_key()); + if inverted_index_set { + let inverted_index_cols = schema + .column_schemas() + .iter() + .filter(|c| c.is_inverted_indexed()) + .map(|c| Ident::with_quote(quote_style, &c.name)) + .collect::>(); + constraints.push(TableConstraint::InvertedIndex { + columns: inverted_index_cols, }); } @@ -229,7 +229,8 @@ mod tests { fn test_show_create_table_sql() { let schema = vec![ ColumnSchema::new("id", ConcreteDataType::uint32_datatype(), true), - ColumnSchema::new("host", ConcreteDataType::string_datatype(), true), + ColumnSchema::new("host", ConcreteDataType::string_datatype(), true) + .set_inverted_index(true), ColumnSchema::new("cpu", ConcreteDataType::float64_datatype(), true), ColumnSchema::new("disk", ConcreteDataType::float32_datatype(), true), ColumnSchema::new("msg", ConcreteDataType::string_datatype(), true) @@ -295,7 +296,8 @@ CREATE TABLE IF NOT EXISTS "system_metrics" ( "msg" STRING NULL FULLTEXT WITH(analyzer = 'English', case_sensitive = 'false'), "ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(), TIME INDEX ("ts"), - PRIMARY KEY ("id", "host") + PRIMARY KEY ("id", "host"), + INVERTED INDEX ("host") ) ENGINE=mito "#, diff --git a/src/sql/src/lib.rs b/src/sql/src/lib.rs index 47fcf72cb7a7..117c8b8f5014 100644 --- a/src/sql/src/lib.rs +++ b/src/sql/src/lib.rs @@ -28,4 +28,3 @@ pub use parsers::create_parser::{ COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, ENGINE, MAXVALUE, }; pub use parsers::tql_parser::TQL; -pub use statements::create::TIME_INDEX; diff --git a/src/sql/src/parsers/create_parser.rs b/src/sql/src/parsers/create_parser.rs index 764c93573dc0..c05a7b9711cd 100644 --- a/src/sql/src/parsers/create_parser.rs +++ b/src/sql/src/parsers/create_parser.rs @@ -20,7 +20,7 @@ use datatypes::arrow::datatypes::{DataType as ArrowDataType, IntervalUnit}; use datatypes::data_type::ConcreteDataType; use itertools::Itertools; use snafu::{ensure, OptionExt, ResultExt}; -use sqlparser::ast::{ColumnOption, ColumnOptionDef, DataType, Expr, KeyOrIndexDisplay}; +use sqlparser::ast::{ColumnOption, ColumnOptionDef, DataType, Expr}; use sqlparser::dialect::keywords::Keyword; use sqlparser::keywords::ALL_KEYWORDS; use sqlparser::parser::IsOptional::Mandatory; @@ -29,7 +29,7 @@ use sqlparser::tokenizer::{Token, TokenWithLocation, Word}; use table::requests::validate_table_option; use super::utils; -use crate::ast::{ColumnDef, Ident, TableConstraint}; +use crate::ast::{ColumnDef, Ident}; use crate::error::{ self, InvalidColumnOptionSnafu, InvalidDatabaseOptionSnafu, InvalidIntervalSnafu, InvalidSqlSnafu, InvalidTableOptionSnafu, InvalidTimeIndexSnafu, MissingTimeIndexSnafu, Result, @@ -38,7 +38,7 @@ use crate::error::{ use crate::parser::{ParserContext, FLOW}; use crate::statements::create::{ Column, ColumnExtensions, CreateDatabase, CreateExternalTable, CreateFlow, CreateTable, - CreateTableLike, CreateView, Partitions, TIME_INDEX, + CreateTableLike, CreateView, Partitions, TableConstraint, }; use crate::statements::statement::Statement; use crate::statements::{ @@ -51,6 +51,7 @@ pub const MAXVALUE: &str = "MAXVALUE"; pub const SINK: &str = "SINK"; pub const EXPIRE: &str = "EXPIRE"; pub const AFTER: &str = "AFTER"; +pub const INVERTED: &str = "INVERTED"; const DB_OPT_KEY_TTL: &str = "ttl"; @@ -500,20 +501,11 @@ impl<'a> ParserContext<'a> { ); time_index_opt_idx = Some(index); - let constraint = TableConstraint::Unique { - name: Some(Ident { - value: TIME_INDEX.to_owned(), - quote_style: None, - }), - columns: vec![Ident { + let constraint = TableConstraint::TimeIndex { + column: Ident { value: column.name().value.clone(), quote_style: None, - }], - characteristics: None, - index_name: None, - index_type_display: KeyOrIndexDisplay::None, - index_type: None, - index_options: vec![], + }, }; constraints.push(constraint); } @@ -730,12 +722,6 @@ impl<'a> ParserContext<'a> { } fn parse_optional_table_constraint(&mut self) -> Result> { - let name = if self.parser.parse_keyword(Keyword::CONSTRAINT) { - let raw_name = self.parse_identifier().context(SyntaxSnafu)?; - Some(Self::canonicalize_identifier(raw_name)) - } else { - None - }; match self.parser.next_token() { TokenWithLocation { token: Token::Word(w), @@ -755,14 +741,7 @@ impl<'a> ParserContext<'a> { .into_iter() .map(Self::canonicalize_identifier) .collect(); - Ok(Some(TableConstraint::PrimaryKey { - name, - index_name: None, - index_type: None, - columns, - index_options: vec![], - characteristics: None, - })) + Ok(Some(TableConstraint::PrimaryKey { columns })) } TokenWithLocation { token: Token::Word(w), @@ -779,7 +758,7 @@ impl<'a> ParserContext<'a> { .parser .parse_parenthesized_column_list(Mandatory, false) .context(error::SyntaxSnafu)?; - let columns = raw_columns + let mut columns = raw_columns .into_iter() .map(Self::canonicalize_identifier) .collect::>(); @@ -791,28 +770,35 @@ impl<'a> ParserContext<'a> { } ); - // TODO(dennis): TableConstraint doesn't support dialect right now, - // so we use unique constraint with special key to represent TIME INDEX. - Ok(Some(TableConstraint::Unique { - name: Some(Ident { - value: TIME_INDEX.to_owned(), - quote_style: None, - }), - columns, - characteristics: None, - index_name: None, - index_type_display: KeyOrIndexDisplay::None, - index_type: None, - index_options: vec![], + Ok(Some(TableConstraint::TimeIndex { + column: columns.pop().unwrap(), })) } - unexpected => { - if name.is_some() { - self.expected("PRIMARY, TIME", unexpected) - } else { - self.parser.prev_token(); - Ok(None) - } + TokenWithLocation { + token: Token::Word(w), + .. + } if w.value == INVERTED => { + self.parser + .expect_keyword(Keyword::INDEX) + .context(error::UnexpectedSnafu { + expected: "INDEX", + actual: self.peek_token_as_string(), + })?; + + let raw_columns = self + .parser + // allow empty list to unset inverted index + .parse_parenthesized_column_list(Mandatory, true) + .context(error::SyntaxSnafu)?; + let columns = raw_columns + .into_iter() + .map(Self::canonicalize_identifier) + .collect::>(); + Ok(Some(TableConstraint::InvertedIndex { columns })) + } + _ => { + self.parser.prev_token(); + Ok(None) } } } @@ -842,21 +828,9 @@ impl<'a> ParserContext<'a> { fn validate_time_index(columns: &[Column], constraints: &[TableConstraint]) -> Result<()> { let time_index_constraints: Vec<_> = constraints .iter() - .filter_map(|c| { - if let TableConstraint::Unique { - name: Some(ident), - columns, - .. - } = c - { - if ident.value == TIME_INDEX { - Some(columns) - } else { - None - } - } else { - None - } + .filter_map(|c| match c { + TableConstraint::TimeIndex { column } => Some(column), + _ => None, }) .unique() .collect(); @@ -871,16 +845,10 @@ fn validate_time_index(columns: &[Column], constraints: &[TableConstraint]) -> R ), } ); - ensure!( - time_index_constraints[0].len() == 1, - InvalidTimeIndexSnafu { - msg: "it should contain only one column in time index", - } - ); // It's safe to use time_index_constraints[0][0], // we already check the bound above. - let time_index_column_ident = &time_index_constraints[0][0]; + let time_index_column_ident = &time_index_constraints[0]; let time_index_column = columns .iter() .find(|c| c.name().value == *time_index_column_ident.value) @@ -1120,7 +1088,8 @@ mod tests { cpu float32 default 0, memory float64, TIME INDEX (ts), - PRIMARY KEY(ts, host) + PRIMARY KEY(ts, host), + INVERTED INDEX(host) ) with(location='/var/data/city.csv',format='csv');"; let options = HashMap::from([ @@ -1144,11 +1113,24 @@ mod tests { assert_column_def(&columns[3].column_def, "memory", "FLOAT64"); let constraints = &c.constraints; - assert!(matches!(&constraints[0], TableConstraint::Unique { - name: Some(name), - .. - } if name.value == TIME_INDEX)); - assert_matches!(&constraints[1], TableConstraint::PrimaryKey { .. }); + assert_eq!( + &constraints[0], + &TableConstraint::TimeIndex { + column: Ident::new("ts"), + } + ); + assert_eq!( + &constraints[1], + &TableConstraint::PrimaryKey { + columns: vec![Ident::new("ts"), Ident::new("host")] + } + ); + assert_eq!( + &constraints[2], + &TableConstraint::InvertedIndex { + columns: vec![Ident::new("host")] + } + ); } _ => unreachable!(), } @@ -1478,10 +1460,8 @@ ENGINE=mito"; assert_eq!(c.constraints.len(), 2); let tc = c.constraints[0].clone(); match tc { - TableConstraint::Unique { name, columns, .. } => { - assert_eq!(name.unwrap().to_string(), "__time_index"); - assert_eq!(columns.len(), 1); - assert_eq!(&columns[0].value, "ts"); + TableConstraint::TimeIndex { column } => { + assert_eq!(&column.value, "ts"); } _ => panic!("should be time index constraint"), }; @@ -1679,10 +1659,8 @@ ENGINE=mito"; if let Statement::CreateTable(c) = &result[0] { let tc = c.constraints[0].clone(); match tc { - TableConstraint::Unique { name, columns, .. } => { - assert_eq!(name.unwrap().to_string(), "__time_index"); - assert_eq!(columns.len(), 1); - assert_eq!(&columns[0].value, "ts"); + TableConstraint::TimeIndex { column } => { + assert_eq!(&column.value, "ts"); } _ => panic!("should be time index constraint"), } @@ -1769,7 +1747,9 @@ ENGINE=mito"; cpu float32 default 0, memory float64, TIME INDEX (ts), - PRIMARY KEY(ts, host)) engine=mito + PRIMARY KEY(ts, host), + INVERTED INDEX(host) + ) engine=mito with(ttl='10s'); "; let result = @@ -1789,11 +1769,24 @@ ENGINE=mito"; assert_column_def(&columns[3].column_def, "memory", "FLOAT64"); let constraints = &c.constraints; - assert!(matches!(&constraints[0], TableConstraint::Unique { - name: Some(name), - .. - } if name.value == TIME_INDEX)); - assert_matches!(&constraints[1], TableConstraint::PrimaryKey { .. }); + assert_eq!( + &constraints[0], + &TableConstraint::TimeIndex { + column: Ident::new("ts"), + } + ); + assert_eq!( + &constraints[1], + &TableConstraint::PrimaryKey { + columns: vec![Ident::new("ts"), Ident::new("host")] + } + ); + assert_eq!( + &constraints[2], + &TableConstraint::InvertedIndex { + columns: vec![Ident::new("host")] + } + ); assert_eq!(1, c.options.len()); assert_eq!( [("ttl", "10s")].into_iter().collect::>(), @@ -1851,6 +1844,33 @@ ENGINE=mito"; assert_matches!(result, Err(crate::error::Error::InvalidTimeIndex { .. })); } + #[test] + fn test_inverted_index_empty_list() { + let sql = r"create table demo( + host string, + ts timestamp time index, + cpu float64 default 0, + memory float64, + TIME INDEX (ts), + INVERTED INDEX() + ) engine=mito; + "; + let result = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap(); + + if let Statement::CreateTable(c) = &result[0] { + let tc = &c + .constraints + .iter() + .find(|c| matches!(c, TableConstraint::InvertedIndex { .. })) + .unwrap(); + assert_eq!(*tc, &TableConstraint::InvertedIndex { columns: vec![] }); + } else { + unreachable!("should be create table statement"); + } + } + #[test] fn test_invalid_column_name() { let sql = "create table foo(user string, i timestamp time index)"; diff --git a/src/sql/src/statements.rs b/src/sql/src/statements.rs index e62565d32359..ea565a3b3e08 100644 --- a/src/sql/src/statements.rs +++ b/src/sql/src/statements.rs @@ -453,9 +453,13 @@ pub fn has_primary_key_option(column_def: &ColumnDef) -> bool { /// Create a `ColumnSchema` from `Column`. pub fn column_to_schema( column: &Column, - is_time_index: bool, + time_index: &str, + invereted_index_cols: &Option>, + primary_keys: &[String], timezone: Option<&Timezone>, ) -> Result { + let is_time_index = column.name().value == time_index; + let is_nullable = column .options() .iter() @@ -474,6 +478,20 @@ pub fn column_to_schema( column: &column.name().value, })?; + // To keep compatibility, + // 1. if inverted index columns is not set, leave it empty meaning primary key columns will be used + // 2. if inverted index columns is set and non-empty, set selected columns to be inverted indexed + // 3. if inverted index columns is set and empty, set primary key columns to be non-inverted indexed explicitly + if let Some(inverted_index_cols) = invereted_index_cols { + if inverted_index_cols.is_empty() { + if primary_keys.contains(&column.name().value) { + column_schema = column_schema.set_inverted_index(false); + } + } else if inverted_index_cols.contains(&column.name().value) { + column_schema = column_schema.set_inverted_index(true); + } + } + if let Some(ColumnOption::Comment(c)) = column.options().iter().find_map(|o| { if matches!(o.option, ColumnOption::Comment(_)) { Some(&o.option) @@ -1337,7 +1355,7 @@ mod tests { extensions: ColumnExtensions::default(), }; - let column_schema = column_to_schema(&column_def, false, None).unwrap(); + let column_schema = column_to_schema(&column_def, "ts", &None, &[], None).unwrap(); assert_eq!("col", column_schema.name); assert_eq!( @@ -1347,7 +1365,7 @@ mod tests { assert!(column_schema.is_nullable()); assert!(!column_schema.is_time_index()); - let column_schema = column_to_schema(&column_def, true, None).unwrap(); + let column_schema = column_to_schema(&column_def, "col", &None, &[], None).unwrap(); assert_eq!("col", column_schema.name); assert_eq!( @@ -1376,7 +1394,7 @@ mod tests { extensions: ColumnExtensions::default(), }; - let column_schema = column_to_schema(&column_def, false, None).unwrap(); + let column_schema = column_to_schema(&column_def, "ts", &None, &[], None).unwrap(); assert_eq!("col2", column_schema.name); assert_eq!(ConcreteDataType::string_datatype(), column_schema.data_type); @@ -1410,7 +1428,9 @@ mod tests { let column_schema = column_to_schema( &column, - false, + "ts", + &None, + &[], Some(&Timezone::from_tz_string("Asia/Shanghai").unwrap()), ) .unwrap(); @@ -1429,7 +1449,7 @@ mod tests { ); // without timezone - let column_schema = column_to_schema(&column, false, None).unwrap(); + let column_schema = column_to_schema(&column, "ts", &None, &[], None).unwrap(); assert_eq!("col", column_schema.name); assert_eq!( @@ -1471,7 +1491,7 @@ mod tests { }, }; - let column_schema = column_to_schema(&column, false, None).unwrap(); + let column_schema = column_to_schema(&column, "ts", &None, &[], None).unwrap(); assert_eq!("col", column_schema.name); assert_eq!(ConcreteDataType::string_datatype(), column_schema.data_type); let fulltext_options = column_schema.fulltext_options().unwrap().unwrap(); diff --git a/src/sql/src/statements/create.rs b/src/sql/src/statements/create.rs index a67bcdd54795..bba7c666bf4e 100644 --- a/src/sql/src/statements/create.rs +++ b/src/sql/src/statements/create.rs @@ -20,7 +20,7 @@ use itertools::Itertools; use sqlparser::ast::{ColumnOptionDef, DataType, Expr, Query}; use sqlparser_derive::{Visit, VisitMut}; -use crate::ast::{ColumnDef, Ident, ObjectName, TableConstraint, Value as SqlValue}; +use crate::ast::{ColumnDef, Ident, ObjectName, Value as SqlValue}; use crate::error::{FulltextInvalidOptionSnafu, Result}; use crate::statements::statement::Statement; use crate::statements::OptionMap; @@ -52,31 +52,34 @@ macro_rules! format_list_comma { } fn format_table_constraint(constraints: &[TableConstraint]) -> String { - constraints - .iter() - .map(|c| { - if is_time_index(c) { - let TableConstraint::Unique { columns, .. } = c else { - unreachable!() - }; - - format_indent!("{}TIME INDEX ({})", format_list_comma!(columns)) - } else { - format_indent!(c) - } - }) - .join(LINE_SEP) + constraints.iter().map(|c| format_indent!(c)).join(LINE_SEP) } -/// Time index name, used in table constraints. -pub const TIME_INDEX: &str = "__time_index"; +/// Table constraint for create table statement. +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +pub enum TableConstraint { + /// Primary key constraint. + PrimaryKey { columns: Vec }, + /// Time index constraint. + TimeIndex { column: Ident }, + /// Inverted index constraint. + InvertedIndex { columns: Vec }, +} -#[inline] -pub fn is_time_index(constraint: &TableConstraint) -> bool { - matches!(constraint, TableConstraint::Unique { - name: Some(name), - .. - } if name.value == TIME_INDEX) +impl Display for TableConstraint { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + TableConstraint::PrimaryKey { columns } => { + write!(f, "PRIMARY KEY ({})", format_list_comma!(columns)) + } + TableConstraint::TimeIndex { column } => { + write!(f, "TIME INDEX ({})", column) + } + TableConstraint::InvertedIndex { columns } => { + write!(f, "INVERTED INDEX ({})", format_list_comma!(columns)) + } + } + } } #[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] diff --git a/src/store-api/src/metadata.rs b/src/store-api/src/metadata.rs index 3a08523b900d..936a20a7c3cb 100644 --- a/src/store-api/src/metadata.rs +++ b/src/store-api/src/metadata.rs @@ -323,6 +323,36 @@ impl RegionMetadata { }) } + /// Gets the column ids to be indexed by inverted index. + /// + /// If there is no column with inverted index key, it will use primary key columns. + pub fn inverted_indexed_column_ids<'a>( + &self, + ignore_column_ids: impl Iterator, + ) -> HashSet { + // Default to use primary key columns as inverted index columns. + let pk_as_inverted_index = !self + .column_metadatas + .iter() + .any(|c| c.column_schema.has_inverted_index_key()); + + let mut inverted_index: HashSet<_> = if pk_as_inverted_index { + self.primary_key_columns().map(|c| c.column_id).collect() + } else { + self.column_metadatas + .iter() + .filter(|column| column.column_schema.is_inverted_indexed()) + .map(|column| column.column_id) + .collect() + }; + + for ignored in ignore_column_ids { + inverted_index.remove(ignored); + } + + inverted_index + } + /// Checks whether the metadata is valid. fn validate(&self) -> Result<()> { // Id to name. diff --git a/tests/cases/standalone/common/show/show_create.result b/tests/cases/standalone/common/show/show_create.result index 02830c775142..ec692c0f293a 100644 --- a/tests/cases/standalone/common/show/show_create.result +++ b/tests/cases/standalone/common/show/show_create.result @@ -200,3 +200,73 @@ show create table information_schema.columns; Error: 1001(Unsupported), Show create table only for base table. greptime.information_schema.columns is TEMPORARY +CREATE TABLE "specify_invereted_index_cols" ( + "ts" TIMESTAMP(3) NOT NULL, + "val" DOUBLE NULL, + "host" STRING NULL, + "job" STRING NULL, + TIME INDEX ("ts"), + PRIMARY KEY ("host", "job"), + INVERTED INDEX ("job") +); + +Affected Rows: 0 + +show create table specify_invereted_index_cols; + ++------------------------------+-------------------------------------------------------------+ +| Table | Create Table | ++------------------------------+-------------------------------------------------------------+ +| specify_invereted_index_cols | CREATE TABLE IF NOT EXISTS "specify_invereted_index_cols" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "val" DOUBLE NULL, | +| | "host" STRING NULL, | +| | "job" STRING NULL, | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host", "job"), | +| | INVERTED INDEX ("job") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++------------------------------+-------------------------------------------------------------+ + +drop table specify_invereted_index_cols; + +Affected Rows: 0 + +CREATE TABLE "specify_empty_invereted_index_cols" ( + "ts" TIMESTAMP(3) NOT NULL, + "val" DOUBLE NULL, + "host" STRING NULL, + "job" STRING NULL, + TIME INDEX ("ts"), + PRIMARY KEY ("host", "job"), + INVERTED INDEX () +); + +Affected Rows: 0 + +show create table specify_empty_invereted_index_cols; + ++------------------------------------+-------------------------------------------------------------------+ +| Table | Create Table | ++------------------------------------+-------------------------------------------------------------------+ +| specify_empty_invereted_index_cols | CREATE TABLE IF NOT EXISTS "specify_empty_invereted_index_cols" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "val" DOUBLE NULL, | +| | "host" STRING NULL, | +| | "job" STRING NULL, | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host", "job"), | +| | INVERTED INDEX () | +| | ) | +| | | +| | ENGINE=mito | +| | | ++------------------------------------+-------------------------------------------------------------------+ + +drop table specify_empty_invereted_index_cols; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/show/show_create.sql b/tests/cases/standalone/common/show/show_create.sql index 9a45136ed4ca..45c8f7a3ef4c 100644 --- a/tests/cases/standalone/common/show/show_create.sql +++ b/tests/cases/standalone/common/show/show_create.sql @@ -83,3 +83,31 @@ drop table phy; show create table numbers; show create table information_schema.columns; + +CREATE TABLE "specify_invereted_index_cols" ( + "ts" TIMESTAMP(3) NOT NULL, + "val" DOUBLE NULL, + "host" STRING NULL, + "job" STRING NULL, + TIME INDEX ("ts"), + PRIMARY KEY ("host", "job"), + INVERTED INDEX ("job") +); + +show create table specify_invereted_index_cols; + +drop table specify_invereted_index_cols; + +CREATE TABLE "specify_empty_invereted_index_cols" ( + "ts" TIMESTAMP(3) NOT NULL, + "val" DOUBLE NULL, + "host" STRING NULL, + "job" STRING NULL, + TIME INDEX ("ts"), + PRIMARY KEY ("host", "job"), + INVERTED INDEX () +); + +show create table specify_empty_invereted_index_cols; + +drop table specify_empty_invereted_index_cols;