diff --git a/src/parser/alter.rs b/src/parser/alter.rs index 3ac4ab0c7..1c404d2c8 100644 --- a/src/parser/alter.rs +++ b/src/parser/alter.rs @@ -15,28 +15,71 @@ #[cfg(not(feature = "std"))] use alloc::vec; -use super::{Parser, ParserError}; -use crate::{ - ast::{ - AlterPolicyOperation, AlterRoleOperation, Expr, Password, ResetConfig, RoleOption, - SetConfigValue, Statement, - }, - dialect::{MsSqlDialect, PostgreSqlDialect}, - keywords::Keyword, - tokenizer::Token, -}; +use crate::parser::*; impl Parser<'_> { - pub fn parse_alter_role(&mut self) -> Result { - if dialect_of!(self is PostgreSqlDialect) { - return self.parse_pg_alter_role(); - } else if dialect_of!(self is MsSqlDialect) { - return self.parse_mssql_alter_role(); - } + pub fn parse_alter(&mut self) -> Result { + let object_type = self.expect_one_of_keywords(&[ + Keyword::VIEW, + Keyword::TABLE, + Keyword::INDEX, + Keyword::ROLE, + Keyword::POLICY, + ])?; + match object_type { + Keyword::VIEW => self.parse_alter_view(), + Keyword::TABLE => { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let only = self.parse_keyword(Keyword::ONLY); // [ ONLY ] + let table_name = self.parse_object_name(false)?; + let on_cluster = self.parse_optional_on_cluster()?; + let operations = self.parse_comma_separated(Parser::parse_alter_table_operation)?; - Err(ParserError::ParserError( - "ALTER ROLE is only support for PostgreSqlDialect, MsSqlDialect".into(), - )) + let mut location = None; + if self.parse_keyword(Keyword::LOCATION) { + location = Some(HiveSetLocation { + has_set: false, + location: self.parse_identifier(false)?, + }); + } else if self.parse_keywords(&[Keyword::SET, Keyword::LOCATION]) { + location = Some(HiveSetLocation { + has_set: true, + location: self.parse_identifier(false)?, + }); + } + + Ok(Statement::AlterTable { + name: table_name, + if_exists, + only, + operations, + location, + on_cluster, + }) + } + Keyword::INDEX => { + let index_name = self.parse_object_name(false)?; + let operation = if self.parse_keyword(Keyword::RENAME) { + if self.parse_keyword(Keyword::TO) { + let index_name = self.parse_object_name(false)?; + AlterIndexOperation::RenameIndex { index_name } + } else { + return self.expected("TO after RENAME", self.peek_token()); + } + } else { + return self.expected("RENAME after ALTER INDEX", self.peek_token()); + }; + + Ok(Statement::AlterIndex { + name: index_name, + operation, + }) + } + Keyword::ROLE => self.parse_alter_role(), + Keyword::POLICY => self.parse_alter_policy(), + // unreachable because expect_one_of_keywords used above + _ => unreachable!(), + } } /// Parse ALTER POLICY statement @@ -99,169 +142,462 @@ impl Parser<'_> { } } - fn parse_mssql_alter_role(&mut self) -> Result { - let role_name = self.parse_identifier(false)?; - - let operation = if self.parse_keywords(&[Keyword::ADD, Keyword::MEMBER]) { - let member_name = self.parse_identifier(false)?; - AlterRoleOperation::AddMember { member_name } - } else if self.parse_keywords(&[Keyword::DROP, Keyword::MEMBER]) { - let member_name = self.parse_identifier(false)?; - AlterRoleOperation::DropMember { member_name } - } else if self.parse_keywords(&[Keyword::WITH, Keyword::NAME]) { - if self.consume_token(&Token::Eq) { - let role_name = self.parse_identifier(false)?; - AlterRoleOperation::RenameRole { role_name } - } else { - return self.expected("= after WITH NAME ", self.peek_token()); - } - } else { - return self.expected("'ADD' or 'DROP' or 'WITH NAME'", self.peek_token()); - }; + pub fn parse_alter_role(&mut self) -> 
Result<Statement, ParserError> {
+        if dialect_of!(self is PostgreSqlDialect) {
+            return self.parse_pg_alter_role();
+        } else if dialect_of!(self is MsSqlDialect) {
+            return self.parse_mssql_alter_role();
+        }
-        Ok(Statement::AlterRole {
-            name: role_name,
-            operation,
-        })
+        Err(ParserError::ParserError(
+            "ALTER ROLE is only supported for PostgreSqlDialect, MsSqlDialect".into(),
+        ))
    }

-    fn parse_pg_alter_role(&mut self) -> Result<Statement, ParserError> {
-        let role_name = self.parse_identifier(false)?;
-
-        // [ IN DATABASE _`database_name`_ ]
-        let in_database = if self.parse_keywords(&[Keyword::IN, Keyword::DATABASE]) {
-            self.parse_object_name(false).ok()
-        } else {
-            None
-        };
+    pub fn parse_alter_table_add_projection(&mut self) -> Result<AlterTableOperation, ParserError> {
+        let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
+        let name = self.parse_identifier(false)?;
+        let query = self.parse_projection_select()?;
+        Ok(AlterTableOperation::AddProjection {
+            if_not_exists,
+            name,
+            select: query,
+        })
+    }

-        let operation = if self.parse_keyword(Keyword::RENAME) {
-            if self.parse_keyword(Keyword::TO) {
-                let role_name = self.parse_identifier(false)?;
-                AlterRoleOperation::RenameRole { role_name }
+    pub fn parse_alter_table_operation(&mut self) -> Result<AlterTableOperation, ParserError> {
+        let operation = if self.parse_keyword(Keyword::ADD) {
+            if let Some(constraint) = self.parse_optional_table_constraint()? {
+                AlterTableOperation::AddConstraint(constraint)
+            } else if dialect_of!(self is ClickHouseDialect|GenericDialect)
+                && self.parse_keyword(Keyword::PROJECTION)
+            {
+                return self.parse_alter_table_add_projection();
            } else {
-                return self.expected("TO after RENAME", self.peek_token());
-            }
-            // SET
-        } else if self.parse_keyword(Keyword::SET) {
-            let config_name = self.parse_object_name(false)?;
-            // FROM CURRENT
-            if self.parse_keywords(&[Keyword::FROM, Keyword::CURRENT]) {
-                AlterRoleOperation::Set {
-                    config_name,
-                    config_value: SetConfigValue::FromCurrent,
-                    in_database,
-                }
-            // { TO | = } { value | DEFAULT }
-            } else if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) {
-                if self.parse_keyword(Keyword::DEFAULT) {
-                    AlterRoleOperation::Set {
-                        config_name,
-                        config_value: SetConfigValue::Default,
-                        in_database,
+                let if_not_exists =
+                    self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
+                let mut new_partitions = vec![];
+                loop {
+                    if self.parse_keyword(Keyword::PARTITION) {
+                        new_partitions.push(self.parse_partition()?);
+                    } else {
+                        break;
+                    }
-                } else if let Ok(expr) = self.parse_expr() {
-                    AlterRoleOperation::Set {
-                        config_name,
-                        config_value: SetConfigValue::Value(expr),
-                        in_database,
+                }
+                if !new_partitions.is_empty() {
+                    AlterTableOperation::AddPartitions {
+                        if_not_exists,
+                        new_partitions,
                    }
                } else {
-                    self.expected("config value", self.peek_token())?
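+                    // [ COLUMN ] is optional; we record whether it appeared so the
+                    // statement can round-trip to the original SQL, e.g.
+                    // `ALTER TABLE t ADD COLUMN c INT` vs `ALTER TABLE t ADD c INT`.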
+ let column_keyword = self.parse_keyword(Keyword::COLUMN); + + let if_not_exists = if dialect_of!(self is PostgreSqlDialect | BigQueryDialect | DuckDbDialect | GenericDialect) + { + self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]) + || if_not_exists + } else { + false + }; + + let column_def = self.parse_column_def()?; + + let column_position = self.parse_column_position()?; + + AlterTableOperation::AddColumn { + column_keyword, + if_not_exists, + column_def, + column_position, + } } + } + } else if self.parse_keyword(Keyword::RENAME) { + if dialect_of!(self is PostgreSqlDialect) && self.parse_keyword(Keyword::CONSTRAINT) { + let old_name = self.parse_identifier(false)?; + self.expect_keyword(Keyword::TO)?; + let new_name = self.parse_identifier(false)?; + AlterTableOperation::RenameConstraint { old_name, new_name } + } else if self.parse_keyword(Keyword::TO) { + let table_name = self.parse_object_name(false)?; + AlterTableOperation::RenameTable { table_name } } else { - self.expected("'TO' or '=' or 'FROM CURRENT'", self.peek_token())? + let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] + let old_column_name = self.parse_identifier(false)?; + self.expect_keyword(Keyword::TO)?; + let new_column_name = self.parse_identifier(false)?; + AlterTableOperation::RenameColumn { + old_column_name, + new_column_name, + } } - // RESET - } else if self.parse_keyword(Keyword::RESET) { - if self.parse_keyword(Keyword::ALL) { - AlterRoleOperation::Reset { - config_name: ResetConfig::ALL, - in_database, + } else if self.parse_keyword(Keyword::DISABLE) { + if self.parse_keywords(&[Keyword::ROW, Keyword::LEVEL, Keyword::SECURITY]) { + AlterTableOperation::DisableRowLevelSecurity {} + } else if self.parse_keyword(Keyword::RULE) { + let name = self.parse_identifier(false)?; + AlterTableOperation::DisableRule { name } + } else if self.parse_keyword(Keyword::TRIGGER) { + let name = self.parse_identifier(false)?; + AlterTableOperation::DisableTrigger { name } + } else { + return self.expected( + "ROW LEVEL SECURITY, RULE, or TRIGGER after DISABLE", + self.peek_token(), + ); + } + } else if self.parse_keyword(Keyword::ENABLE) { + if self.parse_keywords(&[Keyword::ALWAYS, Keyword::RULE]) { + let name = self.parse_identifier(false)?; + AlterTableOperation::EnableAlwaysRule { name } + } else if self.parse_keywords(&[Keyword::ALWAYS, Keyword::TRIGGER]) { + let name = self.parse_identifier(false)?; + AlterTableOperation::EnableAlwaysTrigger { name } + } else if self.parse_keywords(&[Keyword::ROW, Keyword::LEVEL, Keyword::SECURITY]) { + AlterTableOperation::EnableRowLevelSecurity {} + } else if self.parse_keywords(&[Keyword::REPLICA, Keyword::RULE]) { + let name = self.parse_identifier(false)?; + AlterTableOperation::EnableReplicaRule { name } + } else if self.parse_keywords(&[Keyword::REPLICA, Keyword::TRIGGER]) { + let name = self.parse_identifier(false)?; + AlterTableOperation::EnableReplicaTrigger { name } + } else if self.parse_keyword(Keyword::RULE) { + let name = self.parse_identifier(false)?; + AlterTableOperation::EnableRule { name } + } else if self.parse_keyword(Keyword::TRIGGER) { + let name = self.parse_identifier(false)?; + AlterTableOperation::EnableTrigger { name } + } else { + return self.expected( + "ALWAYS, REPLICA, ROW LEVEL SECURITY, RULE, or TRIGGER after ENABLE", + self.peek_token(), + ); + } + } else if self.parse_keywords(&[Keyword::CLEAR, Keyword::PROJECTION]) + && dialect_of!(self is ClickHouseDialect|GenericDialect) + { + let if_exists = 
self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let name = self.parse_identifier(false)?; + let partition = if self.parse_keywords(&[Keyword::IN, Keyword::PARTITION]) { + Some(self.parse_identifier(false)?) + } else { + None + }; + AlterTableOperation::ClearProjection { + if_exists, + name, + partition, + } + } else if self.parse_keywords(&[Keyword::MATERIALIZE, Keyword::PROJECTION]) + && dialect_of!(self is ClickHouseDialect|GenericDialect) + { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let name = self.parse_identifier(false)?; + let partition = if self.parse_keywords(&[Keyword::IN, Keyword::PARTITION]) { + Some(self.parse_identifier(false)?) + } else { + None + }; + AlterTableOperation::MaterializeProjection { + if_exists, + name, + partition, + } + } else if self.parse_keyword(Keyword::DROP) { + if self.parse_keywords(&[Keyword::IF, Keyword::EXISTS, Keyword::PARTITION]) { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::DropPartitions { + partitions, + if_exists: true, + } + } else if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::DropPartitions { + partitions, + if_exists: false, } + } else if self.parse_keyword(Keyword::CONSTRAINT) { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let name = self.parse_identifier(false)?; + let cascade = self.parse_keyword(Keyword::CASCADE); + AlterTableOperation::DropConstraint { + if_exists, + name, + cascade, + } + } else if self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) + && dialect_of!(self is MySqlDialect | GenericDialect) + { + AlterTableOperation::DropPrimaryKey + } else if self.parse_keyword(Keyword::PROJECTION) + && dialect_of!(self is ClickHouseDialect|GenericDialect) + { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let name = self.parse_identifier(false)?; + AlterTableOperation::DropProjection { if_exists, name } + } else if self.parse_keywords(&[Keyword::CLUSTERING, Keyword::KEY]) { + AlterTableOperation::DropClusteringKey } else { - let config_name = self.parse_object_name(false)?; - AlterRoleOperation::Reset { - config_name: ResetConfig::ConfigName(config_name), - in_database, + let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let column_name = self.parse_identifier(false)?; + let cascade = self.parse_keyword(Keyword::CASCADE); + AlterTableOperation::DropColumn { + column_name, + if_exists, + cascade, } } - // option - } else { - // [ WITH ] - let _ = self.parse_keyword(Keyword::WITH); - // option - let mut options = vec![]; - while let Some(opt) = self.maybe_parse(|parser| parser.parse_pg_role_option())? 
{ - options.push(opt); + } else if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let before = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + self.expect_keyword(Keyword::RENAME)?; + self.expect_keywords(&[Keyword::TO, Keyword::PARTITION])?; + self.expect_token(&Token::LParen)?; + let renames = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::RenamePartitions { + old_partitions: before, + new_partitions: renames, } - // check option - if options.is_empty() { - return self.expected("option", self.peek_token())?; + } else if self.parse_keyword(Keyword::CHANGE) { + let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] + let old_name = self.parse_identifier(false)?; + let new_name = self.parse_identifier(false)?; + let data_type = self.parse_data_type()?; + let mut options = vec![]; + while let Some(option) = self.parse_optional_column_option()? { + options.push(option); } - AlterRoleOperation::WithOptions { options } - }; + let column_position = self.parse_column_position()?; - Ok(Statement::AlterRole { - name: role_name, - operation, - }) - } + AlterTableOperation::ChangeColumn { + old_name, + new_name, + data_type, + options, + column_position, + } + } else if self.parse_keyword(Keyword::MODIFY) { + let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] + let col_name = self.parse_identifier(false)?; + let data_type = self.parse_data_type()?; + let mut options = vec![]; + while let Some(option) = self.parse_optional_column_option()? { + options.push(option); + } + + let column_position = self.parse_column_position()?; - fn parse_pg_role_option(&mut self) -> Result { - let option = match self.parse_one_of_keywords(&[ - Keyword::BYPASSRLS, - Keyword::NOBYPASSRLS, - Keyword::CONNECTION, - Keyword::CREATEDB, - Keyword::NOCREATEDB, - Keyword::CREATEROLE, - Keyword::NOCREATEROLE, - Keyword::INHERIT, - Keyword::NOINHERIT, - Keyword::LOGIN, - Keyword::NOLOGIN, - Keyword::PASSWORD, - Keyword::REPLICATION, - Keyword::NOREPLICATION, - Keyword::SUPERUSER, - Keyword::NOSUPERUSER, - Keyword::VALID, - ]) { - Some(Keyword::BYPASSRLS) => RoleOption::BypassRLS(true), - Some(Keyword::NOBYPASSRLS) => RoleOption::BypassRLS(false), - Some(Keyword::CONNECTION) => { - self.expect_keyword(Keyword::LIMIT)?; - RoleOption::ConnectionLimit(Expr::Value(self.parse_number_value()?)) + AlterTableOperation::ModifyColumn { + col_name, + data_type, + options, + column_position, } - Some(Keyword::CREATEDB) => RoleOption::CreateDB(true), - Some(Keyword::NOCREATEDB) => RoleOption::CreateDB(false), - Some(Keyword::CREATEROLE) => RoleOption::CreateRole(true), - Some(Keyword::NOCREATEROLE) => RoleOption::CreateRole(false), - Some(Keyword::INHERIT) => RoleOption::Inherit(true), - Some(Keyword::NOINHERIT) => RoleOption::Inherit(false), - Some(Keyword::LOGIN) => RoleOption::Login(true), - Some(Keyword::NOLOGIN) => RoleOption::Login(false), - Some(Keyword::PASSWORD) => { - let password = if self.parse_keyword(Keyword::NULL) { - Password::NullPassword + } else if self.parse_keyword(Keyword::ALTER) { + let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] + let column_name = self.parse_identifier(false)?; + let is_postgresql = dialect_of!(self is PostgreSqlDialect); + + let op: AlterColumnOperation = if self.parse_keywords(&[ + Keyword::SET, + Keyword::NOT, + Keyword::NULL, + ]) { + AlterColumnOperation::SetNotNull {} + } else if self.parse_keywords(&[Keyword::DROP, Keyword::NOT, 
Keyword::NULL]) { + AlterColumnOperation::DropNotNull {} + } else if self.parse_keywords(&[Keyword::SET, Keyword::DEFAULT]) { + AlterColumnOperation::SetDefault { + value: self.parse_expr()?, + } + } else if self.parse_keywords(&[Keyword::DROP, Keyword::DEFAULT]) { + AlterColumnOperation::DropDefault {} + } else if self.parse_keywords(&[Keyword::SET, Keyword::DATA, Keyword::TYPE]) + || (is_postgresql && self.parse_keyword(Keyword::TYPE)) + { + let data_type = self.parse_data_type()?; + let using = if is_postgresql && self.parse_keyword(Keyword::USING) { + Some(self.parse_expr()?) + } else { + None + }; + AlterColumnOperation::SetDataType { data_type, using } + } else if self.parse_keywords(&[Keyword::ADD, Keyword::GENERATED]) { + let generated_as = if self.parse_keyword(Keyword::ALWAYS) { + Some(GeneratedAs::Always) + } else if self.parse_keywords(&[Keyword::BY, Keyword::DEFAULT]) { + Some(GeneratedAs::ByDefault) } else { - Password::Password(Expr::Value(self.parse_value()?)) + None }; - RoleOption::Password(password) + + self.expect_keywords(&[Keyword::AS, Keyword::IDENTITY])?; + + let mut sequence_options: Option> = None; + + if self.peek_token().token == Token::LParen { + self.expect_token(&Token::LParen)?; + sequence_options = Some(self.parse_create_sequence_options()?); + self.expect_token(&Token::RParen)?; + } + + AlterColumnOperation::AddGenerated { + generated_as, + sequence_options, + } + } else { + let message = if is_postgresql { + "SET/DROP NOT NULL, SET DEFAULT, SET DATA TYPE, or ADD GENERATED after ALTER COLUMN" + } else { + "SET/DROP NOT NULL, SET DEFAULT, or SET DATA TYPE after ALTER COLUMN" + }; + + return self.expected(message, self.peek_token()); + }; + AlterTableOperation::AlterColumn { column_name, op } + } else if self.parse_keyword(Keyword::SWAP) { + self.expect_keyword(Keyword::WITH)?; + let table_name = self.parse_object_name(false)?; + AlterTableOperation::SwapWith { table_name } + } else if dialect_of!(self is PostgreSqlDialect | GenericDialect) + && self.parse_keywords(&[Keyword::OWNER, Keyword::TO]) + { + let new_owner = self.parse_owner()?; + AlterTableOperation::OwnerTo { new_owner } + } else if dialect_of!(self is ClickHouseDialect|GenericDialect) + && self.parse_keyword(Keyword::ATTACH) + { + AlterTableOperation::AttachPartition { + partition: self.parse_part_or_partition()?, } - Some(Keyword::REPLICATION) => RoleOption::Replication(true), - Some(Keyword::NOREPLICATION) => RoleOption::Replication(false), - Some(Keyword::SUPERUSER) => RoleOption::SuperUser(true), - Some(Keyword::NOSUPERUSER) => RoleOption::SuperUser(false), - Some(Keyword::VALID) => { - self.expect_keyword(Keyword::UNTIL)?; - RoleOption::ValidUntil(Expr::Value(self.parse_value()?)) + } else if dialect_of!(self is ClickHouseDialect|GenericDialect) + && self.parse_keyword(Keyword::DETACH) + { + AlterTableOperation::DetachPartition { + partition: self.parse_part_or_partition()?, + } + } else if dialect_of!(self is ClickHouseDialect|GenericDialect) + && self.parse_keyword(Keyword::FREEZE) + { + let partition = self.parse_part_or_partition()?; + let with_name = if self.parse_keyword(Keyword::WITH) { + self.expect_keyword(Keyword::NAME)?; + Some(self.parse_identifier(false)?) 
+ } else { + None + }; + AlterTableOperation::FreezePartition { + partition, + with_name, + } + } else if dialect_of!(self is ClickHouseDialect|GenericDialect) + && self.parse_keyword(Keyword::UNFREEZE) + { + let partition = self.parse_part_or_partition()?; + let with_name = if self.parse_keyword(Keyword::WITH) { + self.expect_keyword(Keyword::NAME)?; + Some(self.parse_identifier(false)?) + } else { + None + }; + AlterTableOperation::UnfreezePartition { + partition, + with_name, + } + } else if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { + self.expect_token(&Token::LParen)?; + let exprs = self.parse_comma_separated(|parser| parser.parse_expr())?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::ClusterBy { exprs } + } else if self.parse_keywords(&[Keyword::SUSPEND, Keyword::RECLUSTER]) { + AlterTableOperation::SuspendRecluster + } else if self.parse_keywords(&[Keyword::RESUME, Keyword::RECLUSTER]) { + AlterTableOperation::ResumeRecluster + } else { + let options: Vec = + self.parse_options_with_keywords(&[Keyword::SET, Keyword::TBLPROPERTIES])?; + if !options.is_empty() { + AlterTableOperation::SetTblProperties { + table_properties: options, + } + } else { + return self.expected( + "ADD, RENAME, PARTITION, SWAP, DROP, or SET TBLPROPERTIES after ALTER TABLE", + self.peek_token(), + ); } - _ => self.expected("option", self.peek_token())?, }; + Ok(operation) + } + + pub fn parse_alter_view(&mut self) -> Result { + let name = self.parse_object_name(false)?; + let columns = self.parse_parenthesized_column_list(Optional, false)?; + + let with_options = self.parse_options(Keyword::WITH)?; + + self.expect_keyword(Keyword::AS)?; + let query = self.parse_query()?; - Ok(option) + Ok(Statement::AlterView { + name, + columns, + query, + with_options, + }) + } + + pub fn parse_partition(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + Ok(Partition::Partitions(partitions)) + } + + pub fn parse_projection_select(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + self.expect_keyword(Keyword::SELECT)?; + let projection = self.parse_projection()?; + let group_by = self.parse_optional_group_by()?; + let order_by = self.parse_optional_order_by()?; + self.expect_token(&Token::RParen)?; + Ok(ProjectionSelect { + projection, + group_by, + order_by, + }) + } + + fn parse_column_position(&mut self) -> Result, ParserError> { + if dialect_of!(self is MySqlDialect | GenericDialect) { + if self.parse_keyword(Keyword::FIRST) { + Ok(Some(MySQLColumnPosition::First)) + } else if self.parse_keyword(Keyword::AFTER) { + let ident = self.parse_identifier(false)?; + Ok(Some(MySQLColumnPosition::After(ident))) + } else { + Ok(None) + } + } else { + Ok(None) + } + } + + fn parse_part_or_partition(&mut self) -> Result { + let keyword = self.expect_one_of_keywords(&[Keyword::PART, Keyword::PARTITION])?; + match keyword { + Keyword::PART => Ok(Partition::Part(self.parse_expr()?)), + Keyword::PARTITION => Ok(Partition::Expr(self.parse_expr()?)), + // unreachable because expect_one_of_keywords used above + _ => unreachable!(), + } } } diff --git a/src/parser/analyze.rs b/src/parser/analyze.rs new file mode 100644 index 000000000..2bc4a5d17 --- /dev/null +++ b/src/parser/analyze.rs @@ -0,0 +1,59 @@ +use crate::parser::*; + +impl Parser<'_> { + pub fn parse_analyze(&mut self) -> Result { + self.expect_keyword(Keyword::TABLE)?; + let table_name = self.parse_object_name(false)?; + let 
mut for_columns = false; + let mut cache_metadata = false; + let mut noscan = false; + let mut partitions = None; + let mut compute_statistics = false; + let mut columns = vec![]; + loop { + match self.parse_one_of_keywords(&[ + Keyword::PARTITION, + Keyword::FOR, + Keyword::CACHE, + Keyword::NOSCAN, + Keyword::COMPUTE, + ]) { + Some(Keyword::PARTITION) => { + self.expect_token(&Token::LParen)?; + partitions = Some(self.parse_comma_separated(Parser::parse_expr)?); + self.expect_token(&Token::RParen)?; + } + Some(Keyword::NOSCAN) => noscan = true, + Some(Keyword::FOR) => { + self.expect_keyword(Keyword::COLUMNS)?; + + columns = self + .maybe_parse(|parser| { + parser.parse_comma_separated(|p| p.parse_identifier(false)) + })? + .unwrap_or_default(); + for_columns = true + } + Some(Keyword::CACHE) => { + self.expect_keyword(Keyword::METADATA)?; + cache_metadata = true + } + Some(Keyword::COMPUTE) => { + self.expect_keyword(Keyword::STATISTICS)?; + compute_statistics = true + } + _ => break, + } + } + + Ok(Statement::Analyze { + table_name, + for_columns, + columns, + partitions, + cache_metadata, + noscan, + compute_statistics, + }) + } +} diff --git a/src/parser/assert.rs b/src/parser/assert.rs new file mode 100644 index 000000000..735352ab0 --- /dev/null +++ b/src/parser/assert.rs @@ -0,0 +1,14 @@ +use crate::parser::*; + +impl Parser<'_> { + pub fn parse_assert(&mut self) -> Result { + let condition = self.parse_expr()?; + let message = if self.parse_keyword(Keyword::AS) { + Some(self.parse_expr()?) + } else { + None + }; + + Ok(Statement::Assert { condition, message }) + } +} diff --git a/src/parser/assignment.rs b/src/parser/assignment.rs new file mode 100644 index 000000000..edfce42e2 --- /dev/null +++ b/src/parser/assignment.rs @@ -0,0 +1,23 @@ +use crate::parser::*; + +impl Parser<'_> { + /// Parse a `var = expr` assignment, used in an UPDATE statement + pub fn parse_assignment(&mut self) -> Result { + let target = self.parse_assignment_target()?; + self.expect_token(&Token::Eq)?; + let value = self.parse_expr()?; + Ok(Assignment { target, value }) + } + + /// Parse the left-hand side of an assignment, used in an UPDATE statement + pub fn parse_assignment_target(&mut self) -> Result { + if self.consume_token(&Token::LParen) { + let columns = self.parse_comma_separated(|p| p.parse_object_name(false))?; + self.expect_token(&Token::RParen)?; + Ok(AssignmentTarget::Tuple(columns)) + } else { + let column = self.parse_object_name(false)?; + Ok(AssignmentTarget::ColumnName(column)) + } + } +} diff --git a/src/parser/attach.rs b/src/parser/attach.rs new file mode 100644 index 000000000..8b1cb9672 --- /dev/null +++ b/src/parser/attach.rs @@ -0,0 +1,15 @@ +use crate::parser::*; + +impl Parser<'_> { + pub fn parse_attach_database(&mut self) -> Result { + let database = self.parse_keyword(Keyword::DATABASE); + let database_file_name = self.parse_expr()?; + self.expect_keyword(Keyword::AS)?; + let schema_name = self.parse_identifier(false)?; + Ok(Statement::AttachDatabase { + database, + schema_name, + database_file_name, + }) + } +} diff --git a/src/parser/cache.rs b/src/parser/cache.rs new file mode 100644 index 000000000..140ea0e5c --- /dev/null +++ b/src/parser/cache.rs @@ -0,0 +1,93 @@ +use crate::parser::*; + +impl Parser<'_> { + /// Parse a CACHE TABLE statement + pub fn parse_cache_table(&mut self) -> Result { + let (mut table_flag, mut options, mut has_as, mut query) = (None, vec![], false, None); + if self.parse_keyword(Keyword::TABLE) { + let table_name = 
self.parse_object_name(false)?;
+            if self.peek_token().token != Token::EOF {
+                if let Token::Word(word) = self.peek_token().token {
+                    if word.keyword == Keyword::OPTIONS {
+                        options = self.parse_options(Keyword::OPTIONS)?
+                    }
+                };
+
+                if self.peek_token().token != Token::EOF {
+                    let (a, q) = self.parse_as_query()?;
+                    has_as = a;
+                    query = Some(q);
+                }
+
+                Ok(Statement::Cache {
+                    table_flag,
+                    table_name,
+                    has_as,
+                    options,
+                    query,
+                })
+            } else {
+                Ok(Statement::Cache {
+                    table_flag,
+                    table_name,
+                    has_as,
+                    options,
+                    query,
+                })
+            }
+        } else {
+            table_flag = Some(self.parse_object_name(false)?);
+            if self.parse_keyword(Keyword::TABLE) {
+                let table_name = self.parse_object_name(false)?;
+                if self.peek_token() != Token::EOF {
+                    if let Token::Word(word) = self.peek_token().token {
+                        if word.keyword == Keyword::OPTIONS {
+                            options = self.parse_options(Keyword::OPTIONS)?
+                        }
+                    };
+
+                    if self.peek_token() != Token::EOF {
+                        let (a, q) = self.parse_as_query()?;
+                        has_as = a;
+                        query = Some(q);
+                    }
+
+                    Ok(Statement::Cache {
+                        table_flag,
+                        table_name,
+                        has_as,
+                        options,
+                        query,
+                    })
+                } else {
+                    Ok(Statement::Cache {
+                        table_flag,
+                        table_name,
+                        has_as,
+                        options,
+                        query,
+                    })
+                }
+            } else {
+                if self.peek_token() == Token::EOF {
+                    self.prev_token();
+                }
+                self.expected("a `TABLE` keyword", self.peek_token())
+            }
+        }
+    }
+
+    /// Parse an optional 'AS' before a query, such as `WITH XXX AS SELECT XXX` or `CACHE TABLE AS SELECT XXX`
+    pub fn parse_as_query(&mut self) -> Result<(bool, Box<Query>), ParserError> {
+        match self.peek_token().token {
+            Token::Word(word) => match word.keyword {
+                Keyword::AS => {
+                    self.next_token();
+                    Ok((true, self.parse_query()?))
+                }
+                _ => Ok((false, self.parse_query()?)),
+            },
+            _ => self.expected("a QUERY statement", self.peek_token()),
+        }
+    }
+}
diff --git a/src/parser/call.rs b/src/parser/call.rs
new file mode 100644
index 000000000..9d101513d
--- /dev/null
+++ b/src/parser/call.rs
@@ -0,0 +1,82 @@
+use crate::parser::*;
+
+use crate::parser_err;
+
+impl Parser<'_> {
+    /// Parse a `CALL procedure_name(arg1, arg2, ...)`
+    /// or `CALL procedure_name` statement
+    pub fn parse_call(&mut self) -> Result<Statement, ParserError> {
+        let object_name = self.parse_object_name(false)?;
+        if self.peek_token().token == Token::LParen {
+            match self.parse_function(object_name)?
{ + Expr::Function(f) => Ok(Statement::Call(f)), + other => parser_err!( + format!("Expected a simple procedure call but found: {other}"), + self.peek_token().span.start + ), + } + } else { + Ok(Statement::Call(Function { + name: object_name, + parameters: FunctionArguments::None, + args: FunctionArguments::None, + over: None, + filter: None, + null_treatment: None, + within_group: vec![], + })) + } + } + + pub fn parse_function_desc(&mut self) -> Result { + let name = self.parse_object_name(false)?; + + let args = if self.consume_token(&Token::LParen) { + if self.consume_token(&Token::RParen) { + None + } else { + let args = self.parse_comma_separated(Parser::parse_function_arg)?; + self.expect_token(&Token::RParen)?; + Some(args) + } + } else { + None + }; + + Ok(FunctionDesc { name, args }) + } + + pub(crate) fn parse_function_arg(&mut self) -> Result { + let mode = if self.parse_keyword(Keyword::IN) { + Some(ArgMode::In) + } else if self.parse_keyword(Keyword::OUT) { + Some(ArgMode::Out) + } else if self.parse_keyword(Keyword::INOUT) { + Some(ArgMode::InOut) + } else { + None + }; + + // parse: [ argname ] argtype + let mut name = None; + let mut data_type = self.parse_data_type()?; + if let DataType::Custom(n, _) = &data_type { + // the first token is actually a name + name = Some(n.0[0].clone()); + data_type = self.parse_data_type()?; + } + + let default_expr = if self.parse_keyword(Keyword::DEFAULT) || self.consume_token(&Token::Eq) + { + Some(self.parse_expr()?) + } else { + None + }; + Ok(OperateFunctionArg { + mode, + name, + data_type, + default_expr, + }) + } +} diff --git a/src/parser/close.rs b/src/parser/close.rs new file mode 100644 index 000000000..630c6fa6a --- /dev/null +++ b/src/parser/close.rs @@ -0,0 +1,15 @@ +use crate::parser::*; + +impl Parser<'_> { + pub fn parse_close(&mut self) -> Result { + let cursor = if self.parse_keyword(Keyword::ALL) { + CloseCursor::All + } else { + let name = self.parse_identifier(false)?; + + CloseCursor::Specific { name } + }; + + Ok(Statement::Close { cursor }) + } +} diff --git a/src/parser/columns.rs b/src/parser/columns.rs new file mode 100644 index 000000000..ad3c0caca --- /dev/null +++ b/src/parser/columns.rs @@ -0,0 +1,433 @@ +use super::*; + +impl Parser<'_> { + pub fn parse_columns(&mut self) -> Result<(Vec, Vec), ParserError> { + let mut columns = vec![]; + let mut constraints = vec![]; + if !self.consume_token(&Token::LParen) || self.consume_token(&Token::RParen) { + return Ok((columns, constraints)); + } + + loop { + if let Some(constraint) = self.parse_optional_table_constraint()? { + constraints.push(constraint); + } else if let Token::Word(_) = self.peek_token().token { + columns.push(self.parse_column_def()?); + } else { + return self.expected("column name or constraint definition", self.peek_token()); + } + + let comma = self.consume_token(&Token::Comma); + let rparen = self.peek_token().token == Token::RParen; + + if !comma && !rparen { + return self.expected("',' or ')' after column definition", self.peek_token()); + }; + + if rparen && (!comma || self.options.trailing_commas) { + let _ = self.consume_token(&Token::RParen); + break; + } + } + + Ok((columns, constraints)) + } + + pub fn parse_column_def(&mut self) -> Result { + let name = self.parse_identifier(false)?; + let data_type = if self.is_column_type_sqlite_unspecified() { + DataType::Unspecified + } else { + self.parse_data_type()? + }; + let mut collation = if self.parse_keyword(Keyword::COLLATE) { + Some(self.parse_object_name(false)?) 
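+        // `name type COLLATE collation` places COLLATE directly after the type;
+        // dialects that also accept COLLATE after other column options are
+        // handled in the option loop below.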
+        } else {
+            None
+        };
+        let mut options = vec![];
+        loop {
+            if self.parse_keyword(Keyword::CONSTRAINT) {
+                let name = Some(self.parse_identifier(false)?);
+                if let Some(option) = self.parse_optional_column_option()? {
+                    options.push(ColumnOptionDef { name, option });
+                } else {
+                    return self.expected(
+                        "constraint details after CONSTRAINT <name>",
+                        self.peek_token(),
+                    );
+                }
+            } else if let Some(option) = self.parse_optional_column_option()? {
+                options.push(ColumnOptionDef { name: None, option });
+            } else if dialect_of!(self is MySqlDialect | SnowflakeDialect | GenericDialect)
+                && self.parse_keyword(Keyword::COLLATE)
+            {
+                collation = Some(self.parse_object_name(false)?);
+            } else {
+                break;
+            };
+        }
+        Ok(ColumnDef {
+            name,
+            data_type,
+            collation,
+            options,
+        })
+    }
+
+    fn is_column_type_sqlite_unspecified(&mut self) -> bool {
+        if dialect_of!(self is SQLiteDialect) {
+            match self.peek_token().token {
+                Token::Word(word) => matches!(
+                    word.keyword,
+                    Keyword::CONSTRAINT
+                        | Keyword::PRIMARY
+                        | Keyword::NOT
+                        | Keyword::UNIQUE
+                        | Keyword::CHECK
+                        | Keyword::DEFAULT
+                        | Keyword::COLLATE
+                        | Keyword::REFERENCES
+                        | Keyword::GENERATED
+                        | Keyword::AS
+                ),
+                _ => true, // e.g. comma immediately after column name
+            }
+        } else {
+            false
+        }
+    }
+
+    pub fn parse_optional_column_option(&mut self) -> Result<Option<ColumnOption>, ParserError> {
+        if let Some(option) = self.dialect.parse_column_option(self)? {
+            return option;
+        }
+
+        if self.parse_keywords(&[Keyword::CHARACTER, Keyword::SET]) {
+            Ok(Some(ColumnOption::CharacterSet(
+                self.parse_object_name(false)?,
+            )))
+        } else if self.parse_keywords(&[Keyword::NOT, Keyword::NULL]) {
+            Ok(Some(ColumnOption::NotNull))
+        } else if self.parse_keywords(&[Keyword::COMMENT]) {
+            let next_token = self.next_token();
+            match next_token.token {
+                Token::SingleQuotedString(value, ..) => Ok(Some(ColumnOption::Comment(value))),
+                _ => self.expected("string", next_token),
+            }
+        } else if self.parse_keyword(Keyword::NULL) {
+            Ok(Some(ColumnOption::Null))
+        } else if self.parse_keyword(Keyword::DEFAULT) {
+            Ok(Some(ColumnOption::Default(self.parse_expr()?)))
+        } else if dialect_of!(self is ClickHouseDialect| GenericDialect)
+            && self.parse_keyword(Keyword::MATERIALIZED)
+        {
+            Ok(Some(ColumnOption::Materialized(self.parse_expr()?)))
+        } else if dialect_of!(self is ClickHouseDialect| GenericDialect)
+            && self.parse_keyword(Keyword::ALIAS)
+        {
+            Ok(Some(ColumnOption::Alias(self.parse_expr()?)))
+        } else if dialect_of!(self is ClickHouseDialect| GenericDialect)
+            && self.parse_keyword(Keyword::EPHEMERAL)
+        {
+            // The expression is optional for the EPHEMERAL syntax, so we need to check
+            // if the column definition has remaining tokens before parsing the expression.
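+            // e.g. ClickHouse accepts both a bare `c UInt32 EPHEMERAL` (next token
+            // is `,` or `)`) and `c UInt32 EPHEMERAL expr` with a default expression.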
+ if matches!(self.peek_token().token, Token::Comma | Token::RParen) { + Ok(Some(ColumnOption::Ephemeral(None))) + } else { + Ok(Some(ColumnOption::Ephemeral(Some(self.parse_expr()?)))) + } + } else if self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) { + let characteristics = self.parse_constraint_characteristics()?; + Ok(Some(ColumnOption::Unique { + is_primary: true, + characteristics, + })) + } else if self.parse_keyword(Keyword::UNIQUE) { + let characteristics = self.parse_constraint_characteristics()?; + Ok(Some(ColumnOption::Unique { + is_primary: false, + characteristics, + })) + } else if self.parse_keyword(Keyword::REFERENCES) { + let foreign_table = self.parse_object_name(false)?; + // PostgreSQL allows omitting the column list and + // uses the primary key column of the foreign table by default + let referred_columns = self.parse_parenthesized_column_list(Optional, false)?; + let mut on_delete = None; + let mut on_update = None; + loop { + if on_delete.is_none() && self.parse_keywords(&[Keyword::ON, Keyword::DELETE]) { + on_delete = Some(self.parse_referential_action()?); + } else if on_update.is_none() + && self.parse_keywords(&[Keyword::ON, Keyword::UPDATE]) + { + on_update = Some(self.parse_referential_action()?); + } else { + break; + } + } + let characteristics = self.parse_constraint_characteristics()?; + + Ok(Some(ColumnOption::ForeignKey { + foreign_table, + referred_columns, + on_delete, + on_update, + characteristics, + })) + } else if self.parse_keyword(Keyword::CHECK) { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Ok(Some(ColumnOption::Check(expr))) + } else if self.parse_keyword(Keyword::AUTO_INCREMENT) + && dialect_of!(self is MySqlDialect | GenericDialect) + { + // Support AUTO_INCREMENT for MySQL + Ok(Some(ColumnOption::DialectSpecific(vec![ + Token::make_keyword("AUTO_INCREMENT"), + ]))) + } else if self.parse_keyword(Keyword::AUTOINCREMENT) + && dialect_of!(self is SQLiteDialect | GenericDialect) + { + // Support AUTOINCREMENT for SQLite + Ok(Some(ColumnOption::DialectSpecific(vec![ + Token::make_keyword("AUTOINCREMENT"), + ]))) + } else if self.parse_keyword(Keyword::ASC) + && self.dialect.supports_asc_desc_in_column_definition() + { + // Support ASC for SQLite + Ok(Some(ColumnOption::DialectSpecific(vec![ + Token::make_keyword("ASC"), + ]))) + } else if self.parse_keyword(Keyword::DESC) + && self.dialect.supports_asc_desc_in_column_definition() + { + // Support DESC for SQLite + Ok(Some(ColumnOption::DialectSpecific(vec![ + Token::make_keyword("DESC"), + ]))) + } else if self.parse_keywords(&[Keyword::ON, Keyword::UPDATE]) + && dialect_of!(self is MySqlDialect | GenericDialect) + { + let expr = self.parse_expr()?; + Ok(Some(ColumnOption::OnUpdate(expr))) + } else if self.parse_keyword(Keyword::GENERATED) { + self.parse_optional_column_option_generated() + } else if dialect_of!(self is BigQueryDialect | GenericDialect) + && self.parse_keyword(Keyword::OPTIONS) + { + self.prev_token(); + Ok(Some(ColumnOption::Options( + self.parse_options(Keyword::OPTIONS)?, + ))) + } else if self.parse_keyword(Keyword::AS) + && dialect_of!(self is MySqlDialect | SQLiteDialect | DuckDbDialect | GenericDialect) + { + self.parse_optional_column_option_as() + } else if self.parse_keyword(Keyword::IDENTITY) + && dialect_of!(self is MsSqlDialect | GenericDialect) + { + let parameters = if self.consume_token(&Token::LParen) { + let seed = self.parse_number()?; + self.expect_token(&Token::Comma)?; + let increment 
= self.parse_number()?; + self.expect_token(&Token::RParen)?; + + Some(IdentityPropertyFormatKind::FunctionCall( + IdentityParameters { seed, increment }, + )) + } else { + None + }; + Ok(Some(ColumnOption::Identity( + IdentityPropertyKind::Identity(IdentityProperty { + parameters, + order: None, + }), + ))) + } else if dialect_of!(self is SQLiteDialect | GenericDialect) + && self.parse_keywords(&[Keyword::ON, Keyword::CONFLICT]) + { + // Support ON CONFLICT for SQLite + Ok(Some(ColumnOption::OnConflict( + self.expect_one_of_keywords(&[ + Keyword::ROLLBACK, + Keyword::ABORT, + Keyword::FAIL, + Keyword::IGNORE, + Keyword::REPLACE, + ])?, + ))) + } else { + Ok(None) + } + } + + pub(crate) fn parse_tag(&mut self) -> Result { + let name = self.parse_identifier(false)?; + self.expect_token(&Token::Eq)?; + let value = self.parse_literal_string()?; + + Ok(Tag::new(name, value)) + } + + fn parse_optional_column_option_generated( + &mut self, + ) -> Result, ParserError> { + if self.parse_keywords(&[Keyword::ALWAYS, Keyword::AS, Keyword::IDENTITY]) { + let mut sequence_options = vec![]; + if self.expect_token(&Token::LParen).is_ok() { + sequence_options = self.parse_create_sequence_options()?; + self.expect_token(&Token::RParen)?; + } + Ok(Some(ColumnOption::Generated { + generated_as: GeneratedAs::Always, + sequence_options: Some(sequence_options), + generation_expr: None, + generation_expr_mode: None, + generated_keyword: true, + })) + } else if self.parse_keywords(&[ + Keyword::BY, + Keyword::DEFAULT, + Keyword::AS, + Keyword::IDENTITY, + ]) { + let mut sequence_options = vec![]; + if self.expect_token(&Token::LParen).is_ok() { + sequence_options = self.parse_create_sequence_options()?; + self.expect_token(&Token::RParen)?; + } + Ok(Some(ColumnOption::Generated { + generated_as: GeneratedAs::ByDefault, + sequence_options: Some(sequence_options), + generation_expr: None, + generation_expr_mode: None, + generated_keyword: true, + })) + } else if self.parse_keywords(&[Keyword::ALWAYS, Keyword::AS]) { + if self.expect_token(&Token::LParen).is_ok() { + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + let (gen_as, expr_mode) = if self.parse_keywords(&[Keyword::STORED]) { + Ok(( + GeneratedAs::ExpStored, + Some(GeneratedExpressionMode::Stored), + )) + } else if dialect_of!(self is PostgreSqlDialect) { + // Postgres' AS IDENTITY branches are above, this one needs STORED + self.expected("STORED", self.peek_token()) + } else if self.parse_keywords(&[Keyword::VIRTUAL]) { + Ok((GeneratedAs::Always, Some(GeneratedExpressionMode::Virtual))) + } else { + Ok((GeneratedAs::Always, None)) + }?; + + Ok(Some(ColumnOption::Generated { + generated_as: gen_as, + sequence_options: None, + generation_expr: Some(expr), + generation_expr_mode: expr_mode, + generated_keyword: true, + })) + } else { + Ok(None) + } + } else { + Ok(None) + } + } + + fn parse_optional_column_option_as(&mut self) -> Result, ParserError> { + // Some DBs allow 'AS (expr)', shorthand for GENERATED ALWAYS AS + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + + let (gen_as, expr_mode) = if self.parse_keywords(&[Keyword::STORED]) { + ( + GeneratedAs::ExpStored, + Some(GeneratedExpressionMode::Stored), + ) + } else if self.parse_keywords(&[Keyword::VIRTUAL]) { + (GeneratedAs::Always, Some(GeneratedExpressionMode::Virtual)) + } else { + (GeneratedAs::Always, None) + }; + + Ok(Some(ColumnOption::Generated { + generated_as: gen_as, + sequence_options: None, + 
generation_expr: Some(expr), + generation_expr_mode: expr_mode, + generated_keyword: false, + })) + } + + /// Parses a parenthesized, comma-separated list of column definitions within a view. + pub(crate) fn parse_view_columns(&mut self) -> Result, ParserError> { + if self.consume_token(&Token::LParen) { + if self.peek_token().token == Token::RParen { + self.next_token(); + Ok(vec![]) + } else { + let cols = self.parse_comma_separated(Parser::parse_view_column)?; + self.expect_token(&Token::RParen)?; + Ok(cols) + } + } else { + Ok(vec![]) + } + } + + /// Parses a column definition within a view. + fn parse_view_column(&mut self) -> Result { + let name = self.parse_identifier(false)?; + let options = if (dialect_of!(self is BigQueryDialect | GenericDialect) + && self.parse_keyword(Keyword::OPTIONS)) + || (dialect_of!(self is SnowflakeDialect | GenericDialect) + && self.parse_keyword(Keyword::COMMENT)) + { + self.prev_token(); + self.parse_optional_column_option()? + .map(|option| vec![option]) + } else { + None + }; + let data_type = if dialect_of!(self is ClickHouseDialect) { + Some(self.parse_data_type()?) + } else { + None + }; + Ok(ViewColumnDef { + name, + data_type, + options, + }) + } + + /// Parse a parenthesized comma-separated list of unqualified, possibly quoted identifiers + pub fn parse_parenthesized_column_list( + &mut self, + optional: IsOptional, + allow_empty: bool, + ) -> Result, ParserError> { + if self.consume_token(&Token::LParen) { + if allow_empty && self.peek_token().token == Token::RParen { + self.next_token(); + Ok(vec![]) + } else { + let cols = self.parse_comma_separated(|p| p.parse_identifier(false))?; + self.expect_token(&Token::RParen)?; + Ok(cols) + } + } else if optional == Optional { + Ok(vec![]) + } else { + self.expected("a list of columns in parentheses", self.peek_token()) + } + } +} diff --git a/src/parser/comment.rs b/src/parser/comment.rs new file mode 100644 index 000000000..c55f16c99 --- /dev/null +++ b/src/parser/comment.rs @@ -0,0 +1,48 @@ +use crate::parser::*; + +impl Parser<'_> { + pub fn parse_comment(&mut self) -> Result { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + + self.expect_keyword(Keyword::ON)?; + let token = self.next_token(); + + let (object_type, object_name) = match token.token { + Token::Word(w) if w.keyword == Keyword::COLUMN => { + (CommentObject::Column, self.parse_object_name(false)?) + } + Token::Word(w) if w.keyword == Keyword::TABLE => { + (CommentObject::Table, self.parse_object_name(false)?) + } + Token::Word(w) if w.keyword == Keyword::EXTENSION => { + (CommentObject::Extension, self.parse_object_name(false)?) + } + Token::Word(w) if w.keyword == Keyword::SCHEMA => { + (CommentObject::Schema, self.parse_object_name(false)?) + } + Token::Word(w) if w.keyword == Keyword::DATABASE => { + (CommentObject::Database, self.parse_object_name(false)?) + } + Token::Word(w) if w.keyword == Keyword::USER => { + (CommentObject::User, self.parse_object_name(false)?) + } + Token::Word(w) if w.keyword == Keyword::ROLE => { + (CommentObject::Role, self.parse_object_name(false)?) + } + _ => self.expected("comment object_type", token)?, + }; + + self.expect_keyword(Keyword::IS)?; + let comment = if self.parse_keyword(Keyword::NULL) { + None + } else { + Some(self.parse_literal_string()?) 
+ }; + Ok(Statement::Comment { + object_type, + object_name, + comment, + if_exists, + }) + } +} diff --git a/src/parser/commit.rs b/src/parser/commit.rs new file mode 100644 index 000000000..0b208732a --- /dev/null +++ b/src/parser/commit.rs @@ -0,0 +1,9 @@ +use crate::parser::*; + +impl Parser<'_> { + pub fn parse_commit(&mut self) -> Result { + Ok(Statement::Commit { + chain: self.parse_commit_rollback_chain()?, + }) + } +} diff --git a/src/parser/copy.rs b/src/parser/copy.rs new file mode 100644 index 000000000..1bc318a7b --- /dev/null +++ b/src/parser/copy.rs @@ -0,0 +1,224 @@ +use super::*; + +use crate::parser_err; + +impl Parser<'_> { + /// Parse a copy statement + pub fn parse_copy(&mut self) -> Result { + let source; + if self.consume_token(&Token::LParen) { + source = CopySource::Query(self.parse_query()?); + self.expect_token(&Token::RParen)?; + } else { + let table_name = self.parse_object_name(false)?; + let columns = self.parse_parenthesized_column_list(Optional, false)?; + source = CopySource::Table { + table_name, + columns, + }; + } + let to = match self.parse_one_of_keywords(&[Keyword::FROM, Keyword::TO]) { + Some(Keyword::FROM) => false, + Some(Keyword::TO) => true, + _ => self.expected("FROM or TO", self.peek_token())?, + }; + if !to { + // Use a separate if statement to prevent Rust compiler from complaining about + // "if statement in this position is unstable: https://github.com/rust-lang/rust/issues/53667" + if let CopySource::Query(_) = source { + return Err(ParserError::ParserError( + "COPY ... FROM does not support query as a source".to_string(), + )); + } + } + let target = if self.parse_keyword(Keyword::STDIN) { + CopyTarget::Stdin + } else if self.parse_keyword(Keyword::STDOUT) { + CopyTarget::Stdout + } else if self.parse_keyword(Keyword::PROGRAM) { + CopyTarget::Program { + command: self.parse_literal_string()?, + } + } else { + CopyTarget::File { + filename: self.parse_literal_string()?, + } + }; + let _ = self.parse_keyword(Keyword::WITH); // [ WITH ] + let mut options = vec![]; + if self.consume_token(&Token::LParen) { + options = self.parse_comma_separated(Parser::parse_copy_option)?; + self.expect_token(&Token::RParen)?; + } + let mut legacy_options = vec![]; + while let Some(opt) = self.maybe_parse(|parser| parser.parse_copy_legacy_option())? 
{ + legacy_options.push(opt); + } + let values = if let CopyTarget::Stdin = target { + self.expect_token(&Token::SemiColon)?; + self.parse_tsv() + } else { + vec![] + }; + Ok(Statement::Copy { + source, + to, + target, + options, + legacy_options, + values, + }) + } + + fn parse_copy_option(&mut self) -> Result { + let ret = match self.parse_one_of_keywords(&[ + Keyword::FORMAT, + Keyword::FREEZE, + Keyword::DELIMITER, + Keyword::NULL, + Keyword::HEADER, + Keyword::QUOTE, + Keyword::ESCAPE, + Keyword::FORCE_QUOTE, + Keyword::FORCE_NOT_NULL, + Keyword::FORCE_NULL, + Keyword::ENCODING, + ]) { + Some(Keyword::FORMAT) => CopyOption::Format(self.parse_identifier(false)?), + Some(Keyword::FREEZE) => CopyOption::Freeze(!matches!( + self.parse_one_of_keywords(&[Keyword::TRUE, Keyword::FALSE]), + Some(Keyword::FALSE) + )), + Some(Keyword::DELIMITER) => CopyOption::Delimiter(self.parse_literal_char()?), + Some(Keyword::NULL) => CopyOption::Null(self.parse_literal_string()?), + Some(Keyword::HEADER) => CopyOption::Header(!matches!( + self.parse_one_of_keywords(&[Keyword::TRUE, Keyword::FALSE]), + Some(Keyword::FALSE) + )), + Some(Keyword::QUOTE) => CopyOption::Quote(self.parse_literal_char()?), + Some(Keyword::ESCAPE) => CopyOption::Escape(self.parse_literal_char()?), + Some(Keyword::FORCE_QUOTE) => { + CopyOption::ForceQuote(self.parse_parenthesized_column_list(Mandatory, false)?) + } + Some(Keyword::FORCE_NOT_NULL) => { + CopyOption::ForceNotNull(self.parse_parenthesized_column_list(Mandatory, false)?) + } + Some(Keyword::FORCE_NULL) => { + CopyOption::ForceNull(self.parse_parenthesized_column_list(Mandatory, false)?) + } + Some(Keyword::ENCODING) => CopyOption::Encoding(self.parse_literal_string()?), + _ => self.expected("option", self.peek_token())?, + }; + Ok(ret) + } + + fn parse_copy_legacy_option(&mut self) -> Result { + let ret = match self.parse_one_of_keywords(&[ + Keyword::BINARY, + Keyword::DELIMITER, + Keyword::NULL, + Keyword::CSV, + ]) { + Some(Keyword::BINARY) => CopyLegacyOption::Binary, + Some(Keyword::DELIMITER) => { + let _ = self.parse_keyword(Keyword::AS); // [ AS ] + CopyLegacyOption::Delimiter(self.parse_literal_char()?) + } + Some(Keyword::NULL) => { + let _ = self.parse_keyword(Keyword::AS); // [ AS ] + CopyLegacyOption::Null(self.parse_literal_string()?) + } + Some(Keyword::CSV) => CopyLegacyOption::Csv({ + let mut opts = vec![]; + while let Some(opt) = + self.maybe_parse(|parser| parser.parse_copy_legacy_csv_option())? + { + opts.push(opt); + } + opts + }), + _ => self.expected("option", self.peek_token())?, + }; + Ok(ret) + } + + fn parse_copy_legacy_csv_option(&mut self) -> Result { + let ret = match self.parse_one_of_keywords(&[ + Keyword::HEADER, + Keyword::QUOTE, + Keyword::ESCAPE, + Keyword::FORCE, + ]) { + Some(Keyword::HEADER) => CopyLegacyCsvOption::Header, + Some(Keyword::QUOTE) => { + let _ = self.parse_keyword(Keyword::AS); // [ AS ] + CopyLegacyCsvOption::Quote(self.parse_literal_char()?) + } + Some(Keyword::ESCAPE) => { + let _ = self.parse_keyword(Keyword::AS); // [ AS ] + CopyLegacyCsvOption::Escape(self.parse_literal_char()?) 
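+                // same optional-AS shape as the QUOTE arm above: `ESCAPE [AS] 'char'`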
+            }
+            Some(Keyword::FORCE) if self.parse_keywords(&[Keyword::NOT, Keyword::NULL]) => {
+                CopyLegacyCsvOption::ForceNotNull(
+                    self.parse_comma_separated(|p| p.parse_identifier(false))?,
+                )
+            }
+            Some(Keyword::FORCE) if self.parse_keywords(&[Keyword::QUOTE]) => {
+                CopyLegacyCsvOption::ForceQuote(
+                    self.parse_comma_separated(|p| p.parse_identifier(false))?,
+                )
+            }
+            _ => self.expected("csv option", self.peek_token())?,
+        };
+        Ok(ret)
+    }
+
+    fn parse_literal_char(&mut self) -> Result<char, ParserError> {
+        let s = self.parse_literal_string()?;
+        if s.len() != 1 {
+            let loc = self
+                .tokens
+                .get(self.index - 1)
+                .map_or(Location { line: 0, column: 0 }, |t| t.span.start);
+            return parser_err!(format!("Expected a char, found {s:?}"), loc);
+        }
+        Ok(s.chars().next().unwrap())
+    }
+
+    /// Parse the tab-separated values in a
+    /// COPY payload
+    pub fn parse_tsv(&mut self) -> Vec<Option<String>> {
+        self.parse_tab_value()
+    }
+
+    pub fn parse_tab_value(&mut self) -> Vec<Option<String>> {
+        let mut values = vec![];
+        let mut content = String::from("");
+        while let Some(t) = self.next_token_no_skip().map(|t| &t.token) {
+            match t {
+                Token::Whitespace(Whitespace::Tab) => {
+                    values.push(Some(content.to_string()));
+                    content.clear();
+                }
+                Token::Whitespace(Whitespace::Newline) => {
+                    values.push(Some(content.to_string()));
+                    content.clear();
+                }
+                Token::Backslash => {
+                    if self.consume_token(&Token::Period) {
+                        return values;
+                    }
+                    if let Token::Word(w) = self.next_token().token {
+                        if w.value == "N" {
+                            values.push(None);
+                        }
+                    }
+                }
+                _ => {
+                    content.push_str(&t.to_string());
+                }
+            }
+        }
+        values
+    }
+}
diff --git a/src/parser/create.rs b/src/parser/create.rs
new file mode 100644
index 000000000..c6deab598
--- /dev/null
+++ b/src/parser/create.rs
@@ -0,0 +1,1669 @@
+use super::*;
+
+use crate::parser_err;
+
+impl Parser<'_> {
+    /// Parse a SQL CREATE statement
+    pub fn parse_create(&mut self) -> Result<Statement, ParserError> {
+        let or_replace = self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]);
+        let or_alter = self.parse_keywords(&[Keyword::OR, Keyword::ALTER]);
+        let local = self.parse_one_of_keywords(&[Keyword::LOCAL]).is_some();
+        let global = self.parse_one_of_keywords(&[Keyword::GLOBAL]).is_some();
+        let transient = self.parse_one_of_keywords(&[Keyword::TRANSIENT]).is_some();
+        let global: Option<bool> = if global {
+            Some(true)
+        } else if local {
+            Some(false)
+        } else {
+            None
+        };
+        let temporary = self
+            .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY])
+            .is_some();
+        let persistent = dialect_of!(self is DuckDbDialect)
+            && self.parse_one_of_keywords(&[Keyword::PERSISTENT]).is_some();
+        if self.parse_keyword(Keyword::TABLE) {
+            self.parse_create_table(or_replace, temporary, global, transient)
+        } else if self.parse_keyword(Keyword::MATERIALIZED) || self.parse_keyword(Keyword::VIEW) {
+            self.prev_token();
+            self.parse_create_view(or_replace, temporary)
+        } else if self.parse_keyword(Keyword::POLICY) {
+            self.parse_create_policy()
+        } else if self.parse_keyword(Keyword::EXTERNAL) {
+            self.parse_create_external_table(or_replace)
+        } else if self.parse_keyword(Keyword::FUNCTION) {
+            self.parse_create_function(or_replace, temporary)
+        } else if self.parse_keyword(Keyword::TRIGGER) {
+            self.parse_create_trigger(or_replace, false)
+        } else if self.parse_keywords(&[Keyword::CONSTRAINT, Keyword::TRIGGER]) {
+            self.parse_create_trigger(or_replace, true)
+        } else if self.parse_keyword(Keyword::MACRO) {
+            self.parse_create_macro(or_replace, temporary)
+        } else if self.parse_keyword(Keyword::SECRET) {
+            self.parse_create_secret(or_replace, temporary,
persistent) + } else if or_replace { + self.expected( + "[EXTERNAL] TABLE or [MATERIALIZED] VIEW or FUNCTION after CREATE OR REPLACE", + self.peek_token(), + ) + } else if self.parse_keyword(Keyword::EXTENSION) { + self.parse_create_extension() + } else if self.parse_keyword(Keyword::INDEX) { + self.parse_create_index(false) + } else if self.parse_keywords(&[Keyword::UNIQUE, Keyword::INDEX]) { + self.parse_create_index(true) + } else if self.parse_keyword(Keyword::VIRTUAL) { + self.parse_create_virtual_table() + } else if self.parse_keyword(Keyword::SCHEMA) { + self.parse_create_schema() + } else if self.parse_keyword(Keyword::DATABASE) { + self.parse_create_database() + } else if self.parse_keyword(Keyword::ROLE) { + self.parse_create_role() + } else if self.parse_keyword(Keyword::SEQUENCE) { + self.parse_create_sequence(temporary) + } else if self.parse_keyword(Keyword::TYPE) { + self.parse_create_type() + } else if self.parse_keyword(Keyword::PROCEDURE) { + self.parse_create_procedure(or_alter) + } else { + self.expected("an object type after CREATE", self.peek_token()) + } + } + + pub fn parse_create_procedure(&mut self, or_alter: bool) -> Result { + let name = self.parse_object_name(false)?; + let params = self.parse_optional_procedure_parameters()?; + self.expect_keyword(Keyword::AS)?; + self.expect_keyword(Keyword::BEGIN)?; + let statements = self.parse_statements()?; + self.expect_keyword(Keyword::END)?; + Ok(Statement::CreateProcedure { + name, + or_alter, + params, + body: statements, + }) + } + + /// ```sql + /// CREATE [ { TEMPORARY | TEMP } ] SEQUENCE [ IF NOT EXISTS ] + /// ``` + /// + /// See [Postgres docs](https://www.postgresql.org/docs/current/sql-createsequence.html) for more details. + pub fn parse_create_sequence(&mut self, temporary: bool) -> Result { + //[ IF NOT EXISTS ] + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + //name + let name = self.parse_object_name(false)?; + //[ AS data_type ] + let mut data_type: Option = None; + if self.parse_keywords(&[Keyword::AS]) { + data_type = Some(self.parse_data_type()?) + } + let sequence_options = self.parse_create_sequence_options()?; + // [ OWNED BY { table_name.column_name | NONE } ] + let owned_by = if self.parse_keywords(&[Keyword::OWNED, Keyword::BY]) { + if self.parse_keywords(&[Keyword::NONE]) { + Some(ObjectName(vec![Ident::new("NONE")])) + } else { + Some(self.parse_object_name(false)?) + } + } else { + None + }; + Ok(Statement::CreateSequence { + temporary, + if_not_exists, + name, + data_type, + sequence_options, + owned_by, + }) + } + + pub fn parse_create_schema(&mut self) -> Result { + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + + let schema_name = self.parse_schema_name()?; + + Ok(Statement::CreateSchema { + schema_name, + if_not_exists, + }) + } + + pub fn parse_create_type(&mut self) -> Result { + let name = self.parse_object_name(false)?; + self.expect_keyword(Keyword::AS)?; + + let mut attributes = vec![]; + if !self.consume_token(&Token::LParen) || self.consume_token(&Token::RParen) { + return Ok(Statement::CreateType { + name, + representation: UserDefinedTypeRepresentation::Composite { attributes }, + }); + } + + loop { + let attr_name = self.parse_identifier(false)?; + let attr_data_type = self.parse_data_type()?; + let attr_collation = if self.parse_keyword(Keyword::COLLATE) { + Some(self.parse_object_name(false)?) 
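+            // composite attributes may carry a collation, e.g.
+            // `CREATE TYPE t AS (a TEXT COLLATE "de_DE", b INT)`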
+ } else {
+ None
+ };
+ attributes.push(UserDefinedTypeCompositeAttributeDef {
+ name: attr_name,
+ data_type: attr_data_type,
+ collation: attr_collation,
+ });
+ let comma = self.consume_token(&Token::Comma);
+ if self.consume_token(&Token::RParen) {
+ // allow a trailing comma
+ break;
+ } else if !comma {
+ return self.expected("',' or ')' after attribute definition", self.peek_token());
+ }
+ }
+
+ Ok(Statement::CreateType {
+ name,
+ representation: UserDefinedTypeRepresentation::Composite { attributes },
+ })
+ }
+
+ pub fn parse_index_options(&mut self) -> Result<Vec<IndexOption>, ParserError> {
+ let mut options = Vec::new();
+
+ loop {
+ match self.parse_optional_index_option()? {
+ Some(index_option) => options.push(index_option),
+ None => return Ok(options),
+ }
+ }
+ }
+
+ pub fn parse_index_type(&mut self) -> Result<IndexType, ParserError> {
+ if self.parse_keyword(Keyword::BTREE) {
+ Ok(IndexType::BTree)
+ } else if self.parse_keyword(Keyword::HASH) {
+ Ok(IndexType::Hash)
+ } else {
+ self.expected("index type {BTREE | HASH}", self.peek_token())
+ }
+ }
+
+ #[must_use]
+ pub fn parse_index_type_display(&mut self) -> KeyOrIndexDisplay {
+ if self.parse_keyword(Keyword::KEY) {
+ KeyOrIndexDisplay::Key
+ } else if self.parse_keyword(Keyword::INDEX) {
+ KeyOrIndexDisplay::Index
+ } else {
+ KeyOrIndexDisplay::None
+ }
+ }
+
+ pub fn parse_optional_index_option(&mut self) -> Result<Option<IndexOption>, ParserError> {
+ if let Some(index_type) = self.parse_optional_using_then_index_type()? {
+ Ok(Some(IndexOption::Using(index_type)))
+ } else if self.parse_keyword(Keyword::COMMENT) {
+ let s = self.parse_literal_string()?;
+ Ok(Some(IndexOption::Comment(s)))
+ } else {
+ Ok(None)
+ }
+ }
+
+ pub fn parse_optional_inline_comment(&mut self) -> Result<Option<CommentDef>, ParserError> {
+ let comment = if self.parse_keyword(Keyword::COMMENT) {
+ let has_eq = self.consume_token(&Token::Eq);
+ let next_token = self.next_token();
+ match next_token.token {
+ Token::SingleQuotedString(str) => Some(if has_eq {
+ CommentDef::WithEq(str)
+ } else {
+ CommentDef::WithoutEq(str)
+ }),
+ _ => self.expected("comment", next_token)?,
+ }
+ } else {
+ None
+ };
+ Ok(comment)
+ }
+
+ pub fn parse_optional_procedure_parameters(
+ &mut self,
+ ) -> Result<Option<Vec<ProcedureParam>>, ParserError> {
+ let mut params = vec![];
+ if !self.consume_token(&Token::LParen) || self.consume_token(&Token::RParen) {
+ return Ok(Some(params));
+ }
+ loop {
+ if let Token::Word(_) = self.peek_token().token {
+ params.push(self.parse_procedure_param()?)
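+ // Anything other than a word token falls through to the ',' / ')' handling below.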
+ } + let comma = self.consume_token(&Token::Comma); + if self.consume_token(&Token::RParen) { + // allow a trailing comma, even though it's not in standard + break; + } else if !comma { + return self.expected("',' or ')' after parameter definition", self.peek_token()); + } + } + Ok(Some(params)) + } + + /// Parse [USING {BTREE | HASH}] + pub fn parse_optional_using_then_index_type( + &mut self, + ) -> Result, ParserError> { + if self.parse_keyword(Keyword::USING) { + Ok(Some(self.parse_index_type()?)) + } else { + Ok(None) + } + } + + pub fn parse_procedure_param(&mut self) -> Result { + let name = self.parse_identifier(false)?; + let data_type = self.parse_data_type()?; + Ok(ProcedureParam { name, data_type }) + } + + pub fn parse_schema_name(&mut self) -> Result { + if self.parse_keyword(Keyword::AUTHORIZATION) { + Ok(SchemaName::UnnamedAuthorization( + self.parse_identifier(false)?, + )) + } else { + let name = self.parse_object_name(false)?; + + if self.parse_keyword(Keyword::AUTHORIZATION) { + Ok(SchemaName::NamedAuthorization( + name, + self.parse_identifier(false)?, + )) + } else { + Ok(SchemaName::Simple(name)) + } + } + } + + pub fn parse_create_database(&mut self) -> Result { + let ine = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let db_name = self.parse_object_name(false)?; + let mut location = None; + let mut managed_location = None; + loop { + match self.parse_one_of_keywords(&[Keyword::LOCATION, Keyword::MANAGEDLOCATION]) { + Some(Keyword::LOCATION) => location = Some(self.parse_literal_string()?), + Some(Keyword::MANAGEDLOCATION) => { + managed_location = Some(self.parse_literal_string()?) + } + _ => break, + } + } + Ok(Statement::CreateDatabase { + db_name, + if_not_exists: ine, + location, + managed_location, + }) + } + + pub fn parse_optional_create_function_using( + &mut self, + ) -> Result, ParserError> { + if !self.parse_keyword(Keyword::USING) { + return Ok(None); + }; + let keyword = + self.expect_one_of_keywords(&[Keyword::JAR, Keyword::FILE, Keyword::ARCHIVE])?; + + let uri = self.parse_literal_string()?; + + match keyword { + Keyword::JAR => Ok(Some(CreateFunctionUsing::Jar(uri))), + Keyword::FILE => Ok(Some(CreateFunctionUsing::File(uri))), + Keyword::ARCHIVE => Ok(Some(CreateFunctionUsing::Archive(uri))), + _ => self.expected( + "JAR, FILE or ARCHIVE, got {:?}", + TokenWithSpan::wrap(Token::make_keyword(format!("{keyword:?}").as_str())), + ), + } + } + + pub fn parse_create_function( + &mut self, + or_replace: bool, + temporary: bool, + ) -> Result { + if dialect_of!(self is HiveDialect) { + self.parse_hive_create_function(or_replace, temporary) + } else if dialect_of!(self is PostgreSqlDialect | GenericDialect) { + self.parse_postgres_create_function(or_replace, temporary) + } else if dialect_of!(self is DuckDbDialect) { + self.parse_create_macro(or_replace, temporary) + } else if dialect_of!(self is BigQueryDialect) { + self.parse_bigquery_create_function(or_replace, temporary) + } else { + self.prev_token(); + self.expected("an object type after CREATE", self.peek_token()) + } + } + + pub fn parse_create_trigger( + &mut self, + or_replace: bool, + is_constraint: bool, + ) -> Result { + if !dialect_of!(self is PostgreSqlDialect | GenericDialect) { + self.prev_token(); + return self.expected("an object type after CREATE", self.peek_token()); + } + + let name = self.parse_object_name(false)?; + let period = self.parse_trigger_period()?; + + let events = self.parse_keyword_separated(Keyword::OR, Parser::parse_trigger_event)?; + 
self.expect_keyword(Keyword::ON)?; + let table_name = self.parse_object_name(false)?; + + let referenced_table_name = if self.parse_keyword(Keyword::FROM) { + self.parse_object_name(true).ok() + } else { + None + }; + + let characteristics = self.parse_constraint_characteristics()?; + + let mut referencing = vec![]; + if self.parse_keyword(Keyword::REFERENCING) { + while let Some(refer) = self.parse_trigger_referencing()? { + referencing.push(refer); + } + } + + self.expect_keyword(Keyword::FOR)?; + let include_each = self.parse_keyword(Keyword::EACH); + let trigger_object = + match self.expect_one_of_keywords(&[Keyword::ROW, Keyword::STATEMENT])? { + Keyword::ROW => TriggerObject::Row, + Keyword::STATEMENT => TriggerObject::Statement, + _ => unreachable!(), + }; + + let condition = self + .parse_keyword(Keyword::WHEN) + .then(|| self.parse_expr()) + .transpose()?; + + self.expect_keyword(Keyword::EXECUTE)?; + + let exec_body = self.parse_trigger_exec_body()?; + + Ok(Statement::CreateTrigger { + or_replace, + is_constraint, + name, + period, + events, + table_name, + referenced_table_name, + referencing, + trigger_object, + include_each, + condition, + exec_body, + characteristics, + }) + } + + pub fn parse_trigger_period(&mut self) -> Result { + Ok( + match self.expect_one_of_keywords(&[ + Keyword::BEFORE, + Keyword::AFTER, + Keyword::INSTEAD, + ])? { + Keyword::BEFORE => TriggerPeriod::Before, + Keyword::AFTER => TriggerPeriod::After, + Keyword::INSTEAD => self + .expect_keyword(Keyword::OF) + .map(|_| TriggerPeriod::InsteadOf)?, + _ => unreachable!(), + }, + ) + } + + pub fn parse_trigger_event(&mut self) -> Result { + Ok( + match self.expect_one_of_keywords(&[ + Keyword::INSERT, + Keyword::UPDATE, + Keyword::DELETE, + Keyword::TRUNCATE, + ])? { + Keyword::INSERT => TriggerEvent::Insert, + Keyword::UPDATE => { + if self.parse_keyword(Keyword::OF) { + let cols = self.parse_comma_separated(|ident| { + Parser::parse_identifier(ident, false) + })?; + TriggerEvent::Update(cols) + } else { + TriggerEvent::Update(vec![]) + } + } + Keyword::DELETE => TriggerEvent::Delete, + Keyword::TRUNCATE => TriggerEvent::Truncate, + _ => unreachable!(), + }, + ) + } + + pub fn parse_trigger_referencing(&mut self) -> Result, ParserError> { + let refer_type = match self.parse_one_of_keywords(&[Keyword::OLD, Keyword::NEW]) { + Some(Keyword::OLD) if self.parse_keyword(Keyword::TABLE) => { + TriggerReferencingType::OldTable + } + Some(Keyword::NEW) if self.parse_keyword(Keyword::TABLE) => { + TriggerReferencingType::NewTable + } + _ => { + return Ok(None); + } + }; + + let is_as = self.parse_keyword(Keyword::AS); + let transition_relation_name = self.parse_object_name(false)?; + Ok(Some(TriggerReferencing { + refer_type, + is_as, + transition_relation_name, + })) + } + + pub fn parse_trigger_exec_body(&mut self) -> Result { + Ok(TriggerExecBody { + exec_type: match self + .expect_one_of_keywords(&[Keyword::FUNCTION, Keyword::PROCEDURE])? 
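+ // Postgres 11+ writes EXECUTE FUNCTION; EXECUTE PROCEDURE is the legacy spelling.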
+ { + Keyword::FUNCTION => TriggerExecBodyType::Function, + Keyword::PROCEDURE => TriggerExecBodyType::Procedure, + _ => unreachable!(), + }, + func_desc: self.parse_function_desc()?, + }) + } + + pub fn parse_create_macro( + &mut self, + or_replace: bool, + temporary: bool, + ) -> Result { + if dialect_of!(self is DuckDbDialect | GenericDialect) { + let name = self.parse_object_name(false)?; + self.expect_token(&Token::LParen)?; + let args = if self.consume_token(&Token::RParen) { + self.prev_token(); + None + } else { + Some(self.parse_comma_separated(Parser::parse_macro_arg)?) + }; + + self.expect_token(&Token::RParen)?; + self.expect_keyword(Keyword::AS)?; + + Ok(Statement::CreateMacro { + or_replace, + temporary, + name, + args, + definition: if self.parse_keyword(Keyword::TABLE) { + MacroDefinition::Table(self.parse_query()?) + } else { + MacroDefinition::Expr(self.parse_expr()?) + }, + }) + } else { + self.prev_token(); + self.expected("an object type after CREATE", self.peek_token()) + } + } + + fn parse_macro_arg(&mut self) -> Result { + let name = self.parse_identifier(false)?; + + let default_expr = + if self.consume_token(&Token::Assignment) || self.consume_token(&Token::RArrow) { + Some(self.parse_expr()?) + } else { + None + }; + Ok(MacroArg { name, default_expr }) + } + + pub(crate) fn parse_create_sequence_options( + &mut self, + ) -> Result, ParserError> { + let mut sequence_options = vec![]; + //[ INCREMENT [ BY ] increment ] + if self.parse_keywords(&[Keyword::INCREMENT]) { + if self.parse_keywords(&[Keyword::BY]) { + sequence_options.push(SequenceOptions::IncrementBy(self.parse_number()?, true)); + } else { + sequence_options.push(SequenceOptions::IncrementBy(self.parse_number()?, false)); + } + } + //[ MINVALUE minvalue | NO MINVALUE ] + if self.parse_keyword(Keyword::MINVALUE) { + sequence_options.push(SequenceOptions::MinValue(Some(self.parse_number()?))); + } else if self.parse_keywords(&[Keyword::NO, Keyword::MINVALUE]) { + sequence_options.push(SequenceOptions::MinValue(None)); + } + //[ MAXVALUE maxvalue | NO MAXVALUE ] + if self.parse_keywords(&[Keyword::MAXVALUE]) { + sequence_options.push(SequenceOptions::MaxValue(Some(self.parse_number()?))); + } else if self.parse_keywords(&[Keyword::NO, Keyword::MAXVALUE]) { + sequence_options.push(SequenceOptions::MaxValue(None)); + } + + //[ START [ WITH ] start ] + if self.parse_keywords(&[Keyword::START]) { + if self.parse_keywords(&[Keyword::WITH]) { + sequence_options.push(SequenceOptions::StartWith(self.parse_number()?, true)); + } else { + sequence_options.push(SequenceOptions::StartWith(self.parse_number()?, false)); + } + } + //[ CACHE cache ] + if self.parse_keywords(&[Keyword::CACHE]) { + sequence_options.push(SequenceOptions::Cache(self.parse_number()?)); + } + // [ [ NO ] CYCLE ] + if self.parse_keywords(&[Keyword::NO, Keyword::CYCLE]) { + sequence_options.push(SequenceOptions::Cycle(true)); + } else if self.parse_keywords(&[Keyword::CYCLE]) { + sequence_options.push(SequenceOptions::Cycle(false)); + } + + Ok(sequence_options) + } + + pub fn parse_create_external_table( + &mut self, + or_replace: bool, + ) -> Result { + self.expect_keyword(Keyword::TABLE)?; + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let table_name = self.parse_object_name(false)?; + let (columns, constraints) = self.parse_columns()?; + + let hive_distribution = self.parse_hive_distribution()?; + let hive_formats = self.parse_hive_formats()?; + + let file_format = if let Some(ff) = 
&hive_formats.storage { + match ff { + HiveIOFormat::FileFormat { format } => Some(*format), + _ => None, + } + } else { + None + }; + let location = hive_formats.location.clone(); + let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?; + Ok(CreateTableBuilder::new(table_name) + .columns(columns) + .constraints(constraints) + .hive_distribution(hive_distribution) + .hive_formats(Some(hive_formats)) + .table_properties(table_properties) + .or_replace(or_replace) + .if_not_exists(if_not_exists) + .external(true) + .file_format(file_format) + .location(location) + .build()) + } + + pub fn parse_file_format(&mut self) -> Result { + let next_token = self.next_token(); + match &next_token.token { + Token::Word(w) => match w.keyword { + Keyword::AVRO => Ok(FileFormat::AVRO), + Keyword::JSONFILE => Ok(FileFormat::JSONFILE), + Keyword::ORC => Ok(FileFormat::ORC), + Keyword::PARQUET => Ok(FileFormat::PARQUET), + Keyword::RCFILE => Ok(FileFormat::RCFILE), + Keyword::SEQUENCEFILE => Ok(FileFormat::SEQUENCEFILE), + Keyword::TEXTFILE => Ok(FileFormat::TEXTFILE), + _ => self.expected("fileformat", next_token), + }, + _ => self.expected("fileformat", next_token), + } + } + + pub fn parse_analyze_format(&mut self) -> Result { + let next_token = self.next_token(); + match &next_token.token { + Token::Word(w) => match w.keyword { + Keyword::TEXT => Ok(AnalyzeFormat::TEXT), + Keyword::GRAPHVIZ => Ok(AnalyzeFormat::GRAPHVIZ), + Keyword::JSON => Ok(AnalyzeFormat::JSON), + _ => self.expected("fileformat", next_token), + }, + _ => self.expected("fileformat", next_token), + } + } + + pub fn parse_create_view( + &mut self, + or_replace: bool, + temporary: bool, + ) -> Result { + let materialized = self.parse_keyword(Keyword::MATERIALIZED); + self.expect_keyword(Keyword::VIEW)?; + let if_not_exists = dialect_of!(self is BigQueryDialect|SQLiteDialect|GenericDialect) + && self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + // Many dialects support `OR ALTER` right after `CREATE`, but we don't (yet). + // ANSI SQL and Postgres support RECURSIVE here, but we don't support it either. + let allow_unquoted_hyphen = dialect_of!(self is BigQueryDialect); + let name = self.parse_object_name(allow_unquoted_hyphen)?; + let columns = self.parse_view_columns()?; + let mut options = CreateTableOptions::None; + let with_options = self.parse_options(Keyword::WITH)?; + if !with_options.is_empty() { + options = CreateTableOptions::With(with_options); + } + + let cluster_by = if self.parse_keyword(Keyword::CLUSTER) { + self.expect_keyword(Keyword::BY)?; + self.parse_parenthesized_column_list(Optional, false)? + } else { + vec![] + }; + + if dialect_of!(self is BigQueryDialect | GenericDialect) { + if let Some(opts) = self.maybe_parse_options(Keyword::OPTIONS)? { + if !opts.is_empty() { + options = CreateTableOptions::Options(opts); + } + }; + } + + let to = if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keyword(Keyword::TO) + { + Some(self.parse_object_name(false)?) 
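+ // e.g. ClickHouse: CREATE MATERIALIZED VIEW mv TO dest AS SELECT a FROM src;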
+ } else { + None + }; + + let comment = if dialect_of!(self is SnowflakeDialect | GenericDialect) + && self.parse_keyword(Keyword::COMMENT) + { + self.expect_token(&Token::Eq)?; + let next_token = self.next_token(); + match next_token.token { + Token::SingleQuotedString(str) => Some(str), + _ => self.expected("string literal", next_token)?, + } + } else { + None + }; + + self.expect_keyword(Keyword::AS)?; + let query = self.parse_query()?; + // Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here. + + let with_no_schema_binding = dialect_of!(self is RedshiftSqlDialect | GenericDialect) + && self.parse_keywords(&[ + Keyword::WITH, + Keyword::NO, + Keyword::SCHEMA, + Keyword::BINDING, + ]); + + Ok(Statement::CreateView { + name, + columns, + query, + materialized, + or_replace, + options, + cluster_by, + comment, + with_no_schema_binding, + if_not_exists, + temporary, + to, + }) + } + + pub fn parse_create_role(&mut self) -> Result { + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let names = self.parse_comma_separated(|p| p.parse_object_name(false))?; + + let _ = self.parse_keyword(Keyword::WITH); // [ WITH ] + + let optional_keywords = if dialect_of!(self is MsSqlDialect) { + vec![Keyword::AUTHORIZATION] + } else if dialect_of!(self is PostgreSqlDialect) { + vec![ + Keyword::LOGIN, + Keyword::NOLOGIN, + Keyword::INHERIT, + Keyword::NOINHERIT, + Keyword::BYPASSRLS, + Keyword::NOBYPASSRLS, + Keyword::PASSWORD, + Keyword::CREATEDB, + Keyword::NOCREATEDB, + Keyword::CREATEROLE, + Keyword::NOCREATEROLE, + Keyword::SUPERUSER, + Keyword::NOSUPERUSER, + Keyword::REPLICATION, + Keyword::NOREPLICATION, + Keyword::CONNECTION, + Keyword::VALID, + Keyword::IN, + Keyword::ROLE, + Keyword::ADMIN, + Keyword::USER, + ] + } else { + vec![] + }; + + // MSSQL + let mut authorization_owner = None; + // Postgres + let mut login = None; + let mut inherit = None; + let mut bypassrls = None; + let mut password = None; + let mut create_db = None; + let mut create_role = None; + let mut superuser = None; + let mut replication = None; + let mut connection_limit = None; + let mut valid_until = None; + let mut in_role = vec![]; + let mut in_group = vec![]; + let mut role = vec![]; + let mut user = vec![]; + let mut admin = vec![]; + + while let Some(keyword) = self.parse_one_of_keywords(&optional_keywords) { + let loc = self + .tokens + .get(self.index - 1) + .map_or(Location { line: 0, column: 0 }, |t| t.span.start); + match keyword { + Keyword::AUTHORIZATION => { + if authorization_owner.is_some() { + parser_err!("Found multiple AUTHORIZATION", loc) + } else { + authorization_owner = Some(self.parse_object_name(false)?); + Ok(()) + } + } + Keyword::LOGIN | Keyword::NOLOGIN => { + if login.is_some() { + parser_err!("Found multiple LOGIN or NOLOGIN", loc) + } else { + login = Some(keyword == Keyword::LOGIN); + Ok(()) + } + } + Keyword::INHERIT | Keyword::NOINHERIT => { + if inherit.is_some() { + parser_err!("Found multiple INHERIT or NOINHERIT", loc) + } else { + inherit = Some(keyword == Keyword::INHERIT); + Ok(()) + } + } + Keyword::BYPASSRLS | Keyword::NOBYPASSRLS => { + if bypassrls.is_some() { + parser_err!("Found multiple BYPASSRLS or NOBYPASSRLS", loc) + } else { + bypassrls = Some(keyword == Keyword::BYPASSRLS); + Ok(()) + } + } + Keyword::CREATEDB | Keyword::NOCREATEDB => { + if create_db.is_some() { + parser_err!("Found multiple CREATEDB or NOCREATEDB", loc) + } else { + create_db = Some(keyword == Keyword::CREATEDB); + Ok(()) + } + } + 
Keyword::CREATEROLE | Keyword::NOCREATEROLE => { + if create_role.is_some() { + parser_err!("Found multiple CREATEROLE or NOCREATEROLE", loc) + } else { + create_role = Some(keyword == Keyword::CREATEROLE); + Ok(()) + } + } + Keyword::SUPERUSER | Keyword::NOSUPERUSER => { + if superuser.is_some() { + parser_err!("Found multiple SUPERUSER or NOSUPERUSER", loc) + } else { + superuser = Some(keyword == Keyword::SUPERUSER); + Ok(()) + } + } + Keyword::REPLICATION | Keyword::NOREPLICATION => { + if replication.is_some() { + parser_err!("Found multiple REPLICATION or NOREPLICATION", loc) + } else { + replication = Some(keyword == Keyword::REPLICATION); + Ok(()) + } + } + Keyword::PASSWORD => { + if password.is_some() { + parser_err!("Found multiple PASSWORD", loc) + } else { + password = if self.parse_keyword(Keyword::NULL) { + Some(Password::NullPassword) + } else { + Some(Password::Password(Expr::Value(self.parse_value()?))) + }; + Ok(()) + } + } + Keyword::CONNECTION => { + self.expect_keyword(Keyword::LIMIT)?; + if connection_limit.is_some() { + parser_err!("Found multiple CONNECTION LIMIT", loc) + } else { + connection_limit = Some(Expr::Value(self.parse_number_value()?)); + Ok(()) + } + } + Keyword::VALID => { + self.expect_keyword(Keyword::UNTIL)?; + if valid_until.is_some() { + parser_err!("Found multiple VALID UNTIL", loc) + } else { + valid_until = Some(Expr::Value(self.parse_value()?)); + Ok(()) + } + } + Keyword::IN => { + if self.parse_keyword(Keyword::ROLE) { + if !in_role.is_empty() { + parser_err!("Found multiple IN ROLE", loc) + } else { + in_role = self.parse_comma_separated(|p| p.parse_identifier(false))?; + Ok(()) + } + } else if self.parse_keyword(Keyword::GROUP) { + if !in_group.is_empty() { + parser_err!("Found multiple IN GROUP", loc) + } else { + in_group = self.parse_comma_separated(|p| p.parse_identifier(false))?; + Ok(()) + } + } else { + self.expected("ROLE or GROUP after IN", self.peek_token()) + } + } + Keyword::ROLE => { + if !role.is_empty() { + parser_err!("Found multiple ROLE", loc) + } else { + role = self.parse_comma_separated(|p| p.parse_identifier(false))?; + Ok(()) + } + } + Keyword::USER => { + if !user.is_empty() { + parser_err!("Found multiple USER", loc) + } else { + user = self.parse_comma_separated(|p| p.parse_identifier(false))?; + Ok(()) + } + } + Keyword::ADMIN => { + if !admin.is_empty() { + parser_err!("Found multiple ADMIN", loc) + } else { + admin = self.parse_comma_separated(|p| p.parse_identifier(false))?; + Ok(()) + } + } + _ => break, + }? + } + + Ok(Statement::CreateRole { + names, + if_not_exists, + login, + inherit, + bypassrls, + password, + create_db, + create_role, + replication, + superuser, + connection_limit, + valid_until, + in_role, + in_group, + role, + user, + admin, + authorization_owner, + }) + } + + pub fn parse_owner(&mut self) -> Result { + let owner = match self.parse_one_of_keywords(&[Keyword::CURRENT_USER, Keyword::CURRENT_ROLE, Keyword::SESSION_USER]) { + Some(Keyword::CURRENT_USER) => Owner::CurrentUser, + Some(Keyword::CURRENT_ROLE) => Owner::CurrentRole, + Some(Keyword::SESSION_USER) => Owner::SessionUser, + Some(_) => unreachable!(), + None => { + match self.parse_identifier(false) { + Ok(ident) => Owner::Ident(ident), + Err(e) => { + return Err(ParserError::ParserError(format!("Expected: CURRENT_USER, CURRENT_ROLE, SESSION_USER or identifier after OWNER TO. 
{e}"))) + } + } + } + }; + Ok(owner) + } + + /// ```sql + /// CREATE POLICY name ON table_name [ AS { PERMISSIVE | RESTRICTIVE } ] + /// [ FOR { ALL | SELECT | INSERT | UPDATE | DELETE } ] + /// [ TO { role_name | PUBLIC | CURRENT_USER | CURRENT_ROLE | SESSION_USER } [, ...] ] + /// [ USING ( using_expression ) ] + /// [ WITH CHECK ( with_check_expression ) ] + /// ``` + /// + /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-createpolicy.html) + pub fn parse_create_policy(&mut self) -> Result { + let name = self.parse_identifier(false)?; + self.expect_keyword(Keyword::ON)?; + let table_name = self.parse_object_name(false)?; + + let policy_type = if self.parse_keyword(Keyword::AS) { + let keyword = + self.expect_one_of_keywords(&[Keyword::PERMISSIVE, Keyword::RESTRICTIVE])?; + Some(match keyword { + Keyword::PERMISSIVE => CreatePolicyType::Permissive, + Keyword::RESTRICTIVE => CreatePolicyType::Restrictive, + _ => unreachable!(), + }) + } else { + None + }; + + let command = if self.parse_keyword(Keyword::FOR) { + let keyword = self.expect_one_of_keywords(&[ + Keyword::ALL, + Keyword::SELECT, + Keyword::INSERT, + Keyword::UPDATE, + Keyword::DELETE, + ])?; + Some(match keyword { + Keyword::ALL => CreatePolicyCommand::All, + Keyword::SELECT => CreatePolicyCommand::Select, + Keyword::INSERT => CreatePolicyCommand::Insert, + Keyword::UPDATE => CreatePolicyCommand::Update, + Keyword::DELETE => CreatePolicyCommand::Delete, + _ => unreachable!(), + }) + } else { + None + }; + + let to = if self.parse_keyword(Keyword::TO) { + Some(self.parse_comma_separated(|p| p.parse_owner())?) + } else { + None + }; + + let using = if self.parse_keyword(Keyword::USING) { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Some(expr) + } else { + None + }; + + let with_check = if self.parse_keywords(&[Keyword::WITH, Keyword::CHECK]) { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Some(expr) + } else { + None + }; + + Ok(CreatePolicy { + name, + table_name, + policy_type, + command, + to, + using, + with_check, + }) + } + + pub fn parse_create_index(&mut self, unique: bool) -> Result { + let concurrently = self.parse_keyword(Keyword::CONCURRENTLY); + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let index_name = if if_not_exists || !self.parse_keyword(Keyword::ON) { + let index_name = self.parse_object_name(false)?; + self.expect_keyword(Keyword::ON)?; + Some(index_name) + } else { + None + }; + let table_name = self.parse_object_name(false)?; + let using = if self.parse_keyword(Keyword::USING) { + Some(self.parse_identifier(false)?) 
+ } else { + None + }; + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(Parser::parse_order_by_expr)?; + self.expect_token(&Token::RParen)?; + + let include = if self.parse_keyword(Keyword::INCLUDE) { + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(|p| p.parse_identifier(false))?; + self.expect_token(&Token::RParen)?; + columns + } else { + vec![] + }; + + let nulls_distinct = if self.parse_keyword(Keyword::NULLS) { + let not = self.parse_keyword(Keyword::NOT); + self.expect_keyword(Keyword::DISTINCT)?; + Some(!not) + } else { + None + }; + + let with = if self.dialect.supports_create_index_with_clause() + && self.parse_keyword(Keyword::WITH) + { + self.expect_token(&Token::LParen)?; + let with_params = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + with_params + } else { + Vec::new() + }; + + let predicate = if self.parse_keyword(Keyword::WHERE) { + Some(self.parse_expr()?) + } else { + None + }; + + Ok(Statement::CreateIndex(CreateIndex { + name: index_name, + table_name, + using, + columns, + unique, + concurrently, + if_not_exists, + include, + nulls_distinct, + with, + predicate, + })) + } + + pub fn parse_create_extension(&mut self) -> Result { + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let name = self.parse_identifier(false)?; + + let (schema, version, cascade) = if self.parse_keyword(Keyword::WITH) { + let schema = if self.parse_keyword(Keyword::SCHEMA) { + Some(self.parse_identifier(false)?) + } else { + None + }; + + let version = if self.parse_keyword(Keyword::VERSION) { + Some(self.parse_identifier(false)?) + } else { + None + }; + + let cascade = self.parse_keyword(Keyword::CASCADE); + + (schema, version, cascade) + } else { + (None, None, false) + }; + + Ok(Statement::CreateExtension { + name, + if_not_exists, + schema, + version, + cascade, + }) + } + + pub fn parse_create_table( + &mut self, + or_replace: bool, + temporary: bool, + global: Option, + transient: bool, + ) -> Result { + let allow_unquoted_hyphen = dialect_of!(self is BigQueryDialect); + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let table_name = self.parse_object_name(allow_unquoted_hyphen)?; + + // Clickhouse has `ON CLUSTER 'cluster'` syntax for DDLs + let on_cluster = self.parse_optional_on_cluster()?; + + let like = if self.parse_keyword(Keyword::LIKE) || self.parse_keyword(Keyword::ILIKE) { + self.parse_object_name(allow_unquoted_hyphen).ok() + } else { + None + }; + + let clone = if self.parse_keyword(Keyword::CLONE) { + self.parse_object_name(allow_unquoted_hyphen).ok() + } else { + None + }; + + // parse optional column list (schema) + let (columns, constraints) = self.parse_columns()?; + let mut comment = if dialect_of!(self is HiveDialect) + && self.parse_keyword(Keyword::COMMENT) + { + let next_token = self.next_token(); + match next_token.token { + Token::SingleQuotedString(str) => Some(CommentDef::AfterColumnDefsWithoutEq(str)), + _ => self.expected("comment", next_token)?, + } + } else { + None + }; + + // SQLite supports `WITHOUT ROWID` at the end of `CREATE TABLE` + let without_rowid = self.parse_keywords(&[Keyword::WITHOUT, Keyword::ROWID]); + + let hive_distribution = self.parse_hive_distribution()?; + let clustered_by = self.parse_optional_clustered_by()?; + let hive_formats = self.parse_hive_formats()?; + // PostgreSQL supports `WITH ( options )`, before `AS` + let with_options = 
self.parse_options(Keyword::WITH)?; + let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?; + + let engine = if self.parse_keyword(Keyword::ENGINE) { + self.expect_token(&Token::Eq)?; + let next_token = self.next_token(); + match next_token.token { + Token::Word(w) => { + let name = w.value; + let parameters = if self.peek_token() == Token::LParen { + Some(self.parse_parenthesized_identifiers()?) + } else { + None + }; + Some(TableEngine { name, parameters }) + } + _ => self.expected("identifier", next_token)?, + } + } else { + None + }; + + let auto_increment_offset = if self.parse_keyword(Keyword::AUTO_INCREMENT) { + let _ = self.consume_token(&Token::Eq); + let next_token = self.next_token(); + match next_token.token { + Token::Number(s, _) => Some(Self::parse::(s, next_token.span.start)?), + _ => self.expected("literal int", next_token)?, + } + } else { + None + }; + + // ClickHouse supports `PRIMARY KEY`, before `ORDER BY` + // https://clickhouse.com/docs/en/sql-reference/statements/create/table#primary-key + let primary_key = if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) + { + Some(Box::new(self.parse_expr()?)) + } else { + None + }; + + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + if self.consume_token(&Token::LParen) { + let columns = if self.peek_token() != Token::RParen { + self.parse_comma_separated(|p| p.parse_expr())? + } else { + vec![] + }; + self.expect_token(&Token::RParen)?; + Some(OneOrManyWithParens::Many(columns)) + } else { + Some(OneOrManyWithParens::One(self.parse_expr()?)) + } + } else { + None + }; + + let create_table_config = self.parse_optional_create_table_config()?; + + let default_charset = if self.parse_keywords(&[Keyword::DEFAULT, Keyword::CHARSET]) { + self.expect_token(&Token::Eq)?; + let next_token = self.next_token(); + match next_token.token { + Token::Word(w) => Some(w.value), + _ => self.expected("identifier", next_token)?, + } + } else { + None + }; + + let collation = if self.parse_keywords(&[Keyword::COLLATE]) { + self.expect_token(&Token::Eq)?; + let next_token = self.next_token(); + match next_token.token { + Token::Word(w) => Some(w.value), + _ => self.expected("identifier", next_token)?, + } + } else { + None + }; + + let on_commit: Option = + if self.parse_keywords(&[Keyword::ON, Keyword::COMMIT, Keyword::DELETE, Keyword::ROWS]) + { + Some(OnCommit::DeleteRows) + } else if self.parse_keywords(&[ + Keyword::ON, + Keyword::COMMIT, + Keyword::PRESERVE, + Keyword::ROWS, + ]) { + Some(OnCommit::PreserveRows) + } else if self.parse_keywords(&[Keyword::ON, Keyword::COMMIT, Keyword::DROP]) { + Some(OnCommit::Drop) + } else { + None + }; + + let strict = self.parse_keyword(Keyword::STRICT); + + // Excludes Hive dialect here since it has been handled after table column definitions. + if !dialect_of!(self is HiveDialect) && self.parse_keyword(Keyword::COMMENT) { + // rewind the COMMENT keyword + self.prev_token(); + comment = self.parse_optional_inline_comment()? + }; + + // Parse optional `AS ( query )` + let query = if self.parse_keyword(Keyword::AS) { + Some(self.parse_query()?) + } else if self.dialect.supports_create_table_select() && self.parse_keyword(Keyword::SELECT) + { + // rewind the SELECT keyword + self.prev_token(); + Some(self.parse_query()?) 
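+ // e.g. MySQL accepts CREATE TABLE t2 SELECT * FROM t1, with no intervening AS.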
+ } else { + None + }; + + Ok(CreateTableBuilder::new(table_name) + .temporary(temporary) + .columns(columns) + .constraints(constraints) + .with_options(with_options) + .table_properties(table_properties) + .or_replace(or_replace) + .if_not_exists(if_not_exists) + .transient(transient) + .hive_distribution(hive_distribution) + .hive_formats(Some(hive_formats)) + .global(global) + .query(query) + .without_rowid(without_rowid) + .like(like) + .clone_clause(clone) + .engine(engine) + .comment(comment) + .auto_increment_offset(auto_increment_offset) + .order_by(order_by) + .default_charset(default_charset) + .collation(collation) + .on_commit(on_commit) + .on_cluster(on_cluster) + .clustered_by(clustered_by) + .partition_by(create_table_config.partition_by) + .cluster_by(create_table_config.cluster_by) + .options(create_table_config.options) + .primary_key(primary_key) + .strict(strict) + .build()) + } + + /// Parse configuration like partitioning, clustering information during the table creation. + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_2) + /// [PostgreSQL](https://www.postgresql.org/docs/current/ddl-partitioning.html) + fn parse_optional_create_table_config( + &mut self, + ) -> Result { + let partition_by = if dialect_of!(self is BigQueryDialect | PostgreSqlDialect | GenericDialect) + && self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) + { + Some(Box::new(self.parse_expr()?)) + } else { + None + }; + + let mut cluster_by = None; + let mut options = None; + if dialect_of!(self is BigQueryDialect | GenericDialect) { + if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { + cluster_by = Some(WrappedCollection::NoWrapping( + self.parse_comma_separated(|p| p.parse_identifier(false))?, + )); + }; + + if let Token::Word(word) = self.peek_token().token { + if word.keyword == Keyword::OPTIONS { + options = Some(self.parse_options(Keyword::OPTIONS)?); + } + }; + } + + Ok(CreateTableConfiguration { + partition_by, + cluster_by, + options, + }) + } + + pub fn parse_optional_clustered_by(&mut self) -> Result, ParserError> { + let clustered_by = if dialect_of!(self is HiveDialect|GenericDialect) + && self.parse_keywords(&[Keyword::CLUSTERED, Keyword::BY]) + { + let columns = self.parse_parenthesized_column_list(Mandatory, false)?; + + let sorted_by = if self.parse_keywords(&[Keyword::SORTED, Keyword::BY]) { + self.expect_token(&Token::LParen)?; + let sorted_by_columns = self.parse_comma_separated(|p| p.parse_order_by_expr())?; + self.expect_token(&Token::RParen)?; + Some(sorted_by_columns) + } else { + None + }; + + self.expect_keyword(Keyword::INTO)?; + let num_buckets = self.parse_number_value()?; + self.expect_keyword(Keyword::BUCKETS)?; + Some(ClusteredBy { + columns, + sorted_by, + num_buckets, + }) + } else { + None + }; + Ok(clustered_by) + } + + pub fn parse_referential_action(&mut self) -> Result { + if self.parse_keyword(Keyword::RESTRICT) { + Ok(ReferentialAction::Restrict) + } else if self.parse_keyword(Keyword::CASCADE) { + Ok(ReferentialAction::Cascade) + } else if self.parse_keywords(&[Keyword::SET, Keyword::NULL]) { + Ok(ReferentialAction::SetNull) + } else if self.parse_keywords(&[Keyword::NO, Keyword::ACTION]) { + Ok(ReferentialAction::NoAction) + } else if self.parse_keywords(&[Keyword::SET, Keyword::DEFAULT]) { + Ok(ReferentialAction::SetDefault) + } else { + self.expected( + "one of RESTRICT, CASCADE, SET NULL, NO ACTION or SET DEFAULT", + self.peek_token(), + ) + } + } + + pub fn 
parse_constraint_characteristics( + &mut self, + ) -> Result, ParserError> { + let mut cc = ConstraintCharacteristics::default(); + + loop { + if cc.deferrable.is_none() && self.parse_keywords(&[Keyword::NOT, Keyword::DEFERRABLE]) + { + cc.deferrable = Some(false); + } else if cc.deferrable.is_none() && self.parse_keyword(Keyword::DEFERRABLE) { + cc.deferrable = Some(true); + } else if cc.initially.is_none() && self.parse_keyword(Keyword::INITIALLY) { + if self.parse_keyword(Keyword::DEFERRED) { + cc.initially = Some(DeferrableInitial::Deferred); + } else if self.parse_keyword(Keyword::IMMEDIATE) { + cc.initially = Some(DeferrableInitial::Immediate); + } else { + self.expected("one of DEFERRED or IMMEDIATE", self.peek_token())?; + } + } else if cc.enforced.is_none() && self.parse_keyword(Keyword::ENFORCED) { + cc.enforced = Some(true); + } else if cc.enforced.is_none() + && self.parse_keywords(&[Keyword::NOT, Keyword::ENFORCED]) + { + cc.enforced = Some(false); + } else { + break; + } + } + + if cc.deferrable.is_some() || cc.initially.is_some() || cc.enforced.is_some() { + Ok(Some(cc)) + } else { + Ok(None) + } + } + + pub fn parse_optional_table_constraint( + &mut self, + ) -> Result, ParserError> { + let name = if self.parse_keyword(Keyword::CONSTRAINT) { + Some(self.parse_identifier(false)?) + } else { + None + }; + + let next_token = self.next_token(); + match next_token.token { + Token::Word(w) if w.keyword == Keyword::UNIQUE => { + let index_type_display = self.parse_index_type_display(); + if !dialect_of!(self is GenericDialect | MySqlDialect) + && !index_type_display.is_none() + { + return self + .expected("`index_name` or `(column_name [, ...])`", self.peek_token()); + } + + let nulls_distinct = self.parse_optional_nulls_distinct()?; + + // optional index name + let index_name = self.parse_optional_indent()?; + let index_type = self.parse_optional_using_then_index_type()?; + + let columns = self.parse_parenthesized_column_list(Mandatory, false)?; + let index_options = self.parse_index_options()?; + let characteristics = self.parse_constraint_characteristics()?; + Ok(Some(TableConstraint::Unique { + name, + index_name, + index_type_display, + index_type, + columns, + index_options, + characteristics, + nulls_distinct, + })) + } + Token::Word(w) if w.keyword == Keyword::PRIMARY => { + // after `PRIMARY` always stay `KEY` + self.expect_keyword(Keyword::KEY)?; + + // optional index name + let index_name = self.parse_optional_indent()?; + let index_type = self.parse_optional_using_then_index_type()?; + + let columns = self.parse_parenthesized_column_list(Mandatory, false)?; + let index_options = self.parse_index_options()?; + let characteristics = self.parse_constraint_characteristics()?; + Ok(Some(TableConstraint::PrimaryKey { + name, + index_name, + index_type, + columns, + index_options, + characteristics, + })) + } + Token::Word(w) if w.keyword == Keyword::FOREIGN => { + self.expect_keyword(Keyword::KEY)?; + let columns = self.parse_parenthesized_column_list(Mandatory, false)?; + self.expect_keyword(Keyword::REFERENCES)?; + let foreign_table = self.parse_object_name(false)?; + let referred_columns = self.parse_parenthesized_column_list(Mandatory, false)?; + let mut on_delete = None; + let mut on_update = None; + loop { + if on_delete.is_none() && self.parse_keywords(&[Keyword::ON, Keyword::DELETE]) { + on_delete = Some(self.parse_referential_action()?); + } else if on_update.is_none() + && self.parse_keywords(&[Keyword::ON, Keyword::UPDATE]) + { + on_update = 
Some(self.parse_referential_action()?);
+ } else {
+ break;
+ }
+ }
+
+ let characteristics = self.parse_constraint_characteristics()?;
+
+ Ok(Some(TableConstraint::ForeignKey {
+ name,
+ columns,
+ foreign_table,
+ referred_columns,
+ on_delete,
+ on_update,
+ characteristics,
+ }))
+ }
+ Token::Word(w) if w.keyword == Keyword::CHECK => {
+ self.expect_token(&Token::LParen)?;
+ let expr = Box::new(self.parse_expr()?);
+ self.expect_token(&Token::RParen)?;
+ Ok(Some(TableConstraint::Check { name, expr }))
+ }
+ Token::Word(w)
+ if (w.keyword == Keyword::INDEX || w.keyword == Keyword::KEY)
+ && dialect_of!(self is GenericDialect | MySqlDialect)
+ && name.is_none() =>
+ {
+ let display_as_key = w.keyword == Keyword::KEY;
+
+ let name = match self.peek_token().token {
+ Token::Word(word) if word.keyword == Keyword::USING => None,
+ _ => self.parse_optional_indent()?,
+ };
+
+ let index_type = self.parse_optional_using_then_index_type()?;
+ let columns = self.parse_parenthesized_column_list(Mandatory, false)?;
+
+ Ok(Some(TableConstraint::Index {
+ display_as_key,
+ name,
+ index_type,
+ columns,
+ }))
+ }
+ Token::Word(w)
+ if (w.keyword == Keyword::FULLTEXT || w.keyword == Keyword::SPATIAL)
+ && dialect_of!(self is GenericDialect | MySqlDialect) =>
+ {
+ if let Some(name) = name {
+ return self.expected(
+ "FULLTEXT or SPATIAL option without constraint name",
+ TokenWithSpan {
+ token: Token::make_keyword(&name.to_string()),
+ span: next_token.span,
+ },
+ );
+ }
+
+ let fulltext = w.keyword == Keyword::FULLTEXT;
+
+ let index_type_display = self.parse_index_type_display();
+
+ let opt_index_name = self.parse_optional_indent()?;
+
+ let columns = self.parse_parenthesized_column_list(Mandatory, false)?;
+
+ Ok(Some(TableConstraint::FulltextOrSpatial {
+ fulltext,
+ index_type_display,
+ opt_index_name,
+ columns,
+ }))
+ }
+ _ => {
+ if name.is_some() {
+ self.expected("PRIMARY, UNIQUE, FOREIGN, or CHECK", next_token)
+ } else {
+ self.prev_token();
+ Ok(None)
+ }
+ }
+ }
+ }
+
+ fn parse_optional_nulls_distinct(&mut self) -> Result<NullsDistinctOption, ParserError> {
+ Ok(if self.parse_keyword(Keyword::NULLS) {
+ let not = self.parse_keyword(Keyword::NOT);
+ self.expect_keyword(Keyword::DISTINCT)?;
+ if not {
+ NullsDistinctOption::NotDistinct
+ } else {
+ NullsDistinctOption::Distinct
+ }
+ } else {
+ NullsDistinctOption::None
+ })
+ }
+}
diff --git a/src/parser/deallocate.rs b/src/parser/deallocate.rs
new file mode 100644
index 000000000..e93e6fcb4
--- /dev/null
+++ b/src/parser/deallocate.rs
@@ -0,0 +1,9 @@
+use crate::parser::*;
+
+impl Parser<'_> {
+ pub fn parse_deallocate(&mut self) -> Result<Statement, ParserError> {
+ let prepare = self.parse_keyword(Keyword::PREPARE);
+ let name = self.parse_identifier(false)?;
+ Ok(Statement::Deallocate { name, prepare })
+ }
+}
diff --git a/src/parser/declare.rs b/src/parser/declare.rs
new file mode 100644
index 000000000..c480510e4
--- /dev/null
+++ b/src/parser/declare.rs
@@ -0,0 +1,76 @@
+use crate::parser::*;
+
+impl Parser<'_> {
+ /// Parse a `DECLARE` statement.
+ ///
+ /// ```sql
+ /// DECLARE name [ BINARY ] [ ASENSITIVE | INSENSITIVE ] [ [ NO ] SCROLL ]
+ /// CURSOR [ { WITH | WITHOUT } HOLD ] FOR query
+ /// ```
+ ///
+ /// The syntax varies significantly between warehouses; see the
+ /// warehouse-specific parsing functions for their grammars.
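+ ///
+ /// For example, a PostgreSQL-style cursor declaration matching the grammar above:
+ ///
+ /// ```sql
+ /// DECLARE c NO SCROLL CURSOR WITH HOLD FOR SELECT * FROM t;
+ /// ```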
+ pub fn parse_declare(&mut self) -> Result { + if dialect_of!(self is BigQueryDialect) { + return self.parse_big_query_declare(); + } + if dialect_of!(self is SnowflakeDialect) { + return self.parse_snowflake_declare(); + } + if dialect_of!(self is MsSqlDialect) { + return self.parse_mssql_declare(); + } + + let name = self.parse_identifier(false)?; + + let binary = Some(self.parse_keyword(Keyword::BINARY)); + let sensitive = if self.parse_keyword(Keyword::INSENSITIVE) { + Some(true) + } else if self.parse_keyword(Keyword::ASENSITIVE) { + Some(false) + } else { + None + }; + let scroll = if self.parse_keyword(Keyword::SCROLL) { + Some(true) + } else if self.parse_keywords(&[Keyword::NO, Keyword::SCROLL]) { + Some(false) + } else { + None + }; + + self.expect_keyword(Keyword::CURSOR)?; + let declare_type = Some(DeclareType::Cursor); + + let hold = match self.parse_one_of_keywords(&[Keyword::WITH, Keyword::WITHOUT]) { + Some(keyword) => { + self.expect_keyword(Keyword::HOLD)?; + + match keyword { + Keyword::WITH => Some(true), + Keyword::WITHOUT => Some(false), + _ => unreachable!(), + } + } + None => None, + }; + + self.expect_keyword(Keyword::FOR)?; + + let query = Some(self.parse_query()?); + + Ok(Statement::Declare { + stmts: vec![Declare { + names: vec![name], + data_type: None, + assignment: None, + declare_type, + binary, + sensitive, + scroll, + hold, + for_query: query, + }], + }) + } +} diff --git a/src/parser/delete.rs b/src/parser/delete.rs new file mode 100644 index 000000000..769500dd8 --- /dev/null +++ b/src/parser/delete.rs @@ -0,0 +1,60 @@ +use crate::parser::*; + +impl Parser<'_> { + pub fn parse_delete(&mut self) -> Result { + let (tables, with_from_keyword) = if !self.parse_keyword(Keyword::FROM) { + // `FROM` keyword is optional in BigQuery SQL. + // https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#delete_statement + if dialect_of!(self is BigQueryDialect | GenericDialect) { + (vec![], false) + } else { + let tables = self.parse_comma_separated(|p| p.parse_object_name(false))?; + self.expect_keyword(Keyword::FROM)?; + (tables, true) + } + } else { + (vec![], true) + }; + + let from = self.parse_comma_separated(Parser::parse_table_and_joins)?; + let using = if self.parse_keyword(Keyword::USING) { + Some(self.parse_comma_separated(Parser::parse_table_and_joins)?) + } else { + None + }; + let selection = if self.parse_keyword(Keyword::WHERE) { + Some(self.parse_expr()?) + } else { + None + }; + let returning = if self.parse_keyword(Keyword::RETURNING) { + Some(self.parse_comma_separated(Parser::parse_select_item)?) + } else { + None + }; + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_order_by_expr)? + } else { + vec![] + }; + let limit = if self.parse_keyword(Keyword::LIMIT) { + self.parse_limit()? 
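+ // e.g. MySQL: DELETE FROM t ORDER BY id LIMIT 10;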
+ } else {
+ None
+ };
+
+ Ok(Statement::Delete(Delete {
+ tables,
+ from: if with_from_keyword {
+ FromTable::WithFromKeyword(from)
+ } else {
+ FromTable::WithoutKeyword(from)
+ },
+ using,
+ selection,
+ returning,
+ order_by,
+ limit,
+ }))
+ }
+}
diff --git a/src/parser/dialects/bigquery.rs b/src/parser/dialects/bigquery.rs
new file mode 100644
index 000000000..399e6d81a
--- /dev/null
+++ b/src/parser/dialects/bigquery.rs
@@ -0,0 +1,135 @@
+use crate::parser::*;
+
+impl Parser<'_> {
+ /// Parse `CREATE FUNCTION` for [BigQuery]
+ ///
+ /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
+ pub fn parse_bigquery_create_function(
+ &mut self,
+ or_replace: bool,
+ temporary: bool,
+ ) -> Result<Statement, ParserError> {
+ let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
+ let name = self.parse_object_name(false)?;
+
+ let parse_function_param =
+ |parser: &mut Parser| -> Result<OperateFunctionArg, ParserError> {
+ let name = parser.parse_identifier(false)?;
+ let data_type = parser.parse_data_type()?;
+ Ok(OperateFunctionArg {
+ mode: None,
+ name: Some(name),
+ data_type,
+ default_expr: None,
+ })
+ };
+ self.expect_token(&Token::LParen)?;
+ let args = self.parse_comma_separated0(parse_function_param, Token::RParen)?;
+ self.expect_token(&Token::RParen)?;
+
+ let return_type = if self.parse_keyword(Keyword::RETURNS) {
+ Some(self.parse_data_type()?)
+ } else {
+ None
+ };
+
+ let determinism_specifier = if self.parse_keyword(Keyword::DETERMINISTIC) {
+ Some(FunctionDeterminismSpecifier::Deterministic)
+ } else if self.parse_keywords(&[Keyword::NOT, Keyword::DETERMINISTIC]) {
+ Some(FunctionDeterminismSpecifier::NotDeterministic)
+ } else {
+ None
+ };
+
+ let language = if self.parse_keyword(Keyword::LANGUAGE) {
+ Some(self.parse_identifier(false)?)
+ } else {
+ None
+ };
+
+ let remote_connection =
+ if self.parse_keywords(&[Keyword::REMOTE, Keyword::WITH, Keyword::CONNECTION]) {
+ Some(self.parse_object_name(false)?)
+ } else {
+ None
+ };
+
+ // `OPTIONS` may come before or after the function body but
+ // may be specified at most once.
+ let mut options = self.maybe_parse_options(Keyword::OPTIONS)?;
+
+ let function_body = if remote_connection.is_none() {
+ self.expect_keyword(Keyword::AS)?;
+ let expr = self.parse_expr()?;
+ if options.is_none() {
+ options = self.maybe_parse_options(Keyword::OPTIONS)?;
+ Some(CreateFunctionBody::AsBeforeOptions(expr))
+ } else {
+ Some(CreateFunctionBody::AsAfterOptions(expr))
+ }
+ } else {
+ None
+ };
+
+ Ok(Statement::CreateFunction(CreateFunction {
+ or_replace,
+ temporary,
+ if_not_exists,
+ name,
+ args: Some(args),
+ return_type,
+ function_body,
+ language,
+ determinism_specifier,
+ options,
+ remote_connection,
+ using: None,
+ behavior: None,
+ called_on_null: None,
+ parallel: None,
+ }))
+ }
+
+ /// Parse a [BigQuery] `DECLARE` statement.
+ ///
+ /// Syntax:
+ /// ```text
+ /// DECLARE variable_name[, ...] [{ <variable_type> | <DEFAULT expression> }];
+ /// ```
+ /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/procedural-language#declare
+ pub fn parse_big_query_declare(&mut self) -> Result<Statement, ParserError> {
+ let names = self.parse_comma_separated(|parser| Parser::parse_identifier(parser, false))?;
+
+ let data_type = match self.peek_token().token {
+ Token::Word(w) if w.keyword == Keyword::DEFAULT => None,
+ _ => Some(self.parse_data_type()?),
+ };
+
+ let expr = if data_type.is_some() {
+ if self.parse_keyword(Keyword::DEFAULT) {
+ Some(self.parse_expr()?)
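+ // e.g. DECLARE x INT64 DEFAULT 5; declares both a type and a default.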
+ } else { + None + } + } else { + // If no variable type - default expression must be specified, per BQ docs. + // i.e `DECLARE foo;` is invalid. + self.expect_keyword(Keyword::DEFAULT)?; + Some(self.parse_expr()?) + }; + + Ok(Statement::Declare { + stmts: vec![Declare { + names, + data_type, + assignment: expr.map(|expr| DeclareAssignment::Default(Box::new(expr))), + declare_type: None, + binary: None, + sensitive: None, + scroll: None, + hold: None, + for_query: None, + }], + }) + } +} diff --git a/src/parser/dialects/clickhouse.rs b/src/parser/dialects/clickhouse.rs new file mode 100644 index 000000000..61478d698 --- /dev/null +++ b/src/parser/dialects/clickhouse.rs @@ -0,0 +1,50 @@ +use crate::parser::*; + +impl Parser<'_> { + /// Parse clickhouse [map] + /// + /// Syntax + /// + /// ```sql + /// Map(key_data_type, value_data_type) + /// ``` + /// + /// [map]: https://clickhouse.com/docs/en/sql-reference/data-types/map + pub(crate) fn parse_click_house_map_def( + &mut self, + ) -> Result<(DataType, DataType), ParserError> { + self.expect_keyword(Keyword::MAP)?; + self.expect_token(&Token::LParen)?; + let key_data_type = self.parse_data_type()?; + self.expect_token(&Token::Comma)?; + let value_data_type = self.parse_data_type()?; + self.expect_token(&Token::RParen)?; + + Ok((key_data_type, value_data_type)) + } + + /// Parse clickhouse [tuple] + /// + /// Syntax + /// + /// ```sql + /// Tuple([field_name] field_type, ...) + /// ``` + /// + /// [tuple]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple + pub(crate) fn parse_click_house_tuple_def(&mut self) -> Result, ParserError> { + self.expect_keyword(Keyword::TUPLE)?; + self.expect_token(&Token::LParen)?; + let mut field_defs = vec![]; + loop { + let (def, _) = self.parse_struct_field_def()?; + field_defs.push(def); + if !self.consume_token(&Token::Comma) { + break; + } + } + self.expect_token(&Token::RParen)?; + + Ok(field_defs) + } +} diff --git a/src/parser/dialects/duckdb.rs b/src/parser/dialects/duckdb.rs new file mode 100644 index 000000000..2e8cc5a32 --- /dev/null +++ b/src/parser/dialects/duckdb.rs @@ -0,0 +1,226 @@ +use crate::parser::*; + +// DuckDB related parsing + +impl Parser<'_> { + pub fn parse_attach_duckdb_database_options( + &mut self, + ) -> Result, ParserError> { + if !self.consume_token(&Token::LParen) { + return Ok(vec![]); + } + + let mut options = vec![]; + loop { + if self.parse_keyword(Keyword::READ_ONLY) { + let boolean = if self.parse_keyword(Keyword::TRUE) { + Some(true) + } else if self.parse_keyword(Keyword::FALSE) { + Some(false) + } else { + None + }; + options.push(AttachDuckDBDatabaseOption::ReadOnly(boolean)); + } else if self.parse_keyword(Keyword::TYPE) { + let ident = self.parse_identifier(false)?; + options.push(AttachDuckDBDatabaseOption::Type(ident)); + } else { + return self.expected("expected one of: ), READ_ONLY, TYPE", self.peek_token()); + }; + + if self.consume_token(&Token::RParen) { + return Ok(options); + } else if self.consume_token(&Token::Comma) { + continue; + } else { + return self.expected("expected one of: ')', ','", self.peek_token()); + } + } + } + + pub fn parse_attach_duckdb_database(&mut self) -> Result { + let database = self.parse_keyword(Keyword::DATABASE); + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let database_path = self.parse_identifier(false)?; + let database_alias = if self.parse_keyword(Keyword::AS) { + Some(self.parse_identifier(false)?) 
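+ // e.g. DuckDB: ATTACH DATABASE 'file.db' AS file_db (READ_ONLY);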
+ } else { + None + }; + + let attach_options = self.parse_attach_duckdb_database_options()?; + Ok(Statement::AttachDuckDBDatabase { + if_not_exists, + database, + database_path, + database_alias, + attach_options, + }) + } + + pub fn parse_detach_duckdb_database(&mut self) -> Result { + let database = self.parse_keyword(Keyword::DATABASE); + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let database_alias = self.parse_identifier(false)?; + Ok(Statement::DetachDuckDBDatabase { + if_exists, + database, + database_alias, + }) + } + + /// See [DuckDB Docs](https://duckdb.org/docs/sql/statements/create_secret.html) for more details. + pub fn parse_create_secret( + &mut self, + or_replace: bool, + temporary: bool, + persistent: bool, + ) -> Result { + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + + let mut storage_specifier = None; + let mut name = None; + if self.peek_token() != Token::LParen { + if self.parse_keyword(Keyword::IN) { + storage_specifier = self.parse_identifier(false).ok() + } else { + name = self.parse_identifier(false).ok(); + } + + // Storage specifier may follow the name + if storage_specifier.is_none() + && self.peek_token() != Token::LParen + && self.parse_keyword(Keyword::IN) + { + storage_specifier = self.parse_identifier(false).ok(); + } + } + + self.expect_token(&Token::LParen)?; + self.expect_keyword(Keyword::TYPE)?; + let secret_type = self.parse_identifier(false)?; + + let mut options = Vec::new(); + if self.consume_token(&Token::Comma) { + options.append(&mut self.parse_comma_separated(|p| { + let key = p.parse_identifier(false)?; + let value = p.parse_identifier(false)?; + Ok(SecretOption { key, value }) + })?); + } + self.expect_token(&Token::RParen)?; + + let temp = match (temporary, persistent) { + (true, false) => Some(true), + (false, true) => Some(false), + (false, false) => None, + _ => self.expected("TEMPORARY or PERSISTENT", self.peek_token())?, + }; + + Ok(Statement::CreateSecret { + or_replace, + temporary: temp, + if_not_exists, + name, + storage_specifier, + secret_type, + options, + }) + } + + /// Parse a field for a duckdb [dictionary] + /// + /// Syntax + /// + /// ```sql + /// 'name': expr + /// ``` + /// + /// [dictionary]: https://duckdb.org/docs/sql/data_types/struct#creating-structs + pub(crate) fn parse_duckdb_dictionary_field(&mut self) -> Result { + let key = self.parse_identifier(false)?; + + self.expect_token(&Token::Colon)?; + + let expr = self.parse_expr()?; + + Ok(DictionaryField { + key, + value: Box::new(expr), + }) + } + + /// Parse a field for a duckdb [map] + /// + /// Syntax + /// + /// ```sql + /// key: value + /// ``` + /// + /// [map]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps + pub(crate) fn parse_duckdb_map_field(&mut self) -> Result { + let key = self.parse_expr()?; + + self.expect_token(&Token::Colon)?; + + let value = self.parse_expr()?; + + Ok(MapEntry { + key: Box::new(key), + value: Box::new(value), + }) + } + + /// DuckDB specific: Parse a duckdb [map] + /// + /// Syntax: + /// + /// ```sql + /// Map {key1: value1[, ... 
]} + /// ``` + /// + /// [map]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps + pub(crate) fn parse_duckdb_map_literal(&mut self) -> Result { + self.expect_token(&Token::LBrace)?; + let fields = self.parse_comma_separated0(Self::parse_duckdb_map_field, Token::RBrace)?; + self.expect_token(&Token::RBrace)?; + Ok(Expr::Map(Map { entries: fields })) + } + + /// DuckDB specific: Parse a duckdb [dictionary] + /// + /// Syntax: + /// + /// ```sql + /// {'field_name': expr1[, ... ]} + /// ``` + /// + /// [dictionary]: https://duckdb.org/docs/sql/data_types/struct#creating-structs + pub(crate) fn parse_duckdb_struct_literal(&mut self) -> Result { + self.expect_token(&Token::LBrace)?; + + let fields = self.parse_comma_separated(Self::parse_duckdb_dictionary_field)?; + + self.expect_token(&Token::RBrace)?; + + Ok(Expr::Dictionary(fields)) + } + + /// Duckdb Struct Data Type + pub(crate) fn parse_duckdb_struct_type_def(&mut self) -> Result, ParserError> { + self.expect_keyword(Keyword::STRUCT)?; + self.expect_token(&Token::LParen)?; + let struct_body = self.parse_comma_separated(|parser| { + let field_name = parser.parse_identifier(false)?; + let field_type = parser.parse_data_type()?; + + Ok(StructField { + field_name: Some(field_name), + field_type, + }) + }); + self.expect_token(&Token::RParen)?; + struct_body + } +} diff --git a/src/parser/dialects/hive.rs b/src/parser/dialects/hive.rs new file mode 100644 index 000000000..7c453518d --- /dev/null +++ b/src/parser/dialects/hive.rs @@ -0,0 +1,192 @@ +use crate::parser::*; + +impl Parser<'_> { + /// Parse `CREATE FUNCTION` for [Hive] + /// + /// [Hive]: https://cwiki.apache.org/confluence/display/hive/languagemanual+ddl#LanguageManualDDL-Create/Drop/ReloadFunction + pub fn parse_hive_create_function( + &mut self, + or_replace: bool, + temporary: bool, + ) -> Result { + let name = self.parse_object_name(false)?; + self.expect_keyword(Keyword::AS)?; + + let as_ = self.parse_create_function_body_string()?; + let using = self.parse_optional_create_function_using()?; + + Ok(Statement::CreateFunction(CreateFunction { + or_replace, + temporary, + name, + function_body: Some(CreateFunctionBody::AsBeforeOptions(as_)), + using, + if_not_exists: false, + args: None, + return_type: None, + behavior: None, + called_on_null: None, + parallel: None, + language: None, + determinism_specifier: None, + options: None, + remote_connection: None, + })) + } + + pub fn parse_hive_formats(&mut self) -> Result { + let mut hive_format = HiveFormat::default(); + loop { + match self.parse_one_of_keywords(&[ + Keyword::ROW, + Keyword::STORED, + Keyword::LOCATION, + Keyword::WITH, + ]) { + Some(Keyword::ROW) => { + hive_format.row_format = Some(self.parse_row_format()?); + } + Some(Keyword::STORED) => { + self.expect_keyword(Keyword::AS)?; + if self.parse_keyword(Keyword::INPUTFORMAT) { + let input_format = self.parse_expr()?; + self.expect_keyword(Keyword::OUTPUTFORMAT)?; + let output_format = self.parse_expr()?; + hive_format.storage = Some(HiveIOFormat::IOF { + input_format, + output_format, + }); + } else { + let format = self.parse_file_format()?; + hive_format.storage = Some(HiveIOFormat::FileFormat { format }); + } + } + Some(Keyword::LOCATION) => { + hive_format.location = Some(self.parse_literal_string()?); + } + Some(Keyword::WITH) => { + self.prev_token(); + let properties = self + .parse_options_with_keywords(&[Keyword::WITH, Keyword::SERDEPROPERTIES])?; + if !properties.is_empty() { + hive_format.serde_properties = Some(properties); + } else { + 
break; + } + } + None => break, + _ => break, + } + } + + Ok(hive_format) + } + + pub fn parse_row_format(&mut self) -> Result { + self.expect_keyword(Keyword::FORMAT)?; + match self.parse_one_of_keywords(&[Keyword::SERDE, Keyword::DELIMITED]) { + Some(Keyword::SERDE) => { + let class = self.parse_literal_string()?; + Ok(HiveRowFormat::SERDE { class }) + } + _ => { + let mut row_delimiters = vec![]; + + loop { + match self.parse_one_of_keywords(&[ + Keyword::FIELDS, + Keyword::COLLECTION, + Keyword::MAP, + Keyword::LINES, + Keyword::NULL, + ]) { + Some(Keyword::FIELDS) => { + if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::FieldsTerminatedBy, + char: self.parse_identifier(false)?, + }); + + if self.parse_keywords(&[Keyword::ESCAPED, Keyword::BY]) { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::FieldsEscapedBy, + char: self.parse_identifier(false)?, + }); + } + } else { + break; + } + } + Some(Keyword::COLLECTION) => { + if self.parse_keywords(&[ + Keyword::ITEMS, + Keyword::TERMINATED, + Keyword::BY, + ]) { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::CollectionItemsTerminatedBy, + char: self.parse_identifier(false)?, + }); + } else { + break; + } + } + Some(Keyword::MAP) => { + if self.parse_keywords(&[ + Keyword::KEYS, + Keyword::TERMINATED, + Keyword::BY, + ]) { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::MapKeysTerminatedBy, + char: self.parse_identifier(false)?, + }); + } else { + break; + } + } + Some(Keyword::LINES) => { + if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::LinesTerminatedBy, + char: self.parse_identifier(false)?, + }); + } else { + break; + } + } + Some(Keyword::NULL) => { + if self.parse_keywords(&[Keyword::DEFINED, Keyword::AS]) { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::NullDefinedAs, + char: self.parse_identifier(false)?, + }); + } else { + break; + } + } + _ => { + break; + } + } + } + + Ok(HiveRowFormat::DELIMITED { + delimiters: row_delimiters, + }) + } + } + } + + //TODO: Implement parsing for Skewed + pub fn parse_hive_distribution(&mut self) -> Result { + if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) { + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(Parser::parse_column_def)?; + self.expect_token(&Token::RParen)?; + Ok(HiveDistributionStyle::PARTITIONED { columns }) + } else { + Ok(HiveDistributionStyle::NONE) + } + } +} diff --git a/src/parser/dialects/mod.rs b/src/parser/dialects/mod.rs new file mode 100644 index 000000000..47d25f66d --- /dev/null +++ b/src/parser/dialects/mod.rs @@ -0,0 +1,9 @@ +mod bigquery; +mod clickhouse; +mod duckdb; +mod hive; +mod mssql; +mod postgresql; +mod snowflake; +mod sqlite; +mod utils; diff --git a/src/parser/dialects/mssql.rs b/src/parser/dialects/mssql.rs new file mode 100644 index 000000000..d2024a333 --- /dev/null +++ b/src/parser/dialects/mssql.rs @@ -0,0 +1,242 @@ +use crate::parser::*; + +impl Parser<'_> { + /// Parse a [MsSql] `DECLARE` statement. 
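+ ///
+ /// For example, an illustrative statement this grammar accepts:
+ ///
+ /// ```sql
+ /// DECLARE @foo INT = 5, @bar CURSOR;
+ /// ```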
+ ///
+ /// Syntax:
+ /// ```text
+ /// DECLARE
+ ///   {
+ ///     { @local_variable [AS] data_type [ = value ] }
+ ///     | { @cursor_variable_name CURSOR }
+ ///   } [ ,...n ]
+ /// ```
+ /// [MsSql]: https://learn.microsoft.com/en-us/sql/t-sql/language-elements/declare-local-variable-transact-sql?view=sql-server-ver16
+ pub fn parse_mssql_declare(&mut self) -> Result<Statement, ParserError> {
+ let stmts = self.parse_comma_separated(Parser::parse_mssql_declare_stmt)?;
+
+ Ok(Statement::Declare { stmts })
+ }
+
+ /// Parse the body of a [MsSql] `DECLARE` statement.
+ ///
+ /// Syntax:
+ /// ```text
+ /// {
+ ///   { @local_variable [AS] data_type [ = value ] }
+ ///   | { @cursor_variable_name CURSOR }
+ /// } [ ,...n ]
+ /// ```
+ /// [MsSql]: https://learn.microsoft.com/en-us/sql/t-sql/language-elements/declare-local-variable-transact-sql?view=sql-server-ver16
+ pub fn parse_mssql_declare_stmt(&mut self) -> Result<Declare, ParserError> {
+ let name = {
+ let ident = self.parse_identifier(false)?;
+ if !ident.value.starts_with('@') {
+ Err(ParserError::TokenizerError(
+ "Invalid MsSql variable declaration.".to_string(),
+ ))
+ } else {
+ Ok(ident)
+ }
+ }?;
+
+ let (declare_type, data_type) = match self.peek_token().token {
+ Token::Word(w) => match w.keyword {
+ Keyword::CURSOR => {
+ self.next_token();
+ (Some(DeclareType::Cursor), None)
+ }
+ Keyword::AS => {
+ self.next_token();
+ (None, Some(self.parse_data_type()?))
+ }
+ _ => (None, Some(self.parse_data_type()?)),
+ },
+ _ => (None, Some(self.parse_data_type()?)),
+ };
+
+ let assignment = self.parse_mssql_variable_declaration_expression()?;
+
+ Ok(Declare {
+ names: vec![name],
+ data_type,
+ assignment,
+ declare_type,
+ binary: None,
+ sensitive: None,
+ scroll: None,
+ hold: None,
+ for_query: None,
+ })
+ }
+
+ /// Parses the assigned expression in a variable declaration.
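+ ///
+ /// For example, the `= 42` in `DECLARE @x INT = 42;` (illustrative).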
+ /// + /// Syntax: + /// ```text + /// [ = ] + /// ``` + pub fn parse_mssql_variable_declaration_expression( + &mut self, + ) -> Result, ParserError> { + Ok(match self.peek_token().token { + Token::Eq => { + self.next_token(); // Skip `=` + Some(DeclareAssignment::MsSqlAssignment(Box::new( + self.parse_expr()?, + ))) + } + _ => None, + }) + } + + pub(crate) fn parse_mssql_alter_role(&mut self) -> Result { + let role_name = self.parse_identifier(false)?; + + let operation = if self.parse_keywords(&[Keyword::ADD, Keyword::MEMBER]) { + let member_name = self.parse_identifier(false)?; + AlterRoleOperation::AddMember { member_name } + } else if self.parse_keywords(&[Keyword::DROP, Keyword::MEMBER]) { + let member_name = self.parse_identifier(false)?; + AlterRoleOperation::DropMember { member_name } + } else if self.parse_keywords(&[Keyword::WITH, Keyword::NAME]) { + if self.consume_token(&Token::Eq) { + let role_name = self.parse_identifier(false)?; + AlterRoleOperation::RenameRole { role_name } + } else { + return self.expected("= after WITH NAME ", self.peek_token()); + } + } else { + return self.expected("'ADD' or 'DROP' or 'WITH NAME'", self.peek_token()); + }; + + Ok(Statement::AlterRole { + name: role_name, + operation, + }) + } + + /// Parse a mssql `FOR [XML | JSON | BROWSE]` clause + pub fn parse_for_clause(&mut self) -> Result, ParserError> { + if self.parse_keyword(Keyword::XML) { + Ok(Some(self.parse_for_xml()?)) + } else if self.parse_keyword(Keyword::JSON) { + Ok(Some(self.parse_for_json()?)) + } else if self.parse_keyword(Keyword::BROWSE) { + Ok(Some(ForClause::Browse)) + } else { + Ok(None) + } + } + + /// Parse a mssql `FOR XML` clause + pub fn parse_for_xml(&mut self) -> Result { + let for_xml = if self.parse_keyword(Keyword::RAW) { + let mut element_name = None; + if self.peek_token().token == Token::LParen { + self.expect_token(&Token::LParen)?; + element_name = Some(self.parse_literal_string()?); + self.expect_token(&Token::RParen)?; + } + ForXml::Raw(element_name) + } else if self.parse_keyword(Keyword::AUTO) { + ForXml::Auto + } else if self.parse_keyword(Keyword::EXPLICIT) { + ForXml::Explicit + } else if self.parse_keyword(Keyword::PATH) { + let mut element_name = None; + if self.peek_token().token == Token::LParen { + self.expect_token(&Token::LParen)?; + element_name = Some(self.parse_literal_string()?); + self.expect_token(&Token::RParen)?; + } + ForXml::Path(element_name) + } else { + return Err(ParserError::ParserError( + "Expected FOR XML [RAW | AUTO | EXPLICIT | PATH ]".to_string(), + )); + }; + let mut elements = false; + let mut binary_base64 = false; + let mut root = None; + let mut r#type = false; + while self.peek_token().token == Token::Comma { + self.next_token(); + if self.parse_keyword(Keyword::ELEMENTS) { + elements = true; + } else if self.parse_keyword(Keyword::BINARY) { + self.expect_keyword(Keyword::BASE64)?; + binary_base64 = true; + } else if self.parse_keyword(Keyword::ROOT) { + self.expect_token(&Token::LParen)?; + root = Some(self.parse_literal_string()?); + self.expect_token(&Token::RParen)?; + } else if self.parse_keyword(Keyword::TYPE) { + r#type = true; + } + } + Ok(ForClause::Xml { + for_xml, + elements, + binary_base64, + root, + r#type, + }) + } + + /// Parse a mssql `FOR JSON` clause + pub fn parse_for_json(&mut self) -> Result { + let for_json = if self.parse_keyword(Keyword::AUTO) { + ForJson::Auto + } else if self.parse_keyword(Keyword::PATH) { + ForJson::Path + } else { + return Err(ParserError::ParserError( + "Expected FOR JSON 
[AUTO | PATH ]".to_string(), + )); + }; + let mut root = None; + let mut include_null_values = false; + let mut without_array_wrapper = false; + while self.peek_token().token == Token::Comma { + self.next_token(); + if self.parse_keyword(Keyword::ROOT) { + self.expect_token(&Token::LParen)?; + root = Some(self.parse_literal_string()?); + self.expect_token(&Token::RParen)?; + } else if self.parse_keyword(Keyword::INCLUDE_NULL_VALUES) { + include_null_values = true; + } else if self.parse_keyword(Keyword::WITHOUT_ARRAY_WRAPPER) { + without_array_wrapper = true; + } + } + Ok(ForClause::Json { + for_json, + root, + include_null_values, + without_array_wrapper, + }) + } + + /// mssql-like convert function + pub(crate) fn parse_mssql_convert(&mut self, is_try: bool) -> Result { + self.expect_token(&Token::LParen)?; + let data_type = self.parse_data_type()?; + self.expect_token(&Token::Comma)?; + let expr = self.parse_expr()?; + let styles = if self.consume_token(&Token::Comma) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + Default::default() + }; + self.expect_token(&Token::RParen)?; + Ok(Expr::Convert { + is_try, + expr: Box::new(expr), + data_type: Some(data_type), + charset: None, + target_before_value: true, + styles, + }) + } +} diff --git a/src/parser/dialects/postgresql.rs b/src/parser/dialects/postgresql.rs new file mode 100644 index 000000000..fd5c07e05 --- /dev/null +++ b/src/parser/dialects/postgresql.rs @@ -0,0 +1,280 @@ +use crate::parser::*; + +impl Parser<'_> { + /// Parse `CREATE FUNCTION` for [Postgres] + /// + /// [Postgres]: https://www.postgresql.org/docs/15/sql-createfunction.html + pub fn parse_postgres_create_function( + &mut self, + or_replace: bool, + temporary: bool, + ) -> Result { + let name = self.parse_object_name(false)?; + self.expect_token(&Token::LParen)?; + let args = if self.consume_token(&Token::RParen) { + self.prev_token(); + None + } else { + Some(self.parse_comma_separated(Parser::parse_function_arg)?) + }; + + self.expect_token(&Token::RParen)?; + + let return_type = if self.parse_keyword(Keyword::RETURNS) { + Some(self.parse_data_type()?) 
+ } else { + None + }; + + #[derive(Default)] + struct Body { + language: Option, + behavior: Option, + function_body: Option, + called_on_null: Option, + parallel: Option, + } + let mut body = Body::default(); + loop { + fn ensure_not_set(field: &Option, name: &str) -> Result<(), ParserError> { + if field.is_some() { + return Err(ParserError::ParserError(format!( + "{name} specified more than once", + ))); + } + Ok(()) + } + if self.parse_keyword(Keyword::AS) { + ensure_not_set(&body.function_body, "AS")?; + body.function_body = Some(CreateFunctionBody::AsBeforeOptions( + self.parse_create_function_body_string()?, + )); + } else if self.parse_keyword(Keyword::LANGUAGE) { + ensure_not_set(&body.language, "LANGUAGE")?; + body.language = Some(self.parse_identifier(false)?); + } else if self.parse_keyword(Keyword::IMMUTABLE) { + ensure_not_set(&body.behavior, "IMMUTABLE | STABLE | VOLATILE")?; + body.behavior = Some(FunctionBehavior::Immutable); + } else if self.parse_keyword(Keyword::STABLE) { + ensure_not_set(&body.behavior, "IMMUTABLE | STABLE | VOLATILE")?; + body.behavior = Some(FunctionBehavior::Stable); + } else if self.parse_keyword(Keyword::VOLATILE) { + ensure_not_set(&body.behavior, "IMMUTABLE | STABLE | VOLATILE")?; + body.behavior = Some(FunctionBehavior::Volatile); + } else if self.parse_keywords(&[ + Keyword::CALLED, + Keyword::ON, + Keyword::NULL, + Keyword::INPUT, + ]) { + ensure_not_set( + &body.called_on_null, + "CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT", + )?; + body.called_on_null = Some(FunctionCalledOnNull::CalledOnNullInput); + } else if self.parse_keywords(&[ + Keyword::RETURNS, + Keyword::NULL, + Keyword::ON, + Keyword::NULL, + Keyword::INPUT, + ]) { + ensure_not_set( + &body.called_on_null, + "CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT", + )?; + body.called_on_null = Some(FunctionCalledOnNull::ReturnsNullOnNullInput); + } else if self.parse_keyword(Keyword::STRICT) { + ensure_not_set( + &body.called_on_null, + "CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT", + )?; + body.called_on_null = Some(FunctionCalledOnNull::Strict); + } else if self.parse_keyword(Keyword::PARALLEL) { + ensure_not_set(&body.parallel, "PARALLEL { UNSAFE | RESTRICTED | SAFE }")?; + if self.parse_keyword(Keyword::UNSAFE) { + body.parallel = Some(FunctionParallel::Unsafe); + } else if self.parse_keyword(Keyword::RESTRICTED) { + body.parallel = Some(FunctionParallel::Restricted); + } else if self.parse_keyword(Keyword::SAFE) { + body.parallel = Some(FunctionParallel::Safe); + } else { + return self.expected("one of UNSAFE | RESTRICTED | SAFE", self.peek_token()); + } + } else if self.parse_keyword(Keyword::RETURN) { + ensure_not_set(&body.function_body, "RETURN")?; + body.function_body = Some(CreateFunctionBody::Return(self.parse_expr()?)); + } else { + break; + } + } + + Ok(Statement::CreateFunction(CreateFunction { + or_replace, + temporary, + name, + args, + return_type, + behavior: body.behavior, + called_on_null: body.called_on_null, + parallel: body.parallel, + language: body.language, + function_body: body.function_body, + if_not_exists: false, + using: None, + determinism_specifier: None, + options: None, + remote_connection: None, + })) + } + + /// Parse a postgresql casting style which is in the form of `expr::datatype`. 
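+ ///
+ /// For example (illustrative):
+ ///
+ /// ```sql
+ /// SELECT '42'::INTEGER
+ /// ```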
+ pub fn parse_pg_cast(&mut self, expr: Expr) -> Result { + Ok(Expr::Cast { + kind: CastKind::DoubleColon, + expr: Box::new(expr), + data_type: self.parse_data_type()?, + format: None, + }) + } + + pub(crate) fn parse_pg_alter_role(&mut self) -> Result { + let role_name = self.parse_identifier(false)?; + + // [ IN DATABASE _`database_name`_ ] + let in_database = if self.parse_keywords(&[Keyword::IN, Keyword::DATABASE]) { + self.parse_object_name(false).ok() + } else { + None + }; + + let operation = if self.parse_keyword(Keyword::RENAME) { + if self.parse_keyword(Keyword::TO) { + let role_name = self.parse_identifier(false)?; + AlterRoleOperation::RenameRole { role_name } + } else { + return self.expected("TO after RENAME", self.peek_token()); + } + // SET + } else if self.parse_keyword(Keyword::SET) { + let config_name = self.parse_object_name(false)?; + // FROM CURRENT + if self.parse_keywords(&[Keyword::FROM, Keyword::CURRENT]) { + AlterRoleOperation::Set { + config_name, + config_value: SetConfigValue::FromCurrent, + in_database, + } + // { TO | = } { value | DEFAULT } + } else if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { + if self.parse_keyword(Keyword::DEFAULT) { + AlterRoleOperation::Set { + config_name, + config_value: SetConfigValue::Default, + in_database, + } + } else if let Ok(expr) = self.parse_expr() { + AlterRoleOperation::Set { + config_name, + config_value: SetConfigValue::Value(expr), + in_database, + } + } else { + self.expected("config value", self.peek_token())? + } + } else { + self.expected("'TO' or '=' or 'FROM CURRENT'", self.peek_token())? + } + // RESET + } else if self.parse_keyword(Keyword::RESET) { + if self.parse_keyword(Keyword::ALL) { + AlterRoleOperation::Reset { + config_name: ResetConfig::ALL, + in_database, + } + } else { + let config_name = self.parse_object_name(false)?; + AlterRoleOperation::Reset { + config_name: ResetConfig::ConfigName(config_name), + in_database, + } + } + // option + } else { + // [ WITH ] + let _ = self.parse_keyword(Keyword::WITH); + // option + let mut options = vec![]; + while let Some(opt) = self.maybe_parse(|parser| parser.parse_pg_role_option())? 
{ + options.push(opt); + } + // check option + if options.is_empty() { + return self.expected("option", self.peek_token())?; + } + + AlterRoleOperation::WithOptions { options } + }; + + Ok(Statement::AlterRole { + name: role_name, + operation, + }) + } + + pub(crate) fn parse_pg_role_option(&mut self) -> Result { + let option = match self.parse_one_of_keywords(&[ + Keyword::BYPASSRLS, + Keyword::NOBYPASSRLS, + Keyword::CONNECTION, + Keyword::CREATEDB, + Keyword::NOCREATEDB, + Keyword::CREATEROLE, + Keyword::NOCREATEROLE, + Keyword::INHERIT, + Keyword::NOINHERIT, + Keyword::LOGIN, + Keyword::NOLOGIN, + Keyword::PASSWORD, + Keyword::REPLICATION, + Keyword::NOREPLICATION, + Keyword::SUPERUSER, + Keyword::NOSUPERUSER, + Keyword::VALID, + ]) { + Some(Keyword::BYPASSRLS) => RoleOption::BypassRLS(true), + Some(Keyword::NOBYPASSRLS) => RoleOption::BypassRLS(false), + Some(Keyword::CONNECTION) => { + self.expect_keyword(Keyword::LIMIT)?; + RoleOption::ConnectionLimit(Expr::Value(self.parse_number_value()?)) + } + Some(Keyword::CREATEDB) => RoleOption::CreateDB(true), + Some(Keyword::NOCREATEDB) => RoleOption::CreateDB(false), + Some(Keyword::CREATEROLE) => RoleOption::CreateRole(true), + Some(Keyword::NOCREATEROLE) => RoleOption::CreateRole(false), + Some(Keyword::INHERIT) => RoleOption::Inherit(true), + Some(Keyword::NOINHERIT) => RoleOption::Inherit(false), + Some(Keyword::LOGIN) => RoleOption::Login(true), + Some(Keyword::NOLOGIN) => RoleOption::Login(false), + Some(Keyword::PASSWORD) => { + let password = if self.parse_keyword(Keyword::NULL) { + Password::NullPassword + } else { + Password::Password(Expr::Value(self.parse_value()?)) + }; + RoleOption::Password(password) + } + Some(Keyword::REPLICATION) => RoleOption::Replication(true), + Some(Keyword::NOREPLICATION) => RoleOption::Replication(false), + Some(Keyword::SUPERUSER) => RoleOption::SuperUser(true), + Some(Keyword::NOSUPERUSER) => RoleOption::SuperUser(false), + Some(Keyword::VALID) => { + self.expect_keyword(Keyword::UNTIL)?; + RoleOption::ValidUntil(Expr::Value(self.parse_value()?)) + } + _ => self.expected("option", self.peek_token())?, + }; + + Ok(option) + } +} diff --git a/src/parser/dialects/snowflake.rs b/src/parser/dialects/snowflake.rs new file mode 100644 index 000000000..b0ee4bae1 --- /dev/null +++ b/src/parser/dialects/snowflake.rs @@ -0,0 +1,145 @@ +use crate::keywords::ALL_KEYWORDS; +use crate::parser::*; + +impl Parser<'_> { + /// Parse a [Snowflake] `DECLARE` statement. + /// + /// Syntax: + /// ```text + /// DECLARE + /// [{ + /// | + /// | + /// | }; ... 
]
+ ///
+ /// <variable_declaration>
+ /// <variable_name> [<type>] [ { DEFAULT | := } <expression> ]
+ ///
+ /// <cursor_declaration>
+ /// <cursor_name> CURSOR FOR <query>
+ ///
+ /// <resultset_declaration>
+ /// <resultset_name> RESULTSET [ { DEFAULT | := } ( <query> ) ] ;
+ ///
+ /// <exception_declaration>
+ /// <exception_name> EXCEPTION [ ( <exception_number> , '<exception_message>' ) ] ;
+ /// ```
+ ///
+ /// [Snowflake]: https://docs.snowflake.com/en/sql-reference/snowflake-scripting/declare
+ pub fn parse_snowflake_declare(&mut self) -> Result<Statement, ParserError> {
+ let mut stmts = vec![];
+ loop {
+ let name = self.parse_identifier(false)?;
+ let (declare_type, for_query, assigned_expr, data_type) =
+ if self.parse_keyword(Keyword::CURSOR) {
+ self.expect_keyword(Keyword::FOR)?;
+ match self.peek_token().token {
+ Token::Word(w) if w.keyword == Keyword::SELECT => (
+ Some(DeclareType::Cursor),
+ Some(self.parse_query()?),
+ None,
+ None,
+ ),
+ _ => (
+ Some(DeclareType::Cursor),
+ None,
+ Some(DeclareAssignment::For(Box::new(self.parse_expr()?))),
+ None,
+ ),
+ }
+ } else if self.parse_keyword(Keyword::RESULTSET) {
+ let assigned_expr = if self.peek_token().token != Token::SemiColon {
+ self.parse_snowflake_variable_declaration_expression()?
+ } else {
+ // Nothing more to do. The statement has no further parameters.
+ None
+ };
+
+ (Some(DeclareType::ResultSet), None, assigned_expr, None)
+ } else if self.parse_keyword(Keyword::EXCEPTION) {
+ let assigned_expr = if self.peek_token().token == Token::LParen {
+ Some(DeclareAssignment::Expr(Box::new(self.parse_expr()?)))
+ } else {
+ // Nothing more to do. The statement has no further parameters.
+ None
+ };
+
+ (Some(DeclareType::Exception), None, assigned_expr, None)
+ } else {
+ // Without an explicit keyword, the only valid option is variable declaration.
+ let (assigned_expr, data_type) = if let Some(assigned_expr) =
+ self.parse_snowflake_variable_declaration_expression()?
+ {
+ (Some(assigned_expr), None)
+ } else if let Token::Word(_) = self.peek_token().token {
+ let data_type = self.parse_data_type()?;
+ (
+ self.parse_snowflake_variable_declaration_expression()?,
+ Some(data_type),
+ )
+ } else {
+ (None, None)
+ };
+ (None, None, assigned_expr, data_type)
+ };
+ let stmt = Declare {
+ names: vec![name],
+ data_type,
+ assignment: assigned_expr,
+ declare_type,
+ binary: None,
+ sensitive: None,
+ scroll: None,
+ hold: None,
+ for_query,
+ };
+
+ stmts.push(stmt);
+ if self.consume_token(&Token::SemiColon) {
+ match self.peek_token().token {
+ Token::Word(w)
+ if ALL_KEYWORDS
+ .binary_search(&w.value.to_uppercase().as_str())
+ .is_err() =>
+ {
+ // Not a keyword - start of a new declaration.
+ continue;
+ }
+ _ => {
+ // Put back the semicolon, this is the end of the DECLARE statement.
+ self.prev_token();
+ }
+ }
+ }
+
+ break;
+ }
+
+ Ok(Statement::Declare { stmts })
+ }
+
+ /// Parses the assigned expression in a variable declaration.
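+ ///
+ /// For example, the `DEFAULT 42` in `DECLARE x INT DEFAULT 42;`, or the
+ /// `:= 42` form (illustrative).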
+ ///
+ /// Syntax:
+ /// ```text
+ /// [ { DEFAULT | := } <expression> ]
+ /// ```
+ ///
+ pub fn parse_snowflake_variable_declaration_expression(
+ &mut self,
+ ) -> Result<Option<DeclareAssignment>, ParserError> {
+ Ok(match self.peek_token().token {
+ Token::Word(w) if w.keyword == Keyword::DEFAULT => {
+ self.next_token(); // Skip `DEFAULT`
+ Some(DeclareAssignment::Default(Box::new(self.parse_expr()?)))
+ }
+ Token::Assignment => {
+ self.next_token(); // Skip `:=`
+ Some(DeclareAssignment::DuckAssignment(Box::new(
+ self.parse_expr()?,
+ )))
+ }
+ _ => None,
+ })
+ }
+}
diff --git a/src/parser/dialects/sqlite.rs b/src/parser/dialects/sqlite.rs
new file mode 100644
index 000000000..ce4bf13d6
--- /dev/null
+++ b/src/parser/dialects/sqlite.rs
@@ -0,0 +1,23 @@
+use crate::parser::*;
+
+impl Parser<'_> {
+ /// SQLite-specific `CREATE VIRTUAL TABLE`
+ pub fn parse_create_virtual_table(&mut self) -> Result<Statement, ParserError> {
+ self.expect_keyword(Keyword::TABLE)?;
+ let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
+ let table_name = self.parse_object_name(false)?;
+ self.expect_keyword(Keyword::USING)?;
+ let module_name = self.parse_identifier(false)?;
+ // SQLite docs note that module "arguments syntax is sufficiently
+ // general that the arguments can be made to appear as column
+ // definitions in a traditional CREATE TABLE statement", but
+ // we don't implement that.
+ let module_args = self.parse_parenthesized_column_list(Optional, false)?;
+ Ok(Statement::CreateVirtualTable {
+ name: table_name,
+ if_not_exists,
+ module_name,
+ module_args,
+ })
+ }
+}
diff --git a/src/parser/dialects/utils.rs b/src/parser/dialects/utils.rs
new file mode 100644
index 000000000..8771fe9b9
--- /dev/null
+++ b/src/parser/dialects/utils.rs
@@ -0,0 +1,19 @@
+use crate::parser::*;
+
+impl Parser<'_> {
+ /// Parse the body of a `CREATE FUNCTION` specified as a string.
+ /// e.g. `CREATE FUNCTION ... AS $$ body $$`.
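+ /// On dialects without dollar-quoted strings this falls back to a plain
+ /// single-quoted literal, e.g. `AS 'body'`.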
+ pub(crate) fn parse_create_function_body_string(&mut self) -> Result { + let peek_token = self.peek_token(); + match peek_token.token { + Token::DollarQuotedString(s) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => + { + self.next_token(); + Ok(Expr::Value(Value::DollarQuotedString(s))) + } + _ => Ok(Expr::Value(Value::SingleQuotedString( + self.parse_literal_string()?, + ))), + } + } +} diff --git a/src/parser/discard.rs b/src/parser/discard.rs new file mode 100644 index 000000000..5aa2aec3f --- /dev/null +++ b/src/parser/discard.rs @@ -0,0 +1,21 @@ +use crate::parser::*; + +impl Parser<'_> { + pub fn parse_discard(&mut self) -> Result { + let object_type = if self.parse_keyword(Keyword::ALL) { + DiscardObject::ALL + } else if self.parse_keyword(Keyword::PLANS) { + DiscardObject::PLANS + } else if self.parse_keyword(Keyword::SEQUENCES) { + DiscardObject::SEQUENCES + } else if self.parse_keyword(Keyword::TEMP) || self.parse_keyword(Keyword::TEMPORARY) { + DiscardObject::TEMP + } else { + return self.expected( + "ALL, PLANS, SEQUENCES, TEMP or TEMPORARY after DISCARD", + self.peek_token(), + ); + }; + Ok(Statement::Discard { object_type }) + } +} diff --git a/src/parser/drop.rs b/src/parser/drop.rs new file mode 100644 index 000000000..bc175b947 --- /dev/null +++ b/src/parser/drop.rs @@ -0,0 +1,189 @@ +use super::*; + +use crate::parser_err; + +impl Parser<'_> { + /// Parse statements of the DropTrigger type such as: + /// + /// ```sql + /// DROP TRIGGER [ IF EXISTS ] name ON table_name [ CASCADE | RESTRICT ] + /// ``` + pub fn parse_drop_trigger(&mut self) -> Result { + if !dialect_of!(self is PostgreSqlDialect | GenericDialect) { + self.prev_token(); + return self.expected("an object type after DROP", self.peek_token()); + } + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let trigger_name = self.parse_object_name(false)?; + self.expect_keyword(Keyword::ON)?; + let table_name = self.parse_object_name(false)?; + let option = self + .parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]) + .map(|keyword| match keyword { + Keyword::CASCADE => ReferentialAction::Cascade, + Keyword::RESTRICT => ReferentialAction::Restrict, + _ => unreachable!(), + }); + Ok(Statement::DropTrigger { + if_exists, + trigger_name, + table_name, + option, + }) + } + + pub fn parse_drop(&mut self) -> Result { + // MySQL dialect supports `TEMPORARY` + let temporary = dialect_of!(self is MySqlDialect | GenericDialect | DuckDbDialect) + && self.parse_keyword(Keyword::TEMPORARY); + let persistent = dialect_of!(self is DuckDbDialect) + && self.parse_one_of_keywords(&[Keyword::PERSISTENT]).is_some(); + + let object_type = if self.parse_keyword(Keyword::TABLE) { + ObjectType::Table + } else if self.parse_keyword(Keyword::VIEW) { + ObjectType::View + } else if self.parse_keyword(Keyword::INDEX) { + ObjectType::Index + } else if self.parse_keyword(Keyword::ROLE) { + ObjectType::Role + } else if self.parse_keyword(Keyword::SCHEMA) { + ObjectType::Schema + } else if self.parse_keyword(Keyword::DATABASE) { + ObjectType::Database + } else if self.parse_keyword(Keyword::SEQUENCE) { + ObjectType::Sequence + } else if self.parse_keyword(Keyword::STAGE) { + ObjectType::Stage + } else if self.parse_keyword(Keyword::TYPE) { + ObjectType::Type + } else if self.parse_keyword(Keyword::FUNCTION) { + return self.parse_drop_function(); + } else if self.parse_keyword(Keyword::POLICY) { + return self.parse_drop_policy(); + } else if self.parse_keyword(Keyword::PROCEDURE) { + return 
self.parse_drop_procedure(); + } else if self.parse_keyword(Keyword::SECRET) { + return self.parse_drop_secret(temporary, persistent); + } else if self.parse_keyword(Keyword::TRIGGER) { + return self.parse_drop_trigger(); + } else { + return self.expected( + "TABLE, VIEW, INDEX, ROLE, SCHEMA, DATABASE, FUNCTION, PROCEDURE, STAGE, TRIGGER, SECRET, SEQUENCE, or TYPE after DROP", + self.peek_token(), + ); + }; + // Many dialects support the non-standard `IF EXISTS` clause and allow + // specifying multiple objects to delete in a single statement + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let names = self.parse_comma_separated(|p| p.parse_object_name(false))?; + + let loc = self.peek_token().span.start; + let cascade = self.parse_keyword(Keyword::CASCADE); + let restrict = self.parse_keyword(Keyword::RESTRICT); + let purge = self.parse_keyword(Keyword::PURGE); + if cascade && restrict { + return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP", loc); + } + if object_type == ObjectType::Role && (cascade || restrict || purge) { + return parser_err!( + "Cannot specify CASCADE, RESTRICT, or PURGE in DROP ROLE", + loc + ); + } + Ok(Statement::Drop { + object_type, + if_exists, + names, + cascade, + restrict, + purge, + temporary, + }) + } + + fn parse_optional_referential_action(&mut self) -> Option { + match self.parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]) { + Some(Keyword::CASCADE) => Some(ReferentialAction::Cascade), + Some(Keyword::RESTRICT) => Some(ReferentialAction::Restrict), + _ => None, + } + } + + /// ```sql + /// DROP FUNCTION [ IF EXISTS ] name [ ( [ [ argmode ] [ argname ] argtype [, ...] ] ) ] [, ...] + /// [ CASCADE | RESTRICT ] + /// ``` + fn parse_drop_function(&mut self) -> Result { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let func_desc = self.parse_comma_separated(Parser::parse_function_desc)?; + let option = self.parse_optional_referential_action(); + Ok(Statement::DropFunction { + if_exists, + func_desc, + option, + }) + } + + /// ```sql + /// DROP POLICY [ IF EXISTS ] name ON table_name [ CASCADE | RESTRICT ] + /// ``` + /// + /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-droppolicy.html) + fn parse_drop_policy(&mut self) -> Result { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let name = self.parse_identifier(false)?; + self.expect_keyword(Keyword::ON)?; + let table_name = self.parse_object_name(false)?; + let option = self.parse_optional_referential_action(); + Ok(Statement::DropPolicy { + if_exists, + name, + table_name, + option, + }) + } + + /// ```sql + /// DROP PROCEDURE [ IF EXISTS ] name [ ( [ [ argmode ] [ argname ] argtype [, ...] ] ) ] [, ...] + /// [ CASCADE | RESTRICT ] + /// ``` + fn parse_drop_procedure(&mut self) -> Result { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let proc_desc = self.parse_comma_separated(Parser::parse_function_desc)?; + let option = self.parse_optional_referential_action(); + Ok(Statement::DropProcedure { + if_exists, + proc_desc, + option, + }) + } + + /// See [DuckDB Docs](https://duckdb.org/docs/sql/statements/create_secret.html) for more details. 
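+ ///
+ /// Roughly (an illustrative sketch of the accepted shape):
+ ///
+ /// ```sql
+ /// DROP [ TEMPORARY | PERSISTENT ] SECRET [ IF EXISTS ] name [ FROM storage_specifier ]
+ /// ```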
+ fn parse_drop_secret( + &mut self, + temporary: bool, + persistent: bool, + ) -> Result { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let name = self.parse_identifier(false)?; + let storage_specifier = if self.parse_keyword(Keyword::FROM) { + self.parse_identifier(false).ok() + } else { + None + }; + let temp = match (temporary, persistent) { + (true, false) => Some(true), + (false, true) => Some(false), + (false, false) => None, + _ => self.expected("TEMPORARY or PERSISTENT", self.peek_token())?, + }; + + Ok(Statement::DropSecret { + if_exists, + temporary: temp, + name, + storage_specifier, + }) + } +} diff --git a/src/parser/end.rs b/src/parser/end.rs new file mode 100644 index 000000000..254b7b5ab --- /dev/null +++ b/src/parser/end.rs @@ -0,0 +1,9 @@ +use crate::parser::*; + +impl Parser<'_> { + pub fn parse_end(&mut self) -> Result { + Ok(Statement::Commit { + chain: self.parse_commit_rollback_chain()?, + }) + } +} diff --git a/src/parser/execute.rs b/src/parser/execute.rs new file mode 100644 index 000000000..90fd1504d --- /dev/null +++ b/src/parser/execute.rs @@ -0,0 +1,38 @@ +use crate::parser::*; + +impl Parser<'_> { + pub fn parse_execute(&mut self) -> Result { + let name = self.parse_object_name(false)?; + + let has_parentheses = self.consume_token(&Token::LParen); + + let end_token = match (has_parentheses, self.peek_token().token) { + (true, _) => Token::RParen, + (false, Token::EOF) => Token::EOF, + (false, Token::Word(w)) if w.keyword == Keyword::USING => Token::Word(w), + (false, _) => Token::SemiColon, + }; + + let parameters = self.parse_comma_separated0(Parser::parse_expr, end_token)?; + + if has_parentheses { + self.expect_token(&Token::RParen)?; + } + + let mut using = vec![]; + if self.parse_keyword(Keyword::USING) { + using.push(self.parse_expr()?); + + while self.consume_token(&Token::Comma) { + using.push(self.parse_expr()?); + } + }; + + Ok(Statement::Execute { + name, + parameters, + has_parentheses, + using, + }) + } +} diff --git a/src/parser/explain.rs b/src/parser/explain.rs new file mode 100644 index 000000000..81a445e79 --- /dev/null +++ b/src/parser/explain.rs @@ -0,0 +1,92 @@ +use super::*; + +impl Parser<'_> { + pub fn parse_explain( + &mut self, + describe_alias: DescribeAlias, + ) -> Result { + let mut analyze = false; + let mut verbose = false; + let mut query_plan = false; + let mut format = None; + let mut options = None; + + // Note: DuckDB is compatible with PostgreSQL syntax for this statement, + // although not all features may be implemented. + if describe_alias == DescribeAlias::Explain + && self.dialect.supports_explain_with_utility_options() + && self.peek_token().token == Token::LParen + { + options = Some(self.parse_utility_options()?) + } else if self.parse_keywords(&[Keyword::QUERY, Keyword::PLAN]) { + query_plan = true; + } else { + analyze = self.parse_keyword(Keyword::ANALYZE); + verbose = self.parse_keyword(Keyword::VERBOSE); + if self.parse_keyword(Keyword::FORMAT) { + format = Some(self.parse_analyze_format()?); + } + } + + match self.maybe_parse(|parser| parser.parse_statement())? { + Some(Statement::Explain { .. }) | Some(Statement::ExplainTable { .. 
}) => Err( + ParserError::ParserError("Explain must be root of the plan".to_string()), + ), + Some(statement) => Ok(Statement::Explain { + describe_alias, + analyze, + verbose, + query_plan, + statement: Box::new(statement), + format, + options, + }), + _ => { + let hive_format = + match self.parse_one_of_keywords(&[Keyword::EXTENDED, Keyword::FORMATTED]) { + Some(Keyword::EXTENDED) => Some(HiveDescribeFormat::Extended), + Some(Keyword::FORMATTED) => Some(HiveDescribeFormat::Formatted), + _ => None, + }; + + let has_table_keyword = if self.dialect.describe_requires_table_keyword() { + // only allow to use TABLE keyword for DESC|DESCRIBE statement + self.parse_keyword(Keyword::TABLE) + } else { + false + }; + + let table_name = self.parse_object_name(false)?; + Ok(Statement::ExplainTable { + describe_alias, + hive_format, + has_table_keyword, + table_name, + }) + } + } + } + + pub fn parse_utility_options(&mut self) -> Result, ParserError> { + self.expect_token(&Token::LParen)?; + let options = self.parse_comma_separated(Self::parse_utility_option)?; + self.expect_token(&Token::RParen)?; + + Ok(options) + } + + fn parse_utility_option(&mut self) -> Result { + let name = self.parse_identifier(false)?; + + let next_token = self.peek_token(); + if next_token == Token::Comma || next_token == Token::RParen { + return Ok(UtilityOption { name, arg: None }); + } + let arg = self.parse_expr()?; + + Ok(UtilityOption { + name, + arg: Some(arg), + }) + } +} diff --git a/src/parser/expr.rs b/src/parser/expr.rs new file mode 100644 index 000000000..b2b2e9260 --- /dev/null +++ b/src/parser/expr.rs @@ -0,0 +1,2115 @@ +use super::*; + +use crate::parser_err; + +// Every Parser method with "parse" and "expr" in the name. + +impl<'a> Parser<'a> { + /// Parse a new expression. + pub fn parse_expr(&mut self) -> Result { + self.parse_subexpr(self.dialect.prec_unknown()) + } + + /// Parse tokens until the precedence changes. + pub fn parse_subexpr(&mut self, precedence: u8) -> Result { + let _guard = self.recursion_counter.try_decrease()?; + debug!("parsing expr"); + let mut expr = self.parse_prefix()?; + debug!("prefix: {:?}", expr); + loop { + let next_precedence = self.get_next_precedence()?; + debug!("next precedence: {:?}", next_precedence); + + if precedence >= next_precedence { + break; + } + + expr = self.parse_infix(expr, next_precedence)?; + } + Ok(expr) + } + + /// Parses an array expression `[ex1, ex2, ..]` + /// if `named` is `true`, came from an expression like `ARRAY[ex1, ex2]` + pub fn parse_array_expr(&mut self, named: bool) -> Result { + let exprs = self.parse_comma_separated0(Parser::parse_expr, Token::RBracket)?; + self.expect_token(&Token::RBracket)?; + Ok(Expr::Array(Array { elem: exprs, named })) + } + + /// Parses `BETWEEN AND `, assuming the `BETWEEN` keyword was already consumed. + pub fn parse_between(&mut self, expr: Expr, negated: bool) -> Result { + // Stop parsing subexpressions for and on tokens with + // precedence lower than that of `BETWEEN`, such as `AND`, `IS`, etc. 
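+ // For example, in `a BETWEEN 1 AND 2 AND c`, only the first `AND` belongs
+ // to the BETWEEN; the trailing `AND c` is left for the caller to parse.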
+ let low = self.parse_subexpr(self.dialect.prec_value(Precedence::Between))?; + self.expect_keyword(Keyword::AND)?; + let high = self.parse_subexpr(self.dialect.prec_value(Precedence::Between))?; + Ok(Expr::Between { + expr: Box::new(expr), + negated, + low: Box::new(low), + high: Box::new(high), + }) + } + + pub fn parse_case_expr(&mut self) -> Result { + let mut operand = None; + if !self.parse_keyword(Keyword::WHEN) { + operand = Some(Box::new(self.parse_expr()?)); + self.expect_keyword(Keyword::WHEN)?; + } + let mut conditions = vec![]; + let mut results = vec![]; + loop { + conditions.push(self.parse_expr()?); + self.expect_keyword(Keyword::THEN)?; + results.push(self.parse_expr()?); + if !self.parse_keyword(Keyword::WHEN) { + break; + } + } + let else_result = if self.parse_keyword(Keyword::ELSE) { + Some(Box::new(self.parse_expr()?)) + } else { + None + }; + self.expect_keyword(Keyword::END)?; + Ok(Expr::Case { + operand, + conditions, + results, + else_result, + }) + } + + /// Parse a SQL CAST function e.g. `CAST(expr AS FLOAT)` + pub fn parse_cast_expr(&mut self, kind: CastKind) -> Result { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_keyword(Keyword::AS)?; + let data_type = self.parse_data_type()?; + let format = self.parse_optional_cast_format()?; + self.expect_token(&Token::RParen)?; + Ok(Expr::Cast { + kind, + expr: Box::new(expr), + data_type, + format, + }) + } + + pub fn parse_ceil_floor_expr(&mut self, is_ceil: bool) -> Result { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + // Parse `CEIL/FLOOR(expr)` + let field = if self.parse_keyword(Keyword::TO) { + // Parse `CEIL/FLOOR(expr TO DateTimeField)` + CeilFloorKind::DateTimeField(self.parse_date_time_field()?) + } else if self.consume_token(&Token::Comma) { + // Parse `CEIL/FLOOR(expr, scale)` + match self.parse_value()? 
{ + Value::Number(n, s) => CeilFloorKind::Scale(Value::Number(n, s)), + _ => { + return Err(ParserError::ParserError( + "Scale field can only be of number type".to_string(), + )) + } + } + } else { + CeilFloorKind::DateTimeField(DateTimeField::NoDateTime) + }; + self.expect_token(&Token::RParen)?; + if is_ceil { + Ok(Expr::Ceil { + expr: Box::new(expr), + field, + }) + } else { + Ok(Expr::Floor { + expr: Box::new(expr), + field, + }) + } + } + + pub fn parse_character_length(&mut self) -> Result { + if self.parse_keyword(Keyword::MAX) { + return Ok(CharacterLength::Max); + } + let length = self.parse_literal_uint()?; + let unit = if self.parse_keyword(Keyword::CHARACTERS) { + Some(CharLengthUnits::Characters) + } else if self.parse_keyword(Keyword::OCTETS) { + Some(CharLengthUnits::Octets) + } else { + None + }; + Ok(CharacterLength::IntegerLength { length, unit }) + } + + /// Parse a SQL CONVERT function: + /// - `CONVERT('héhé' USING utf8mb4)` (MySQL) + /// - `CONVERT('héhé', CHAR CHARACTER SET utf8mb4)` (MySQL) + /// - `CONVERT(DECIMAL(10, 5), 42)` (MSSQL) - the type comes first + pub fn parse_convert_expr(&mut self, is_try: bool) -> Result { + if self.dialect.convert_type_before_value() { + return self.parse_mssql_convert(is_try); + } + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + if self.parse_keyword(Keyword::USING) { + let charset = self.parse_object_name(false)?; + self.expect_token(&Token::RParen)?; + return Ok(Expr::Convert { + is_try, + expr: Box::new(expr), + data_type: None, + charset: Some(charset), + target_before_value: false, + styles: vec![], + }); + } + self.expect_token(&Token::Comma)?; + let data_type = self.parse_data_type()?; + let charset = if self.parse_keywords(&[Keyword::CHARACTER, Keyword::SET]) { + Some(self.parse_object_name(false)?) + } else { + None + }; + self.expect_token(&Token::RParen)?; + Ok(Expr::Convert { + is_try, + expr: Box::new(expr), + data_type: Some(data_type), + charset, + target_before_value: false, + styles: vec![], + }) + } + + // This function parses date/time fields for the EXTRACT function-like + // operator, interval qualifiers, and the ceil/floor operations. + // EXTRACT supports a wider set of date/time fields than interval qualifiers, + // so this function may need to be split in two. 
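+ // For example, standard SQL accepts `EXTRACT(EPOCH FROM ts)` but not an
+ // `EPOCH` qualifier in an INTERVAL literal.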
+ pub fn parse_date_time_field(&mut self) -> Result { + let next_token = self.next_token(); + match &next_token.token { + Token::Word(w) => match w.keyword { + Keyword::YEAR => Ok(DateTimeField::Year), + Keyword::MONTH => Ok(DateTimeField::Month), + Keyword::WEEK => { + let week_day = if dialect_of!(self is BigQueryDialect | GenericDialect) + && self.consume_token(&Token::LParen) + { + let week_day = self.parse_identifier(false)?; + self.expect_token(&Token::RParen)?; + Some(week_day) + } else { + None + }; + Ok(DateTimeField::Week(week_day)) + } + Keyword::DAY => Ok(DateTimeField::Day), + Keyword::DAYOFWEEK => Ok(DateTimeField::DayOfWeek), + Keyword::DAYOFYEAR => Ok(DateTimeField::DayOfYear), + Keyword::DATE => Ok(DateTimeField::Date), + Keyword::DATETIME => Ok(DateTimeField::Datetime), + Keyword::HOUR => Ok(DateTimeField::Hour), + Keyword::MINUTE => Ok(DateTimeField::Minute), + Keyword::SECOND => Ok(DateTimeField::Second), + Keyword::CENTURY => Ok(DateTimeField::Century), + Keyword::DECADE => Ok(DateTimeField::Decade), + Keyword::DOY => Ok(DateTimeField::Doy), + Keyword::DOW => Ok(DateTimeField::Dow), + Keyword::EPOCH => Ok(DateTimeField::Epoch), + Keyword::ISODOW => Ok(DateTimeField::Isodow), + Keyword::ISOYEAR => Ok(DateTimeField::Isoyear), + Keyword::ISOWEEK => Ok(DateTimeField::IsoWeek), + Keyword::JULIAN => Ok(DateTimeField::Julian), + Keyword::MICROSECOND => Ok(DateTimeField::Microsecond), + Keyword::MICROSECONDS => Ok(DateTimeField::Microseconds), + Keyword::MILLENIUM => Ok(DateTimeField::Millenium), + Keyword::MILLENNIUM => Ok(DateTimeField::Millennium), + Keyword::MILLISECOND => Ok(DateTimeField::Millisecond), + Keyword::MILLISECONDS => Ok(DateTimeField::Milliseconds), + Keyword::NANOSECOND => Ok(DateTimeField::Nanosecond), + Keyword::NANOSECONDS => Ok(DateTimeField::Nanoseconds), + Keyword::QUARTER => Ok(DateTimeField::Quarter), + Keyword::TIME => Ok(DateTimeField::Time), + Keyword::TIMEZONE => Ok(DateTimeField::Timezone), + Keyword::TIMEZONE_ABBR => Ok(DateTimeField::TimezoneAbbr), + Keyword::TIMEZONE_HOUR => Ok(DateTimeField::TimezoneHour), + Keyword::TIMEZONE_MINUTE => Ok(DateTimeField::TimezoneMinute), + Keyword::TIMEZONE_REGION => Ok(DateTimeField::TimezoneRegion), + _ if self.dialect.allow_extract_custom() => { + self.prev_token(); + let custom = self.parse_identifier(false)?; + Ok(DateTimeField::Custom(custom)) + } + _ => self.expected("date/time field", next_token), + }, + Token::SingleQuotedString(_) if self.dialect.allow_extract_single_quotes() => { + self.prev_token(); + let custom = self.parse_identifier(false)?; + Ok(DateTimeField::Custom(custom)) + } + _ => self.expected("date/time field", next_token), + } + } + + /// Parse the `ESCAPE CHAR` portion of `LIKE`, `ILIKE`, and `SIMILAR TO` + pub fn parse_escape_char(&mut self) -> Result, ParserError> { + if self.parse_keyword(Keyword::ESCAPE) { + Ok(Some(self.parse_literal_string()?)) + } else { + Ok(None) + } + } + + /// Parse a SQL EXISTS expression e.g. `WHERE EXISTS(SELECT ...)`. 
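+ /// The `negated` flag is set by the caller (see `parse_not`) for the
+ /// `NOT EXISTS (...)` form.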
+ pub fn parse_exists_expr(&mut self, negated: bool) -> Result { + self.expect_token(&Token::LParen)?; + let exists_node = Expr::Exists { + negated, + subquery: self.parse_query()?, + }; + self.expect_token(&Token::RParen)?; + Ok(exists_node) + } + + pub fn parse_extract_expr(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let field = self.parse_date_time_field()?; + + let syntax = if self.parse_keyword(Keyword::FROM) { + ExtractSyntax::From + } else if self.consume_token(&Token::Comma) + && dialect_of!(self is SnowflakeDialect | GenericDialect) + { + ExtractSyntax::Comma + } else { + return Err(ParserError::ParserError( + "Expected 'FROM' or ','".to_string(), + )); + }; + + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Ok(Expr::Extract { + field, + expr: Box::new(expr), + syntax, + }) + } + + pub fn parse_function(&mut self, name: ObjectName) -> Result { + self.expect_token(&Token::LParen)?; + + // Snowflake permits a subquery to be passed as an argument without + // an enclosing set of parens if it's the only argument. + if dialect_of!(self is SnowflakeDialect) && self.peek_sub_query() { + let subquery = self.parse_query()?; + self.expect_token(&Token::RParen)?; + return Ok(Expr::Function(Function { + name, + parameters: FunctionArguments::None, + args: FunctionArguments::Subquery(subquery), + filter: None, + null_treatment: None, + over: None, + within_group: vec![], + })); + } + + let mut args = self.parse_function_argument_list()?; + let mut parameters = FunctionArguments::None; + // ClickHouse aggregations support parametric functions like `HISTOGRAM(0.5, 0.6)(x, y)` + // which (0.5, 0.6) is a parameter to the function. + if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.consume_token(&Token::LParen) + { + parameters = FunctionArguments::List(args); + args = self.parse_function_argument_list()?; + } + + let within_group = if self.parse_keywords(&[Keyword::WITHIN, Keyword::GROUP]) { + self.expect_token(&Token::LParen)?; + self.expect_keywords(&[Keyword::ORDER, Keyword::BY])?; + let order_by = self.parse_comma_separated(Parser::parse_order_by_expr)?; + self.expect_token(&Token::RParen)?; + order_by + } else { + vec![] + }; + + let filter = if self.dialect.supports_filter_during_aggregation() + && self.parse_keyword(Keyword::FILTER) + && self.consume_token(&Token::LParen) + && self.parse_keyword(Keyword::WHERE) + { + let filter = Some(Box::new(self.parse_expr()?)); + self.expect_token(&Token::RParen)?; + filter + } else { + None + }; + + // Syntax for null treatment shows up either in the args list + // or after the function call, but not both. + let null_treatment = if args + .clauses + .iter() + .all(|clause| !matches!(clause, FunctionArgumentClause::IgnoreOrRespectNulls(_))) + { + self.parse_null_treatment()? + } else { + None + }; + + let over = if self.parse_keyword(Keyword::OVER) { + if self.consume_token(&Token::LParen) { + let window_spec = self.parse_window_spec()?; + Some(WindowType::WindowSpec(window_spec)) + } else { + Some(WindowType::NamedWindow(self.parse_identifier(false)?)) + } + } else { + None + }; + + Ok(Expr::Function(Function { + name, + parameters, + args: FunctionArguments::List(args), + null_treatment, + filter, + over, + within_group, + })) + } + + /// Parses the parens following the `[ NOT ] IN` operator. 
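+ ///
+ /// Handles `IN (1, 2, 3)`, `IN (SELECT ...)`, and BigQuery's `IN UNNEST(array)`.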
+ pub fn parse_in(&mut self, expr: Expr, negated: bool) -> Result { + // BigQuery allows `IN UNNEST(array_expression)` + // https://cloud.google.com/bigquery/docs/reference/standard-sql/operators#in_operators + if self.parse_keyword(Keyword::UNNEST) { + self.expect_token(&Token::LParen)?; + let array_expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + return Ok(Expr::InUnnest { + expr: Box::new(expr), + array_expr: Box::new(array_expr), + negated, + }); + } + self.expect_token(&Token::LParen)?; + let in_op = if self.parse_keyword(Keyword::SELECT) || self.parse_keyword(Keyword::WITH) { + self.prev_token(); + Expr::InSubquery { + expr: Box::new(expr), + subquery: self.parse_query()?, + negated, + } + } else { + Expr::InList { + expr: Box::new(expr), + list: if self.dialect.supports_in_empty_list() { + self.parse_comma_separated0(Parser::parse_expr, Token::RParen)? + } else { + self.parse_comma_separated(Parser::parse_expr)? + }, + negated, + } + }; + self.expect_token(&Token::RParen)?; + Ok(in_op) + } + + /// Parse an operator following an expression + pub fn parse_infix(&mut self, expr: Expr, precedence: u8) -> Result { + // allow the dialect to override infix parsing + if let Some(infix) = self.dialect.parse_infix(self, &expr, precedence) { + return infix; + } + + let mut tok = self.next_token(); + let regular_binary_operator = match &mut tok.token { + Token::Spaceship => Some(BinaryOperator::Spaceship), + Token::DoubleEq => Some(BinaryOperator::Eq), + Token::Eq => Some(BinaryOperator::Eq), + Token::Neq => Some(BinaryOperator::NotEq), + Token::Gt => Some(BinaryOperator::Gt), + Token::GtEq => Some(BinaryOperator::GtEq), + Token::Lt => Some(BinaryOperator::Lt), + Token::LtEq => Some(BinaryOperator::LtEq), + Token::Plus => Some(BinaryOperator::Plus), + Token::Minus => Some(BinaryOperator::Minus), + Token::Mul => Some(BinaryOperator::Multiply), + Token::Mod => Some(BinaryOperator::Modulo), + Token::StringConcat => Some(BinaryOperator::StringConcat), + Token::Pipe => Some(BinaryOperator::BitwiseOr), + Token::Caret => { + // In PostgreSQL, ^ stands for the exponentiation operation, + // and # stands for XOR. 
See https://www.postgresql.org/docs/current/functions-math.html + if dialect_of!(self is PostgreSqlDialect) { + Some(BinaryOperator::PGExp) + } else { + Some(BinaryOperator::BitwiseXor) + } + } + Token::Ampersand => Some(BinaryOperator::BitwiseAnd), + Token::Div => Some(BinaryOperator::Divide), + Token::DuckIntDiv if dialect_of!(self is DuckDbDialect | GenericDialect) => { + Some(BinaryOperator::DuckIntegerDivide) + } + Token::ShiftLeft if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) => { + Some(BinaryOperator::PGBitwiseShiftLeft) + } + Token::ShiftRight if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) => { + Some(BinaryOperator::PGBitwiseShiftRight) + } + Token::Sharp if dialect_of!(self is PostgreSqlDialect) => { + Some(BinaryOperator::PGBitwiseXor) + } + Token::Overlap if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { + Some(BinaryOperator::PGOverlap) + } + Token::CaretAt if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { + Some(BinaryOperator::PGStartsWith) + } + Token::Tilde => Some(BinaryOperator::PGRegexMatch), + Token::TildeAsterisk => Some(BinaryOperator::PGRegexIMatch), + Token::ExclamationMarkTilde => Some(BinaryOperator::PGRegexNotMatch), + Token::ExclamationMarkTildeAsterisk => Some(BinaryOperator::PGRegexNotIMatch), + Token::DoubleTilde => Some(BinaryOperator::PGLikeMatch), + Token::DoubleTildeAsterisk => Some(BinaryOperator::PGILikeMatch), + Token::ExclamationMarkDoubleTilde => Some(BinaryOperator::PGNotLikeMatch), + Token::ExclamationMarkDoubleTildeAsterisk => Some(BinaryOperator::PGNotILikeMatch), + Token::Arrow => Some(BinaryOperator::Arrow), + Token::LongArrow => Some(BinaryOperator::LongArrow), + Token::HashArrow => Some(BinaryOperator::HashArrow), + Token::HashLongArrow => Some(BinaryOperator::HashLongArrow), + Token::AtArrow => Some(BinaryOperator::AtArrow), + Token::ArrowAt => Some(BinaryOperator::ArrowAt), + Token::HashMinus => Some(BinaryOperator::HashMinus), + Token::AtQuestion => Some(BinaryOperator::AtQuestion), + Token::AtAt => Some(BinaryOperator::AtAt), + Token::Question => Some(BinaryOperator::Question), + Token::QuestionAnd => Some(BinaryOperator::QuestionAnd), + Token::QuestionPipe => Some(BinaryOperator::QuestionPipe), + Token::CustomBinaryOperator(s) => Some(BinaryOperator::Custom(core::mem::take(s))), + + Token::Word(w) => match w.keyword { + Keyword::AND => Some(BinaryOperator::And), + Keyword::OR => Some(BinaryOperator::Or), + Keyword::XOR => Some(BinaryOperator::Xor), + Keyword::OPERATOR if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { + self.expect_token(&Token::LParen)?; + // there are special rules for operator names in + // postgres so we can not use 'parse_object' + // or similar. + // See https://www.postgresql.org/docs/current/sql-createoperator.html + let mut idents = vec![]; + loop { + idents.push(self.next_token().to_string()); + if !self.consume_token(&Token::Period) { + break; + } + } + self.expect_token(&Token::RParen)?; + Some(BinaryOperator::PGCustomBinaryOperator(idents)) + } + _ => None, + }, + _ => None, + }; + + if let Some(op) = regular_binary_operator { + if let Some(keyword) = + self.parse_one_of_keywords(&[Keyword::ANY, Keyword::ALL, Keyword::SOME]) + { + self.expect_token(&Token::LParen)?; + let right = if self.peek_sub_query() { + // We have a subquery ahead (SELECT\WITH ...) need to rewind and + // use the parenthesis for parsing the subquery as an expression. + self.prev_token(); // LParen + self.parse_subexpr(precedence)? 
+ } else { + // Non-subquery expression + let right = self.parse_subexpr(precedence)?; + self.expect_token(&Token::RParen)?; + right + }; + + if !matches!( + op, + BinaryOperator::Gt + | BinaryOperator::Lt + | BinaryOperator::GtEq + | BinaryOperator::LtEq + | BinaryOperator::Eq + | BinaryOperator::NotEq + ) { + return parser_err!( + format!( + "Expected one of [=, >, <, =>, =<, !=] as comparison operator, found: {op}" + ), + tok.span.start + ); + }; + + Ok(match keyword { + Keyword::ALL => Expr::AllOp { + left: Box::new(expr), + compare_op: op, + right: Box::new(right), + }, + Keyword::ANY | Keyword::SOME => Expr::AnyOp { + left: Box::new(expr), + compare_op: op, + right: Box::new(right), + is_some: keyword == Keyword::SOME, + }, + _ => unreachable!(), + }) + } else { + Ok(Expr::BinaryOp { + left: Box::new(expr), + op, + right: Box::new(self.parse_subexpr(precedence)?), + }) + } + } else if let Token::Word(w) = &tok.token { + match w.keyword { + Keyword::IS => { + if self.parse_keyword(Keyword::NULL) { + Ok(Expr::IsNull(Box::new(expr))) + } else if self.parse_keywords(&[Keyword::NOT, Keyword::NULL]) { + Ok(Expr::IsNotNull(Box::new(expr))) + } else if self.parse_keywords(&[Keyword::TRUE]) { + Ok(Expr::IsTrue(Box::new(expr))) + } else if self.parse_keywords(&[Keyword::NOT, Keyword::TRUE]) { + Ok(Expr::IsNotTrue(Box::new(expr))) + } else if self.parse_keywords(&[Keyword::FALSE]) { + Ok(Expr::IsFalse(Box::new(expr))) + } else if self.parse_keywords(&[Keyword::NOT, Keyword::FALSE]) { + Ok(Expr::IsNotFalse(Box::new(expr))) + } else if self.parse_keywords(&[Keyword::UNKNOWN]) { + Ok(Expr::IsUnknown(Box::new(expr))) + } else if self.parse_keywords(&[Keyword::NOT, Keyword::UNKNOWN]) { + Ok(Expr::IsNotUnknown(Box::new(expr))) + } else if self.parse_keywords(&[Keyword::DISTINCT, Keyword::FROM]) { + let expr2 = self.parse_expr()?; + Ok(Expr::IsDistinctFrom(Box::new(expr), Box::new(expr2))) + } else if self.parse_keywords(&[Keyword::NOT, Keyword::DISTINCT, Keyword::FROM]) + { + let expr2 = self.parse_expr()?; + Ok(Expr::IsNotDistinctFrom(Box::new(expr), Box::new(expr2))) + } else { + self.expected( + "[NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS", + self.peek_token(), + ) + } + } + Keyword::AT => { + self.expect_keywords(&[Keyword::TIME, Keyword::ZONE])?; + Ok(Expr::AtTimeZone { + timestamp: Box::new(expr), + time_zone: Box::new(self.parse_subexpr(precedence)?), + }) + } + Keyword::NOT + | Keyword::IN + | Keyword::BETWEEN + | Keyword::LIKE + | Keyword::ILIKE + | Keyword::SIMILAR + | Keyword::REGEXP + | Keyword::RLIKE => { + self.prev_token(); + let negated = self.parse_keyword(Keyword::NOT); + let regexp = self.parse_keyword(Keyword::REGEXP); + let rlike = self.parse_keyword(Keyword::RLIKE); + if regexp || rlike { + Ok(Expr::RLike { + negated, + expr: Box::new(expr), + pattern: Box::new( + self.parse_subexpr(self.dialect.prec_value(Precedence::Like))?, + ), + regexp, + }) + } else if self.parse_keyword(Keyword::IN) { + self.parse_in(expr, negated) + } else if self.parse_keyword(Keyword::BETWEEN) { + self.parse_between(expr, negated) + } else if self.parse_keyword(Keyword::LIKE) { + Ok(Expr::Like { + negated, + any: self.parse_keyword(Keyword::ANY), + expr: Box::new(expr), + pattern: Box::new( + self.parse_subexpr(self.dialect.prec_value(Precedence::Like))?, + ), + escape_char: self.parse_escape_char()?, + }) + } else if self.parse_keyword(Keyword::ILIKE) { + Ok(Expr::ILike { + negated, + any: self.parse_keyword(Keyword::ANY), + expr: Box::new(expr), + pattern: Box::new( + 
self.parse_subexpr(self.dialect.prec_value(Precedence::Like))?, + ), + escape_char: self.parse_escape_char()?, + }) + } else if self.parse_keywords(&[Keyword::SIMILAR, Keyword::TO]) { + Ok(Expr::SimilarTo { + negated, + expr: Box::new(expr), + pattern: Box::new( + self.parse_subexpr(self.dialect.prec_value(Precedence::Like))?, + ), + escape_char: self.parse_escape_char()?, + }) + } else { + self.expected("IN or BETWEEN after NOT", self.peek_token()) + } + } + // Can only happen if `get_next_precedence` got out of sync with this function + _ => parser_err!( + format!("No infix parser for token {:?}", tok.token), + tok.span.start + ), + } + } else if Token::DoubleColon == tok { + Ok(Expr::Cast { + kind: CastKind::DoubleColon, + expr: Box::new(expr), + data_type: self.parse_data_type()?, + format: None, + }) + } else if Token::ExclamationMark == tok && self.dialect.supports_factorial_operator() { + Ok(Expr::UnaryOp { + op: UnaryOperator::PGPostfixFactorial, + expr: Box::new(expr), + }) + } else if Token::LBracket == tok { + if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) { + self.parse_subscript(expr) + } else if dialect_of!(self is SnowflakeDialect) || self.dialect.supports_partiql() { + self.prev_token(); + self.parse_json_access(expr) + } else { + self.parse_map_access(expr) + } + } else if dialect_of!(self is SnowflakeDialect | GenericDialect) && Token::Colon == tok { + self.prev_token(); + self.parse_json_access(expr) + } else { + // Can only happen if `get_next_precedence` got out of sync with this function + parser_err!( + format!("No infix parser for token {:?}", tok.token), + tok.span.start + ) + } + } + + /// Parse an `INTERVAL` expression. + /// + /// Some syntactically valid intervals: + /// + /// ```sql + /// 1. INTERVAL '1' DAY + /// 2. INTERVAL '1-1' YEAR TO MONTH + /// 3. INTERVAL '1' SECOND + /// 4. INTERVAL '1:1:1.1' HOUR (5) TO SECOND (5) + /// 5. INTERVAL '1.1' SECOND (2, 2) + /// 6. INTERVAL '1:1' HOUR (5) TO MINUTE (5) + /// 7. (MySql & BigQuery only): INTERVAL 1 DAY + /// ``` + /// + /// Note that we do not currently attempt to parse the quoted value. + pub fn parse_interval(&mut self) -> Result { + // The SQL standard allows an optional sign before the value string, but + // it is not clear if any implementations support that syntax, so we + // don't currently try to parse it. (The sign can instead be included + // inside the value string.) + + // to match the different flavours of INTERVAL syntax, we only allow expressions + // if the dialect requires an interval qualifier, + // see https://github.com/sqlparser-rs/sqlparser-rs/pull/1398 for more details + let value = if self.dialect.require_interval_qualifier() { + // parse a whole expression so `INTERVAL 1 + 1 DAY` is valid + self.parse_expr()? + } else { + // parse a prefix expression so `INTERVAL 1 DAY` is valid, but `INTERVAL 1 + 1 DAY` is not + // this also means that `INTERVAL '5 days' > INTERVAL '1 day'` treated properly + self.parse_prefix()? + }; + + // Following the string literal is a qualifier which indicates the units + // of the duration specified in the string literal. + // + // Note that PostgreSQL allows omitting the qualifier, so we provide + // this more general implementation. + let leading_field = if self.next_token_is_temporal_unit() { + Some(self.parse_date_time_field()?) 
+        } else if self.dialect.require_interval_qualifier() {
+            return parser_err!(
+                "INTERVAL requires a unit after the literal value",
+                self.peek_token().span.start
+            );
+        } else {
+            None
+        };
+
+        let (leading_precision, last_field, fsec_precision) =
+            if leading_field == Some(DateTimeField::Second) {
+                // SQL mandates special syntax for `SECOND TO SECOND` literals.
+                // Instead of
+                //     `SECOND [(<leading precision>)] TO SECOND[(<fractional seconds precision>)]`
+                // one must use the special format:
+                //     `SECOND [(<leading precision> [ , <fractional seconds precision>])]`
+                let last_field = None;
+                let (leading_precision, fsec_precision) = self.parse_optional_precision_scale()?;
+                (leading_precision, last_field, fsec_precision)
+            } else {
+                let leading_precision = self.parse_optional_precision()?;
+                if self.parse_keyword(Keyword::TO) {
+                    let last_field = Some(self.parse_date_time_field()?);
+                    let fsec_precision = if last_field == Some(DateTimeField::Second) {
+                        self.parse_optional_precision()?
+                    } else {
+                        None
+                    };
+                    (leading_precision, last_field, fsec_precision)
+                } else {
+                    (leading_precision, None, None)
+                }
+            };
+
+        Ok(Expr::Interval(Interval {
+            value: Box::new(value),
+            leading_field,
+            leading_precision,
+            last_field,
+            fractional_seconds_precision: fsec_precision,
+        }))
+    }
+
+    pub fn parse_listagg_on_overflow(&mut self) -> Result<Option<ListAggOnOverflow>, ParserError> {
+        if self.parse_keywords(&[Keyword::ON, Keyword::OVERFLOW]) {
+            if self.parse_keyword(Keyword::ERROR) {
+                Ok(Some(ListAggOnOverflow::Error))
+            } else {
+                self.expect_keyword(Keyword::TRUNCATE)?;
+                let filler = match self.peek_token().token {
+                    Token::Word(w)
+                        if w.keyword == Keyword::WITH || w.keyword == Keyword::WITHOUT =>
+                    {
+                        None
+                    }
+                    Token::SingleQuotedString(_)
+                    | Token::EscapedStringLiteral(_)
+                    | Token::UnicodeStringLiteral(_)
+                    | Token::NationalStringLiteral(_)
+                    | Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)),
+                    _ => self.expected(
+                        "either filler, WITH, or WITHOUT in LISTAGG",
+                        self.peek_token(),
+                    )?,
+                };
+                let with_count = self.parse_keyword(Keyword::WITH);
+                if !with_count && !self.parse_keyword(Keyword::WITHOUT) {
+                    self.expected("either WITH or WITHOUT in LISTAGG", self.peek_token())?;
+                }
+                self.expect_keyword(Keyword::COUNT)?;
+                Ok(Some(ListAggOnOverflow::Truncate { filler, with_count }))
+            }
+        } else {
+            Ok(None)
+        }
+    }
+
+    pub fn parse_map_access(&mut self, expr: Expr) -> Result<Expr, ParserError> {
+        let key = self.parse_expr()?;
+        self.expect_token(&Token::RBracket)?;
+
+        let mut keys = vec![MapAccessKey {
+            key,
+            syntax: MapAccessSyntax::Bracket,
+        }];
+        loop {
+            let key = match self.peek_token().token {
+                Token::LBracket => {
+                    self.next_token(); // consume `[`
+                    let key = self.parse_expr()?;
+                    self.expect_token(&Token::RBracket)?;
+                    MapAccessKey {
+                        key,
+                        syntax: MapAccessSyntax::Bracket,
+                    }
+                }
+                // Access on BigQuery nested and repeated expressions can
+                // mix notations in the same expression.
+                // https://cloud.google.com/bigquery/docs/nested-repeated#query_nested_and_repeated_columns
+                Token::Period if dialect_of!(self is BigQueryDialect) => {
+                    self.next_token(); // consume `.`
+                    MapAccessKey {
+                        key: self.parse_expr()?,
+                        syntax: MapAccessSyntax::Period,
+                    }
+                }
+                _ => break,
+            };
+            keys.push(key);
+        }
+
+        Ok(Expr::MapAccess {
+            column: Box::new(expr),
+            keys,
+        })
+    }
+
+    /// Parses fulltext expressions [`sqlparser::ast::Expr::MatchAgainst`]
+    ///
+    /// # Errors
+    /// This method will raise an error if the column list is empty or contains invalid identifiers,
+    /// the match expression is not a literal string, or if the search modifier is not valid.
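+    ///
+    /// # Example
+    ///
+    /// A representative MySQL fulltext predicate accepted here (illustrative only;
+    /// the leading `MATCH` keyword has already been consumed by the caller):
+    ///
+    /// ```sql
+    /// MATCH (title, body) AGAINST ('search terms' IN NATURAL LANGUAGE MODE)
+    /// ```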
+    pub fn parse_match_against(&mut self) -> Result<Expr, ParserError> {
+        let columns = self.parse_parenthesized_column_list(Mandatory, false)?;
+
+        self.expect_keyword(Keyword::AGAINST)?;
+
+        self.expect_token(&Token::LParen)?;
+
+        // MySQL is very permissive about the value; we can't validate it perfectly on the syntax level.
+        let match_value = self.parse_value()?;
+
+        let in_natural_language_mode_keywords = &[
+            Keyword::IN,
+            Keyword::NATURAL,
+            Keyword::LANGUAGE,
+            Keyword::MODE,
+        ];
+
+        let with_query_expansion_keywords = &[Keyword::WITH, Keyword::QUERY, Keyword::EXPANSION];
+
+        let in_boolean_mode_keywords = &[Keyword::IN, Keyword::BOOLEAN, Keyword::MODE];
+
+        let opt_search_modifier = if self.parse_keywords(in_natural_language_mode_keywords) {
+            if self.parse_keywords(with_query_expansion_keywords) {
+                Some(SearchModifier::InNaturalLanguageModeWithQueryExpansion)
+            } else {
+                Some(SearchModifier::InNaturalLanguageMode)
+            }
+        } else if self.parse_keywords(in_boolean_mode_keywords) {
+            Some(SearchModifier::InBooleanMode)
+        } else if self.parse_keywords(with_query_expansion_keywords) {
+            Some(SearchModifier::WithQueryExpansion)
+        } else {
+            None
+        };
+
+        self.expect_token(&Token::RParen)?;
+
+        Ok(Expr::MatchAgainst {
+            columns,
+            match_value,
+            opt_search_modifier,
+        })
+    }
+
+    pub fn parse_not(&mut self) -> Result<Expr, ParserError> {
+        match self.peek_token().token {
+            Token::Word(w) => match w.keyword {
+                Keyword::EXISTS => {
+                    let negated = true;
+                    let _ = self.parse_keyword(Keyword::EXISTS);
+                    self.parse_exists_expr(negated)
+                }
+                _ => Ok(Expr::UnaryOp {
+                    op: UnaryOperator::Not,
+                    expr: Box::new(
+                        self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?,
+                    ),
+                }),
+            },
+            _ => Ok(Expr::UnaryOp {
+                op: UnaryOperator::Not,
+                expr: Box::new(self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?),
+            }),
+        }
+    }
+
+    pub fn parse_optional_cast_format(&mut self) -> Result<Option<CastFormat>, ParserError> {
+        if self.parse_keyword(Keyword::FORMAT) {
+            let value = self.parse_value()?;
+            match self.parse_optional_time_zone()? {
+                Some(tz) => Ok(Some(CastFormat::ValueAtTimeZone(value, tz))),
+                None => Ok(Some(CastFormat::Value(value))),
+            }
+        } else {
+            Ok(None)
+        }
+    }
+
+    pub fn parse_optional_group_by(&mut self) -> Result<Option<GroupByExpr>, ParserError> {
+        if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) {
+            let expressions = if self.parse_keyword(Keyword::ALL) {
+                None
+            } else {
+                Some(self.parse_comma_separated(Parser::parse_group_by_expr)?)
+            };
+
+            let mut modifiers = vec![];
+            if dialect_of!(self is ClickHouseDialect | GenericDialect) {
+                loop {
+                    if !self.parse_keyword(Keyword::WITH) {
+                        break;
+                    }
+                    let keyword = self.expect_one_of_keywords(&[
+                        Keyword::ROLLUP,
+                        Keyword::CUBE,
+                        Keyword::TOTALS,
+                    ])?;
+                    modifiers.push(match keyword {
+                        Keyword::ROLLUP => GroupByWithModifier::Rollup,
+                        Keyword::CUBE => GroupByWithModifier::Cube,
+                        Keyword::TOTALS => GroupByWithModifier::Totals,
+                        _ => {
+                            return parser_err!(
+                                "BUG: expected to match GroupBy modifier keyword",
+                                self.peek_token().span.start
+                            )
+                        }
+                    });
+                }
+            }
+            let group_by = match expressions {
+                None => GroupByExpr::All(modifiers),
+                Some(exprs) => GroupByExpr::Expressions(exprs, modifiers),
+            };
+            Ok(Some(group_by))
+        } else {
+            Ok(None)
+        }
+    }
+
+    pub fn parse_optional_precision_scale(
+        &mut self,
+    ) -> Result<(Option<u64>, Option<u64>), ParserError> {
+        if self.consume_token(&Token::LParen) {
+            let n = self.parse_literal_uint()?;
+            let scale = if self.consume_token(&Token::Comma) {
+                Some(self.parse_literal_uint()?)
+ } else { + None + }; + self.expect_token(&Token::RParen)?; + Ok((Some(n), scale)) + } else { + Ok((None, None)) + } + } + + pub fn parse_optional_time_zone(&mut self) -> Result, ParserError> { + if self.parse_keywords(&[Keyword::AT, Keyword::TIME, Keyword::ZONE]) { + self.parse_value().map(Some) + } else { + Ok(None) + } + } + + pub fn parse_overlay_expr(&mut self) -> Result { + // PARSE OVERLAY (EXPR PLACING EXPR FROM 1 [FOR 3]) + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_keyword(Keyword::PLACING)?; + let what_expr = self.parse_expr()?; + self.expect_keyword(Keyword::FROM)?; + let from_expr = self.parse_expr()?; + let mut for_expr = None; + if self.parse_keyword(Keyword::FOR) { + for_expr = Some(self.parse_expr()?); + } + self.expect_token(&Token::RParen)?; + + Ok(Expr::Overlay { + expr: Box::new(expr), + overlay_what: Box::new(what_expr), + overlay_from: Box::new(from_expr), + overlay_for: for_expr.map(Box::new), + }) + } + + pub fn parse_position_expr(&mut self, ident: Ident) -> Result { + let between_prec = self.dialect.prec_value(Precedence::Between); + let position_expr = self.maybe_parse(|p| { + // PARSE SELECT POSITION('@' in field) + p.expect_token(&Token::LParen)?; + + // Parse the subexpr till the IN keyword + let expr = p.parse_subexpr(between_prec)?; + p.expect_keyword(Keyword::IN)?; + let from = p.parse_expr()?; + p.expect_token(&Token::RParen)?; + Ok(Expr::Position { + expr: Box::new(expr), + r#in: Box::new(from), + }) + })?; + match position_expr { + Some(expr) => Ok(expr), + // Snowflake supports `position` as an ordinary function call + // without the special `IN` syntax. + None => self.parse_function(ObjectName(vec![ident])), + } + } + + /// Parse an expression prefix. + pub fn parse_prefix(&mut self) -> Result { + // allow the dialect to override prefix parsing + if let Some(prefix) = self.dialect.parse_prefix(self) { + return prefix; + } + + // PostgreSQL allows any string literal to be preceded by a type name, indicating that the + // string literal represents a literal of that type. Some examples: + // + // DATE '2020-05-20' + // TIMESTAMP WITH TIME ZONE '2020-05-20 7:43:54' + // BOOL 'true' + // + // The first two are standard SQL, while the latter is a PostgreSQL extension. Complicating + // matters is the fact that INTERVAL string literals may optionally be followed by special + // keywords, e.g.: + // + // INTERVAL '7' DAY + // + // Note also that naively `SELECT date` looks like a syntax error because the `date` type + // name is not followed by a string literal, but in fact in PostgreSQL it is a valid + // expression that should parse as the column name "date". + let loc = self.peek_token().span.start; + let opt_expr = self.maybe_parse(|parser| { + match parser.parse_data_type()? { + DataType::Interval => parser.parse_interval(), + // PostgreSQL allows almost any identifier to be used as custom data type name, + // and we support that in `parse_data_type()`. But unlike Postgres we don't + // have a list of globally reserved keywords (since they vary across dialects), + // so given `NOT 'a' LIKE 'b'`, we'd accept `NOT` as a possible custom data type + // name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of + // an unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the + // `type 'string'` syntax for the custom data types at all. + DataType::Custom(..) 
=> parser_err!("dummy", loc), + data_type => Ok(Expr::TypedString { + data_type, + value: parser.parse_literal_string()?, + }), + } + })?; + + if let Some(expr) = opt_expr { + return Ok(expr); + } + + let next_token = self.next_token(); + let expr = match next_token.token { + Token::Word(w) => { + // The word we consumed may fall into one of two cases: it has a special meaning, or not. + // For example, in Snowflake, the word `interval` may have two meanings depending on the context: + // `SELECT CURRENT_DATE() + INTERVAL '1 DAY', MAX(interval) FROM tbl;` + // ^^^^^^^^^^^^^^^^ ^^^^^^^^ + // interval expression identifier + // + // We first try to parse the word and following tokens as a special expression, and if that fails, + // we rollback and try to parse it as an identifier. + match self.try_parse(|parser| { + parser.parse_expr_prefix_by_reserved_word(&w, next_token.span) + }) { + // This word indicated an expression prefix and parsing was successful + Ok(Some(expr)) => Ok(expr), + + // No expression prefix associated with this word + Ok(None) => Ok(self.parse_expr_prefix_by_unreserved_word(&w, next_token.span)?), + + // If parsing of the word as a special expression failed, we are facing two options: + // 1. The statement is malformed, e.g. `SELECT INTERVAL '1 DAI` (`DAI` instead of `DAY`) + // 2. The word is used as an identifier, e.g. `SELECT MAX(interval) FROM tbl` + // We first try to parse the word as an identifier and if that fails + // we rollback and return the parsing error we got from trying to parse a + // special expression (to maintain backwards compatibility of parsing errors). + Err(e) => { + if !self.dialect.is_reserved_for_identifier(w.keyword) { + if let Ok(Some(expr)) = self.maybe_parse(|parser| { + parser.parse_expr_prefix_by_unreserved_word(&w, next_token.span) + }) { + return Ok(expr); + } + } + return Err(e); + } + } + } // End of Token::Word + // array `[1, 2, 3]` + Token::LBracket => self.parse_array_expr(false), + tok @ Token::Minus | tok @ Token::Plus => { + let op = if tok == Token::Plus { + UnaryOperator::Plus + } else { + UnaryOperator::Minus + }; + Ok(Expr::UnaryOp { + op, + expr: Box::new( + self.parse_subexpr(self.dialect.prec_value(Precedence::MulDivModOp))?, + ), + }) + } + Token::ExclamationMark if self.dialect.supports_bang_not_operator() => { + Ok(Expr::UnaryOp { + op: UnaryOperator::BangNot, + expr: Box::new( + self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?, + ), + }) + } + tok @ Token::DoubleExclamationMark + | tok @ Token::PGSquareRoot + | tok @ Token::PGCubeRoot + | tok @ Token::AtSign + | tok @ Token::Tilde + if dialect_of!(self is PostgreSqlDialect) => + { + let op = match tok { + Token::DoubleExclamationMark => UnaryOperator::PGPrefixFactorial, + Token::PGSquareRoot => UnaryOperator::PGSquareRoot, + Token::PGCubeRoot => UnaryOperator::PGCubeRoot, + Token::AtSign => UnaryOperator::PGAbs, + Token::Tilde => UnaryOperator::PGBitwiseNot, + _ => unreachable!(), + }; + Ok(Expr::UnaryOp { + op, + expr: Box::new( + self.parse_subexpr(self.dialect.prec_value(Precedence::PlusMinus))?, + ), + }) + } + Token::EscapedStringLiteral(_) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => + { + self.prev_token(); + Ok(Expr::Value(self.parse_value()?)) + } + Token::UnicodeStringLiteral(_) => { + self.prev_token(); + Ok(Expr::Value(self.parse_value()?)) + } + Token::Number(_, _) + | Token::SingleQuotedString(_) + | Token::DoubleQuotedString(_) + | Token::TripleSingleQuotedString(_) + | Token::TripleDoubleQuotedString(_) + | 
Token::DollarQuotedString(_) + | Token::SingleQuotedByteStringLiteral(_) + | Token::DoubleQuotedByteStringLiteral(_) + | Token::TripleSingleQuotedByteStringLiteral(_) + | Token::TripleDoubleQuotedByteStringLiteral(_) + | Token::SingleQuotedRawStringLiteral(_) + | Token::DoubleQuotedRawStringLiteral(_) + | Token::TripleSingleQuotedRawStringLiteral(_) + | Token::TripleDoubleQuotedRawStringLiteral(_) + | Token::NationalStringLiteral(_) + | Token::HexStringLiteral(_) => { + self.prev_token(); + Ok(Expr::Value(self.parse_value()?)) + } + Token::LParen => { + let expr = if let Some(expr) = self.try_parse_expr_sub_query()? { + expr + } else if let Some(lambda) = self.try_parse_lambda()? { + return Ok(lambda); + } else { + let exprs = self.parse_comma_separated(Parser::parse_expr)?; + match exprs.len() { + 0 => unreachable!(), // parse_comma_separated ensures 1 or more + 1 => Expr::Nested(Box::new(exprs.into_iter().next().unwrap())), + _ => Expr::Tuple(exprs), + } + }; + self.expect_token(&Token::RParen)?; + let expr = self.try_parse_method(expr)?; + if !self.consume_token(&Token::Period) { + Ok(expr) + } else { + let tok = self.next_token(); + let key = match tok.token { + Token::Word(word) => word.to_ident(tok.span), + _ => { + return parser_err!( + format!("Expected identifier, found: {tok}"), + tok.span.start + ) + } + }; + Ok(Expr::CompositeAccess { + expr: Box::new(expr), + key, + }) + } + } + Token::Placeholder(_) | Token::Colon | Token::AtSign => { + self.prev_token(); + Ok(Expr::Value(self.parse_value()?)) + } + Token::LBrace if self.dialect.supports_dictionary_syntax() => { + self.prev_token(); + self.parse_duckdb_struct_literal() + } + _ => self.expected("an expression", next_token), + }?; + + let expr = self.try_parse_method(expr)?; + + if self.parse_keyword(Keyword::COLLATE) { + Ok(Expr::Collate { + expr: Box::new(expr), + collation: self.parse_object_name(false)?, + }) + } else { + Ok(expr) + } + } + + /// Parses an array subscript like `[1:3]` + /// + /// Parser is right after `[` + pub fn parse_subscript(&mut self, expr: Expr) -> Result { + let subscript = self.parse_subscript_inner()?; + Ok(Expr::Subscript { + expr: Box::new(expr), + subscript: Box::new(subscript), + }) + } + + pub fn parse_substring_expr(&mut self) -> Result { + // PARSE SUBSTRING (EXPR [FROM 1] [FOR 3]) + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + let mut from_expr = None; + let special = self.consume_token(&Token::Comma); + if special || self.parse_keyword(Keyword::FROM) { + from_expr = Some(self.parse_expr()?); + } + + let mut to_expr = None; + if self.parse_keyword(Keyword::FOR) || self.consume_token(&Token::Comma) { + to_expr = Some(self.parse_expr()?); + } + self.expect_token(&Token::RParen)?; + + Ok(Expr::Substring { + expr: Box::new(expr), + substring_from: from_expr.map(Box::new), + substring_for: to_expr.map(Box::new), + special, + }) + } + + pub fn parse_time_functions(&mut self, name: ObjectName) -> Result { + let args = if self.consume_token(&Token::LParen) { + FunctionArguments::List(self.parse_function_argument_list()?) 
+ } else { + FunctionArguments::None + }; + Ok(Expr::Function(Function { + name, + parameters: FunctionArguments::None, + args, + filter: None, + over: None, + null_treatment: None, + within_group: vec![], + })) + } + + /// ```sql + /// TRIM ([WHERE] ['text' FROM] 'text') + /// TRIM ('text') + /// TRIM(, [, characters]) -- only Snowflake or BigQuery + /// ``` + pub fn parse_trim_expr(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let mut trim_where = None; + if let Token::Word(word) = self.peek_token().token { + if [Keyword::BOTH, Keyword::LEADING, Keyword::TRAILING] + .iter() + .any(|d| word.keyword == *d) + { + trim_where = Some(self.parse_trim_where()?); + } + } + let expr = self.parse_expr()?; + if self.parse_keyword(Keyword::FROM) { + let trim_what = Box::new(expr); + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Ok(Expr::Trim { + expr: Box::new(expr), + trim_where, + trim_what: Some(trim_what), + trim_characters: None, + }) + } else if self.consume_token(&Token::Comma) + && dialect_of!(self is SnowflakeDialect | BigQueryDialect | GenericDialect) + { + let characters = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + Ok(Expr::Trim { + expr: Box::new(expr), + trim_where: None, + trim_what: None, + trim_characters: Some(characters), + }) + } else { + self.expect_token(&Token::RParen)?; + Ok(Expr::Trim { + expr: Box::new(expr), + trim_where, + trim_what: None, + trim_characters: None, + }) + } + } + + pub fn parse_trim_where(&mut self) -> Result { + let next_token = self.next_token(); + match &next_token.token { + Token::Word(w) => match w.keyword { + Keyword::BOTH => Ok(TrimWhereField::Both), + Keyword::LEADING => Ok(TrimWhereField::Leading), + Keyword::TRAILING => Ok(TrimWhereField::Trailing), + _ => self.expected("trim_where field", next_token)?, + }, + _ => self.expected("trim_where field", next_token), + } + } + + /// Parse a new expression including wildcard & qualified wildcard. + pub fn parse_wildcard_expr(&mut self) -> Result { + let index = self.index; + + let next_token = self.next_token(); + match next_token.token { + t @ (Token::Word(_) | Token::SingleQuotedString(_)) => { + if self.peek_token().token == Token::Period { + let mut id_parts: Vec = vec![match t { + Token::Word(w) => w.to_ident(next_token.span), + Token::SingleQuotedString(s) => Ident::with_quote('\'', s), + _ => unreachable!(), // We matched above + }]; + + while self.consume_token(&Token::Period) { + let next_token = self.next_token(); + match next_token.token { + Token::Word(w) => id_parts.push(w.to_ident(next_token.span)), + Token::SingleQuotedString(s) => { + // SQLite has single-quoted identifiers + id_parts.push(Ident::with_quote('\'', s)) + } + Token::Mul => { + return Ok(Expr::QualifiedWildcard( + ObjectName(id_parts), + AttachedToken(next_token), + )); + } + _ => { + return self + .expected("an identifier or a '*' after '.'", next_token); + } + } + } + } + } + Token::Mul => { + return Ok(Expr::Wildcard(AttachedToken(next_token))); + } + _ => (), + }; + + self.index = index; + self.parse_expr() + } + + /// Parse a Struct type definition as a sequence of field-value pairs. + /// The syntax of the Struct elem differs by dialect so it is customised + /// by the `elem_parser` argument. 
+    ///
+    /// Syntax
+    /// ```sql
+    /// Hive:
+    /// STRUCT<field_name: field_type>
+    ///
+    /// BigQuery:
+    /// STRUCT<[field_name] field_type>
+    /// ```
+    pub(crate) fn parse_struct_type_def<F>(
+        &mut self,
+        mut elem_parser: F,
+    ) -> Result<(Vec<StructField>, MatchedTrailingBracket), ParserError>
+    where
+        F: FnMut(&mut Parser<'a>) -> Result<(StructField, MatchedTrailingBracket), ParserError>,
+    {
+        let start_token = self.peek_token();
+        self.expect_keyword(Keyword::STRUCT)?;
+
+        // Nothing to do if we have no type information.
+        if Token::Lt != self.peek_token() {
+            return Ok((Default::default(), false.into()));
+        }
+        self.next_token();
+
+        let mut field_defs = vec![];
+        let trailing_bracket = loop {
+            let (def, trailing_bracket) = elem_parser(self)?;
+            field_defs.push(def);
+            if !self.consume_token(&Token::Comma) {
+                break trailing_bracket;
+            }
+
+            // Angle brackets are balanced so we only expect the trailing `>>` after
+            // we've matched all field types for the current struct.
+            // e.g. this is invalid syntax `STRUCT<STRUCT<INT>>>, INT>(NULL)`
+            if trailing_bracket.0 {
+                return parser_err!("unmatched > in STRUCT definition", start_token.span.start);
+            }
+        };
+
+        Ok((
+            field_defs,
+            self.expect_closing_angle_bracket(trailing_bracket)?,
+        ))
+    }
+
+    pub(crate) fn try_parse_expr_sub_query(&mut self) -> Result<Option<Expr>, ParserError> {
+        if !self.peek_sub_query() {
+            return Ok(None);
+        }
+
+        Ok(Some(Expr::Subquery(self.parse_query()?)))
+    }
+
+    fn parse_duplicate_treatment(&mut self) -> Result<Option<DuplicateTreatment>, ParserError> {
+        let loc = self.peek_token().span.start;
+        match (
+            self.parse_keyword(Keyword::ALL),
+            self.parse_keyword(Keyword::DISTINCT),
+        ) {
+            (true, false) => Ok(Some(DuplicateTreatment::All)),
+            (false, true) => Ok(Some(DuplicateTreatment::Distinct)),
+            (false, false) => Ok(None),
+            (true, true) => parser_err!("Cannot specify both ALL and DISTINCT".to_string(), loc),
+        }
+    }
+
+    // Tries to parse an expression by matching the specified word to known keywords that have
+    // a special meaning in the dialect. Returns `None` if no match is found.
+    fn parse_expr_prefix_by_reserved_word(
+        &mut self,
+        w: &Word,
+        w_span: Span,
+    ) -> Result<Option<Expr>, ParserError> {
+        match w.keyword {
+            Keyword::TRUE | Keyword::FALSE if self.dialect.supports_boolean_literals() => {
+                self.prev_token();
+                Ok(Some(Expr::Value(self.parse_value()?)))
+            }
+            Keyword::NULL => {
+                self.prev_token();
+                Ok(Some(Expr::Value(self.parse_value()?)))
+            }
+            Keyword::CURRENT_CATALOG
+            | Keyword::CURRENT_USER
+            | Keyword::SESSION_USER
+            | Keyword::USER
+                if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
+            {
+                Ok(Some(Expr::Function(Function {
+                    name: ObjectName(vec![w.to_ident(w_span)]),
+                    parameters: FunctionArguments::None,
+                    args: FunctionArguments::None,
+                    null_treatment: None,
+                    filter: None,
+                    over: None,
+                    within_group: vec![],
+                })))
+            }
+            Keyword::CURRENT_TIMESTAMP
+            | Keyword::CURRENT_TIME
+            | Keyword::CURRENT_DATE
+            | Keyword::LOCALTIME
+            | Keyword::LOCALTIMESTAMP => {
+                Ok(Some(self.parse_time_functions(ObjectName(vec![w.to_ident(w_span)]))?))
+            }
+            Keyword::CASE => Ok(Some(self.parse_case_expr()?)),
+            Keyword::CONVERT => Ok(Some(self.parse_convert_expr(false)?)),
+            Keyword::TRY_CONVERT if self.dialect.supports_try_convert() => {
+                Ok(Some(self.parse_convert_expr(true)?))
+            }
+            Keyword::CAST => Ok(Some(self.parse_cast_expr(CastKind::Cast)?)),
+            Keyword::TRY_CAST => Ok(Some(self.parse_cast_expr(CastKind::TryCast)?)),
+            Keyword::SAFE_CAST => Ok(Some(self.parse_cast_expr(CastKind::SafeCast)?)),
+            Keyword::EXISTS
+                // Databricks has a function named `exists`, so only treat `EXISTS` as an
+                // expression prefix when it is followed by a subquery.
+ if !dialect_of!(self is DatabricksDialect) + || matches!( + self.peek_nth_token(1).token, + Token::Word(Word { + keyword: Keyword::SELECT | Keyword::WITH, + .. + }) + ) => + { + Ok(Some(self.parse_exists_expr(false)?)) + } + Keyword::EXTRACT => Ok(Some(self.parse_extract_expr()?)), + Keyword::CEIL => Ok(Some(self.parse_ceil_floor_expr(true)?)), + Keyword::FLOOR => Ok(Some(self.parse_ceil_floor_expr(false)?)), + Keyword::POSITION if self.peek_token().token == Token::LParen => { + Ok(Some(self.parse_position_expr(w.to_ident(w_span))?)) + } + Keyword::SUBSTRING => Ok(Some(self.parse_substring_expr()?)), + Keyword::OVERLAY => Ok(Some(self.parse_overlay_expr()?)), + Keyword::TRIM => Ok(Some(self.parse_trim_expr()?)), + Keyword::INTERVAL => Ok(Some(self.parse_interval()?)), + // Treat ARRAY[1,2,3] as an array [1,2,3], otherwise try as subquery or a function call + Keyword::ARRAY if self.peek_token() == Token::LBracket => { + self.expect_token(&Token::LBracket)?; + Ok(Some(self.parse_array_expr(true)?)) + } + Keyword::ARRAY + if self.peek_token() == Token::LParen + && !dialect_of!(self is ClickHouseDialect | DatabricksDialect) => + { + self.expect_token(&Token::LParen)?; + let query = self.parse_query()?; + self.expect_token(&Token::RParen)?; + Ok(Some(Expr::Function(Function { + name: ObjectName(vec![w.to_ident(w_span)]), + parameters: FunctionArguments::None, + args: FunctionArguments::Subquery(query), + filter: None, + null_treatment: None, + over: None, + within_group: vec![], + }))) + } + Keyword::NOT => Ok(Some(self.parse_not()?)), + Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => { + Ok(Some(self.parse_match_against()?)) + } + Keyword::STRUCT if self.dialect.supports_struct_literal() => { + Ok(Some(self.parse_struct_literal()?)) + } + Keyword::PRIOR if matches!(self.state, ParserState::ConnectBy) => { + let expr = self.parse_subexpr(self.dialect.prec_value(Precedence::PlusMinus))?; + Ok(Some(Expr::Prior(Box::new(expr)))) + } + Keyword::MAP if self.peek_token() == Token::LBrace && self.dialect.support_map_literal_syntax() => { + Ok(Some(self.parse_duckdb_map_literal()?)) + } + _ => Ok(None) + } + } + + // Tries to parse an expression by a word that is not known to have a special meaning in the dialect. 
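+    //
+    // For example, in `SELECT foo.bar(1)` the word `foo` has no reserved meaning, so this
+    // function parses it as the start of the compound name `foo.bar` followed by a
+    // function call.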
+ fn parse_expr_prefix_by_unreserved_word( + &mut self, + w: &Word, + w_span: Span, + ) -> Result { + match self.peek_token().token { + Token::LParen | Token::Period => { + let mut id_parts: Vec = vec![w.to_ident(w_span)]; + let mut ending_wildcard: Option = None; + while self.consume_token(&Token::Period) { + let next_token = self.next_token(); + match next_token.token { + Token::Word(w) => id_parts.push(w.to_ident(next_token.span)), + Token::Mul => { + // Postgres explicitly allows funcnm(tablenm.*) and the + // function array_agg traverses this control flow + if dialect_of!(self is PostgreSqlDialect) { + ending_wildcard = Some(next_token); + break; + } else { + return self.expected("an identifier after '.'", next_token); + } + } + Token::SingleQuotedString(s) => id_parts.push(Ident::with_quote('\'', s)), + _ => { + return self.expected("an identifier or a '*' after '.'", next_token); + } + } + } + + if let Some(wildcard_token) = ending_wildcard { + Ok(Expr::QualifiedWildcard( + ObjectName(id_parts), + AttachedToken(wildcard_token), + )) + } else if self.consume_token(&Token::LParen) { + if dialect_of!(self is SnowflakeDialect | MsSqlDialect) + && self.consume_tokens(&[Token::Plus, Token::RParen]) + { + Ok(Expr::OuterJoin(Box::new( + match <[Ident; 1]>::try_from(id_parts) { + Ok([ident]) => Expr::Identifier(ident), + Err(parts) => Expr::CompoundIdentifier(parts), + }, + ))) + } else { + self.prev_token(); + self.parse_function(ObjectName(id_parts)) + } + } else { + Ok(Expr::CompoundIdentifier(id_parts)) + } + } + // string introducer https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html + Token::SingleQuotedString(_) + | Token::DoubleQuotedString(_) + | Token::HexStringLiteral(_) + if w.value.starts_with('_') => + { + Ok(Expr::IntroducedString { + introducer: w.value.clone(), + value: self.parse_introduced_string_value()?, + }) + } + Token::Arrow if self.dialect.supports_lambda_functions() => { + self.expect_token(&Token::Arrow)?; + Ok(Expr::Lambda(LambdaFunction { + params: OneOrManyWithParens::One(w.to_ident(w_span)), + body: Box::new(self.parse_expr()?), + })) + } + _ => Ok(Expr::Identifier(w.to_ident(w_span))), + } + } + + /// Parses a potentially empty list of arguments to a window function + /// (including the closing parenthesis). + /// + /// Examples: + /// ```sql + /// FIRST_VALUE(x ORDER BY 1,2,3); + /// FIRST_VALUE(x IGNORE NULL); + /// ``` + fn parse_function_argument_list(&mut self) -> Result { + let mut clauses = vec![]; + + // For MSSQL empty argument list with json-null-clause case, e.g. `JSON_ARRAY(NULL ON NULL)` + if let Some(null_clause) = self.parse_json_null_clause() { + clauses.push(FunctionArgumentClause::JsonNullClause(null_clause)); + } + + if self.consume_token(&Token::RParen) { + return Ok(FunctionArgumentList { + duplicate_treatment: None, + args: vec![], + clauses, + }); + } + + let duplicate_treatment = self.parse_duplicate_treatment()?; + let args = self.parse_comma_separated(Parser::parse_function_args)?; + + if self.dialect.supports_window_function_null_treatment_arg() { + if let Some(null_treatment) = self.parse_null_treatment()? 
{ + clauses.push(FunctionArgumentClause::IgnoreOrRespectNulls(null_treatment)); + } + } + + if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + clauses.push(FunctionArgumentClause::OrderBy( + self.parse_comma_separated(Parser::parse_order_by_expr)?, + )); + } + + if self.parse_keyword(Keyword::LIMIT) { + clauses.push(FunctionArgumentClause::Limit(self.parse_expr()?)); + } + + if dialect_of!(self is GenericDialect | BigQueryDialect) + && self.parse_keyword(Keyword::HAVING) + { + let kind = match self.expect_one_of_keywords(&[Keyword::MIN, Keyword::MAX])? { + Keyword::MIN => HavingBoundKind::Min, + Keyword::MAX => HavingBoundKind::Max, + _ => unreachable!(), + }; + clauses.push(FunctionArgumentClause::Having(HavingBound( + kind, + self.parse_expr()?, + ))) + } + + if dialect_of!(self is GenericDialect | MySqlDialect) + && self.parse_keyword(Keyword::SEPARATOR) + { + clauses.push(FunctionArgumentClause::Separator(self.parse_value()?)); + } + + if let Some(on_overflow) = self.parse_listagg_on_overflow()? { + clauses.push(FunctionArgumentClause::OnOverflow(on_overflow)); + } + + if let Some(null_clause) = self.parse_json_null_clause() { + clauses.push(FunctionArgumentClause::JsonNullClause(null_clause)); + } + + self.expect_token(&Token::RParen)?; + Ok(FunctionArgumentList { + duplicate_treatment, + args, + clauses, + }) + } + + /// Parse a group by expr. Group by expr can be one of group sets, roll up, cube, or simple expr. + fn parse_group_by_expr(&mut self) -> Result { + if self.dialect.supports_group_by_expr() { + if self.parse_keywords(&[Keyword::GROUPING, Keyword::SETS]) { + self.expect_token(&Token::LParen)?; + let result = self.parse_comma_separated(|p| p.parse_tuple(false, true))?; + self.expect_token(&Token::RParen)?; + Ok(Expr::GroupingSets(result)) + } else if self.parse_keyword(Keyword::CUBE) { + self.expect_token(&Token::LParen)?; + let result = self.parse_comma_separated(|p| p.parse_tuple(true, true))?; + self.expect_token(&Token::RParen)?; + Ok(Expr::Cube(result)) + } else if self.parse_keyword(Keyword::ROLLUP) { + self.expect_token(&Token::LParen)?; + let result = self.parse_comma_separated(|p| p.parse_tuple(true, true))?; + self.expect_token(&Token::RParen)?; + Ok(Expr::Rollup(result)) + } else if self.consume_tokens(&[Token::LParen, Token::RParen]) { + // PostgreSQL allow to use empty tuple as a group by expression, + // e.g. `GROUP BY (), name`. 
Please refer to GROUP BY Clause section in + // [PostgreSQL](https://www.postgresql.org/docs/16/sql-select.html) + Ok(Expr::Tuple(vec![])) + } else { + self.parse_expr() + } + } else { + // TODO parse rollup for other dialects + self.parse_expr() + } + } + + fn parse_json_access(&mut self, expr: Expr) -> Result { + let path = self.parse_json_path()?; + Ok(Expr::JsonAccess { + value: Box::new(expr), + path, + }) + } + + /// Parses MSSQL's json-null-clause + fn parse_json_null_clause(&mut self) -> Option { + if self.parse_keywords(&[Keyword::ABSENT, Keyword::ON, Keyword::NULL]) { + Some(JsonNullClause::AbsentOnNull) + } else if self.parse_keywords(&[Keyword::NULL, Keyword::ON, Keyword::NULL]) { + Some(JsonNullClause::NullOnNull) + } else { + None + } + } + + pub(crate) fn parse_json_path(&mut self) -> Result { + let mut path = Vec::new(); + loop { + match self.next_token().token { + Token::Colon if path.is_empty() => { + path.push(self.parse_json_path_object_key()?); + } + Token::Period if !path.is_empty() => { + path.push(self.parse_json_path_object_key()?); + } + Token::LBracket => { + let key = self.parse_expr()?; + self.expect_token(&Token::RBracket)?; + + path.push(JsonPathElem::Bracket { key }); + } + _ => { + self.prev_token(); + break; + } + }; + } + + debug_assert!(!path.is_empty()); + Ok(JsonPath { path }) + } + + fn parse_json_path_object_key(&mut self) -> Result { + let token = self.next_token(); + match token.token { + Token::Word(Word { + value, + // path segments in SF dot notation can be unquoted or double-quoted + quote_style: quote_style @ (Some('"') | None), + // some experimentation suggests that snowflake permits + // any keyword here unquoted. + keyword: _, + }) => Ok(JsonPathElem::Dot { + key: value, + quoted: quote_style.is_some(), + }), + + // This token should never be generated on snowflake or generic + // dialects, but we handle it just in case this is used on future + // dialects. + Token::DoubleQuotedString(key) => Ok(JsonPathElem::Dot { key, quoted: true }), + + _ => self.expected("variant object key name", token), + } + } + + /// Optionally parses a null treatment clause. + fn parse_null_treatment(&mut self) -> Result, ParserError> { + match self.parse_one_of_keywords(&[Keyword::RESPECT, Keyword::IGNORE]) { + Some(keyword) => { + self.expect_keyword(Keyword::NULLS)?; + + Ok(match keyword { + Keyword::RESPECT => Some(NullTreatment::RespectNulls), + Keyword::IGNORE => Some(NullTreatment::IgnoreNulls), + _ => None, + }) + } + None => Ok(None), + } + } + + /// Parse an expression value for a struct literal + /// Syntax + /// ```sql + /// expr [AS name] + /// ``` + /// + /// For biquery [1], Parameter typed_syntax is set to true if the expression + /// is to be parsed as a field expression declared using typed + /// struct syntax [2], and false if using typeless struct syntax [3]. 
+ /// + /// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#constructing_a_struct + /// [2]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typed_struct_syntax + /// [3]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typeless_struct_syntax + fn parse_struct_field_expr(&mut self, typed_syntax: bool) -> Result { + let expr = self.parse_expr()?; + if self.parse_keyword(Keyword::AS) { + if typed_syntax { + return parser_err!("Typed syntax does not allow AS", { + self.prev_token(); + self.peek_token().span.start + }); + } + let field_name = self.parse_identifier(false)?; + Ok(Expr::Named { + expr: expr.into(), + name: field_name, + }) + } else { + Ok(expr) + } + } + + /// Syntax + /// ```sql + /// -- typed + /// STRUCT<[field_name] field_type, ...>( expr1 [, ... ]) + /// -- typeless + /// STRUCT( expr1 [AS field_name] [, ... ]) + /// ``` + fn parse_struct_literal(&mut self) -> Result { + // Parse the fields definition if exist `<[field_name] field_type, ...>` + self.prev_token(); + let (fields, trailing_bracket) = + self.parse_struct_type_def(Self::parse_struct_field_def)?; + if trailing_bracket.0 { + return parser_err!( + "unmatched > in STRUCT literal", + self.peek_token().span.start + ); + } + + // Parse the struct values `(expr1 [, ... ])` + self.expect_token(&Token::LParen)?; + let values = self + .parse_comma_separated(|parser| parser.parse_struct_field_expr(!fields.is_empty()))?; + self.expect_token(&Token::RParen)?; + + Ok(Expr::Struct { values, fields }) + } + + /// Parses an array subscript like + /// * `[:]` + /// * `[l]` + /// * `[l:]` + /// * `[:u]` + /// * `[l:u]` + /// * `[l:u:s]` + /// + /// Parser is right after `[` + fn parse_subscript_inner(&mut self) -> Result { + // at either `:(rest)` or `:(rest)]` + let lower_bound = if self.consume_token(&Token::Colon) { + None + } else { + Some(self.parse_expr()?) + }; + + // check for end + if self.consume_token(&Token::RBracket) { + if let Some(lower_bound) = lower_bound { + return Ok(Subscript::Index { index: lower_bound }); + }; + return Ok(Subscript::Slice { + lower_bound, + upper_bound: None, + stride: None, + }); + } + + // consume the `:` + if lower_bound.is_some() { + self.expect_token(&Token::Colon)?; + } + + // we are now at either `]`, `(rest)]` + let upper_bound = if self.consume_token(&Token::RBracket) { + return Ok(Subscript::Slice { + lower_bound, + upper_bound: None, + stride: None, + }); + } else { + Some(self.parse_expr()?) + }; + + // check for end + if self.consume_token(&Token::RBracket) { + return Ok(Subscript::Slice { + lower_bound, + upper_bound, + stride: None, + }); + } + + // we are now at `:]` or `:stride]` + self.expect_token(&Token::Colon)?; + let stride = if self.consume_token(&Token::RBracket) { + None + } else { + Some(self.parse_expr()?) + }; + + if stride.is_some() { + self.expect_token(&Token::RBracket)?; + } + + Ok(Subscript::Slice { + lower_bound, + upper_bound, + stride, + }) + } + + /// Parse a tuple with `(` and `)`. + /// If `lift_singleton` is true, then a singleton tuple is lifted to a tuple of length 1, otherwise it will fail. + /// If `allow_empty` is true, then an empty tuple is allowed. 
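+    ///
+    /// For example, with `lift_singleton = true` the input `a` parses as the singleton
+    /// tuple `[a]` while `(a, b)` parses as `[a, b]`; with `allow_empty = true`, `()`
+    /// parses as an empty tuple.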
+ fn parse_tuple( + &mut self, + lift_singleton: bool, + allow_empty: bool, + ) -> Result, ParserError> { + if lift_singleton { + if self.consume_token(&Token::LParen) { + let result = if allow_empty && self.consume_token(&Token::RParen) { + vec![] + } else { + let result = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + result + }; + Ok(result) + } else { + Ok(vec![self.parse_expr()?]) + } + } else { + self.expect_token(&Token::LParen)?; + let result = if allow_empty && self.consume_token(&Token::RParen) { + vec![] + } else { + let result = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + result + }; + Ok(result) + } + } + + fn try_parse_lambda(&mut self) -> Result, ParserError> { + if !self.dialect.supports_lambda_functions() { + return Ok(None); + } + self.maybe_parse(|p| { + let params = p.parse_comma_separated(|p| p.parse_identifier(false))?; + p.expect_token(&Token::RParen)?; + p.expect_token(&Token::Arrow)?; + let expr = p.parse_expr()?; + Ok(Expr::Lambda(LambdaFunction { + params: OneOrManyWithParens::Many(params), + body: Box::new(expr), + })) + }) + } + + /// Parses method call expression + fn try_parse_method(&mut self, expr: Expr) -> Result { + if !self.dialect.supports_methods() { + return Ok(expr); + } + let method_chain = self.maybe_parse(|p| { + let mut method_chain = Vec::new(); + while p.consume_token(&Token::Period) { + let tok = p.next_token(); + let name = match tok.token { + Token::Word(word) => word.to_ident(tok.span), + _ => return p.expected("identifier", tok), + }; + let func = match p.parse_function(ObjectName(vec![name]))? { + Expr::Function(func) => func, + _ => return p.expected("function", p.peek_token()), + }; + method_chain.push(func); + } + if !method_chain.is_empty() { + Ok(method_chain) + } else { + p.expected("function", p.peek_token()) + } + })?; + if let Some(method_chain) = method_chain { + Ok(Expr::Method(Method { + expr: Box::new(expr), + method_chain, + })) + } else { + Ok(expr) + } + } +} diff --git a/src/parser/fetch.rs b/src/parser/fetch.rs new file mode 100644 index 000000000..c1ac6c244 --- /dev/null +++ b/src/parser/fetch.rs @@ -0,0 +1,64 @@ +use crate::parser::*; + +impl Parser<'_> { + // FETCH [ direction { FROM | IN } ] cursor INTO target; + pub fn parse_fetch_statement(&mut self) -> Result { + let direction = if self.parse_keyword(Keyword::NEXT) { + FetchDirection::Next + } else if self.parse_keyword(Keyword::PRIOR) { + FetchDirection::Prior + } else if self.parse_keyword(Keyword::FIRST) { + FetchDirection::First + } else if self.parse_keyword(Keyword::LAST) { + FetchDirection::Last + } else if self.parse_keyword(Keyword::ABSOLUTE) { + FetchDirection::Absolute { + limit: self.parse_number_value()?, + } + } else if self.parse_keyword(Keyword::RELATIVE) { + FetchDirection::Relative { + limit: self.parse_number_value()?, + } + } else if self.parse_keyword(Keyword::FORWARD) { + if self.parse_keyword(Keyword::ALL) { + FetchDirection::ForwardAll + } else { + FetchDirection::Forward { + // TODO: Support optional + limit: Some(self.parse_number_value()?), + } + } + } else if self.parse_keyword(Keyword::BACKWARD) { + if self.parse_keyword(Keyword::ALL) { + FetchDirection::BackwardAll + } else { + FetchDirection::Backward { + // TODO: Support optional + limit: Some(self.parse_number_value()?), + } + } + } else if self.parse_keyword(Keyword::ALL) { + FetchDirection::All + } else { + FetchDirection::Count { + limit: self.parse_number_value()?, + } + }; + + 
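+        // The direction is followed by `FROM` or `IN`, the cursor name and, optionally,
+        // an `INTO` target, e.g. `FETCH NEXT FROM my_cursor INTO my_target`
+        // (cursor and target names here are illustrative).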
self.expect_one_of_keywords(&[Keyword::FROM, Keyword::IN])?; + + let name = self.parse_identifier(false)?; + + let into = if self.parse_keyword(Keyword::INTO) { + Some(self.parse_object_name(false)?) + } else { + None + }; + + Ok(Statement::Fetch { + name, + direction, + into, + }) + } +} diff --git a/src/parser/flush.rs b/src/parser/flush.rs new file mode 100644 index 000000000..f89b648ca --- /dev/null +++ b/src/parser/flush.rs @@ -0,0 +1,92 @@ +use crate::parser::*; + +use crate::parser_err; + +impl Parser<'_> { + pub fn parse_flush(&mut self) -> Result { + let mut channel = None; + let mut tables: Vec = vec![]; + let mut read_lock = false; + let mut export = false; + + if !dialect_of!(self is MySqlDialect | GenericDialect) { + return parser_err!("Unsupported statement FLUSH", self.peek_token().span.start); + } + + let location = if self.parse_keyword(Keyword::NO_WRITE_TO_BINLOG) { + Some(FlushLocation::NoWriteToBinlog) + } else if self.parse_keyword(Keyword::LOCAL) { + Some(FlushLocation::Local) + } else { + None + }; + + let object_type = if self.parse_keywords(&[Keyword::BINARY, Keyword::LOGS]) { + FlushType::BinaryLogs + } else if self.parse_keywords(&[Keyword::ENGINE, Keyword::LOGS]) { + FlushType::EngineLogs + } else if self.parse_keywords(&[Keyword::ERROR, Keyword::LOGS]) { + FlushType::ErrorLogs + } else if self.parse_keywords(&[Keyword::GENERAL, Keyword::LOGS]) { + FlushType::GeneralLogs + } else if self.parse_keywords(&[Keyword::HOSTS]) { + FlushType::Hosts + } else if self.parse_keyword(Keyword::PRIVILEGES) { + FlushType::Privileges + } else if self.parse_keyword(Keyword::OPTIMIZER_COSTS) { + FlushType::OptimizerCosts + } else if self.parse_keywords(&[Keyword::RELAY, Keyword::LOGS]) { + if self.parse_keywords(&[Keyword::FOR, Keyword::CHANNEL]) { + channel = Some(self.parse_object_name(false).unwrap().to_string()); + } + FlushType::RelayLogs + } else if self.parse_keywords(&[Keyword::SLOW, Keyword::LOGS]) { + FlushType::SlowLogs + } else if self.parse_keyword(Keyword::STATUS) { + FlushType::Status + } else if self.parse_keyword(Keyword::USER_RESOURCES) { + FlushType::UserResources + } else if self.parse_keywords(&[Keyword::LOGS]) { + FlushType::Logs + } else if self.parse_keywords(&[Keyword::TABLES]) { + loop { + let next_token = self.next_token(); + match &next_token.token { + Token::Word(w) => match w.keyword { + Keyword::WITH => { + read_lock = self.parse_keywords(&[Keyword::READ, Keyword::LOCK]); + } + Keyword::FOR => { + export = self.parse_keyword(Keyword::EXPORT); + } + Keyword::NoKeyword => { + self.prev_token(); + tables = self.parse_comma_separated(|p| p.parse_object_name(false))?; + } + _ => {} + }, + _ => { + break; + } + } + } + + FlushType::Tables + } else { + return self.expected( + "BINARY LOGS, ENGINE LOGS, ERROR LOGS, GENERAL LOGS, HOSTS, LOGS, PRIVILEGES, OPTIMIZER_COSTS,\ + RELAY LOGS [FOR CHANNEL channel], SLOW LOGS, STATUS, USER_RESOURCES", + self.peek_token(), + ); + }; + + Ok(Statement::Flush { + object_type, + location, + channel, + read_lock, + export, + tables, + }) + } +} diff --git a/src/parser/grant.rs b/src/parser/grant.rs new file mode 100644 index 000000000..fee9975d7 --- /dev/null +++ b/src/parser/grant.rs @@ -0,0 +1,157 @@ +use crate::parser::*; + +impl Parser<'_> { + /// Parse a GRANT statement. 
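+    ///
+    /// A typical statement accepted by this method (illustrative only):
+    ///
+    /// ```sql
+    /// GRANT SELECT, INSERT ON mytable TO myuser WITH GRANT OPTION
+    /// ```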
+ pub fn parse_grant(&mut self) -> Result { + let (privileges, objects) = self.parse_grant_revoke_privileges_objects()?; + + self.expect_keyword(Keyword::TO)?; + let grantees = self.parse_comma_separated(|p| p.parse_identifier(false))?; + + let with_grant_option = + self.parse_keywords(&[Keyword::WITH, Keyword::GRANT, Keyword::OPTION]); + + let granted_by = self + .parse_keywords(&[Keyword::GRANTED, Keyword::BY]) + .then(|| self.parse_identifier(false).unwrap()); + + Ok(Statement::Grant { + privileges, + objects, + grantees, + with_grant_option, + granted_by, + }) + } + + pub fn parse_actions_list(&mut self) -> Result, ParserError> { + let mut values = vec![]; + loop { + values.push(self.parse_grant_permission()?); + if !self.consume_token(&Token::Comma) { + break; + } else if self.options.trailing_commas { + match self.peek_token().token { + Token::Word(kw) if kw.keyword == Keyword::ON => { + break; + } + Token::RParen + | Token::SemiColon + | Token::EOF + | Token::RBracket + | Token::RBrace => break, + _ => continue, + } + } + } + Ok(values) + } + + pub fn parse_grant_permission(&mut self) -> Result { + if let Some(kw) = self.parse_one_of_keywords(&[ + Keyword::CONNECT, + Keyword::CREATE, + Keyword::DELETE, + Keyword::EXECUTE, + Keyword::INSERT, + Keyword::REFERENCES, + Keyword::SELECT, + Keyword::TEMPORARY, + Keyword::TRIGGER, + Keyword::TRUNCATE, + Keyword::UPDATE, + Keyword::USAGE, + ]) { + let columns = match kw { + Keyword::INSERT | Keyword::REFERENCES | Keyword::SELECT | Keyword::UPDATE => { + let columns = self.parse_parenthesized_column_list(Optional, false)?; + if columns.is_empty() { + None + } else { + Some(columns) + } + } + _ => None, + }; + Ok((kw, columns)) + } else { + self.expected("a privilege keyword", self.peek_token())? + } + } + + pub fn parse_grant_revoke_privileges_objects( + &mut self, + ) -> Result<(Privileges, GrantObjects), ParserError> { + let privileges = if self.parse_keyword(Keyword::ALL) { + Privileges::All { + with_privileges_keyword: self.parse_keyword(Keyword::PRIVILEGES), + } + } else { + let (actions, err): (Vec<_>, Vec<_>) = self + .parse_actions_list()? 
+ .into_iter() + .map(|(kw, columns)| match kw { + Keyword::DELETE => Ok(Action::Delete), + Keyword::INSERT => Ok(Action::Insert { columns }), + Keyword::REFERENCES => Ok(Action::References { columns }), + Keyword::SELECT => Ok(Action::Select { columns }), + Keyword::TRIGGER => Ok(Action::Trigger), + Keyword::TRUNCATE => Ok(Action::Truncate), + Keyword::UPDATE => Ok(Action::Update { columns }), + Keyword::USAGE => Ok(Action::Usage), + Keyword::CONNECT => Ok(Action::Connect), + Keyword::CREATE => Ok(Action::Create), + Keyword::EXECUTE => Ok(Action::Execute), + Keyword::TEMPORARY => Ok(Action::Temporary), + // This will cover all future added keywords to + // parse_grant_permission and unhandled in this + // match + _ => Err(kw), + }) + .partition(Result::is_ok); + + if !err.is_empty() { + let errors: Vec = err.into_iter().filter_map(|x| x.err()).collect(); + return Err(ParserError::ParserError(format!( + "INTERNAL ERROR: GRANT/REVOKE unexpected keyword(s) - {errors:?}" + ))); + } + let act = actions.into_iter().filter_map(|x| x.ok()).collect(); + Privileges::Actions(act) + }; + + self.expect_keyword(Keyword::ON)?; + + let objects = if self.parse_keywords(&[ + Keyword::ALL, + Keyword::TABLES, + Keyword::IN, + Keyword::SCHEMA, + ]) { + GrantObjects::AllTablesInSchema { + schemas: self.parse_comma_separated(|p| p.parse_object_name(false))?, + } + } else if self.parse_keywords(&[ + Keyword::ALL, + Keyword::SEQUENCES, + Keyword::IN, + Keyword::SCHEMA, + ]) { + GrantObjects::AllSequencesInSchema { + schemas: self.parse_comma_separated(|p| p.parse_object_name(false))?, + } + } else { + let object_type = + self.parse_one_of_keywords(&[Keyword::SEQUENCE, Keyword::SCHEMA, Keyword::TABLE]); + let objects = self.parse_comma_separated(|p| p.parse_object_name(false)); + match object_type { + Some(Keyword::SCHEMA) => GrantObjects::Schemas(objects?), + Some(Keyword::SEQUENCE) => GrantObjects::Sequences(objects?), + Some(Keyword::TABLE) | None => GrantObjects::Tables(objects?), + _ => unreachable!(), + } + }; + + Ok((privileges, objects)) + } +} diff --git a/src/parser/identifier.rs b/src/parser/identifier.rs new file mode 100644 index 000000000..1d60f5ff4 --- /dev/null +++ b/src/parser/identifier.rs @@ -0,0 +1,278 @@ +use crate::parser::*; + +impl Parser<'_> { + /// Parse a simple one-word identifier (possibly quoted, possibly a keyword) + /// + /// The `in_table_clause` parameter indicates whether the identifier is a table in a FROM, JOIN, or + /// similar table clause. Currently, this is used only to support unquoted hyphenated identifiers in + // this context on BigQuery. + pub fn parse_identifier(&mut self, in_table_clause: bool) -> Result { + let next_token = self.next_token(); + match next_token.token { + Token::Word(w) => { + let mut ident = w.to_ident(next_token.span); + + // On BigQuery, hyphens are permitted in unquoted identifiers inside of a FROM or + // TABLE clause [0]. + // + // The first segment must be an ordinary unquoted identifier, e.g. it must not start + // with a digit. Subsequent segments are either must either be valid identifiers or + // integers, e.g. foo-123 is allowed, but foo-123a is not. 
+ // + // [0] https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical + if dialect_of!(self is BigQueryDialect) + && w.quote_style.is_none() + && in_table_clause + { + let mut requires_whitespace = false; + while matches!(self.peek_token_no_skip().token, Token::Minus) { + self.next_token(); + ident.value.push('-'); + + let token = self + .next_token_no_skip() + .cloned() + .unwrap_or(TokenWithSpan::wrap(Token::EOF)); + requires_whitespace = match token.token { + Token::Word(next_word) if next_word.quote_style.is_none() => { + ident.value.push_str(&next_word.value); + false + } + Token::Number(s, false) if s.chars().all(|c| c.is_ascii_digit()) => { + ident.value.push_str(&s); + true + } + _ => { + return self + .expected("continuation of hyphenated identifier", token); + } + } + } + + // If the last segment was a number, we must check that it's followed by whitespace, + // otherwise foo-123a will be parsed as `foo-123` with the alias `a`. + if requires_whitespace { + let token = self.next_token(); + if !matches!(token.token, Token::EOF | Token::Whitespace(_)) { + return self + .expected("whitespace following hyphenated identifier", token); + } + } + } + Ok(ident) + } + Token::SingleQuotedString(s) => Ok(Ident::with_quote('\'', s)), + Token::DoubleQuotedString(s) => Ok(Ident::with_quote('\"', s)), + _ => self.expected("identifier", next_token), + } + } + + /// Parse identifiers + pub fn parse_identifiers(&mut self) -> Result, ParserError> { + let mut idents = vec![]; + loop { + match self.peek_token().token { + Token::Word(w) => { + idents.push(w.to_ident(self.peek_token().span)); + } + Token::EOF | Token::Eq => break, + _ => {} + } + self.next_token(); + } + Ok(idents) + } + + /// Parse a possibly qualified, possibly quoted identifier, e.g. + /// `foo` or `myschema."table" + /// + /// The `in_table_clause` parameter indicates whether the object name is a table in a FROM, JOIN, + /// or similar table clause. Currently, this is used only to support unquoted hyphenated identifiers + /// in this context on BigQuery. + pub fn parse_object_name(&mut self, in_table_clause: bool) -> Result { + let mut idents = vec![]; + loop { + if self.dialect.supports_object_name_double_dot_notation() + && idents.len() == 1 + && self.consume_token(&Token::Period) + { + // Empty string here means default schema + idents.push(Ident::new("")); + } + idents.push(self.parse_identifier(in_table_clause)?); + if !self.consume_token(&Token::Period) { + break; + } + } + + // BigQuery accepts any number of quoted identifiers of a table name. 
+ // https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_identifiers + if dialect_of!(self is BigQueryDialect) + && idents.iter().any(|ident| ident.value.contains('.')) + { + idents = idents + .into_iter() + .flat_map(|ident| { + ident + .value + .split('.') + .map(|value| Ident { + value: value.into(), + quote_style: ident.quote_style, + span: ident.span, + }) + .collect::>() + }) + .collect() + } + + Ok(ObjectName(idents)) + } + + /// Strictly parse `identifier AS identifier` + pub fn parse_identifier_with_alias(&mut self) -> Result { + let ident = self.parse_identifier(false)?; + self.expect_keyword(Keyword::AS)?; + let alias = self.parse_identifier(false)?; + Ok(IdentWithAlias { ident, alias }) + } + + /// Parse identifiers of form ident1[.identN]* + /// + /// Similar in functionality to [parse_identifiers], with difference + /// being this function is much more strict about parsing a valid multipart identifier, not + /// allowing extraneous tokens to be parsed, otherwise it fails. + /// + /// For example: + /// + /// ```rust + /// use sqlparser::ast::Ident; + /// use sqlparser::dialect::GenericDialect; + /// use sqlparser::parser::Parser; + /// + /// let dialect = GenericDialect {}; + /// let expected = vec![Ident::new("one"), Ident::new("two")]; + /// + /// // expected usage + /// let sql = "one.two"; + /// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap(); + /// let actual = parser.parse_multipart_identifier().unwrap(); + /// assert_eq!(&actual, &expected); + /// + /// // parse_identifiers is more loose on what it allows, parsing successfully + /// let sql = "one + two"; + /// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap(); + /// let actual = parser.parse_identifiers().unwrap(); + /// assert_eq!(&actual, &expected); + /// + /// // expected to strictly fail due to + separator + /// let sql = "one + two"; + /// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap(); + /// let actual = parser.parse_multipart_identifier().unwrap_err(); + /// assert_eq!( + /// actual.to_string(), + /// "sql parser error: Unexpected token in identifier: +" + /// ); + /// ``` + /// + /// [parse_identifiers]: Parser::parse_identifiers + pub fn parse_multipart_identifier(&mut self) -> Result, ParserError> { + let mut idents = vec![]; + + // expecting at least one word for identifier + let next_token = self.next_token(); + match next_token.token { + Token::Word(w) => idents.push(w.to_ident(next_token.span)), + Token::EOF => { + return Err(ParserError::ParserError( + "Empty input when parsing identifier".to_string(), + ))? + } + token => { + return Err(ParserError::ParserError(format!( + "Unexpected token in identifier: {token}" + )))? + } + }; + + // parse optional next parts if exist + loop { + match self.next_token().token { + // ensure that optional period is succeeded by another identifier + Token::Period => { + let next_token = self.next_token(); + match next_token.token { + Token::Word(w) => idents.push(w.to_ident(next_token.span)), + Token::EOF => { + return Err(ParserError::ParserError( + "Trailing period in identifier".to_string(), + ))? + } + token => { + return Err(ParserError::ParserError(format!( + "Unexpected token following period in identifier: {token}" + )))? + } + } + } + Token::EOF => break, + token => { + return Err(ParserError::ParserError(format!( + "Unexpected token in identifier: {token}" + )))? 
+                }
+            }
+        }
+
+        Ok(idents)
+    }
+
+    /// Parse `AS identifier` (or simply `identifier` if it's not a reserved keyword).
+    /// Some examples with aliases: `SELECT 1 foo`, `SELECT COUNT(*) AS cnt`,
+    /// `SELECT ... FROM t1 foo, t2 bar`, `SELECT ... FROM (...) AS bar`
+    pub fn parse_optional_alias(
+        &mut self,
+        reserved_kwds: &[Keyword],
+    ) -> Result<Option<Ident>, ParserError> {
+        let after_as = self.parse_keyword(Keyword::AS);
+        let next_token = self.next_token();
+        match next_token.token {
+            // Accept any identifier after `AS` (though many dialects have restrictions on
+            // keywords that may appear here). If there's no `AS`: don't parse keywords,
+            // which may start a construct allowed in this position, to be parsed as aliases.
+            // (For example, in `FROM t1 JOIN` the `JOIN` will always be parsed as a keyword,
+            // not an alias.)
+            Token::Word(w) if after_as || !reserved_kwds.contains(&w.keyword) => {
+                Ok(Some(w.to_ident(next_token.span)))
+            }
+            // MSSQL supports single-quoted strings as aliases for columns.
+            // We accept them as table aliases too, although MSSQL does not.
+            //
+            // Note, that this conflicts with an obscure rule from the SQL
+            // standard, which we don't implement:
+            // https://crate.io/docs/sql-99/en/latest/chapters/07.html#character-string-literal-s
+            // "[Obscure Rule] SQL allows you to break a long <character string literal> up
+            // into two or more smaller <character string literal>s, split by a <separator>
+            // that includes a newline character. When it sees such a <literal>, your DBMS
+            // will ignore the <separator> and treat the multiple strings as a single
+            // <literal>."
+            Token::SingleQuotedString(s) => Ok(Some(Ident::with_quote('\'', s))),
+            // Support for MySql dialect double-quoted string, `AS "HOUR"` for example
+            Token::DoubleQuotedString(s) => Ok(Some(Ident::with_quote('\"', s))),
+            _ => {
+                if after_as {
+                    return self.expected("an identifier after AS", next_token);
+                }
+                self.prev_token();
+                Ok(None) // no alias found
+            }
+        }
+    }
+
+    /// Parse an optional identifier; most callers use this for a name such as a
+    /// `window_name` or an `index_name`.
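+    /// Returns `Ok(None)`, without consuming any input, when the next token is not a
+    /// valid identifier.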
+ pub fn parse_optional_indent(&mut self) -> Result, ParserError> { + self.maybe_parse(|parser| parser.parse_identifier(false)) + } +} diff --git a/src/parser/insert.rs b/src/parser/insert.rs new file mode 100644 index 000000000..3562a3237 --- /dev/null +++ b/src/parser/insert.rs @@ -0,0 +1,200 @@ +use super::*; + +impl Parser<'_> { + /// Parse an INSERT statement, returning a `Box`ed SetExpr + /// + /// This is used to reduce the size of the stack frames in debug builds + pub(crate) fn parse_insert_setexpr_boxed(&mut self) -> Result, ParserError> { + Ok(Box::new(SetExpr::Insert(self.parse_insert()?))) + } + + /// Parse an INSERT statement + pub fn parse_insert(&mut self) -> Result { + let or = self.parse_conflict_clause(); + let priority = if !dialect_of!(self is MySqlDialect | GenericDialect) { + None + } else if self.parse_keyword(Keyword::LOW_PRIORITY) { + Some(MysqlInsertPriority::LowPriority) + } else if self.parse_keyword(Keyword::DELAYED) { + Some(MysqlInsertPriority::Delayed) + } else if self.parse_keyword(Keyword::HIGH_PRIORITY) { + Some(MysqlInsertPriority::HighPriority) + } else { + None + }; + + let ignore = dialect_of!(self is MySqlDialect | GenericDialect) + && self.parse_keyword(Keyword::IGNORE); + + let replace_into = false; + + let action = self.parse_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE]); + let into = action == Some(Keyword::INTO); + let overwrite = action == Some(Keyword::OVERWRITE); + + let local = self.parse_keyword(Keyword::LOCAL); + + if self.parse_keyword(Keyword::DIRECTORY) { + let path = self.parse_literal_string()?; + let file_format = if self.parse_keywords(&[Keyword::STORED, Keyword::AS]) { + Some(self.parse_file_format()?) + } else { + None + }; + let source = self.parse_query()?; + Ok(Statement::Directory { + local, + path, + overwrite, + file_format, + source, + }) + } else { + // Hive lets you put table here regardless + let table = self.parse_keyword(Keyword::TABLE); + let table_name = self.parse_object_name(false)?; + + let table_alias = + if dialect_of!(self is PostgreSqlDialect) && self.parse_keyword(Keyword::AS) { + Some(self.parse_identifier(false)?) + } else { + None + }; + + let is_mysql = dialect_of!(self is MySqlDialect); + + let (columns, partitioned, after_columns, source) = + if self.parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES]) { + (vec![], None, vec![], None) + } else { + let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; + + let partitioned = self.parse_insert_partition()?; + // Hive allows you to specify columns after partitions as well if you want. + let after_columns = if dialect_of!(self is HiveDialect) { + self.parse_parenthesized_column_list(Optional, false)? 
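+                        // e.g. Hive allows `INSERT INTO TABLE t PARTITION (p) (c1, c2) SELECT ...`,
+                        // with a column list following the partition spec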
+ } else { + vec![] + }; + + let source = Some(self.parse_query()?); + + (columns, partitioned, after_columns, source) + }; + + let insert_alias = if dialect_of!(self is MySqlDialect | GenericDialect) + && self.parse_keyword(Keyword::AS) + { + let row_alias = self.parse_object_name(false)?; + let col_aliases = Some(self.parse_parenthesized_column_list(Optional, false)?); + Some(InsertAliases { + row_alias, + col_aliases, + }) + } else { + None + }; + + let on = if self.parse_keyword(Keyword::ON) { + if self.parse_keyword(Keyword::CONFLICT) { + let conflict_target = + if self.parse_keywords(&[Keyword::ON, Keyword::CONSTRAINT]) { + Some(ConflictTarget::OnConstraint(self.parse_object_name(false)?)) + } else if self.peek_token() == Token::LParen { + Some(ConflictTarget::Columns( + self.parse_parenthesized_column_list(IsOptional::Mandatory, false)?, + )) + } else { + None + }; + + self.expect_keyword(Keyword::DO)?; + let action = if self.parse_keyword(Keyword::NOTHING) { + OnConflictAction::DoNothing + } else { + self.expect_keyword(Keyword::UPDATE)?; + self.expect_keyword(Keyword::SET)?; + let assignments = self.parse_comma_separated(Parser::parse_assignment)?; + let selection = if self.parse_keyword(Keyword::WHERE) { + Some(self.parse_expr()?) + } else { + None + }; + OnConflictAction::DoUpdate(DoUpdate { + assignments, + selection, + }) + }; + + Some(OnInsert::OnConflict(OnConflict { + conflict_target, + action, + })) + } else { + self.expect_keyword(Keyword::DUPLICATE)?; + self.expect_keyword(Keyword::KEY)?; + self.expect_keyword(Keyword::UPDATE)?; + let l = self.parse_comma_separated(Parser::parse_assignment)?; + + Some(OnInsert::DuplicateKeyUpdate(l)) + } + } else { + None + }; + + let returning = if self.parse_keyword(Keyword::RETURNING) { + Some(self.parse_comma_separated(Parser::parse_select_item)?) 
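+            // `RETURNING` items reuse the SELECT projection parser, so
+            // expressions, aliases and wildcards are all accepted here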
+ } else { + None + }; + + Ok(Statement::Insert(Insert { + or, + table_name, + table_alias, + ignore, + into, + overwrite, + partitioned, + columns, + after_columns, + source, + table, + on, + returning, + replace_into, + priority, + insert_alias, + })) + } + } + + pub(crate) fn parse_conflict_clause(&mut self) -> Option { + if self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]) { + Some(SqliteOnConflict::Replace) + } else if self.parse_keywords(&[Keyword::OR, Keyword::ROLLBACK]) { + Some(SqliteOnConflict::Rollback) + } else if self.parse_keywords(&[Keyword::OR, Keyword::ABORT]) { + Some(SqliteOnConflict::Abort) + } else if self.parse_keywords(&[Keyword::OR, Keyword::FAIL]) { + Some(SqliteOnConflict::Fail) + } else if self.parse_keywords(&[Keyword::OR, Keyword::IGNORE]) { + Some(SqliteOnConflict::Ignore) + } else if self.parse_keyword(Keyword::REPLACE) { + Some(SqliteOnConflict::Replace) + } else { + None + } + } + + pub fn parse_insert_partition(&mut self) -> Result>, ParserError> { + if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let partition_cols = Some(self.parse_comma_separated(Parser::parse_expr)?); + self.expect_token(&Token::RParen)?; + Ok(partition_cols) + } else { + Ok(None) + } + } +} diff --git a/src/parser/install.rs b/src/parser/install.rs new file mode 100644 index 000000000..d1dc4007a --- /dev/null +++ b/src/parser/install.rs @@ -0,0 +1,10 @@ +use crate::parser::*; + +impl Parser<'_> { + /// `INSTALL [extension_name]` + pub fn parse_install(&mut self) -> Result { + let extension_name = self.parse_identifier(false)?; + + Ok(Statement::Install { extension_name }) + } +} diff --git a/src/parser/keyword.rs b/src/parser/keyword.rs new file mode 100644 index 000000000..609cadccd --- /dev/null +++ b/src/parser/keyword.rs @@ -0,0 +1,22 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + /// Parse a keyword-separated list of 1+ items accepted by `F` + pub fn parse_keyword_separated( + &mut self, + keyword: Keyword, + mut f: F, + ) -> Result, ParserError> + where + F: FnMut(&mut Parser<'a>) -> Result, + { + let mut values = vec![]; + loop { + values.push(f(self)?); + if !self.parse_keyword(keyword) { + break; + } + } + Ok(values) + } +} diff --git a/src/parser/kill.rs b/src/parser/kill.rs new file mode 100644 index 000000000..d4c39cdc7 --- /dev/null +++ b/src/parser/kill.rs @@ -0,0 +1,29 @@ +use crate::parser::*; + +impl Parser<'_> { + // KILL [CONNECTION | QUERY | MUTATION] processlist_id + pub fn parse_kill(&mut self) -> Result { + let modifier_keyword = + self.parse_one_of_keywords(&[Keyword::CONNECTION, Keyword::QUERY, Keyword::MUTATION]); + + let id = self.parse_literal_uint()?; + + let modifier = match modifier_keyword { + Some(Keyword::CONNECTION) => Some(KillType::Connection), + Some(Keyword::QUERY) => Some(KillType::Query), + Some(Keyword::MUTATION) => { + if dialect_of!(self is ClickHouseDialect | GenericDialect) { + Some(KillType::Mutation) + } else { + self.expected( + "Unsupported type for KILL, allowed: CONNECTION | QUERY", + self.peek_token(), + )? 
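+                        // `expected` always returns `Err`, so the `?` above
+                        // propagates the error; this arm never yields a value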
+                }
+            }
+            _ => None,
+        };
+
+        Ok(Statement::Kill { modifier, id })
+    }
+}
diff --git a/src/parser/listen.rs b/src/parser/listen.rs
new file mode 100644
index 000000000..ef7f99ac7
--- /dev/null
+++ b/src/parser/listen.rs
@@ -0,0 +1,8 @@
+use crate::parser::*;
+
+impl Parser<'_> {
+    pub fn parse_listen(&mut self) -> Result<Statement, ParserError> {
+        let channel = self.parse_identifier(false)?;
+        Ok(Statement::LISTEN { channel })
+    }
+}
diff --git a/src/parser/lists.rs b/src/parser/lists.rs
new file mode 100644
index 000000000..731fd64b8
--- /dev/null
+++ b/src/parser/lists.rs
@@ -0,0 +1,100 @@
+use crate::parser::*;
+
+impl<'a> Parser<'a> {
+    /// Consume the comma between two comma-separated syntax elements.
+    /// Allows for control over trailing commas.
+    /// Returns true if the end of the list has been reached, i.e. there is no next element.
+    fn is_parse_comma_separated_end_with_trailing_commas(&mut self, trailing_commas: bool) -> bool {
+        if !self.consume_token(&Token::Comma) {
+            true
+        } else if trailing_commas {
+            let token = self.peek_token().token;
+            match token {
+                Token::Word(ref kw)
+                    if keywords::RESERVED_FOR_COLUMN_ALIAS.contains(&kw.keyword) =>
+                {
+                    true
+                }
+                Token::RParen | Token::SemiColon | Token::EOF | Token::RBracket | Token::RBrace => {
+                    true
+                }
+                _ => false,
+            }
+        } else {
+            false
+        }
+    }
+
+    /// Consume the comma between two comma-separated syntax elements.
+    /// Returns true if the end of the list has been reached, i.e. there is no next element.
+    pub(crate) fn is_parse_comma_separated_end(&mut self) -> bool {
+        self.is_parse_comma_separated_end_with_trailing_commas(self.options.trailing_commas)
+    }
+
+    /// Parse a comma-separated list of 1+ items accepted by `F`
+    pub fn parse_comma_separated<T, F>(&mut self, f: F) -> Result<Vec<T>, ParserError>
+    where
+        F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
+    {
+        self.parse_comma_separated_with_trailing_commas(f, self.options.trailing_commas)
+    }
+
+    /// Parse a comma-separated list of 1+ items accepted by `F`.
+    /// Allows for control over trailing commas.
+    pub(crate) fn parse_comma_separated_with_trailing_commas<T, F>(
+        &mut self,
+        mut f: F,
+        trailing_commas: bool,
+    ) -> Result<Vec<T>, ParserError>
+    where
+        F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
+    {
+        let mut values = vec![];
+        loop {
+            values.push(f(self)?);
+            if self.is_parse_comma_separated_end_with_trailing_commas(trailing_commas) {
+                break;
+            }
+        }
+        Ok(values)
+    }
+
+    /// Parse `( ... )`, running `F` between the parentheses.
+    pub fn parse_parenthesized<T, F>(&mut self, mut f: F) -> Result<T, ParserError>
+    where
+        F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
+    {
+        self.expect_token(&Token::LParen)?;
+        let res = f(self)?;
+        self.expect_token(&Token::RParen)?;
+        Ok(res)
+    }
+
+    /// Parse a comma-separated list of 0+ items accepted by `F`.
+    /// * `end_token` - expected end token for the closure (e.g. [Token::RParen], [Token::RBrace] ...)
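+    ///
+    /// A short sketch of the intended use, assuming `GenericDialect`; the
+    /// `end_token` is only peeked at, not consumed:
+    ///
+    /// ```rust
+    /// use sqlparser::dialect::GenericDialect;
+    /// use sqlparser::parser::Parser;
+    /// use sqlparser::tokenizer::Token;
+    ///
+    /// let dialect = GenericDialect {};
+    /// // The next token is already `)`, so an empty list is returned.
+    /// let mut parser = Parser::new(&dialect).try_with_sql(")").unwrap();
+    /// let exprs = parser
+    ///     .parse_comma_separated0(Parser::parse_expr, Token::RParen)
+    ///     .unwrap();
+    /// assert!(exprs.is_empty());
+    /// ```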
+ pub fn parse_comma_separated0( + &mut self, + f: F, + end_token: Token, + ) -> Result, ParserError> + where + F: FnMut(&mut Parser<'a>) -> Result, + { + if self.peek_token().token == end_token { + return Ok(vec![]); + } + + if self.options.trailing_commas && self.peek_tokens() == [Token::Comma, end_token] { + let _ = self.consume_token(&Token::Comma); + return Ok(vec![]); + } + + self.parse_comma_separated(f) + } + + pub(crate) fn parse_parenthesized_identifiers(&mut self) -> Result, ParserError> { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(|p| p.parse_identifier(false))?; + self.expect_token(&Token::RParen)?; + Ok(partitions) + } +} diff --git a/src/parser/load.rs b/src/parser/load.rs new file mode 100644 index 000000000..fca741607 --- /dev/null +++ b/src/parser/load.rs @@ -0,0 +1,50 @@ +use crate::parser::*; + +impl Parser<'_> { + /// Parse a SQL LOAD statement + pub fn parse_load(&mut self) -> Result { + if self.dialect.supports_load_extension() { + let extension_name = self.parse_identifier(false)?; + Ok(Statement::Load { extension_name }) + } else if self.parse_keyword(Keyword::DATA) && self.dialect.supports_load_data() { + let local = self.parse_one_of_keywords(&[Keyword::LOCAL]).is_some(); + self.expect_keyword(Keyword::INPATH)?; + let inpath = self.parse_literal_string()?; + let overwrite = self.parse_one_of_keywords(&[Keyword::OVERWRITE]).is_some(); + self.expect_keyword(Keyword::INTO)?; + self.expect_keyword(Keyword::TABLE)?; + let table_name = self.parse_object_name(false)?; + let partitioned = self.parse_insert_partition()?; + let table_format = self.parse_load_data_table_format()?; + Ok(Statement::LoadData { + local, + inpath, + overwrite, + table_name, + partitioned, + table_format, + }) + } else { + self.expected( + "`DATA` or an extension name after `LOAD`", + self.peek_token(), + ) + } + } + + pub fn parse_load_data_table_format( + &mut self, + ) -> Result, ParserError> { + if self.parse_keyword(Keyword::INPUTFORMAT) { + let input_format = self.parse_expr()?; + self.expect_keyword(Keyword::SERDE)?; + let serde = self.parse_expr()?; + Ok(Some(HiveLoadDataFormat { + input_format, + serde, + })) + } else { + Ok(None) + } + } +} diff --git a/src/parser/merge.rs b/src/parser/merge.rs new file mode 100644 index 000000000..acfaca5df --- /dev/null +++ b/src/parser/merge.rs @@ -0,0 +1,123 @@ +use crate::parser::*; + +impl Parser<'_> { + pub fn parse_merge(&mut self) -> Result { + let into = self.parse_keyword(Keyword::INTO); + + let table = self.parse_table_factor()?; + + self.expect_keyword(Keyword::USING)?; + let source = self.parse_table_factor()?; + self.expect_keyword(Keyword::ON)?; + let on = self.parse_expr()?; + let clauses = self.parse_merge_clauses()?; + + Ok(Statement::Merge { + into, + table, + source, + on: Box::new(on), + clauses, + }) + } + + pub fn parse_merge_clauses(&mut self) -> Result, ParserError> { + let mut clauses = vec![]; + loop { + if self.peek_token() == Token::EOF || self.peek_token() == Token::SemiColon { + break; + } + self.expect_keyword(Keyword::WHEN)?; + + let mut clause_kind = MergeClauseKind::Matched; + if self.parse_keyword(Keyword::NOT) { + clause_kind = MergeClauseKind::NotMatched; + } + self.expect_keyword(Keyword::MATCHED)?; + + if matches!(clause_kind, MergeClauseKind::NotMatched) + && self.parse_keywords(&[Keyword::BY, Keyword::SOURCE]) + { + clause_kind = MergeClauseKind::NotMatchedBySource; + } else if matches!(clause_kind, MergeClauseKind::NotMatched) + && self.parse_keywords(&[Keyword::BY, 
Keyword::TARGET]) + { + clause_kind = MergeClauseKind::NotMatchedByTarget; + } + + let predicate = if self.parse_keyword(Keyword::AND) { + Some(self.parse_expr()?) + } else { + None + }; + + self.expect_keyword(Keyword::THEN)?; + + let merge_clause = match self.parse_one_of_keywords(&[ + Keyword::UPDATE, + Keyword::INSERT, + Keyword::DELETE, + ]) { + Some(Keyword::UPDATE) => { + if matches!( + clause_kind, + MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget + ) { + return Err(ParserError::ParserError(format!( + "UPDATE is not allowed in a {clause_kind} merge clause" + ))); + } + self.expect_keyword(Keyword::SET)?; + MergeAction::Update { + assignments: self.parse_comma_separated(Parser::parse_assignment)?, + } + } + Some(Keyword::DELETE) => { + if matches!( + clause_kind, + MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget + ) { + return Err(ParserError::ParserError(format!( + "DELETE is not allowed in a {clause_kind} merge clause" + ))); + } + MergeAction::Delete + } + Some(Keyword::INSERT) => { + if !matches!( + clause_kind, + MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget + ) { + return Err(ParserError::ParserError(format!( + "INSERT is not allowed in a {clause_kind} merge clause" + ))); + } + let is_mysql = dialect_of!(self is MySqlDialect); + + let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; + let kind = if dialect_of!(self is BigQueryDialect | GenericDialect) + && self.parse_keyword(Keyword::ROW) + { + MergeInsertKind::Row + } else { + self.expect_keyword(Keyword::VALUES)?; + let values = self.parse_values(is_mysql)?; + MergeInsertKind::Values(values) + }; + MergeAction::Insert(MergeInsertExpr { columns, kind }) + } + _ => { + return Err(ParserError::ParserError( + "expected UPDATE, DELETE or INSERT in merge clause".to_string(), + )); + } + }; + clauses.push(MergeClause { + clause_kind, + predicate, + action: merge_clause, + }); + } + Ok(clauses) + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index ac76f6484..382c5b730 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -36,10 +36,71 @@ use crate::ast::helpers::stmt_create_table::{CreateTableBuilder, CreateTableConf use crate::ast::Statement::CreatePolicy; use crate::ast::*; use crate::dialect::*; -use crate::keywords::{Keyword, ALL_KEYWORDS}; +use crate::keywords::Keyword; use crate::tokenizer::*; mod alter; +mod analyze; +mod assert; +mod assignment; +mod attach; +mod cache; +mod call; +mod close; +mod columns; +mod comment; +mod commit; +mod copy; +mod create; +mod deallocate; +mod declare; +mod delete; +mod dialects; +mod discard; +mod drop; +mod end; +mod execute; +mod explain; +mod expr; +mod fetch; +mod flush; +mod grant; +mod identifier; +mod insert; +mod install; +mod keyword; +mod kill; +mod listen; +mod lists; +mod load; +mod merge; +mod msck; +mod notify; +mod optimize; +mod options; +mod pragma; +mod prepare; +mod release; +mod replace; +mod revoke; +mod rollback; +mod savepoint; +mod select; +mod set; +mod show; +mod start; +mod tokens; +mod truncate; +mod uncache; +mod unlisten; +mod unload; +mod update; +mod r#use; +mod value; +mod window; + +#[cfg(test)] +mod tests; #[derive(Debug, Clone, PartialEq, Eq)] pub enum ParserError { @@ -52,6 +113,7 @@ pub enum ParserError { type ParsedAction = (Keyword, Option>); // Use `Parser::expected` instead, if possible +#[macro_export] macro_rules! 
parser_err { ($MSG:expr, $loc:expr) => { Err(ParserError::ParserError(format!("{}{}", $MSG, $loc))) @@ -198,7 +260,7 @@ const DEFAULT_REMAINING_DEPTH: usize = 50; /// child type. /// /// See [Parser::parse_data_type] for details -struct MatchedTrailingBracket(bool); +pub(crate) struct MatchedTrailingBracket(bool); impl From for MatchedTrailingBracket { fn from(value: bool) -> Self { @@ -392,6 +454,11 @@ impl<'a> Parser<'a> { Ok(self.with_tokens_with_locations(tokens)) } + /// The index of the first unprocessed token. + pub fn index(&self) -> usize { + self.index + } + /// Parse potentially multiple statements /// /// Example @@ -562,13093 +629,73 @@ impl<'a> Parser<'a> { } } - pub fn parse_comment(&mut self) -> Result { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - - self.expect_keyword(Keyword::ON)?; - let token = self.next_token(); - - let (object_type, object_name) = match token.token { - Token::Word(w) if w.keyword == Keyword::COLUMN => { - (CommentObject::Column, self.parse_object_name(false)?) - } - Token::Word(w) if w.keyword == Keyword::TABLE => { - (CommentObject::Table, self.parse_object_name(false)?) - } - Token::Word(w) if w.keyword == Keyword::EXTENSION => { - (CommentObject::Extension, self.parse_object_name(false)?) - } - Token::Word(w) if w.keyword == Keyword::SCHEMA => { - (CommentObject::Schema, self.parse_object_name(false)?) - } - Token::Word(w) if w.keyword == Keyword::DATABASE => { - (CommentObject::Database, self.parse_object_name(false)?) - } - Token::Word(w) if w.keyword == Keyword::USER => { - (CommentObject::User, self.parse_object_name(false)?) - } - Token::Word(w) if w.keyword == Keyword::ROLE => { - (CommentObject::Role, self.parse_object_name(false)?) - } - _ => self.expected("comment object_type", token)?, - }; - - self.expect_keyword(Keyword::IS)?; - let comment = if self.parse_keyword(Keyword::NULL) { - None - } else { - Some(self.parse_literal_string()?) 
- }; - Ok(Statement::Comment { - object_type, - object_name, - comment, - if_exists, - }) - } - - pub fn parse_flush(&mut self) -> Result { - let mut channel = None; - let mut tables: Vec = vec![]; - let mut read_lock = false; - let mut export = false; - - if !dialect_of!(self is MySqlDialect | GenericDialect) { - return parser_err!("Unsupported statement FLUSH", self.peek_token().span.start); - } - - let location = if self.parse_keyword(Keyword::NO_WRITE_TO_BINLOG) { - Some(FlushLocation::NoWriteToBinlog) - } else if self.parse_keyword(Keyword::LOCAL) { - Some(FlushLocation::Local) - } else { - None - }; - - let object_type = if self.parse_keywords(&[Keyword::BINARY, Keyword::LOGS]) { - FlushType::BinaryLogs - } else if self.parse_keywords(&[Keyword::ENGINE, Keyword::LOGS]) { - FlushType::EngineLogs - } else if self.parse_keywords(&[Keyword::ERROR, Keyword::LOGS]) { - FlushType::ErrorLogs - } else if self.parse_keywords(&[Keyword::GENERAL, Keyword::LOGS]) { - FlushType::GeneralLogs - } else if self.parse_keywords(&[Keyword::HOSTS]) { - FlushType::Hosts - } else if self.parse_keyword(Keyword::PRIVILEGES) { - FlushType::Privileges - } else if self.parse_keyword(Keyword::OPTIMIZER_COSTS) { - FlushType::OptimizerCosts - } else if self.parse_keywords(&[Keyword::RELAY, Keyword::LOGS]) { - if self.parse_keywords(&[Keyword::FOR, Keyword::CHANNEL]) { - channel = Some(self.parse_object_name(false).unwrap().to_string()); - } - FlushType::RelayLogs - } else if self.parse_keywords(&[Keyword::SLOW, Keyword::LOGS]) { - FlushType::SlowLogs - } else if self.parse_keyword(Keyword::STATUS) { - FlushType::Status - } else if self.parse_keyword(Keyword::USER_RESOURCES) { - FlushType::UserResources - } else if self.parse_keywords(&[Keyword::LOGS]) { - FlushType::Logs - } else if self.parse_keywords(&[Keyword::TABLES]) { - loop { - let next_token = self.next_token(); - match &next_token.token { - Token::Word(w) => match w.keyword { - Keyword::WITH => { - read_lock = self.parse_keywords(&[Keyword::READ, Keyword::LOCK]); - } - Keyword::FOR => { - export = self.parse_keyword(Keyword::EXPORT); - } - Keyword::NoKeyword => { - self.prev_token(); - tables = self.parse_comma_separated(|p| p.parse_object_name(false))?; - } - _ => {} - }, - _ => { - break; - } - } - } - - FlushType::Tables - } else { - return self.expected( - "BINARY LOGS, ENGINE LOGS, ERROR LOGS, GENERAL LOGS, HOSTS, LOGS, PRIVILEGES, OPTIMIZER_COSTS,\ - RELAY LOGS [FOR CHANNEL channel], SLOW LOGS, STATUS, USER_RESOURCES", - self.peek_token(), - ); - }; - - Ok(Statement::Flush { - object_type, - location, - channel, - read_lock, - export, - tables, - }) - } - - pub fn parse_msck(&mut self) -> Result { - let repair = self.parse_keyword(Keyword::REPAIR); - self.expect_keyword(Keyword::TABLE)?; - let table_name = self.parse_object_name(false)?; - let partition_action = self - .maybe_parse(|parser| { - let pa = match parser.parse_one_of_keywords(&[ - Keyword::ADD, - Keyword::DROP, - Keyword::SYNC, - ]) { - Some(Keyword::ADD) => Some(AddDropSync::ADD), - Some(Keyword::DROP) => Some(AddDropSync::DROP), - Some(Keyword::SYNC) => Some(AddDropSync::SYNC), - _ => None, - }; - parser.expect_keyword(Keyword::PARTITIONS)?; - Ok(pa) - })? - .unwrap_or_default(); - Ok(Statement::Msck { - repair, - table_name, - partition_action, - }) - } - - pub fn parse_truncate(&mut self) -> Result { - let table = self.parse_keyword(Keyword::TABLE); - let only = self.parse_keyword(Keyword::ONLY); - - let table_names = self - .parse_comma_separated(|p| p.parse_object_name(false))? 
- .into_iter() - .map(|n| TruncateTableTarget { name: n }) - .collect(); - - let mut partitions = None; - if self.parse_keyword(Keyword::PARTITION) { - self.expect_token(&Token::LParen)?; - partitions = Some(self.parse_comma_separated(Parser::parse_expr)?); - self.expect_token(&Token::RParen)?; - } - - let mut identity = None; - let mut cascade = None; - - if dialect_of!(self is PostgreSqlDialect | GenericDialect) { - identity = if self.parse_keywords(&[Keyword::RESTART, Keyword::IDENTITY]) { - Some(TruncateIdentityOption::Restart) - } else if self.parse_keywords(&[Keyword::CONTINUE, Keyword::IDENTITY]) { - Some(TruncateIdentityOption::Continue) - } else { - None - }; - - cascade = if self.parse_keyword(Keyword::CASCADE) { - Some(TruncateCascadeOption::Cascade) - } else if self.parse_keyword(Keyword::RESTRICT) { - Some(TruncateCascadeOption::Restrict) - } else { - None - }; - }; - - let on_cluster = self.parse_optional_on_cluster()?; - - Ok(Statement::Truncate { - table_names, - partitions, - table, - only, - identity, - cascade, - on_cluster, - }) - } - - pub fn parse_attach_duckdb_database_options( - &mut self, - ) -> Result, ParserError> { - if !self.consume_token(&Token::LParen) { - return Ok(vec![]); - } - - let mut options = vec![]; - loop { - if self.parse_keyword(Keyword::READ_ONLY) { - let boolean = if self.parse_keyword(Keyword::TRUE) { - Some(true) - } else if self.parse_keyword(Keyword::FALSE) { - Some(false) - } else { - None - }; - options.push(AttachDuckDBDatabaseOption::ReadOnly(boolean)); - } else if self.parse_keyword(Keyword::TYPE) { - let ident = self.parse_identifier(false)?; - options.push(AttachDuckDBDatabaseOption::Type(ident)); - } else { - return self.expected("expected one of: ), READ_ONLY, TYPE", self.peek_token()); - }; - - if self.consume_token(&Token::RParen) { - return Ok(options); - } else if self.consume_token(&Token::Comma) { - continue; - } else { - return self.expected("expected one of: ')', ','", self.peek_token()); - } - } - } - - pub fn parse_attach_duckdb_database(&mut self) -> Result { - let database = self.parse_keyword(Keyword::DATABASE); - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let database_path = self.parse_identifier(false)?; - let database_alias = if self.parse_keyword(Keyword::AS) { - Some(self.parse_identifier(false)?) 
- } else { - None - }; - - let attach_options = self.parse_attach_duckdb_database_options()?; - Ok(Statement::AttachDuckDBDatabase { - if_not_exists, - database, - database_path, - database_alias, - attach_options, - }) - } - - pub fn parse_detach_duckdb_database(&mut self) -> Result { - let database = self.parse_keyword(Keyword::DATABASE); - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let database_alias = self.parse_identifier(false)?; - Ok(Statement::DetachDuckDBDatabase { - if_exists, - database, - database_alias, - }) - } - - pub fn parse_attach_database(&mut self) -> Result { - let database = self.parse_keyword(Keyword::DATABASE); - let database_file_name = self.parse_expr()?; - self.expect_keyword(Keyword::AS)?; - let schema_name = self.parse_identifier(false)?; - Ok(Statement::AttachDatabase { - database, - schema_name, - database_file_name, + fn parse(s: String, loc: Location) -> Result + where + ::Err: Display, + { + s.parse::().map_err(|e| { + ParserError::ParserError(format!( + "Could not parse '{s}' as {}: {e}{loc}", + core::any::type_name::() + )) }) } - pub fn parse_analyze(&mut self) -> Result { - self.expect_keyword(Keyword::TABLE)?; - let table_name = self.parse_object_name(false)?; - let mut for_columns = false; - let mut cache_metadata = false; - let mut noscan = false; - let mut partitions = None; - let mut compute_statistics = false; - let mut columns = vec![]; - loop { - match self.parse_one_of_keywords(&[ - Keyword::PARTITION, - Keyword::FOR, - Keyword::CACHE, - Keyword::NOSCAN, - Keyword::COMPUTE, - ]) { - Some(Keyword::PARTITION) => { - self.expect_token(&Token::LParen)?; - partitions = Some(self.parse_comma_separated(Parser::parse_expr)?); - self.expect_token(&Token::RParen)?; - } - Some(Keyword::NOSCAN) => noscan = true, - Some(Keyword::FOR) => { - self.expect_keyword(Keyword::COLUMNS)?; - - columns = self - .maybe_parse(|parser| { - parser.parse_comma_separated(|p| p.parse_identifier(false)) - })? - .unwrap_or_default(); - for_columns = true - } - Some(Keyword::CACHE) => { - self.expect_keyword(Keyword::METADATA)?; - cache_metadata = true - } - Some(Keyword::COMPUTE) => { - self.expect_keyword(Keyword::STATISTICS)?; - compute_statistics = true - } - _ => break, - } + /// Run a parser method `f`, reverting back to the current position if unsuccessful. + /// Returns `None` if `f` returns an error + pub fn maybe_parse(&mut self, f: F) -> Result, ParserError> + where + F: FnMut(&mut Parser) -> Result, + { + match self.try_parse(f) { + Ok(t) => Ok(Some(t)), + Err(ParserError::RecursionLimitExceeded) => Err(ParserError::RecursionLimitExceeded), + _ => Ok(None), } - - Ok(Statement::Analyze { - table_name, - for_columns, - columns, - partitions, - cache_metadata, - noscan, - compute_statistics, - }) } - /// Parse a new expression including wildcard & qualified wildcard. - pub fn parse_wildcard_expr(&mut self) -> Result { + /// Run a parser method `f`, reverting back to the current position if unsuccessful. 
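+    ///
+    /// A small sketch of the rewind behaviour, assuming `GenericDialect`:
+    ///
+    /// ```rust
+    /// use sqlparser::dialect::GenericDialect;
+    /// use sqlparser::parser::Parser;
+    ///
+    /// let dialect = GenericDialect {};
+    /// let mut parser = Parser::new(&dialect).try_with_sql("foo").unwrap();
+    /// // `foo` is not an unsigned integer literal, so this attempt fails...
+    /// assert!(parser.try_parse(|p| p.parse_literal_uint()).is_err());
+    /// // ...and the position is rewound, so the identifier can still be parsed.
+    /// assert_eq!(parser.parse_identifier(false).unwrap().value, "foo");
+    /// ```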
+ pub fn try_parse(&mut self, mut f: F) -> Result + where + F: FnMut(&mut Parser) -> Result, + { let index = self.index; - - let next_token = self.next_token(); - match next_token.token { - t @ (Token::Word(_) | Token::SingleQuotedString(_)) => { - if self.peek_token().token == Token::Period { - let mut id_parts: Vec = vec![match t { - Token::Word(w) => w.to_ident(next_token.span), - Token::SingleQuotedString(s) => Ident::with_quote('\'', s), - _ => unreachable!(), // We matched above - }]; - - while self.consume_token(&Token::Period) { - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => id_parts.push(w.to_ident(next_token.span)), - Token::SingleQuotedString(s) => { - // SQLite has single-quoted identifiers - id_parts.push(Ident::with_quote('\'', s)) - } - Token::Mul => { - return Ok(Expr::QualifiedWildcard( - ObjectName(id_parts), - AttachedToken(next_token), - )); - } - _ => { - return self - .expected("an identifier or a '*' after '.'", next_token); - } - } - } - } - } - Token::Mul => { - return Ok(Expr::Wildcard(AttachedToken(next_token))); - } - _ => (), - }; - - self.index = index; - self.parse_expr() - } - - /// Parse a new expression. - pub fn parse_expr(&mut self) -> Result { - self.parse_subexpr(self.dialect.prec_unknown()) - } - - /// Parse tokens until the precedence changes. - pub fn parse_subexpr(&mut self, precedence: u8) -> Result { - let _guard = self.recursion_counter.try_decrease()?; - debug!("parsing expr"); - let mut expr = self.parse_prefix()?; - debug!("prefix: {:?}", expr); - loop { - let next_precedence = self.get_next_precedence()?; - debug!("next precedence: {:?}", next_precedence); - - if precedence >= next_precedence { - break; - } - - expr = self.parse_infix(expr, next_precedence)?; - } - Ok(expr) - } - - pub fn parse_assert(&mut self) -> Result { - let condition = self.parse_expr()?; - let message = if self.parse_keyword(Keyword::AS) { - Some(self.parse_expr()?) - } else { - None - }; - - Ok(Statement::Assert { condition, message }) - } - - pub fn parse_savepoint(&mut self) -> Result { - let name = self.parse_identifier(false)?; - Ok(Statement::Savepoint { name }) - } - - pub fn parse_release(&mut self) -> Result { - let _ = self.parse_keyword(Keyword::SAVEPOINT); - let name = self.parse_identifier(false)?; - - Ok(Statement::ReleaseSavepoint { name }) - } - - pub fn parse_listen(&mut self) -> Result { - let channel = self.parse_identifier(false)?; - Ok(Statement::LISTEN { channel }) - } - - pub fn parse_unlisten(&mut self) -> Result { - let channel = if self.consume_token(&Token::Mul) { - Ident::new(Expr::Wildcard(AttachedToken::empty()).to_string()) - } else { - match self.parse_identifier(false) { - Ok(expr) => expr, - _ => { - self.prev_token(); - return self.expected("wildcard or identifier", self.peek_token()); - } - } - }; - Ok(Statement::UNLISTEN { channel }) - } - - pub fn parse_notify(&mut self) -> Result { - let channel = self.parse_identifier(false)?; - let payload = if self.consume_token(&Token::Comma) { - Some(self.parse_literal_string()?) - } else { - None - }; - Ok(Statement::NOTIFY { channel, payload }) - } - - // Tries to parse an expression by matching the specified word to known keywords that have a special meaning in the dialect. - // Returns `None if no match is found. 
- fn parse_expr_prefix_by_reserved_word( - &mut self, - w: &Word, - w_span: Span, - ) -> Result, ParserError> { - match w.keyword { - Keyword::TRUE | Keyword::FALSE if self.dialect.supports_boolean_literals() => { - self.prev_token(); - Ok(Some(Expr::Value(self.parse_value()?))) - } - Keyword::NULL => { - self.prev_token(); - Ok(Some(Expr::Value(self.parse_value()?))) - } - Keyword::CURRENT_CATALOG - | Keyword::CURRENT_USER - | Keyword::SESSION_USER - | Keyword::USER - if dialect_of!(self is PostgreSqlDialect | GenericDialect) => - { - Ok(Some(Expr::Function(Function { - name: ObjectName(vec![w.to_ident(w_span)]), - parameters: FunctionArguments::None, - args: FunctionArguments::None, - null_treatment: None, - filter: None, - over: None, - within_group: vec![], - }))) - } - Keyword::CURRENT_TIMESTAMP - | Keyword::CURRENT_TIME - | Keyword::CURRENT_DATE - | Keyword::LOCALTIME - | Keyword::LOCALTIMESTAMP => { - Ok(Some(self.parse_time_functions(ObjectName(vec![w.to_ident(w_span)]))?)) - } - Keyword::CASE => Ok(Some(self.parse_case_expr()?)), - Keyword::CONVERT => Ok(Some(self.parse_convert_expr(false)?)), - Keyword::TRY_CONVERT if self.dialect.supports_try_convert() => Ok(Some(self.parse_convert_expr(true)?)), - Keyword::CAST => Ok(Some(self.parse_cast_expr(CastKind::Cast)?)), - Keyword::TRY_CAST => Ok(Some(self.parse_cast_expr(CastKind::TryCast)?)), - Keyword::SAFE_CAST => Ok(Some(self.parse_cast_expr(CastKind::SafeCast)?)), - Keyword::EXISTS - // Support parsing Databricks has a function named `exists`. - if !dialect_of!(self is DatabricksDialect) - || matches!( - self.peek_nth_token(1).token, - Token::Word(Word { - keyword: Keyword::SELECT | Keyword::WITH, - .. - }) - ) => - { - Ok(Some(self.parse_exists_expr(false)?)) - } - Keyword::EXTRACT => Ok(Some(self.parse_extract_expr()?)), - Keyword::CEIL => Ok(Some(self.parse_ceil_floor_expr(true)?)), - Keyword::FLOOR => Ok(Some(self.parse_ceil_floor_expr(false)?)), - Keyword::POSITION if self.peek_token().token == Token::LParen => { - Ok(Some(self.parse_position_expr(w.to_ident(w_span))?)) - } - Keyword::SUBSTRING => Ok(Some(self.parse_substring_expr()?)), - Keyword::OVERLAY => Ok(Some(self.parse_overlay_expr()?)), - Keyword::TRIM => Ok(Some(self.parse_trim_expr()?)), - Keyword::INTERVAL => Ok(Some(self.parse_interval()?)), - // Treat ARRAY[1,2,3] as an array [1,2,3], otherwise try as subquery or a function call - Keyword::ARRAY if self.peek_token() == Token::LBracket => { - self.expect_token(&Token::LBracket)?; - Ok(Some(self.parse_array_expr(true)?)) - } - Keyword::ARRAY - if self.peek_token() == Token::LParen - && !dialect_of!(self is ClickHouseDialect | DatabricksDialect) => - { - self.expect_token(&Token::LParen)?; - let query = self.parse_query()?; - self.expect_token(&Token::RParen)?; - Ok(Some(Expr::Function(Function { - name: ObjectName(vec![w.to_ident(w_span)]), - parameters: FunctionArguments::None, - args: FunctionArguments::Subquery(query), - filter: None, - null_treatment: None, - over: None, - within_group: vec![], - }))) - } - Keyword::NOT => Ok(Some(self.parse_not()?)), - Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => { - Ok(Some(self.parse_match_against()?)) - } - Keyword::STRUCT if self.dialect.supports_struct_literal() => { - Ok(Some(self.parse_struct_literal()?)) - } - Keyword::PRIOR if matches!(self.state, ParserState::ConnectBy) => { - let expr = self.parse_subexpr(self.dialect.prec_value(Precedence::PlusMinus))?; - Ok(Some(Expr::Prior(Box::new(expr)))) - } - Keyword::MAP if self.peek_token() == 
Token::LBrace && self.dialect.support_map_literal_syntax() => { - Ok(Some(self.parse_duckdb_map_literal()?)) - } - _ => Ok(None) - } - } - - // Tries to parse an expression by a word that is not known to have a special meaning in the dialect. - fn parse_expr_prefix_by_unreserved_word( - &mut self, - w: &Word, - w_span: Span, - ) -> Result { - match self.peek_token().token { - Token::LParen | Token::Period => { - let mut id_parts: Vec = vec![w.to_ident(w_span)]; - let mut ending_wildcard: Option = None; - while self.consume_token(&Token::Period) { - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => id_parts.push(w.to_ident(next_token.span)), - Token::Mul => { - // Postgres explicitly allows funcnm(tablenm.*) and the - // function array_agg traverses this control flow - if dialect_of!(self is PostgreSqlDialect) { - ending_wildcard = Some(next_token); - break; - } else { - return self.expected("an identifier after '.'", next_token); - } - } - Token::SingleQuotedString(s) => id_parts.push(Ident::with_quote('\'', s)), - _ => { - return self.expected("an identifier or a '*' after '.'", next_token); - } - } - } - - if let Some(wildcard_token) = ending_wildcard { - Ok(Expr::QualifiedWildcard( - ObjectName(id_parts), - AttachedToken(wildcard_token), - )) - } else if self.consume_token(&Token::LParen) { - if dialect_of!(self is SnowflakeDialect | MsSqlDialect) - && self.consume_tokens(&[Token::Plus, Token::RParen]) - { - Ok(Expr::OuterJoin(Box::new( - match <[Ident; 1]>::try_from(id_parts) { - Ok([ident]) => Expr::Identifier(ident), - Err(parts) => Expr::CompoundIdentifier(parts), - }, - ))) - } else { - self.prev_token(); - self.parse_function(ObjectName(id_parts)) - } - } else { - Ok(Expr::CompoundIdentifier(id_parts)) - } - } - // string introducer https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html - Token::SingleQuotedString(_) - | Token::DoubleQuotedString(_) - | Token::HexStringLiteral(_) - if w.value.starts_with('_') => - { - Ok(Expr::IntroducedString { - introducer: w.value.clone(), - value: self.parse_introduced_string_value()?, - }) - } - Token::Arrow if self.dialect.supports_lambda_functions() => { - self.expect_token(&Token::Arrow)?; - Ok(Expr::Lambda(LambdaFunction { - params: OneOrManyWithParens::One(w.to_ident(w_span)), - body: Box::new(self.parse_expr()?), - })) + match f(self) { + Ok(t) => Ok(t), + Err(e) => { + // Unwind stack if limit exceeded + self.index = index; + Err(e) } - _ => Ok(Expr::Identifier(w.to_ident(w_span))), } } - /// Parse an expression prefix. - pub fn parse_prefix(&mut self) -> Result { - // allow the dialect to override prefix parsing - if let Some(prefix) = self.dialect.parse_prefix(self) { - return prefix; - } - - // PostgreSQL allows any string literal to be preceded by a type name, indicating that the - // string literal represents a literal of that type. Some examples: - // - // DATE '2020-05-20' - // TIMESTAMP WITH TIME ZONE '2020-05-20 7:43:54' - // BOOL 'true' - // - // The first two are standard SQL, while the latter is a PostgreSQL extension. Complicating - // matters is the fact that INTERVAL string literals may optionally be followed by special - // keywords, e.g.: - // - // INTERVAL '7' DAY - // - // Note also that naively `SELECT date` looks like a syntax error because the `date` type - // name is not followed by a string literal, but in fact in PostgreSQL it is a valid - // expression that should parse as the column name "date". 
- let loc = self.peek_token().span.start; - let opt_expr = self.maybe_parse(|parser| { - match parser.parse_data_type()? { - DataType::Interval => parser.parse_interval(), - // PostgreSQL allows almost any identifier to be used as custom data type name, - // and we support that in `parse_data_type()`. But unlike Postgres we don't - // have a list of globally reserved keywords (since they vary across dialects), - // so given `NOT 'a' LIKE 'b'`, we'd accept `NOT` as a possible custom data type - // name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of - // an unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the - // `type 'string'` syntax for the custom data types at all. - DataType::Custom(..) => parser_err!("dummy", loc), - data_type => Ok(Expr::TypedString { - data_type, - value: parser.parse_literal_string()?, - }), - } - })?; - - if let Some(expr) = opt_expr { - return Ok(expr); - } - - let next_token = self.next_token(); - let expr = match next_token.token { - Token::Word(w) => { - // The word we consumed may fall into one of two cases: it has a special meaning, or not. - // For example, in Snowflake, the word `interval` may have two meanings depending on the context: - // `SELECT CURRENT_DATE() + INTERVAL '1 DAY', MAX(interval) FROM tbl;` - // ^^^^^^^^^^^^^^^^ ^^^^^^^^ - // interval expression identifier - // - // We first try to parse the word and following tokens as a special expression, and if that fails, - // we rollback and try to parse it as an identifier. - match self.try_parse(|parser| { - parser.parse_expr_prefix_by_reserved_word(&w, next_token.span) - }) { - // This word indicated an expression prefix and parsing was successful - Ok(Some(expr)) => Ok(expr), - - // No expression prefix associated with this word - Ok(None) => Ok(self.parse_expr_prefix_by_unreserved_word(&w, next_token.span)?), - - // If parsing of the word as a special expression failed, we are facing two options: - // 1. The statement is malformed, e.g. `SELECT INTERVAL '1 DAI` (`DAI` instead of `DAY`) - // 2. The word is used as an identifier, e.g. `SELECT MAX(interval) FROM tbl` - // We first try to parse the word as an identifier and if that fails - // we rollback and return the parsing error we got from trying to parse a - // special expression (to maintain backwards compatibility of parsing errors). 
- Err(e) => { - if !self.dialect.is_reserved_for_identifier(w.keyword) { - if let Ok(Some(expr)) = self.maybe_parse(|parser| { - parser.parse_expr_prefix_by_unreserved_word(&w, next_token.span) - }) { - return Ok(expr); - } - } - return Err(e); - } - } - } // End of Token::Word - // array `[1, 2, 3]` - Token::LBracket => self.parse_array_expr(false), - tok @ Token::Minus | tok @ Token::Plus => { - let op = if tok == Token::Plus { - UnaryOperator::Plus - } else { - UnaryOperator::Minus - }; - Ok(Expr::UnaryOp { - op, - expr: Box::new( - self.parse_subexpr(self.dialect.prec_value(Precedence::MulDivModOp))?, - ), - }) - } - Token::ExclamationMark if self.dialect.supports_bang_not_operator() => { - Ok(Expr::UnaryOp { - op: UnaryOperator::BangNot, - expr: Box::new( - self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?, - ), - }) - } - tok @ Token::DoubleExclamationMark - | tok @ Token::PGSquareRoot - | tok @ Token::PGCubeRoot - | tok @ Token::AtSign - | tok @ Token::Tilde - if dialect_of!(self is PostgreSqlDialect) => - { - let op = match tok { - Token::DoubleExclamationMark => UnaryOperator::PGPrefixFactorial, - Token::PGSquareRoot => UnaryOperator::PGSquareRoot, - Token::PGCubeRoot => UnaryOperator::PGCubeRoot, - Token::AtSign => UnaryOperator::PGAbs, - Token::Tilde => UnaryOperator::PGBitwiseNot, - _ => unreachable!(), - }; - Ok(Expr::UnaryOp { - op, - expr: Box::new( - self.parse_subexpr(self.dialect.prec_value(Precedence::PlusMinus))?, - ), - }) - } - Token::EscapedStringLiteral(_) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => - { - self.prev_token(); - Ok(Expr::Value(self.parse_value()?)) - } - Token::UnicodeStringLiteral(_) => { - self.prev_token(); - Ok(Expr::Value(self.parse_value()?)) - } - Token::Number(_, _) - | Token::SingleQuotedString(_) - | Token::DoubleQuotedString(_) - | Token::TripleSingleQuotedString(_) - | Token::TripleDoubleQuotedString(_) - | Token::DollarQuotedString(_) - | Token::SingleQuotedByteStringLiteral(_) - | Token::DoubleQuotedByteStringLiteral(_) - | Token::TripleSingleQuotedByteStringLiteral(_) - | Token::TripleDoubleQuotedByteStringLiteral(_) - | Token::SingleQuotedRawStringLiteral(_) - | Token::DoubleQuotedRawStringLiteral(_) - | Token::TripleSingleQuotedRawStringLiteral(_) - | Token::TripleDoubleQuotedRawStringLiteral(_) - | Token::NationalStringLiteral(_) - | Token::HexStringLiteral(_) => { - self.prev_token(); - Ok(Expr::Value(self.parse_value()?)) - } - Token::LParen => { - let expr = if let Some(expr) = self.try_parse_expr_sub_query()? { - expr - } else if let Some(lambda) = self.try_parse_lambda()? 
{ - return Ok(lambda); - } else { - let exprs = self.parse_comma_separated(Parser::parse_expr)?; - match exprs.len() { - 0 => unreachable!(), // parse_comma_separated ensures 1 or more - 1 => Expr::Nested(Box::new(exprs.into_iter().next().unwrap())), - _ => Expr::Tuple(exprs), - } - }; - self.expect_token(&Token::RParen)?; - let expr = self.try_parse_method(expr)?; - if !self.consume_token(&Token::Period) { - Ok(expr) - } else { - let tok = self.next_token(); - let key = match tok.token { - Token::Word(word) => word.to_ident(tok.span), - _ => { - return parser_err!( - format!("Expected identifier, found: {tok}"), - tok.span.start - ) - } - }; - Ok(Expr::CompositeAccess { - expr: Box::new(expr), - key, - }) - } - } - Token::Placeholder(_) | Token::Colon | Token::AtSign => { - self.prev_token(); - Ok(Expr::Value(self.parse_value()?)) - } - Token::LBrace if self.dialect.supports_dictionary_syntax() => { - self.prev_token(); - self.parse_duckdb_struct_literal() - } - _ => self.expected("an expression", next_token), - }?; - - let expr = self.try_parse_method(expr)?; - - if self.parse_keyword(Keyword::COLLATE) { - Ok(Expr::Collate { - expr: Box::new(expr), - collation: self.parse_object_name(false)?, - }) + fn parse_optional_on_cluster(&mut self) -> Result, ParserError> { + if self.parse_keywords(&[Keyword::ON, Keyword::CLUSTER]) { + Ok(Some(self.parse_identifier(false)?)) } else { - Ok(expr) + Ok(None) } } - pub fn parse_utility_options(&mut self) -> Result, ParserError> { + // Apparently this is dead code? I assume its retained for public API + // compatibility or something of that nature. + pub fn parse_precision(&mut self) -> Result { self.expect_token(&Token::LParen)?; - let options = self.parse_comma_separated(Self::parse_utility_option)?; + let n = self.parse_literal_uint()?; self.expect_token(&Token::RParen)?; - - Ok(options) - } - - fn parse_utility_option(&mut self) -> Result { - let name = self.parse_identifier(false)?; - - let next_token = self.peek_token(); - if next_token == Token::Comma || next_token == Token::RParen { - return Ok(UtilityOption { name, arg: None }); - } - let arg = self.parse_expr()?; - - Ok(UtilityOption { - name, - arg: Some(arg), - }) - } - - fn try_parse_expr_sub_query(&mut self) -> Result, ParserError> { - if !self.peek_sub_query() { - return Ok(None); - } - - Ok(Some(Expr::Subquery(self.parse_query()?))) - } - - fn try_parse_lambda(&mut self) -> Result, ParserError> { - if !self.dialect.supports_lambda_functions() { - return Ok(None); - } - self.maybe_parse(|p| { - let params = p.parse_comma_separated(|p| p.parse_identifier(false))?; - p.expect_token(&Token::RParen)?; - p.expect_token(&Token::Arrow)?; - let expr = p.parse_expr()?; - Ok(Expr::Lambda(LambdaFunction { - params: OneOrManyWithParens::Many(params), - body: Box::new(expr), - })) - }) - } - - /// Parses method call expression - fn try_parse_method(&mut self, expr: Expr) -> Result { - if !self.dialect.supports_methods() { - return Ok(expr); - } - let method_chain = self.maybe_parse(|p| { - let mut method_chain = Vec::new(); - while p.consume_token(&Token::Period) { - let tok = p.next_token(); - let name = match tok.token { - Token::Word(word) => word.to_ident(tok.span), - _ => return p.expected("identifier", tok), - }; - let func = match p.parse_function(ObjectName(vec![name]))? 
{ - Expr::Function(func) => func, - _ => return p.expected("function", p.peek_token()), - }; - method_chain.push(func); - } - if !method_chain.is_empty() { - Ok(method_chain) - } else { - p.expected("function", p.peek_token()) - } - })?; - if let Some(method_chain) = method_chain { - Ok(Expr::Method(Method { - expr: Box::new(expr), - method_chain, - })) - } else { - Ok(expr) - } + Ok(n) } - pub fn parse_function(&mut self, name: ObjectName) -> Result { - self.expect_token(&Token::LParen)?; - - // Snowflake permits a subquery to be passed as an argument without - // an enclosing set of parens if it's the only argument. - if dialect_of!(self is SnowflakeDialect) && self.peek_sub_query() { - let subquery = self.parse_query()?; - self.expect_token(&Token::RParen)?; - return Ok(Expr::Function(Function { - name, - parameters: FunctionArguments::None, - args: FunctionArguments::Subquery(subquery), - filter: None, - null_treatment: None, - over: None, - within_group: vec![], - })); - } - - let mut args = self.parse_function_argument_list()?; - let mut parameters = FunctionArguments::None; - // ClickHouse aggregations support parametric functions like `HISTOGRAM(0.5, 0.6)(x, y)` - // which (0.5, 0.6) is a parameter to the function. - if dialect_of!(self is ClickHouseDialect | GenericDialect) - && self.consume_token(&Token::LParen) + /// Returns true if the next keyword indicates a sub query, i.e. SELECT or WITH + fn peek_sub_query(&mut self) -> bool { + if self + .parse_one_of_keywords(&[Keyword::SELECT, Keyword::WITH]) + .is_some() { - parameters = FunctionArguments::List(args); - args = self.parse_function_argument_list()?; - } - - let within_group = if self.parse_keywords(&[Keyword::WITHIN, Keyword::GROUP]) { - self.expect_token(&Token::LParen)?; - self.expect_keywords(&[Keyword::ORDER, Keyword::BY])?; - let order_by = self.parse_comma_separated(Parser::parse_order_by_expr)?; - self.expect_token(&Token::RParen)?; - order_by - } else { - vec![] - }; - - let filter = if self.dialect.supports_filter_during_aggregation() - && self.parse_keyword(Keyword::FILTER) - && self.consume_token(&Token::LParen) - && self.parse_keyword(Keyword::WHERE) - { - let filter = Some(Box::new(self.parse_expr()?)); - self.expect_token(&Token::RParen)?; - filter - } else { - None - }; - - // Syntax for null treatment shows up either in the args list - // or after the function call, but not both. - let null_treatment = if args - .clauses - .iter() - .all(|clause| !matches!(clause, FunctionArgumentClause::IgnoreOrRespectNulls(_))) - { - self.parse_null_treatment()? - } else { - None - }; - - let over = if self.parse_keyword(Keyword::OVER) { - if self.consume_token(&Token::LParen) { - let window_spec = self.parse_window_spec()?; - Some(WindowType::WindowSpec(window_spec)) - } else { - Some(WindowType::NamedWindow(self.parse_identifier(false)?)) - } - } else { - None - }; - - Ok(Expr::Function(Function { - name, - parameters, - args: FunctionArguments::List(args), - null_treatment, - filter, - over, - within_group, - })) - } - - /// Optionally parses a null treatment clause. 
- fn parse_null_treatment(&mut self) -> Result, ParserError> { - match self.parse_one_of_keywords(&[Keyword::RESPECT, Keyword::IGNORE]) { - Some(keyword) => { - self.expect_keyword(Keyword::NULLS)?; - - Ok(match keyword { - Keyword::RESPECT => Some(NullTreatment::RespectNulls), - Keyword::IGNORE => Some(NullTreatment::IgnoreNulls), - _ => None, - }) - } - None => Ok(None), - } - } - - pub fn parse_time_functions(&mut self, name: ObjectName) -> Result { - let args = if self.consume_token(&Token::LParen) { - FunctionArguments::List(self.parse_function_argument_list()?) - } else { - FunctionArguments::None - }; - Ok(Expr::Function(Function { - name, - parameters: FunctionArguments::None, - args, - filter: None, - over: None, - null_treatment: None, - within_group: vec![], - })) - } - - pub fn parse_window_frame_units(&mut self) -> Result { - let next_token = self.next_token(); - match &next_token.token { - Token::Word(w) => match w.keyword { - Keyword::ROWS => Ok(WindowFrameUnits::Rows), - Keyword::RANGE => Ok(WindowFrameUnits::Range), - Keyword::GROUPS => Ok(WindowFrameUnits::Groups), - _ => self.expected("ROWS, RANGE, GROUPS", next_token)?, - }, - _ => self.expected("ROWS, RANGE, GROUPS", next_token), - } - } - - pub fn parse_window_frame(&mut self) -> Result { - let units = self.parse_window_frame_units()?; - let (start_bound, end_bound) = if self.parse_keyword(Keyword::BETWEEN) { - let start_bound = self.parse_window_frame_bound()?; - self.expect_keyword(Keyword::AND)?; - let end_bound = Some(self.parse_window_frame_bound()?); - (start_bound, end_bound) - } else { - (self.parse_window_frame_bound()?, None) - }; - Ok(WindowFrame { - units, - start_bound, - end_bound, - }) - } - - /// Parse `CURRENT ROW` or `{ | UNBOUNDED } { PRECEDING | FOLLOWING }` - pub fn parse_window_frame_bound(&mut self) -> Result { - if self.parse_keywords(&[Keyword::CURRENT, Keyword::ROW]) { - Ok(WindowFrameBound::CurrentRow) - } else { - let rows = if self.parse_keyword(Keyword::UNBOUNDED) { - None - } else { - Some(Box::new(match self.peek_token().token { - Token::SingleQuotedString(_) => self.parse_interval()?, - _ => self.parse_expr()?, - })) - }; - if self.parse_keyword(Keyword::PRECEDING) { - Ok(WindowFrameBound::Preceding(rows)) - } else if self.parse_keyword(Keyword::FOLLOWING) { - Ok(WindowFrameBound::Following(rows)) - } else { - self.expected("PRECEDING or FOLLOWING", self.peek_token()) - } - } - } - - /// Parse a group by expr. Group by expr can be one of group sets, roll up, cube, or simple expr. - fn parse_group_by_expr(&mut self) -> Result { - if self.dialect.supports_group_by_expr() { - if self.parse_keywords(&[Keyword::GROUPING, Keyword::SETS]) { - self.expect_token(&Token::LParen)?; - let result = self.parse_comma_separated(|p| p.parse_tuple(false, true))?; - self.expect_token(&Token::RParen)?; - Ok(Expr::GroupingSets(result)) - } else if self.parse_keyword(Keyword::CUBE) { - self.expect_token(&Token::LParen)?; - let result = self.parse_comma_separated(|p| p.parse_tuple(true, true))?; - self.expect_token(&Token::RParen)?; - Ok(Expr::Cube(result)) - } else if self.parse_keyword(Keyword::ROLLUP) { - self.expect_token(&Token::LParen)?; - let result = self.parse_comma_separated(|p| p.parse_tuple(true, true))?; - self.expect_token(&Token::RParen)?; - Ok(Expr::Rollup(result)) - } else if self.consume_tokens(&[Token::LParen, Token::RParen]) { - // PostgreSQL allow to use empty tuple as a group by expression, - // e.g. `GROUP BY (), name`. 
Please refer to GROUP BY Clause section in - // [PostgreSQL](https://www.postgresql.org/docs/16/sql-select.html) - Ok(Expr::Tuple(vec![])) - } else { - self.parse_expr() - } - } else { - // TODO parse rollup for other dialects - self.parse_expr() - } - } - - /// Parse a tuple with `(` and `)`. - /// If `lift_singleton` is true, then a singleton tuple is lifted to a tuple of length 1, otherwise it will fail. - /// If `allow_empty` is true, then an empty tuple is allowed. - fn parse_tuple( - &mut self, - lift_singleton: bool, - allow_empty: bool, - ) -> Result, ParserError> { - if lift_singleton { - if self.consume_token(&Token::LParen) { - let result = if allow_empty && self.consume_token(&Token::RParen) { - vec![] - } else { - let result = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - result - }; - Ok(result) - } else { - Ok(vec![self.parse_expr()?]) - } - } else { - self.expect_token(&Token::LParen)?; - let result = if allow_empty && self.consume_token(&Token::RParen) { - vec![] - } else { - let result = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - result - }; - Ok(result) - } - } - - pub fn parse_case_expr(&mut self) -> Result { - let mut operand = None; - if !self.parse_keyword(Keyword::WHEN) { - operand = Some(Box::new(self.parse_expr()?)); - self.expect_keyword(Keyword::WHEN)?; - } - let mut conditions = vec![]; - let mut results = vec![]; - loop { - conditions.push(self.parse_expr()?); - self.expect_keyword(Keyword::THEN)?; - results.push(self.parse_expr()?); - if !self.parse_keyword(Keyword::WHEN) { - break; - } - } - let else_result = if self.parse_keyword(Keyword::ELSE) { - Some(Box::new(self.parse_expr()?)) - } else { - None - }; - self.expect_keyword(Keyword::END)?; - Ok(Expr::Case { - operand, - conditions, - results, - else_result, - }) - } - - pub fn parse_optional_cast_format(&mut self) -> Result, ParserError> { - if self.parse_keyword(Keyword::FORMAT) { - let value = self.parse_value()?; - match self.parse_optional_time_zone()? { - Some(tz) => Ok(Some(CastFormat::ValueAtTimeZone(value, tz))), - None => Ok(Some(CastFormat::Value(value))), - } - } else { - Ok(None) - } - } - - pub fn parse_optional_time_zone(&mut self) -> Result, ParserError> { - if self.parse_keywords(&[Keyword::AT, Keyword::TIME, Keyword::ZONE]) { - self.parse_value().map(Some) - } else { - Ok(None) - } - } - - /// mssql-like convert function - fn parse_mssql_convert(&mut self, is_try: bool) -> Result { - self.expect_token(&Token::LParen)?; - let data_type = self.parse_data_type()?; - self.expect_token(&Token::Comma)?; - let expr = self.parse_expr()?; - let styles = if self.consume_token(&Token::Comma) { - self.parse_comma_separated(Parser::parse_expr)? 
- } else { - Default::default() - }; - self.expect_token(&Token::RParen)?; - Ok(Expr::Convert { - is_try, - expr: Box::new(expr), - data_type: Some(data_type), - charset: None, - target_before_value: true, - styles, - }) - } - - /// Parse a SQL CONVERT function: - /// - `CONVERT('héhé' USING utf8mb4)` (MySQL) - /// - `CONVERT('héhé', CHAR CHARACTER SET utf8mb4)` (MySQL) - /// - `CONVERT(DECIMAL(10, 5), 42)` (MSSQL) - the type comes first - pub fn parse_convert_expr(&mut self, is_try: bool) -> Result { - if self.dialect.convert_type_before_value() { - return self.parse_mssql_convert(is_try); - } - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - if self.parse_keyword(Keyword::USING) { - let charset = self.parse_object_name(false)?; - self.expect_token(&Token::RParen)?; - return Ok(Expr::Convert { - is_try, - expr: Box::new(expr), - data_type: None, - charset: Some(charset), - target_before_value: false, - styles: vec![], - }); - } - self.expect_token(&Token::Comma)?; - let data_type = self.parse_data_type()?; - let charset = if self.parse_keywords(&[Keyword::CHARACTER, Keyword::SET]) { - Some(self.parse_object_name(false)?) - } else { - None - }; - self.expect_token(&Token::RParen)?; - Ok(Expr::Convert { - is_try, - expr: Box::new(expr), - data_type: Some(data_type), - charset, - target_before_value: false, - styles: vec![], - }) - } - - /// Parse a SQL CAST function e.g. `CAST(expr AS FLOAT)` - pub fn parse_cast_expr(&mut self, kind: CastKind) -> Result { - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - self.expect_keyword(Keyword::AS)?; - let data_type = self.parse_data_type()?; - let format = self.parse_optional_cast_format()?; - self.expect_token(&Token::RParen)?; - Ok(Expr::Cast { - kind, - expr: Box::new(expr), - data_type, - format, - }) - } - - /// Parse a SQL EXISTS expression e.g. `WHERE EXISTS(SELECT ...)`. - pub fn parse_exists_expr(&mut self, negated: bool) -> Result { - self.expect_token(&Token::LParen)?; - let exists_node = Expr::Exists { - negated, - subquery: self.parse_query()?, - }; - self.expect_token(&Token::RParen)?; - Ok(exists_node) - } - - pub fn parse_extract_expr(&mut self) -> Result { - self.expect_token(&Token::LParen)?; - let field = self.parse_date_time_field()?; - - let syntax = if self.parse_keyword(Keyword::FROM) { - ExtractSyntax::From - } else if self.consume_token(&Token::Comma) - && dialect_of!(self is SnowflakeDialect | GenericDialect) - { - ExtractSyntax::Comma - } else { - return Err(ParserError::ParserError( - "Expected 'FROM' or ','".to_string(), - )); - }; - - let expr = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - Ok(Expr::Extract { - field, - expr: Box::new(expr), - syntax, - }) - } - - pub fn parse_ceil_floor_expr(&mut self, is_ceil: bool) -> Result { - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - // Parse `CEIL/FLOOR(expr)` - let field = if self.parse_keyword(Keyword::TO) { - // Parse `CEIL/FLOOR(expr TO DateTimeField)` - CeilFloorKind::DateTimeField(self.parse_date_time_field()?) - } else if self.consume_token(&Token::Comma) { - // Parse `CEIL/FLOOR(expr, scale)` - match self.parse_value()? 
{
- Value::Number(n, s) => CeilFloorKind::Scale(Value::Number(n, s)),
- _ => {
- return Err(ParserError::ParserError(
- "Scale field can only be of number type".to_string(),
- ))
- }
- }
- } else {
- CeilFloorKind::DateTimeField(DateTimeField::NoDateTime)
- };
- self.expect_token(&Token::RParen)?;
- if is_ceil {
- Ok(Expr::Ceil {
- expr: Box::new(expr),
- field,
- })
- } else {
- Ok(Expr::Floor {
- expr: Box::new(expr),
- field,
- })
- }
- }
-
- pub fn parse_position_expr(&mut self, ident: Ident) -> Result<Expr, ParserError> {
- let between_prec = self.dialect.prec_value(Precedence::Between);
- let position_expr = self.maybe_parse(|p| {
- // PARSE SELECT POSITION('@' in field)
- p.expect_token(&Token::LParen)?;
-
- // Parse the subexpr till the IN keyword
- let expr = p.parse_subexpr(between_prec)?;
- p.expect_keyword(Keyword::IN)?;
- let from = p.parse_expr()?;
- p.expect_token(&Token::RParen)?;
- Ok(Expr::Position {
- expr: Box::new(expr),
- r#in: Box::new(from),
- })
- })?;
- match position_expr {
- Some(expr) => Ok(expr),
- // Snowflake supports `position` as an ordinary function call
- // without the special `IN` syntax.
- None => self.parse_function(ObjectName(vec![ident])),
- }
- }
-
- pub fn parse_substring_expr(&mut self) -> Result<Expr, ParserError> {
- // PARSE SUBSTRING (EXPR [FROM 1] [FOR 3])
- self.expect_token(&Token::LParen)?;
- let expr = self.parse_expr()?;
- let mut from_expr = None;
- let special = self.consume_token(&Token::Comma);
- if special || self.parse_keyword(Keyword::FROM) {
- from_expr = Some(self.parse_expr()?);
- }
-
- let mut to_expr = None;
- if self.parse_keyword(Keyword::FOR) || self.consume_token(&Token::Comma) {
- to_expr = Some(self.parse_expr()?);
- }
- self.expect_token(&Token::RParen)?;
-
- Ok(Expr::Substring {
- expr: Box::new(expr),
- substring_from: from_expr.map(Box::new),
- substring_for: to_expr.map(Box::new),
- special,
- })
- }
-
- pub fn parse_overlay_expr(&mut self) -> Result<Expr, ParserError> {
- // PARSE OVERLAY (EXPR PLACING EXPR FROM 1 [FOR 3])
- self.expect_token(&Token::LParen)?;
- let expr = self.parse_expr()?;
- self.expect_keyword(Keyword::PLACING)?;
- let what_expr = self.parse_expr()?;
- self.expect_keyword(Keyword::FROM)?;
- let from_expr = self.parse_expr()?;
- let mut for_expr = None;
- if self.parse_keyword(Keyword::FOR) {
- for_expr = Some(self.parse_expr()?);
- }
- self.expect_token(&Token::RParen)?;
-
- Ok(Expr::Overlay {
- expr: Box::new(expr),
- overlay_what: Box::new(what_expr),
- overlay_from: Box::new(from_expr),
- overlay_for: for_expr.map(Box::new),
- })
- }
-
- /// ```sql
- /// TRIM ([WHERE] ['text' FROM] 'text')
- /// TRIM ('text')
- /// TRIM(<expr> [, characters]) -- only Snowflake or BigQuery
- /// ```
- pub fn parse_trim_expr(&mut self) -> Result<Expr, ParserError> {
- self.expect_token(&Token::LParen)?;
- let mut trim_where = None;
- if let Token::Word(word) = self.peek_token().token {
- if [Keyword::BOTH, Keyword::LEADING, Keyword::TRAILING]
- .iter()
- .any(|d| word.keyword == *d)
- {
- trim_where = Some(self.parse_trim_where()?);
- }
- }
- let expr = self.parse_expr()?;
- if self.parse_keyword(Keyword::FROM) {
- let trim_what = Box::new(expr);
- let expr = self.parse_expr()?;
- self.expect_token(&Token::RParen)?;
- Ok(Expr::Trim {
- expr: Box::new(expr),
- trim_where,
- trim_what: Some(trim_what),
- trim_characters: None,
- })
- } else if self.consume_token(&Token::Comma)
- && dialect_of!(self is SnowflakeDialect | BigQueryDialect | GenericDialect)
- {
- let characters = self.parse_comma_separated(Parser::parse_expr)?;
- self.expect_token(&Token::RParen)?;
- Ok(Expr::Trim {
- expr:
Box::new(expr), - trim_where: None, - trim_what: None, - trim_characters: Some(characters), - }) - } else { - self.expect_token(&Token::RParen)?; - Ok(Expr::Trim { - expr: Box::new(expr), - trim_where, - trim_what: None, - trim_characters: None, - }) - } - } - - pub fn parse_trim_where(&mut self) -> Result { - let next_token = self.next_token(); - match &next_token.token { - Token::Word(w) => match w.keyword { - Keyword::BOTH => Ok(TrimWhereField::Both), - Keyword::LEADING => Ok(TrimWhereField::Leading), - Keyword::TRAILING => Ok(TrimWhereField::Trailing), - _ => self.expected("trim_where field", next_token)?, - }, - _ => self.expected("trim_where field", next_token), - } - } - - /// Parses an array expression `[ex1, ex2, ..]` - /// if `named` is `true`, came from an expression like `ARRAY[ex1, ex2]` - pub fn parse_array_expr(&mut self, named: bool) -> Result { - let exprs = self.parse_comma_separated0(Parser::parse_expr, Token::RBracket)?; - self.expect_token(&Token::RBracket)?; - Ok(Expr::Array(Array { elem: exprs, named })) - } - - pub fn parse_listagg_on_overflow(&mut self) -> Result, ParserError> { - if self.parse_keywords(&[Keyword::ON, Keyword::OVERFLOW]) { - if self.parse_keyword(Keyword::ERROR) { - Ok(Some(ListAggOnOverflow::Error)) - } else { - self.expect_keyword(Keyword::TRUNCATE)?; - let filler = match self.peek_token().token { - Token::Word(w) - if w.keyword == Keyword::WITH || w.keyword == Keyword::WITHOUT => - { - None - } - Token::SingleQuotedString(_) - | Token::EscapedStringLiteral(_) - | Token::UnicodeStringLiteral(_) - | Token::NationalStringLiteral(_) - | Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)), - _ => self.expected( - "either filler, WITH, or WITHOUT in LISTAGG", - self.peek_token(), - )?, - }; - let with_count = self.parse_keyword(Keyword::WITH); - if !with_count && !self.parse_keyword(Keyword::WITHOUT) { - self.expected("either WITH or WITHOUT in LISTAGG", self.peek_token())?; - } - self.expect_keyword(Keyword::COUNT)?; - Ok(Some(ListAggOnOverflow::Truncate { filler, with_count })) - } - } else { - Ok(None) - } - } - - // This function parses date/time fields for the EXTRACT function-like - // operator, interval qualifiers, and the ceil/floor operations. - // EXTRACT supports a wider set of date/time fields than interval qualifiers, - // so this function may need to be split in two. 
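As a side note for reviewers: the three TRIM shapes handled above are easy to check from the crate's public API. A minimal sketch (illustrative only, not part of this diff; written against a recent sqlparser release, so exact variant fields may drift between versions):

```rust
use sqlparser::ast::Expr;
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    let dialect = GenericDialect {};

    // Standard form: TRIM(BOTH 'x' FROM field) fills trim_where and trim_what.
    let expr = Parser::new(&dialect)
        .try_with_sql("TRIM(BOTH 'x' FROM field)")
        .unwrap()
        .parse_expr()
        .unwrap();
    if let Expr::Trim { trim_where, trim_what, trim_characters, .. } = expr {
        assert!(trim_where.is_some() && trim_what.is_some());
        assert!(trim_characters.is_none());
    }

    // Comma form (Snowflake/BigQuery/Generic): TRIM(field, 'xyz') fills trim_characters.
    let expr = Parser::new(&dialect)
        .try_with_sql("TRIM(field, 'xyz')")
        .unwrap()
        .parse_expr()
        .unwrap();
    assert!(matches!(expr, Expr::Trim { trim_characters: Some(_), .. }));
}
```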
- pub fn parse_date_time_field(&mut self) -> Result { - let next_token = self.next_token(); - match &next_token.token { - Token::Word(w) => match w.keyword { - Keyword::YEAR => Ok(DateTimeField::Year), - Keyword::MONTH => Ok(DateTimeField::Month), - Keyword::WEEK => { - let week_day = if dialect_of!(self is BigQueryDialect | GenericDialect) - && self.consume_token(&Token::LParen) - { - let week_day = self.parse_identifier(false)?; - self.expect_token(&Token::RParen)?; - Some(week_day) - } else { - None - }; - Ok(DateTimeField::Week(week_day)) - } - Keyword::DAY => Ok(DateTimeField::Day), - Keyword::DAYOFWEEK => Ok(DateTimeField::DayOfWeek), - Keyword::DAYOFYEAR => Ok(DateTimeField::DayOfYear), - Keyword::DATE => Ok(DateTimeField::Date), - Keyword::DATETIME => Ok(DateTimeField::Datetime), - Keyword::HOUR => Ok(DateTimeField::Hour), - Keyword::MINUTE => Ok(DateTimeField::Minute), - Keyword::SECOND => Ok(DateTimeField::Second), - Keyword::CENTURY => Ok(DateTimeField::Century), - Keyword::DECADE => Ok(DateTimeField::Decade), - Keyword::DOY => Ok(DateTimeField::Doy), - Keyword::DOW => Ok(DateTimeField::Dow), - Keyword::EPOCH => Ok(DateTimeField::Epoch), - Keyword::ISODOW => Ok(DateTimeField::Isodow), - Keyword::ISOYEAR => Ok(DateTimeField::Isoyear), - Keyword::ISOWEEK => Ok(DateTimeField::IsoWeek), - Keyword::JULIAN => Ok(DateTimeField::Julian), - Keyword::MICROSECOND => Ok(DateTimeField::Microsecond), - Keyword::MICROSECONDS => Ok(DateTimeField::Microseconds), - Keyword::MILLENIUM => Ok(DateTimeField::Millenium), - Keyword::MILLENNIUM => Ok(DateTimeField::Millennium), - Keyword::MILLISECOND => Ok(DateTimeField::Millisecond), - Keyword::MILLISECONDS => Ok(DateTimeField::Milliseconds), - Keyword::NANOSECOND => Ok(DateTimeField::Nanosecond), - Keyword::NANOSECONDS => Ok(DateTimeField::Nanoseconds), - Keyword::QUARTER => Ok(DateTimeField::Quarter), - Keyword::TIME => Ok(DateTimeField::Time), - Keyword::TIMEZONE => Ok(DateTimeField::Timezone), - Keyword::TIMEZONE_ABBR => Ok(DateTimeField::TimezoneAbbr), - Keyword::TIMEZONE_HOUR => Ok(DateTimeField::TimezoneHour), - Keyword::TIMEZONE_MINUTE => Ok(DateTimeField::TimezoneMinute), - Keyword::TIMEZONE_REGION => Ok(DateTimeField::TimezoneRegion), - _ if self.dialect.allow_extract_custom() => { - self.prev_token(); - let custom = self.parse_identifier(false)?; - Ok(DateTimeField::Custom(custom)) - } - _ => self.expected("date/time field", next_token), - }, - Token::SingleQuotedString(_) if self.dialect.allow_extract_single_quotes() => { - self.prev_token(); - let custom = self.parse_identifier(false)?; - Ok(DateTimeField::Custom(custom)) - } - _ => self.expected("date/time field", next_token), - } - } - - pub fn parse_not(&mut self) -> Result { - match self.peek_token().token { - Token::Word(w) => match w.keyword { - Keyword::EXISTS => { - let negated = true; - let _ = self.parse_keyword(Keyword::EXISTS); - self.parse_exists_expr(negated) - } - _ => Ok(Expr::UnaryOp { - op: UnaryOperator::Not, - expr: Box::new( - self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?, - ), - }), - }, - _ => Ok(Expr::UnaryOp { - op: UnaryOperator::Not, - expr: Box::new(self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?), - }), - } - } - - /// Parses fulltext expressions [`sqlparser::ast::Expr::MatchAgainst`] - /// - /// # Errors - /// This method will raise an error if the column list is empty or with invalid identifiers, - /// the match expression is not a literal string, or if the search modifier is not valid. 
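A quick sketch of how the date/time field parsing above surfaces through EXTRACT (illustrative only, not part of this diff; type and variant names taken from a recent sqlparser release):

```rust
use sqlparser::ast::{DateTimeField, Expr, ExtractSyntax};
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    let dialect = GenericDialect {};
    let expr = Parser::new(&dialect)
        .try_with_sql("EXTRACT(YEAR FROM order_date)")
        .unwrap()
        .parse_expr()
        .unwrap();
    match expr {
        // The keyword after the paren is resolved by parse_date_time_field,
        // and the FROM-vs-comma choice is recorded as ExtractSyntax.
        Expr::Extract { field, syntax, .. } => {
            assert_eq!(field, DateTimeField::Year);
            assert_eq!(syntax, ExtractSyntax::From);
        }
        other => panic!("expected Extract, got {other:?}"),
    }
}
```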
- pub fn parse_match_against(&mut self) -> Result { - let columns = self.parse_parenthesized_column_list(Mandatory, false)?; - - self.expect_keyword(Keyword::AGAINST)?; - - self.expect_token(&Token::LParen)?; - - // MySQL is too permissive about the value, IMO we can't validate it perfectly on syntax level. - let match_value = self.parse_value()?; - - let in_natural_language_mode_keywords = &[ - Keyword::IN, - Keyword::NATURAL, - Keyword::LANGUAGE, - Keyword::MODE, - ]; - - let with_query_expansion_keywords = &[Keyword::WITH, Keyword::QUERY, Keyword::EXPANSION]; - - let in_boolean_mode_keywords = &[Keyword::IN, Keyword::BOOLEAN, Keyword::MODE]; - - let opt_search_modifier = if self.parse_keywords(in_natural_language_mode_keywords) { - if self.parse_keywords(with_query_expansion_keywords) { - Some(SearchModifier::InNaturalLanguageModeWithQueryExpansion) - } else { - Some(SearchModifier::InNaturalLanguageMode) - } - } else if self.parse_keywords(in_boolean_mode_keywords) { - Some(SearchModifier::InBooleanMode) - } else if self.parse_keywords(with_query_expansion_keywords) { - Some(SearchModifier::WithQueryExpansion) - } else { - None - }; - - self.expect_token(&Token::RParen)?; - - Ok(Expr::MatchAgainst { - columns, - match_value, - opt_search_modifier, - }) - } - - /// Parse an `INTERVAL` expression. - /// - /// Some syntactically valid intervals: - /// - /// ```sql - /// 1. INTERVAL '1' DAY - /// 2. INTERVAL '1-1' YEAR TO MONTH - /// 3. INTERVAL '1' SECOND - /// 4. INTERVAL '1:1:1.1' HOUR (5) TO SECOND (5) - /// 5. INTERVAL '1.1' SECOND (2, 2) - /// 6. INTERVAL '1:1' HOUR (5) TO MINUTE (5) - /// 7. (MySql & BigQuery only): INTERVAL 1 DAY - /// ``` - /// - /// Note that we do not currently attempt to parse the quoted value. - pub fn parse_interval(&mut self) -> Result { - // The SQL standard allows an optional sign before the value string, but - // it is not clear if any implementations support that syntax, so we - // don't currently try to parse it. (The sign can instead be included - // inside the value string.) - - // to match the different flavours of INTERVAL syntax, we only allow expressions - // if the dialect requires an interval qualifier, - // see https://github.com/sqlparser-rs/sqlparser-rs/pull/1398 for more details - let value = if self.dialect.require_interval_qualifier() { - // parse a whole expression so `INTERVAL 1 + 1 DAY` is valid - self.parse_expr()? - } else { - // parse a prefix expression so `INTERVAL 1 DAY` is valid, but `INTERVAL 1 + 1 DAY` is not - // this also means that `INTERVAL '5 days' > INTERVAL '1 day'` treated properly - self.parse_prefix()? - }; - - // Following the string literal is a qualifier which indicates the units - // of the duration specified in the string literal. - // - // Note that PostgreSQL allows omitting the qualifier, so we provide - // this more general implementation. - let leading_field = if self.next_token_is_temporal_unit() { - Some(self.parse_date_time_field()?) - } else if self.dialect.require_interval_qualifier() { - return parser_err!( - "INTERVAL requires a unit after the literal value", - self.peek_token().span.start - ); - } else { - None - }; - - let (leading_precision, last_field, fsec_precision) = - if leading_field == Some(DateTimeField::Second) { - // SQL mandates special syntax for `SECOND TO SECOND` literals. 
- // Instead of
- // `SECOND [(<leading precision>)] TO SECOND[(<fractional seconds precision>)]`
- // one must use the special format:
- // `SECOND [( <leading precision> [ , <fractional seconds precision>] )]`
- let last_field = None;
- let (leading_precision, fsec_precision) = self.parse_optional_precision_scale()?;
- (leading_precision, last_field, fsec_precision)
- } else {
- let leading_precision = self.parse_optional_precision()?;
- if self.parse_keyword(Keyword::TO) {
- let last_field = Some(self.parse_date_time_field()?);
- let fsec_precision = if last_field == Some(DateTimeField::Second) {
- self.parse_optional_precision()?
- } else {
- None
- };
- (leading_precision, last_field, fsec_precision)
- } else {
- (leading_precision, None, None)
- }
- };
-
- Ok(Expr::Interval(Interval {
- value: Box::new(value),
- leading_field,
- leading_precision,
- last_field,
- fractional_seconds_precision: fsec_precision,
- }))
- }
-
- /// Peek at the next token and determine if it is a temporal unit
- /// like `second`.
- pub fn next_token_is_temporal_unit(&mut self) -> bool {
- if let Token::Word(word) = self.peek_token().token {
- matches!(
- word.keyword,
- Keyword::YEAR
- | Keyword::MONTH
- | Keyword::WEEK
- | Keyword::DAY
- | Keyword::HOUR
- | Keyword::MINUTE
- | Keyword::SECOND
- | Keyword::CENTURY
- | Keyword::DECADE
- | Keyword::DOW
- | Keyword::DOY
- | Keyword::EPOCH
- | Keyword::ISODOW
- | Keyword::ISOYEAR
- | Keyword::JULIAN
- | Keyword::MICROSECOND
- | Keyword::MICROSECONDS
- | Keyword::MILLENIUM
- | Keyword::MILLENNIUM
- | Keyword::MILLISECOND
- | Keyword::MILLISECONDS
- | Keyword::NANOSECOND
- | Keyword::NANOSECONDS
- | Keyword::QUARTER
- | Keyword::TIMEZONE
- | Keyword::TIMEZONE_HOUR
- | Keyword::TIMEZONE_MINUTE
- )
- } else {
- false
- }
- }
-
- /// Syntax
- /// ```sql
- /// -- typed
- /// STRUCT<[field_name] field_type, ...>( expr1 [, ... ])
- /// -- typeless
- /// STRUCT( expr1 [AS field_name] [, ... ])
- /// ```
- fn parse_struct_literal(&mut self) -> Result<Expr, ParserError> {
- // Parse the fields definition if they exist: `<[field_name] field_type, ...>`
- self.prev_token();
- let (fields, trailing_bracket) =
- self.parse_struct_type_def(Self::parse_struct_field_def)?;
- if trailing_bracket.0 {
- return parser_err!(
- "unmatched > in STRUCT literal",
- self.peek_token().span.start
- );
- }
-
- // Parse the struct values `(expr1 [, ... ])`
- self.expect_token(&Token::LParen)?;
- let values = self
- .parse_comma_separated(|parser| parser.parse_struct_field_expr(!fields.is_empty()))?;
- self.expect_token(&Token::RParen)?;
-
- Ok(Expr::Struct { values, fields })
- }
-
- /// Parse an expression value for a struct literal
- /// Syntax
- /// ```sql
- /// expr [AS name]
- /// ```
- ///
- /// For BigQuery [1], the parameter `typed_syntax` is set to true if the expression
- /// is to be parsed as a field expression declared using typed
- /// struct syntax [2], and false if using typeless struct syntax [3].
- ///
- /// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#constructing_a_struct
- /// [2]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typed_struct_syntax
- /// [3]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typeless_struct_syntax
- fn parse_struct_field_expr(&mut self, typed_syntax: bool) -> Result<Expr, ParserError> {
- let expr = self.parse_expr()?;
- if self.parse_keyword(Keyword::AS) {
- if typed_syntax {
- return parser_err!("Typed syntax does not allow AS", {
- self.prev_token();
- self.peek_token().span.start
- });
- }
- let field_name = self.parse_identifier(false)?;
- Ok(Expr::Named {
- expr: expr.into(),
- name: field_name,
- })
- } else {
- Ok(expr)
- }
- }
-
- /// Parse a Struct type definition as a sequence of field-value pairs.
- /// The syntax of the Struct elem differs by dialect so it is customised
- /// by the `elem_parser` argument.
- ///
- /// Syntax
- /// ```sql
- /// Hive:
- /// STRUCT<field_name: field_type>
- ///
- /// BigQuery:
- /// STRUCT<[field_name] field_type>
- /// ```
- fn parse_struct_type_def(
- &mut self,
- mut elem_parser: F,
- ) -> Result<(Vec<StructField>, MatchedTrailingBracket), ParserError>
- where
- F: FnMut(&mut Parser<'a>) -> Result<(StructField, MatchedTrailingBracket), ParserError>,
- {
- let start_token = self.peek_token();
- self.expect_keyword(Keyword::STRUCT)?;
-
- // Nothing to do if we have no type information.
- if Token::Lt != self.peek_token() {
- return Ok((Default::default(), false.into()));
- }
- self.next_token();
-
- let mut field_defs = vec![];
- let trailing_bracket = loop {
- let (def, trailing_bracket) = elem_parser(self)?;
- field_defs.push(def);
- if !self.consume_token(&Token::Comma) {
- break trailing_bracket;
- }
-
- // Angle brackets are balanced so we only expect the trailing `>>` after
- // we've matched all field types for the current struct.
- // e.g. this is invalid syntax `STRUCT<STRUCT<INT>>, INT>(NULL)`
- if trailing_bracket.0 {
- return parser_err!("unmatched > in STRUCT definition", start_token.span.start);
- }
- };
-
- Ok((
- field_defs,
- self.expect_closing_angle_bracket(trailing_bracket)?,
- ))
- }
-
- /// Duckdb Struct Data Type
- fn parse_duckdb_struct_type_def(&mut self) -> Result<Vec<StructField>, ParserError> {
- self.expect_keyword(Keyword::STRUCT)?;
- self.expect_token(&Token::LParen)?;
- let struct_body = self.parse_comma_separated(|parser| {
- let field_name = parser.parse_identifier(false)?;
- let field_type = parser.parse_data_type()?;
-
- Ok(StructField {
- field_name: Some(field_name),
- field_type,
- })
- });
- self.expect_token(&Token::RParen)?;
- struct_body
- }
-
- /// Parse a field definition in a [struct] or [tuple].
- /// Syntax:
- ///
- /// ```sql
- /// [field_name] field_type
- /// ```
- ///
- /// [struct]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#declaring_a_struct_type
- /// [tuple]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple
- fn parse_struct_field_def(
- &mut self,
- ) -> Result<(StructField, MatchedTrailingBracket), ParserError> {
- // Look beyond the next item to infer whether both field name
- // and type are specified.
- let is_anonymous_field = !matches!(
- (self.peek_nth_token(0).token, self.peek_nth_token(1).token),
- (Token::Word(_), Token::Word(_))
- );
-
- let field_name = if is_anonymous_field {
- None
- } else {
- Some(self.parse_identifier(false)?)
- }; - - let (field_type, trailing_bracket) = self.parse_data_type_helper()?; - - Ok(( - StructField { - field_name, - field_type, - }, - trailing_bracket, - )) - } - - /// DuckDB specific: Parse a Union type definition as a sequence of field-value pairs. - /// - /// Syntax: - /// - /// ```sql - /// UNION(field_name field_type[,...]) - /// ``` - /// - /// [1]: https://duckdb.org/docs/sql/data_types/union.html - fn parse_union_type_def(&mut self) -> Result, ParserError> { - self.expect_keyword(Keyword::UNION)?; - - self.expect_token(&Token::LParen)?; - - let fields = self.parse_comma_separated(|p| { - Ok(UnionField { - field_name: p.parse_identifier(false)?, - field_type: p.parse_data_type()?, - }) - })?; - - self.expect_token(&Token::RParen)?; - - Ok(fields) - } - - /// DuckDB specific: Parse a duckdb [dictionary] - /// - /// Syntax: - /// - /// ```sql - /// {'field_name': expr1[, ... ]} - /// ``` - /// - /// [dictionary]: https://duckdb.org/docs/sql/data_types/struct#creating-structs - fn parse_duckdb_struct_literal(&mut self) -> Result { - self.expect_token(&Token::LBrace)?; - - let fields = self.parse_comma_separated(Self::parse_duckdb_dictionary_field)?; - - self.expect_token(&Token::RBrace)?; - - Ok(Expr::Dictionary(fields)) - } - - /// Parse a field for a duckdb [dictionary] - /// - /// Syntax - /// - /// ```sql - /// 'name': expr - /// ``` - /// - /// [dictionary]: https://duckdb.org/docs/sql/data_types/struct#creating-structs - fn parse_duckdb_dictionary_field(&mut self) -> Result { - let key = self.parse_identifier(false)?; - - self.expect_token(&Token::Colon)?; - - let expr = self.parse_expr()?; - - Ok(DictionaryField { - key, - value: Box::new(expr), - }) - } - - /// DuckDB specific: Parse a duckdb [map] - /// - /// Syntax: - /// - /// ```sql - /// Map {key1: value1[, ... ]} - /// ``` - /// - /// [map]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps - fn parse_duckdb_map_literal(&mut self) -> Result { - self.expect_token(&Token::LBrace)?; - let fields = self.parse_comma_separated0(Self::parse_duckdb_map_field, Token::RBrace)?; - self.expect_token(&Token::RBrace)?; - Ok(Expr::Map(Map { entries: fields })) - } - - /// Parse a field for a duckdb [map] - /// - /// Syntax - /// - /// ```sql - /// key: value - /// ``` - /// - /// [map]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps - fn parse_duckdb_map_field(&mut self) -> Result { - let key = self.parse_expr()?; - - self.expect_token(&Token::Colon)?; - - let value = self.parse_expr()?; - - Ok(MapEntry { - key: Box::new(key), - value: Box::new(value), - }) - } - - /// Parse clickhouse [map] - /// - /// Syntax - /// - /// ```sql - /// Map(key_data_type, value_data_type) - /// ``` - /// - /// [map]: https://clickhouse.com/docs/en/sql-reference/data-types/map - fn parse_click_house_map_def(&mut self) -> Result<(DataType, DataType), ParserError> { - self.expect_keyword(Keyword::MAP)?; - self.expect_token(&Token::LParen)?; - let key_data_type = self.parse_data_type()?; - self.expect_token(&Token::Comma)?; - let value_data_type = self.parse_data_type()?; - self.expect_token(&Token::RParen)?; - - Ok((key_data_type, value_data_type)) - } - - /// Parse clickhouse [tuple] - /// - /// Syntax - /// - /// ```sql - /// Tuple([field_name] field_type, ...) 
- /// ```
- ///
- /// [tuple]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple
- fn parse_click_house_tuple_def(&mut self) -> Result<Vec<StructField>, ParserError> {
- self.expect_keyword(Keyword::TUPLE)?;
- self.expect_token(&Token::LParen)?;
- let mut field_defs = vec![];
- loop {
- let (def, _) = self.parse_struct_field_def()?;
- field_defs.push(def);
- if !self.consume_token(&Token::Comma) {
- break;
- }
- }
- self.expect_token(&Token::RParen)?;
-
- Ok(field_defs)
- }
-
- /// For nested types that use the angle bracket syntax, this matches either
- /// `>`, `>>` or nothing depending on which variant is expected (specified by the previously
- /// matched `trailing_bracket` argument). It returns whether there is a trailing
- /// `>` left to be matched (i.e. if `>>` was matched).
- fn expect_closing_angle_bracket(
- &mut self,
- trailing_bracket: MatchedTrailingBracket,
- ) -> Result<MatchedTrailingBracket, ParserError> {
- let trailing_bracket = if !trailing_bracket.0 {
- match self.peek_token().token {
- Token::Gt => {
- self.next_token();
- false.into()
- }
- Token::ShiftRight => {
- self.next_token();
- true.into()
- }
- _ => return self.expected(">", self.peek_token()),
- }
- } else {
- false.into()
- };
-
- Ok(trailing_bracket)
- }
-
- /// Parse an operator following an expression
- pub fn parse_infix(&mut self, expr: Expr, precedence: u8) -> Result<Expr, ParserError> {
- // allow the dialect to override infix parsing
- if let Some(infix) = self.dialect.parse_infix(self, &expr, precedence) {
- return infix;
- }
-
- let mut tok = self.next_token();
- let regular_binary_operator = match &mut tok.token {
- Token::Spaceship => Some(BinaryOperator::Spaceship),
- Token::DoubleEq => Some(BinaryOperator::Eq),
- Token::Eq => Some(BinaryOperator::Eq),
- Token::Neq => Some(BinaryOperator::NotEq),
- Token::Gt => Some(BinaryOperator::Gt),
- Token::GtEq => Some(BinaryOperator::GtEq),
- Token::Lt => Some(BinaryOperator::Lt),
- Token::LtEq => Some(BinaryOperator::LtEq),
- Token::Plus => Some(BinaryOperator::Plus),
- Token::Minus => Some(BinaryOperator::Minus),
- Token::Mul => Some(BinaryOperator::Multiply),
- Token::Mod => Some(BinaryOperator::Modulo),
- Token::StringConcat => Some(BinaryOperator::StringConcat),
- Token::Pipe => Some(BinaryOperator::BitwiseOr),
- Token::Caret => {
- // In PostgreSQL, ^ stands for the exponentiation operation,
- // and # stands for XOR.
See https://www.postgresql.org/docs/current/functions-math.html - if dialect_of!(self is PostgreSqlDialect) { - Some(BinaryOperator::PGExp) - } else { - Some(BinaryOperator::BitwiseXor) - } - } - Token::Ampersand => Some(BinaryOperator::BitwiseAnd), - Token::Div => Some(BinaryOperator::Divide), - Token::DuckIntDiv if dialect_of!(self is DuckDbDialect | GenericDialect) => { - Some(BinaryOperator::DuckIntegerDivide) - } - Token::ShiftLeft if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) => { - Some(BinaryOperator::PGBitwiseShiftLeft) - } - Token::ShiftRight if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) => { - Some(BinaryOperator::PGBitwiseShiftRight) - } - Token::Sharp if dialect_of!(self is PostgreSqlDialect) => { - Some(BinaryOperator::PGBitwiseXor) - } - Token::Overlap if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { - Some(BinaryOperator::PGOverlap) - } - Token::CaretAt if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { - Some(BinaryOperator::PGStartsWith) - } - Token::Tilde => Some(BinaryOperator::PGRegexMatch), - Token::TildeAsterisk => Some(BinaryOperator::PGRegexIMatch), - Token::ExclamationMarkTilde => Some(BinaryOperator::PGRegexNotMatch), - Token::ExclamationMarkTildeAsterisk => Some(BinaryOperator::PGRegexNotIMatch), - Token::DoubleTilde => Some(BinaryOperator::PGLikeMatch), - Token::DoubleTildeAsterisk => Some(BinaryOperator::PGILikeMatch), - Token::ExclamationMarkDoubleTilde => Some(BinaryOperator::PGNotLikeMatch), - Token::ExclamationMarkDoubleTildeAsterisk => Some(BinaryOperator::PGNotILikeMatch), - Token::Arrow => Some(BinaryOperator::Arrow), - Token::LongArrow => Some(BinaryOperator::LongArrow), - Token::HashArrow => Some(BinaryOperator::HashArrow), - Token::HashLongArrow => Some(BinaryOperator::HashLongArrow), - Token::AtArrow => Some(BinaryOperator::AtArrow), - Token::ArrowAt => Some(BinaryOperator::ArrowAt), - Token::HashMinus => Some(BinaryOperator::HashMinus), - Token::AtQuestion => Some(BinaryOperator::AtQuestion), - Token::AtAt => Some(BinaryOperator::AtAt), - Token::Question => Some(BinaryOperator::Question), - Token::QuestionAnd => Some(BinaryOperator::QuestionAnd), - Token::QuestionPipe => Some(BinaryOperator::QuestionPipe), - Token::CustomBinaryOperator(s) => Some(BinaryOperator::Custom(core::mem::take(s))), - - Token::Word(w) => match w.keyword { - Keyword::AND => Some(BinaryOperator::And), - Keyword::OR => Some(BinaryOperator::Or), - Keyword::XOR => Some(BinaryOperator::Xor), - Keyword::OPERATOR if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { - self.expect_token(&Token::LParen)?; - // there are special rules for operator names in - // postgres so we can not use 'parse_object' - // or similar. - // See https://www.postgresql.org/docs/current/sql-createoperator.html - let mut idents = vec![]; - loop { - idents.push(self.next_token().to_string()); - if !self.consume_token(&Token::Period) { - break; - } - } - self.expect_token(&Token::RParen)?; - Some(BinaryOperator::PGCustomBinaryOperator(idents)) - } - _ => None, - }, - _ => None, - }; - - if let Some(op) = regular_binary_operator { - if let Some(keyword) = - self.parse_one_of_keywords(&[Keyword::ANY, Keyword::ALL, Keyword::SOME]) - { - self.expect_token(&Token::LParen)?; - let right = if self.peek_sub_query() { - // We have a subquery ahead (SELECT\WITH ...) need to rewind and - // use the parenthesis for parsing the subquery as an expression. - self.prev_token(); // LParen - self.parse_subexpr(precedence)? 
- } else {
- // Non-subquery expression
- let right = self.parse_subexpr(precedence)?;
- self.expect_token(&Token::RParen)?;
- right
- };
-
- if !matches!(
- op,
- BinaryOperator::Gt
- | BinaryOperator::Lt
- | BinaryOperator::GtEq
- | BinaryOperator::LtEq
- | BinaryOperator::Eq
- | BinaryOperator::NotEq
- ) {
- return parser_err!(
- format!(
- "Expected one of [=, >, <, >=, <=, !=] as comparison operator, found: {op}"
- ),
- tok.span.start
- );
- };
-
- Ok(match keyword {
- Keyword::ALL => Expr::AllOp {
- left: Box::new(expr),
- compare_op: op,
- right: Box::new(right),
- },
- Keyword::ANY | Keyword::SOME => Expr::AnyOp {
- left: Box::new(expr),
- compare_op: op,
- right: Box::new(right),
- is_some: keyword == Keyword::SOME,
- },
- _ => unreachable!(),
- })
- } else {
- Ok(Expr::BinaryOp {
- left: Box::new(expr),
- op,
- right: Box::new(self.parse_subexpr(precedence)?),
- })
- }
- } else if let Token::Word(w) = &tok.token {
- match w.keyword {
- Keyword::IS => {
- if self.parse_keyword(Keyword::NULL) {
- Ok(Expr::IsNull(Box::new(expr)))
- } else if self.parse_keywords(&[Keyword::NOT, Keyword::NULL]) {
- Ok(Expr::IsNotNull(Box::new(expr)))
- } else if self.parse_keywords(&[Keyword::TRUE]) {
- Ok(Expr::IsTrue(Box::new(expr)))
- } else if self.parse_keywords(&[Keyword::NOT, Keyword::TRUE]) {
- Ok(Expr::IsNotTrue(Box::new(expr)))
- } else if self.parse_keywords(&[Keyword::FALSE]) {
- Ok(Expr::IsFalse(Box::new(expr)))
- } else if self.parse_keywords(&[Keyword::NOT, Keyword::FALSE]) {
- Ok(Expr::IsNotFalse(Box::new(expr)))
- } else if self.parse_keywords(&[Keyword::UNKNOWN]) {
- Ok(Expr::IsUnknown(Box::new(expr)))
- } else if self.parse_keywords(&[Keyword::NOT, Keyword::UNKNOWN]) {
- Ok(Expr::IsNotUnknown(Box::new(expr)))
- } else if self.parse_keywords(&[Keyword::DISTINCT, Keyword::FROM]) {
- let expr2 = self.parse_expr()?;
- Ok(Expr::IsDistinctFrom(Box::new(expr), Box::new(expr2)))
- } else if self.parse_keywords(&[Keyword::NOT, Keyword::DISTINCT, Keyword::FROM])
- {
- let expr2 = self.parse_expr()?;
- Ok(Expr::IsNotDistinctFrom(Box::new(expr), Box::new(expr2)))
- } else {
- self.expected(
- "[NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS",
- self.peek_token(),
- )
- }
- }
- Keyword::AT => {
- self.expect_keywords(&[Keyword::TIME, Keyword::ZONE])?;
- Ok(Expr::AtTimeZone {
- timestamp: Box::new(expr),
- time_zone: Box::new(self.parse_subexpr(precedence)?),
- })
- }
- Keyword::NOT
- | Keyword::IN
- | Keyword::BETWEEN
- | Keyword::LIKE
- | Keyword::ILIKE
- | Keyword::SIMILAR
- | Keyword::REGEXP
- | Keyword::RLIKE => {
- self.prev_token();
- let negated = self.parse_keyword(Keyword::NOT);
- let regexp = self.parse_keyword(Keyword::REGEXP);
- let rlike = self.parse_keyword(Keyword::RLIKE);
- if regexp || rlike {
- Ok(Expr::RLike {
- negated,
- expr: Box::new(expr),
- pattern: Box::new(
- self.parse_subexpr(self.dialect.prec_value(Precedence::Like))?,
- ),
- regexp,
- })
- } else if self.parse_keyword(Keyword::IN) {
- self.parse_in(expr, negated)
- } else if self.parse_keyword(Keyword::BETWEEN) {
- self.parse_between(expr, negated)
- } else if self.parse_keyword(Keyword::LIKE) {
- Ok(Expr::Like {
- negated,
- any: self.parse_keyword(Keyword::ANY),
- expr: Box::new(expr),
- pattern: Box::new(
- self.parse_subexpr(self.dialect.prec_value(Precedence::Like))?,
- ),
- escape_char: self.parse_escape_char()?,
- })
- } else if self.parse_keyword(Keyword::ILIKE) {
- Ok(Expr::ILike {
- negated,
- any: self.parse_keyword(Keyword::ANY),
- expr: Box::new(expr),
- pattern: Box::new(
self.parse_subexpr(self.dialect.prec_value(Precedence::Like))?, - ), - escape_char: self.parse_escape_char()?, - }) - } else if self.parse_keywords(&[Keyword::SIMILAR, Keyword::TO]) { - Ok(Expr::SimilarTo { - negated, - expr: Box::new(expr), - pattern: Box::new( - self.parse_subexpr(self.dialect.prec_value(Precedence::Like))?, - ), - escape_char: self.parse_escape_char()?, - }) - } else { - self.expected("IN or BETWEEN after NOT", self.peek_token()) - } - } - // Can only happen if `get_next_precedence` got out of sync with this function - _ => parser_err!( - format!("No infix parser for token {:?}", tok.token), - tok.span.start - ), - } - } else if Token::DoubleColon == tok { - Ok(Expr::Cast { - kind: CastKind::DoubleColon, - expr: Box::new(expr), - data_type: self.parse_data_type()?, - format: None, - }) - } else if Token::ExclamationMark == tok && self.dialect.supports_factorial_operator() { - Ok(Expr::UnaryOp { - op: UnaryOperator::PGPostfixFactorial, - expr: Box::new(expr), - }) - } else if Token::LBracket == tok { - if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) { - self.parse_subscript(expr) - } else if dialect_of!(self is SnowflakeDialect) || self.dialect.supports_partiql() { - self.prev_token(); - self.parse_json_access(expr) - } else { - self.parse_map_access(expr) - } - } else if dialect_of!(self is SnowflakeDialect | GenericDialect) && Token::Colon == tok { - self.prev_token(); - self.parse_json_access(expr) - } else { - // Can only happen if `get_next_precedence` got out of sync with this function - parser_err!( - format!("No infix parser for token {:?}", tok.token), - tok.span.start - ) - } - } - - /// Parse the `ESCAPE CHAR` portion of `LIKE`, `ILIKE`, and `SIMILAR TO` - pub fn parse_escape_char(&mut self) -> Result, ParserError> { - if self.parse_keyword(Keyword::ESCAPE) { - Ok(Some(self.parse_literal_string()?)) - } else { - Ok(None) - } - } - - /// Parses an array subscript like - /// * `[:]` - /// * `[l]` - /// * `[l:]` - /// * `[:u]` - /// * `[l:u]` - /// * `[l:u:s]` - /// - /// Parser is right after `[` - fn parse_subscript_inner(&mut self) -> Result { - // at either `:(rest)` or `:(rest)]` - let lower_bound = if self.consume_token(&Token::Colon) { - None - } else { - Some(self.parse_expr()?) - }; - - // check for end - if self.consume_token(&Token::RBracket) { - if let Some(lower_bound) = lower_bound { - return Ok(Subscript::Index { index: lower_bound }); - }; - return Ok(Subscript::Slice { - lower_bound, - upper_bound: None, - stride: None, - }); - } - - // consume the `:` - if lower_bound.is_some() { - self.expect_token(&Token::Colon)?; - } - - // we are now at either `]`, `(rest)]` - let upper_bound = if self.consume_token(&Token::RBracket) { - return Ok(Subscript::Slice { - lower_bound, - upper_bound: None, - stride: None, - }); - } else { - Some(self.parse_expr()?) - }; - - // check for end - if self.consume_token(&Token::RBracket) { - return Ok(Subscript::Slice { - lower_bound, - upper_bound, - stride: None, - }); - } - - // we are now at `:]` or `:stride]` - self.expect_token(&Token::Colon)?; - let stride = if self.consume_token(&Token::RBracket) { - None - } else { - Some(self.parse_expr()?) 
- }; - - if stride.is_some() { - self.expect_token(&Token::RBracket)?; - } - - Ok(Subscript::Slice { - lower_bound, - upper_bound, - stride, - }) - } - - /// Parses an array subscript like `[1:3]` - /// - /// Parser is right after `[` - pub fn parse_subscript(&mut self, expr: Expr) -> Result { - let subscript = self.parse_subscript_inner()?; - Ok(Expr::Subscript { - expr: Box::new(expr), - subscript: Box::new(subscript), - }) - } - - fn parse_json_path_object_key(&mut self) -> Result { - let token = self.next_token(); - match token.token { - Token::Word(Word { - value, - // path segments in SF dot notation can be unquoted or double-quoted - quote_style: quote_style @ (Some('"') | None), - // some experimentation suggests that snowflake permits - // any keyword here unquoted. - keyword: _, - }) => Ok(JsonPathElem::Dot { - key: value, - quoted: quote_style.is_some(), - }), - - // This token should never be generated on snowflake or generic - // dialects, but we handle it just in case this is used on future - // dialects. - Token::DoubleQuotedString(key) => Ok(JsonPathElem::Dot { key, quoted: true }), - - _ => self.expected("variant object key name", token), - } - } - - fn parse_json_access(&mut self, expr: Expr) -> Result { - let path = self.parse_json_path()?; - Ok(Expr::JsonAccess { - value: Box::new(expr), - path, - }) - } - - fn parse_json_path(&mut self) -> Result { - let mut path = Vec::new(); - loop { - match self.next_token().token { - Token::Colon if path.is_empty() => { - path.push(self.parse_json_path_object_key()?); - } - Token::Period if !path.is_empty() => { - path.push(self.parse_json_path_object_key()?); - } - Token::LBracket => { - let key = self.parse_expr()?; - self.expect_token(&Token::RBracket)?; - - path.push(JsonPathElem::Bracket { key }); - } - _ => { - self.prev_token(); - break; - } - }; - } - - debug_assert!(!path.is_empty()); - Ok(JsonPath { path }) - } - - pub fn parse_map_access(&mut self, expr: Expr) -> Result { - let key = self.parse_expr()?; - self.expect_token(&Token::RBracket)?; - - let mut keys = vec![MapAccessKey { - key, - syntax: MapAccessSyntax::Bracket, - }]; - loop { - let key = match self.peek_token().token { - Token::LBracket => { - self.next_token(); // consume `[` - let key = self.parse_expr()?; - self.expect_token(&Token::RBracket)?; - MapAccessKey { - key, - syntax: MapAccessSyntax::Bracket, - } - } - // Access on BigQuery nested and repeated expressions can - // mix notations in the same expression. - // https://cloud.google.com/bigquery/docs/nested-repeated#query_nested_and_repeated_columns - Token::Period if dialect_of!(self is BigQueryDialect) => { - self.next_token(); // consume `.` - MapAccessKey { - key: self.parse_expr()?, - syntax: MapAccessSyntax::Period, - } - } - _ => break, - }; - keys.push(key); - } - - Ok(Expr::MapAccess { - column: Box::new(expr), - keys, - }) - } - - /// Parses the parens following the `[ NOT ] IN` operator. 
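For reference, the subscript path above can be exercised end to end; a small sketch under the PostgreSQL dialect (illustrative only, not part of this diff; variant names from a recent sqlparser release):

```rust
use sqlparser::ast::{Expr, Subscript};
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::parser::Parser;

fn main() {
    let dialect = PostgreSqlDialect {};
    let expr = Parser::new(&dialect)
        .try_with_sql("arr[2:5]")
        .unwrap()
        .parse_expr()
        .unwrap();
    // `[2:5]` parses as a slice with no stride; a bare `[2]` would be Subscript::Index.
    if let Expr::Subscript { subscript, .. } = expr {
        match *subscript {
            Subscript::Slice { lower_bound, upper_bound, stride } => {
                assert!(lower_bound.is_some() && upper_bound.is_some());
                assert!(stride.is_none());
            }
            Subscript::Index { .. } => panic!("expected a slice"),
        }
    }
}
```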
- pub fn parse_in(&mut self, expr: Expr, negated: bool) -> Result<Expr, ParserError> {
- // BigQuery allows `IN UNNEST(array_expression)`
- // https://cloud.google.com/bigquery/docs/reference/standard-sql/operators#in_operators
- if self.parse_keyword(Keyword::UNNEST) {
- self.expect_token(&Token::LParen)?;
- let array_expr = self.parse_expr()?;
- self.expect_token(&Token::RParen)?;
- return Ok(Expr::InUnnest {
- expr: Box::new(expr),
- array_expr: Box::new(array_expr),
- negated,
- });
- }
- self.expect_token(&Token::LParen)?;
- let in_op = if self.parse_keyword(Keyword::SELECT) || self.parse_keyword(Keyword::WITH) {
- self.prev_token();
- Expr::InSubquery {
- expr: Box::new(expr),
- subquery: self.parse_query()?,
- negated,
- }
- } else {
- Expr::InList {
- expr: Box::new(expr),
- list: if self.dialect.supports_in_empty_list() {
- self.parse_comma_separated0(Parser::parse_expr, Token::RParen)?
- } else {
- self.parse_comma_separated(Parser::parse_expr)?
- },
- negated,
- }
- };
- self.expect_token(&Token::RParen)?;
- Ok(in_op)
- }
-
- /// Parses `BETWEEN <low> AND <high>`, assuming the `BETWEEN` keyword was already consumed.
- pub fn parse_between(&mut self, expr: Expr, negated: bool) -> Result<Expr, ParserError> {
- // Stop parsing subexpressions for <low> and <high> on tokens with
- // precedence lower than that of `BETWEEN`, such as `AND`, `IS`, etc.
- let low = self.parse_subexpr(self.dialect.prec_value(Precedence::Between))?;
- self.expect_keyword(Keyword::AND)?;
- let high = self.parse_subexpr(self.dialect.prec_value(Precedence::Between))?;
- Ok(Expr::Between {
- expr: Box::new(expr),
- negated,
- low: Box::new(low),
- high: Box::new(high),
- })
- }
-
- /// Parse a PostgreSQL casting style which is in the form of `expr::datatype`.
- pub fn parse_pg_cast(&mut self, expr: Expr) -> Result<Expr, ParserError> {
- Ok(Expr::Cast {
- kind: CastKind::DoubleColon,
- expr: Box::new(expr),
- data_type: self.parse_data_type()?,
- format: None,
- })
- }
-
- /// Get the precedence of the next token
- pub fn get_next_precedence(&self) -> Result<u8, ParserError> {
- self.dialect.get_next_precedence_default(self)
- }
-
- /// Return the first non-whitespace token that has not yet been processed
- /// (or None if reached end-of-file)
- pub fn peek_token(&self) -> TokenWithSpan {
- self.peek_nth_token(0)
- }
-
- /// Returns the `N` next non-whitespace tokens that have not yet been
- /// processed.
- ///
- /// Example:
- /// ```rust
- /// # use sqlparser::dialect::GenericDialect;
- /// # use sqlparser::parser::Parser;
- /// # use sqlparser::keywords::Keyword;
- /// # use sqlparser::tokenizer::{Token, Word};
- /// let dialect = GenericDialect {};
- /// let mut parser = Parser::new(&dialect).try_with_sql("ORDER BY foo, bar").unwrap();
- ///
- /// // Note that Rust infers the number of tokens to peek based on the
- /// // length of the slice pattern!
- /// assert!(matches!(
- /// parser.peek_tokens(),
- /// [
- /// Token::Word(Word { keyword: Keyword::ORDER, .. }),
- /// Token::Word(Word { keyword: Keyword::BY, .. }),
- /// ]
- /// ));
- /// ```
- pub fn peek_tokens<const N: usize>(&self) -> [Token; N] {
- self.peek_tokens_with_location()
- .map(|with_loc| with_loc.token)
- }
-
- /// Returns the `N` next non-whitespace tokens with locations that have not
- /// yet been processed.
- ///
- /// See [`Self::peek_token`] for an example.
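One precedence subtlety worth calling out for parse_between above: low and high are parsed at BETWEEN precedence, so a trailing AND still belongs to the enclosing expression. A sketch (illustrative only, not part of this diff):

```rust
use sqlparser::ast::Expr;
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    let dialect = GenericDialect {};
    let expr = Parser::new(&dialect)
        .try_with_sql("qty NOT BETWEEN 1 AND 10 AND price > 0")
        .unwrap()
        .parse_expr()
        .unwrap();
    // The first AND is consumed by BETWEEN; the second becomes the top-level BinaryOp.
    if let Expr::BinaryOp { left, .. } = expr {
        assert!(matches!(*left, Expr::Between { negated: true, .. }));
    } else {
        panic!("expected a binary AND at the top");
    }
}
```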
- pub fn peek_tokens_with_location(&self) -> [TokenWithSpan; N] { - let mut index = self.index; - core::array::from_fn(|_| loop { - let token = self.tokens.get(index); - index += 1; - if let Some(TokenWithSpan { - token: Token::Whitespace(_), - span: _, - }) = token - { - continue; - } - break token.cloned().unwrap_or(TokenWithSpan { - token: Token::EOF, - span: Span::empty(), - }); - }) - } - - /// Return nth non-whitespace token that has not yet been processed - pub fn peek_nth_token(&self, mut n: usize) -> TokenWithSpan { - let mut index = self.index; - loop { - index += 1; - match self.tokens.get(index - 1) { - Some(TokenWithSpan { - token: Token::Whitespace(_), - span: _, - }) => continue, - non_whitespace => { - if n == 0 { - return non_whitespace.cloned().unwrap_or(TokenWithSpan { - token: Token::EOF, - span: Span::empty(), - }); - } - n -= 1; - } - } - } - } - - /// Return the first token, possibly whitespace, that has not yet been processed - /// (or None if reached end-of-file). - pub fn peek_token_no_skip(&self) -> TokenWithSpan { - self.peek_nth_token_no_skip(0) - } - - /// Return nth token, possibly whitespace, that has not yet been processed. - pub fn peek_nth_token_no_skip(&self, n: usize) -> TokenWithSpan { - self.tokens - .get(self.index + n) - .cloned() - .unwrap_or(TokenWithSpan { - token: Token::EOF, - span: Span::empty(), - }) - } - - /// Look for all of the expected keywords in sequence, without consuming them - fn peek_keywords(&mut self, expected: &[Keyword]) -> bool { - let index = self.index; - let matched = self.parse_keywords(expected); - self.index = index; - matched - } - - /// Return the first non-whitespace token that has not yet been processed - /// (or None if reached end-of-file) and mark it as processed. OK to call - /// repeatedly after reaching EOF. - pub fn next_token(&mut self) -> TokenWithSpan { - loop { - self.index += 1; - match self.tokens.get(self.index - 1) { - Some(TokenWithSpan { - token: Token::Whitespace(_), - span: _, - }) => continue, - token => { - return token - .cloned() - .unwrap_or_else(|| TokenWithSpan::wrap(Token::EOF)) - } - } - } - } - - /// Return the first unprocessed token, possibly whitespace. - pub fn next_token_no_skip(&mut self) -> Option<&TokenWithSpan> { - self.index += 1; - self.tokens.get(self.index - 1) - } - - /// Push back the last one non-whitespace token. Must be called after - /// `next_token()`, otherwise might panic. OK to call after - /// `next_token()` indicates an EOF. - pub fn prev_token(&mut self) { - loop { - assert!(self.index > 0); - self.index -= 1; - if let Some(TokenWithSpan { - token: Token::Whitespace(_), - span: _, - }) = self.tokens.get(self.index) - { - continue; - } - return; - } - } - - /// Report `found` was encountered instead of `expected` - pub fn expected(&self, expected: &str, found: TokenWithSpan) -> Result { - parser_err!( - format!("Expected: {expected}, found: {found}"), - found.span.start - ) - } - - /// If the current token is the `expected` keyword, consume it and returns - /// true. Otherwise, no tokens are consumed and returns false. 
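The peek/next/prev trio above is the backbone of all the keyword helpers that follow; a tiny sketch of the contract (illustrative only, not part of this diff):

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;
use sqlparser::tokenizer::Token;

fn main() {
    let dialect = GenericDialect {};
    let mut parser = Parser::new(&dialect).try_with_sql("SELECT 1").unwrap();

    // Peeking never advances the index...
    assert!(matches!(parser.peek_token().token, Token::Word(_)));
    // ...while next_token() does, and prev_token() rewinds exactly one
    // non-whitespace token, so the same token comes back.
    let first = parser.next_token().token;
    parser.prev_token();
    assert_eq!(first, parser.next_token().token);
}
```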
- #[must_use] - pub fn parse_keyword(&mut self, expected: Keyword) -> bool { - self.parse_keyword_token(expected).is_some() - } - - #[must_use] - pub fn parse_keyword_token(&mut self, expected: Keyword) -> Option { - match self.peek_token().token { - Token::Word(w) if expected == w.keyword => Some(self.next_token()), - _ => None, - } - } - - #[must_use] - pub fn peek_keyword(&mut self, expected: Keyword) -> bool { - matches!(self.peek_token().token, Token::Word(w) if expected == w.keyword) - } - - /// If the current token is the `expected` keyword followed by - /// specified tokens, consume them and returns true. - /// Otherwise, no tokens are consumed and returns false. - /// - /// Note that if the length of `tokens` is too long, this function will - /// not be efficient as it does a loop on the tokens with `peek_nth_token` - /// each time. - pub fn parse_keyword_with_tokens(&mut self, expected: Keyword, tokens: &[Token]) -> bool { - match self.peek_token().token { - Token::Word(w) if expected == w.keyword => { - for (idx, token) in tokens.iter().enumerate() { - if self.peek_nth_token(idx + 1).token != *token { - return false; - } - } - // consume all tokens - for _ in 0..(tokens.len() + 1) { - self.next_token(); - } - true - } - _ => false, - } - } - - /// If the current and subsequent tokens exactly match the `keywords` - /// sequence, consume them and returns true. Otherwise, no tokens are - /// consumed and returns false - #[must_use] - pub fn parse_keywords(&mut self, keywords: &[Keyword]) -> bool { - let index = self.index; - for &keyword in keywords { - if !self.parse_keyword(keyword) { - // println!("parse_keywords aborting .. did not find {:?}", keyword); - // reset index and return immediately - self.index = index; - return false; - } - } - true - } - - /// If the current token is one of the given `keywords`, consume the token - /// and return the keyword that matches. Otherwise, no tokens are consumed - /// and returns [`None`]. - #[must_use] - pub fn parse_one_of_keywords(&mut self, keywords: &[Keyword]) -> Option { - match self.peek_token().token { - Token::Word(w) => { - keywords - .iter() - .find(|keyword| **keyword == w.keyword) - .map(|keyword| { - self.next_token(); - *keyword - }) - } - _ => None, - } - } - - /// If the current token is one of the expected keywords, consume the token - /// and return the keyword that matches. Otherwise, return an error. - pub fn expect_one_of_keywords(&mut self, keywords: &[Keyword]) -> Result { - if let Some(keyword) = self.parse_one_of_keywords(keywords) { - Ok(keyword) - } else { - let keywords: Vec = keywords.iter().map(|x| format!("{x:?}")).collect(); - self.expected( - &format!("one of {}", keywords.join(" or ")), - self.peek_token(), - ) - } - } - - /// If the current token is the `expected` keyword, consume the token. - /// Otherwise, return an error. - pub fn expect_keyword(&mut self, expected: Keyword) -> Result { - if let Some(token) = self.parse_keyword_token(expected) { - Ok(token) - } else { - self.expected(format!("{:?}", &expected).as_str(), self.peek_token()) - } - } - - /// If the current and subsequent tokens exactly match the `keywords` - /// sequence, consume them and returns Ok. Otherwise, return an Error. 
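Note the all-or-nothing behavior of parse_keywords above: on a partial match the index is restored, which is what makes speculative keyword matching safe. A sketch (illustrative only, not part of this diff):

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::keywords::Keyword;
use sqlparser::parser::Parser;

fn main() {
    let dialect = GenericDialect {};
    let mut parser = Parser::new(&dialect).try_with_sql("ORDER BY name").unwrap();

    // GROUP BY fails without consuming anything, so ORDER BY still matches.
    assert!(!parser.parse_keywords(&[Keyword::GROUP, Keyword::BY]));
    assert!(parser.parse_keywords(&[Keyword::ORDER, Keyword::BY]));
}
```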
- pub fn expect_keywords(&mut self, expected: &[Keyword]) -> Result<(), ParserError> { - for &kw in expected { - self.expect_keyword(kw)?; - } - Ok(()) - } - - /// Consume the next token if it matches the expected token, otherwise return false - #[must_use] - pub fn consume_token(&mut self, expected: &Token) -> bool { - if self.peek_token() == *expected { - self.next_token(); - true - } else { - false - } - } - - /// If the current and subsequent tokens exactly match the `tokens` - /// sequence, consume them and returns true. Otherwise, no tokens are - /// consumed and returns false - #[must_use] - pub fn consume_tokens(&mut self, tokens: &[Token]) -> bool { - let index = self.index; - for token in tokens { - if !self.consume_token(token) { - self.index = index; - return false; - } - } - true - } - - /// Bail out if the current token is not an expected keyword, or consume it if it is - pub fn expect_token(&mut self, expected: &Token) -> Result { - if self.peek_token() == *expected { - Ok(self.next_token()) - } else { - self.expected(&expected.to_string(), self.peek_token()) - } - } - - fn parse(s: String, loc: Location) -> Result - where - ::Err: Display, - { - s.parse::().map_err(|e| { - ParserError::ParserError(format!( - "Could not parse '{s}' as {}: {e}{loc}", - core::any::type_name::() - )) - }) - } - - /// Parse a comma-separated list of 1+ SelectItem - pub fn parse_projection(&mut self) -> Result, ParserError> { - // BigQuery and Snowflake allow trailing commas, but only in project lists - // e.g. `SELECT 1, 2, FROM t` - // https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#trailing_commas - // https://docs.snowflake.com/en/release-notes/2024/8_11#select-supports-trailing-commas - - let trailing_commas = - self.options.trailing_commas | self.dialect.supports_projection_trailing_commas(); - - self.parse_comma_separated_with_trailing_commas(|p| p.parse_select_item(), trailing_commas) - } - - pub fn parse_actions_list(&mut self) -> Result, ParserError> { - let mut values = vec![]; - loop { - values.push(self.parse_grant_permission()?); - if !self.consume_token(&Token::Comma) { - break; - } else if self.options.trailing_commas { - match self.peek_token().token { - Token::Word(kw) if kw.keyword == Keyword::ON => { - break; - } - Token::RParen - | Token::SemiColon - | Token::EOF - | Token::RBracket - | Token::RBrace => break, - _ => continue, - } - } - } - Ok(values) - } - - /// Parse the comma of a comma-separated syntax element. - /// Allows for control over trailing commas - /// Returns true if there is a next element - fn is_parse_comma_separated_end_with_trailing_commas(&mut self, trailing_commas: bool) -> bool { - if !self.consume_token(&Token::Comma) { - true - } else if trailing_commas { - let token = self.peek_token().token; - match token { - Token::Word(ref kw) - if keywords::RESERVED_FOR_COLUMN_ALIAS.contains(&kw.keyword) => - { - true - } - Token::RParen | Token::SemiColon | Token::EOF | Token::RBracket | Token::RBrace => { - true - } - _ => false, - } - } else { - false - } - } - - /// Parse the comma of a comma-separated syntax element. 
- /// Returns true if there is a next element - fn is_parse_comma_separated_end(&mut self) -> bool { - self.is_parse_comma_separated_end_with_trailing_commas(self.options.trailing_commas) - } - - /// Parse a comma-separated list of 1+ items accepted by `F` - pub fn parse_comma_separated(&mut self, f: F) -> Result, ParserError> - where - F: FnMut(&mut Parser<'a>) -> Result, - { - self.parse_comma_separated_with_trailing_commas(f, self.options.trailing_commas) - } - - /// Parse a comma-separated list of 1+ items accepted by `F` - /// Allows for control over trailing commas - fn parse_comma_separated_with_trailing_commas( - &mut self, - mut f: F, - trailing_commas: bool, - ) -> Result, ParserError> - where - F: FnMut(&mut Parser<'a>) -> Result, - { - let mut values = vec![]; - loop { - values.push(f(self)?); - if self.is_parse_comma_separated_end_with_trailing_commas(trailing_commas) { - break; - } - } - Ok(values) - } - - /// Parse a keyword-separated list of 1+ items accepted by `F` - pub fn parse_keyword_separated( - &mut self, - keyword: Keyword, - mut f: F, - ) -> Result, ParserError> - where - F: FnMut(&mut Parser<'a>) -> Result, - { - let mut values = vec![]; - loop { - values.push(f(self)?); - if !self.parse_keyword(keyword) { - break; - } - } - Ok(values) - } - - pub fn parse_parenthesized(&mut self, mut f: F) -> Result - where - F: FnMut(&mut Parser<'a>) -> Result, - { - self.expect_token(&Token::LParen)?; - let res = f(self)?; - self.expect_token(&Token::RParen)?; - Ok(res) - } - - /// Parse a comma-separated list of 0+ items accepted by `F` - /// * `end_token` - expected end token for the closure (e.g. [Token::RParen], [Token::RBrace] ...) - pub fn parse_comma_separated0( - &mut self, - f: F, - end_token: Token, - ) -> Result, ParserError> - where - F: FnMut(&mut Parser<'a>) -> Result, - { - if self.peek_token().token == end_token { - return Ok(vec![]); - } - - if self.options.trailing_commas && self.peek_tokens() == [Token::Comma, end_token] { - let _ = self.consume_token(&Token::Comma); - return Ok(vec![]); - } - - self.parse_comma_separated(f) - } - - /// Run a parser method `f`, reverting back to the current position if unsuccessful. - /// Returns `None` if `f` returns an error - pub fn maybe_parse(&mut self, f: F) -> Result, ParserError> - where - F: FnMut(&mut Parser) -> Result, - { - match self.try_parse(f) { - Ok(t) => Ok(Some(t)), - Err(ParserError::RecursionLimitExceeded) => Err(ParserError::RecursionLimitExceeded), - _ => Ok(None), - } - } - - /// Run a parser method `f`, reverting back to the current position if unsuccessful. - pub fn try_parse(&mut self, mut f: F) -> Result - where - F: FnMut(&mut Parser) -> Result, - { - let index = self.index; - match f(self) { - Ok(t) => Ok(t), - Err(e) => { - // Unwind stack if limit exceeded - self.index = index; - Err(e) - } - } - } - - /// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. Returns [`None`] if `ALL` is parsed - /// and results in a [`ParserError`] if both `ALL` and `DISTINCT` are found. 
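The comma-separated helpers above compose with any element parser, including parse_expr itself, as this diff uses repeatedly; a sketch (illustrative only, not part of this diff):

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    let dialect = GenericDialect {};
    let mut parser = Parser::new(&dialect).try_with_sql("1, 2, 3").unwrap();

    // The closure parses each element; separating commas are consumed in between.
    let exprs = parser.parse_comma_separated(Parser::parse_expr).unwrap();
    assert_eq!(exprs.len(), 3);
}
```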
- pub fn parse_all_or_distinct(&mut self) -> Result, ParserError> { - let loc = self.peek_token().span.start; - let all = self.parse_keyword(Keyword::ALL); - let distinct = self.parse_keyword(Keyword::DISTINCT); - if !distinct { - return Ok(None); - } - if all { - return parser_err!("Cannot specify both ALL and DISTINCT".to_string(), loc); - } - let on = self.parse_keyword(Keyword::ON); - if !on { - return Ok(Some(Distinct::Distinct)); - } - - self.expect_token(&Token::LParen)?; - let col_names = if self.consume_token(&Token::RParen) { - self.prev_token(); - Vec::new() - } else { - self.parse_comma_separated(Parser::parse_expr)? - }; - self.expect_token(&Token::RParen)?; - Ok(Some(Distinct::On(col_names))) - } - - /// Parse a SQL CREATE statement - pub fn parse_create(&mut self) -> Result { - let or_replace = self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]); - let or_alter = self.parse_keywords(&[Keyword::OR, Keyword::ALTER]); - let local = self.parse_one_of_keywords(&[Keyword::LOCAL]).is_some(); - let global = self.parse_one_of_keywords(&[Keyword::GLOBAL]).is_some(); - let transient = self.parse_one_of_keywords(&[Keyword::TRANSIENT]).is_some(); - let global: Option = if global { - Some(true) - } else if local { - Some(false) - } else { - None - }; - let temporary = self - .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY]) - .is_some(); - let persistent = dialect_of!(self is DuckDbDialect) - && self.parse_one_of_keywords(&[Keyword::PERSISTENT]).is_some(); - if self.parse_keyword(Keyword::TABLE) { - self.parse_create_table(or_replace, temporary, global, transient) - } else if self.parse_keyword(Keyword::MATERIALIZED) || self.parse_keyword(Keyword::VIEW) { - self.prev_token(); - self.parse_create_view(or_replace, temporary) - } else if self.parse_keyword(Keyword::POLICY) { - self.parse_create_policy() - } else if self.parse_keyword(Keyword::EXTERNAL) { - self.parse_create_external_table(or_replace) - } else if self.parse_keyword(Keyword::FUNCTION) { - self.parse_create_function(or_replace, temporary) - } else if self.parse_keyword(Keyword::TRIGGER) { - self.parse_create_trigger(or_replace, false) - } else if self.parse_keywords(&[Keyword::CONSTRAINT, Keyword::TRIGGER]) { - self.parse_create_trigger(or_replace, true) - } else if self.parse_keyword(Keyword::MACRO) { - self.parse_create_macro(or_replace, temporary) - } else if self.parse_keyword(Keyword::SECRET) { - self.parse_create_secret(or_replace, temporary, persistent) - } else if or_replace { - self.expected( - "[EXTERNAL] TABLE or [MATERIALIZED] VIEW or FUNCTION after CREATE OR REPLACE", - self.peek_token(), - ) - } else if self.parse_keyword(Keyword::EXTENSION) { - self.parse_create_extension() - } else if self.parse_keyword(Keyword::INDEX) { - self.parse_create_index(false) - } else if self.parse_keywords(&[Keyword::UNIQUE, Keyword::INDEX]) { - self.parse_create_index(true) - } else if self.parse_keyword(Keyword::VIRTUAL) { - self.parse_create_virtual_table() - } else if self.parse_keyword(Keyword::SCHEMA) { - self.parse_create_schema() - } else if self.parse_keyword(Keyword::DATABASE) { - self.parse_create_database() - } else if self.parse_keyword(Keyword::ROLE) { - self.parse_create_role() - } else if self.parse_keyword(Keyword::SEQUENCE) { - self.parse_create_sequence(temporary) - } else if self.parse_keyword(Keyword::TYPE) { - self.parse_create_type() - } else if self.parse_keyword(Keyword::PROCEDURE) { - self.parse_create_procedure(or_alter) - } else { - self.expected("an object type after CREATE", 
self.peek_token()) - } - } - - /// See [DuckDB Docs](https://duckdb.org/docs/sql/statements/create_secret.html) for more details. - pub fn parse_create_secret( - &mut self, - or_replace: bool, - temporary: bool, - persistent: bool, - ) -> Result { - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - - let mut storage_specifier = None; - let mut name = None; - if self.peek_token() != Token::LParen { - if self.parse_keyword(Keyword::IN) { - storage_specifier = self.parse_identifier(false).ok() - } else { - name = self.parse_identifier(false).ok(); - } - - // Storage specifier may follow the name - if storage_specifier.is_none() - && self.peek_token() != Token::LParen - && self.parse_keyword(Keyword::IN) - { - storage_specifier = self.parse_identifier(false).ok(); - } - } - - self.expect_token(&Token::LParen)?; - self.expect_keyword(Keyword::TYPE)?; - let secret_type = self.parse_identifier(false)?; - - let mut options = Vec::new(); - if self.consume_token(&Token::Comma) { - options.append(&mut self.parse_comma_separated(|p| { - let key = p.parse_identifier(false)?; - let value = p.parse_identifier(false)?; - Ok(SecretOption { key, value }) - })?); - } - self.expect_token(&Token::RParen)?; - - let temp = match (temporary, persistent) { - (true, false) => Some(true), - (false, true) => Some(false), - (false, false) => None, - _ => self.expected("TEMPORARY or PERSISTENT", self.peek_token())?, - }; - - Ok(Statement::CreateSecret { - or_replace, - temporary: temp, - if_not_exists, - name, - storage_specifier, - secret_type, - options, - }) - } - - /// Parse a CACHE TABLE statement - pub fn parse_cache_table(&mut self) -> Result { - let (mut table_flag, mut options, mut has_as, mut query) = (None, vec![], false, None); - if self.parse_keyword(Keyword::TABLE) { - let table_name = self.parse_object_name(false)?; - if self.peek_token().token != Token::EOF { - if let Token::Word(word) = self.peek_token().token { - if word.keyword == Keyword::OPTIONS { - options = self.parse_options(Keyword::OPTIONS)? - } - }; - - if self.peek_token().token != Token::EOF { - let (a, q) = self.parse_as_query()?; - has_as = a; - query = Some(q); - } - - Ok(Statement::Cache { - table_flag, - table_name, - has_as, - options, - query, - }) - } else { - Ok(Statement::Cache { - table_flag, - table_name, - has_as, - options, - query, - }) - } - } else { - table_flag = Some(self.parse_object_name(false)?); - if self.parse_keyword(Keyword::TABLE) { - let table_name = self.parse_object_name(false)?; - if self.peek_token() != Token::EOF { - if let Token::Word(word) = self.peek_token().token { - if word.keyword == Keyword::OPTIONS { - options = self.parse_options(Keyword::OPTIONS)? 
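- 
-                         // Reviewer sketch (not part of this change): the DuckDB-style
-                         // syntax accepted by `parse_create_secret` above:
-                         //
-                         //     let sql = "CREATE SECRET my_secret (TYPE s3, PROVIDER credential_chain)";
-                         //     let stmt = Parser::parse_sql(&DuckDbDialect {}, sql)?.remove(0);
-                         //     // -> Statement::CreateSecret { name: Some(..), secret_type: "s3", .. }
-                         //
-                         // Both the name and the `IN storage_specifier` clause are optional,
-                         // which is why that function peeks for `(` before committing.
- 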
-                     }
-                 };
- 
-                 if self.peek_token() != Token::EOF {
-                     let (a, q) = self.parse_as_query()?;
-                     has_as = a;
-                     query = Some(q);
-                 }
- 
-                 Ok(Statement::Cache {
-                     table_flag,
-                     table_name,
-                     has_as,
-                     options,
-                     query,
-                 })
-             } else {
-                 Ok(Statement::Cache {
-                     table_flag,
-                     table_name,
-                     has_as,
-                     options,
-                     query,
-                 })
-             }
-         } else {
-             if self.peek_token() == Token::EOF {
-                 self.prev_token();
-             }
-             self.expected("a `TABLE` keyword", self.peek_token())
-         }
-     }
- 
-     /// Parse 'AS' before a query, such as `WITH XXX AS SELECT XXX` or `CACHE TABLE AS SELECT XXX`
-     pub fn parse_as_query(&mut self) -> Result<(bool, Box<Query>), ParserError> {
-         match self.peek_token().token {
-             Token::Word(word) => match word.keyword {
-                 Keyword::AS => {
-                     self.next_token();
-                     Ok((true, self.parse_query()?))
-                 }
-                 _ => Ok((false, self.parse_query()?)),
-             },
-             _ => self.expected("a QUERY statement", self.peek_token()),
-         }
-     }
- 
-     /// Parse an UNCACHE TABLE statement
-     pub fn parse_uncache_table(&mut self) -> Result<Statement, ParserError> {
-         self.expect_keyword(Keyword::TABLE)?;
-         let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]);
-         let table_name = self.parse_object_name(false)?;
-         Ok(Statement::UNCache {
-             table_name,
-             if_exists,
-         })
-     }
- 
-     /// SQLite-specific `CREATE VIRTUAL TABLE`
-     pub fn parse_create_virtual_table(&mut self) -> Result<Statement, ParserError> {
-         self.expect_keyword(Keyword::TABLE)?;
-         let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
-         let table_name = self.parse_object_name(false)?;
-         self.expect_keyword(Keyword::USING)?;
-         let module_name = self.parse_identifier(false)?;
-         // SQLite docs note that module "arguments syntax is sufficiently
-         // general that the arguments can be made to appear as column
-         // definitions in a traditional CREATE TABLE statement", but
-         // we don't implement that.
-         let module_args = self.parse_parenthesized_column_list(Optional, false)?;
-         Ok(Statement::CreateVirtualTable {
-             name: table_name,
-             if_not_exists,
-             module_name,
-             module_args,
-         })
-     }
- 
-     pub fn parse_create_schema(&mut self) -> Result<Statement, ParserError> {
-         let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
- 
-         let schema_name = self.parse_schema_name()?;
- 
-         Ok(Statement::CreateSchema {
-             schema_name,
-             if_not_exists,
-         })
-     }
- 
-     fn parse_schema_name(&mut self) -> Result<SchemaName, ParserError> {
-         if self.parse_keyword(Keyword::AUTHORIZATION) {
-             Ok(SchemaName::UnnamedAuthorization(
-                 self.parse_identifier(false)?,
-             ))
-         } else {
-             let name = self.parse_object_name(false)?;
- 
-             if self.parse_keyword(Keyword::AUTHORIZATION) {
-                 Ok(SchemaName::NamedAuthorization(
-                     name,
-                     self.parse_identifier(false)?,
-                 ))
-             } else {
-                 Ok(SchemaName::Simple(name))
-             }
-         }
-     }
- 
-     pub fn parse_create_database(&mut self) -> Result<Statement, ParserError> {
-         let ine = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
-         let db_name = self.parse_object_name(false)?;
-         let mut location = None;
-         let mut managed_location = None;
-         loop {
-             match self.parse_one_of_keywords(&[Keyword::LOCATION, Keyword::MANAGEDLOCATION]) {
-                 Some(Keyword::LOCATION) => location = Some(self.parse_literal_string()?),
-                 Some(Keyword::MANAGEDLOCATION) => {
-                     managed_location = Some(self.parse_literal_string()?)
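- 
-                     // Reviewer sketch (not part of this change): the SQLite form handled
-                     // by `parse_create_virtual_table` above; module arguments are parsed
-                     // as a plain column list, per the comment in that function.
-                     //
-                     //     let sql = "CREATE VIRTUAL TABLE IF NOT EXISTS posts_fts USING fts5(title, body)";
-                     //     let stmt = Parser::parse_sql(&SQLiteDialect {}, sql)?.remove(0);
-                     //     // -> Statement::CreateVirtualTable { module_name: "fts5",
-                     //     //      module_args: ["title", "body"], if_not_exists: true, .. }
- 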
- } - _ => break, - } - } - Ok(Statement::CreateDatabase { - db_name, - if_not_exists: ine, - location, - managed_location, - }) - } - - pub fn parse_optional_create_function_using( - &mut self, - ) -> Result, ParserError> { - if !self.parse_keyword(Keyword::USING) { - return Ok(None); - }; - let keyword = - self.expect_one_of_keywords(&[Keyword::JAR, Keyword::FILE, Keyword::ARCHIVE])?; - - let uri = self.parse_literal_string()?; - - match keyword { - Keyword::JAR => Ok(Some(CreateFunctionUsing::Jar(uri))), - Keyword::FILE => Ok(Some(CreateFunctionUsing::File(uri))), - Keyword::ARCHIVE => Ok(Some(CreateFunctionUsing::Archive(uri))), - _ => self.expected( - "JAR, FILE or ARCHIVE, got {:?}", - TokenWithSpan::wrap(Token::make_keyword(format!("{keyword:?}").as_str())), - ), - } - } - - pub fn parse_create_function( - &mut self, - or_replace: bool, - temporary: bool, - ) -> Result { - if dialect_of!(self is HiveDialect) { - self.parse_hive_create_function(or_replace, temporary) - } else if dialect_of!(self is PostgreSqlDialect | GenericDialect) { - self.parse_postgres_create_function(or_replace, temporary) - } else if dialect_of!(self is DuckDbDialect) { - self.parse_create_macro(or_replace, temporary) - } else if dialect_of!(self is BigQueryDialect) { - self.parse_bigquery_create_function(or_replace, temporary) - } else { - self.prev_token(); - self.expected("an object type after CREATE", self.peek_token()) - } - } - - /// Parse `CREATE FUNCTION` for [Postgres] - /// - /// [Postgres]: https://www.postgresql.org/docs/15/sql-createfunction.html - fn parse_postgres_create_function( - &mut self, - or_replace: bool, - temporary: bool, - ) -> Result { - let name = self.parse_object_name(false)?; - self.expect_token(&Token::LParen)?; - let args = if self.consume_token(&Token::RParen) { - self.prev_token(); - None - } else { - Some(self.parse_comma_separated(Parser::parse_function_arg)?) - }; - - self.expect_token(&Token::RParen)?; - - let return_type = if self.parse_keyword(Keyword::RETURNS) { - Some(self.parse_data_type()?) 
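- 
-         // Reviewer sketch (not part of this change): the Hive flavour routed to by
-         // `parse_create_function` above, exercising `parse_optional_create_function_using`:
-         //
-         //     let sql = "CREATE TEMPORARY FUNCTION mydb.myfunc AS 'com.example.hive.MyUDF' \
-         //                USING JAR 'hdfs:///udfs/myudf.jar'";
-         //     let stmt = Parser::parse_sql(&HiveDialect {}, sql)?.remove(0);
-         //     // -> Statement::CreateFunction(.. using: Some(CreateFunctionUsing::Jar(..)) ..)
- 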
- } else { - None - }; - - #[derive(Default)] - struct Body { - language: Option, - behavior: Option, - function_body: Option, - called_on_null: Option, - parallel: Option, - } - let mut body = Body::default(); - loop { - fn ensure_not_set(field: &Option, name: &str) -> Result<(), ParserError> { - if field.is_some() { - return Err(ParserError::ParserError(format!( - "{name} specified more than once", - ))); - } - Ok(()) - } - if self.parse_keyword(Keyword::AS) { - ensure_not_set(&body.function_body, "AS")?; - body.function_body = Some(CreateFunctionBody::AsBeforeOptions( - self.parse_create_function_body_string()?, - )); - } else if self.parse_keyword(Keyword::LANGUAGE) { - ensure_not_set(&body.language, "LANGUAGE")?; - body.language = Some(self.parse_identifier(false)?); - } else if self.parse_keyword(Keyword::IMMUTABLE) { - ensure_not_set(&body.behavior, "IMMUTABLE | STABLE | VOLATILE")?; - body.behavior = Some(FunctionBehavior::Immutable); - } else if self.parse_keyword(Keyword::STABLE) { - ensure_not_set(&body.behavior, "IMMUTABLE | STABLE | VOLATILE")?; - body.behavior = Some(FunctionBehavior::Stable); - } else if self.parse_keyword(Keyword::VOLATILE) { - ensure_not_set(&body.behavior, "IMMUTABLE | STABLE | VOLATILE")?; - body.behavior = Some(FunctionBehavior::Volatile); - } else if self.parse_keywords(&[ - Keyword::CALLED, - Keyword::ON, - Keyword::NULL, - Keyword::INPUT, - ]) { - ensure_not_set( - &body.called_on_null, - "CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT", - )?; - body.called_on_null = Some(FunctionCalledOnNull::CalledOnNullInput); - } else if self.parse_keywords(&[ - Keyword::RETURNS, - Keyword::NULL, - Keyword::ON, - Keyword::NULL, - Keyword::INPUT, - ]) { - ensure_not_set( - &body.called_on_null, - "CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT", - )?; - body.called_on_null = Some(FunctionCalledOnNull::ReturnsNullOnNullInput); - } else if self.parse_keyword(Keyword::STRICT) { - ensure_not_set( - &body.called_on_null, - "CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT", - )?; - body.called_on_null = Some(FunctionCalledOnNull::Strict); - } else if self.parse_keyword(Keyword::PARALLEL) { - ensure_not_set(&body.parallel, "PARALLEL { UNSAFE | RESTRICTED | SAFE }")?; - if self.parse_keyword(Keyword::UNSAFE) { - body.parallel = Some(FunctionParallel::Unsafe); - } else if self.parse_keyword(Keyword::RESTRICTED) { - body.parallel = Some(FunctionParallel::Restricted); - } else if self.parse_keyword(Keyword::SAFE) { - body.parallel = Some(FunctionParallel::Safe); - } else { - return self.expected("one of UNSAFE | RESTRICTED | SAFE", self.peek_token()); - } - } else if self.parse_keyword(Keyword::RETURN) { - ensure_not_set(&body.function_body, "RETURN")?; - body.function_body = Some(CreateFunctionBody::Return(self.parse_expr()?)); - } else { - break; - } - } - - Ok(Statement::CreateFunction(CreateFunction { - or_replace, - temporary, - name, - args, - return_type, - behavior: body.behavior, - called_on_null: body.called_on_null, - parallel: body.parallel, - language: body.language, - function_body: body.function_body, - if_not_exists: false, - using: None, - determinism_specifier: None, - options: None, - remote_connection: None, - })) - } - - /// Parse `CREATE FUNCTION` for [Hive] - /// - /// [Hive]: https://cwiki.apache.org/confluence/display/hive/languagemanual+ddl#LanguageManualDDL-Create/Drop/ReloadFunction - fn parse_hive_create_function( - &mut self, - or_replace: bool, - temporary: bool, - ) -> Result { - let name = 
self.parse_object_name(false)?;
-         self.expect_keyword(Keyword::AS)?;
- 
-         let as_ = self.parse_create_function_body_string()?;
-         let using = self.parse_optional_create_function_using()?;
- 
-         Ok(Statement::CreateFunction(CreateFunction {
-             or_replace,
-             temporary,
-             name,
-             function_body: Some(CreateFunctionBody::AsBeforeOptions(as_)),
-             using,
-             if_not_exists: false,
-             args: None,
-             return_type: None,
-             behavior: None,
-             called_on_null: None,
-             parallel: None,
-             language: None,
-             determinism_specifier: None,
-             options: None,
-             remote_connection: None,
-         }))
-     }
- 
-     /// Parse `CREATE FUNCTION` for [BigQuery]
-     ///
-     /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
-     fn parse_bigquery_create_function(
-         &mut self,
-         or_replace: bool,
-         temporary: bool,
-     ) -> Result<Statement, ParserError> {
-         let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
-         let name = self.parse_object_name(false)?;
- 
-         let parse_function_param =
-             |parser: &mut Parser| -> Result<OperateFunctionArg, ParserError> {
-                 let name = parser.parse_identifier(false)?;
-                 let data_type = parser.parse_data_type()?;
-                 Ok(OperateFunctionArg {
-                     mode: None,
-                     name: Some(name),
-                     data_type,
-                     default_expr: None,
-                 })
-             };
-         self.expect_token(&Token::LParen)?;
-         let args = self.parse_comma_separated0(parse_function_param, Token::RParen)?;
-         self.expect_token(&Token::RParen)?;
- 
-         let return_type = if self.parse_keyword(Keyword::RETURNS) {
-             Some(self.parse_data_type()?)
-         } else {
-             None
-         };
- 
-         let determinism_specifier = if self.parse_keyword(Keyword::DETERMINISTIC) {
-             Some(FunctionDeterminismSpecifier::Deterministic)
-         } else if self.parse_keywords(&[Keyword::NOT, Keyword::DETERMINISTIC]) {
-             Some(FunctionDeterminismSpecifier::NotDeterministic)
-         } else {
-             None
-         };
- 
-         let language = if self.parse_keyword(Keyword::LANGUAGE) {
-             Some(self.parse_identifier(false)?)
-         } else {
-             None
-         };
- 
-         let remote_connection =
-             if self.parse_keywords(&[Keyword::REMOTE, Keyword::WITH, Keyword::CONNECTION]) {
-                 Some(self.parse_object_name(false)?)
-             } else {
-                 None
-             };
- 
-         // `OPTIONS` may come before or after the function body but
-         // may be specified at most once.
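- 
-         // Reviewer sketch (not part of this change): a minimal BigQuery function that
-         // takes the `AS`-before-`OPTIONS` path below (no OPTIONS at all, so
-         // `AsBeforeOptions` is used):
-         //
-         //     let sql = "CREATE OR REPLACE FUNCTION mydataset.myfunc(x INT64) RETURNS INT64 AS (x * 2)";
-         //     let stmt = Parser::parse_sql(&BigQueryDialect {}, sql)?.remove(0);
-         //     // -> Statement::CreateFunction(.. function_body:
-         //     //      Some(CreateFunctionBody::AsBeforeOptions(..)) ..)
- 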
- let mut options = self.maybe_parse_options(Keyword::OPTIONS)?; - - let function_body = if remote_connection.is_none() { - self.expect_keyword(Keyword::AS)?; - let expr = self.parse_expr()?; - if options.is_none() { - options = self.maybe_parse_options(Keyword::OPTIONS)?; - Some(CreateFunctionBody::AsBeforeOptions(expr)) - } else { - Some(CreateFunctionBody::AsAfterOptions(expr)) - } - } else { - None - }; - - Ok(Statement::CreateFunction(CreateFunction { - or_replace, - temporary, - if_not_exists, - name, - args: Some(args), - return_type, - function_body, - language, - determinism_specifier, - options, - remote_connection, - using: None, - behavior: None, - called_on_null: None, - parallel: None, - })) - } - - fn parse_function_arg(&mut self) -> Result { - let mode = if self.parse_keyword(Keyword::IN) { - Some(ArgMode::In) - } else if self.parse_keyword(Keyword::OUT) { - Some(ArgMode::Out) - } else if self.parse_keyword(Keyword::INOUT) { - Some(ArgMode::InOut) - } else { - None - }; - - // parse: [ argname ] argtype - let mut name = None; - let mut data_type = self.parse_data_type()?; - if let DataType::Custom(n, _) = &data_type { - // the first token is actually a name - name = Some(n.0[0].clone()); - data_type = self.parse_data_type()?; - } - - let default_expr = if self.parse_keyword(Keyword::DEFAULT) || self.consume_token(&Token::Eq) - { - Some(self.parse_expr()?) - } else { - None - }; - Ok(OperateFunctionArg { - mode, - name, - data_type, - default_expr, - }) - } - - /// Parse statements of the DropTrigger type such as: - /// - /// ```sql - /// DROP TRIGGER [ IF EXISTS ] name ON table_name [ CASCADE | RESTRICT ] - /// ``` - pub fn parse_drop_trigger(&mut self) -> Result { - if !dialect_of!(self is PostgreSqlDialect | GenericDialect) { - self.prev_token(); - return self.expected("an object type after DROP", self.peek_token()); - } - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let trigger_name = self.parse_object_name(false)?; - self.expect_keyword(Keyword::ON)?; - let table_name = self.parse_object_name(false)?; - let option = self - .parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]) - .map(|keyword| match keyword { - Keyword::CASCADE => ReferentialAction::Cascade, - Keyword::RESTRICT => ReferentialAction::Restrict, - _ => unreachable!(), - }); - Ok(Statement::DropTrigger { - if_exists, - trigger_name, - table_name, - option, - }) - } - - pub fn parse_create_trigger( - &mut self, - or_replace: bool, - is_constraint: bool, - ) -> Result { - if !dialect_of!(self is PostgreSqlDialect | GenericDialect) { - self.prev_token(); - return self.expected("an object type after CREATE", self.peek_token()); - } - - let name = self.parse_object_name(false)?; - let period = self.parse_trigger_period()?; - - let events = self.parse_keyword_separated(Keyword::OR, Parser::parse_trigger_event)?; - self.expect_keyword(Keyword::ON)?; - let table_name = self.parse_object_name(false)?; - - let referenced_table_name = if self.parse_keyword(Keyword::FROM) { - self.parse_object_name(true).ok() - } else { - None - }; - - let characteristics = self.parse_constraint_characteristics()?; - - let mut referencing = vec![]; - if self.parse_keyword(Keyword::REFERENCING) { - while let Some(refer) = self.parse_trigger_referencing()? { - referencing.push(refer); - } - } - - self.expect_keyword(Keyword::FOR)?; - let include_each = self.parse_keyword(Keyword::EACH); - let trigger_object = - match self.expect_one_of_keywords(&[Keyword::ROW, Keyword::STATEMENT])? 
{ - Keyword::ROW => TriggerObject::Row, - Keyword::STATEMENT => TriggerObject::Statement, - _ => unreachable!(), - }; - - let condition = self - .parse_keyword(Keyword::WHEN) - .then(|| self.parse_expr()) - .transpose()?; - - self.expect_keyword(Keyword::EXECUTE)?; - - let exec_body = self.parse_trigger_exec_body()?; - - Ok(Statement::CreateTrigger { - or_replace, - is_constraint, - name, - period, - events, - table_name, - referenced_table_name, - referencing, - trigger_object, - include_each, - condition, - exec_body, - characteristics, - }) - } - - pub fn parse_trigger_period(&mut self) -> Result { - Ok( - match self.expect_one_of_keywords(&[ - Keyword::BEFORE, - Keyword::AFTER, - Keyword::INSTEAD, - ])? { - Keyword::BEFORE => TriggerPeriod::Before, - Keyword::AFTER => TriggerPeriod::After, - Keyword::INSTEAD => self - .expect_keyword(Keyword::OF) - .map(|_| TriggerPeriod::InsteadOf)?, - _ => unreachable!(), - }, - ) - } - - pub fn parse_trigger_event(&mut self) -> Result { - Ok( - match self.expect_one_of_keywords(&[ - Keyword::INSERT, - Keyword::UPDATE, - Keyword::DELETE, - Keyword::TRUNCATE, - ])? { - Keyword::INSERT => TriggerEvent::Insert, - Keyword::UPDATE => { - if self.parse_keyword(Keyword::OF) { - let cols = self.parse_comma_separated(|ident| { - Parser::parse_identifier(ident, false) - })?; - TriggerEvent::Update(cols) - } else { - TriggerEvent::Update(vec![]) - } - } - Keyword::DELETE => TriggerEvent::Delete, - Keyword::TRUNCATE => TriggerEvent::Truncate, - _ => unreachable!(), - }, - ) - } - - pub fn parse_trigger_referencing(&mut self) -> Result, ParserError> { - let refer_type = match self.parse_one_of_keywords(&[Keyword::OLD, Keyword::NEW]) { - Some(Keyword::OLD) if self.parse_keyword(Keyword::TABLE) => { - TriggerReferencingType::OldTable - } - Some(Keyword::NEW) if self.parse_keyword(Keyword::TABLE) => { - TriggerReferencingType::NewTable - } - _ => { - return Ok(None); - } - }; - - let is_as = self.parse_keyword(Keyword::AS); - let transition_relation_name = self.parse_object_name(false)?; - Ok(Some(TriggerReferencing { - refer_type, - is_as, - transition_relation_name, - })) - } - - pub fn parse_trigger_exec_body(&mut self) -> Result { - Ok(TriggerExecBody { - exec_type: match self - .expect_one_of_keywords(&[Keyword::FUNCTION, Keyword::PROCEDURE])? - { - Keyword::FUNCTION => TriggerExecBodyType::Function, - Keyword::PROCEDURE => TriggerExecBodyType::Procedure, - _ => unreachable!(), - }, - func_desc: self.parse_function_desc()?, - }) - } - - pub fn parse_create_macro( - &mut self, - or_replace: bool, - temporary: bool, - ) -> Result { - if dialect_of!(self is DuckDbDialect | GenericDialect) { - let name = self.parse_object_name(false)?; - self.expect_token(&Token::LParen)?; - let args = if self.consume_token(&Token::RParen) { - self.prev_token(); - None - } else { - Some(self.parse_comma_separated(Parser::parse_macro_arg)?) - }; - - self.expect_token(&Token::RParen)?; - self.expect_keyword(Keyword::AS)?; - - Ok(Statement::CreateMacro { - or_replace, - temporary, - name, - args, - definition: if self.parse_keyword(Keyword::TABLE) { - MacroDefinition::Table(self.parse_query()?) - } else { - MacroDefinition::Expr(self.parse_expr()?) - }, - }) - } else { - self.prev_token(); - self.expected("an object type after CREATE", self.peek_token()) - } - } - - fn parse_macro_arg(&mut self) -> Result { - let name = self.parse_identifier(false)?; - - let default_expr = - if self.consume_token(&Token::Assignment) || self.consume_token(&Token::RArrow) { - Some(self.parse_expr()?) 
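- 
-             // Reviewer sketch (not part of this change): a Postgres trigger accepted by
-             // `parse_create_trigger` above; events are joined with OR via
-             // `parse_keyword_separated`:
-             //
-             //     let sql = "CREATE TRIGGER log_changes AFTER INSERT OR UPDATE ON orders \
-             //                FOR EACH ROW EXECUTE FUNCTION log_order()";
-             //     let stmt = Parser::parse_sql(&PostgreSqlDialect {}, sql)?.remove(0);
-             //     // -> Statement::CreateTrigger { period: TriggerPeriod::After,
-             //     //      events: [Insert, Update([])], trigger_object: Row, .. }
- 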
- } else { - None - }; - Ok(MacroArg { name, default_expr }) - } - - pub fn parse_create_external_table( - &mut self, - or_replace: bool, - ) -> Result { - self.expect_keyword(Keyword::TABLE)?; - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let table_name = self.parse_object_name(false)?; - let (columns, constraints) = self.parse_columns()?; - - let hive_distribution = self.parse_hive_distribution()?; - let hive_formats = self.parse_hive_formats()?; - - let file_format = if let Some(ff) = &hive_formats.storage { - match ff { - HiveIOFormat::FileFormat { format } => Some(*format), - _ => None, - } - } else { - None - }; - let location = hive_formats.location.clone(); - let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?; - Ok(CreateTableBuilder::new(table_name) - .columns(columns) - .constraints(constraints) - .hive_distribution(hive_distribution) - .hive_formats(Some(hive_formats)) - .table_properties(table_properties) - .or_replace(or_replace) - .if_not_exists(if_not_exists) - .external(true) - .file_format(file_format) - .location(location) - .build()) - } - - pub fn parse_file_format(&mut self) -> Result { - let next_token = self.next_token(); - match &next_token.token { - Token::Word(w) => match w.keyword { - Keyword::AVRO => Ok(FileFormat::AVRO), - Keyword::JSONFILE => Ok(FileFormat::JSONFILE), - Keyword::ORC => Ok(FileFormat::ORC), - Keyword::PARQUET => Ok(FileFormat::PARQUET), - Keyword::RCFILE => Ok(FileFormat::RCFILE), - Keyword::SEQUENCEFILE => Ok(FileFormat::SEQUENCEFILE), - Keyword::TEXTFILE => Ok(FileFormat::TEXTFILE), - _ => self.expected("fileformat", next_token), - }, - _ => self.expected("fileformat", next_token), - } - } - - pub fn parse_analyze_format(&mut self) -> Result { - let next_token = self.next_token(); - match &next_token.token { - Token::Word(w) => match w.keyword { - Keyword::TEXT => Ok(AnalyzeFormat::TEXT), - Keyword::GRAPHVIZ => Ok(AnalyzeFormat::GRAPHVIZ), - Keyword::JSON => Ok(AnalyzeFormat::JSON), - _ => self.expected("fileformat", next_token), - }, - _ => self.expected("fileformat", next_token), - } - } - - pub fn parse_create_view( - &mut self, - or_replace: bool, - temporary: bool, - ) -> Result { - let materialized = self.parse_keyword(Keyword::MATERIALIZED); - self.expect_keyword(Keyword::VIEW)?; - let if_not_exists = dialect_of!(self is BigQueryDialect|SQLiteDialect|GenericDialect) - && self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - // Many dialects support `OR ALTER` right after `CREATE`, but we don't (yet). - // ANSI SQL and Postgres support RECURSIVE here, but we don't support it either. - let allow_unquoted_hyphen = dialect_of!(self is BigQueryDialect); - let name = self.parse_object_name(allow_unquoted_hyphen)?; - let columns = self.parse_view_columns()?; - let mut options = CreateTableOptions::None; - let with_options = self.parse_options(Keyword::WITH)?; - if !with_options.is_empty() { - options = CreateTableOptions::With(with_options); - } - - let cluster_by = if self.parse_keyword(Keyword::CLUSTER) { - self.expect_keyword(Keyword::BY)?; - self.parse_parenthesized_column_list(Optional, false)? - } else { - vec![] - }; - - if dialect_of!(self is BigQueryDialect | GenericDialect) { - if let Some(opts) = self.maybe_parse_options(Keyword::OPTIONS)? 
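- 
-         // Reviewer sketch (not part of this change): an external Hive table hitting
-         // `parse_create_external_table` and `parse_file_format` above:
-         //
-         //     let sql = "CREATE EXTERNAL TABLE logs (id INT, msg STRING) \
-         //                PARTITIONED BY (dt STRING) STORED AS PARQUET LOCATION 's3://bucket/logs'";
-         //     let stmt = Parser::parse_sql(&HiveDialect {}, sql)?.remove(0);
-         //     // -> a CreateTable with external: true, file_format: Some(PARQUET),
-         //     //    location: Some("s3://bucket/logs")
- 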
{ - if !opts.is_empty() { - options = CreateTableOptions::Options(opts); - } - }; - } - - let to = if dialect_of!(self is ClickHouseDialect | GenericDialect) - && self.parse_keyword(Keyword::TO) - { - Some(self.parse_object_name(false)?) - } else { - None - }; - - let comment = if dialect_of!(self is SnowflakeDialect | GenericDialect) - && self.parse_keyword(Keyword::COMMENT) - { - self.expect_token(&Token::Eq)?; - let next_token = self.next_token(); - match next_token.token { - Token::SingleQuotedString(str) => Some(str), - _ => self.expected("string literal", next_token)?, - } - } else { - None - }; - - self.expect_keyword(Keyword::AS)?; - let query = self.parse_query()?; - // Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here. - - let with_no_schema_binding = dialect_of!(self is RedshiftSqlDialect | GenericDialect) - && self.parse_keywords(&[ - Keyword::WITH, - Keyword::NO, - Keyword::SCHEMA, - Keyword::BINDING, - ]); - - Ok(Statement::CreateView { - name, - columns, - query, - materialized, - or_replace, - options, - cluster_by, - comment, - with_no_schema_binding, - if_not_exists, - temporary, - to, - }) - } - - pub fn parse_create_role(&mut self) -> Result { - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let names = self.parse_comma_separated(|p| p.parse_object_name(false))?; - - let _ = self.parse_keyword(Keyword::WITH); // [ WITH ] - - let optional_keywords = if dialect_of!(self is MsSqlDialect) { - vec![Keyword::AUTHORIZATION] - } else if dialect_of!(self is PostgreSqlDialect) { - vec![ - Keyword::LOGIN, - Keyword::NOLOGIN, - Keyword::INHERIT, - Keyword::NOINHERIT, - Keyword::BYPASSRLS, - Keyword::NOBYPASSRLS, - Keyword::PASSWORD, - Keyword::CREATEDB, - Keyword::NOCREATEDB, - Keyword::CREATEROLE, - Keyword::NOCREATEROLE, - Keyword::SUPERUSER, - Keyword::NOSUPERUSER, - Keyword::REPLICATION, - Keyword::NOREPLICATION, - Keyword::CONNECTION, - Keyword::VALID, - Keyword::IN, - Keyword::ROLE, - Keyword::ADMIN, - Keyword::USER, - ] - } else { - vec![] - }; - - // MSSQL - let mut authorization_owner = None; - // Postgres - let mut login = None; - let mut inherit = None; - let mut bypassrls = None; - let mut password = None; - let mut create_db = None; - let mut create_role = None; - let mut superuser = None; - let mut replication = None; - let mut connection_limit = None; - let mut valid_until = None; - let mut in_role = vec![]; - let mut in_group = vec![]; - let mut role = vec![]; - let mut user = vec![]; - let mut admin = vec![]; - - while let Some(keyword) = self.parse_one_of_keywords(&optional_keywords) { - let loc = self - .tokens - .get(self.index - 1) - .map_or(Location { line: 0, column: 0 }, |t| t.span.start); - match keyword { - Keyword::AUTHORIZATION => { - if authorization_owner.is_some() { - parser_err!("Found multiple AUTHORIZATION", loc) - } else { - authorization_owner = Some(self.parse_object_name(false)?); - Ok(()) - } - } - Keyword::LOGIN | Keyword::NOLOGIN => { - if login.is_some() { - parser_err!("Found multiple LOGIN or NOLOGIN", loc) - } else { - login = Some(keyword == Keyword::LOGIN); - Ok(()) - } - } - Keyword::INHERIT | Keyword::NOINHERIT => { - if inherit.is_some() { - parser_err!("Found multiple INHERIT or NOINHERIT", loc) - } else { - inherit = Some(keyword == Keyword::INHERIT); - Ok(()) - } - } - Keyword::BYPASSRLS | Keyword::NOBYPASSRLS => { - if bypassrls.is_some() { - parser_err!("Found multiple BYPASSRLS or NOBYPASSRLS", loc) - } else { - bypassrls = Some(keyword == Keyword::BYPASSRLS); 
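- 
-                         // Reviewer sketch (not part of this change): the Redshift-only
-                         // suffix handled at the end of `parse_create_view` above:
-                         //
-                         //     let sql = "CREATE VIEW v AS SELECT * FROM t WITH NO SCHEMA BINDING";
-                         //     let stmt = Parser::parse_sql(&RedshiftSqlDialect {}, sql)?.remove(0);
-                         //     // -> Statement::CreateView { with_no_schema_binding: true, .. }
- 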
- Ok(()) - } - } - Keyword::CREATEDB | Keyword::NOCREATEDB => { - if create_db.is_some() { - parser_err!("Found multiple CREATEDB or NOCREATEDB", loc) - } else { - create_db = Some(keyword == Keyword::CREATEDB); - Ok(()) - } - } - Keyword::CREATEROLE | Keyword::NOCREATEROLE => { - if create_role.is_some() { - parser_err!("Found multiple CREATEROLE or NOCREATEROLE", loc) - } else { - create_role = Some(keyword == Keyword::CREATEROLE); - Ok(()) - } - } - Keyword::SUPERUSER | Keyword::NOSUPERUSER => { - if superuser.is_some() { - parser_err!("Found multiple SUPERUSER or NOSUPERUSER", loc) - } else { - superuser = Some(keyword == Keyword::SUPERUSER); - Ok(()) - } - } - Keyword::REPLICATION | Keyword::NOREPLICATION => { - if replication.is_some() { - parser_err!("Found multiple REPLICATION or NOREPLICATION", loc) - } else { - replication = Some(keyword == Keyword::REPLICATION); - Ok(()) - } - } - Keyword::PASSWORD => { - if password.is_some() { - parser_err!("Found multiple PASSWORD", loc) - } else { - password = if self.parse_keyword(Keyword::NULL) { - Some(Password::NullPassword) - } else { - Some(Password::Password(Expr::Value(self.parse_value()?))) - }; - Ok(()) - } - } - Keyword::CONNECTION => { - self.expect_keyword(Keyword::LIMIT)?; - if connection_limit.is_some() { - parser_err!("Found multiple CONNECTION LIMIT", loc) - } else { - connection_limit = Some(Expr::Value(self.parse_number_value()?)); - Ok(()) - } - } - Keyword::VALID => { - self.expect_keyword(Keyword::UNTIL)?; - if valid_until.is_some() { - parser_err!("Found multiple VALID UNTIL", loc) - } else { - valid_until = Some(Expr::Value(self.parse_value()?)); - Ok(()) - } - } - Keyword::IN => { - if self.parse_keyword(Keyword::ROLE) { - if !in_role.is_empty() { - parser_err!("Found multiple IN ROLE", loc) - } else { - in_role = self.parse_comma_separated(|p| p.parse_identifier(false))?; - Ok(()) - } - } else if self.parse_keyword(Keyword::GROUP) { - if !in_group.is_empty() { - parser_err!("Found multiple IN GROUP", loc) - } else { - in_group = self.parse_comma_separated(|p| p.parse_identifier(false))?; - Ok(()) - } - } else { - self.expected("ROLE or GROUP after IN", self.peek_token()) - } - } - Keyword::ROLE => { - if !role.is_empty() { - parser_err!("Found multiple ROLE", loc) - } else { - role = self.parse_comma_separated(|p| p.parse_identifier(false))?; - Ok(()) - } - } - Keyword::USER => { - if !user.is_empty() { - parser_err!("Found multiple USER", loc) - } else { - user = self.parse_comma_separated(|p| p.parse_identifier(false))?; - Ok(()) - } - } - Keyword::ADMIN => { - if !admin.is_empty() { - parser_err!("Found multiple ADMIN", loc) - } else { - admin = self.parse_comma_separated(|p| p.parse_identifier(false))?; - Ok(()) - } - } - _ => break, - }? 
- } - - Ok(Statement::CreateRole { - names, - if_not_exists, - login, - inherit, - bypassrls, - password, - create_db, - create_role, - replication, - superuser, - connection_limit, - valid_until, - in_role, - in_group, - role, - user, - admin, - authorization_owner, - }) - } - - pub fn parse_owner(&mut self) -> Result { - let owner = match self.parse_one_of_keywords(&[Keyword::CURRENT_USER, Keyword::CURRENT_ROLE, Keyword::SESSION_USER]) { - Some(Keyword::CURRENT_USER) => Owner::CurrentUser, - Some(Keyword::CURRENT_ROLE) => Owner::CurrentRole, - Some(Keyword::SESSION_USER) => Owner::SessionUser, - Some(_) => unreachable!(), - None => { - match self.parse_identifier(false) { - Ok(ident) => Owner::Ident(ident), - Err(e) => { - return Err(ParserError::ParserError(format!("Expected: CURRENT_USER, CURRENT_ROLE, SESSION_USER or identifier after OWNER TO. {e}"))) - } - } - } - }; - Ok(owner) - } - - /// ```sql - /// CREATE POLICY name ON table_name [ AS { PERMISSIVE | RESTRICTIVE } ] - /// [ FOR { ALL | SELECT | INSERT | UPDATE | DELETE } ] - /// [ TO { role_name | PUBLIC | CURRENT_USER | CURRENT_ROLE | SESSION_USER } [, ...] ] - /// [ USING ( using_expression ) ] - /// [ WITH CHECK ( with_check_expression ) ] - /// ``` - /// - /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-createpolicy.html) - pub fn parse_create_policy(&mut self) -> Result { - let name = self.parse_identifier(false)?; - self.expect_keyword(Keyword::ON)?; - let table_name = self.parse_object_name(false)?; - - let policy_type = if self.parse_keyword(Keyword::AS) { - let keyword = - self.expect_one_of_keywords(&[Keyword::PERMISSIVE, Keyword::RESTRICTIVE])?; - Some(match keyword { - Keyword::PERMISSIVE => CreatePolicyType::Permissive, - Keyword::RESTRICTIVE => CreatePolicyType::Restrictive, - _ => unreachable!(), - }) - } else { - None - }; - - let command = if self.parse_keyword(Keyword::FOR) { - let keyword = self.expect_one_of_keywords(&[ - Keyword::ALL, - Keyword::SELECT, - Keyword::INSERT, - Keyword::UPDATE, - Keyword::DELETE, - ])?; - Some(match keyword { - Keyword::ALL => CreatePolicyCommand::All, - Keyword::SELECT => CreatePolicyCommand::Select, - Keyword::INSERT => CreatePolicyCommand::Insert, - Keyword::UPDATE => CreatePolicyCommand::Update, - Keyword::DELETE => CreatePolicyCommand::Delete, - _ => unreachable!(), - }) - } else { - None - }; - - let to = if self.parse_keyword(Keyword::TO) { - Some(self.parse_comma_separated(|p| p.parse_owner())?) 
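- 
-         // Reviewer sketch (not part of this change): a policy exercising every optional
-         // clause parsed above (`AS`, `FOR`, and `TO` with `parse_owner`):
-         //
-         //     let sql = "CREATE POLICY p ON accounts AS RESTRICTIVE FOR UPDATE \
-         //                TO admin_role USING (owner = 'admin') WITH CHECK (balance >= 0)";
-         //     let stmt = Parser::parse_sql(&PostgreSqlDialect {}, sql)?.remove(0);
-         //     // -> Statement::CreatePolicy { policy_type: Some(Restrictive),
-         //     //      command: Some(Update), to: Some([Owner::Ident("admin_role")]), .. }
- 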
- } else { - None - }; - - let using = if self.parse_keyword(Keyword::USING) { - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - Some(expr) - } else { - None - }; - - let with_check = if self.parse_keywords(&[Keyword::WITH, Keyword::CHECK]) { - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - Some(expr) - } else { - None - }; - - Ok(CreatePolicy { - name, - table_name, - policy_type, - command, - to, - using, - with_check, - }) - } - - pub fn parse_drop(&mut self) -> Result { - // MySQL dialect supports `TEMPORARY` - let temporary = dialect_of!(self is MySqlDialect | GenericDialect | DuckDbDialect) - && self.parse_keyword(Keyword::TEMPORARY); - let persistent = dialect_of!(self is DuckDbDialect) - && self.parse_one_of_keywords(&[Keyword::PERSISTENT]).is_some(); - - let object_type = if self.parse_keyword(Keyword::TABLE) { - ObjectType::Table - } else if self.parse_keyword(Keyword::VIEW) { - ObjectType::View - } else if self.parse_keyword(Keyword::INDEX) { - ObjectType::Index - } else if self.parse_keyword(Keyword::ROLE) { - ObjectType::Role - } else if self.parse_keyword(Keyword::SCHEMA) { - ObjectType::Schema - } else if self.parse_keyword(Keyword::DATABASE) { - ObjectType::Database - } else if self.parse_keyword(Keyword::SEQUENCE) { - ObjectType::Sequence - } else if self.parse_keyword(Keyword::STAGE) { - ObjectType::Stage - } else if self.parse_keyword(Keyword::TYPE) { - ObjectType::Type - } else if self.parse_keyword(Keyword::FUNCTION) { - return self.parse_drop_function(); - } else if self.parse_keyword(Keyword::POLICY) { - return self.parse_drop_policy(); - } else if self.parse_keyword(Keyword::PROCEDURE) { - return self.parse_drop_procedure(); - } else if self.parse_keyword(Keyword::SECRET) { - return self.parse_drop_secret(temporary, persistent); - } else if self.parse_keyword(Keyword::TRIGGER) { - return self.parse_drop_trigger(); - } else { - return self.expected( - "TABLE, VIEW, INDEX, ROLE, SCHEMA, DATABASE, FUNCTION, PROCEDURE, STAGE, TRIGGER, SECRET, SEQUENCE, or TYPE after DROP", - self.peek_token(), - ); - }; - // Many dialects support the non-standard `IF EXISTS` clause and allow - // specifying multiple objects to delete in a single statement - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let names = self.parse_comma_separated(|p| p.parse_object_name(false))?; - - let loc = self.peek_token().span.start; - let cascade = self.parse_keyword(Keyword::CASCADE); - let restrict = self.parse_keyword(Keyword::RESTRICT); - let purge = self.parse_keyword(Keyword::PURGE); - if cascade && restrict { - return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP", loc); - } - if object_type == ObjectType::Role && (cascade || restrict || purge) { - return parser_err!( - "Cannot specify CASCADE, RESTRICT, or PURGE in DROP ROLE", - loc - ); - } - Ok(Statement::Drop { - object_type, - if_exists, - names, - cascade, - restrict, - purge, - temporary, - }) - } - - fn parse_optional_referential_action(&mut self) -> Option { - match self.parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]) { - Some(Keyword::CASCADE) => Some(ReferentialAction::Cascade), - Some(Keyword::RESTRICT) => Some(ReferentialAction::Restrict), - _ => None, - } - } - - /// ```sql - /// DROP FUNCTION [ IF EXISTS ] name [ ( [ [ argmode ] [ argname ] argtype [, ...] ] ) ] [, ...] 
- /// [ CASCADE | RESTRICT ] - /// ``` - fn parse_drop_function(&mut self) -> Result { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let func_desc = self.parse_comma_separated(Parser::parse_function_desc)?; - let option = self.parse_optional_referential_action(); - Ok(Statement::DropFunction { - if_exists, - func_desc, - option, - }) - } - - /// ```sql - /// DROP POLICY [ IF EXISTS ] name ON table_name [ CASCADE | RESTRICT ] - /// ``` - /// - /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-droppolicy.html) - fn parse_drop_policy(&mut self) -> Result { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let name = self.parse_identifier(false)?; - self.expect_keyword(Keyword::ON)?; - let table_name = self.parse_object_name(false)?; - let option = self.parse_optional_referential_action(); - Ok(Statement::DropPolicy { - if_exists, - name, - table_name, - option, - }) - } - - /// ```sql - /// DROP PROCEDURE [ IF EXISTS ] name [ ( [ [ argmode ] [ argname ] argtype [, ...] ] ) ] [, ...] - /// [ CASCADE | RESTRICT ] - /// ``` - fn parse_drop_procedure(&mut self) -> Result { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let proc_desc = self.parse_comma_separated(Parser::parse_function_desc)?; - let option = self.parse_optional_referential_action(); - Ok(Statement::DropProcedure { - if_exists, - proc_desc, - option, - }) - } - - fn parse_function_desc(&mut self) -> Result { - let name = self.parse_object_name(false)?; - - let args = if self.consume_token(&Token::LParen) { - if self.consume_token(&Token::RParen) { - None - } else { - let args = self.parse_comma_separated(Parser::parse_function_arg)?; - self.expect_token(&Token::RParen)?; - Some(args) - } - } else { - None - }; - - Ok(FunctionDesc { name, args }) - } - - /// See [DuckDB Docs](https://duckdb.org/docs/sql/statements/create_secret.html) for more details. - fn parse_drop_secret( - &mut self, - temporary: bool, - persistent: bool, - ) -> Result { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let name = self.parse_identifier(false)?; - let storage_specifier = if self.parse_keyword(Keyword::FROM) { - self.parse_identifier(false).ok() - } else { - None - }; - let temp = match (temporary, persistent) { - (true, false) => Some(true), - (false, true) => Some(false), - (false, false) => None, - _ => self.expected("TEMPORARY or PERSISTENT", self.peek_token())?, - }; - - Ok(Statement::DropSecret { - if_exists, - temporary: temp, - name, - storage_specifier, - }) - } - - /// Parse a `DECLARE` statement. - /// - /// ```sql - /// DECLARE name [ BINARY ] [ ASENSITIVE | INSENSITIVE ] [ [ NO ] SCROLL ] - /// CURSOR [ { WITH | WITHOUT } HOLD ] FOR query - /// ``` - /// - /// The syntax can vary significantly between warehouses. See the grammar - /// on the warehouse specific function in such cases. 
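- 
-     // Reviewer sketch (not part of this change): `parse_drop` above accepts multiple
-     // names plus `IF EXISTS`/`CASCADE`, and rejects contradictory or role-incompatible
-     // modifiers:
-     //
-     //     let sql = "DROP TABLE IF EXISTS t1, t2 CASCADE";
-     //     let stmt = Parser::parse_sql(&GenericDialect {}, sql)?.remove(0);
-     //     // -> Statement::Drop { names: [t1, t2], if_exists: true, cascade: true, .. }
-     //
-     //     Parser::parse_sql(&GenericDialect {}, "DROP ROLE r CASCADE").unwrap_err();
-     //     // "Cannot specify CASCADE, RESTRICT, or PURGE in DROP ROLE"
- 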
-     pub fn parse_declare(&mut self) -> Result<Statement, ParserError> {
-         if dialect_of!(self is BigQueryDialect) {
-             return self.parse_big_query_declare();
-         }
-         if dialect_of!(self is SnowflakeDialect) {
-             return self.parse_snowflake_declare();
-         }
-         if dialect_of!(self is MsSqlDialect) {
-             return self.parse_mssql_declare();
-         }
- 
-         let name = self.parse_identifier(false)?;
- 
-         let binary = Some(self.parse_keyword(Keyword::BINARY));
-         let sensitive = if self.parse_keyword(Keyword::INSENSITIVE) {
-             Some(true)
-         } else if self.parse_keyword(Keyword::ASENSITIVE) {
-             Some(false)
-         } else {
-             None
-         };
-         let scroll = if self.parse_keyword(Keyword::SCROLL) {
-             Some(true)
-         } else if self.parse_keywords(&[Keyword::NO, Keyword::SCROLL]) {
-             Some(false)
-         } else {
-             None
-         };
- 
-         self.expect_keyword(Keyword::CURSOR)?;
-         let declare_type = Some(DeclareType::Cursor);
- 
-         let hold = match self.parse_one_of_keywords(&[Keyword::WITH, Keyword::WITHOUT]) {
-             Some(keyword) => {
-                 self.expect_keyword(Keyword::HOLD)?;
- 
-                 match keyword {
-                     Keyword::WITH => Some(true),
-                     Keyword::WITHOUT => Some(false),
-                     _ => unreachable!(),
-                 }
-             }
-             None => None,
-         };
- 
-         self.expect_keyword(Keyword::FOR)?;
- 
-         let query = Some(self.parse_query()?);
- 
-         Ok(Statement::Declare {
-             stmts: vec![Declare {
-                 names: vec![name],
-                 data_type: None,
-                 assignment: None,
-                 declare_type,
-                 binary,
-                 sensitive,
-                 scroll,
-                 hold,
-                 for_query: query,
-             }],
-         })
-     }
- 
-     /// Parse a [BigQuery] `DECLARE` statement.
-     ///
-     /// Syntax:
-     /// ```text
-     /// DECLARE variable_name[, ...] [{ <variable_type> | <DEFAULT expression> }];
-     /// ```
-     /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/procedural-language#declare
-     pub fn parse_big_query_declare(&mut self) -> Result<Statement, ParserError> {
-         let names = self.parse_comma_separated(|parser| Parser::parse_identifier(parser, false))?;
- 
-         let data_type = match self.peek_token().token {
-             Token::Word(w) if w.keyword == Keyword::DEFAULT => None,
-             _ => Some(self.parse_data_type()?),
-         };
- 
-         let expr = if data_type.is_some() {
-             if self.parse_keyword(Keyword::DEFAULT) {
-                 Some(self.parse_expr()?)
-             } else {
-                 None
-             }
-         } else {
-             // If no variable type - default expression must be specified, per BQ docs.
-             // i.e `DECLARE foo;` is invalid.
-             self.expect_keyword(Keyword::DEFAULT)?;
-             Some(self.parse_expr()?)
-         };
- 
-         Ok(Statement::Declare {
-             stmts: vec![Declare {
-                 names,
-                 data_type,
-                 assignment: expr.map(|expr| DeclareAssignment::Default(Box::new(expr))),
-                 declare_type: None,
-                 binary: None,
-                 sensitive: None,
-                 scroll: None,
-                 hold: None,
-                 for_query: None,
-             }],
-         })
-     }
- 
-     /// Parse a [Snowflake] `DECLARE` statement.
-     ///
-     /// Syntax:
-     /// ```text
-     /// DECLARE
-     ///   [{ <variable_declaration>
-     ///      | <cursor_declaration>
-     ///      | <resultset_declaration>
-     ///      | <exception_declaration> }; ... ]
-     ///
-     /// <variable_declaration>
-     /// <variable_name> [<type>] [ { DEFAULT | := } <expression> ]
-     ///
-     /// <cursor_declaration>
-     /// <cursor_name> CURSOR FOR <query>
-     ///
-     /// <resultset_declaration>
-     /// <resultset_name> RESULTSET [ { DEFAULT | := } ( <query> ) ] ;
-     ///
-     /// <exception_declaration>
-     /// <exception_name> EXCEPTION [ ( <exception_number> , '<exception_message>' ) ] ;
-     /// ```
-     ///
-     /// [Snowflake]: https://docs.snowflake.com/en/sql-reference/snowflake-scripting/declare
-     pub fn parse_snowflake_declare(&mut self) -> Result<Statement, ParserError> {
-         let mut stmts = vec![];
-         loop {
-             let name = self.parse_identifier(false)?;
-             let (declare_type, for_query, assigned_expr, data_type) =
-                 if self.parse_keyword(Keyword::CURSOR) {
-                     self.expect_keyword(Keyword::FOR)?;
-                     match self.peek_token().token {
-                         Token::Word(w) if w.keyword == Keyword::SELECT => (
-                             Some(DeclareType::Cursor),
-                             Some(self.parse_query()?),
-                             None,
-                             None,
-                         ),
-                         _ => (
-                             Some(DeclareType::Cursor),
-                             None,
-                             Some(DeclareAssignment::For(Box::new(self.parse_expr()?))),
-                             None,
-                         ),
-                     }
-                 } else if self.parse_keyword(Keyword::RESULTSET) {
-                     let assigned_expr = if self.peek_token().token != Token::SemiColon {
-                         self.parse_snowflake_variable_declaration_expression()?
-                     } else {
-                         // Nothing more to do. The statement has no further parameters.
-                         None
-                     };
- 
-                     (Some(DeclareType::ResultSet), None, assigned_expr, None)
-                 } else if self.parse_keyword(Keyword::EXCEPTION) {
-                     let assigned_expr = if self.peek_token().token == Token::LParen {
-                         Some(DeclareAssignment::Expr(Box::new(self.parse_expr()?)))
-                     } else {
-                         // Nothing more to do. The statement has no further parameters.
-                         None
-                     };
- 
-                     (Some(DeclareType::Exception), None, assigned_expr, None)
-                 } else {
-                     // Without an explicit keyword, the only valid option is variable declaration.
-                     let (assigned_expr, data_type) = if let Some(assigned_expr) =
-                         self.parse_snowflake_variable_declaration_expression()?
-                     {
-                         (Some(assigned_expr), None)
-                     } else if let Token::Word(_) = self.peek_token().token {
-                         let data_type = self.parse_data_type()?;
-                         (
-                             self.parse_snowflake_variable_declaration_expression()?,
-                             Some(data_type),
-                         )
-                     } else {
-                         (None, None)
-                     };
-                     (None, None, assigned_expr, data_type)
-                 };
-             let stmt = Declare {
-                 names: vec![name],
-                 data_type,
-                 assignment: assigned_expr,
-                 declare_type,
-                 binary: None,
-                 sensitive: None,
-                 scroll: None,
-                 hold: None,
-                 for_query,
-             };
- 
-             stmts.push(stmt);
-             if self.consume_token(&Token::SemiColon) {
-                 match self.peek_token().token {
-                     Token::Word(w)
-                         if ALL_KEYWORDS
-                             .binary_search(&w.value.to_uppercase().as_str())
-                             .is_err() =>
-                     {
-                         // Not a keyword - start of a new declaration.
-                         continue;
-                     }
-                     _ => {
-                         // Put back the semicolon, this is the end of the DECLARE statement.
-                         self.prev_token();
-                     }
-                 }
-             }
- 
-             break;
-         }
- 
-         Ok(Statement::Declare { stmts })
-     }
- 
-     /// Parse a [MsSql] `DECLARE` statement.
-     ///
-     /// Syntax:
-     /// ```text
-     /// DECLARE
-     // {
-     //   { @local_variable [AS] data_type [ = value ] }
-     //   | { @cursor_variable_name CURSOR }
-     // } [ ,...n ]
-     /// ```
-     /// [MsSql]: https://learn.microsoft.com/en-us/sql/t-sql/language-elements/declare-local-variable-transact-sql?view=sql-server-ver16
-     pub fn parse_mssql_declare(&mut self) -> Result<Statement, ParserError> {
-         let stmts = self.parse_comma_separated(Parser::parse_mssql_declare_stmt)?;
- 
-         Ok(Statement::Declare { stmts })
-     }
- 
-     /// Parse the body of a [MsSql] `DECLARE` statement.
- /// - /// Syntax: - /// ```text - // { - // { @local_variable [AS] data_type [ = value ] } - // | { @cursor_variable_name CURSOR } - // } [ ,...n ] - /// ``` - /// [MsSql]: https://learn.microsoft.com/en-us/sql/t-sql/language-elements/declare-local-variable-transact-sql?view=sql-server-ver16 - pub fn parse_mssql_declare_stmt(&mut self) -> Result { - let name = { - let ident = self.parse_identifier(false)?; - if !ident.value.starts_with('@') { - Err(ParserError::TokenizerError( - "Invalid MsSql variable declaration.".to_string(), - )) - } else { - Ok(ident) - } - }?; - - let (declare_type, data_type) = match self.peek_token().token { - Token::Word(w) => match w.keyword { - Keyword::CURSOR => { - self.next_token(); - (Some(DeclareType::Cursor), None) - } - Keyword::AS => { - self.next_token(); - (None, Some(self.parse_data_type()?)) - } - _ => (None, Some(self.parse_data_type()?)), - }, - _ => (None, Some(self.parse_data_type()?)), - }; - - let assignment = self.parse_mssql_variable_declaration_expression()?; - - Ok(Declare { - names: vec![name], - data_type, - assignment, - declare_type, - binary: None, - sensitive: None, - scroll: None, - hold: None, - for_query: None, - }) - } - - /// Parses the assigned expression in a variable declaration. - /// - /// Syntax: - /// ```text - /// [ { DEFAULT | := } ] - /// ``` - /// - pub fn parse_snowflake_variable_declaration_expression( - &mut self, - ) -> Result, ParserError> { - Ok(match self.peek_token().token { - Token::Word(w) if w.keyword == Keyword::DEFAULT => { - self.next_token(); // Skip `DEFAULT` - Some(DeclareAssignment::Default(Box::new(self.parse_expr()?))) - } - Token::Assignment => { - self.next_token(); // Skip `:=` - Some(DeclareAssignment::DuckAssignment(Box::new( - self.parse_expr()?, - ))) - } - _ => None, - }) - } - - /// Parses the assigned expression in a variable declaration. 
- /// - /// Syntax: - /// ```text - /// [ = ] - /// ``` - pub fn parse_mssql_variable_declaration_expression( - &mut self, - ) -> Result, ParserError> { - Ok(match self.peek_token().token { - Token::Eq => { - self.next_token(); // Skip `=` - Some(DeclareAssignment::MsSqlAssignment(Box::new( - self.parse_expr()?, - ))) - } - _ => None, - }) - } - - // FETCH [ direction { FROM | IN } ] cursor INTO target; - pub fn parse_fetch_statement(&mut self) -> Result { - let direction = if self.parse_keyword(Keyword::NEXT) { - FetchDirection::Next - } else if self.parse_keyword(Keyword::PRIOR) { - FetchDirection::Prior - } else if self.parse_keyword(Keyword::FIRST) { - FetchDirection::First - } else if self.parse_keyword(Keyword::LAST) { - FetchDirection::Last - } else if self.parse_keyword(Keyword::ABSOLUTE) { - FetchDirection::Absolute { - limit: self.parse_number_value()?, - } - } else if self.parse_keyword(Keyword::RELATIVE) { - FetchDirection::Relative { - limit: self.parse_number_value()?, - } - } else if self.parse_keyword(Keyword::FORWARD) { - if self.parse_keyword(Keyword::ALL) { - FetchDirection::ForwardAll - } else { - FetchDirection::Forward { - // TODO: Support optional - limit: Some(self.parse_number_value()?), - } - } - } else if self.parse_keyword(Keyword::BACKWARD) { - if self.parse_keyword(Keyword::ALL) { - FetchDirection::BackwardAll - } else { - FetchDirection::Backward { - // TODO: Support optional - limit: Some(self.parse_number_value()?), - } - } - } else if self.parse_keyword(Keyword::ALL) { - FetchDirection::All - } else { - FetchDirection::Count { - limit: self.parse_number_value()?, - } - }; - - self.expect_one_of_keywords(&[Keyword::FROM, Keyword::IN])?; - - let name = self.parse_identifier(false)?; - - let into = if self.parse_keyword(Keyword::INTO) { - Some(self.parse_object_name(false)?) - } else { - None - }; - - Ok(Statement::Fetch { - name, - direction, - into, - }) - } - - pub fn parse_discard(&mut self) -> Result { - let object_type = if self.parse_keyword(Keyword::ALL) { - DiscardObject::ALL - } else if self.parse_keyword(Keyword::PLANS) { - DiscardObject::PLANS - } else if self.parse_keyword(Keyword::SEQUENCES) { - DiscardObject::SEQUENCES - } else if self.parse_keyword(Keyword::TEMP) || self.parse_keyword(Keyword::TEMPORARY) { - DiscardObject::TEMP - } else { - return self.expected( - "ALL, PLANS, SEQUENCES, TEMP or TEMPORARY after DISCARD", - self.peek_token(), - ); - }; - Ok(Statement::Discard { object_type }) - } - - pub fn parse_create_index(&mut self, unique: bool) -> Result { - let concurrently = self.parse_keyword(Keyword::CONCURRENTLY); - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let index_name = if if_not_exists || !self.parse_keyword(Keyword::ON) { - let index_name = self.parse_object_name(false)?; - self.expect_keyword(Keyword::ON)?; - Some(index_name) - } else { - None - }; - let table_name = self.parse_object_name(false)?; - let using = if self.parse_keyword(Keyword::USING) { - Some(self.parse_identifier(false)?) 
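- 
-         // Reviewer sketch (not part of this change): a cursor fetch handled by
-         // `parse_fetch_statement` above:
-         //
-         //     let sql = "FETCH FORWARD 5 FROM my_cursor INTO my_table";
-         //     let stmt = Parser::parse_sql(&GenericDialect {}, sql)?.remove(0);
-         //     // -> Statement::Fetch { direction: Forward { limit: Some(5) },
-         //     //      name: "my_cursor", into: Some("my_table") }
- 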
- } else { - None - }; - self.expect_token(&Token::LParen)?; - let columns = self.parse_comma_separated(Parser::parse_order_by_expr)?; - self.expect_token(&Token::RParen)?; - - let include = if self.parse_keyword(Keyword::INCLUDE) { - self.expect_token(&Token::LParen)?; - let columns = self.parse_comma_separated(|p| p.parse_identifier(false))?; - self.expect_token(&Token::RParen)?; - columns - } else { - vec![] - }; - - let nulls_distinct = if self.parse_keyword(Keyword::NULLS) { - let not = self.parse_keyword(Keyword::NOT); - self.expect_keyword(Keyword::DISTINCT)?; - Some(!not) - } else { - None - }; - - let with = if self.dialect.supports_create_index_with_clause() - && self.parse_keyword(Keyword::WITH) - { - self.expect_token(&Token::LParen)?; - let with_params = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - with_params - } else { - Vec::new() - }; - - let predicate = if self.parse_keyword(Keyword::WHERE) { - Some(self.parse_expr()?) - } else { - None - }; - - Ok(Statement::CreateIndex(CreateIndex { - name: index_name, - table_name, - using, - columns, - unique, - concurrently, - if_not_exists, - include, - nulls_distinct, - with, - predicate, - })) - } - - pub fn parse_create_extension(&mut self) -> Result { - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let name = self.parse_identifier(false)?; - - let (schema, version, cascade) = if self.parse_keyword(Keyword::WITH) { - let schema = if self.parse_keyword(Keyword::SCHEMA) { - Some(self.parse_identifier(false)?) - } else { - None - }; - - let version = if self.parse_keyword(Keyword::VERSION) { - Some(self.parse_identifier(false)?) - } else { - None - }; - - let cascade = self.parse_keyword(Keyword::CASCADE); - - (schema, version, cascade) - } else { - (None, None, false) - }; - - Ok(Statement::CreateExtension { - name, - if_not_exists, - schema, - version, - cascade, - }) - } - - //TODO: Implement parsing for Skewed - pub fn parse_hive_distribution(&mut self) -> Result { - if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) { - self.expect_token(&Token::LParen)?; - let columns = self.parse_comma_separated(Parser::parse_column_def)?; - self.expect_token(&Token::RParen)?; - Ok(HiveDistributionStyle::PARTITIONED { columns }) - } else { - Ok(HiveDistributionStyle::NONE) - } - } - - pub fn parse_hive_formats(&mut self) -> Result { - let mut hive_format = HiveFormat::default(); - loop { - match self.parse_one_of_keywords(&[ - Keyword::ROW, - Keyword::STORED, - Keyword::LOCATION, - Keyword::WITH, - ]) { - Some(Keyword::ROW) => { - hive_format.row_format = Some(self.parse_row_format()?); - } - Some(Keyword::STORED) => { - self.expect_keyword(Keyword::AS)?; - if self.parse_keyword(Keyword::INPUTFORMAT) { - let input_format = self.parse_expr()?; - self.expect_keyword(Keyword::OUTPUTFORMAT)?; - let output_format = self.parse_expr()?; - hive_format.storage = Some(HiveIOFormat::IOF { - input_format, - output_format, - }); - } else { - let format = self.parse_file_format()?; - hive_format.storage = Some(HiveIOFormat::FileFormat { format }); - } - } - Some(Keyword::LOCATION) => { - hive_format.location = Some(self.parse_literal_string()?); - } - Some(Keyword::WITH) => { - self.prev_token(); - let properties = self - .parse_options_with_keywords(&[Keyword::WITH, Keyword::SERDEPROPERTIES])?; - if !properties.is_empty() { - hive_format.serde_properties = Some(properties); - } else { - break; - } - } - None => break, - _ => break, - } - } - - Ok(hive_format) 
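- 
-             // Reviewer sketch (not part of this change): an index using most of the
-             // optional clauses parsed by `parse_create_index` above:
-             //
-             //     let sql = "CREATE UNIQUE INDEX IF NOT EXISTS idx ON t \
-             //                USING btree (a ASC, b DESC) INCLUDE (c) WHERE a > 0";
-             //     let stmt = Parser::parse_sql(&GenericDialect {}, sql)?.remove(0);
-             //     // -> Statement::CreateIndex(CreateIndex { unique: true, if_not_exists: true,
-             //     //      using: Some("btree"), include: ["c"], predicate: Some(..), .. })
- 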
- } - - pub fn parse_row_format(&mut self) -> Result { - self.expect_keyword(Keyword::FORMAT)?; - match self.parse_one_of_keywords(&[Keyword::SERDE, Keyword::DELIMITED]) { - Some(Keyword::SERDE) => { - let class = self.parse_literal_string()?; - Ok(HiveRowFormat::SERDE { class }) - } - _ => { - let mut row_delimiters = vec![]; - - loop { - match self.parse_one_of_keywords(&[ - Keyword::FIELDS, - Keyword::COLLECTION, - Keyword::MAP, - Keyword::LINES, - Keyword::NULL, - ]) { - Some(Keyword::FIELDS) => { - if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) { - row_delimiters.push(HiveRowDelimiter { - delimiter: HiveDelimiter::FieldsTerminatedBy, - char: self.parse_identifier(false)?, - }); - - if self.parse_keywords(&[Keyword::ESCAPED, Keyword::BY]) { - row_delimiters.push(HiveRowDelimiter { - delimiter: HiveDelimiter::FieldsEscapedBy, - char: self.parse_identifier(false)?, - }); - } - } else { - break; - } - } - Some(Keyword::COLLECTION) => { - if self.parse_keywords(&[ - Keyword::ITEMS, - Keyword::TERMINATED, - Keyword::BY, - ]) { - row_delimiters.push(HiveRowDelimiter { - delimiter: HiveDelimiter::CollectionItemsTerminatedBy, - char: self.parse_identifier(false)?, - }); - } else { - break; - } - } - Some(Keyword::MAP) => { - if self.parse_keywords(&[ - Keyword::KEYS, - Keyword::TERMINATED, - Keyword::BY, - ]) { - row_delimiters.push(HiveRowDelimiter { - delimiter: HiveDelimiter::MapKeysTerminatedBy, - char: self.parse_identifier(false)?, - }); - } else { - break; - } - } - Some(Keyword::LINES) => { - if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) { - row_delimiters.push(HiveRowDelimiter { - delimiter: HiveDelimiter::LinesTerminatedBy, - char: self.parse_identifier(false)?, - }); - } else { - break; - } - } - Some(Keyword::NULL) => { - if self.parse_keywords(&[Keyword::DEFINED, Keyword::AS]) { - row_delimiters.push(HiveRowDelimiter { - delimiter: HiveDelimiter::NullDefinedAs, - char: self.parse_identifier(false)?, - }); - } else { - break; - } - } - _ => { - break; - } - } - } - - Ok(HiveRowFormat::DELIMITED { - delimiters: row_delimiters, - }) - } - } - } - - fn parse_optional_on_cluster(&mut self) -> Result, ParserError> { - if self.parse_keywords(&[Keyword::ON, Keyword::CLUSTER]) { - Ok(Some(self.parse_identifier(false)?)) - } else { - Ok(None) - } - } - - pub fn parse_create_table( - &mut self, - or_replace: bool, - temporary: bool, - global: Option, - transient: bool, - ) -> Result { - let allow_unquoted_hyphen = dialect_of!(self is BigQueryDialect); - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let table_name = self.parse_object_name(allow_unquoted_hyphen)?; - - // Clickhouse has `ON CLUSTER 'cluster'` syntax for DDLs - let on_cluster = self.parse_optional_on_cluster()?; - - let like = if self.parse_keyword(Keyword::LIKE) || self.parse_keyword(Keyword::ILIKE) { - self.parse_object_name(allow_unquoted_hyphen).ok() - } else { - None - }; - - let clone = if self.parse_keyword(Keyword::CLONE) { - self.parse_object_name(allow_unquoted_hyphen).ok() - } else { - None - }; - - // parse optional column list (schema) - let (columns, constraints) = self.parse_columns()?; - let mut comment = if dialect_of!(self is HiveDialect) - && self.parse_keyword(Keyword::COMMENT) - { - let next_token = self.next_token(); - match next_token.token { - Token::SingleQuotedString(str) => Some(CommentDef::AfterColumnDefsWithoutEq(str)), - _ => self.expected("comment", next_token)?, - } - } else { - None - }; - - // SQLite supports `WITHOUT ROWID` 
at the end of `CREATE TABLE` - let without_rowid = self.parse_keywords(&[Keyword::WITHOUT, Keyword::ROWID]); - - let hive_distribution = self.parse_hive_distribution()?; - let clustered_by = self.parse_optional_clustered_by()?; - let hive_formats = self.parse_hive_formats()?; - // PostgreSQL supports `WITH ( options )`, before `AS` - let with_options = self.parse_options(Keyword::WITH)?; - let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?; - - let engine = if self.parse_keyword(Keyword::ENGINE) { - self.expect_token(&Token::Eq)?; - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => { - let name = w.value; - let parameters = if self.peek_token() == Token::LParen { - Some(self.parse_parenthesized_identifiers()?) - } else { - None - }; - Some(TableEngine { name, parameters }) - } - _ => self.expected("identifier", next_token)?, - } - } else { - None - }; - - let auto_increment_offset = if self.parse_keyword(Keyword::AUTO_INCREMENT) { - let _ = self.consume_token(&Token::Eq); - let next_token = self.next_token(); - match next_token.token { - Token::Number(s, _) => Some(Self::parse::(s, next_token.span.start)?), - _ => self.expected("literal int", next_token)?, - } - } else { - None - }; - - // ClickHouse supports `PRIMARY KEY`, before `ORDER BY` - // https://clickhouse.com/docs/en/sql-reference/statements/create/table#primary-key - let primary_key = if dialect_of!(self is ClickHouseDialect | GenericDialect) - && self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) - { - Some(Box::new(self.parse_expr()?)) - } else { - None - }; - - let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { - if self.consume_token(&Token::LParen) { - let columns = if self.peek_token() != Token::RParen { - self.parse_comma_separated(|p| p.parse_expr())? - } else { - vec![] - }; - self.expect_token(&Token::RParen)?; - Some(OneOrManyWithParens::Many(columns)) - } else { - Some(OneOrManyWithParens::One(self.parse_expr()?)) - } - } else { - None - }; - - let create_table_config = self.parse_optional_create_table_config()?; - - let default_charset = if self.parse_keywords(&[Keyword::DEFAULT, Keyword::CHARSET]) { - self.expect_token(&Token::Eq)?; - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => Some(w.value), - _ => self.expected("identifier", next_token)?, - } - } else { - None - }; - - let collation = if self.parse_keywords(&[Keyword::COLLATE]) { - self.expect_token(&Token::Eq)?; - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => Some(w.value), - _ => self.expected("identifier", next_token)?, - } - } else { - None - }; - - let on_commit: Option = - if self.parse_keywords(&[Keyword::ON, Keyword::COMMIT, Keyword::DELETE, Keyword::ROWS]) - { - Some(OnCommit::DeleteRows) - } else if self.parse_keywords(&[ - Keyword::ON, - Keyword::COMMIT, - Keyword::PRESERVE, - Keyword::ROWS, - ]) { - Some(OnCommit::PreserveRows) - } else if self.parse_keywords(&[Keyword::ON, Keyword::COMMIT, Keyword::DROP]) { - Some(OnCommit::Drop) - } else { - None - }; - - let strict = self.parse_keyword(Keyword::STRICT); - - // Excludes Hive dialect here since it has been handled after table column definitions. - if !dialect_of!(self is HiveDialect) && self.parse_keyword(Keyword::COMMENT) { - // rewind the COMMENT keyword - self.prev_token(); - comment = self.parse_optional_inline_comment()? - }; - - // Parse optional `AS ( query )` - let query = if self.parse_keyword(Keyword::AS) { - Some(self.parse_query()?) 
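// A sketch of what these two branches accept (table and column names
// are illustrative); the bare `SELECT` form without `AS` is gated on
// `supports_create_table_select()`, e.g. for MySQL:
//
//     let sql = "CREATE TABLE t2 AS SELECT a, b FROM t1";
//     let stmts = Parser::parse_sql(&GenericDialect {}, sql)?;
//     assert_eq!(stmts[0].to_string(), sql);
//
// `Parser::parse_sql` and `GenericDialect` are the crate's public API,
// so this round trip should hold for the simple case above.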
- } else if self.dialect.supports_create_table_select() && self.parse_keyword(Keyword::SELECT) - { - // rewind the SELECT keyword - self.prev_token(); - Some(self.parse_query()?) - } else { - None - }; - - Ok(CreateTableBuilder::new(table_name) - .temporary(temporary) - .columns(columns) - .constraints(constraints) - .with_options(with_options) - .table_properties(table_properties) - .or_replace(or_replace) - .if_not_exists(if_not_exists) - .transient(transient) - .hive_distribution(hive_distribution) - .hive_formats(Some(hive_formats)) - .global(global) - .query(query) - .without_rowid(without_rowid) - .like(like) - .clone_clause(clone) - .engine(engine) - .comment(comment) - .auto_increment_offset(auto_increment_offset) - .order_by(order_by) - .default_charset(default_charset) - .collation(collation) - .on_commit(on_commit) - .on_cluster(on_cluster) - .clustered_by(clustered_by) - .partition_by(create_table_config.partition_by) - .cluster_by(create_table_config.cluster_by) - .options(create_table_config.options) - .primary_key(primary_key) - .strict(strict) - .build()) - } - - /// Parse configuration like partitioning, clustering information during the table creation. - /// - /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_2) - /// [PostgreSQL](https://www.postgresql.org/docs/current/ddl-partitioning.html) - fn parse_optional_create_table_config( - &mut self, - ) -> Result { - let partition_by = if dialect_of!(self is BigQueryDialect | PostgreSqlDialect | GenericDialect) - && self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) - { - Some(Box::new(self.parse_expr()?)) - } else { - None - }; - - let mut cluster_by = None; - let mut options = None; - if dialect_of!(self is BigQueryDialect | GenericDialect) { - if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { - cluster_by = Some(WrappedCollection::NoWrapping( - self.parse_comma_separated(|p| p.parse_identifier(false))?, - )); - }; - - if let Token::Word(word) = self.peek_token().token { - if word.keyword == Keyword::OPTIONS { - options = Some(self.parse_options(Keyword::OPTIONS)?); - } - }; - } - - Ok(CreateTableConfiguration { - partition_by, - cluster_by, - options, - }) - } - - pub fn parse_optional_inline_comment(&mut self) -> Result, ParserError> { - let comment = if self.parse_keyword(Keyword::COMMENT) { - let has_eq = self.consume_token(&Token::Eq); - let next_token = self.next_token(); - match next_token.token { - Token::SingleQuotedString(str) => Some(if has_eq { - CommentDef::WithEq(str) - } else { - CommentDef::WithoutEq(str) - }), - _ => self.expected("comment", next_token)?, - } - } else { - None - }; - Ok(comment) - } - - pub fn parse_optional_procedure_parameters( - &mut self, - ) -> Result>, ParserError> { - let mut params = vec![]; - if !self.consume_token(&Token::LParen) || self.consume_token(&Token::RParen) { - return Ok(Some(params)); - } - loop { - if let Token::Word(_) = self.peek_token().token { - params.push(self.parse_procedure_param()?) 
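// Each iteration consumes one `name data_type` pair, so a header such
// as `CREATE PROCEDURE p (a INT, b VARCHAR(10))` (names illustrative)
// yields two `ProcedureParam`s before the closing paren is consumed
// below.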
- } - let comma = self.consume_token(&Token::Comma); - if self.consume_token(&Token::RParen) { - // allow a trailing comma, even though it's not in standard - break; - } else if !comma { - return self.expected("',' or ')' after parameter definition", self.peek_token()); - } - } - Ok(Some(params)) - } - - pub fn parse_columns(&mut self) -> Result<(Vec, Vec), ParserError> { - let mut columns = vec![]; - let mut constraints = vec![]; - if !self.consume_token(&Token::LParen) || self.consume_token(&Token::RParen) { - return Ok((columns, constraints)); - } - - loop { - if let Some(constraint) = self.parse_optional_table_constraint()? { - constraints.push(constraint); - } else if let Token::Word(_) = self.peek_token().token { - columns.push(self.parse_column_def()?); - } else { - return self.expected("column name or constraint definition", self.peek_token()); - } - - let comma = self.consume_token(&Token::Comma); - let rparen = self.peek_token().token == Token::RParen; - - if !comma && !rparen { - return self.expected("',' or ')' after column definition", self.peek_token()); - }; - - if rparen && (!comma || self.options.trailing_commas) { - let _ = self.consume_token(&Token::RParen); - break; - } - } - - Ok((columns, constraints)) - } - - pub fn parse_procedure_param(&mut self) -> Result { - let name = self.parse_identifier(false)?; - let data_type = self.parse_data_type()?; - Ok(ProcedureParam { name, data_type }) - } - - pub fn parse_column_def(&mut self) -> Result { - let name = self.parse_identifier(false)?; - let data_type = if self.is_column_type_sqlite_unspecified() { - DataType::Unspecified - } else { - self.parse_data_type()? - }; - let mut collation = if self.parse_keyword(Keyword::COLLATE) { - Some(self.parse_object_name(false)?) - } else { - None - }; - let mut options = vec![]; - loop { - if self.parse_keyword(Keyword::CONSTRAINT) { - let name = Some(self.parse_identifier(false)?); - if let Some(option) = self.parse_optional_column_option()? { - options.push(ColumnOptionDef { name, option }); - } else { - return self.expected( - "constraint details after CONSTRAINT ", - self.peek_token(), - ); - } - } else if let Some(option) = self.parse_optional_column_option()? { - options.push(ColumnOptionDef { name: None, option }); - } else if dialect_of!(self is MySqlDialect | SnowflakeDialect | GenericDialect) - && self.parse_keyword(Keyword::COLLATE) - { - collation = Some(self.parse_object_name(false)?); - } else { - break; - }; - } - Ok(ColumnDef { - name, - data_type, - collation, - options, - }) - } - - fn is_column_type_sqlite_unspecified(&mut self) -> bool { - if dialect_of!(self is SQLiteDialect) { - match self.peek_token().token { - Token::Word(word) => matches!( - word.keyword, - Keyword::CONSTRAINT - | Keyword::PRIMARY - | Keyword::NOT - | Keyword::UNIQUE - | Keyword::CHECK - | Keyword::DEFAULT - | Keyword::COLLATE - | Keyword::REFERENCES - | Keyword::GENERATED - | Keyword::AS - ), - _ => true, // e.g. comma immediately after column name - } - } else { - false - } - } - - pub fn parse_optional_column_option(&mut self) -> Result, ParserError> { - if let Some(option) = self.dialect.parse_column_option(self)? 
{ - return option; - } - - if self.parse_keywords(&[Keyword::CHARACTER, Keyword::SET]) { - Ok(Some(ColumnOption::CharacterSet( - self.parse_object_name(false)?, - ))) - } else if self.parse_keywords(&[Keyword::NOT, Keyword::NULL]) { - Ok(Some(ColumnOption::NotNull)) - } else if self.parse_keywords(&[Keyword::COMMENT]) { - let next_token = self.next_token(); - match next_token.token { - Token::SingleQuotedString(value, ..) => Ok(Some(ColumnOption::Comment(value))), - _ => self.expected("string", next_token), - } - } else if self.parse_keyword(Keyword::NULL) { - Ok(Some(ColumnOption::Null)) - } else if self.parse_keyword(Keyword::DEFAULT) { - Ok(Some(ColumnOption::Default(self.parse_expr()?))) - } else if dialect_of!(self is ClickHouseDialect| GenericDialect) - && self.parse_keyword(Keyword::MATERIALIZED) - { - Ok(Some(ColumnOption::Materialized(self.parse_expr()?))) - } else if dialect_of!(self is ClickHouseDialect| GenericDialect) - && self.parse_keyword(Keyword::ALIAS) - { - Ok(Some(ColumnOption::Alias(self.parse_expr()?))) - } else if dialect_of!(self is ClickHouseDialect| GenericDialect) - && self.parse_keyword(Keyword::EPHEMERAL) - { - // The expression is optional for the EPHEMERAL syntax, so we need to check - // if the column definition has remaining tokens before parsing the expression. - if matches!(self.peek_token().token, Token::Comma | Token::RParen) { - Ok(Some(ColumnOption::Ephemeral(None))) - } else { - Ok(Some(ColumnOption::Ephemeral(Some(self.parse_expr()?)))) - } - } else if self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) { - let characteristics = self.parse_constraint_characteristics()?; - Ok(Some(ColumnOption::Unique { - is_primary: true, - characteristics, - })) - } else if self.parse_keyword(Keyword::UNIQUE) { - let characteristics = self.parse_constraint_characteristics()?; - Ok(Some(ColumnOption::Unique { - is_primary: false, - characteristics, - })) - } else if self.parse_keyword(Keyword::REFERENCES) { - let foreign_table = self.parse_object_name(false)?; - // PostgreSQL allows omitting the column list and - // uses the primary key column of the foreign table by default - let referred_columns = self.parse_parenthesized_column_list(Optional, false)?; - let mut on_delete = None; - let mut on_update = None; - loop { - if on_delete.is_none() && self.parse_keywords(&[Keyword::ON, Keyword::DELETE]) { - on_delete = Some(self.parse_referential_action()?); - } else if on_update.is_none() - && self.parse_keywords(&[Keyword::ON, Keyword::UPDATE]) - { - on_update = Some(self.parse_referential_action()?); - } else { - break; - } - } - let characteristics = self.parse_constraint_characteristics()?; - - Ok(Some(ColumnOption::ForeignKey { - foreign_table, - referred_columns, - on_delete, - on_update, - characteristics, - })) - } else if self.parse_keyword(Keyword::CHECK) { - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - Ok(Some(ColumnOption::Check(expr))) - } else if self.parse_keyword(Keyword::AUTO_INCREMENT) - && dialect_of!(self is MySqlDialect | GenericDialect) - { - // Support AUTO_INCREMENT for MySQL - Ok(Some(ColumnOption::DialectSpecific(vec![ - Token::make_keyword("AUTO_INCREMENT"), - ]))) - } else if self.parse_keyword(Keyword::AUTOINCREMENT) - && dialect_of!(self is SQLiteDialect | GenericDialect) - { - // Support AUTOINCREMENT for SQLite - Ok(Some(ColumnOption::DialectSpecific(vec![ - Token::make_keyword("AUTOINCREMENT"), - ]))) - } else if self.parse_keyword(Keyword::ASC) - && 
self.dialect.supports_asc_desc_in_column_definition() - { - // Support ASC for SQLite - Ok(Some(ColumnOption::DialectSpecific(vec![ - Token::make_keyword("ASC"), - ]))) - } else if self.parse_keyword(Keyword::DESC) - && self.dialect.supports_asc_desc_in_column_definition() - { - // Support DESC for SQLite - Ok(Some(ColumnOption::DialectSpecific(vec![ - Token::make_keyword("DESC"), - ]))) - } else if self.parse_keywords(&[Keyword::ON, Keyword::UPDATE]) - && dialect_of!(self is MySqlDialect | GenericDialect) - { - let expr = self.parse_expr()?; - Ok(Some(ColumnOption::OnUpdate(expr))) - } else if self.parse_keyword(Keyword::GENERATED) { - self.parse_optional_column_option_generated() - } else if dialect_of!(self is BigQueryDialect | GenericDialect) - && self.parse_keyword(Keyword::OPTIONS) - { - self.prev_token(); - Ok(Some(ColumnOption::Options( - self.parse_options(Keyword::OPTIONS)?, - ))) - } else if self.parse_keyword(Keyword::AS) - && dialect_of!(self is MySqlDialect | SQLiteDialect | DuckDbDialect | GenericDialect) - { - self.parse_optional_column_option_as() - } else if self.parse_keyword(Keyword::IDENTITY) - && dialect_of!(self is MsSqlDialect | GenericDialect) - { - let parameters = if self.consume_token(&Token::LParen) { - let seed = self.parse_number()?; - self.expect_token(&Token::Comma)?; - let increment = self.parse_number()?; - self.expect_token(&Token::RParen)?; - - Some(IdentityPropertyFormatKind::FunctionCall( - IdentityParameters { seed, increment }, - )) - } else { - None - }; - Ok(Some(ColumnOption::Identity( - IdentityPropertyKind::Identity(IdentityProperty { - parameters, - order: None, - }), - ))) - } else if dialect_of!(self is SQLiteDialect | GenericDialect) - && self.parse_keywords(&[Keyword::ON, Keyword::CONFLICT]) - { - // Support ON CONFLICT for SQLite - Ok(Some(ColumnOption::OnConflict( - self.expect_one_of_keywords(&[ - Keyword::ROLLBACK, - Keyword::ABORT, - Keyword::FAIL, - Keyword::IGNORE, - Keyword::REPLACE, - ])?, - ))) - } else { - Ok(None) - } - } - - pub(crate) fn parse_tag(&mut self) -> Result { - let name = self.parse_identifier(false)?; - self.expect_token(&Token::Eq)?; - let value = self.parse_literal_string()?; - - Ok(Tag::new(name, value)) - } - - fn parse_optional_column_option_generated( - &mut self, - ) -> Result, ParserError> { - if self.parse_keywords(&[Keyword::ALWAYS, Keyword::AS, Keyword::IDENTITY]) { - let mut sequence_options = vec![]; - if self.expect_token(&Token::LParen).is_ok() { - sequence_options = self.parse_create_sequence_options()?; - self.expect_token(&Token::RParen)?; - } - Ok(Some(ColumnOption::Generated { - generated_as: GeneratedAs::Always, - sequence_options: Some(sequence_options), - generation_expr: None, - generation_expr_mode: None, - generated_keyword: true, - })) - } else if self.parse_keywords(&[ - Keyword::BY, - Keyword::DEFAULT, - Keyword::AS, - Keyword::IDENTITY, - ]) { - let mut sequence_options = vec![]; - if self.expect_token(&Token::LParen).is_ok() { - sequence_options = self.parse_create_sequence_options()?; - self.expect_token(&Token::RParen)?; - } - Ok(Some(ColumnOption::Generated { - generated_as: GeneratedAs::ByDefault, - sequence_options: Some(sequence_options), - generation_expr: None, - generation_expr_mode: None, - generated_keyword: true, - })) - } else if self.parse_keywords(&[Keyword::ALWAYS, Keyword::AS]) { - if self.expect_token(&Token::LParen).is_ok() { - let expr = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - let (gen_as, expr_mode) = if self.parse_keywords(&[Keyword::STORED]) 
{ - Ok(( - GeneratedAs::ExpStored, - Some(GeneratedExpressionMode::Stored), - )) - } else if dialect_of!(self is PostgreSqlDialect) { - // Postgres' AS IDENTITY branches are above, this one needs STORED - self.expected("STORED", self.peek_token()) - } else if self.parse_keywords(&[Keyword::VIRTUAL]) { - Ok((GeneratedAs::Always, Some(GeneratedExpressionMode::Virtual))) - } else { - Ok((GeneratedAs::Always, None)) - }?; - - Ok(Some(ColumnOption::Generated { - generated_as: gen_as, - sequence_options: None, - generation_expr: Some(expr), - generation_expr_mode: expr_mode, - generated_keyword: true, - })) - } else { - Ok(None) - } - } else { - Ok(None) - } - } - - fn parse_optional_column_option_as(&mut self) -> Result, ParserError> { - // Some DBs allow 'AS (expr)', shorthand for GENERATED ALWAYS AS - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - - let (gen_as, expr_mode) = if self.parse_keywords(&[Keyword::STORED]) { - ( - GeneratedAs::ExpStored, - Some(GeneratedExpressionMode::Stored), - ) - } else if self.parse_keywords(&[Keyword::VIRTUAL]) { - (GeneratedAs::Always, Some(GeneratedExpressionMode::Virtual)) - } else { - (GeneratedAs::Always, None) - }; - - Ok(Some(ColumnOption::Generated { - generated_as: gen_as, - sequence_options: None, - generation_expr: Some(expr), - generation_expr_mode: expr_mode, - generated_keyword: false, - })) - } - - pub fn parse_optional_clustered_by(&mut self) -> Result, ParserError> { - let clustered_by = if dialect_of!(self is HiveDialect|GenericDialect) - && self.parse_keywords(&[Keyword::CLUSTERED, Keyword::BY]) - { - let columns = self.parse_parenthesized_column_list(Mandatory, false)?; - - let sorted_by = if self.parse_keywords(&[Keyword::SORTED, Keyword::BY]) { - self.expect_token(&Token::LParen)?; - let sorted_by_columns = self.parse_comma_separated(|p| p.parse_order_by_expr())?; - self.expect_token(&Token::RParen)?; - Some(sorted_by_columns) - } else { - None - }; - - self.expect_keyword(Keyword::INTO)?; - let num_buckets = self.parse_number_value()?; - self.expect_keyword(Keyword::BUCKETS)?; - Some(ClusteredBy { - columns, - sorted_by, - num_buckets, - }) - } else { - None - }; - Ok(clustered_by) - } - - pub fn parse_referential_action(&mut self) -> Result { - if self.parse_keyword(Keyword::RESTRICT) { - Ok(ReferentialAction::Restrict) - } else if self.parse_keyword(Keyword::CASCADE) { - Ok(ReferentialAction::Cascade) - } else if self.parse_keywords(&[Keyword::SET, Keyword::NULL]) { - Ok(ReferentialAction::SetNull) - } else if self.parse_keywords(&[Keyword::NO, Keyword::ACTION]) { - Ok(ReferentialAction::NoAction) - } else if self.parse_keywords(&[Keyword::SET, Keyword::DEFAULT]) { - Ok(ReferentialAction::SetDefault) - } else { - self.expected( - "one of RESTRICT, CASCADE, SET NULL, NO ACTION or SET DEFAULT", - self.peek_token(), - ) - } - } - - pub fn parse_constraint_characteristics( - &mut self, - ) -> Result, ParserError> { - let mut cc = ConstraintCharacteristics::default(); - - loop { - if cc.deferrable.is_none() && self.parse_keywords(&[Keyword::NOT, Keyword::DEFERRABLE]) - { - cc.deferrable = Some(false); - } else if cc.deferrable.is_none() && self.parse_keyword(Keyword::DEFERRABLE) { - cc.deferrable = Some(true); - } else if cc.initially.is_none() && self.parse_keyword(Keyword::INITIALLY) { - if self.parse_keyword(Keyword::DEFERRED) { - cc.initially = Some(DeferrableInitial::Deferred); - } else if self.parse_keyword(Keyword::IMMEDIATE) { - cc.initially = 
Some(DeferrableInitial::Immediate); - } else { - self.expected("one of DEFERRED or IMMEDIATE", self.peek_token())?; - } - } else if cc.enforced.is_none() && self.parse_keyword(Keyword::ENFORCED) { - cc.enforced = Some(true); - } else if cc.enforced.is_none() - && self.parse_keywords(&[Keyword::NOT, Keyword::ENFORCED]) - { - cc.enforced = Some(false); - } else { - break; - } - } - - if cc.deferrable.is_some() || cc.initially.is_some() || cc.enforced.is_some() { - Ok(Some(cc)) - } else { - Ok(None) - } - } - - pub fn parse_optional_table_constraint( - &mut self, - ) -> Result, ParserError> { - let name = if self.parse_keyword(Keyword::CONSTRAINT) { - Some(self.parse_identifier(false)?) - } else { - None - }; - - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) if w.keyword == Keyword::UNIQUE => { - let index_type_display = self.parse_index_type_display(); - if !dialect_of!(self is GenericDialect | MySqlDialect) - && !index_type_display.is_none() - { - return self - .expected("`index_name` or `(column_name [, ...])`", self.peek_token()); - } - - let nulls_distinct = self.parse_optional_nulls_distinct()?; - - // optional index name - let index_name = self.parse_optional_indent()?; - let index_type = self.parse_optional_using_then_index_type()?; - - let columns = self.parse_parenthesized_column_list(Mandatory, false)?; - let index_options = self.parse_index_options()?; - let characteristics = self.parse_constraint_characteristics()?; - Ok(Some(TableConstraint::Unique { - name, - index_name, - index_type_display, - index_type, - columns, - index_options, - characteristics, - nulls_distinct, - })) - } - Token::Word(w) if w.keyword == Keyword::PRIMARY => { - // after `PRIMARY` always stay `KEY` - self.expect_keyword(Keyword::KEY)?; - - // optional index name - let index_name = self.parse_optional_indent()?; - let index_type = self.parse_optional_using_then_index_type()?; - - let columns = self.parse_parenthesized_column_list(Mandatory, false)?; - let index_options = self.parse_index_options()?; - let characteristics = self.parse_constraint_characteristics()?; - Ok(Some(TableConstraint::PrimaryKey { - name, - index_name, - index_type, - columns, - index_options, - characteristics, - })) - } - Token::Word(w) if w.keyword == Keyword::FOREIGN => { - self.expect_keyword(Keyword::KEY)?; - let columns = self.parse_parenthesized_column_list(Mandatory, false)?; - self.expect_keyword(Keyword::REFERENCES)?; - let foreign_table = self.parse_object_name(false)?; - let referred_columns = self.parse_parenthesized_column_list(Mandatory, false)?; - let mut on_delete = None; - let mut on_update = None; - loop { - if on_delete.is_none() && self.parse_keywords(&[Keyword::ON, Keyword::DELETE]) { - on_delete = Some(self.parse_referential_action()?); - } else if on_update.is_none() - && self.parse_keywords(&[Keyword::ON, Keyword::UPDATE]) - { - on_update = Some(self.parse_referential_action()?); - } else { - break; - } - } - - let characteristics = self.parse_constraint_characteristics()?; - - Ok(Some(TableConstraint::ForeignKey { - name, - columns, - foreign_table, - referred_columns, - on_delete, - on_update, - characteristics, - })) - } - Token::Word(w) if w.keyword == Keyword::CHECK => { - self.expect_token(&Token::LParen)?; - let expr = Box::new(self.parse_expr()?); - self.expect_token(&Token::RParen)?; - Ok(Some(TableConstraint::Check { name, expr })) - } - Token::Word(w) - if (w.keyword == Keyword::INDEX || w.keyword == Keyword::KEY) - && dialect_of!(self is GenericDialect | 
MySqlDialect) - && name.is_none() => - { - let display_as_key = w.keyword == Keyword::KEY; - - let name = match self.peek_token().token { - Token::Word(word) if word.keyword == Keyword::USING => None, - _ => self.parse_optional_indent()?, - }; - - let index_type = self.parse_optional_using_then_index_type()?; - let columns = self.parse_parenthesized_column_list(Mandatory, false)?; - - Ok(Some(TableConstraint::Index { - display_as_key, - name, - index_type, - columns, - })) - } - Token::Word(w) - if (w.keyword == Keyword::FULLTEXT || w.keyword == Keyword::SPATIAL) - && dialect_of!(self is GenericDialect | MySqlDialect) => - { - if let Some(name) = name { - return self.expected( - "FULLTEXT or SPATIAL option without constraint name", - TokenWithSpan { - token: Token::make_keyword(&name.to_string()), - span: next_token.span, - }, - ); - } - - let fulltext = w.keyword == Keyword::FULLTEXT; - - let index_type_display = self.parse_index_type_display(); - - let opt_index_name = self.parse_optional_indent()?; - - let columns = self.parse_parenthesized_column_list(Mandatory, false)?; - - Ok(Some(TableConstraint::FulltextOrSpatial { - fulltext, - index_type_display, - opt_index_name, - columns, - })) - } - _ => { - if name.is_some() { - self.expected("PRIMARY, UNIQUE, FOREIGN, or CHECK", next_token) - } else { - self.prev_token(); - Ok(None) - } - } - } - } - - fn parse_optional_nulls_distinct(&mut self) -> Result { - Ok(if self.parse_keyword(Keyword::NULLS) { - let not = self.parse_keyword(Keyword::NOT); - self.expect_keyword(Keyword::DISTINCT)?; - if not { - NullsDistinctOption::NotDistinct - } else { - NullsDistinctOption::Distinct - } - } else { - NullsDistinctOption::None - }) - } - - pub fn maybe_parse_options( - &mut self, - keyword: Keyword, - ) -> Result>, ParserError> { - if let Token::Word(word) = self.peek_token().token { - if word.keyword == keyword { - return Ok(Some(self.parse_options(keyword)?)); - } - }; - Ok(None) - } - - pub fn parse_options(&mut self, keyword: Keyword) -> Result, ParserError> { - if self.parse_keyword(keyword) { - self.expect_token(&Token::LParen)?; - let options = self.parse_comma_separated(Parser::parse_sql_option)?; - self.expect_token(&Token::RParen)?; - Ok(options) - } else { - Ok(vec![]) - } - } - - pub fn parse_options_with_keywords( - &mut self, - keywords: &[Keyword], - ) -> Result, ParserError> { - if self.parse_keywords(keywords) { - self.expect_token(&Token::LParen)?; - let options = self.parse_comma_separated(Parser::parse_sql_option)?; - self.expect_token(&Token::RParen)?; - Ok(options) - } else { - Ok(vec![]) - } - } - - pub fn parse_index_type(&mut self) -> Result { - if self.parse_keyword(Keyword::BTREE) { - Ok(IndexType::BTree) - } else if self.parse_keyword(Keyword::HASH) { - Ok(IndexType::Hash) - } else { - self.expected("index type {BTREE | HASH}", self.peek_token()) - } - } - - /// Parse [USING {BTREE | HASH}] - pub fn parse_optional_using_then_index_type( - &mut self, - ) -> Result, ParserError> { - if self.parse_keyword(Keyword::USING) { - Ok(Some(self.parse_index_type()?)) - } else { - Ok(None) - } - } - - /// Parse `[ident]`, mostly `ident` is name, like: - /// `window_name`, `index_name`, ... 
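/// For example, in the table constraint `UNIQUE idx_name (a, b)` this
/// returns `Some(idx_name)`, while for `UNIQUE (a, b)` the inner
/// `parse_identifier` fails on `(` and `maybe_parse` rewinds, so
/// `None` is returned without consuming any tokens.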
- pub fn parse_optional_indent(&mut self) -> Result, ParserError> { - self.maybe_parse(|parser| parser.parse_identifier(false)) - } - - #[must_use] - pub fn parse_index_type_display(&mut self) -> KeyOrIndexDisplay { - if self.parse_keyword(Keyword::KEY) { - KeyOrIndexDisplay::Key - } else if self.parse_keyword(Keyword::INDEX) { - KeyOrIndexDisplay::Index - } else { - KeyOrIndexDisplay::None - } - } - - pub fn parse_optional_index_option(&mut self) -> Result, ParserError> { - if let Some(index_type) = self.parse_optional_using_then_index_type()? { - Ok(Some(IndexOption::Using(index_type))) - } else if self.parse_keyword(Keyword::COMMENT) { - let s = self.parse_literal_string()?; - Ok(Some(IndexOption::Comment(s))) - } else { - Ok(None) - } - } - - pub fn parse_index_options(&mut self) -> Result, ParserError> { - let mut options = Vec::new(); - - loop { - match self.parse_optional_index_option()? { - Some(index_option) => options.push(index_option), - None => return Ok(options), - } - } - } - - pub fn parse_sql_option(&mut self) -> Result { - let is_mssql = dialect_of!(self is MsSqlDialect|GenericDialect); - - match self.peek_token().token { - Token::Word(w) if w.keyword == Keyword::HEAP && is_mssql => { - Ok(SqlOption::Ident(self.parse_identifier(false)?)) - } - Token::Word(w) if w.keyword == Keyword::PARTITION && is_mssql => { - self.parse_option_partition() - } - Token::Word(w) if w.keyword == Keyword::CLUSTERED && is_mssql => { - self.parse_option_clustered() - } - _ => { - let name = self.parse_identifier(false)?; - self.expect_token(&Token::Eq)?; - let value = self.parse_expr()?; - - Ok(SqlOption::KeyValue { key: name, value }) - } - } - } - - pub fn parse_option_clustered(&mut self) -> Result { - if self.parse_keywords(&[ - Keyword::CLUSTERED, - Keyword::COLUMNSTORE, - Keyword::INDEX, - Keyword::ORDER, - ]) { - Ok(SqlOption::Clustered( - TableOptionsClustered::ColumnstoreIndexOrder( - self.parse_parenthesized_column_list(IsOptional::Mandatory, false)?, - ), - )) - } else if self.parse_keywords(&[Keyword::CLUSTERED, Keyword::COLUMNSTORE, Keyword::INDEX]) { - Ok(SqlOption::Clustered( - TableOptionsClustered::ColumnstoreIndex, - )) - } else if self.parse_keywords(&[Keyword::CLUSTERED, Keyword::INDEX]) { - self.expect_token(&Token::LParen)?; - - let columns = self.parse_comma_separated(|p| { - let name = p.parse_identifier(false)?; - let asc = p.parse_asc_desc(); - - Ok(ClusteredIndex { name, asc }) - })?; - - self.expect_token(&Token::RParen)?; - - Ok(SqlOption::Clustered(TableOptionsClustered::Index(columns))) - } else { - Err(ParserError::ParserError( - "invalid CLUSTERED sequence".to_string(), - )) - } - } - - pub fn parse_option_partition(&mut self) -> Result { - self.expect_keyword(Keyword::PARTITION)?; - self.expect_token(&Token::LParen)?; - let column_name = self.parse_identifier(false)?; - - self.expect_keyword(Keyword::RANGE)?; - let range_direction = if self.parse_keyword(Keyword::LEFT) { - Some(PartitionRangeDirection::Left) - } else if self.parse_keyword(Keyword::RIGHT) { - Some(PartitionRangeDirection::Right) - } else { - None - }; - - self.expect_keywords(&[Keyword::FOR, Keyword::VALUES])?; - self.expect_token(&Token::LParen)?; - - let for_values = self.parse_comma_separated(Parser::parse_expr)?; - - self.expect_token(&Token::RParen)?; - self.expect_token(&Token::RParen)?; - - Ok(SqlOption::Partition { - column_name, - range_direction, - for_values, - }) - } - - pub fn parse_partition(&mut self) -> Result { - self.expect_token(&Token::LParen)?; - let partitions = 
self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - Ok(Partition::Partitions(partitions)) - } - - pub fn parse_projection_select(&mut self) -> Result { - self.expect_token(&Token::LParen)?; - self.expect_keyword(Keyword::SELECT)?; - let projection = self.parse_projection()?; - let group_by = self.parse_optional_group_by()?; - let order_by = self.parse_optional_order_by()?; - self.expect_token(&Token::RParen)?; - Ok(ProjectionSelect { - projection, - group_by, - order_by, - }) - } - pub fn parse_alter_table_add_projection(&mut self) -> Result { - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let name = self.parse_identifier(false)?; - let query = self.parse_projection_select()?; - Ok(AlterTableOperation::AddProjection { - if_not_exists, - name, - select: query, - }) - } - - pub fn parse_alter_table_operation(&mut self) -> Result { - let operation = if self.parse_keyword(Keyword::ADD) { - if let Some(constraint) = self.parse_optional_table_constraint()? { - AlterTableOperation::AddConstraint(constraint) - } else if dialect_of!(self is ClickHouseDialect|GenericDialect) - && self.parse_keyword(Keyword::PROJECTION) - { - return self.parse_alter_table_add_projection(); - } else { - let if_not_exists = - self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let mut new_partitions = vec![]; - loop { - if self.parse_keyword(Keyword::PARTITION) { - new_partitions.push(self.parse_partition()?); - } else { - break; - } - } - if !new_partitions.is_empty() { - AlterTableOperation::AddPartitions { - if_not_exists, - new_partitions, - } - } else { - let column_keyword = self.parse_keyword(Keyword::COLUMN); - - let if_not_exists = if dialect_of!(self is PostgreSqlDialect | BigQueryDialect | DuckDbDialect | GenericDialect) - { - self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]) - || if_not_exists - } else { - false - }; - - let column_def = self.parse_column_def()?; - - let column_position = self.parse_column_position()?; - - AlterTableOperation::AddColumn { - column_keyword, - if_not_exists, - column_def, - column_position, - } - } - } - } else if self.parse_keyword(Keyword::RENAME) { - if dialect_of!(self is PostgreSqlDialect) && self.parse_keyword(Keyword::CONSTRAINT) { - let old_name = self.parse_identifier(false)?; - self.expect_keyword(Keyword::TO)?; - let new_name = self.parse_identifier(false)?; - AlterTableOperation::RenameConstraint { old_name, new_name } - } else if self.parse_keyword(Keyword::TO) { - let table_name = self.parse_object_name(false)?; - AlterTableOperation::RenameTable { table_name } - } else { - let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] - let old_column_name = self.parse_identifier(false)?; - self.expect_keyword(Keyword::TO)?; - let new_column_name = self.parse_identifier(false)?; - AlterTableOperation::RenameColumn { - old_column_name, - new_column_name, - } - } - } else if self.parse_keyword(Keyword::DISABLE) { - if self.parse_keywords(&[Keyword::ROW, Keyword::LEVEL, Keyword::SECURITY]) { - AlterTableOperation::DisableRowLevelSecurity {} - } else if self.parse_keyword(Keyword::RULE) { - let name = self.parse_identifier(false)?; - AlterTableOperation::DisableRule { name } - } else if self.parse_keyword(Keyword::TRIGGER) { - let name = self.parse_identifier(false)?; - AlterTableOperation::DisableTrigger { name } - } else { - return self.expected( - "ROW LEVEL SECURITY, RULE, or TRIGGER after DISABLE", - self.peek_token(), - ); - } - } else if 
self.parse_keyword(Keyword::ENABLE) { - if self.parse_keywords(&[Keyword::ALWAYS, Keyword::RULE]) { - let name = self.parse_identifier(false)?; - AlterTableOperation::EnableAlwaysRule { name } - } else if self.parse_keywords(&[Keyword::ALWAYS, Keyword::TRIGGER]) { - let name = self.parse_identifier(false)?; - AlterTableOperation::EnableAlwaysTrigger { name } - } else if self.parse_keywords(&[Keyword::ROW, Keyword::LEVEL, Keyword::SECURITY]) { - AlterTableOperation::EnableRowLevelSecurity {} - } else if self.parse_keywords(&[Keyword::REPLICA, Keyword::RULE]) { - let name = self.parse_identifier(false)?; - AlterTableOperation::EnableReplicaRule { name } - } else if self.parse_keywords(&[Keyword::REPLICA, Keyword::TRIGGER]) { - let name = self.parse_identifier(false)?; - AlterTableOperation::EnableReplicaTrigger { name } - } else if self.parse_keyword(Keyword::RULE) { - let name = self.parse_identifier(false)?; - AlterTableOperation::EnableRule { name } - } else if self.parse_keyword(Keyword::TRIGGER) { - let name = self.parse_identifier(false)?; - AlterTableOperation::EnableTrigger { name } - } else { - return self.expected( - "ALWAYS, REPLICA, ROW LEVEL SECURITY, RULE, or TRIGGER after ENABLE", - self.peek_token(), - ); - } - } else if self.parse_keywords(&[Keyword::CLEAR, Keyword::PROJECTION]) - && dialect_of!(self is ClickHouseDialect|GenericDialect) - { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let name = self.parse_identifier(false)?; - let partition = if self.parse_keywords(&[Keyword::IN, Keyword::PARTITION]) { - Some(self.parse_identifier(false)?) - } else { - None - }; - AlterTableOperation::ClearProjection { - if_exists, - name, - partition, - } - } else if self.parse_keywords(&[Keyword::MATERIALIZE, Keyword::PROJECTION]) - && dialect_of!(self is ClickHouseDialect|GenericDialect) - { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let name = self.parse_identifier(false)?; - let partition = if self.parse_keywords(&[Keyword::IN, Keyword::PARTITION]) { - Some(self.parse_identifier(false)?) 
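// e.g. ClickHouse's
// `ALTER TABLE t MATERIALIZE PROJECTION p IN PARTITION part_name`
// (identifiers illustrative); without `IN PARTITION` this stays `None`.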
- } else { - None - }; - AlterTableOperation::MaterializeProjection { - if_exists, - name, - partition, - } - } else if self.parse_keyword(Keyword::DROP) { - if self.parse_keywords(&[Keyword::IF, Keyword::EXISTS, Keyword::PARTITION]) { - self.expect_token(&Token::LParen)?; - let partitions = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - AlterTableOperation::DropPartitions { - partitions, - if_exists: true, - } - } else if self.parse_keyword(Keyword::PARTITION) { - self.expect_token(&Token::LParen)?; - let partitions = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - AlterTableOperation::DropPartitions { - partitions, - if_exists: false, - } - } else if self.parse_keyword(Keyword::CONSTRAINT) { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let name = self.parse_identifier(false)?; - let cascade = self.parse_keyword(Keyword::CASCADE); - AlterTableOperation::DropConstraint { - if_exists, - name, - cascade, - } - } else if self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) - && dialect_of!(self is MySqlDialect | GenericDialect) - { - AlterTableOperation::DropPrimaryKey - } else if self.parse_keyword(Keyword::PROJECTION) - && dialect_of!(self is ClickHouseDialect|GenericDialect) - { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let name = self.parse_identifier(false)?; - AlterTableOperation::DropProjection { if_exists, name } - } else if self.parse_keywords(&[Keyword::CLUSTERING, Keyword::KEY]) { - AlterTableOperation::DropClusteringKey - } else { - let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let column_name = self.parse_identifier(false)?; - let cascade = self.parse_keyword(Keyword::CASCADE); - AlterTableOperation::DropColumn { - column_name, - if_exists, - cascade, - } - } - } else if self.parse_keyword(Keyword::PARTITION) { - self.expect_token(&Token::LParen)?; - let before = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - self.expect_keyword(Keyword::RENAME)?; - self.expect_keywords(&[Keyword::TO, Keyword::PARTITION])?; - self.expect_token(&Token::LParen)?; - let renames = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - AlterTableOperation::RenamePartitions { - old_partitions: before, - new_partitions: renames, - } - } else if self.parse_keyword(Keyword::CHANGE) { - let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] - let old_name = self.parse_identifier(false)?; - let new_name = self.parse_identifier(false)?; - let data_type = self.parse_data_type()?; - let mut options = vec![]; - while let Some(option) = self.parse_optional_column_option()? { - options.push(option); - } - - let column_position = self.parse_column_position()?; - - AlterTableOperation::ChangeColumn { - old_name, - new_name, - data_type, - options, - column_position, - } - } else if self.parse_keyword(Keyword::MODIFY) { - let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] - let col_name = self.parse_identifier(false)?; - let data_type = self.parse_data_type()?; - let mut options = vec![]; - while let Some(option) = self.parse_optional_column_option()? 
{ - options.push(option); - } - - let column_position = self.parse_column_position()?; - - AlterTableOperation::ModifyColumn { - col_name, - data_type, - options, - column_position, - } - } else if self.parse_keyword(Keyword::ALTER) { - let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] - let column_name = self.parse_identifier(false)?; - let is_postgresql = dialect_of!(self is PostgreSqlDialect); - - let op: AlterColumnOperation = if self.parse_keywords(&[ - Keyword::SET, - Keyword::NOT, - Keyword::NULL, - ]) { - AlterColumnOperation::SetNotNull {} - } else if self.parse_keywords(&[Keyword::DROP, Keyword::NOT, Keyword::NULL]) { - AlterColumnOperation::DropNotNull {} - } else if self.parse_keywords(&[Keyword::SET, Keyword::DEFAULT]) { - AlterColumnOperation::SetDefault { - value: self.parse_expr()?, - } - } else if self.parse_keywords(&[Keyword::DROP, Keyword::DEFAULT]) { - AlterColumnOperation::DropDefault {} - } else if self.parse_keywords(&[Keyword::SET, Keyword::DATA, Keyword::TYPE]) - || (is_postgresql && self.parse_keyword(Keyword::TYPE)) - { - let data_type = self.parse_data_type()?; - let using = if is_postgresql && self.parse_keyword(Keyword::USING) { - Some(self.parse_expr()?) - } else { - None - }; - AlterColumnOperation::SetDataType { data_type, using } - } else if self.parse_keywords(&[Keyword::ADD, Keyword::GENERATED]) { - let generated_as = if self.parse_keyword(Keyword::ALWAYS) { - Some(GeneratedAs::Always) - } else if self.parse_keywords(&[Keyword::BY, Keyword::DEFAULT]) { - Some(GeneratedAs::ByDefault) - } else { - None - }; - - self.expect_keywords(&[Keyword::AS, Keyword::IDENTITY])?; - - let mut sequence_options: Option> = None; - - if self.peek_token().token == Token::LParen { - self.expect_token(&Token::LParen)?; - sequence_options = Some(self.parse_create_sequence_options()?); - self.expect_token(&Token::RParen)?; - } - - AlterColumnOperation::AddGenerated { - generated_as, - sequence_options, - } - } else { - let message = if is_postgresql { - "SET/DROP NOT NULL, SET DEFAULT, SET DATA TYPE, or ADD GENERATED after ALTER COLUMN" - } else { - "SET/DROP NOT NULL, SET DEFAULT, or SET DATA TYPE after ALTER COLUMN" - }; - - return self.expected(message, self.peek_token()); - }; - AlterTableOperation::AlterColumn { column_name, op } - } else if self.parse_keyword(Keyword::SWAP) { - self.expect_keyword(Keyword::WITH)?; - let table_name = self.parse_object_name(false)?; - AlterTableOperation::SwapWith { table_name } - } else if dialect_of!(self is PostgreSqlDialect | GenericDialect) - && self.parse_keywords(&[Keyword::OWNER, Keyword::TO]) - { - let new_owner = self.parse_owner()?; - AlterTableOperation::OwnerTo { new_owner } - } else if dialect_of!(self is ClickHouseDialect|GenericDialect) - && self.parse_keyword(Keyword::ATTACH) - { - AlterTableOperation::AttachPartition { - partition: self.parse_part_or_partition()?, - } - } else if dialect_of!(self is ClickHouseDialect|GenericDialect) - && self.parse_keyword(Keyword::DETACH) - { - AlterTableOperation::DetachPartition { - partition: self.parse_part_or_partition()?, - } - } else if dialect_of!(self is ClickHouseDialect|GenericDialect) - && self.parse_keyword(Keyword::FREEZE) - { - let partition = self.parse_part_or_partition()?; - let with_name = if self.parse_keyword(Keyword::WITH) { - self.expect_keyword(Keyword::NAME)?; - Some(self.parse_identifier(false)?) 
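// e.g. ClickHouse's `ALTER TABLE t FREEZE PARTITION p WITH NAME backup`
// (names illustrative); `UNFREEZE` below mirrors the same shape.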
- } else { - None - }; - AlterTableOperation::FreezePartition { - partition, - with_name, - } - } else if dialect_of!(self is ClickHouseDialect|GenericDialect) - && self.parse_keyword(Keyword::UNFREEZE) - { - let partition = self.parse_part_or_partition()?; - let with_name = if self.parse_keyword(Keyword::WITH) { - self.expect_keyword(Keyword::NAME)?; - Some(self.parse_identifier(false)?) - } else { - None - }; - AlterTableOperation::UnfreezePartition { - partition, - with_name, - } - } else if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { - self.expect_token(&Token::LParen)?; - let exprs = self.parse_comma_separated(|parser| parser.parse_expr())?; - self.expect_token(&Token::RParen)?; - AlterTableOperation::ClusterBy { exprs } - } else if self.parse_keywords(&[Keyword::SUSPEND, Keyword::RECLUSTER]) { - AlterTableOperation::SuspendRecluster - } else if self.parse_keywords(&[Keyword::RESUME, Keyword::RECLUSTER]) { - AlterTableOperation::ResumeRecluster - } else { - let options: Vec = - self.parse_options_with_keywords(&[Keyword::SET, Keyword::TBLPROPERTIES])?; - if !options.is_empty() { - AlterTableOperation::SetTblProperties { - table_properties: options, - } - } else { - return self.expected( - "ADD, RENAME, PARTITION, SWAP, DROP, or SET TBLPROPERTIES after ALTER TABLE", - self.peek_token(), - ); - } - }; - Ok(operation) - } - - fn parse_part_or_partition(&mut self) -> Result { - let keyword = self.expect_one_of_keywords(&[Keyword::PART, Keyword::PARTITION])?; - match keyword { - Keyword::PART => Ok(Partition::Part(self.parse_expr()?)), - Keyword::PARTITION => Ok(Partition::Expr(self.parse_expr()?)), - // unreachable because expect_one_of_keywords used above - _ => unreachable!(), - } - } - - pub fn parse_alter(&mut self) -> Result { - let object_type = self.expect_one_of_keywords(&[ - Keyword::VIEW, - Keyword::TABLE, - Keyword::INDEX, - Keyword::ROLE, - Keyword::POLICY, - ])?; - match object_type { - Keyword::VIEW => self.parse_alter_view(), - Keyword::TABLE => { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let only = self.parse_keyword(Keyword::ONLY); // [ ONLY ] - let table_name = self.parse_object_name(false)?; - let on_cluster = self.parse_optional_on_cluster()?; - let operations = self.parse_comma_separated(Parser::parse_alter_table_operation)?; - - let mut location = None; - if self.parse_keyword(Keyword::LOCATION) { - location = Some(HiveSetLocation { - has_set: false, - location: self.parse_identifier(false)?, - }); - } else if self.parse_keywords(&[Keyword::SET, Keyword::LOCATION]) { - location = Some(HiveSetLocation { - has_set: true, - location: self.parse_identifier(false)?, - }); - } - - Ok(Statement::AlterTable { - name: table_name, - if_exists, - only, - operations, - location, - on_cluster, - }) - } - Keyword::INDEX => { - let index_name = self.parse_object_name(false)?; - let operation = if self.parse_keyword(Keyword::RENAME) { - if self.parse_keyword(Keyword::TO) { - let index_name = self.parse_object_name(false)?; - AlterIndexOperation::RenameIndex { index_name } - } else { - return self.expected("TO after RENAME", self.peek_token()); - } - } else { - return self.expected("RENAME after ALTER INDEX", self.peek_token()); - }; - - Ok(Statement::AlterIndex { - name: index_name, - operation, - }) - } - Keyword::ROLE => self.parse_alter_role(), - Keyword::POLICY => self.parse_alter_policy(), - // unreachable because expect_one_of_keywords used above - _ => unreachable!(), - } - } - - pub fn parse_alter_view(&mut self) -> Result { - let 
name = self.parse_object_name(false)?; - let columns = self.parse_parenthesized_column_list(Optional, false)?; - - let with_options = self.parse_options(Keyword::WITH)?; - - self.expect_keyword(Keyword::AS)?; - let query = self.parse_query()?; - - Ok(Statement::AlterView { - name, - columns, - query, - with_options, - }) - } - - /// Parse a `CALL procedure_name(arg1, arg2, ...)` - /// or `CALL procedure_name` statement - pub fn parse_call(&mut self) -> Result { - let object_name = self.parse_object_name(false)?; - if self.peek_token().token == Token::LParen { - match self.parse_function(object_name)? { - Expr::Function(f) => Ok(Statement::Call(f)), - other => parser_err!( - format!("Expected a simple procedure call but found: {other}"), - self.peek_token().span.start - ), - } - } else { - Ok(Statement::Call(Function { - name: object_name, - parameters: FunctionArguments::None, - args: FunctionArguments::None, - over: None, - filter: None, - null_treatment: None, - within_group: vec![], - })) - } - } - - /// Parse a copy statement - pub fn parse_copy(&mut self) -> Result { - let source; - if self.consume_token(&Token::LParen) { - source = CopySource::Query(self.parse_query()?); - self.expect_token(&Token::RParen)?; - } else { - let table_name = self.parse_object_name(false)?; - let columns = self.parse_parenthesized_column_list(Optional, false)?; - source = CopySource::Table { - table_name, - columns, - }; - } - let to = match self.parse_one_of_keywords(&[Keyword::FROM, Keyword::TO]) { - Some(Keyword::FROM) => false, - Some(Keyword::TO) => true, - _ => self.expected("FROM or TO", self.peek_token())?, - }; - if !to { - // Use a separate if statement to prevent Rust compiler from complaining about - // "if statement in this position is unstable: https://github.com/rust-lang/rust/issues/53667" - if let CopySource::Query(_) = source { - return Err(ParserError::ParserError( - "COPY ... FROM does not support query as a source".to_string(), - )); - } - } - let target = if self.parse_keyword(Keyword::STDIN) { - CopyTarget::Stdin - } else if self.parse_keyword(Keyword::STDOUT) { - CopyTarget::Stdout - } else if self.parse_keyword(Keyword::PROGRAM) { - CopyTarget::Program { - command: self.parse_literal_string()?, - } - } else { - CopyTarget::File { - filename: self.parse_literal_string()?, - } - }; - let _ = self.parse_keyword(Keyword::WITH); // [ WITH ] - let mut options = vec![]; - if self.consume_token(&Token::LParen) { - options = self.parse_comma_separated(Parser::parse_copy_option)?; - self.expect_token(&Token::RParen)?; - } - let mut legacy_options = vec![]; - while let Some(opt) = self.maybe_parse(|parser| parser.parse_copy_legacy_option())? 
{ - legacy_options.push(opt); - } - let values = if let CopyTarget::Stdin = target { - self.expect_token(&Token::SemiColon)?; - self.parse_tsv() - } else { - vec![] - }; - Ok(Statement::Copy { - source, - to, - target, - options, - legacy_options, - values, - }) - } - - pub fn parse_close(&mut self) -> Result { - let cursor = if self.parse_keyword(Keyword::ALL) { - CloseCursor::All - } else { - let name = self.parse_identifier(false)?; - - CloseCursor::Specific { name } - }; - - Ok(Statement::Close { cursor }) - } - - fn parse_copy_option(&mut self) -> Result { - let ret = match self.parse_one_of_keywords(&[ - Keyword::FORMAT, - Keyword::FREEZE, - Keyword::DELIMITER, - Keyword::NULL, - Keyword::HEADER, - Keyword::QUOTE, - Keyword::ESCAPE, - Keyword::FORCE_QUOTE, - Keyword::FORCE_NOT_NULL, - Keyword::FORCE_NULL, - Keyword::ENCODING, - ]) { - Some(Keyword::FORMAT) => CopyOption::Format(self.parse_identifier(false)?), - Some(Keyword::FREEZE) => CopyOption::Freeze(!matches!( - self.parse_one_of_keywords(&[Keyword::TRUE, Keyword::FALSE]), - Some(Keyword::FALSE) - )), - Some(Keyword::DELIMITER) => CopyOption::Delimiter(self.parse_literal_char()?), - Some(Keyword::NULL) => CopyOption::Null(self.parse_literal_string()?), - Some(Keyword::HEADER) => CopyOption::Header(!matches!( - self.parse_one_of_keywords(&[Keyword::TRUE, Keyword::FALSE]), - Some(Keyword::FALSE) - )), - Some(Keyword::QUOTE) => CopyOption::Quote(self.parse_literal_char()?), - Some(Keyword::ESCAPE) => CopyOption::Escape(self.parse_literal_char()?), - Some(Keyword::FORCE_QUOTE) => { - CopyOption::ForceQuote(self.parse_parenthesized_column_list(Mandatory, false)?) - } - Some(Keyword::FORCE_NOT_NULL) => { - CopyOption::ForceNotNull(self.parse_parenthesized_column_list(Mandatory, false)?) - } - Some(Keyword::FORCE_NULL) => { - CopyOption::ForceNull(self.parse_parenthesized_column_list(Mandatory, false)?) - } - Some(Keyword::ENCODING) => CopyOption::Encoding(self.parse_literal_string()?), - _ => self.expected("option", self.peek_token())?, - }; - Ok(ret) - } - - fn parse_copy_legacy_option(&mut self) -> Result { - let ret = match self.parse_one_of_keywords(&[ - Keyword::BINARY, - Keyword::DELIMITER, - Keyword::NULL, - Keyword::CSV, - ]) { - Some(Keyword::BINARY) => CopyLegacyOption::Binary, - Some(Keyword::DELIMITER) => { - let _ = self.parse_keyword(Keyword::AS); // [ AS ] - CopyLegacyOption::Delimiter(self.parse_literal_char()?) - } - Some(Keyword::NULL) => { - let _ = self.parse_keyword(Keyword::AS); // [ AS ] - CopyLegacyOption::Null(self.parse_literal_string()?) - } - Some(Keyword::CSV) => CopyLegacyOption::Csv({ - let mut opts = vec![]; - while let Some(opt) = - self.maybe_parse(|parser| parser.parse_copy_legacy_csv_option())? - { - opts.push(opt); - } - opts - }), - _ => self.expected("option", self.peek_token())?, - }; - Ok(ret) - } - - fn parse_copy_legacy_csv_option(&mut self) -> Result { - let ret = match self.parse_one_of_keywords(&[ - Keyword::HEADER, - Keyword::QUOTE, - Keyword::ESCAPE, - Keyword::FORCE, - ]) { - Some(Keyword::HEADER) => CopyLegacyCsvOption::Header, - Some(Keyword::QUOTE) => { - let _ = self.parse_keyword(Keyword::AS); // [ AS ] - CopyLegacyCsvOption::Quote(self.parse_literal_char()?) - } - Some(Keyword::ESCAPE) => { - let _ = self.parse_keyword(Keyword::AS); // [ AS ] - CopyLegacyCsvOption::Escape(self.parse_literal_char()?) 
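// e.g. the legacy PostgreSQL form
// `COPY t FROM 'data.csv' CSV HEADER QUOTE AS '"'`
// (file name illustrative) reaches this match once per trailing option.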
- } - Some(Keyword::FORCE) if self.parse_keywords(&[Keyword::NOT, Keyword::NULL]) => { - CopyLegacyCsvOption::ForceNotNull( - self.parse_comma_separated(|p| p.parse_identifier(false))?, - ) - } - Some(Keyword::FORCE) if self.parse_keywords(&[Keyword::QUOTE]) => { - CopyLegacyCsvOption::ForceQuote( - self.parse_comma_separated(|p| p.parse_identifier(false))?, - ) - } - _ => self.expected("csv option", self.peek_token())?, - }; - Ok(ret) - } - - fn parse_literal_char(&mut self) -> Result { - let s = self.parse_literal_string()?; - if s.len() != 1 { - let loc = self - .tokens - .get(self.index - 1) - .map_or(Location { line: 0, column: 0 }, |t| t.span.start); - return parser_err!(format!("Expect a char, found {s:?}"), loc); - } - Ok(s.chars().next().unwrap()) - } - - /// Parse the tab-separated values in - /// a COPY payload - pub fn parse_tsv(&mut self) -> Vec> { - self.parse_tab_value() - } - - pub fn parse_tab_value(&mut self) -> Vec> { - let mut values = vec![]; - let mut content = String::from(""); - while let Some(t) = self.next_token_no_skip().map(|t| &t.token) { - match t { - Token::Whitespace(Whitespace::Tab) => { - values.push(Some(content.to_string())); - content.clear(); - } - Token::Whitespace(Whitespace::Newline) => { - values.push(Some(content.to_string())); - content.clear(); - } - Token::Backslash => { - if self.consume_token(&Token::Period) { - return values; - } - if let Token::Word(w) = self.next_token().token { - if w.value == "N" { - values.push(None); - } - } - } - _ => { - content.push_str(&t.to_string()); - } - } - } - values - } - - /// Parse a literal value (numbers, strings, date/time, booleans) - pub fn parse_value(&mut self) -> Result { - let next_token = self.next_token(); - let span = next_token.span; - match next_token.token { - Token::Word(w) => match w.keyword { - Keyword::TRUE if self.dialect.supports_boolean_literals() => { - Ok(Value::Boolean(true)) - } - Keyword::FALSE if self.dialect.supports_boolean_literals() => { - Ok(Value::Boolean(false)) - } - Keyword::NULL => Ok(Value::Null), - Keyword::NoKeyword if w.quote_style.is_some() => match w.quote_style { - Some('"') => Ok(Value::DoubleQuotedString(w.value)), - Some('\'') => Ok(Value::SingleQuotedString(w.value)), - _ => self.expected( - "a value", - TokenWithSpan { - token: Token::Word(w), - span, - }, - )?, - }, - _ => self.expected( - "a concrete value", - TokenWithSpan { - token: Token::Word(w), - span, - }, - ), - }, - // The call to n.parse() returns a bigdecimal when the - // bigdecimal feature is enabled, and is otherwise a no-op - // (i.e., it returns the input string).
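// For example, with the feature enabled "1.25" becomes a BigDecimal,
// while without it Value::Number simply carries the original "1.25"
// string unchanged.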
- Token::Number(n, l) => Ok(Value::Number(Self::parse(n, span.start)?, l)), - Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())), - Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())), - Token::TripleSingleQuotedString(ref s) => { - Ok(Value::TripleSingleQuotedString(s.to_string())) - } - Token::TripleDoubleQuotedString(ref s) => { - Ok(Value::TripleDoubleQuotedString(s.to_string())) - } - Token::DollarQuotedString(ref s) => Ok(Value::DollarQuotedString(s.clone())), - Token::SingleQuotedByteStringLiteral(ref s) => { - Ok(Value::SingleQuotedByteStringLiteral(s.clone())) - } - Token::DoubleQuotedByteStringLiteral(ref s) => { - Ok(Value::DoubleQuotedByteStringLiteral(s.clone())) - } - Token::TripleSingleQuotedByteStringLiteral(ref s) => { - Ok(Value::TripleSingleQuotedByteStringLiteral(s.clone())) - } - Token::TripleDoubleQuotedByteStringLiteral(ref s) => { - Ok(Value::TripleDoubleQuotedByteStringLiteral(s.clone())) - } - Token::SingleQuotedRawStringLiteral(ref s) => { - Ok(Value::SingleQuotedRawStringLiteral(s.clone())) - } - Token::DoubleQuotedRawStringLiteral(ref s) => { - Ok(Value::DoubleQuotedRawStringLiteral(s.clone())) - } - Token::TripleSingleQuotedRawStringLiteral(ref s) => { - Ok(Value::TripleSingleQuotedRawStringLiteral(s.clone())) - } - Token::TripleDoubleQuotedRawStringLiteral(ref s) => { - Ok(Value::TripleDoubleQuotedRawStringLiteral(s.clone())) - } - Token::NationalStringLiteral(ref s) => Ok(Value::NationalStringLiteral(s.to_string())), - Token::EscapedStringLiteral(ref s) => Ok(Value::EscapedStringLiteral(s.to_string())), - Token::UnicodeStringLiteral(ref s) => Ok(Value::UnicodeStringLiteral(s.to_string())), - Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())), - Token::Placeholder(ref s) => Ok(Value::Placeholder(s.to_string())), - tok @ Token::Colon | tok @ Token::AtSign => { - // Not calling self.parse_identifier(false)? because only for placeholders do we want to accept numbers as identifiers - // This is because Snowflake allows numbers as placeholders - let next_token = self.next_token(); - let ident = match next_token.token { - Token::Word(w) => Ok(w.to_ident(next_token.span)), - Token::Number(w, false) => Ok(Ident::new(w)), - _ => self.expected("placeholder", next_token), - }?; - let placeholder = tok.to_string() + &ident.value; - Ok(Value::Placeholder(placeholder)) - } - unexpected => self.expected( - "a value", - TokenWithSpan { - token: unexpected, - span, - }, - ), - } - } - - /// Parse an unsigned numeric literal - pub fn parse_number_value(&mut self) -> Result { - match self.parse_value()? { - v @ Value::Number(_, _) => Ok(v), - v @ Value::Placeholder(_) => Ok(v), - _ => { - self.prev_token(); - self.expected("literal number", self.peek_token()) - } - } - } - - /// Parse a numeric literal as an expression.
Returns a [`Expr::UnaryOp`] if the number is signed, - /// otherwise returns a [`Expr::Value`] - pub fn parse_number(&mut self) -> Result { - let next_token = self.next_token(); - match next_token.token { - Token::Plus => Ok(Expr::UnaryOp { - op: UnaryOperator::Plus, - expr: Box::new(Expr::Value(self.parse_number_value()?)), - }), - Token::Minus => Ok(Expr::UnaryOp { - op: UnaryOperator::Minus, - expr: Box::new(Expr::Value(self.parse_number_value()?)), - }), - _ => { - self.prev_token(); - Ok(Expr::Value(self.parse_number_value()?)) - } - } - } - - fn parse_introduced_string_value(&mut self) -> Result { - let next_token = self.next_token(); - let span = next_token.span; - match next_token.token { - Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())), - Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())), - Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())), - unexpected => self.expected( - "a string value", - TokenWithSpan { - token: unexpected, - span, - }, - ), - } - } - - /// Parse an unsigned literal integer/long - pub fn parse_literal_uint(&mut self) -> Result { - let next_token = self.next_token(); - match next_token.token { - Token::Number(s, _) => Self::parse::(s, next_token.span.start), - _ => self.expected("literal int", next_token), - } - } - - /// Parse the body of a `CREATE FUNCTION` specified as a string. - /// e.g. `CREATE FUNCTION ... AS $$ body $$`. - fn parse_create_function_body_string(&mut self) -> Result { - let peek_token = self.peek_token(); - match peek_token.token { - Token::DollarQuotedString(s) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => - { - self.next_token(); - Ok(Expr::Value(Value::DollarQuotedString(s))) - } - _ => Ok(Expr::Value(Value::SingleQuotedString( - self.parse_literal_string()?, - ))), - } - } - - /// Parse a literal string - pub fn parse_literal_string(&mut self) -> Result { - let next_token = self.next_token(); - match next_token.token { - Token::Word(Word { - value, - keyword: Keyword::NoKeyword, - .. 
- }) => Ok(value), - Token::SingleQuotedString(s) => Ok(s), - Token::DoubleQuotedString(s) => Ok(s), - Token::EscapedStringLiteral(s) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { - Ok(s) - } - Token::UnicodeStringLiteral(s) => Ok(s), - _ => self.expected("literal string", next_token), - } - } - - pub fn parse_enum_values(&mut self) -> Result, ParserError> { - self.expect_token(&Token::LParen)?; - let values = self.parse_comma_separated(|parser| { - let name = parser.parse_literal_string()?; - let e = if parser.consume_token(&Token::Eq) { - let value = parser.parse_number()?; - EnumMember::NamedValue(name, value) - } else { - EnumMember::Name(name) - }; - Ok(e) - })?; - self.expect_token(&Token::RParen)?; - - Ok(values) - } - - /// Parse a SQL datatype (in the context of a CREATE TABLE statement for example) - pub fn parse_data_type(&mut self) -> Result { - let (ty, trailing_bracket) = self.parse_data_type_helper()?; - if trailing_bracket.0 { - return parser_err!( - format!("unmatched > after parsing data type {ty}"), - self.peek_token() - ); - } - - Ok(ty) - } - - fn parse_data_type_helper( - &mut self, - ) -> Result<(DataType, MatchedTrailingBracket), ParserError> { - let next_token = self.next_token(); - let mut trailing_bracket: MatchedTrailingBracket = false.into(); - let mut data = match next_token.token { - Token::Word(w) => match w.keyword { - Keyword::BOOLEAN => Ok(DataType::Boolean), - Keyword::BOOL => Ok(DataType::Bool), - Keyword::FLOAT => Ok(DataType::Float(self.parse_optional_precision()?)), - Keyword::REAL => Ok(DataType::Real), - Keyword::FLOAT4 => Ok(DataType::Float4), - Keyword::FLOAT32 => Ok(DataType::Float32), - Keyword::FLOAT64 => Ok(DataType::Float64), - Keyword::FLOAT8 => Ok(DataType::Float8), - Keyword::DOUBLE => { - if self.parse_keyword(Keyword::PRECISION) { - Ok(DataType::DoublePrecision) - } else { - Ok(DataType::Double) - } - } - Keyword::TINYINT => { - let optional_precision = self.parse_optional_precision(); - if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedTinyInt(optional_precision?)) - } else { - Ok(DataType::TinyInt(optional_precision?)) - } - } - Keyword::INT2 => { - let optional_precision = self.parse_optional_precision(); - if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedInt2(optional_precision?)) - } else { - Ok(DataType::Int2(optional_precision?)) - } - } - Keyword::SMALLINT => { - let optional_precision = self.parse_optional_precision(); - if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedSmallInt(optional_precision?)) - } else { - Ok(DataType::SmallInt(optional_precision?)) - } - } - Keyword::MEDIUMINT => { - let optional_precision = self.parse_optional_precision(); - if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedMediumInt(optional_precision?)) - } else { - Ok(DataType::MediumInt(optional_precision?)) - } - } - Keyword::INT => { - let optional_precision = self.parse_optional_precision(); - if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedInt(optional_precision?)) - } else { - Ok(DataType::Int(optional_precision?)) - } - } - Keyword::INT4 => { - let optional_precision = self.parse_optional_precision(); - if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedInt4(optional_precision?)) - } else { - Ok(DataType::Int4(optional_precision?)) - } - } - Keyword::INT8 => { - let optional_precision = self.parse_optional_precision(); - if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedInt8(optional_precision?)) - } else { - 
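- // No UNSIGNED suffix: keep the signed variant. Note that dialects
- // disagree on INT8's width: PostgreSQL treats INT8 as an alias for
- // BIGINT (8 bytes), while ClickHouse's Int8 is 8 bits wide.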
Ok(DataType::Int8(optional_precision?)) - } - } - Keyword::INT16 => Ok(DataType::Int16), - Keyword::INT32 => Ok(DataType::Int32), - Keyword::INT64 => Ok(DataType::Int64), - Keyword::INT128 => Ok(DataType::Int128), - Keyword::INT256 => Ok(DataType::Int256), - Keyword::INTEGER => { - let optional_precision = self.parse_optional_precision(); - if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedInteger(optional_precision?)) - } else { - Ok(DataType::Integer(optional_precision?)) - } - } - Keyword::BIGINT => { - let optional_precision = self.parse_optional_precision(); - if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedBigInt(optional_precision?)) - } else { - Ok(DataType::BigInt(optional_precision?)) - } - } - Keyword::UINT8 => Ok(DataType::UInt8), - Keyword::UINT16 => Ok(DataType::UInt16), - Keyword::UINT32 => Ok(DataType::UInt32), - Keyword::UINT64 => Ok(DataType::UInt64), - Keyword::UINT128 => Ok(DataType::UInt128), - Keyword::UINT256 => Ok(DataType::UInt256), - Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_character_length()?)), - Keyword::NVARCHAR => { - Ok(DataType::Nvarchar(self.parse_optional_character_length()?)) - } - Keyword::CHARACTER => { - if self.parse_keyword(Keyword::VARYING) { - Ok(DataType::CharacterVarying( - self.parse_optional_character_length()?, - )) - } else if self.parse_keywords(&[Keyword::LARGE, Keyword::OBJECT]) { - Ok(DataType::CharacterLargeObject( - self.parse_optional_precision()?, - )) - } else { - Ok(DataType::Character(self.parse_optional_character_length()?)) - } - } - Keyword::CHAR => { - if self.parse_keyword(Keyword::VARYING) { - Ok(DataType::CharVarying( - self.parse_optional_character_length()?, - )) - } else if self.parse_keywords(&[Keyword::LARGE, Keyword::OBJECT]) { - Ok(DataType::CharLargeObject(self.parse_optional_precision()?)) - } else { - Ok(DataType::Char(self.parse_optional_character_length()?)) - } - } - Keyword::CLOB => Ok(DataType::Clob(self.parse_optional_precision()?)), - Keyword::BINARY => Ok(DataType::Binary(self.parse_optional_precision()?)), - Keyword::VARBINARY => Ok(DataType::Varbinary(self.parse_optional_precision()?)), - Keyword::BLOB => Ok(DataType::Blob(self.parse_optional_precision()?)), - Keyword::TINYBLOB => Ok(DataType::TinyBlob), - Keyword::MEDIUMBLOB => Ok(DataType::MediumBlob), - Keyword::LONGBLOB => Ok(DataType::LongBlob), - Keyword::BYTES => Ok(DataType::Bytes(self.parse_optional_precision()?)), - Keyword::BIT => { - if self.parse_keyword(Keyword::VARYING) { - Ok(DataType::BitVarying(self.parse_optional_precision()?)) - } else { - Ok(DataType::Bit(self.parse_optional_precision()?)) - } - } - Keyword::UUID => Ok(DataType::Uuid), - Keyword::DATE => Ok(DataType::Date), - Keyword::DATE32 => Ok(DataType::Date32), - Keyword::DATETIME => Ok(DataType::Datetime(self.parse_optional_precision()?)), - Keyword::DATETIME64 => { - self.prev_token(); - let (precision, time_zone) = self.parse_datetime_64()?; - Ok(DataType::Datetime64(precision, time_zone)) - } - Keyword::TIMESTAMP => { - let precision = self.parse_optional_precision()?; - let tz = if self.parse_keyword(Keyword::WITH) { - self.expect_keywords(&[Keyword::TIME, Keyword::ZONE])?; - TimezoneInfo::WithTimeZone - } else if self.parse_keyword(Keyword::WITHOUT) { - self.expect_keywords(&[Keyword::TIME, Keyword::ZONE])?; - TimezoneInfo::WithoutTimeZone - } else { - TimezoneInfo::None - }; - Ok(DataType::Timestamp(precision, tz)) - } - Keyword::TIMESTAMPTZ => Ok(DataType::Timestamp( - self.parse_optional_precision()?, - 
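- // TIMESTAMPTZ is shorthand (e.g. in PostgreSQL) for TIMESTAMP WITH TIME ZONE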
TimezoneInfo::Tz, - )), - Keyword::TIME => { - let precision = self.parse_optional_precision()?; - let tz = if self.parse_keyword(Keyword::WITH) { - self.expect_keywords(&[Keyword::TIME, Keyword::ZONE])?; - TimezoneInfo::WithTimeZone - } else if self.parse_keyword(Keyword::WITHOUT) { - self.expect_keywords(&[Keyword::TIME, Keyword::ZONE])?; - TimezoneInfo::WithoutTimeZone - } else { - TimezoneInfo::None - }; - Ok(DataType::Time(precision, tz)) - } - Keyword::TIMETZ => Ok(DataType::Time( - self.parse_optional_precision()?, - TimezoneInfo::Tz, - )), - // Interval types can be followed by a complicated interval - // qualifier that we don't currently support. See - // parse_interval for a taste. - Keyword::INTERVAL => Ok(DataType::Interval), - Keyword::JSON => Ok(DataType::JSON), - Keyword::JSONB => Ok(DataType::JSONB), - Keyword::REGCLASS => Ok(DataType::Regclass), - Keyword::STRING => Ok(DataType::String(self.parse_optional_precision()?)), - Keyword::FIXEDSTRING => { - self.expect_token(&Token::LParen)?; - let character_length = self.parse_literal_uint()?; - self.expect_token(&Token::RParen)?; - Ok(DataType::FixedString(character_length)) - } - Keyword::TEXT => Ok(DataType::Text), - Keyword::TINYTEXT => Ok(DataType::TinyText), - Keyword::MEDIUMTEXT => Ok(DataType::MediumText), - Keyword::LONGTEXT => Ok(DataType::LongText), - Keyword::BYTEA => Ok(DataType::Bytea), - Keyword::NUMERIC => Ok(DataType::Numeric( - self.parse_exact_number_optional_precision_scale()?, - )), - Keyword::DECIMAL => Ok(DataType::Decimal( - self.parse_exact_number_optional_precision_scale()?, - )), - Keyword::DEC => Ok(DataType::Dec( - self.parse_exact_number_optional_precision_scale()?, - )), - Keyword::BIGNUMERIC => Ok(DataType::BigNumeric( - self.parse_exact_number_optional_precision_scale()?, - )), - Keyword::BIGDECIMAL => Ok(DataType::BigDecimal( - self.parse_exact_number_optional_precision_scale()?, - )), - Keyword::ENUM => Ok(DataType::Enum(self.parse_enum_values()?, None)), - Keyword::ENUM8 => Ok(DataType::Enum(self.parse_enum_values()?, Some(8))), - Keyword::ENUM16 => Ok(DataType::Enum(self.parse_enum_values()?, Some(16))), - Keyword::SET => Ok(DataType::Set(self.parse_string_values()?)), - Keyword::ARRAY => { - if dialect_of!(self is SnowflakeDialect) { - Ok(DataType::Array(ArrayElemTypeDef::None)) - } else if dialect_of!(self is ClickHouseDialect) { - Ok(self.parse_sub_type(|internal_type| { - DataType::Array(ArrayElemTypeDef::Parenthesis(internal_type)) - })?) 
- } else { - self.expect_token(&Token::Lt)?; - let (inside_type, _trailing_bracket) = self.parse_data_type_helper()?; - trailing_bracket = self.expect_closing_angle_bracket(_trailing_bracket)?; - Ok(DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( - inside_type, - )))) - } - } - Keyword::STRUCT if dialect_of!(self is DuckDbDialect) => { - self.prev_token(); - let field_defs = self.parse_duckdb_struct_type_def()?; - Ok(DataType::Struct(field_defs, StructBracketKind::Parentheses)) - } - Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => { - self.prev_token(); - let (field_defs, _trailing_bracket) = - self.parse_struct_type_def(Self::parse_struct_field_def)?; - trailing_bracket = _trailing_bracket; - Ok(DataType::Struct( - field_defs, - StructBracketKind::AngleBrackets, - )) - } - Keyword::UNION if dialect_of!(self is DuckDbDialect | GenericDialect) => { - self.prev_token(); - let fields = self.parse_union_type_def()?; - Ok(DataType::Union(fields)) - } - Keyword::NULLABLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => { - Ok(self.parse_sub_type(DataType::Nullable)?) - } - Keyword::LOWCARDINALITY if dialect_of!(self is ClickHouseDialect | GenericDialect) => { - Ok(self.parse_sub_type(DataType::LowCardinality)?) - } - Keyword::MAP if dialect_of!(self is ClickHouseDialect | GenericDialect) => { - self.prev_token(); - let (key_data_type, value_data_type) = self.parse_click_house_map_def()?; - Ok(DataType::Map( - Box::new(key_data_type), - Box::new(value_data_type), - )) - } - Keyword::NESTED if dialect_of!(self is ClickHouseDialect | GenericDialect) => { - self.expect_token(&Token::LParen)?; - let field_defs = self.parse_comma_separated(Parser::parse_column_def)?; - self.expect_token(&Token::RParen)?; - Ok(DataType::Nested(field_defs)) - } - Keyword::TUPLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => { - self.prev_token(); - let field_defs = self.parse_click_house_tuple_def()?; - Ok(DataType::Tuple(field_defs)) - } - Keyword::TRIGGER => Ok(DataType::Trigger), - _ => { - self.prev_token(); - let type_name = self.parse_object_name(false)?; - if let Some(modifiers) = self.parse_optional_type_modifiers()? { - Ok(DataType::Custom(type_name, modifiers)) - } else { - Ok(DataType::Custom(type_name, vec![])) - } - } - }, - _ => self.expected("a data type name", next_token), - }?; - - // Parse array data types. Note: this is postgresql-specific and different from - // Keyword::ARRAY syntax from above - while self.consume_token(&Token::LBracket) { - let size = if dialect_of!(self is GenericDialect | DuckDbDialect | PostgreSqlDialect) { - self.maybe_parse(|p| p.parse_literal_uint())? 
- } else {
- None
- };
- self.expect_token(&Token::RBracket)?;
- data = DataType::Array(ArrayElemTypeDef::SquareBracket(Box::new(data), size))
- }
- Ok((data, trailing_bracket))
- }
-
- pub fn parse_string_values(&mut self) -> Result<Vec<String>, ParserError> {
- self.expect_token(&Token::LParen)?;
- let mut values = Vec::new();
- loop {
- let next_token = self.next_token();
- match next_token.token {
- Token::SingleQuotedString(value) => values.push(value),
- _ => self.expected("a string", next_token)?,
- }
- let next_token = self.next_token();
- match next_token.token {
- Token::Comma => (),
- Token::RParen => break,
- _ => self.expected(", or )", next_token)?,
- }
- }
- Ok(values)
- }
-
- /// Strictly parse `identifier AS identifier`
- pub fn parse_identifier_with_alias(&mut self) -> Result<IdentWithAlias, ParserError> {
- let ident = self.parse_identifier(false)?;
- self.expect_keyword(Keyword::AS)?;
- let alias = self.parse_identifier(false)?;
- Ok(IdentWithAlias { ident, alias })
- }
-
- /// Parse `AS identifier` (or simply `identifier` if it's not a reserved keyword)
- /// Some examples with aliases: `SELECT 1 foo`, `SELECT COUNT(*) AS cnt`,
- /// `SELECT ... FROM t1 foo, t2 bar`, `SELECT ... FROM (...) AS bar`
- pub fn parse_optional_alias(
- &mut self,
- reserved_kwds: &[Keyword],
- ) -> Result<Option<Ident>, ParserError> {
- let after_as = self.parse_keyword(Keyword::AS);
- let next_token = self.next_token();
- match next_token.token {
- // Accept any identifier after `AS` (though many dialects have restrictions on
- // keywords that may appear here). If there's no `AS`, don't parse a keyword
- // that may start a construct allowed in this position as an alias.
- // (For example, in `FROM t1 JOIN` the `JOIN` will always be parsed as a keyword,
- // not an alias.)
- Token::Word(w) if after_as || !reserved_kwds.contains(&w.keyword) => {
- Ok(Some(w.to_ident(next_token.span)))
- }
- // MSSQL supports single-quoted strings as aliases for columns.
- // We accept them as table aliases too, although MSSQL does not.
- //
- // Note that this conflicts with an obscure rule from the SQL
- // standard, which we don't implement:
- // https://crate.io/docs/sql-99/en/latest/chapters/07.html#character-string-literal-s
- // "[Obscure Rule] SQL allows you to break a long <character string
- // literal> up into two or more smaller <character string literal>s,
- // split by a <separator> that includes a newline character. When it
- // sees such a <character string literal>, your DBMS will ignore the
- // <separator> and treat the multiple strings as a single <character
- // string literal>."
- Token::SingleQuotedString(s) => Ok(Some(Ident::with_quote('\'', s))),
- // Support for MySql dialect double-quoted string, `AS "HOUR"` for example
- Token::DoubleQuotedString(s) => Ok(Some(Ident::with_quote('\"', s))),
- _ => {
- if after_as {
- return self.expected("an identifier after AS", next_token);
- }
- self.prev_token();
- Ok(None) // no alias found
- }
- }
- }
-
- /// Parse `AS identifier` when the AS is describing a table-valued object,
- /// like in `... FROM generate_series(1, 10) AS t (col)`. In this case
- /// the alias is allowed to optionally name the columns in the table, in
- /// addition to the table itself.
- pub fn parse_optional_table_alias(
- &mut self,
- reserved_kwds: &[Keyword],
- ) -> Result<Option<TableAlias>, ParserError> {
- match self.parse_optional_alias(reserved_kwds)?
{ - Some(name) => { - let columns = self.parse_table_alias_column_defs()?; - Ok(Some(TableAlias { name, columns })) - } - None => Ok(None), - } - } - - pub fn parse_optional_group_by(&mut self) -> Result, ParserError> { - if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) { - let expressions = if self.parse_keyword(Keyword::ALL) { - None - } else { - Some(self.parse_comma_separated(Parser::parse_group_by_expr)?) - }; - - let mut modifiers = vec![]; - if dialect_of!(self is ClickHouseDialect | GenericDialect) { - loop { - if !self.parse_keyword(Keyword::WITH) { - break; - } - let keyword = self.expect_one_of_keywords(&[ - Keyword::ROLLUP, - Keyword::CUBE, - Keyword::TOTALS, - ])?; - modifiers.push(match keyword { - Keyword::ROLLUP => GroupByWithModifier::Rollup, - Keyword::CUBE => GroupByWithModifier::Cube, - Keyword::TOTALS => GroupByWithModifier::Totals, - _ => { - return parser_err!( - "BUG: expected to match GroupBy modifier keyword", - self.peek_token().span.start - ) - } - }); - } - } - let group_by = match expressions { - None => GroupByExpr::All(modifiers), - Some(exprs) => GroupByExpr::Expressions(exprs, modifiers), - }; - Ok(Some(group_by)) - } else { - Ok(None) - } - } - - pub fn parse_optional_order_by(&mut self) -> Result, ParserError> { - if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { - let order_by_exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?; - let interpolate = if dialect_of!(self is ClickHouseDialect | GenericDialect) { - self.parse_interpolations()? - } else { - None - }; - - Ok(Some(OrderBy { - exprs: order_by_exprs, - interpolate, - })) - } else { - Ok(None) - } - } - - /// Parse a possibly qualified, possibly quoted identifier, e.g. - /// `foo` or `myschema."table" - /// - /// The `in_table_clause` parameter indicates whether the object name is a table in a FROM, JOIN, - /// or similar table clause. Currently, this is used only to support unquoted hyphenated identifiers - /// in this context on BigQuery. - pub fn parse_object_name(&mut self, in_table_clause: bool) -> Result { - let mut idents = vec![]; - loop { - if self.dialect.supports_object_name_double_dot_notation() - && idents.len() == 1 - && self.consume_token(&Token::Period) - { - // Empty string here means default schema - idents.push(Ident::new("")); - } - idents.push(self.parse_identifier(in_table_clause)?); - if !self.consume_token(&Token::Period) { - break; - } - } - - // BigQuery accepts any number of quoted identifiers of a table name. 
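- // For example, `myproject.mydataset.mytable` (one quoted identifier),
- // `myproject`.`mydataset`.`mytable` (three), and `myproject.mydataset`.mytable
- // are all equivalent: the code below splits any quoted identifier that
- // contains a `.` into its component parts.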
- // https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_identifiers - if dialect_of!(self is BigQueryDialect) - && idents.iter().any(|ident| ident.value.contains('.')) - { - idents = idents - .into_iter() - .flat_map(|ident| { - ident - .value - .split('.') - .map(|value| Ident { - value: value.into(), - quote_style: ident.quote_style, - span: ident.span, - }) - .collect::>() - }) - .collect() - } - - Ok(ObjectName(idents)) - } - - /// Parse identifiers - pub fn parse_identifiers(&mut self) -> Result, ParserError> { - let mut idents = vec![]; - loop { - match self.peek_token().token { - Token::Word(w) => { - idents.push(w.to_ident(self.peek_token().span)); - } - Token::EOF | Token::Eq => break, - _ => {} - } - self.next_token(); - } - Ok(idents) - } - - /// Parse identifiers of form ident1[.identN]* - /// - /// Similar in functionality to [parse_identifiers], with difference - /// being this function is much more strict about parsing a valid multipart identifier, not - /// allowing extraneous tokens to be parsed, otherwise it fails. - /// - /// For example: - /// - /// ```rust - /// use sqlparser::ast::Ident; - /// use sqlparser::dialect::GenericDialect; - /// use sqlparser::parser::Parser; - /// - /// let dialect = GenericDialect {}; - /// let expected = vec![Ident::new("one"), Ident::new("two")]; - /// - /// // expected usage - /// let sql = "one.two"; - /// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap(); - /// let actual = parser.parse_multipart_identifier().unwrap(); - /// assert_eq!(&actual, &expected); - /// - /// // parse_identifiers is more loose on what it allows, parsing successfully - /// let sql = "one + two"; - /// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap(); - /// let actual = parser.parse_identifiers().unwrap(); - /// assert_eq!(&actual, &expected); - /// - /// // expected to strictly fail due to + separator - /// let sql = "one + two"; - /// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap(); - /// let actual = parser.parse_multipart_identifier().unwrap_err(); - /// assert_eq!( - /// actual.to_string(), - /// "sql parser error: Unexpected token in identifier: +" - /// ); - /// ``` - /// - /// [parse_identifiers]: Parser::parse_identifiers - pub fn parse_multipart_identifier(&mut self) -> Result, ParserError> { - let mut idents = vec![]; - - // expecting at least one word for identifier - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => idents.push(w.to_ident(next_token.span)), - Token::EOF => { - return Err(ParserError::ParserError( - "Empty input when parsing identifier".to_string(), - ))? - } - token => { - return Err(ParserError::ParserError(format!( - "Unexpected token in identifier: {token}" - )))? - } - }; - - // parse optional next parts if exist - loop { - match self.next_token().token { - // ensure that optional period is succeeded by another identifier - Token::Period => { - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => idents.push(w.to_ident(next_token.span)), - Token::EOF => { - return Err(ParserError::ParserError( - "Trailing period in identifier".to_string(), - ))? - } - token => { - return Err(ParserError::ParserError(format!( - "Unexpected token following period in identifier: {token}" - )))? - } - } - } - Token::EOF => break, - token => { - return Err(ParserError::ParserError(format!( - "Unexpected token in identifier: {token}" - )))? 
- }
- }
- }
-
- Ok(idents)
- }
-
- /// Parse a simple one-word identifier (possibly quoted, possibly a keyword)
- ///
- /// The `in_table_clause` parameter indicates whether the identifier is a table in a FROM, JOIN, or
- /// similar table clause. Currently, this is used only to support unquoted hyphenated identifiers in
- /// this context on BigQuery.
- pub fn parse_identifier(&mut self, in_table_clause: bool) -> Result<Ident, ParserError> {
- let next_token = self.next_token();
- match next_token.token {
- Token::Word(w) => {
- let mut ident = w.to_ident(next_token.span);
-
- // On BigQuery, hyphens are permitted in unquoted identifiers inside of a FROM or
- // TABLE clause [0].
- //
- // The first segment must be an ordinary unquoted identifier, e.g. it must not start
- // with a digit. Subsequent segments must either be valid identifiers or
- // integers, e.g. foo-123 is allowed, but foo-123a is not.
- //
- // [0] https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical
- if dialect_of!(self is BigQueryDialect)
- && w.quote_style.is_none()
- && in_table_clause
- {
- let mut requires_whitespace = false;
- while matches!(self.peek_token_no_skip().token, Token::Minus) {
- self.next_token();
- ident.value.push('-');
-
- let token = self
- .next_token_no_skip()
- .cloned()
- .unwrap_or(TokenWithSpan::wrap(Token::EOF));
- requires_whitespace = match token.token {
- Token::Word(next_word) if next_word.quote_style.is_none() => {
- ident.value.push_str(&next_word.value);
- false
- }
- Token::Number(s, false) if s.chars().all(|c| c.is_ascii_digit()) => {
- ident.value.push_str(&s);
- true
- }
- _ => {
- return self
- .expected("continuation of hyphenated identifier", token);
- }
- }
- }
-
- // If the last segment was a number, we must check that it's followed by whitespace,
- // otherwise foo-123a will be parsed as `foo-123` with the alias `a`.
- if requires_whitespace {
- let token = self.next_token();
- if !matches!(token.token, Token::EOF | Token::Whitespace(_)) {
- return self
- .expected("whitespace following hyphenated identifier", token);
- }
- }
- }
- Ok(ident)
- }
- Token::SingleQuotedString(s) => Ok(Ident::with_quote('\'', s)),
- Token::DoubleQuotedString(s) => Ok(Ident::with_quote('\"', s)),
- _ => self.expected("identifier", next_token),
- }
- }
-
- /// Parses a parenthesized, comma-separated list of column definitions within a view.
- fn parse_view_columns(&mut self) -> Result<Vec<ViewColumnDef>, ParserError> {
- if self.consume_token(&Token::LParen) {
- if self.peek_token().token == Token::RParen {
- self.next_token();
- Ok(vec![])
- } else {
- let cols = self.parse_comma_separated(Parser::parse_view_column)?;
- self.expect_token(&Token::RParen)?;
- Ok(cols)
- }
- } else {
- Ok(vec![])
- }
- }
-
- /// Parses a column definition within a view.
- fn parse_view_column(&mut self) -> Result<ViewColumnDef, ParserError> {
- let name = self.parse_identifier(false)?;
- let options = if (dialect_of!(self is BigQueryDialect | GenericDialect)
- && self.parse_keyword(Keyword::OPTIONS))
- || (dialect_of!(self is SnowflakeDialect | GenericDialect)
- && self.parse_keyword(Keyword::COMMENT))
- {
- self.prev_token();
- self.parse_optional_column_option()?
- .map(|option| vec![option])
- } else {
- None
- };
- let data_type = if dialect_of!(self is ClickHouseDialect) {
- Some(self.parse_data_type()?)
- } else { - None - }; - Ok(ViewColumnDef { - name, - data_type, - options, - }) - } - - /// Parse a parenthesized comma-separated list of unqualified, possibly quoted identifiers - pub fn parse_parenthesized_column_list( - &mut self, - optional: IsOptional, - allow_empty: bool, - ) -> Result, ParserError> { - if self.consume_token(&Token::LParen) { - if allow_empty && self.peek_token().token == Token::RParen { - self.next_token(); - Ok(vec![]) - } else { - let cols = self.parse_comma_separated(|p| p.parse_identifier(false))?; - self.expect_token(&Token::RParen)?; - Ok(cols) - } - } else if optional == Optional { - Ok(vec![]) - } else { - self.expected("a list of columns in parentheses", self.peek_token()) - } - } - - /// Parse a parenthesized comma-separated list of table alias column definitions. - fn parse_table_alias_column_defs(&mut self) -> Result, ParserError> { - if self.consume_token(&Token::LParen) { - let cols = self.parse_comma_separated(|p| { - let name = p.parse_identifier(false)?; - let data_type = p.maybe_parse(|p| p.parse_data_type())?; - Ok(TableAliasColumnDef { name, data_type }) - })?; - self.expect_token(&Token::RParen)?; - Ok(cols) - } else { - Ok(vec![]) - } - } - - pub fn parse_precision(&mut self) -> Result { - self.expect_token(&Token::LParen)?; - let n = self.parse_literal_uint()?; - self.expect_token(&Token::RParen)?; - Ok(n) - } - - pub fn parse_optional_precision(&mut self) -> Result, ParserError> { - if self.consume_token(&Token::LParen) { - let n = self.parse_literal_uint()?; - self.expect_token(&Token::RParen)?; - Ok(Some(n)) - } else { - Ok(None) - } - } - - /// Parse datetime64 [1] - /// Syntax - /// ```sql - /// DateTime64(precision[, timezone]) - /// ``` - /// - /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/datetime64 - pub fn parse_datetime_64(&mut self) -> Result<(u64, Option), ParserError> { - self.expect_keyword(Keyword::DATETIME64)?; - self.expect_token(&Token::LParen)?; - let precision = self.parse_literal_uint()?; - let time_zone = if self.consume_token(&Token::Comma) { - Some(self.parse_literal_string()?) - } else { - None - }; - self.expect_token(&Token::RParen)?; - Ok((precision, time_zone)) - } - - pub fn parse_optional_character_length( - &mut self, - ) -> Result, ParserError> { - if self.consume_token(&Token::LParen) { - let character_length = self.parse_character_length()?; - self.expect_token(&Token::RParen)?; - Ok(Some(character_length)) - } else { - Ok(None) - } - } - - pub fn parse_character_length(&mut self) -> Result { - if self.parse_keyword(Keyword::MAX) { - return Ok(CharacterLength::Max); - } - let length = self.parse_literal_uint()?; - let unit = if self.parse_keyword(Keyword::CHARACTERS) { - Some(CharLengthUnits::Characters) - } else if self.parse_keyword(Keyword::OCTETS) { - Some(CharLengthUnits::Octets) - } else { - None - }; - Ok(CharacterLength::IntegerLength { length, unit }) - } - - pub fn parse_optional_precision_scale( - &mut self, - ) -> Result<(Option, Option), ParserError> { - if self.consume_token(&Token::LParen) { - let n = self.parse_literal_uint()?; - let scale = if self.consume_token(&Token::Comma) { - Some(self.parse_literal_uint()?) 
- } else { - None - }; - self.expect_token(&Token::RParen)?; - Ok((Some(n), scale)) - } else { - Ok((None, None)) - } - } - - pub fn parse_exact_number_optional_precision_scale( - &mut self, - ) -> Result { - if self.consume_token(&Token::LParen) { - let precision = self.parse_literal_uint()?; - let scale = if self.consume_token(&Token::Comma) { - Some(self.parse_literal_uint()?) - } else { - None - }; - - self.expect_token(&Token::RParen)?; - - match scale { - None => Ok(ExactNumberInfo::Precision(precision)), - Some(scale) => Ok(ExactNumberInfo::PrecisionAndScale(precision, scale)), - } - } else { - Ok(ExactNumberInfo::None) - } - } - - pub fn parse_optional_type_modifiers(&mut self) -> Result>, ParserError> { - if self.consume_token(&Token::LParen) { - let mut modifiers = Vec::new(); - loop { - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => modifiers.push(w.to_string()), - Token::Number(n, _) => modifiers.push(n), - Token::SingleQuotedString(s) => modifiers.push(s), - - Token::Comma => { - continue; - } - Token::RParen => { - break; - } - _ => self.expected("type modifiers", next_token)?, - } - } - - Ok(Some(modifiers)) - } else { - Ok(None) - } - } - - /// Parse a parenthesized sub data type - fn parse_sub_type(&mut self, parent_type: F) -> Result - where - F: FnOnce(Box) -> DataType, - { - self.expect_token(&Token::LParen)?; - let inside_type = self.parse_data_type()?; - self.expect_token(&Token::RParen)?; - Ok(parent_type(inside_type.into())) - } - - pub fn parse_delete(&mut self) -> Result { - let (tables, with_from_keyword) = if !self.parse_keyword(Keyword::FROM) { - // `FROM` keyword is optional in BigQuery SQL. - // https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#delete_statement - if dialect_of!(self is BigQueryDialect | GenericDialect) { - (vec![], false) - } else { - let tables = self.parse_comma_separated(|p| p.parse_object_name(false))?; - self.expect_keyword(Keyword::FROM)?; - (tables, true) - } - } else { - (vec![], true) - }; - - let from = self.parse_comma_separated(Parser::parse_table_and_joins)?; - let using = if self.parse_keyword(Keyword::USING) { - Some(self.parse_comma_separated(Parser::parse_table_and_joins)?) - } else { - None - }; - let selection = if self.parse_keyword(Keyword::WHERE) { - Some(self.parse_expr()?) - } else { - None - }; - let returning = if self.parse_keyword(Keyword::RETURNING) { - Some(self.parse_comma_separated(Parser::parse_select_item)?) - } else { - None - }; - let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_order_by_expr)? - } else { - vec![] - }; - let limit = if self.parse_keyword(Keyword::LIMIT) { - self.parse_limit()? 
- } else { - None - }; - - Ok(Statement::Delete(Delete { - tables, - from: if with_from_keyword { - FromTable::WithFromKeyword(from) - } else { - FromTable::WithoutKeyword(from) - }, - using, - selection, - returning, - order_by, - limit, - })) - } - - // KILL [CONNECTION | QUERY | MUTATION] processlist_id - pub fn parse_kill(&mut self) -> Result { - let modifier_keyword = - self.parse_one_of_keywords(&[Keyword::CONNECTION, Keyword::QUERY, Keyword::MUTATION]); - - let id = self.parse_literal_uint()?; - - let modifier = match modifier_keyword { - Some(Keyword::CONNECTION) => Some(KillType::Connection), - Some(Keyword::QUERY) => Some(KillType::Query), - Some(Keyword::MUTATION) => { - if dialect_of!(self is ClickHouseDialect | GenericDialect) { - Some(KillType::Mutation) - } else { - self.expected( - "Unsupported type for KILL, allowed: CONNECTION | QUERY", - self.peek_token(), - )? - } - } - _ => None, - }; - - Ok(Statement::Kill { modifier, id }) - } - - pub fn parse_explain( - &mut self, - describe_alias: DescribeAlias, - ) -> Result { - let mut analyze = false; - let mut verbose = false; - let mut query_plan = false; - let mut format = None; - let mut options = None; - - // Note: DuckDB is compatible with PostgreSQL syntax for this statement, - // although not all features may be implemented. - if describe_alias == DescribeAlias::Explain - && self.dialect.supports_explain_with_utility_options() - && self.peek_token().token == Token::LParen - { - options = Some(self.parse_utility_options()?) - } else if self.parse_keywords(&[Keyword::QUERY, Keyword::PLAN]) { - query_plan = true; - } else { - analyze = self.parse_keyword(Keyword::ANALYZE); - verbose = self.parse_keyword(Keyword::VERBOSE); - if self.parse_keyword(Keyword::FORMAT) { - format = Some(self.parse_analyze_format()?); - } - } - - match self.maybe_parse(|parser| parser.parse_statement())? { - Some(Statement::Explain { .. }) | Some(Statement::ExplainTable { .. }) => Err( - ParserError::ParserError("Explain must be root of the plan".to_string()), - ), - Some(statement) => Ok(Statement::Explain { - describe_alias, - analyze, - verbose, - query_plan, - statement: Box::new(statement), - format, - options, - }), - _ => { - let hive_format = - match self.parse_one_of_keywords(&[Keyword::EXTENDED, Keyword::FORMATTED]) { - Some(Keyword::EXTENDED) => Some(HiveDescribeFormat::Extended), - Some(Keyword::FORMATTED) => Some(HiveDescribeFormat::Formatted), - _ => None, - }; - - let has_table_keyword = if self.dialect.describe_requires_table_keyword() { - // only allow to use TABLE keyword for DESC|DESCRIBE statement - self.parse_keyword(Keyword::TABLE) - } else { - false - }; - - let table_name = self.parse_object_name(false)?; - Ok(Statement::ExplainTable { - describe_alias, - hive_format, - has_table_keyword, - table_name, - }) - } - } - } - - /// Parse a query expression, i.e. a `SELECT` statement optionally - /// preceded with some `WITH` CTE declarations and optionally followed - /// by `ORDER BY`. Unlike some other parse_... 
methods, this one doesn't
- /// expect the initial keyword to be already consumed
- pub fn parse_query(&mut self) -> Result<Box<Query>, ParserError> {
- let _guard = self.recursion_counter.try_decrease()?;
- let with = if let Some(with_token) = self.parse_keyword_token(Keyword::WITH) {
- Some(With {
- with_token: with_token.into(),
- recursive: self.parse_keyword(Keyword::RECURSIVE),
- cte_tables: self.parse_comma_separated(Parser::parse_cte)?,
- })
- } else {
- None
- };
- if self.parse_keyword(Keyword::INSERT) {
- Ok(Query {
- with,
- body: self.parse_insert_setexpr_boxed()?,
- limit: None,
- limit_by: vec![],
- order_by: None,
- offset: None,
- fetch: None,
- locks: vec![],
- for_clause: None,
- settings: None,
- format_clause: None,
- }
- .into())
- } else if self.parse_keyword(Keyword::UPDATE) {
- Ok(Query {
- with,
- body: self.parse_update_setexpr_boxed()?,
- limit: None,
- limit_by: vec![],
- order_by: None,
- offset: None,
- fetch: None,
- locks: vec![],
- for_clause: None,
- settings: None,
- format_clause: None,
- }
- .into())
- } else {
- let body = self.parse_query_body(self.dialect.prec_unknown())?;
-
- let order_by = self.parse_optional_order_by()?;
-
- let mut limit = None;
- let mut offset = None;
-
- for _x in 0..2 {
- if limit.is_none() && self.parse_keyword(Keyword::LIMIT) {
- limit = self.parse_limit()?
- }
-
- if offset.is_none() && self.parse_keyword(Keyword::OFFSET) {
- offset = Some(self.parse_offset()?)
- }
-
- if self.dialect.supports_limit_comma()
- && limit.is_some()
- && offset.is_none()
- && self.consume_token(&Token::Comma)
- {
- // MySQL-style `LIMIT x, y` is equivalent to `LIMIT y OFFSET x`;
- // see the MySQL documentation for more details.
- offset = Some(Offset {
- value: limit.unwrap(),
- rows: OffsetRows::None,
- });
- limit = Some(self.parse_expr()?);
- }
- }
-
- let limit_by = if dialect_of!(self is ClickHouseDialect | GenericDialect)
- && self.parse_keyword(Keyword::BY)
- {
- self.parse_comma_separated(Parser::parse_expr)?
- } else {
- vec![]
- };
-
- let settings = self.parse_settings()?;
-
- let fetch = if self.parse_keyword(Keyword::FETCH) {
- Some(self.parse_fetch()?)
- } else {
- None
- };
-
- let mut for_clause = None;
- let mut locks = Vec::new();
- while self.parse_keyword(Keyword::FOR) {
- if let Some(parsed_for_clause) = self.parse_for_clause()?
{ - for_clause = Some(parsed_for_clause); - break; - } else { - locks.push(self.parse_lock()?); - } - } - let format_clause = if dialect_of!(self is ClickHouseDialect | GenericDialect) - && self.parse_keyword(Keyword::FORMAT) - { - if self.parse_keyword(Keyword::NULL) { - Some(FormatClause::Null) - } else { - let ident = self.parse_identifier(false)?; - Some(FormatClause::Identifier(ident)) - } - } else { - None - }; - - Ok(Query { - with, - body, - order_by, - limit, - limit_by, - offset, - fetch, - locks, - for_clause, - settings, - format_clause, - } - .into()) - } - } - - fn parse_settings(&mut self) -> Result>, ParserError> { - let settings = if dialect_of!(self is ClickHouseDialect|GenericDialect) - && self.parse_keyword(Keyword::SETTINGS) - { - let key_values = self.parse_comma_separated(|p| { - let key = p.parse_identifier(false)?; - p.expect_token(&Token::Eq)?; - let value = p.parse_value()?; - Ok(Setting { key, value }) - })?; - Some(key_values) - } else { - None - }; - Ok(settings) - } - - /// Parse a mssql `FOR [XML | JSON | BROWSE]` clause - pub fn parse_for_clause(&mut self) -> Result, ParserError> { - if self.parse_keyword(Keyword::XML) { - Ok(Some(self.parse_for_xml()?)) - } else if self.parse_keyword(Keyword::JSON) { - Ok(Some(self.parse_for_json()?)) - } else if self.parse_keyword(Keyword::BROWSE) { - Ok(Some(ForClause::Browse)) - } else { - Ok(None) - } - } - - /// Parse a mssql `FOR XML` clause - pub fn parse_for_xml(&mut self) -> Result { - let for_xml = if self.parse_keyword(Keyword::RAW) { - let mut element_name = None; - if self.peek_token().token == Token::LParen { - self.expect_token(&Token::LParen)?; - element_name = Some(self.parse_literal_string()?); - self.expect_token(&Token::RParen)?; - } - ForXml::Raw(element_name) - } else if self.parse_keyword(Keyword::AUTO) { - ForXml::Auto - } else if self.parse_keyword(Keyword::EXPLICIT) { - ForXml::Explicit - } else if self.parse_keyword(Keyword::PATH) { - let mut element_name = None; - if self.peek_token().token == Token::LParen { - self.expect_token(&Token::LParen)?; - element_name = Some(self.parse_literal_string()?); - self.expect_token(&Token::RParen)?; - } - ForXml::Path(element_name) - } else { - return Err(ParserError::ParserError( - "Expected FOR XML [RAW | AUTO | EXPLICIT | PATH ]".to_string(), - )); - }; - let mut elements = false; - let mut binary_base64 = false; - let mut root = None; - let mut r#type = false; - while self.peek_token().token == Token::Comma { - self.next_token(); - if self.parse_keyword(Keyword::ELEMENTS) { - elements = true; - } else if self.parse_keyword(Keyword::BINARY) { - self.expect_keyword(Keyword::BASE64)?; - binary_base64 = true; - } else if self.parse_keyword(Keyword::ROOT) { - self.expect_token(&Token::LParen)?; - root = Some(self.parse_literal_string()?); - self.expect_token(&Token::RParen)?; - } else if self.parse_keyword(Keyword::TYPE) { - r#type = true; - } - } - Ok(ForClause::Xml { - for_xml, - elements, - binary_base64, - root, - r#type, - }) - } - - /// Parse a mssql `FOR JSON` clause - pub fn parse_for_json(&mut self) -> Result { - let for_json = if self.parse_keyword(Keyword::AUTO) { - ForJson::Auto - } else if self.parse_keyword(Keyword::PATH) { - ForJson::Path - } else { - return Err(ParserError::ParserError( - "Expected FOR JSON [AUTO | PATH ]".to_string(), - )); - }; - let mut root = None; - let mut include_null_values = false; - let mut without_array_wrapper = false; - while self.peek_token().token == Token::Comma { - self.next_token(); - if 
self.parse_keyword(Keyword::ROOT) { - self.expect_token(&Token::LParen)?; - root = Some(self.parse_literal_string()?); - self.expect_token(&Token::RParen)?; - } else if self.parse_keyword(Keyword::INCLUDE_NULL_VALUES) { - include_null_values = true; - } else if self.parse_keyword(Keyword::WITHOUT_ARRAY_WRAPPER) { - without_array_wrapper = true; - } - } - Ok(ForClause::Json { - for_json, - root, - include_null_values, - without_array_wrapper, - }) - } - - /// Parse a CTE (`alias [( col1, col2, ... )] AS (subquery)`) - pub fn parse_cte(&mut self) -> Result { - let name = self.parse_identifier(false)?; - - let mut cte = if self.parse_keyword(Keyword::AS) { - let mut is_materialized = None; - if dialect_of!(self is PostgreSqlDialect) { - if self.parse_keyword(Keyword::MATERIALIZED) { - is_materialized = Some(CteAsMaterialized::Materialized); - } else if self.parse_keywords(&[Keyword::NOT, Keyword::MATERIALIZED]) { - is_materialized = Some(CteAsMaterialized::NotMaterialized); - } - } - self.expect_token(&Token::LParen)?; - - let query = self.parse_query()?; - let closing_paren_token = self.expect_token(&Token::RParen)?; - - let alias = TableAlias { - name, - columns: vec![], - }; - Cte { - alias, - query, - from: None, - materialized: is_materialized, - closing_paren_token: closing_paren_token.into(), - } - } else { - let columns = self.parse_table_alias_column_defs()?; - self.expect_keyword(Keyword::AS)?; - let mut is_materialized = None; - if dialect_of!(self is PostgreSqlDialect) { - if self.parse_keyword(Keyword::MATERIALIZED) { - is_materialized = Some(CteAsMaterialized::Materialized); - } else if self.parse_keywords(&[Keyword::NOT, Keyword::MATERIALIZED]) { - is_materialized = Some(CteAsMaterialized::NotMaterialized); - } - } - self.expect_token(&Token::LParen)?; - - let query = self.parse_query()?; - let closing_paren_token = self.expect_token(&Token::RParen)?; - - let alias = TableAlias { name, columns }; - Cte { - alias, - query, - from: None, - materialized: is_materialized, - closing_paren_token: closing_paren_token.into(), - } - }; - if self.parse_keyword(Keyword::FROM) { - cte.from = Some(self.parse_identifier(false)?); - } - Ok(cte) - } - - /// Parse a "query body", which is an expression with roughly the - /// following grammar: - /// ```sql - /// query_body ::= restricted_select | '(' subquery ')' | set_operation - /// restricted_select ::= 'SELECT' [expr_list] [ from ] [ where ] [ groupby_having ] - /// subquery ::= query_body [ order_by_limit ] - /// set_operation ::= query_body { 'UNION' | 'EXCEPT' | 'INTERSECT' } [ 'ALL' ] query_body - /// ``` - pub fn parse_query_body(&mut self, precedence: u8) -> Result, ParserError> { - // We parse the expression using a Pratt parser, as in `parse_expr()`. - // Start by parsing a restricted SELECT or a `(subquery)`: - let expr = if self.peek_keyword(Keyword::SELECT) { - SetExpr::Select(self.parse_select().map(Box::new)?) - } else if self.consume_token(&Token::LParen) { - // CTEs are not allowed here, but the parser currently accepts them - let subquery = self.parse_query()?; - self.expect_token(&Token::RParen)?; - SetExpr::Query(subquery) - } else if self.parse_keyword(Keyword::VALUES) { - let is_mysql = dialect_of!(self is MySqlDialect); - SetExpr::Values(self.parse_values(is_mysql)?) 
- } else if self.parse_keyword(Keyword::TABLE) { - SetExpr::Table(Box::new(self.parse_as_table()?)) - } else { - return self.expected( - "SELECT, VALUES, or a subquery in the query body", - self.peek_token(), - ); - }; - - self.parse_remaining_set_exprs(expr, precedence) - } - - /// Parse any extra set expressions that may be present in a query body - /// - /// (this is its own function to reduce required stack size in debug builds) - fn parse_remaining_set_exprs( - &mut self, - mut expr: SetExpr, - precedence: u8, - ) -> Result, ParserError> { - loop { - // The query can be optionally followed by a set operator: - let op = self.parse_set_operator(&self.peek_token().token); - let next_precedence = match op { - // UNION and EXCEPT have the same binding power and evaluate left-to-right - Some(SetOperator::Union) | Some(SetOperator::Except) => 10, - // INTERSECT has higher precedence than UNION/EXCEPT - Some(SetOperator::Intersect) => 20, - // Unexpected token or EOF => stop parsing the query body - None => break, - }; - if precedence >= next_precedence { - break; - } - self.next_token(); // skip past the set operator - let set_quantifier = self.parse_set_quantifier(&op); - expr = SetExpr::SetOperation { - left: Box::new(expr), - op: op.unwrap(), - set_quantifier, - right: self.parse_query_body(next_precedence)?, - }; - } - - Ok(expr.into()) - } - - pub fn parse_set_operator(&mut self, token: &Token) -> Option { - match token { - Token::Word(w) if w.keyword == Keyword::UNION => Some(SetOperator::Union), - Token::Word(w) if w.keyword == Keyword::EXCEPT => Some(SetOperator::Except), - Token::Word(w) if w.keyword == Keyword::INTERSECT => Some(SetOperator::Intersect), - _ => None, - } - } - - pub fn parse_set_quantifier(&mut self, op: &Option) -> SetQuantifier { - match op { - Some(SetOperator::Except | SetOperator::Intersect | SetOperator::Union) => { - if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { - SetQuantifier::DistinctByName - } else if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { - SetQuantifier::ByName - } else if self.parse_keyword(Keyword::ALL) { - if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { - SetQuantifier::AllByName - } else { - SetQuantifier::All - } - } else if self.parse_keyword(Keyword::DISTINCT) { - SetQuantifier::Distinct - } else { - SetQuantifier::None - } - } - _ => SetQuantifier::None, - } - } - - /// Parse a restricted `SELECT` statement (no CTEs / `UNION` / `ORDER BY`) - pub fn parse_select(&mut self) -> Result { - let select_token = self.expect_keyword(Keyword::SELECT)?; - let value_table_mode = - if dialect_of!(self is BigQueryDialect) && self.parse_keyword(Keyword::AS) { - if self.parse_keyword(Keyword::VALUE) { - Some(ValueTableMode::AsValue) - } else if self.parse_keyword(Keyword::STRUCT) { - Some(ValueTableMode::AsStruct) - } else { - self.expected("VALUE or STRUCT", self.peek_token())? 
- } - } else { - None - }; - - let mut top_before_distinct = false; - let mut top = None; - if self.dialect.supports_top_before_distinct() && self.parse_keyword(Keyword::TOP) { - top = Some(self.parse_top()?); - top_before_distinct = true; - } - let distinct = self.parse_all_or_distinct()?; - if !self.dialect.supports_top_before_distinct() && self.parse_keyword(Keyword::TOP) { - top = Some(self.parse_top()?); - } - - let projection = self.parse_projection()?; - - let into = if self.parse_keyword(Keyword::INTO) { - let temporary = self - .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY]) - .is_some(); - let unlogged = self.parse_keyword(Keyword::UNLOGGED); - let table = self.parse_keyword(Keyword::TABLE); - let name = self.parse_object_name(false)?; - Some(SelectInto { - temporary, - unlogged, - table, - name, - }) - } else { - None - }; - - // Note that for keywords to be properly handled here, they need to be - // added to `RESERVED_FOR_COLUMN_ALIAS` / `RESERVED_FOR_TABLE_ALIAS`, - // otherwise they may be parsed as an alias as part of the `projection` - // or `from`. - - let from = if self.parse_keyword(Keyword::FROM) { - self.parse_comma_separated(Parser::parse_table_and_joins)? - } else { - vec![] - }; - - let mut lateral_views = vec![]; - loop { - if self.parse_keywords(&[Keyword::LATERAL, Keyword::VIEW]) { - let outer = self.parse_keyword(Keyword::OUTER); - let lateral_view = self.parse_expr()?; - let lateral_view_name = self.parse_object_name(false)?; - let lateral_col_alias = self - .parse_comma_separated(|parser| { - parser.parse_optional_alias(&[ - Keyword::WHERE, - Keyword::GROUP, - Keyword::CLUSTER, - Keyword::HAVING, - Keyword::LATERAL, - ]) // This couldn't possibly be a bad idea - })? - .into_iter() - .flatten() - .collect(); - - lateral_views.push(LateralView { - lateral_view, - lateral_view_name, - lateral_col_alias, - outer, - }); - } else { - break; - } - } - - let prewhere = if dialect_of!(self is ClickHouseDialect|GenericDialect) - && self.parse_keyword(Keyword::PREWHERE) - { - Some(self.parse_expr()?) - } else { - None - }; - - let selection = if self.parse_keyword(Keyword::WHERE) { - Some(self.parse_expr()?) - } else { - None - }; - - let group_by = self - .parse_optional_group_by()? - .unwrap_or_else(|| GroupByExpr::Expressions(vec![], vec![])); - - let cluster_by = if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_expr)? - } else { - vec![] - }; - - let distribute_by = if self.parse_keywords(&[Keyword::DISTRIBUTE, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_expr)? - } else { - vec![] - }; - - let sort_by = if self.parse_keywords(&[Keyword::SORT, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_expr)? - } else { - vec![] - }; - - let having = if self.parse_keyword(Keyword::HAVING) { - Some(self.parse_expr()?) - } else { - None - }; - - // Accept QUALIFY and WINDOW in any order and flag accordingly. 
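- // For example, both `... WINDOW w AS (...) QUALIFY c > 1` and
- // `... QUALIFY c > 1 WINDOW w AS (...)` are accepted;
- // `window_before_qualify` records which clause came first so that
- // the statement can be displayed back in its original order.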
- let (named_windows, qualify, window_before_qualify) = if self.parse_keyword(Keyword::WINDOW) - { - let named_windows = self.parse_comma_separated(Parser::parse_named_window)?; - if self.parse_keyword(Keyword::QUALIFY) { - (named_windows, Some(self.parse_expr()?), true) - } else { - (named_windows, None, true) - } - } else if self.parse_keyword(Keyword::QUALIFY) { - let qualify = Some(self.parse_expr()?); - if self.parse_keyword(Keyword::WINDOW) { - ( - self.parse_comma_separated(Parser::parse_named_window)?, - qualify, - false, - ) - } else { - (Default::default(), qualify, false) - } - } else { - Default::default() - }; - - let connect_by = if self.dialect.supports_connect_by() - && self - .parse_one_of_keywords(&[Keyword::START, Keyword::CONNECT]) - .is_some() - { - self.prev_token(); - Some(self.parse_connect_by()?) - } else { - None - }; - - Ok(Select { - select_token: AttachedToken(select_token), - distinct, - top, - top_before_distinct, - projection, - into, - from, - lateral_views, - prewhere, - selection, - group_by, - cluster_by, - distribute_by, - sort_by, - having, - named_window: named_windows, - window_before_qualify, - qualify, - value_table_mode, - connect_by, - }) - } - - /// Invoke `f` after first setting the parser's `ParserState` to `state`. - /// - /// Upon return, restores the parser's state to what it started at. - fn with_state(&mut self, state: ParserState, mut f: F) -> Result - where - F: FnMut(&mut Parser) -> Result, - { - let current_state = self.state; - self.state = state; - let res = f(self); - self.state = current_state; - res - } - - pub fn parse_connect_by(&mut self) -> Result { - let (condition, relationships) = if self.parse_keywords(&[Keyword::CONNECT, Keyword::BY]) { - let relationships = self.with_state(ParserState::ConnectBy, |parser| { - parser.parse_comma_separated(Parser::parse_expr) - })?; - self.expect_keywords(&[Keyword::START, Keyword::WITH])?; - let condition = self.parse_expr()?; - (condition, relationships) - } else { - self.expect_keywords(&[Keyword::START, Keyword::WITH])?; - let condition = self.parse_expr()?; - self.expect_keywords(&[Keyword::CONNECT, Keyword::BY])?; - let relationships = self.with_state(ParserState::ConnectBy, |parser| { - parser.parse_comma_separated(Parser::parse_expr) - })?; - (condition, relationships) - }; - Ok(ConnectBy { - condition, - relationships, - }) - } - - /// Parse `CREATE TABLE x AS TABLE y` - pub fn parse_as_table(&mut self) -> Result { - let token1 = self.next_token(); - let token2 = self.next_token(); - let token3 = self.next_token(); - - let table_name; - let schema_name; - if token2 == Token::Period { - match token1.token { - Token::Word(w) => { - schema_name = w.value; - } - _ => { - return self.expected("Schema name", token1); - } - } - match token3.token { - Token::Word(w) => { - table_name = w.value; - } - _ => { - return self.expected("Table name", token3); - } - } - Ok(Table { - table_name: Some(table_name), - schema_name: Some(schema_name), - }) - } else { - match token1.token { - Token::Word(w) => { - table_name = w.value; - } - _ => { - return self.expected("Table name", token1); - } - } - Ok(Table { - table_name: Some(table_name), - schema_name: None, - }) - } - } - - /// Parse a `SET ROLE` statement. Expects SET to be consumed already. 
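- ///
- /// ```sql
- /// SET [ SESSION | LOCAL ] ROLE { role_name | NONE }
- /// ```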
- fn parse_set_role(&mut self, modifier: Option) -> Result { - self.expect_keyword(Keyword::ROLE)?; - let context_modifier = match modifier { - Some(Keyword::LOCAL) => ContextModifier::Local, - Some(Keyword::SESSION) => ContextModifier::Session, - _ => ContextModifier::None, - }; - - let role_name = if self.parse_keyword(Keyword::NONE) { - None - } else { - Some(self.parse_identifier(false)?) - }; - Ok(Statement::SetRole { - context_modifier, - role_name, - }) - } - - pub fn parse_set(&mut self) -> Result { - let modifier = - self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::HIVEVAR]); - if let Some(Keyword::HIVEVAR) = modifier { - self.expect_token(&Token::Colon)?; - } else if let Some(set_role_stmt) = - self.maybe_parse(|parser| parser.parse_set_role(modifier))? - { - return Ok(set_role_stmt); - } - - let variables = if self.parse_keywords(&[Keyword::TIME, Keyword::ZONE]) { - OneOrManyWithParens::One(ObjectName(vec!["TIMEZONE".into()])) - } else if self.dialect.supports_parenthesized_set_variables() - && self.consume_token(&Token::LParen) - { - let variables = OneOrManyWithParens::Many( - self.parse_comma_separated(|parser: &mut Parser<'a>| { - parser.parse_identifier(false) - })? - .into_iter() - .map(|ident| ObjectName(vec![ident])) - .collect(), - ); - self.expect_token(&Token::RParen)?; - variables - } else { - OneOrManyWithParens::One(self.parse_object_name(false)?) - }; - - if matches!(&variables, OneOrManyWithParens::One(variable) if variable.to_string().eq_ignore_ascii_case("NAMES") - && dialect_of!(self is MySqlDialect | GenericDialect)) - { - if self.parse_keyword(Keyword::DEFAULT) { - return Ok(Statement::SetNamesDefault {}); - } - - let charset_name = self.parse_literal_string()?; - let collation_name = if self.parse_one_of_keywords(&[Keyword::COLLATE]).is_some() { - Some(self.parse_literal_string()?) - } else { - None - }; - - return Ok(Statement::SetNames { - charset_name, - collation_name, - }); - } - - let parenthesized_assignment = matches!(&variables, OneOrManyWithParens::Many(_)); - - if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { - if parenthesized_assignment { - self.expect_token(&Token::LParen)?; - } - - let mut values = vec![]; - loop { - let value = if let Some(expr) = self.try_parse_expr_sub_query()? { - expr - } else if let Ok(expr) = self.parse_expr() { - expr - } else { - self.expected("variable value", self.peek_token())? - }; - - values.push(value); - if self.consume_token(&Token::Comma) { - continue; - } - - if parenthesized_assignment { - self.expect_token(&Token::RParen)?; - } - return Ok(Statement::SetVariable { - local: modifier == Some(Keyword::LOCAL), - hivevar: Some(Keyword::HIVEVAR) == modifier, - variables, - value: values, - }); - } - } - - let OneOrManyWithParens::One(variable) = variables else { - return self.expected("set variable", self.peek_token()); - }; - - if variable.to_string().eq_ignore_ascii_case("TIMEZONE") { - // for some db (e.g. 
postgresql), SET TIME ZONE is an alias for SET TIMEZONE [TO|=] - match self.parse_expr() { - Ok(expr) => Ok(Statement::SetTimeZone { - local: modifier == Some(Keyword::LOCAL), - value: expr, - }), - _ => self.expected("timezone value", self.peek_token())?, - } - } else if variable.to_string() == "CHARACTERISTICS" { - self.expect_keywords(&[Keyword::AS, Keyword::TRANSACTION])?; - Ok(Statement::SetTransaction { - modes: self.parse_transaction_modes()?, - snapshot: None, - session: true, - }) - } else if variable.to_string() == "TRANSACTION" && modifier.is_none() { - if self.parse_keyword(Keyword::SNAPSHOT) { - let snapshot_id = self.parse_value()?; - return Ok(Statement::SetTransaction { - modes: vec![], - snapshot: Some(snapshot_id), - session: false, - }); - } - Ok(Statement::SetTransaction { - modes: self.parse_transaction_modes()?, - snapshot: None, - session: false, - }) - } else { - self.expected("equals sign or TO", self.peek_token()) - } - } - - pub fn parse_show(&mut self) -> Result { - let terse = self.parse_keyword(Keyword::TERSE); - let extended = self.parse_keyword(Keyword::EXTENDED); - let full = self.parse_keyword(Keyword::FULL); - let session = self.parse_keyword(Keyword::SESSION); - let global = self.parse_keyword(Keyword::GLOBAL); - let external = self.parse_keyword(Keyword::EXTERNAL); - if self - .parse_one_of_keywords(&[Keyword::COLUMNS, Keyword::FIELDS]) - .is_some() - { - Ok(self.parse_show_columns(extended, full)?) - } else if self.parse_keyword(Keyword::TABLES) { - Ok(self.parse_show_tables(terse, extended, full, external)?) - } else if self.parse_keywords(&[Keyword::MATERIALIZED, Keyword::VIEWS]) { - Ok(self.parse_show_views(terse, true)?) - } else if self.parse_keyword(Keyword::VIEWS) { - Ok(self.parse_show_views(terse, false)?) - } else if self.parse_keyword(Keyword::FUNCTIONS) { - Ok(self.parse_show_functions()?) - } else if extended || full { - Err(ParserError::ParserError( - "EXTENDED/FULL are not supported with this type of SHOW query".to_string(), - )) - } else if self.parse_one_of_keywords(&[Keyword::CREATE]).is_some() { - Ok(self.parse_show_create()?) - } else if self.parse_keyword(Keyword::COLLATION) { - Ok(self.parse_show_collation()?) 
- } else if self.parse_keyword(Keyword::VARIABLES) - && dialect_of!(self is MySqlDialect | GenericDialect) - { - Ok(Statement::ShowVariables { - filter: self.parse_show_statement_filter()?, - session, - global, - }) - } else if self.parse_keyword(Keyword::STATUS) - && dialect_of!(self is MySqlDialect | GenericDialect) - { - Ok(Statement::ShowStatus { - filter: self.parse_show_statement_filter()?, - session, - global, - }) - } else if self.parse_keyword(Keyword::DATABASES) { - self.parse_show_databases(terse) - } else if self.parse_keyword(Keyword::SCHEMAS) { - self.parse_show_schemas(terse) - } else { - Ok(Statement::ShowVariable { - variable: self.parse_identifiers()?, - }) - } - } - - fn parse_show_databases(&mut self, terse: bool) -> Result { - let history = self.parse_keyword(Keyword::HISTORY); - let show_options = self.parse_show_stmt_options()?; - Ok(Statement::ShowDatabases { - terse, - history, - show_options, - }) - } - - fn parse_show_schemas(&mut self, terse: bool) -> Result { - let history = self.parse_keyword(Keyword::HISTORY); - let show_options = self.parse_show_stmt_options()?; - Ok(Statement::ShowSchemas { - terse, - history, - show_options, - }) - } - - pub fn parse_show_create(&mut self) -> Result { - let obj_type = match self.expect_one_of_keywords(&[ - Keyword::TABLE, - Keyword::TRIGGER, - Keyword::FUNCTION, - Keyword::PROCEDURE, - Keyword::EVENT, - Keyword::VIEW, - ])? { - Keyword::TABLE => Ok(ShowCreateObject::Table), - Keyword::TRIGGER => Ok(ShowCreateObject::Trigger), - Keyword::FUNCTION => Ok(ShowCreateObject::Function), - Keyword::PROCEDURE => Ok(ShowCreateObject::Procedure), - Keyword::EVENT => Ok(ShowCreateObject::Event), - Keyword::VIEW => Ok(ShowCreateObject::View), - keyword => Err(ParserError::ParserError(format!( - "Unable to map keyword to ShowCreateObject: {keyword:?}" - ))), - }?; - - let obj_name = self.parse_object_name(false)?; - - Ok(Statement::ShowCreate { obj_type, obj_name }) - } - - pub fn parse_show_columns( - &mut self, - extended: bool, - full: bool, - ) -> Result { - let show_options = self.parse_show_stmt_options()?; - Ok(Statement::ShowColumns { - extended, - full, - show_options, - }) - } - - fn parse_show_tables( - &mut self, - terse: bool, - extended: bool, - full: bool, - external: bool, - ) -> Result { - let history = !external && self.parse_keyword(Keyword::HISTORY); - let show_options = self.parse_show_stmt_options()?; - Ok(Statement::ShowTables { - terse, - history, - extended, - full, - external, - show_options, - }) - } - - fn parse_show_views( - &mut self, - terse: bool, - materialized: bool, - ) -> Result { - let show_options = self.parse_show_stmt_options()?; - Ok(Statement::ShowViews { - materialized, - terse, - show_options, - }) - } - - pub fn parse_show_functions(&mut self) -> Result { - let filter = self.parse_show_statement_filter()?; - Ok(Statement::ShowFunctions { filter }) - } - - pub fn parse_show_collation(&mut self) -> Result { - let filter = self.parse_show_statement_filter()?; - Ok(Statement::ShowCollation { filter }) - } - - pub fn parse_show_statement_filter( - &mut self, - ) -> Result, ParserError> { - if self.parse_keyword(Keyword::LIKE) { - Ok(Some(ShowStatementFilter::Like( - self.parse_literal_string()?, - ))) - } else if self.parse_keyword(Keyword::ILIKE) { - Ok(Some(ShowStatementFilter::ILike( - self.parse_literal_string()?, - ))) - } else if self.parse_keyword(Keyword::WHERE) { - Ok(Some(ShowStatementFilter::Where(self.parse_expr()?))) - } else { - self.maybe_parse(|parser| -> Result { - 
parser.parse_literal_string() - })? - .map_or(Ok(None), |filter| { - Ok(Some(ShowStatementFilter::NoKeyword(filter))) - }) - } - } - - pub fn parse_use(&mut self) -> Result { - // Determine which keywords are recognized by the current dialect - let parsed_keyword = if dialect_of!(self is HiveDialect) { - // HiveDialect accepts USE DEFAULT; statement without any db specified - if self.parse_keyword(Keyword::DEFAULT) { - return Ok(Statement::Use(Use::Default)); - } - None // HiveDialect doesn't expect any other specific keyword after `USE` - } else if dialect_of!(self is DatabricksDialect) { - self.parse_one_of_keywords(&[Keyword::CATALOG, Keyword::DATABASE, Keyword::SCHEMA]) - } else if dialect_of!(self is SnowflakeDialect) { - self.parse_one_of_keywords(&[ - Keyword::DATABASE, - Keyword::SCHEMA, - Keyword::WAREHOUSE, - Keyword::ROLE, - Keyword::SECONDARY, - ]) - } else { - None // No specific keywords for other dialects, including GenericDialect - }; - - let result = if matches!(parsed_keyword, Some(Keyword::SECONDARY)) { - self.parse_secondary_roles()? - } else { - let obj_name = self.parse_object_name(false)?; - match parsed_keyword { - Some(Keyword::CATALOG) => Use::Catalog(obj_name), - Some(Keyword::DATABASE) => Use::Database(obj_name), - Some(Keyword::SCHEMA) => Use::Schema(obj_name), - Some(Keyword::WAREHOUSE) => Use::Warehouse(obj_name), - Some(Keyword::ROLE) => Use::Role(obj_name), - _ => Use::Object(obj_name), - } - }; - - Ok(Statement::Use(result)) - } - - fn parse_secondary_roles(&mut self) -> Result { - self.expect_keyword(Keyword::ROLES)?; - if self.parse_keyword(Keyword::NONE) { - Ok(Use::SecondaryRoles(SecondaryRoles::None)) - } else if self.parse_keyword(Keyword::ALL) { - Ok(Use::SecondaryRoles(SecondaryRoles::All)) - } else { - let roles = self.parse_comma_separated(|parser| parser.parse_identifier(false))?; - Ok(Use::SecondaryRoles(SecondaryRoles::List(roles))) - } - } - - pub fn parse_table_and_joins(&mut self) -> Result { - let relation = self.parse_table_factor()?; - // Note that for keywords to be properly handled here, they need to be - // added to `RESERVED_FOR_TABLE_ALIAS`, otherwise they may be parsed as - // a table alias. - let mut joins = vec![]; - loop { - let global = self.parse_keyword(Keyword::GLOBAL); - let join = if self.parse_keyword(Keyword::CROSS) { - let join_operator = if self.parse_keyword(Keyword::JOIN) { - JoinOperator::CrossJoin - } else if self.parse_keyword(Keyword::APPLY) { - // MSSQL extension, similar to CROSS JOIN LATERAL - JoinOperator::CrossApply - } else { - return self.expected("JOIN or APPLY after CROSS", self.peek_token()); - }; - Join { - relation: self.parse_table_factor()?, - global, - join_operator, - } - } else if self.parse_keyword(Keyword::OUTER) { - // MSSQL extension, similar to LEFT JOIN LATERAL .. 
ON 1=1 - self.expect_keyword(Keyword::APPLY)?; - Join { - relation: self.parse_table_factor()?, - global, - join_operator: JoinOperator::OuterApply, - } - } else if self.parse_keyword(Keyword::ASOF) { - self.expect_keyword(Keyword::JOIN)?; - let relation = self.parse_table_factor()?; - self.expect_keyword(Keyword::MATCH_CONDITION)?; - let match_condition = self.parse_parenthesized(Self::parse_expr)?; - Join { - relation, - global, - join_operator: JoinOperator::AsOf { - match_condition, - constraint: self.parse_join_constraint(false)?, - }, - } - } else { - let natural = self.parse_keyword(Keyword::NATURAL); - let peek_keyword = if let Token::Word(w) = self.peek_token().token { - w.keyword - } else { - Keyword::NoKeyword - }; - - let join_operator_type = match peek_keyword { - Keyword::INNER | Keyword::JOIN => { - let _ = self.parse_keyword(Keyword::INNER); // [ INNER ] - self.expect_keyword(Keyword::JOIN)?; - JoinOperator::Inner - } - kw @ Keyword::LEFT | kw @ Keyword::RIGHT => { - let _ = self.next_token(); // consume LEFT/RIGHT - let is_left = kw == Keyword::LEFT; - let join_type = self.parse_one_of_keywords(&[ - Keyword::OUTER, - Keyword::SEMI, - Keyword::ANTI, - Keyword::JOIN, - ]); - match join_type { - Some(Keyword::OUTER) => { - self.expect_keyword(Keyword::JOIN)?; - if is_left { - JoinOperator::LeftOuter - } else { - JoinOperator::RightOuter - } - } - Some(Keyword::SEMI) => { - self.expect_keyword(Keyword::JOIN)?; - if is_left { - JoinOperator::LeftSemi - } else { - JoinOperator::RightSemi - } - } - Some(Keyword::ANTI) => { - self.expect_keyword(Keyword::JOIN)?; - if is_left { - JoinOperator::LeftAnti - } else { - JoinOperator::RightAnti - } - } - Some(Keyword::JOIN) => { - if is_left { - JoinOperator::LeftOuter - } else { - JoinOperator::RightOuter - } - } - _ => { - return Err(ParserError::ParserError(format!( - "expected OUTER, SEMI, ANTI or JOIN after {kw:?}" - ))) - } - } - } - Keyword::ANTI => { - let _ = self.next_token(); // consume ANTI - self.expect_keyword(Keyword::JOIN)?; - JoinOperator::Anti - } - Keyword::SEMI => { - let _ = self.next_token(); // consume SEMI - self.expect_keyword(Keyword::JOIN)?; - JoinOperator::Semi - } - Keyword::FULL => { - let _ = self.next_token(); // consume FULL - let _ = self.parse_keyword(Keyword::OUTER); // [ OUTER ] - self.expect_keyword(Keyword::JOIN)?; - JoinOperator::FullOuter - } - Keyword::OUTER => { - return self.expected("LEFT, RIGHT, or FULL", self.peek_token()); - } - _ if natural => { - return self.expected("a join type after NATURAL", self.peek_token()); - } - _ => break, - }; - let relation = self.parse_table_factor()?; - let join_constraint = self.parse_join_constraint(natural)?; - Join { - relation, - global, - join_operator: join_operator_type(join_constraint), - } - }; - joins.push(join); - } - Ok(TableWithJoins { relation, joins }) - } - - /// A table name or a parenthesized subquery, followed by optional `[AS] alias` - pub fn parse_table_factor(&mut self) -> Result { - if self.parse_keyword(Keyword::LATERAL) { - // LATERAL must always be followed by a subquery or table function. 
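- // e.g. `FROM t, LATERAL (SELECT ...)` or `FROM t, LATERAL generate_series(1, 3)`
- // (illustrative queries, not taken from the original comments)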
- if self.consume_token(&Token::LParen) { - self.parse_derived_table_factor(Lateral) - } else { - let name = self.parse_object_name(false)?; - self.expect_token(&Token::LParen)?; - let args = self.parse_optional_args()?; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - Ok(TableFactor::Function { - lateral: true, - name, - args, - alias, - }) - } - } else if self.parse_keyword(Keyword::TABLE) { - // parse table function (SELECT * FROM TABLE () [ AS ]) - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - Ok(TableFactor::TableFunction { expr, alias }) - } else if self.consume_token(&Token::LParen) { - // A left paren introduces either a derived table (i.e., a subquery) - // or a nested join. It's nearly impossible to determine ahead of - // time which it is... so we just try to parse both. - // - // Here's an example that demonstrates the complexity: - // /-------------------------------------------------------\ - // | /-----------------------------------\ | - // SELECT * FROM ( ( ( (SELECT 1) UNION (SELECT 2) ) AS t1 NATURAL JOIN t2 ) ) - // ^ ^ ^ ^ - // | | | | - // | | | | - // | | | (4) belongs to a SetExpr::Query inside the subquery - // | | (3) starts a derived table (subquery) - // | (2) starts a nested join - // (1) an additional set of parens around a nested join - // - - // If the recently consumed '(' starts a derived table, the call to - // `parse_derived_table_factor` below will return success after parsing the - // subquery, followed by the closing ')', and the alias of the derived table. - // In the example above this is case (3). - if let Some(mut table) = - self.maybe_parse(|parser| parser.parse_derived_table_factor(NotLateral))? - { - while let Some(kw) = self.parse_one_of_keywords(&[Keyword::PIVOT, Keyword::UNPIVOT]) - { - table = match kw { - Keyword::PIVOT => self.parse_pivot_table_factor(table)?, - Keyword::UNPIVOT => self.parse_unpivot_table_factor(table)?, - _ => unreachable!(), - } - } - return Ok(table); - } - - // A parsing error from `parse_derived_table_factor` indicates that the '(' we've - // recently consumed does not start a derived table (cases 1, 2, or 4). - // `maybe_parse` will ignore such an error and rewind to be after the opening '('. - - // Inside the parentheses we expect to find an (A) table factor - // followed by some joins or (B) another level of nesting. - let mut table_and_joins = self.parse_table_and_joins()?; - - #[allow(clippy::if_same_then_else)] - if !table_and_joins.joins.is_empty() { - self.expect_token(&Token::RParen)?; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - Ok(TableFactor::NestedJoin { - table_with_joins: Box::new(table_and_joins), - alias, - }) // (A) - } else if let TableFactor::NestedJoin { - table_with_joins: _, - alias: _, - } = &table_and_joins.relation - { - // (B): `table_and_joins` (what we found inside the parentheses) - // is a nested join `(foo JOIN bar)`, not followed by other joins. 
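- // e.g. `SELECT * FROM ((t1 JOIN t2 ON t1.id = t2.id)) AS j`
- // (an illustrative query for this branch, not from the original comments)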
- self.expect_token(&Token::RParen)?; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - Ok(TableFactor::NestedJoin { - table_with_joins: Box::new(table_and_joins), - alias, - }) - } else if dialect_of!(self is SnowflakeDialect | GenericDialect) { - // Dialect-specific behavior: Snowflake diverges from the - // standard and from most of the other implementations by - // allowing extra parentheses not only around a join (B), but - // around lone table names (e.g. `FROM (mytable [AS alias])`) - // and around derived tables (e.g. `FROM ((SELECT ...) - // [AS alias])`) as well. - self.expect_token(&Token::RParen)?; - - if let Some(outer_alias) = - self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)? - { - // Snowflake also allows specifying an alias *after* parens - // e.g. `FROM (mytable) AS alias` - match &mut table_and_joins.relation { - TableFactor::Derived { alias, .. } - | TableFactor::Table { alias, .. } - | TableFactor::Function { alias, .. } - | TableFactor::UNNEST { alias, .. } - | TableFactor::JsonTable { alias, .. } - | TableFactor::OpenJsonTable { alias, .. } - | TableFactor::TableFunction { alias, .. } - | TableFactor::Pivot { alias, .. } - | TableFactor::Unpivot { alias, .. } - | TableFactor::MatchRecognize { alias, .. } - | TableFactor::NestedJoin { alias, .. } => { - // but not `FROM (mytable AS alias1) AS alias2`. - if let Some(inner_alias) = alias { - return Err(ParserError::ParserError(format!( - "duplicate alias {inner_alias}" - ))); - } - // Act as if the alias was specified normally next - // to the table name: `(mytable) AS alias` -> - // `(mytable AS alias)` - alias.replace(outer_alias); - } - }; - } - // Do not store the extra set of parens in the AST - Ok(table_and_joins.relation) - } else { - // The SQL spec prohibits derived tables and bare tables from - // appearing alone in parentheses (e.g. `FROM (mytable)`) - self.expected("joined table", self.peek_token()) - } - } else if dialect_of!(self is SnowflakeDialect | DatabricksDialect | GenericDialect) - && matches!( - self.peek_tokens(), - [ - Token::Word(Word { - keyword: Keyword::VALUES, - .. - }), - Token::LParen - ] - ) - { - self.expect_keyword(Keyword::VALUES)?; - - // Snowflake and Databricks allow syntax like below: - // SELECT * FROM VALUES (1, 'a'), (2, 'b') AS t (col1, col2) - // where there are no parentheses around the VALUES clause. 
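- // A minimal illustrative form of the same shape: SELECT * FROM VALUES (1), (2)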
- let values = SetExpr::Values(self.parse_values(false)?); - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - Ok(TableFactor::Derived { - lateral: false, - subquery: Box::new(Query { - with: None, - body: Box::new(values), - order_by: None, - limit: None, - limit_by: vec![], - offset: None, - fetch: None, - locks: vec![], - for_clause: None, - settings: None, - format_clause: None, - }), - alias, - }) - } else if dialect_of!(self is BigQueryDialect | PostgreSqlDialect | GenericDialect) - && self.parse_keyword(Keyword::UNNEST) - { - self.expect_token(&Token::LParen)?; - let array_exprs = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - - let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); - let alias = match self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS) { - Ok(Some(alias)) => Some(alias), - Ok(None) => None, - Err(e) => return Err(e), - }; - - let with_offset = match self.expect_keywords(&[Keyword::WITH, Keyword::OFFSET]) { - Ok(()) => true, - Err(_) => false, - }; - - let with_offset_alias = if with_offset { - match self.parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS) { - Ok(Some(alias)) => Some(alias), - Ok(None) => None, - Err(e) => return Err(e), - } - } else { - None - }; - - Ok(TableFactor::UNNEST { - alias, - array_exprs, - with_offset, - with_offset_alias, - with_ordinality, - }) - } else if self.parse_keyword_with_tokens(Keyword::JSON_TABLE, &[Token::LParen]) { - let json_expr = self.parse_expr()?; - self.expect_token(&Token::Comma)?; - let json_path = self.parse_value()?; - self.expect_keyword(Keyword::COLUMNS)?; - self.expect_token(&Token::LParen)?; - let columns = self.parse_comma_separated(Parser::parse_json_table_column_def)?; - self.expect_token(&Token::RParen)?; - self.expect_token(&Token::RParen)?; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - Ok(TableFactor::JsonTable { - json_expr, - json_path, - columns, - alias, - }) - } else if self.parse_keyword_with_tokens(Keyword::OPENJSON, &[Token::LParen]) { - self.prev_token(); - self.parse_open_json_table_factor() - } else { - let name = self.parse_object_name(true)?; - - let json_path = match self.peek_token().token { - Token::LBracket if self.dialect.supports_partiql() => Some(self.parse_json_path()?), - _ => None, - }; - - let partitions: Vec = if dialect_of!(self is MySqlDialect | GenericDialect) - && self.parse_keyword(Keyword::PARTITION) - { - self.parse_parenthesized_identifiers()? - } else { - vec![] - }; - - // Parse potential version qualifier - let version = self.parse_table_version()?; - - // Postgres, MSSQL, ClickHouse: table-valued functions: - let args = if self.consume_token(&Token::LParen) { - Some(self.parse_table_function_args()?) 
- } else { - None - }; - - let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); - - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - - // MSSQL-specific table hints: - let mut with_hints = vec![]; - if self.parse_keyword(Keyword::WITH) { - if self.consume_token(&Token::LParen) { - with_hints = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - } else { - // rewind, as WITH may belong to the next statement's CTE - self.prev_token(); - } - }; - - let mut table = TableFactor::Table { - name, - alias, - args, - with_hints, - version, - partitions, - with_ordinality, - json_path, - }; - - while let Some(kw) = self.parse_one_of_keywords(&[Keyword::PIVOT, Keyword::UNPIVOT]) { - table = match kw { - Keyword::PIVOT => self.parse_pivot_table_factor(table)?, - Keyword::UNPIVOT => self.parse_unpivot_table_factor(table)?, - _ => unreachable!(), - } - } - - if self.dialect.supports_match_recognize() - && self.parse_keyword(Keyword::MATCH_RECOGNIZE) - { - table = self.parse_match_recognize(table)?; - } - - Ok(table) - } - } - - /// Parses `OPENJSON( jsonExpression [ , path ] ) [ ]` clause, - /// assuming the `OPENJSON` keyword was already consumed. - fn parse_open_json_table_factor(&mut self) -> Result { - self.expect_token(&Token::LParen)?; - let json_expr = self.parse_expr()?; - let json_path = if self.consume_token(&Token::Comma) { - Some(self.parse_value()?) - } else { - None - }; - self.expect_token(&Token::RParen)?; - let columns = if self.parse_keyword(Keyword::WITH) { - self.expect_token(&Token::LParen)?; - let columns = self.parse_comma_separated(Parser::parse_openjson_table_column_def)?; - self.expect_token(&Token::RParen)?; - columns - } else { - Vec::new() - }; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - Ok(TableFactor::OpenJsonTable { - json_expr, - json_path, - columns, - alias, - }) - } - - fn parse_match_recognize(&mut self, table: TableFactor) -> Result { - self.expect_token(&Token::LParen)?; - - let partition_by = if self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_expr)? - } else { - vec![] - }; - - let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_order_by_expr)? - } else { - vec![] - }; - - let measures = if self.parse_keyword(Keyword::MEASURES) { - self.parse_comma_separated(|p| { - let expr = p.parse_expr()?; - let _ = p.parse_keyword(Keyword::AS); - let alias = p.parse_identifier(false)?; - Ok(Measure { expr, alias }) - })? 
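- // e.g. `MEASURES FIRST(a.ts) AS start_ts, LAST(a.ts) AS end_ts`
- // (an illustrative measure list; the names are not from the source)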
- } else { - vec![] - }; - - let rows_per_match = - if self.parse_keywords(&[Keyword::ONE, Keyword::ROW, Keyword::PER, Keyword::MATCH]) { - Some(RowsPerMatch::OneRow) - } else if self.parse_keywords(&[ - Keyword::ALL, - Keyword::ROWS, - Keyword::PER, - Keyword::MATCH, - ]) { - Some(RowsPerMatch::AllRows( - if self.parse_keywords(&[Keyword::SHOW, Keyword::EMPTY, Keyword::MATCHES]) { - Some(EmptyMatchesMode::Show) - } else if self.parse_keywords(&[ - Keyword::OMIT, - Keyword::EMPTY, - Keyword::MATCHES, - ]) { - Some(EmptyMatchesMode::Omit) - } else if self.parse_keywords(&[ - Keyword::WITH, - Keyword::UNMATCHED, - Keyword::ROWS, - ]) { - Some(EmptyMatchesMode::WithUnmatched) - } else { - None - }, - )) - } else { - None - }; - - let after_match_skip = - if self.parse_keywords(&[Keyword::AFTER, Keyword::MATCH, Keyword::SKIP]) { - if self.parse_keywords(&[Keyword::PAST, Keyword::LAST, Keyword::ROW]) { - Some(AfterMatchSkip::PastLastRow) - } else if self.parse_keywords(&[Keyword::TO, Keyword::NEXT, Keyword::ROW]) { - Some(AfterMatchSkip::ToNextRow) - } else if self.parse_keywords(&[Keyword::TO, Keyword::FIRST]) { - Some(AfterMatchSkip::ToFirst(self.parse_identifier(false)?)) - } else if self.parse_keywords(&[Keyword::TO, Keyword::LAST]) { - Some(AfterMatchSkip::ToLast(self.parse_identifier(false)?)) - } else { - let found = self.next_token(); - return self.expected("after match skip option", found); - } - } else { - None - }; - - self.expect_keyword(Keyword::PATTERN)?; - let pattern = self.parse_parenthesized(Self::parse_pattern)?; - - self.expect_keyword(Keyword::DEFINE)?; - - let symbols = self.parse_comma_separated(|p| { - let symbol = p.parse_identifier(false)?; - p.expect_keyword(Keyword::AS)?; - let definition = p.parse_expr()?; - Ok(SymbolDefinition { symbol, definition }) - })?; - - self.expect_token(&Token::RParen)?; - - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - - Ok(TableFactor::MatchRecognize { - table: Box::new(table), - partition_by, - order_by, - measures, - rows_per_match, - after_match_skip, - pattern, - symbols, - alias, - }) - } - - fn parse_base_pattern(&mut self) -> Result { - match self.next_token().token { - Token::Caret => Ok(MatchRecognizePattern::Symbol(MatchRecognizeSymbol::Start)), - Token::Placeholder(s) if s == "$" => { - Ok(MatchRecognizePattern::Symbol(MatchRecognizeSymbol::End)) - } - Token::LBrace => { - self.expect_token(&Token::Minus)?; - let symbol = self - .parse_identifier(false) - .map(MatchRecognizeSymbol::Named)?; - self.expect_token(&Token::Minus)?; - self.expect_token(&Token::RBrace)?; - Ok(MatchRecognizePattern::Exclude(symbol)) - } - Token::Word(Word { - value, - quote_style: None, - .. 
- }) if value == "PERMUTE" => { - self.expect_token(&Token::LParen)?; - let symbols = self.parse_comma_separated(|p| { - p.parse_identifier(false).map(MatchRecognizeSymbol::Named) - })?; - self.expect_token(&Token::RParen)?; - Ok(MatchRecognizePattern::Permute(symbols)) - } - Token::LParen => { - let pattern = self.parse_pattern()?; - self.expect_token(&Token::RParen)?; - Ok(MatchRecognizePattern::Group(Box::new(pattern))) - } - _ => { - self.prev_token(); - self.parse_identifier(false) - .map(MatchRecognizeSymbol::Named) - .map(MatchRecognizePattern::Symbol) - } - } - } - - fn parse_repetition_pattern(&mut self) -> Result { - let mut pattern = self.parse_base_pattern()?; - loop { - let token = self.next_token(); - let quantifier = match token.token { - Token::Mul => RepetitionQuantifier::ZeroOrMore, - Token::Plus => RepetitionQuantifier::OneOrMore, - Token::Placeholder(s) if s == "?" => RepetitionQuantifier::AtMostOne, - Token::LBrace => { - // quantifier is a range like {n} or {n,} or {,m} or {n,m} - let token = self.next_token(); - match token.token { - Token::Comma => { - let next_token = self.next_token(); - let Token::Number(n, _) = next_token.token else { - return self.expected("literal number", next_token); - }; - self.expect_token(&Token::RBrace)?; - RepetitionQuantifier::AtMost(Self::parse(n, token.span.start)?) - } - Token::Number(n, _) if self.consume_token(&Token::Comma) => { - let next_token = self.next_token(); - match next_token.token { - Token::Number(m, _) => { - self.expect_token(&Token::RBrace)?; - RepetitionQuantifier::Range( - Self::parse(n, token.span.start)?, - Self::parse(m, token.span.start)?, - ) - } - Token::RBrace => { - RepetitionQuantifier::AtLeast(Self::parse(n, token.span.start)?) - } - _ => { - return self.expected("} or upper bound", next_token); - } - } - } - Token::Number(n, _) => { - self.expect_token(&Token::RBrace)?; - RepetitionQuantifier::Exactly(Self::parse(n, token.span.start)?) - } - _ => return self.expected("quantifier range", token), - } - } - _ => { - self.prev_token(); - break; - } - }; - pattern = MatchRecognizePattern::Repetition(Box::new(pattern), quantifier); - } - Ok(pattern) - } - - fn parse_concat_pattern(&mut self) -> Result { - let mut patterns = vec![self.parse_repetition_pattern()?]; - while !matches!(self.peek_token().token, Token::RParen | Token::Pipe) { - patterns.push(self.parse_repetition_pattern()?); - } - match <[MatchRecognizePattern; 1]>::try_from(patterns) { - Ok([pattern]) => Ok(pattern), - Err(patterns) => Ok(MatchRecognizePattern::Concat(patterns)), - } - } - - fn parse_pattern(&mut self) -> Result { - let pattern = self.parse_concat_pattern()?; - if self.consume_token(&Token::Pipe) { - match self.parse_pattern()? { - // flatten nested alternations - MatchRecognizePattern::Alternation(mut patterns) => { - patterns.insert(0, pattern); - Ok(MatchRecognizePattern::Alternation(patterns)) - } - next => Ok(MatchRecognizePattern::Alternation(vec![pattern, next])), - } - } else { - Ok(pattern) - } - } - - /// Parse a given table version specifier. - /// - /// For now it only supports timestamp versioning for BigQuery and MSSQL dialects. - pub fn parse_table_version(&mut self) -> Result, ParserError> { - if dialect_of!(self is BigQueryDialect | MsSqlDialect) - && self.parse_keywords(&[Keyword::FOR, Keyword::SYSTEM_TIME, Keyword::AS, Keyword::OF]) - { - let expr = self.parse_expr()?; - Ok(Some(TableVersion::ForSystemTimeAsOf(expr))) - } else { - Ok(None) - } - } - - /// Parses MySQL's JSON_TABLE column definition. 
- /// For example: `id INT EXISTS PATH '$' DEFAULT '0' ON EMPTY ERROR ON ERROR` - pub fn parse_json_table_column_def(&mut self) -> Result { - if self.parse_keyword(Keyword::NESTED) { - let _has_path_keyword = self.parse_keyword(Keyword::PATH); - let path = self.parse_value()?; - self.expect_keyword(Keyword::COLUMNS)?; - let columns = self.parse_parenthesized(|p| { - p.parse_comma_separated(Self::parse_json_table_column_def) - })?; - return Ok(JsonTableColumn::Nested(JsonTableNestedColumn { - path, - columns, - })); - } - let name = self.parse_identifier(false)?; - if self.parse_keyword(Keyword::FOR) { - self.expect_keyword(Keyword::ORDINALITY)?; - return Ok(JsonTableColumn::ForOrdinality(name)); - } - let r#type = self.parse_data_type()?; - let exists = self.parse_keyword(Keyword::EXISTS); - self.expect_keyword(Keyword::PATH)?; - let path = self.parse_value()?; - let mut on_empty = None; - let mut on_error = None; - while let Some(error_handling) = self.parse_json_table_column_error_handling()? { - if self.parse_keyword(Keyword::EMPTY) { - on_empty = Some(error_handling); - } else { - self.expect_keyword(Keyword::ERROR)?; - on_error = Some(error_handling); - } - } - Ok(JsonTableColumn::Named(JsonTableNamedColumn { - name, - r#type, - path, - exists, - on_empty, - on_error, - })) - } - - /// Parses MSSQL's `OPENJSON WITH` column definition. - /// - /// ```sql - /// colName type [ column_path ] [ AS JSON ] - /// ``` - /// - /// Reference: - pub fn parse_openjson_table_column_def(&mut self) -> Result { - let name = self.parse_identifier(false)?; - let r#type = self.parse_data_type()?; - let path = if let Token::SingleQuotedString(path) = self.peek_token().token { - self.next_token(); - Some(path) - } else { - None - }; - let as_json = self.parse_keyword(Keyword::AS); - if as_json { - self.expect_keyword(Keyword::JSON)?; - } - Ok(OpenJsonTableColumn { - name, - r#type, - path, - as_json, - }) - } - - fn parse_json_table_column_error_handling( - &mut self, - ) -> Result, ParserError> { - let res = if self.parse_keyword(Keyword::NULL) { - JsonTableColumnErrorHandling::Null - } else if self.parse_keyword(Keyword::ERROR) { - JsonTableColumnErrorHandling::Error - } else if self.parse_keyword(Keyword::DEFAULT) { - JsonTableColumnErrorHandling::Default(self.parse_value()?) - } else { - return Ok(None); - }; - self.expect_keyword(Keyword::ON)?; - Ok(Some(res)) - } - - pub fn parse_derived_table_factor( - &mut self, - lateral: IsLateral, - ) -> Result { - let subquery = self.parse_query()?; - self.expect_token(&Token::RParen)?; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - Ok(TableFactor::Derived { - lateral: match lateral { - Lateral => true, - NotLateral => false, - }, - subquery, - alias, - }) - } - - fn parse_aliased_function_call(&mut self) -> Result { - let function_name = match self.next_token().token { - Token::Word(w) => Ok(w.value), - _ => self.expected("a function identifier", self.peek_token()), - }?; - let expr = self.parse_function(ObjectName(vec![Ident::new(function_name)]))?; - let alias = if self.parse_keyword(Keyword::AS) { - Some(self.parse_identifier(false)?) 
- } else {
- None
- };
-
- Ok(ExprWithAlias { expr, alias })
- }
- /// Parses an expression with an optional alias
- ///
- /// Examples:
- ///
- /// ```sql
- /// SUM(price) AS total_price
- /// ```
- /// ```sql
- /// SUM(price)
- /// ```
- ///
- /// Example
- /// ```
- /// # use sqlparser::parser::{Parser, ParserError};
- /// # use sqlparser::dialect::GenericDialect;
- /// # fn main() -> Result<(), ParserError> {
- /// let sql = r#"SUM("a") as "b""#;
- /// let mut parser = Parser::new(&GenericDialect).try_with_sql(sql)?;
- /// let expr_with_alias = parser.parse_expr_with_alias()?;
- /// assert_eq!(Some("b".to_string()), expr_with_alias.alias.map(|x|x.value));
- /// # Ok(())
- /// # }
- /// ```
- pub fn parse_expr_with_alias(&mut self) -> Result<ExprWithAlias, ParserError> {
- let expr = self.parse_expr()?;
- let alias = if self.parse_keyword(Keyword::AS) {
- Some(self.parse_identifier(false)?)
- } else {
- None
- };
-
- Ok(ExprWithAlias { expr, alias })
- }
-
- pub fn parse_pivot_table_factor(
- &mut self,
- table: TableFactor,
- ) -> Result<TableFactor, ParserError> {
- self.expect_token(&Token::LParen)?;
- let aggregate_functions = self.parse_comma_separated(Self::parse_aliased_function_call)?;
- self.expect_keyword(Keyword::FOR)?;
- let value_column = self.parse_object_name(false)?.0;
- self.expect_keyword(Keyword::IN)?;
-
- self.expect_token(&Token::LParen)?;
- let value_source = if self.parse_keyword(Keyword::ANY) {
- let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) {
- self.parse_comma_separated(Parser::parse_order_by_expr)?
- } else {
- vec![]
- };
- PivotValueSource::Any(order_by)
- } else if self.peek_sub_query() {
- PivotValueSource::Subquery(self.parse_query()?)
- } else {
- PivotValueSource::List(self.parse_comma_separated(Self::parse_expr_with_alias)?)
- };
- self.expect_token(&Token::RParen)?;
-
- let default_on_null =
- if self.parse_keywords(&[Keyword::DEFAULT, Keyword::ON, Keyword::NULL]) {
- self.expect_token(&Token::LParen)?;
- let expr = self.parse_expr()?;
- self.expect_token(&Token::RParen)?;
- Some(expr)
- } else {
- None
- };
-
- self.expect_token(&Token::RParen)?;
- let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
- Ok(TableFactor::Pivot {
- table: Box::new(table),
- aggregate_functions,
- value_column,
- value_source,
- default_on_null,
- alias,
- })
- }
-
- pub fn parse_unpivot_table_factor(
- &mut self,
- table: TableFactor,
- ) -> Result<TableFactor, ParserError> {
- self.expect_token(&Token::LParen)?;
- let value = self.parse_identifier(false)?;
- self.expect_keyword(Keyword::FOR)?;
- let name = self.parse_identifier(false)?;
- self.expect_keyword(Keyword::IN)?;
- let columns = self.parse_parenthesized_column_list(Mandatory, false)?;
- self.expect_token(&Token::RParen)?;
- let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
- Ok(TableFactor::Unpivot {
- table: Box::new(table),
- value,
- name,
- columns,
- alias,
- })
- }
-
- pub fn parse_join_constraint(&mut self, natural: bool) -> Result<JoinConstraint, ParserError> {
- if natural {
- Ok(JoinConstraint::Natural)
- } else if self.parse_keyword(Keyword::ON) {
- let constraint = self.parse_expr()?;
- Ok(JoinConstraint::On(constraint))
- } else if self.parse_keyword(Keyword::USING) {
- let columns = self.parse_parenthesized_column_list(Mandatory, false)?;
- Ok(JoinConstraint::Using(columns))
- } else {
- Ok(JoinConstraint::None)
- //self.expected("ON, or USING after JOIN", self.peek_token())
- }
- }
-
- /// Parse a GRANT statement.
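- /// A hedged usage sketch, not from the original source: the statement and
- /// names below are illustrative, and the call goes through the library's
- /// `parse_sql` convenience entry point.
- ///
- /// ```
- /// # use sqlparser::parser::Parser;
- /// # use sqlparser::dialect::GenericDialect;
- /// let sql = "GRANT SELECT, INSERT ON customers TO reporting_role";
- /// let statements = Parser::parse_sql(&GenericDialect, sql).unwrap();
- /// assert_eq!(statements.len(), 1);
- /// ```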
- pub fn parse_grant(&mut self) -> Result { - let (privileges, objects) = self.parse_grant_revoke_privileges_objects()?; - - self.expect_keyword(Keyword::TO)?; - let grantees = self.parse_comma_separated(|p| p.parse_identifier(false))?; - - let with_grant_option = - self.parse_keywords(&[Keyword::WITH, Keyword::GRANT, Keyword::OPTION]); - - let granted_by = self - .parse_keywords(&[Keyword::GRANTED, Keyword::BY]) - .then(|| self.parse_identifier(false).unwrap()); - - Ok(Statement::Grant { - privileges, - objects, - grantees, - with_grant_option, - granted_by, - }) - } - - pub fn parse_grant_revoke_privileges_objects( - &mut self, - ) -> Result<(Privileges, GrantObjects), ParserError> { - let privileges = if self.parse_keyword(Keyword::ALL) { - Privileges::All { - with_privileges_keyword: self.parse_keyword(Keyword::PRIVILEGES), - } - } else { - let (actions, err): (Vec<_>, Vec<_>) = self - .parse_actions_list()? - .into_iter() - .map(|(kw, columns)| match kw { - Keyword::DELETE => Ok(Action::Delete), - Keyword::INSERT => Ok(Action::Insert { columns }), - Keyword::REFERENCES => Ok(Action::References { columns }), - Keyword::SELECT => Ok(Action::Select { columns }), - Keyword::TRIGGER => Ok(Action::Trigger), - Keyword::TRUNCATE => Ok(Action::Truncate), - Keyword::UPDATE => Ok(Action::Update { columns }), - Keyword::USAGE => Ok(Action::Usage), - Keyword::CONNECT => Ok(Action::Connect), - Keyword::CREATE => Ok(Action::Create), - Keyword::EXECUTE => Ok(Action::Execute), - Keyword::TEMPORARY => Ok(Action::Temporary), - // This will cover all future added keywords to - // parse_grant_permission and unhandled in this - // match - _ => Err(kw), - }) - .partition(Result::is_ok); - - if !err.is_empty() { - let errors: Vec = err.into_iter().filter_map(|x| x.err()).collect(); - return Err(ParserError::ParserError(format!( - "INTERNAL ERROR: GRANT/REVOKE unexpected keyword(s) - {errors:?}" - ))); - } - let act = actions.into_iter().filter_map(|x| x.ok()).collect(); - Privileges::Actions(act) - }; - - self.expect_keyword(Keyword::ON)?; - - let objects = if self.parse_keywords(&[ - Keyword::ALL, - Keyword::TABLES, - Keyword::IN, - Keyword::SCHEMA, - ]) { - GrantObjects::AllTablesInSchema { - schemas: self.parse_comma_separated(|p| p.parse_object_name(false))?, - } - } else if self.parse_keywords(&[ - Keyword::ALL, - Keyword::SEQUENCES, - Keyword::IN, - Keyword::SCHEMA, - ]) { - GrantObjects::AllSequencesInSchema { - schemas: self.parse_comma_separated(|p| p.parse_object_name(false))?, - } - } else { - let object_type = - self.parse_one_of_keywords(&[Keyword::SEQUENCE, Keyword::SCHEMA, Keyword::TABLE]); - let objects = self.parse_comma_separated(|p| p.parse_object_name(false)); - match object_type { - Some(Keyword::SCHEMA) => GrantObjects::Schemas(objects?), - Some(Keyword::SEQUENCE) => GrantObjects::Sequences(objects?), - Some(Keyword::TABLE) | None => GrantObjects::Tables(objects?), - _ => unreachable!(), - } - }; - - Ok((privileges, objects)) - } - - pub fn parse_grant_permission(&mut self) -> Result { - if let Some(kw) = self.parse_one_of_keywords(&[ - Keyword::CONNECT, - Keyword::CREATE, - Keyword::DELETE, - Keyword::EXECUTE, - Keyword::INSERT, - Keyword::REFERENCES, - Keyword::SELECT, - Keyword::TEMPORARY, - Keyword::TRIGGER, - Keyword::TRUNCATE, - Keyword::UPDATE, - Keyword::USAGE, - ]) { - let columns = match kw { - Keyword::INSERT | Keyword::REFERENCES | Keyword::SELECT | Keyword::UPDATE => { - let columns = self.parse_parenthesized_column_list(Optional, false)?; - if columns.is_empty() { - None - 
} else {
- Some(columns)
- }
- }
- _ => None,
- };
- Ok((kw, columns))
- } else {
- self.expected("a privilege keyword", self.peek_token())?
- }
- }
-
- /// Parse a REVOKE statement
- pub fn parse_revoke(&mut self) -> Result<Statement, ParserError> {
- let (privileges, objects) = self.parse_grant_revoke_privileges_objects()?;
-
- self.expect_keyword(Keyword::FROM)?;
- let grantees = self.parse_comma_separated(|p| p.parse_identifier(false))?;
-
- let granted_by = self
- .parse_keywords(&[Keyword::GRANTED, Keyword::BY])
- .then(|| self.parse_identifier(false).unwrap());
-
- let loc = self.peek_token().span.start;
- let cascade = self.parse_keyword(Keyword::CASCADE);
- let restrict = self.parse_keyword(Keyword::RESTRICT);
- if cascade && restrict {
- return parser_err!("Cannot specify both CASCADE and RESTRICT in REVOKE", loc);
- }
-
- Ok(Statement::Revoke {
- privileges,
- objects,
- grantees,
- granted_by,
- cascade,
- })
- }
-
- /// Parse a REPLACE statement
- pub fn parse_replace(&mut self) -> Result<Statement, ParserError> {
- if !dialect_of!(self is MySqlDialect | GenericDialect) {
- return parser_err!(
- "Unsupported statement REPLACE",
- self.peek_token().span.start
- );
- }
-
- let mut insert = self.parse_insert()?;
- if let Statement::Insert(Insert { replace_into, .. }) = &mut insert {
- *replace_into = true;
- }
-
- Ok(insert)
- }
-
- /// Parse an INSERT statement, returning a `Box`ed SetExpr
- ///
- /// This is used to reduce the size of the stack frames in debug builds
- fn parse_insert_setexpr_boxed(&mut self) -> Result<Box<SetExpr>, ParserError> {
- Ok(Box::new(SetExpr::Insert(self.parse_insert()?)))
- }
-
- /// Parse an INSERT statement
- pub fn parse_insert(&mut self) -> Result<Statement, ParserError> {
- let or = self.parse_conflict_clause();
- let priority = if !dialect_of!(self is MySqlDialect | GenericDialect) {
- None
- } else if self.parse_keyword(Keyword::LOW_PRIORITY) {
- Some(MysqlInsertPriority::LowPriority)
- } else if self.parse_keyword(Keyword::DELAYED) {
- Some(MysqlInsertPriority::Delayed)
- } else if self.parse_keyword(Keyword::HIGH_PRIORITY) {
- Some(MysqlInsertPriority::HighPriority)
- } else {
- None
- };
-
- let ignore = dialect_of!(self is MySqlDialect | GenericDialect)
- && self.parse_keyword(Keyword::IGNORE);
-
- let replace_into = false;
-
- let action = self.parse_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE]);
- let into = action == Some(Keyword::INTO);
- let overwrite = action == Some(Keyword::OVERWRITE);
-
- let local = self.parse_keyword(Keyword::LOCAL);
-
- if self.parse_keyword(Keyword::DIRECTORY) {
- let path = self.parse_literal_string()?;
- let file_format = if self.parse_keywords(&[Keyword::STORED, Keyword::AS]) {
- Some(self.parse_file_format()?)
- } else {
- None
- };
- let source = self.parse_query()?;
- Ok(Statement::Directory {
- local,
- path,
- overwrite,
- file_format,
- source,
- })
- } else {
- // Hive lets you put table here regardless
- let table = self.parse_keyword(Keyword::TABLE);
- let table_name = self.parse_object_name(false)?;
-
- let table_alias =
- if dialect_of!(self is PostgreSqlDialect) && self.parse_keyword(Keyword::AS) {
- Some(self.parse_identifier(false)?)
- } else { - None - }; - - let is_mysql = dialect_of!(self is MySqlDialect); - - let (columns, partitioned, after_columns, source) = - if self.parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES]) { - (vec![], None, vec![], None) - } else { - let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; - - let partitioned = self.parse_insert_partition()?; - // Hive allows you to specify columns after partitions as well if you want. - let after_columns = if dialect_of!(self is HiveDialect) { - self.parse_parenthesized_column_list(Optional, false)? - } else { - vec![] - }; - - let source = Some(self.parse_query()?); - - (columns, partitioned, after_columns, source) - }; - - let insert_alias = if dialect_of!(self is MySqlDialect | GenericDialect) - && self.parse_keyword(Keyword::AS) - { - let row_alias = self.parse_object_name(false)?; - let col_aliases = Some(self.parse_parenthesized_column_list(Optional, false)?); - Some(InsertAliases { - row_alias, - col_aliases, - }) - } else { - None - }; - - let on = if self.parse_keyword(Keyword::ON) { - if self.parse_keyword(Keyword::CONFLICT) { - let conflict_target = - if self.parse_keywords(&[Keyword::ON, Keyword::CONSTRAINT]) { - Some(ConflictTarget::OnConstraint(self.parse_object_name(false)?)) - } else if self.peek_token() == Token::LParen { - Some(ConflictTarget::Columns( - self.parse_parenthesized_column_list(IsOptional::Mandatory, false)?, - )) - } else { - None - }; - - self.expect_keyword(Keyword::DO)?; - let action = if self.parse_keyword(Keyword::NOTHING) { - OnConflictAction::DoNothing - } else { - self.expect_keyword(Keyword::UPDATE)?; - self.expect_keyword(Keyword::SET)?; - let assignments = self.parse_comma_separated(Parser::parse_assignment)?; - let selection = if self.parse_keyword(Keyword::WHERE) { - Some(self.parse_expr()?) - } else { - None - }; - OnConflictAction::DoUpdate(DoUpdate { - assignments, - selection, - }) - }; - - Some(OnInsert::OnConflict(OnConflict { - conflict_target, - action, - })) - } else { - self.expect_keyword(Keyword::DUPLICATE)?; - self.expect_keyword(Keyword::KEY)?; - self.expect_keyword(Keyword::UPDATE)?; - let l = self.parse_comma_separated(Parser::parse_assignment)?; - - Some(OnInsert::DuplicateKeyUpdate(l)) - } - } else { - None - }; - - let returning = if self.parse_keyword(Keyword::RETURNING) { - Some(self.parse_comma_separated(Parser::parse_select_item)?) 
- } else { - None - }; - - Ok(Statement::Insert(Insert { - or, - table_name, - table_alias, - ignore, - into, - overwrite, - partitioned, - columns, - after_columns, - source, - table, - on, - returning, - replace_into, - priority, - insert_alias, - })) - } - } - - fn parse_conflict_clause(&mut self) -> Option { - if self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]) { - Some(SqliteOnConflict::Replace) - } else if self.parse_keywords(&[Keyword::OR, Keyword::ROLLBACK]) { - Some(SqliteOnConflict::Rollback) - } else if self.parse_keywords(&[Keyword::OR, Keyword::ABORT]) { - Some(SqliteOnConflict::Abort) - } else if self.parse_keywords(&[Keyword::OR, Keyword::FAIL]) { - Some(SqliteOnConflict::Fail) - } else if self.parse_keywords(&[Keyword::OR, Keyword::IGNORE]) { - Some(SqliteOnConflict::Ignore) - } else if self.parse_keyword(Keyword::REPLACE) { - Some(SqliteOnConflict::Replace) - } else { - None - } - } - - pub fn parse_insert_partition(&mut self) -> Result>, ParserError> { - if self.parse_keyword(Keyword::PARTITION) { - self.expect_token(&Token::LParen)?; - let partition_cols = Some(self.parse_comma_separated(Parser::parse_expr)?); - self.expect_token(&Token::RParen)?; - Ok(partition_cols) - } else { - Ok(None) - } - } - - pub fn parse_load_data_table_format( - &mut self, - ) -> Result, ParserError> { - if self.parse_keyword(Keyword::INPUTFORMAT) { - let input_format = self.parse_expr()?; - self.expect_keyword(Keyword::SERDE)?; - let serde = self.parse_expr()?; - Ok(Some(HiveLoadDataFormat { - input_format, - serde, - })) - } else { - Ok(None) - } - } - - /// Parse an UPDATE statement, returning a `Box`ed SetExpr - /// - /// This is used to reduce the size of the stack frames in debug builds - fn parse_update_setexpr_boxed(&mut self) -> Result, ParserError> { - Ok(Box::new(SetExpr::Update(self.parse_update()?))) - } - - pub fn parse_update(&mut self) -> Result { - let or = self.parse_conflict_clause(); - let table = self.parse_table_and_joins()?; - self.expect_keyword(Keyword::SET)?; - let assignments = self.parse_comma_separated(Parser::parse_assignment)?; - let from = if self.parse_keyword(Keyword::FROM) - && dialect_of!(self is GenericDialect | PostgreSqlDialect | DuckDbDialect | BigQueryDialect | SnowflakeDialect | RedshiftSqlDialect | MsSqlDialect | SQLiteDialect ) - { - Some(self.parse_table_and_joins()?) - } else { - None - }; - let selection = if self.parse_keyword(Keyword::WHERE) { - Some(self.parse_expr()?) - } else { - None - }; - let returning = if self.parse_keyword(Keyword::RETURNING) { - Some(self.parse_comma_separated(Parser::parse_select_item)?) 
- } else { - None - }; - Ok(Statement::Update { - table, - assignments, - from, - selection, - returning, - or, - }) - } - - /// Parse a `var = expr` assignment, used in an UPDATE statement - pub fn parse_assignment(&mut self) -> Result { - let target = self.parse_assignment_target()?; - self.expect_token(&Token::Eq)?; - let value = self.parse_expr()?; - Ok(Assignment { target, value }) - } - - /// Parse the left-hand side of an assignment, used in an UPDATE statement - pub fn parse_assignment_target(&mut self) -> Result { - if self.consume_token(&Token::LParen) { - let columns = self.parse_comma_separated(|p| p.parse_object_name(false))?; - self.expect_token(&Token::RParen)?; - Ok(AssignmentTarget::Tuple(columns)) - } else { - let column = self.parse_object_name(false)?; - Ok(AssignmentTarget::ColumnName(column)) - } - } - - pub fn parse_function_args(&mut self) -> Result { - let arg = if self.dialect.supports_named_fn_args_with_expr_name() { - self.maybe_parse(|p| { - let name = p.parse_expr()?; - let operator = p.parse_function_named_arg_operator()?; - let arg = p.parse_wildcard_expr()?.into(); - Ok(FunctionArg::ExprNamed { - name, - arg, - operator, - }) - })? - } else { - self.maybe_parse(|p| { - let name = p.parse_identifier(false)?; - let operator = p.parse_function_named_arg_operator()?; - let arg = p.parse_wildcard_expr()?.into(); - Ok(FunctionArg::Named { - name, - arg, - operator, - }) - })? - }; - if let Some(arg) = arg { - return Ok(arg); - } - Ok(FunctionArg::Unnamed(self.parse_wildcard_expr()?.into())) - } - - fn parse_function_named_arg_operator(&mut self) -> Result { - if self.parse_keyword(Keyword::VALUE) { - return Ok(FunctionArgOperator::Value); - } - let tok = self.next_token(); - match tok.token { - Token::RArrow if self.dialect.supports_named_fn_args_with_rarrow_operator() => { - Ok(FunctionArgOperator::RightArrow) - } - Token::Eq if self.dialect.supports_named_fn_args_with_eq_operator() => { - Ok(FunctionArgOperator::Equals) - } - Token::Assignment - if self - .dialect - .supports_named_fn_args_with_assignment_operator() => - { - Ok(FunctionArgOperator::Assignment) - } - Token::Colon if self.dialect.supports_named_fn_args_with_colon_operator() => { - Ok(FunctionArgOperator::Colon) - } - _ => { - self.prev_token(); - self.expected("argument operator", tok) - } - } - } - - pub fn parse_optional_args(&mut self) -> Result, ParserError> { - if self.consume_token(&Token::RParen) { - Ok(vec![]) - } else { - let args = self.parse_comma_separated(Parser::parse_function_args)?; - self.expect_token(&Token::RParen)?; - Ok(args) - } - } - - fn parse_table_function_args(&mut self) -> Result { - if self.consume_token(&Token::RParen) { - return Ok(TableFunctionArgs { - args: vec![], - settings: None, - }); - } - let mut args = vec![]; - let settings = loop { - if let Some(settings) = self.parse_settings()? { - break Some(settings); - } - args.push(self.parse_function_args()?); - if self.is_parse_comma_separated_end() { - break None; - } - }; - self.expect_token(&Token::RParen)?; - Ok(TableFunctionArgs { args, settings }) - } - - /// Parses a potentially empty list of arguments to a window function - /// (including the closing parenthesis). - /// - /// Examples: - /// ```sql - /// FIRST_VALUE(x ORDER BY 1,2,3); - /// FIRST_VALUE(x IGNORE NULL); - /// ``` - fn parse_function_argument_list(&mut self) -> Result { - let mut clauses = vec![]; - - // For MSSQL empty argument list with json-null-clause case, e.g. 
`JSON_ARRAY(NULL ON NULL)` - if let Some(null_clause) = self.parse_json_null_clause() { - clauses.push(FunctionArgumentClause::JsonNullClause(null_clause)); - } - - if self.consume_token(&Token::RParen) { - return Ok(FunctionArgumentList { - duplicate_treatment: None, - args: vec![], - clauses, - }); - } - - let duplicate_treatment = self.parse_duplicate_treatment()?; - let args = self.parse_comma_separated(Parser::parse_function_args)?; - - if self.dialect.supports_window_function_null_treatment_arg() { - if let Some(null_treatment) = self.parse_null_treatment()? { - clauses.push(FunctionArgumentClause::IgnoreOrRespectNulls(null_treatment)); - } - } - - if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { - clauses.push(FunctionArgumentClause::OrderBy( - self.parse_comma_separated(Parser::parse_order_by_expr)?, - )); - } - - if self.parse_keyword(Keyword::LIMIT) { - clauses.push(FunctionArgumentClause::Limit(self.parse_expr()?)); - } - - if dialect_of!(self is GenericDialect | BigQueryDialect) - && self.parse_keyword(Keyword::HAVING) - { - let kind = match self.expect_one_of_keywords(&[Keyword::MIN, Keyword::MAX])? { - Keyword::MIN => HavingBoundKind::Min, - Keyword::MAX => HavingBoundKind::Max, - _ => unreachable!(), - }; - clauses.push(FunctionArgumentClause::Having(HavingBound( - kind, - self.parse_expr()?, - ))) - } - - if dialect_of!(self is GenericDialect | MySqlDialect) - && self.parse_keyword(Keyword::SEPARATOR) - { - clauses.push(FunctionArgumentClause::Separator(self.parse_value()?)); - } - - if let Some(on_overflow) = self.parse_listagg_on_overflow()? { - clauses.push(FunctionArgumentClause::OnOverflow(on_overflow)); - } - - if let Some(null_clause) = self.parse_json_null_clause() { - clauses.push(FunctionArgumentClause::JsonNullClause(null_clause)); - } - - self.expect_token(&Token::RParen)?; - Ok(FunctionArgumentList { - duplicate_treatment, - args, - clauses, - }) - } - - /// Parses MSSQL's json-null-clause - fn parse_json_null_clause(&mut self) -> Option { - if self.parse_keywords(&[Keyword::ABSENT, Keyword::ON, Keyword::NULL]) { - Some(JsonNullClause::AbsentOnNull) - } else if self.parse_keywords(&[Keyword::NULL, Keyword::ON, Keyword::NULL]) { - Some(JsonNullClause::NullOnNull) - } else { - None - } - } - - fn parse_duplicate_treatment(&mut self) -> Result, ParserError> { - let loc = self.peek_token().span.start; - match ( - self.parse_keyword(Keyword::ALL), - self.parse_keyword(Keyword::DISTINCT), - ) { - (true, false) => Ok(Some(DuplicateTreatment::All)), - (false, true) => Ok(Some(DuplicateTreatment::Distinct)), - (false, false) => Ok(None), - (true, true) => parser_err!("Cannot specify both ALL and DISTINCT".to_string(), loc), - } - } - - /// Parse a comma-delimited list of projections after SELECT - pub fn parse_select_item(&mut self) -> Result { - match self.parse_wildcard_expr()? 
{
- Expr::QualifiedWildcard(prefix, token) => Ok(SelectItem::QualifiedWildcard(
- prefix,
- self.parse_wildcard_additional_options(token.0)?,
- )),
- Expr::Wildcard(token) => Ok(SelectItem::Wildcard(
- self.parse_wildcard_additional_options(token.0)?,
- )),
- Expr::Identifier(v) if v.value.to_lowercase() == "from" && v.quote_style.is_none() => {
- parser_err!(
- format!("Expected an expression, found: {}", v),
- self.peek_token().span.start
- )
- }
- Expr::BinaryOp {
- left,
- op: BinaryOperator::Eq,
- right,
- } if self.dialect.supports_eq_alias_assignment()
- && matches!(left.as_ref(), Expr::Identifier(_)) =>
- {
- let Expr::Identifier(alias) = *left else {
- return parser_err!(
- "BUG: expected identifier expression as alias",
- self.peek_token().span.start
- );
- };
- Ok(SelectItem::ExprWithAlias {
- expr: *right,
- alias,
- })
- }
- expr => self
- .parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS)
- .map(|alias| match alias {
- Some(alias) => SelectItem::ExprWithAlias { expr, alias },
- None => SelectItem::UnnamedExpr(expr),
- }),
- }
- }
-
- /// Parse the [`WildcardAdditionalOptions`] information for wildcard select items.
- ///
- /// Any options that are not present are left as `None`.
- pub fn parse_wildcard_additional_options(
- &mut self,
- wildcard_token: TokenWithSpan,
- ) -> Result<WildcardAdditionalOptions, ParserError> {
- let opt_ilike = if dialect_of!(self is GenericDialect | SnowflakeDialect) {
- self.parse_optional_select_item_ilike()?
- } else {
- None
- };
- let opt_exclude = if opt_ilike.is_none()
- && dialect_of!(self is GenericDialect | DuckDbDialect | SnowflakeDialect)
- {
- self.parse_optional_select_item_exclude()?
- } else {
- None
- };
- let opt_except = if self.dialect.supports_select_wildcard_except() {
- self.parse_optional_select_item_except()?
- } else {
- None
- };
- let opt_replace = if dialect_of!(self is GenericDialect | BigQueryDialect | ClickHouseDialect | DuckDbDialect | SnowflakeDialect)
- {
- self.parse_optional_select_item_replace()?
- } else {
- None
- };
- let opt_rename = if dialect_of!(self is GenericDialect | SnowflakeDialect) {
- self.parse_optional_select_item_rename()?
- } else {
- None
- };
-
- Ok(WildcardAdditionalOptions {
- wildcard_token: wildcard_token.into(),
- opt_ilike,
- opt_exclude,
- opt_except,
- opt_rename,
- opt_replace,
- })
- }
-
- /// Parse the [`Ilike`](IlikeSelectItem) information for wildcard select items.
- ///
- /// Returns `None` if it is not present.
- pub fn parse_optional_select_item_ilike(
- &mut self,
- ) -> Result<Option<IlikeSelectItem>, ParserError> {
- let opt_ilike = if self.parse_keyword(Keyword::ILIKE) {
- let next_token = self.next_token();
- let pattern = match next_token.token {
- Token::SingleQuotedString(s) => s,
- _ => return self.expected("ilike pattern", next_token),
- };
- Some(IlikeSelectItem { pattern })
- } else {
- None
- };
- Ok(opt_ilike)
- }
-
- /// Parse the [`Exclude`](ExcludeSelectItem) information for wildcard select items.
- ///
- /// Returns `None` if it is not present.
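- /// e.g. the `EXCLUDE` in `SELECT * EXCLUDE (dept_id) FROM employees`, an
- /// illustrative query (this form is accepted by e.g. the Snowflake and
- /// DuckDB dialects), not an example from the original source.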
- pub fn parse_optional_select_item_exclude( - &mut self, - ) -> Result, ParserError> { - let opt_exclude = if self.parse_keyword(Keyword::EXCLUDE) { - if self.consume_token(&Token::LParen) { - let columns = - self.parse_comma_separated(|parser| parser.parse_identifier(false))?; - self.expect_token(&Token::RParen)?; - Some(ExcludeSelectItem::Multiple(columns)) - } else { - let column = self.parse_identifier(false)?; - Some(ExcludeSelectItem::Single(column)) - } - } else { - None - }; - - Ok(opt_exclude) - } - - /// Parse an [`Except`](ExceptSelectItem) information for wildcard select items. - /// - /// If it is not possible to parse it, will return an option. - pub fn parse_optional_select_item_except( - &mut self, - ) -> Result, ParserError> { - let opt_except = if self.parse_keyword(Keyword::EXCEPT) { - if self.peek_token().token == Token::LParen { - let idents = self.parse_parenthesized_column_list(Mandatory, false)?; - match &idents[..] { - [] => { - return self.expected( - "at least one column should be parsed by the expect clause", - self.peek_token(), - )?; - } - [first, idents @ ..] => Some(ExceptSelectItem { - first_element: first.clone(), - additional_elements: idents.to_vec(), - }), - } - } else { - // Clickhouse allows EXCEPT column_name - let ident = self.parse_identifier(false)?; - Some(ExceptSelectItem { - first_element: ident, - additional_elements: vec![], - }) - } - } else { - None - }; - - Ok(opt_except) - } - - /// Parse a [`Rename`](RenameSelectItem) information for wildcard select items. - pub fn parse_optional_select_item_rename( - &mut self, - ) -> Result, ParserError> { - let opt_rename = if self.parse_keyword(Keyword::RENAME) { - if self.consume_token(&Token::LParen) { - let idents = - self.parse_comma_separated(|parser| parser.parse_identifier_with_alias())?; - self.expect_token(&Token::RParen)?; - Some(RenameSelectItem::Multiple(idents)) - } else { - let ident = self.parse_identifier_with_alias()?; - Some(RenameSelectItem::Single(ident)) - } - } else { - None - }; - - Ok(opt_rename) - } - - /// Parse a [`Replace`](ReplaceSelectItem) information for wildcard select items. - pub fn parse_optional_select_item_replace( - &mut self, - ) -> Result, ParserError> { - let opt_replace = if self.parse_keyword(Keyword::REPLACE) { - if self.consume_token(&Token::LParen) { - let items = self.parse_comma_separated(|parser| { - Ok(Box::new(parser.parse_replace_elements()?)) - })?; - self.expect_token(&Token::RParen)?; - Some(ReplaceSelectItem { items }) - } else { - let tok = self.next_token(); - return self.expected("( after REPLACE but", tok); - } - } else { - None - }; - - Ok(opt_replace) - } - pub fn parse_replace_elements(&mut self) -> Result { - let expr = self.parse_expr()?; - let as_keyword = self.parse_keyword(Keyword::AS); - let ident = self.parse_identifier(false)?; - Ok(ReplaceSelectElement { - expr, - column_name: ident, - as_keyword, - }) - } - - /// Parse ASC or DESC, returns an Option with true if ASC, false of DESC or `None` if none of - /// them. 
-
-    /// Parse ASC or DESC; returns `Some(true)` if ASC, `Some(false)` if DESC,
-    /// or `None` if neither is present.
-    pub fn parse_asc_desc(&mut self) -> Option<bool> {
-        if self.parse_keyword(Keyword::ASC) {
-            Some(true)
-        } else if self.parse_keyword(Keyword::DESC) {
-            Some(false)
-        } else {
-            None
-        }
-    }
-
-    /// Parse an expression, optionally followed by ASC or DESC (used in ORDER BY)
-    pub fn parse_order_by_expr(&mut self) -> Result<OrderByExpr, ParserError> {
-        let expr = self.parse_expr()?;
-
-        let asc = self.parse_asc_desc();
-
-        let nulls_first = if self.parse_keywords(&[Keyword::NULLS, Keyword::FIRST]) {
-            Some(true)
-        } else if self.parse_keywords(&[Keyword::NULLS, Keyword::LAST]) {
-            Some(false)
-        } else {
-            None
-        };
-
-        let with_fill = if dialect_of!(self is ClickHouseDialect | GenericDialect)
-            && self.parse_keywords(&[Keyword::WITH, Keyword::FILL])
-        {
-            Some(self.parse_with_fill()?)
-        } else {
-            None
-        };
-
-        Ok(OrderByExpr {
-            expr,
-            asc,
-            nulls_first,
-            with_fill,
-        })
-    }
-
-    // Parse a WITH FILL clause (ClickHouse dialect),
-    // i.e. what follows the WITH FILL keywords in an ORDER BY clause
-    pub fn parse_with_fill(&mut self) -> Result<WithFill, ParserError> {
-        let from = if self.parse_keyword(Keyword::FROM) {
-            Some(self.parse_expr()?)
-        } else {
-            None
-        };
-
-        let to = if self.parse_keyword(Keyword::TO) {
-            Some(self.parse_expr()?)
-        } else {
-            None
-        };
-
-        let step = if self.parse_keyword(Keyword::STEP) {
-            Some(self.parse_expr()?)
-        } else {
-            None
-        };
-
-        Ok(WithFill { from, to, step })
-    }
-
-    // Parse a set of comma-separated INTERPOLATE expressions (ClickHouse dialect)
-    // that follow the INTERPOLATE keyword in an ORDER BY clause with the WITH FILL modifier
-    pub fn parse_interpolations(&mut self) -> Result<Option<Interpolate>, ParserError> {
-        if !self.parse_keyword(Keyword::INTERPOLATE) {
-            return Ok(None);
-        }
-
-        if self.consume_token(&Token::LParen) {
-            let interpolations =
-                self.parse_comma_separated0(|p| p.parse_interpolation(), Token::RParen)?;
-            self.expect_token(&Token::RParen)?;
-            // INTERPOLATE () and INTERPOLATE ( ... ) variants
-            return Ok(Some(Interpolate {
-                exprs: Some(interpolations),
-            }));
-        }
-
-        // INTERPOLATE
-        Ok(Some(Interpolate { exprs: None }))
-    }
-
-    // Parse an INTERPOLATE expression (ClickHouse dialect)
-    pub fn parse_interpolation(&mut self) -> Result<InterpolateExpr, ParserError> {
-        let column = self.parse_identifier(false)?;
-        let expr = if self.parse_keyword(Keyword::AS) {
-            Some(self.parse_expr()?)
-        } else {
-            None
-        };
-        Ok(InterpolateExpr { column, expr })
-    }
-
-    /// Parse a TOP clause, MSSQL equivalent of LIMIT,
-    /// that follows `SELECT [DISTINCT]`.
- pub fn parse_top(&mut self) -> Result { - let quantity = if self.consume_token(&Token::LParen) { - let quantity = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - Some(TopQuantity::Expr(quantity)) - } else { - let next_token = self.next_token(); - let quantity = match next_token.token { - Token::Number(s, _) => Self::parse::(s, next_token.span.start)?, - _ => self.expected("literal int", next_token)?, - }; - Some(TopQuantity::Constant(quantity)) - }; - - let percent = self.parse_keyword(Keyword::PERCENT); - - let with_ties = self.parse_keywords(&[Keyword::WITH, Keyword::TIES]); - - Ok(Top { - with_ties, - percent, - quantity, - }) - } - - /// Parse a LIMIT clause - pub fn parse_limit(&mut self) -> Result, ParserError> { - if self.parse_keyword(Keyword::ALL) { - Ok(None) - } else { - Ok(Some(self.parse_expr()?)) - } - } - - /// Parse an OFFSET clause - pub fn parse_offset(&mut self) -> Result { - let value = self.parse_expr()?; - let rows = if self.parse_keyword(Keyword::ROW) { - OffsetRows::Row - } else if self.parse_keyword(Keyword::ROWS) { - OffsetRows::Rows - } else { - OffsetRows::None - }; - Ok(Offset { value, rows }) - } - - /// Parse a FETCH clause - pub fn parse_fetch(&mut self) -> Result { - self.expect_one_of_keywords(&[Keyword::FIRST, Keyword::NEXT])?; - let (quantity, percent) = if self - .parse_one_of_keywords(&[Keyword::ROW, Keyword::ROWS]) - .is_some() - { - (None, false) - } else { - let quantity = Expr::Value(self.parse_value()?); - let percent = self.parse_keyword(Keyword::PERCENT); - self.expect_one_of_keywords(&[Keyword::ROW, Keyword::ROWS])?; - (Some(quantity), percent) - }; - let with_ties = if self.parse_keyword(Keyword::ONLY) { - false - } else if self.parse_keywords(&[Keyword::WITH, Keyword::TIES]) { - true - } else { - return self.expected("one of ONLY or WITH TIES", self.peek_token()); - }; - Ok(Fetch { - with_ties, - percent, - quantity, - }) - } - - /// Parse a FOR UPDATE/FOR SHARE clause - pub fn parse_lock(&mut self) -> Result { - let lock_type = match self.expect_one_of_keywords(&[Keyword::UPDATE, Keyword::SHARE])? { - Keyword::UPDATE => LockType::Update, - Keyword::SHARE => LockType::Share, - _ => unreachable!(), - }; - let of = if self.parse_keyword(Keyword::OF) { - Some(self.parse_object_name(false)?) 
- } else { - None - }; - let nonblock = if self.parse_keyword(Keyword::NOWAIT) { - Some(NonBlock::Nowait) - } else if self.parse_keywords(&[Keyword::SKIP, Keyword::LOCKED]) { - Some(NonBlock::SkipLocked) - } else { - None - }; - Ok(LockClause { - lock_type, - of, - nonblock, - }) - } - - pub fn parse_values(&mut self, allow_empty: bool) -> Result { - let mut explicit_row = false; - - let rows = self.parse_comma_separated(|parser| { - if parser.parse_keyword(Keyword::ROW) { - explicit_row = true; - } - - parser.expect_token(&Token::LParen)?; - if allow_empty && parser.peek_token().token == Token::RParen { - parser.next_token(); - Ok(vec![]) - } else { - let exprs = parser.parse_comma_separated(Parser::parse_expr)?; - parser.expect_token(&Token::RParen)?; - Ok(exprs) - } - })?; - Ok(Values { explicit_row, rows }) - } - - pub fn parse_start_transaction(&mut self) -> Result { - self.expect_keyword(Keyword::TRANSACTION)?; - Ok(Statement::StartTransaction { - modes: self.parse_transaction_modes()?, - begin: false, - transaction: Some(BeginTransactionKind::Transaction), - modifier: None, - }) - } - - pub fn parse_begin(&mut self) -> Result { - let modifier = if !self.dialect.supports_start_transaction_modifier() { - None - } else if self.parse_keyword(Keyword::DEFERRED) { - Some(TransactionModifier::Deferred) - } else if self.parse_keyword(Keyword::IMMEDIATE) { - Some(TransactionModifier::Immediate) - } else if self.parse_keyword(Keyword::EXCLUSIVE) { - Some(TransactionModifier::Exclusive) - } else { - None - }; - let transaction = match self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]) { - Some(Keyword::TRANSACTION) => Some(BeginTransactionKind::Transaction), - Some(Keyword::WORK) => Some(BeginTransactionKind::Work), - _ => None, - }; - Ok(Statement::StartTransaction { - modes: self.parse_transaction_modes()?, - begin: true, - transaction, - modifier, - }) - } - - pub fn parse_end(&mut self) -> Result { - Ok(Statement::Commit { - chain: self.parse_commit_rollback_chain()?, - }) - } - - pub fn parse_transaction_modes(&mut self) -> Result, ParserError> { - let mut modes = vec![]; - let mut required = false; - loop { - let mode = if self.parse_keywords(&[Keyword::ISOLATION, Keyword::LEVEL]) { - let iso_level = if self.parse_keywords(&[Keyword::READ, Keyword::UNCOMMITTED]) { - TransactionIsolationLevel::ReadUncommitted - } else if self.parse_keywords(&[Keyword::READ, Keyword::COMMITTED]) { - TransactionIsolationLevel::ReadCommitted - } else if self.parse_keywords(&[Keyword::REPEATABLE, Keyword::READ]) { - TransactionIsolationLevel::RepeatableRead - } else if self.parse_keyword(Keyword::SERIALIZABLE) { - TransactionIsolationLevel::Serializable - } else { - self.expected("isolation level", self.peek_token())? - }; - TransactionMode::IsolationLevel(iso_level) - } else if self.parse_keywords(&[Keyword::READ, Keyword::ONLY]) { - TransactionMode::AccessMode(TransactionAccessMode::ReadOnly) - } else if self.parse_keywords(&[Keyword::READ, Keyword::WRITE]) { - TransactionMode::AccessMode(TransactionAccessMode::ReadWrite) - } else if required { - self.expected("transaction mode", self.peek_token())? - } else { - break; - }; - modes.push(mode); - // ANSI requires a comma after each transaction mode, but - // PostgreSQL, for historical reasons, does not. We follow - // PostgreSQL in making the comma optional, since that is strictly - // more general. 
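A sketch of the optional-comma rule described in the comment above, assuming the public `Parser::parse_sql` entry point; the statements are illustrative:

fn transaction_modes_sketch() {
    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;

    // The ANSI comma-separated form and the PostgreSQL-style space-separated
    // form parse to the same pair of transaction modes.
    for sql in [
        "START TRANSACTION ISOLATION LEVEL REPEATABLE READ, READ ONLY",
        "START TRANSACTION ISOLATION LEVEL REPEATABLE READ READ ONLY",
    ] {
        assert!(Parser::parse_sql(&GenericDialect {}, sql).is_ok());
    }
}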
- required = self.consume_token(&Token::Comma); - } - Ok(modes) - } - - pub fn parse_commit(&mut self) -> Result { - Ok(Statement::Commit { - chain: self.parse_commit_rollback_chain()?, - }) - } - - pub fn parse_rollback(&mut self) -> Result { - let chain = self.parse_commit_rollback_chain()?; - let savepoint = self.parse_rollback_savepoint()?; - - Ok(Statement::Rollback { chain, savepoint }) - } - - pub fn parse_commit_rollback_chain(&mut self) -> Result { - let _ = self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]); - if self.parse_keyword(Keyword::AND) { - let chain = !self.parse_keyword(Keyword::NO); - self.expect_keyword(Keyword::CHAIN)?; - Ok(chain) - } else { - Ok(false) - } - } - - pub fn parse_rollback_savepoint(&mut self) -> Result, ParserError> { - if self.parse_keyword(Keyword::TO) { - let _ = self.parse_keyword(Keyword::SAVEPOINT); - let savepoint = self.parse_identifier(false)?; - - Ok(Some(savepoint)) - } else { - Ok(None) - } - } - - pub fn parse_deallocate(&mut self) -> Result { - let prepare = self.parse_keyword(Keyword::PREPARE); - let name = self.parse_identifier(false)?; - Ok(Statement::Deallocate { name, prepare }) - } - - pub fn parse_execute(&mut self) -> Result { - let name = self.parse_object_name(false)?; - - let has_parentheses = self.consume_token(&Token::LParen); - - let end_token = match (has_parentheses, self.peek_token().token) { - (true, _) => Token::RParen, - (false, Token::EOF) => Token::EOF, - (false, Token::Word(w)) if w.keyword == Keyword::USING => Token::Word(w), - (false, _) => Token::SemiColon, - }; - - let parameters = self.parse_comma_separated0(Parser::parse_expr, end_token)?; - - if has_parentheses { - self.expect_token(&Token::RParen)?; - } - - let mut using = vec![]; - if self.parse_keyword(Keyword::USING) { - using.push(self.parse_expr()?); - - while self.consume_token(&Token::Comma) { - using.push(self.parse_expr()?); - } - }; - - Ok(Statement::Execute { - name, - parameters, - has_parentheses, - using, - }) - } - - pub fn parse_prepare(&mut self) -> Result { - let name = self.parse_identifier(false)?; - - let mut data_types = vec![]; - if self.consume_token(&Token::LParen) { - data_types = self.parse_comma_separated(Parser::parse_data_type)?; - self.expect_token(&Token::RParen)?; - } - - self.expect_keyword(Keyword::AS)?; - let statement = Box::new(self.parse_statement()?); - Ok(Statement::Prepare { - name, - data_types, - statement, - }) - } - - pub fn parse_unload(&mut self) -> Result { - self.expect_token(&Token::LParen)?; - let query = self.parse_query()?; - self.expect_token(&Token::RParen)?; - - self.expect_keyword(Keyword::TO)?; - let to = self.parse_identifier(false)?; - - let with_options = self.parse_options(Keyword::WITH)?; - - Ok(Statement::Unload { - query, - to, - with: with_options, - }) - } - - pub fn parse_merge_clauses(&mut self) -> Result, ParserError> { - let mut clauses = vec![]; - loop { - if self.peek_token() == Token::EOF || self.peek_token() == Token::SemiColon { - break; - } - self.expect_keyword(Keyword::WHEN)?; - - let mut clause_kind = MergeClauseKind::Matched; - if self.parse_keyword(Keyword::NOT) { - clause_kind = MergeClauseKind::NotMatched; - } - self.expect_keyword(Keyword::MATCHED)?; - - if matches!(clause_kind, MergeClauseKind::NotMatched) - && self.parse_keywords(&[Keyword::BY, Keyword::SOURCE]) - { - clause_kind = MergeClauseKind::NotMatchedBySource; - } else if matches!(clause_kind, MergeClauseKind::NotMatched) - && self.parse_keywords(&[Keyword::BY, Keyword::TARGET]) - { - clause_kind 
= MergeClauseKind::NotMatchedByTarget; - } - - let predicate = if self.parse_keyword(Keyword::AND) { - Some(self.parse_expr()?) - } else { - None - }; - - self.expect_keyword(Keyword::THEN)?; - - let merge_clause = match self.parse_one_of_keywords(&[ - Keyword::UPDATE, - Keyword::INSERT, - Keyword::DELETE, - ]) { - Some(Keyword::UPDATE) => { - if matches!( - clause_kind, - MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget - ) { - return Err(ParserError::ParserError(format!( - "UPDATE is not allowed in a {clause_kind} merge clause" - ))); - } - self.expect_keyword(Keyword::SET)?; - MergeAction::Update { - assignments: self.parse_comma_separated(Parser::parse_assignment)?, - } - } - Some(Keyword::DELETE) => { - if matches!( - clause_kind, - MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget - ) { - return Err(ParserError::ParserError(format!( - "DELETE is not allowed in a {clause_kind} merge clause" - ))); - } - MergeAction::Delete - } - Some(Keyword::INSERT) => { - if !matches!( - clause_kind, - MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget - ) { - return Err(ParserError::ParserError(format!( - "INSERT is not allowed in a {clause_kind} merge clause" - ))); - } - let is_mysql = dialect_of!(self is MySqlDialect); - - let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; - let kind = if dialect_of!(self is BigQueryDialect | GenericDialect) - && self.parse_keyword(Keyword::ROW) - { - MergeInsertKind::Row - } else { - self.expect_keyword(Keyword::VALUES)?; - let values = self.parse_values(is_mysql)?; - MergeInsertKind::Values(values) - }; - MergeAction::Insert(MergeInsertExpr { columns, kind }) - } - _ => { - return Err(ParserError::ParserError( - "expected UPDATE, DELETE or INSERT in merge clause".to_string(), - )); - } - }; - clauses.push(MergeClause { - clause_kind, - predicate, - action: merge_clause, - }); - } - Ok(clauses) - } - - pub fn parse_merge(&mut self) -> Result { - let into = self.parse_keyword(Keyword::INTO); - - let table = self.parse_table_factor()?; - - self.expect_keyword(Keyword::USING)?; - let source = self.parse_table_factor()?; - self.expect_keyword(Keyword::ON)?; - let on = self.parse_expr()?; - let clauses = self.parse_merge_clauses()?; - - Ok(Statement::Merge { - into, - table, - source, - on: Box::new(on), - clauses, - }) - } - - fn parse_pragma_value(&mut self) -> Result { - match self.parse_value()? { - v @ Value::SingleQuotedString(_) => Ok(v), - v @ Value::DoubleQuotedString(_) => Ok(v), - v @ Value::Number(_, _) => Ok(v), - v @ Value::Placeholder(_) => Ok(v), - _ => { - self.prev_token(); - self.expected("number or string or ? 
placeholder", self.peek_token()) - } - } - } - - // PRAGMA [schema-name '.'] pragma-name [('=' pragma-value) | '(' pragma-value ')'] - pub fn parse_pragma(&mut self) -> Result { - let name = self.parse_object_name(false)?; - if self.consume_token(&Token::LParen) { - let value = self.parse_pragma_value()?; - self.expect_token(&Token::RParen)?; - Ok(Statement::Pragma { - name, - value: Some(value), - is_eq: false, - }) - } else if self.consume_token(&Token::Eq) { - Ok(Statement::Pragma { - name, - value: Some(self.parse_pragma_value()?), - is_eq: true, - }) - } else { - Ok(Statement::Pragma { - name, - value: None, - is_eq: false, - }) - } - } - - /// `INSTALL [extension_name]` - pub fn parse_install(&mut self) -> Result { - let extension_name = self.parse_identifier(false)?; - - Ok(Statement::Install { extension_name }) - } - - /// Parse a SQL LOAD statement - pub fn parse_load(&mut self) -> Result { - if self.dialect.supports_load_extension() { - let extension_name = self.parse_identifier(false)?; - Ok(Statement::Load { extension_name }) - } else if self.parse_keyword(Keyword::DATA) && self.dialect.supports_load_data() { - let local = self.parse_one_of_keywords(&[Keyword::LOCAL]).is_some(); - self.expect_keyword(Keyword::INPATH)?; - let inpath = self.parse_literal_string()?; - let overwrite = self.parse_one_of_keywords(&[Keyword::OVERWRITE]).is_some(); - self.expect_keyword(Keyword::INTO)?; - self.expect_keyword(Keyword::TABLE)?; - let table_name = self.parse_object_name(false)?; - let partitioned = self.parse_insert_partition()?; - let table_format = self.parse_load_data_table_format()?; - Ok(Statement::LoadData { - local, - inpath, - overwrite, - table_name, - partitioned, - table_format, - }) - } else { - self.expected( - "`DATA` or an extension name after `LOAD`", - self.peek_token(), - ) - } - } - - /// ```sql - /// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]] - /// ``` - /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize) - pub fn parse_optimize_table(&mut self) -> Result { - self.expect_keyword(Keyword::TABLE)?; - let name = self.parse_object_name(false)?; - let on_cluster = self.parse_optional_on_cluster()?; - - let partition = if self.parse_keyword(Keyword::PARTITION) { - if self.parse_keyword(Keyword::ID) { - Some(Partition::Identifier(self.parse_identifier(false)?)) - } else { - Some(Partition::Expr(self.parse_expr()?)) - } - } else { - None - }; - - let include_final = self.parse_keyword(Keyword::FINAL); - let deduplicate = if self.parse_keyword(Keyword::DEDUPLICATE) { - if self.parse_keyword(Keyword::BY) { - Some(Deduplicate::ByExpression(self.parse_expr()?)) - } else { - Some(Deduplicate::All) - } - } else { - None - }; - - Ok(Statement::OptimizeTable { - name, - on_cluster, - partition, - include_final, - deduplicate, - }) - } - - /// ```sql - /// CREATE [ { TEMPORARY | TEMP } ] SEQUENCE [ IF NOT EXISTS ] - /// ``` - /// - /// See [Postgres docs](https://www.postgresql.org/docs/current/sql-createsequence.html) for more details. - pub fn parse_create_sequence(&mut self, temporary: bool) -> Result { - //[ IF NOT EXISTS ] - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - //name - let name = self.parse_object_name(false)?; - //[ AS data_type ] - let mut data_type: Option = None; - if self.parse_keywords(&[Keyword::AS]) { - data_type = Some(self.parse_data_type()?) 
- } - let sequence_options = self.parse_create_sequence_options()?; - // [ OWNED BY { table_name.column_name | NONE } ] - let owned_by = if self.parse_keywords(&[Keyword::OWNED, Keyword::BY]) { - if self.parse_keywords(&[Keyword::NONE]) { - Some(ObjectName(vec![Ident::new("NONE")])) - } else { - Some(self.parse_object_name(false)?) - } - } else { - None - }; - Ok(Statement::CreateSequence { - temporary, - if_not_exists, - name, - data_type, - sequence_options, - owned_by, - }) - } - - fn parse_create_sequence_options(&mut self) -> Result, ParserError> { - let mut sequence_options = vec![]; - //[ INCREMENT [ BY ] increment ] - if self.parse_keywords(&[Keyword::INCREMENT]) { - if self.parse_keywords(&[Keyword::BY]) { - sequence_options.push(SequenceOptions::IncrementBy(self.parse_number()?, true)); - } else { - sequence_options.push(SequenceOptions::IncrementBy(self.parse_number()?, false)); - } - } - //[ MINVALUE minvalue | NO MINVALUE ] - if self.parse_keyword(Keyword::MINVALUE) { - sequence_options.push(SequenceOptions::MinValue(Some(self.parse_number()?))); - } else if self.parse_keywords(&[Keyword::NO, Keyword::MINVALUE]) { - sequence_options.push(SequenceOptions::MinValue(None)); - } - //[ MAXVALUE maxvalue | NO MAXVALUE ] - if self.parse_keywords(&[Keyword::MAXVALUE]) { - sequence_options.push(SequenceOptions::MaxValue(Some(self.parse_number()?))); - } else if self.parse_keywords(&[Keyword::NO, Keyword::MAXVALUE]) { - sequence_options.push(SequenceOptions::MaxValue(None)); - } - - //[ START [ WITH ] start ] - if self.parse_keywords(&[Keyword::START]) { - if self.parse_keywords(&[Keyword::WITH]) { - sequence_options.push(SequenceOptions::StartWith(self.parse_number()?, true)); - } else { - sequence_options.push(SequenceOptions::StartWith(self.parse_number()?, false)); - } - } - //[ CACHE cache ] - if self.parse_keywords(&[Keyword::CACHE]) { - sequence_options.push(SequenceOptions::Cache(self.parse_number()?)); - } - // [ [ NO ] CYCLE ] - if self.parse_keywords(&[Keyword::NO, Keyword::CYCLE]) { - sequence_options.push(SequenceOptions::Cycle(true)); - } else if self.parse_keywords(&[Keyword::CYCLE]) { - sequence_options.push(SequenceOptions::Cycle(false)); - } - - Ok(sequence_options) - } - - /// The index of the first unprocessed token. - pub fn index(&self) -> usize { - self.index - } - - pub fn parse_named_window(&mut self) -> Result { - let ident = self.parse_identifier(false)?; - self.expect_keyword(Keyword::AS)?; - - let window_expr = if self.consume_token(&Token::LParen) { - NamedWindowExpr::WindowSpec(self.parse_window_spec()?) - } else if self.dialect.supports_window_clause_named_window_reference() { - NamedWindowExpr::NamedWindow(self.parse_identifier(false)?) - } else { - return self.expected("(", self.peek_token()); - }; - - Ok(NamedWindowDefinition(ident, window_expr)) - } - - pub fn parse_create_procedure(&mut self, or_alter: bool) -> Result { - let name = self.parse_object_name(false)?; - let params = self.parse_optional_procedure_parameters()?; - self.expect_keyword(Keyword::AS)?; - self.expect_keyword(Keyword::BEGIN)?; - let statements = self.parse_statements()?; - self.expect_keyword(Keyword::END)?; - Ok(Statement::CreateProcedure { - name, - or_alter, - params, - body: statements, - }) - } - - pub fn parse_window_spec(&mut self) -> Result { - let window_name = match self.peek_token().token { - Token::Word(word) if word.keyword == Keyword::NoKeyword => { - self.parse_optional_indent()? 
- } - _ => None, - }; - - let partition_by = if self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_expr)? - } else { - vec![] - }; - let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_order_by_expr)? - } else { - vec![] - }; - - let window_frame = if !self.consume_token(&Token::RParen) { - let window_frame = self.parse_window_frame()?; - self.expect_token(&Token::RParen)?; - Some(window_frame) - } else { - None - }; - Ok(WindowSpec { - window_name, - partition_by, - order_by, - window_frame, - }) - } - - pub fn parse_create_type(&mut self) -> Result { - let name = self.parse_object_name(false)?; - self.expect_keyword(Keyword::AS)?; - - let mut attributes = vec![]; - if !self.consume_token(&Token::LParen) || self.consume_token(&Token::RParen) { - return Ok(Statement::CreateType { - name, - representation: UserDefinedTypeRepresentation::Composite { attributes }, - }); - } - - loop { - let attr_name = self.parse_identifier(false)?; - let attr_data_type = self.parse_data_type()?; - let attr_collation = if self.parse_keyword(Keyword::COLLATE) { - Some(self.parse_object_name(false)?) - } else { - None - }; - attributes.push(UserDefinedTypeCompositeAttributeDef { - name: attr_name, - data_type: attr_data_type, - collation: attr_collation, - }); - let comma = self.consume_token(&Token::Comma); - if self.consume_token(&Token::RParen) { - // allow a trailing comma - break; - } else if !comma { - return self.expected("',' or ')' after attribute definition", self.peek_token()); - } - } - - Ok(Statement::CreateType { - name, - representation: UserDefinedTypeRepresentation::Composite { attributes }, - }) - } - - fn parse_parenthesized_identifiers(&mut self) -> Result, ParserError> { - self.expect_token(&Token::LParen)?; - let partitions = self.parse_comma_separated(|p| p.parse_identifier(false))?; - self.expect_token(&Token::RParen)?; - Ok(partitions) - } - - fn parse_column_position(&mut self) -> Result, ParserError> { - if dialect_of!(self is MySqlDialect | GenericDialect) { - if self.parse_keyword(Keyword::FIRST) { - Ok(Some(MySQLColumnPosition::First)) - } else if self.parse_keyword(Keyword::AFTER) { - let ident = self.parse_identifier(false)?; - Ok(Some(MySQLColumnPosition::After(ident))) - } else { - Ok(None) - } - } else { - Ok(None) - } - } - - /// Consume the parser and return its underlying token buffer - pub fn into_tokens(self) -> Vec { - self.tokens - } - - /// Returns true if the next keyword indicates a sub query, i.e. SELECT or WITH - fn peek_sub_query(&mut self) -> bool { - if self - .parse_one_of_keywords(&[Keyword::SELECT, Keyword::WITH]) - .is_some() - { - self.prev_token(); - return true; + self.prev_token(); + return true; } false } - - fn parse_show_stmt_options(&mut self) -> Result { - let show_in; - let mut filter_position = None; - if self.dialect.supports_show_like_before_in() { - if let Some(filter) = self.parse_show_statement_filter()? { - filter_position = Some(ShowStatementFilterPosition::Infix(filter)); - } - show_in = self.maybe_parse_show_stmt_in()?; - } else { - show_in = self.maybe_parse_show_stmt_in()?; - if let Some(filter) = self.parse_show_statement_filter()? 
{ - filter_position = Some(ShowStatementFilterPosition::Suffix(filter)); - } - } - let starts_with = self.maybe_parse_show_stmt_starts_with()?; - let limit = self.maybe_parse_show_stmt_limit()?; - let from = self.maybe_parse_show_stmt_from()?; - Ok(ShowStatementOptions { - filter_position, - show_in, - starts_with, - limit, - limit_from: from, - }) - } - - fn maybe_parse_show_stmt_in(&mut self) -> Result, ParserError> { - let clause = match self.parse_one_of_keywords(&[Keyword::FROM, Keyword::IN]) { - Some(Keyword::FROM) => ShowStatementInClause::FROM, - Some(Keyword::IN) => ShowStatementInClause::IN, - None => return Ok(None), - _ => return self.expected("FROM or IN", self.peek_token()), - }; - - let (parent_type, parent_name) = match self.parse_one_of_keywords(&[ - Keyword::ACCOUNT, - Keyword::DATABASE, - Keyword::SCHEMA, - Keyword::TABLE, - Keyword::VIEW, - ]) { - // If we see these next keywords it means we don't have a parent name - Some(Keyword::DATABASE) - if self.peek_keywords(&[Keyword::STARTS, Keyword::WITH]) - | self.peek_keyword(Keyword::LIMIT) => - { - (Some(ShowStatementInParentType::Database), None) - } - Some(Keyword::SCHEMA) - if self.peek_keywords(&[Keyword::STARTS, Keyword::WITH]) - | self.peek_keyword(Keyword::LIMIT) => - { - (Some(ShowStatementInParentType::Schema), None) - } - Some(parent_kw) => { - // The parent name here is still optional, for example: - // SHOW TABLES IN ACCOUNT, so parsing the object name - // may fail because the statement ends. - let parent_name = self.maybe_parse(|p| p.parse_object_name(false))?; - match parent_kw { - Keyword::ACCOUNT => (Some(ShowStatementInParentType::Account), parent_name), - Keyword::DATABASE => (Some(ShowStatementInParentType::Database), parent_name), - Keyword::SCHEMA => (Some(ShowStatementInParentType::Schema), parent_name), - Keyword::TABLE => (Some(ShowStatementInParentType::Table), parent_name), - Keyword::VIEW => (Some(ShowStatementInParentType::View), parent_name), - _ => { - return self.expected( - "one of ACCOUNT, DATABASE, SCHEMA, TABLE or VIEW", - self.peek_token(), - ) - } - } - } - None => { - // Parsing MySQL style FROM tbl_name FROM db_name - // which is equivalent to FROM tbl_name.db_name - let mut parent_name = self.parse_object_name(false)?; - if self - .parse_one_of_keywords(&[Keyword::FROM, Keyword::IN]) - .is_some() - { - parent_name.0.insert(0, self.parse_identifier(false)?); - } - (None, Some(parent_name)) - } - }; - - Ok(Some(ShowStatementIn { - clause, - parent_type, - parent_name, - })) - } - - fn maybe_parse_show_stmt_starts_with(&mut self) -> Result, ParserError> { - if self.parse_keywords(&[Keyword::STARTS, Keyword::WITH]) { - Ok(Some(self.parse_value()?)) - } else { - Ok(None) - } - } - - fn maybe_parse_show_stmt_limit(&mut self) -> Result, ParserError> { - if self.parse_keyword(Keyword::LIMIT) { - Ok(self.parse_limit()?) 
- } else { - Ok(None) - } - } - - fn maybe_parse_show_stmt_from(&mut self) -> Result, ParserError> { - if self.parse_keyword(Keyword::FROM) { - Ok(Some(self.parse_value()?)) - } else { - Ok(None) - } - } -} - -impl Word { - pub fn to_ident(&self, span: Span) -> Ident { - Ident { - value: self.value.clone(), - quote_style: self.quote_style, - span, - } - } -} - -#[cfg(test)] -mod tests { - use crate::test_utils::{all_dialects, TestedDialects}; - - use super::*; - - #[test] - fn test_prev_index() { - let sql = "SELECT version"; - all_dialects().run_parser_method(sql, |parser| { - assert_eq!(parser.peek_token(), Token::make_keyword("SELECT")); - assert_eq!(parser.next_token(), Token::make_keyword("SELECT")); - parser.prev_token(); - assert_eq!(parser.next_token(), Token::make_keyword("SELECT")); - assert_eq!(parser.next_token(), Token::make_word("version", None)); - parser.prev_token(); - assert_eq!(parser.peek_token(), Token::make_word("version", None)); - assert_eq!(parser.next_token(), Token::make_word("version", None)); - assert_eq!(parser.peek_token(), Token::EOF); - parser.prev_token(); - assert_eq!(parser.next_token(), Token::make_word("version", None)); - assert_eq!(parser.next_token(), Token::EOF); - assert_eq!(parser.next_token(), Token::EOF); - parser.prev_token(); - }); - } - - #[test] - fn test_peek_tokens() { - all_dialects().run_parser_method("SELECT foo AS bar FROM baz", |parser| { - assert!(matches!( - parser.peek_tokens(), - [Token::Word(Word { - keyword: Keyword::SELECT, - .. - })] - )); - - assert!(matches!( - parser.peek_tokens(), - [ - Token::Word(Word { - keyword: Keyword::SELECT, - .. - }), - Token::Word(_), - Token::Word(Word { - keyword: Keyword::AS, - .. - }), - ] - )); - - for _ in 0..4 { - parser.next_token(); - } - - assert!(matches!( - parser.peek_tokens(), - [ - Token::Word(Word { - keyword: Keyword::FROM, - .. - }), - Token::Word(_), - Token::EOF, - Token::EOF, - ] - )) - }) - } - - #[cfg(test)] - mod test_parse_data_type { - use crate::ast::{ - CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, ObjectName, TimezoneInfo, - }; - use crate::dialect::{AnsiDialect, GenericDialect}; - use crate::test_utils::TestedDialects; - - macro_rules! test_parse_data_type { - ($dialect:expr, $input:expr, $expected_type:expr $(,)?) 
=> {{ - $dialect.run_parser_method(&*$input, |parser| { - let data_type = parser.parse_data_type().unwrap(); - assert_eq!($expected_type, data_type); - assert_eq!($input.to_string(), data_type.to_string()); - }); - }}; - } - - #[test] - fn test_ansii_character_string_types() { - // Character string types: - let dialect = - TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})]); - - test_parse_data_type!(dialect, "CHARACTER", DataType::Character(None)); - - test_parse_data_type!( - dialect, - "CHARACTER(20)", - DataType::Character(Some(CharacterLength::IntegerLength { - length: 20, - unit: None - })) - ); - - test_parse_data_type!( - dialect, - "CHARACTER(20 CHARACTERS)", - DataType::Character(Some(CharacterLength::IntegerLength { - length: 20, - unit: Some(CharLengthUnits::Characters) - })) - ); - - test_parse_data_type!( - dialect, - "CHARACTER(20 OCTETS)", - DataType::Character(Some(CharacterLength::IntegerLength { - length: 20, - unit: Some(CharLengthUnits::Octets) - })) - ); - - test_parse_data_type!(dialect, "CHAR", DataType::Char(None)); - - test_parse_data_type!( - dialect, - "CHAR(20)", - DataType::Char(Some(CharacterLength::IntegerLength { - length: 20, - unit: None - })) - ); - - test_parse_data_type!( - dialect, - "CHAR(20 CHARACTERS)", - DataType::Char(Some(CharacterLength::IntegerLength { - length: 20, - unit: Some(CharLengthUnits::Characters) - })) - ); - - test_parse_data_type!( - dialect, - "CHAR(20 OCTETS)", - DataType::Char(Some(CharacterLength::IntegerLength { - length: 20, - unit: Some(CharLengthUnits::Octets) - })) - ); - - test_parse_data_type!( - dialect, - "CHARACTER VARYING(20)", - DataType::CharacterVarying(Some(CharacterLength::IntegerLength { - length: 20, - unit: None - })) - ); - - test_parse_data_type!( - dialect, - "CHARACTER VARYING(20 CHARACTERS)", - DataType::CharacterVarying(Some(CharacterLength::IntegerLength { - length: 20, - unit: Some(CharLengthUnits::Characters) - })) - ); - - test_parse_data_type!( - dialect, - "CHARACTER VARYING(20 OCTETS)", - DataType::CharacterVarying(Some(CharacterLength::IntegerLength { - length: 20, - unit: Some(CharLengthUnits::Octets) - })) - ); - - test_parse_data_type!( - dialect, - "CHAR VARYING(20)", - DataType::CharVarying(Some(CharacterLength::IntegerLength { - length: 20, - unit: None - })) - ); - - test_parse_data_type!( - dialect, - "CHAR VARYING(20 CHARACTERS)", - DataType::CharVarying(Some(CharacterLength::IntegerLength { - length: 20, - unit: Some(CharLengthUnits::Characters) - })) - ); - - test_parse_data_type!( - dialect, - "CHAR VARYING(20 OCTETS)", - DataType::CharVarying(Some(CharacterLength::IntegerLength { - length: 20, - unit: Some(CharLengthUnits::Octets) - })) - ); - - test_parse_data_type!( - dialect, - "VARCHAR(20)", - DataType::Varchar(Some(CharacterLength::IntegerLength { - length: 20, - unit: None - })) - ); - } - - #[test] - fn test_ansii_character_large_object_types() { - // Character large object types: - let dialect = - TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})]); - - test_parse_data_type!( - dialect, - "CHARACTER LARGE OBJECT", - DataType::CharacterLargeObject(None) - ); - test_parse_data_type!( - dialect, - "CHARACTER LARGE OBJECT(20)", - DataType::CharacterLargeObject(Some(20)) - ); - - test_parse_data_type!( - dialect, - "CHAR LARGE OBJECT", - DataType::CharLargeObject(None) - ); - test_parse_data_type!( - dialect, - "CHAR LARGE OBJECT(20)", - DataType::CharLargeObject(Some(20)) - ); - - test_parse_data_type!(dialect, "CLOB", 
DataType::Clob(None)); - test_parse_data_type!(dialect, "CLOB(20)", DataType::Clob(Some(20))); - } - - #[test] - fn test_parse_custom_types() { - let dialect = - TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})]); - - test_parse_data_type!( - dialect, - "GEOMETRY", - DataType::Custom(ObjectName(vec!["GEOMETRY".into()]), vec![]) - ); - - test_parse_data_type!( - dialect, - "GEOMETRY(POINT)", - DataType::Custom( - ObjectName(vec!["GEOMETRY".into()]), - vec!["POINT".to_string()] - ) - ); - - test_parse_data_type!( - dialect, - "GEOMETRY(POINT, 4326)", - DataType::Custom( - ObjectName(vec!["GEOMETRY".into()]), - vec!["POINT".to_string(), "4326".to_string()] - ) - ); - } - - #[test] - fn test_ansii_exact_numeric_types() { - // Exact numeric types: - let dialect = - TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})]); - - test_parse_data_type!(dialect, "NUMERIC", DataType::Numeric(ExactNumberInfo::None)); - - test_parse_data_type!( - dialect, - "NUMERIC(2)", - DataType::Numeric(ExactNumberInfo::Precision(2)) - ); - - test_parse_data_type!( - dialect, - "NUMERIC(2,10)", - DataType::Numeric(ExactNumberInfo::PrecisionAndScale(2, 10)) - ); - - test_parse_data_type!(dialect, "DECIMAL", DataType::Decimal(ExactNumberInfo::None)); - - test_parse_data_type!( - dialect, - "DECIMAL(2)", - DataType::Decimal(ExactNumberInfo::Precision(2)) - ); - - test_parse_data_type!( - dialect, - "DECIMAL(2,10)", - DataType::Decimal(ExactNumberInfo::PrecisionAndScale(2, 10)) - ); - - test_parse_data_type!(dialect, "DEC", DataType::Dec(ExactNumberInfo::None)); - - test_parse_data_type!( - dialect, - "DEC(2)", - DataType::Dec(ExactNumberInfo::Precision(2)) - ); - - test_parse_data_type!( - dialect, - "DEC(2,10)", - DataType::Dec(ExactNumberInfo::PrecisionAndScale(2, 10)) - ); - } - - #[test] - fn test_ansii_date_type() { - // Datetime types: - let dialect = - TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})]); - - test_parse_data_type!(dialect, "DATE", DataType::Date); - - test_parse_data_type!(dialect, "TIME", DataType::Time(None, TimezoneInfo::None)); - - test_parse_data_type!( - dialect, - "TIME(6)", - DataType::Time(Some(6), TimezoneInfo::None) - ); - - test_parse_data_type!( - dialect, - "TIME WITH TIME ZONE", - DataType::Time(None, TimezoneInfo::WithTimeZone) - ); - - test_parse_data_type!( - dialect, - "TIME(6) WITH TIME ZONE", - DataType::Time(Some(6), TimezoneInfo::WithTimeZone) - ); - - test_parse_data_type!( - dialect, - "TIME WITHOUT TIME ZONE", - DataType::Time(None, TimezoneInfo::WithoutTimeZone) - ); - - test_parse_data_type!( - dialect, - "TIME(6) WITHOUT TIME ZONE", - DataType::Time(Some(6), TimezoneInfo::WithoutTimeZone) - ); - - test_parse_data_type!( - dialect, - "TIMESTAMP", - DataType::Timestamp(None, TimezoneInfo::None) - ); - - test_parse_data_type!( - dialect, - "TIMESTAMP(22)", - DataType::Timestamp(Some(22), TimezoneInfo::None) - ); - - test_parse_data_type!( - dialect, - "TIMESTAMP(22) WITH TIME ZONE", - DataType::Timestamp(Some(22), TimezoneInfo::WithTimeZone) - ); - - test_parse_data_type!( - dialect, - "TIMESTAMP(33) WITHOUT TIME ZONE", - DataType::Timestamp(Some(33), TimezoneInfo::WithoutTimeZone) - ); - } - } - - #[test] - fn test_parse_schema_name() { - // The expected name should be identical as the input name, that's why I don't receive both - macro_rules! test_parse_schema_name { - ($input:expr, $expected_name:expr $(,)?) 
=> {{ - all_dialects().run_parser_method(&*$input, |parser| { - let schema_name = parser.parse_schema_name().unwrap(); - // Validate that the structure is the same as expected - assert_eq!(schema_name, $expected_name); - // Validate that the input and the expected structure serialization are the same - assert_eq!(schema_name.to_string(), $input.to_string()); - }); - }}; - } - - let dummy_name = ObjectName(vec![Ident::new("dummy_name")]); - let dummy_authorization = Ident::new("dummy_authorization"); - - test_parse_schema_name!( - format!("{dummy_name}"), - SchemaName::Simple(dummy_name.clone()) - ); - - test_parse_schema_name!( - format!("AUTHORIZATION {dummy_authorization}"), - SchemaName::UnnamedAuthorization(dummy_authorization.clone()), - ); - test_parse_schema_name!( - format!("{dummy_name} AUTHORIZATION {dummy_authorization}"), - SchemaName::NamedAuthorization(dummy_name.clone(), dummy_authorization.clone()), - ); - } - - #[test] - fn mysql_parse_index_table_constraint() { - macro_rules! test_parse_table_constraint { - ($dialect:expr, $input:expr, $expected:expr $(,)?) => {{ - $dialect.run_parser_method(&*$input, |parser| { - let constraint = parser.parse_optional_table_constraint().unwrap().unwrap(); - // Validate that the structure is the same as expected - assert_eq!(constraint, $expected); - // Validate that the input and the expected structure serialization are the same - assert_eq!(constraint.to_string(), $input.to_string()); - }); - }}; - } - - let dialect = - TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(MySqlDialect {})]); - - test_parse_table_constraint!( - dialect, - "INDEX (c1)", - TableConstraint::Index { - display_as_key: false, - name: None, - index_type: None, - columns: vec![Ident::new("c1")], - } - ); - - test_parse_table_constraint!( - dialect, - "KEY (c1)", - TableConstraint::Index { - display_as_key: true, - name: None, - index_type: None, - columns: vec![Ident::new("c1")], - } - ); - - test_parse_table_constraint!( - dialect, - "INDEX 'index' (c1, c2)", - TableConstraint::Index { - display_as_key: false, - name: Some(Ident::with_quote('\'', "index")), - index_type: None, - columns: vec![Ident::new("c1"), Ident::new("c2")], - } - ); - - test_parse_table_constraint!( - dialect, - "INDEX USING BTREE (c1)", - TableConstraint::Index { - display_as_key: false, - name: None, - index_type: Some(IndexType::BTree), - columns: vec![Ident::new("c1")], - } - ); - - test_parse_table_constraint!( - dialect, - "INDEX USING HASH (c1)", - TableConstraint::Index { - display_as_key: false, - name: None, - index_type: Some(IndexType::Hash), - columns: vec![Ident::new("c1")], - } - ); - - test_parse_table_constraint!( - dialect, - "INDEX idx_name USING BTREE (c1)", - TableConstraint::Index { - display_as_key: false, - name: Some(Ident::new("idx_name")), - index_type: Some(IndexType::BTree), - columns: vec![Ident::new("c1")], - } - ); - - test_parse_table_constraint!( - dialect, - "INDEX idx_name USING HASH (c1)", - TableConstraint::Index { - display_as_key: false, - name: Some(Ident::new("idx_name")), - index_type: Some(IndexType::Hash), - columns: vec![Ident::new("c1")], - } - ); - } - - #[test] - fn test_tokenizer_error_loc() { - let sql = "foo '"; - let ast = Parser::parse_sql(&GenericDialect, sql); - assert_eq!( - ast, - Err(ParserError::TokenizerError( - "Unterminated string literal at Line: 1, Column: 5".to_string() - )) - ); - } - - #[test] - fn test_parser_error_loc() { - let sql = "SELECT this is a syntax error"; - let ast = 
Parser::parse_sql(&GenericDialect, sql); - assert_eq!( - ast, - Err(ParserError::ParserError( - "Expected: [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a at Line: 1, Column: 16" - .to_string() - )) - ); - } - - #[test] - fn test_nested_explain_error() { - let sql = "EXPLAIN EXPLAIN SELECT 1"; - let ast = Parser::parse_sql(&GenericDialect, sql); - assert_eq!( - ast, - Err(ParserError::ParserError( - "Explain must be root of the plan".to_string() - )) - ); - } - - #[test] - fn test_parse_multipart_identifier_positive() { - let dialect = TestedDialects::new(vec![Box::new(GenericDialect {})]); - - // parse multipart with quotes - let expected = vec![ - Ident { - value: "CATALOG".to_string(), - quote_style: None, - span: Span::empty(), - }, - Ident { - value: "F(o)o. \"bar".to_string(), - quote_style: Some('"'), - span: Span::empty(), - }, - Ident { - value: "table".to_string(), - quote_style: None, - span: Span::empty(), - }, - ]; - dialect.run_parser_method(r#"CATALOG."F(o)o. ""bar".table"#, |parser| { - let actual = parser.parse_multipart_identifier().unwrap(); - assert_eq!(expected, actual); - }); - - // allow whitespace between ident parts - let expected = vec![ - Ident { - value: "CATALOG".to_string(), - quote_style: None, - span: Span::empty(), - }, - Ident { - value: "table".to_string(), - quote_style: None, - span: Span::empty(), - }, - ]; - dialect.run_parser_method("CATALOG . table", |parser| { - let actual = parser.parse_multipart_identifier().unwrap(); - assert_eq!(expected, actual); - }); - } - - #[test] - fn test_parse_multipart_identifier_negative() { - macro_rules! test_parse_multipart_identifier_error { - ($input:expr, $expected_err:expr $(,)?) => {{ - all_dialects().run_parser_method(&*$input, |parser| { - let actual_err = parser.parse_multipart_identifier().unwrap_err(); - assert_eq!(actual_err.to_string(), $expected_err); - }); - }}; - } - - test_parse_multipart_identifier_error!( - "", - "sql parser error: Empty input when parsing identifier", - ); - - test_parse_multipart_identifier_error!( - "*schema.table", - "sql parser error: Unexpected token in identifier: *", - ); - - test_parse_multipart_identifier_error!( - "schema.table*", - "sql parser error: Unexpected token in identifier: *", - ); - - test_parse_multipart_identifier_error!( - "schema.table.", - "sql parser error: Trailing period in identifier", - ); - - test_parse_multipart_identifier_error!( - "schema.*", - "sql parser error: Unexpected token following period in identifier: *", - ); - } - - #[test] - fn test_mysql_partition_selection() { - let sql = "SELECT * FROM employees PARTITION (p0, p2)"; - let expected = vec!["p0", "p2"]; - - let ast: Vec = Parser::parse_sql(&MySqlDialect {}, sql).unwrap(); - assert_eq!(ast.len(), 1); - if let Statement::Query(v) = &ast[0] { - if let SetExpr::Select(select) = &*v.body { - assert_eq!(select.from.len(), 1); - let from: &TableWithJoins = &select.from[0]; - let table_factor = &from.relation; - if let TableFactor::Table { partitions, .. 
} = table_factor { - let actual: Vec<&str> = partitions - .iter() - .map(|ident| ident.value.as_str()) - .collect(); - assert_eq!(expected, actual); - } - } - } else { - panic!("fail to parse mysql partition selection"); - } - } - - #[test] - fn test_replace_into_placeholders() { - let sql = "REPLACE INTO t (a) VALUES (&a)"; - - assert!(Parser::parse_sql(&GenericDialect {}, sql).is_err()); - } - - #[test] - fn test_replace_into_set() { - // NOTE: This is actually valid MySQL syntax, REPLACE and INSERT, - // but the parser does not yet support it. - // https://dev.mysql.com/doc/refman/8.3/en/insert.html - let sql = "REPLACE INTO t SET a='1'"; - - assert!(Parser::parse_sql(&MySqlDialect {}, sql).is_err()); - } - - #[test] - fn test_replace_into_set_placeholder() { - let sql = "REPLACE INTO t SET ?"; - - assert!(Parser::parse_sql(&GenericDialect {}, sql).is_err()); - } - - #[test] - fn test_replace_incomplete() { - let sql = r#"REPLACE"#; - - assert!(Parser::parse_sql(&MySqlDialect {}, sql).is_err()); - } } diff --git a/src/parser/msck.rs b/src/parser/msck.rs new file mode 100644 index 000000000..1357c3f0e --- /dev/null +++ b/src/parser/msck.rs @@ -0,0 +1,30 @@ +use crate::parser::*; + +impl Parser<'_> { + pub fn parse_msck(&mut self) -> Result { + let repair = self.parse_keyword(Keyword::REPAIR); + self.expect_keyword(Keyword::TABLE)?; + let table_name = self.parse_object_name(false)?; + let partition_action = self + .maybe_parse(|parser| { + let pa = match parser.parse_one_of_keywords(&[ + Keyword::ADD, + Keyword::DROP, + Keyword::SYNC, + ]) { + Some(Keyword::ADD) => Some(AddDropSync::ADD), + Some(Keyword::DROP) => Some(AddDropSync::DROP), + Some(Keyword::SYNC) => Some(AddDropSync::SYNC), + _ => None, + }; + parser.expect_keyword(Keyword::PARTITIONS)?; + Ok(pa) + })? + .unwrap_or_default(); + Ok(Statement::Msck { + repair, + table_name, + partition_action, + }) + } +} diff --git a/src/parser/notify.rs b/src/parser/notify.rs new file mode 100644 index 000000000..5aef89a0d --- /dev/null +++ b/src/parser/notify.rs @@ -0,0 +1,13 @@ +use crate::parser::*; + +impl Parser<'_> { + pub fn parse_notify(&mut self) -> Result { + let channel = self.parse_identifier(false)?; + let payload = if self.consume_token(&Token::Comma) { + Some(self.parse_literal_string()?) 
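A sketch of the NOTIFY grammar being added here, assuming the statement dispatcher routes NOTIFY to `parse_notify` for the PostgreSQL dialect; the channel name and payload are illustrative:

fn notify_sketch() {
    use sqlparser::dialect::PostgreSqlDialect;
    use sqlparser::parser::Parser;

    // The payload is optional and, when present, is introduced by a comma.
    assert!(Parser::parse_sql(&PostgreSqlDialect {}, "NOTIFY jobs").is_ok());
    assert!(Parser::parse_sql(&PostgreSqlDialect {}, "NOTIFY jobs, 'run'").is_ok());
}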
+ } else { + None + }; + Ok(Statement::NOTIFY { channel, payload }) + } +} diff --git a/src/parser/optimize.rs b/src/parser/optimize.rs new file mode 100644 index 000000000..76753fa26 --- /dev/null +++ b/src/parser/optimize.rs @@ -0,0 +1,42 @@ +use crate::parser::*; + +impl Parser<'_> { + /// ```sql + /// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]] + /// ``` + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize) + pub fn parse_optimize_table(&mut self) -> Result { + self.expect_keyword(Keyword::TABLE)?; + let name = self.parse_object_name(false)?; + let on_cluster = self.parse_optional_on_cluster()?; + + let partition = if self.parse_keyword(Keyword::PARTITION) { + if self.parse_keyword(Keyword::ID) { + Some(Partition::Identifier(self.parse_identifier(false)?)) + } else { + Some(Partition::Expr(self.parse_expr()?)) + } + } else { + None + }; + + let include_final = self.parse_keyword(Keyword::FINAL); + let deduplicate = if self.parse_keyword(Keyword::DEDUPLICATE) { + if self.parse_keyword(Keyword::BY) { + Some(Deduplicate::ByExpression(self.parse_expr()?)) + } else { + Some(Deduplicate::All) + } + } else { + None + }; + + Ok(Statement::OptimizeTable { + name, + on_cluster, + partition, + include_final, + deduplicate, + }) + } +} diff --git a/src/parser/options.rs b/src/parser/options.rs new file mode 100644 index 000000000..1f50f65e7 --- /dev/null +++ b/src/parser/options.rs @@ -0,0 +1,128 @@ +use crate::parser::*; + +impl Parser<'_> { + pub fn maybe_parse_options( + &mut self, + keyword: Keyword, + ) -> Result>, ParserError> { + if let Token::Word(word) = self.peek_token().token { + if word.keyword == keyword { + return Ok(Some(self.parse_options(keyword)?)); + } + }; + Ok(None) + } + + pub fn parse_options(&mut self, keyword: Keyword) -> Result, ParserError> { + if self.parse_keyword(keyword) { + self.expect_token(&Token::LParen)?; + let options = self.parse_comma_separated(Parser::parse_sql_option)?; + self.expect_token(&Token::RParen)?; + Ok(options) + } else { + Ok(vec![]) + } + } + + pub fn parse_option_clustered(&mut self) -> Result { + if self.parse_keywords(&[ + Keyword::CLUSTERED, + Keyword::COLUMNSTORE, + Keyword::INDEX, + Keyword::ORDER, + ]) { + Ok(SqlOption::Clustered( + TableOptionsClustered::ColumnstoreIndexOrder( + self.parse_parenthesized_column_list(IsOptional::Mandatory, false)?, + ), + )) + } else if self.parse_keywords(&[Keyword::CLUSTERED, Keyword::COLUMNSTORE, Keyword::INDEX]) { + Ok(SqlOption::Clustered( + TableOptionsClustered::ColumnstoreIndex, + )) + } else if self.parse_keywords(&[Keyword::CLUSTERED, Keyword::INDEX]) { + self.expect_token(&Token::LParen)?; + + let columns = self.parse_comma_separated(|p| { + let name = p.parse_identifier(false)?; + let asc = p.parse_asc_desc(); + + Ok(ClusteredIndex { name, asc }) + })?; + + self.expect_token(&Token::RParen)?; + + Ok(SqlOption::Clustered(TableOptionsClustered::Index(columns))) + } else { + Err(ParserError::ParserError( + "invalid CLUSTERED sequence".to_string(), + )) + } + } + + pub fn parse_option_partition(&mut self) -> Result { + self.expect_keyword(Keyword::PARTITION)?; + self.expect_token(&Token::LParen)?; + let column_name = self.parse_identifier(false)?; + + self.expect_keyword(Keyword::RANGE)?; + let range_direction = if self.parse_keyword(Keyword::LEFT) { + Some(PartitionRangeDirection::Left) + } else if self.parse_keyword(Keyword::RIGHT) { + Some(PartitionRangeDirection::Right) + } 
else { + None + }; + + self.expect_keywords(&[Keyword::FOR, Keyword::VALUES])?; + self.expect_token(&Token::LParen)?; + + let for_values = self.parse_comma_separated(Parser::parse_expr)?; + + self.expect_token(&Token::RParen)?; + self.expect_token(&Token::RParen)?; + + Ok(SqlOption::Partition { + column_name, + range_direction, + for_values, + }) + } + + pub fn parse_options_with_keywords( + &mut self, + keywords: &[Keyword], + ) -> Result, ParserError> { + if self.parse_keywords(keywords) { + self.expect_token(&Token::LParen)?; + let options = self.parse_comma_separated(Parser::parse_sql_option)?; + self.expect_token(&Token::RParen)?; + Ok(options) + } else { + Ok(vec![]) + } + } + + pub fn parse_sql_option(&mut self) -> Result { + let is_mssql = dialect_of!(self is MsSqlDialect|GenericDialect); + + match self.peek_token().token { + Token::Word(w) if w.keyword == Keyword::HEAP && is_mssql => { + Ok(SqlOption::Ident(self.parse_identifier(false)?)) + } + Token::Word(w) if w.keyword == Keyword::PARTITION && is_mssql => { + self.parse_option_partition() + } + Token::Word(w) if w.keyword == Keyword::CLUSTERED && is_mssql => { + self.parse_option_clustered() + } + _ => { + let name = self.parse_identifier(false)?; + self.expect_token(&Token::Eq)?; + let value = self.parse_expr()?; + + Ok(SqlOption::KeyValue { key: name, value }) + } + } + } +} diff --git a/src/parser/pragma.rs b/src/parser/pragma.rs new file mode 100644 index 000000000..5a6727da9 --- /dev/null +++ b/src/parser/pragma.rs @@ -0,0 +1,42 @@ +use crate::parser::*; + +impl Parser<'_> { + // PRAGMA [schema-name '.'] pragma-name [('=' pragma-value) | '(' pragma-value ')'] + pub fn parse_pragma(&mut self) -> Result { + let name = self.parse_object_name(false)?; + if self.consume_token(&Token::LParen) { + let value = self.parse_pragma_value()?; + self.expect_token(&Token::RParen)?; + Ok(Statement::Pragma { + name, + value: Some(value), + is_eq: false, + }) + } else if self.consume_token(&Token::Eq) { + Ok(Statement::Pragma { + name, + value: Some(self.parse_pragma_value()?), + is_eq: true, + }) + } else { + Ok(Statement::Pragma { + name, + value: None, + is_eq: false, + }) + } + } + + fn parse_pragma_value(&mut self) -> Result { + match self.parse_value()? { + v @ Value::SingleQuotedString(_) => Ok(v), + v @ Value::DoubleQuotedString(_) => Ok(v), + v @ Value::Number(_, _) => Ok(v), + v @ Value::Placeholder(_) => Ok(v), + _ => { + self.prev_token(); + self.expected("number or string or ? 
placeholder", self.peek_token()) + } + } + } +} diff --git a/src/parser/prepare.rs b/src/parser/prepare.rs new file mode 100644 index 000000000..0b0343a25 --- /dev/null +++ b/src/parser/prepare.rs @@ -0,0 +1,21 @@ +use crate::parser::*; + +impl Parser<'_> { + pub fn parse_prepare(&mut self) -> Result { + let name = self.parse_identifier(false)?; + + let mut data_types = vec![]; + if self.consume_token(&Token::LParen) { + data_types = self.parse_comma_separated(Parser::parse_data_type)?; + self.expect_token(&Token::RParen)?; + } + + self.expect_keyword(Keyword::AS)?; + let statement = Box::new(self.parse_statement()?); + Ok(Statement::Prepare { + name, + data_types, + statement, + }) + } +} diff --git a/src/parser/release.rs b/src/parser/release.rs new file mode 100644 index 000000000..fd18a576b --- /dev/null +++ b/src/parser/release.rs @@ -0,0 +1,10 @@ +use crate::parser::*; + +impl Parser<'_> { + pub fn parse_release(&mut self) -> Result { + let _ = self.parse_keyword(Keyword::SAVEPOINT); + let name = self.parse_identifier(false)?; + + Ok(Statement::ReleaseSavepoint { name }) + } +} diff --git a/src/parser/replace.rs b/src/parser/replace.rs new file mode 100644 index 000000000..6ae8b78f5 --- /dev/null +++ b/src/parser/replace.rs @@ -0,0 +1,22 @@ +use crate::parser::*; + +use crate::parser_err; + +impl Parser<'_> { + /// Parse an REPLACE statement + pub fn parse_replace(&mut self) -> Result { + if !dialect_of!(self is MySqlDialect | GenericDialect) { + return parser_err!( + "Unsupported statement REPLACE", + self.peek_token().span.start + ); + } + + let mut insert = self.parse_insert()?; + if let Statement::Insert(Insert { replace_into, .. }) = &mut insert { + *replace_into = true; + } + + Ok(insert) + } +} diff --git a/src/parser/revoke.rs b/src/parser/revoke.rs new file mode 100644 index 000000000..235114767 --- /dev/null +++ b/src/parser/revoke.rs @@ -0,0 +1,32 @@ +use crate::parser::*; + +use crate::parser_err; + +impl Parser<'_> { + /// Parse a REVOKE statement + pub fn parse_revoke(&mut self) -> Result { + let (privileges, objects) = self.parse_grant_revoke_privileges_objects()?; + + self.expect_keyword(Keyword::FROM)?; + let grantees = self.parse_comma_separated(|p| p.parse_identifier(false))?; + + let granted_by = self + .parse_keywords(&[Keyword::GRANTED, Keyword::BY]) + .then(|| self.parse_identifier(false).unwrap()); + + let loc = self.peek_token().span.start; + let cascade = self.parse_keyword(Keyword::CASCADE); + let restrict = self.parse_keyword(Keyword::RESTRICT); + if cascade && restrict { + return parser_err!("Cannot specify both CASCADE and RESTRICT in REVOKE", loc); + } + + Ok(Statement::Revoke { + privileges, + objects, + grantees, + granted_by, + cascade, + }) + } +} diff --git a/src/parser/rollback.rs b/src/parser/rollback.rs new file mode 100644 index 000000000..35872210a --- /dev/null +++ b/src/parser/rollback.rs @@ -0,0 +1,32 @@ +use crate::parser::*; + +impl Parser<'_> { + pub fn parse_rollback(&mut self) -> Result { + let chain = self.parse_commit_rollback_chain()?; + let savepoint = self.parse_rollback_savepoint()?; + + Ok(Statement::Rollback { chain, savepoint }) + } + + pub fn parse_commit_rollback_chain(&mut self) -> Result { + let _ = self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]); + if self.parse_keyword(Keyword::AND) { + let chain = !self.parse_keyword(Keyword::NO); + self.expect_keyword(Keyword::CHAIN)?; + Ok(chain) + } else { + Ok(false) + } + } + + pub fn parse_rollback_savepoint(&mut self) -> Result, ParserError> { + if 
diff --git a/src/parser/rollback.rs b/src/parser/rollback.rs
new file mode 100644
index 000000000..35872210a
--- /dev/null
+++ b/src/parser/rollback.rs
@@ -0,0 +1,32 @@
+use crate::parser::*;
+
+impl Parser<'_> {
+    pub fn parse_rollback(&mut self) -> Result<Statement, ParserError> {
+        let chain = self.parse_commit_rollback_chain()?;
+        let savepoint = self.parse_rollback_savepoint()?;
+
+        Ok(Statement::Rollback { chain, savepoint })
+    }
+
+    pub fn parse_commit_rollback_chain(&mut self) -> Result<bool, ParserError> {
+        let _ = self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]);
+        if self.parse_keyword(Keyword::AND) {
+            let chain = !self.parse_keyword(Keyword::NO);
+            self.expect_keyword(Keyword::CHAIN)?;
+            Ok(chain)
+        } else {
+            Ok(false)
+        }
+    }
+
+    pub fn parse_rollback_savepoint(&mut self) -> Result<Option<Ident>, ParserError> {
+        if self.parse_keyword(Keyword::TO) {
+            let _ = self.parse_keyword(Keyword::SAVEPOINT);
+            let savepoint = self.parse_identifier(false)?;
+
+            Ok(Some(savepoint))
+        } else {
+            Ok(None)
+        }
+    }
+}
diff --git a/src/parser/savepoint.rs b/src/parser/savepoint.rs
new file mode 100644
index 000000000..dca82b257
--- /dev/null
+++ b/src/parser/savepoint.rs
@@ -0,0 +1,8 @@
+use crate::parser::*;
+
+impl Parser<'_> {
+    pub fn parse_savepoint(&mut self) -> Result<Statement, ParserError> {
+        let name = self.parse_identifier(false)?;
+        Ok(Statement::Savepoint { name })
+    }
+}
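A sketch of the savepoint round trip covered by the three small modules above, assuming the public `Parser::parse_sql` entry point; the savepoint name is illustrative:

fn savepoint_sketch() {
    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;

    // The SAVEPOINT keyword is optional after both ROLLBACK TO and RELEASE.
    for sql in [
        "SAVEPOINT sp1",
        "ROLLBACK TO SAVEPOINT sp1",
        "ROLLBACK TO sp1",
        "RELEASE SAVEPOINT sp1",
    ] {
        assert!(Parser::parse_sql(&GenericDialect {}, sql).is_ok());
    }
}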
+                if let Some(parsed_for_clause) = self.parse_for_clause()? {
+                    for_clause = Some(parsed_for_clause);
+                    break;
+                } else {
+                    locks.push(self.parse_lock()?);
+                }
+            }
+            let format_clause = if dialect_of!(self is ClickHouseDialect | GenericDialect)
+                && self.parse_keyword(Keyword::FORMAT)
+            {
+                if self.parse_keyword(Keyword::NULL) {
+                    Some(FormatClause::Null)
+                } else {
+                    let ident = self.parse_identifier(false)?;
+                    Some(FormatClause::Identifier(ident))
+                }
+            } else {
+                None
+            };
+
+            Ok(Query {
+                with,
+                body,
+                order_by,
+                limit,
+                limit_by,
+                offset,
+                fetch,
+                locks,
+                for_clause,
+                settings,
+                format_clause,
+            }
+            .into())
+        }
+    }
+
+    /// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. Returns [`None`] if `ALL` is parsed
+    /// and results in a [`ParserError`] if both `ALL` and `DISTINCT` are found.
+    pub fn parse_all_or_distinct(&mut self) -> Result<Option<Distinct>, ParserError> {
+        let loc = self.peek_token().span.start;
+        let all = self.parse_keyword(Keyword::ALL);
+        let distinct = self.parse_keyword(Keyword::DISTINCT);
+        if !distinct {
+            return Ok(None);
+        }
+        if all {
+            return parser_err!("Cannot specify both ALL and DISTINCT".to_string(), loc);
+        }
+        let on = self.parse_keyword(Keyword::ON);
+        if !on {
+            return Ok(Some(Distinct::Distinct));
+        }
+
+        self.expect_token(&Token::LParen)?;
+        let col_names = if self.consume_token(&Token::RParen) {
+            self.prev_token();
+            Vec::new()
+        } else {
+            self.parse_comma_separated(Parser::parse_expr)?
+        };
+        self.expect_token(&Token::RParen)?;
+        Ok(Some(Distinct::On(col_names)))
+    }
+
+    /// Parse `CREATE TABLE x AS TABLE y`
+    pub fn parse_as_table(&mut self) -> Result<Table, ParserError> {
+        let token1 = self.next_token();
+        let token2 = self.next_token();
+        let token3 = self.next_token();
+
+        let table_name;
+        let schema_name;
+        if token2 == Token::Period {
+            match token1.token {
+                Token::Word(w) => {
+                    schema_name = w.value;
+                }
+                _ => {
+                    return self.expected("Schema name", token1);
+                }
+            }
+            match token3.token {
+                Token::Word(w) => {
+                    table_name = w.value;
+                }
+                _ => {
+                    return self.expected("Table name", token3);
+                }
+            }
+            Ok(Table {
+                table_name: Some(table_name),
+                schema_name: Some(schema_name),
+            })
+        } else {
+            match token1.token {
+                Token::Word(w) => {
+                    table_name = w.value;
+                }
+                _ => {
+                    return self.expected("Table name", token1);
+                }
+            }
+            Ok(Table {
+                table_name: Some(table_name),
+                schema_name: None,
+            })
+        }
+    }
+
+    /// Parse ASC or DESC; returns `Some(true)` for ASC, `Some(false)` for DESC,
+    /// or `None` if neither keyword is present.
+    pub fn parse_asc_desc(&mut self) -> Option<bool> {
+        if self.parse_keyword(Keyword::ASC) {
+            Some(true)
+        } else if self.parse_keyword(Keyword::DESC) {
+            Some(false)
+        } else {
+            None
+        }
+    }
+
+    pub fn parse_connect_by(&mut self) -> Result<ConnectBy, ParserError> {
+        let (condition, relationships) = if self.parse_keywords(&[Keyword::CONNECT, Keyword::BY]) {
+            let relationships = self.with_state(ParserState::ConnectBy, |parser| {
+                parser.parse_comma_separated(Parser::parse_expr)
+            })?;
+            self.expect_keywords(&[Keyword::START, Keyword::WITH])?;
+            let condition = self.parse_expr()?;
+            (condition, relationships)
+        } else {
+            self.expect_keywords(&[Keyword::START, Keyword::WITH])?;
+            let condition = self.parse_expr()?;
+            self.expect_keywords(&[Keyword::CONNECT, Keyword::BY])?;
+            let relationships = self.with_state(ParserState::ConnectBy, |parser| {
+                parser.parse_comma_separated(Parser::parse_expr)
+            })?;
+            (condition, relationships)
+        };
+        Ok(ConnectBy {
+            condition,
+            relationships,
+        })
+    }
+
+    /// Parse a CTE (`alias [( col1, col2, ... )] AS (subquery)`)
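+    // Illustrative aside (not part of this diff): both spellings are handled below,
+    //
+    //     WITH cte AS (SELECT 1 AS x) SELECT * FROM cte
+    //     WITH cte (x) AS (SELECT 1) SELECT * FROM cte
+    //
+    // the second branch parsing the parenthesized column list before AS.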
+    pub fn parse_cte(&mut self) -> Result<Cte, ParserError> {
+        let name = self.parse_identifier(false)?;
+
+        let mut cte = if self.parse_keyword(Keyword::AS) {
+            let mut is_materialized = None;
+            if dialect_of!(self is PostgreSqlDialect) {
+                if self.parse_keyword(Keyword::MATERIALIZED) {
+                    is_materialized = Some(CteAsMaterialized::Materialized);
+                } else if self.parse_keywords(&[Keyword::NOT, Keyword::MATERIALIZED]) {
+                    is_materialized = Some(CteAsMaterialized::NotMaterialized);
+                }
+            }
+            self.expect_token(&Token::LParen)?;
+
+            let query = self.parse_query()?;
+            let closing_paren_token = self.expect_token(&Token::RParen)?;
+
+            let alias = TableAlias {
+                name,
+                columns: vec![],
+            };
+            Cte {
+                alias,
+                query,
+                from: None,
+                materialized: is_materialized,
+                closing_paren_token: closing_paren_token.into(),
+            }
+        } else {
+            let columns = self.parse_table_alias_column_defs()?;
+            self.expect_keyword(Keyword::AS)?;
+            let mut is_materialized = None;
+            if dialect_of!(self is PostgreSqlDialect) {
+                if self.parse_keyword(Keyword::MATERIALIZED) {
+                    is_materialized = Some(CteAsMaterialized::Materialized);
+                } else if self.parse_keywords(&[Keyword::NOT, Keyword::MATERIALIZED]) {
+                    is_materialized = Some(CteAsMaterialized::NotMaterialized);
+                }
+            }
+            self.expect_token(&Token::LParen)?;
+
+            let query = self.parse_query()?;
+            let closing_paren_token = self.expect_token(&Token::RParen)?;
+
+            let alias = TableAlias { name, columns };
+            Cte {
+                alias,
+                query,
+                from: None,
+                materialized: is_materialized,
+                closing_paren_token: closing_paren_token.into(),
+            }
+        };
+        if self.parse_keyword(Keyword::FROM) {
+            cte.from = Some(self.parse_identifier(false)?);
+        }
+        Ok(cte)
+    }
+
+    pub fn parse_derived_table_factor(
+        &mut self,
+        lateral: IsLateral,
+    ) -> Result<TableFactor, ParserError> {
+        let subquery = self.parse_query()?;
+        self.expect_token(&Token::RParen)?;
+        let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
+        Ok(TableFactor::Derived {
+            lateral: match lateral {
+                Lateral => true,
+                NotLateral => false,
+            },
+            subquery,
+            alias,
+        })
+    }
+
+    /// Parses an expression with an optional alias
+    ///
+    /// Examples:
+    ///
+    /// ```sql
+    /// SUM(price) AS total_price
+    /// ```
+    /// ```sql
+    /// SUM(price)
+    /// ```
+    ///
+    /// Example
+    /// ```
+    /// # use sqlparser::parser::{Parser, ParserError};
+    /// # use sqlparser::dialect::GenericDialect;
+    /// # fn main() -> Result<(), ParserError> {
+    /// let sql = r#"SUM("a") as "b""#;
+    /// let mut parser = Parser::new(&GenericDialect).try_with_sql(sql)?;
+    /// let expr_with_alias = parser.parse_expr_with_alias()?;
+    /// assert_eq!(Some("b".to_string()), expr_with_alias.alias.map(|x|x.value));
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub fn parse_expr_with_alias(&mut self) -> Result<ExprWithAlias, ParserError> {
+        let expr = self.parse_expr()?;
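+        // Illustrative aside (not part of this diff): only an explicit AS is accepted
+        // here, so `SUM(price) total` (without AS) leaves the alias as None, unlike
+        // column aliases in a SELECT projection.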
+        let alias = if self.parse_keyword(Keyword::AS) {
+            Some(self.parse_identifier(false)?)
+        } else {
+            None
+        };
+
+        Ok(ExprWithAlias { expr, alias })
+    }
+
+    /// Parse a FETCH clause
+    pub fn parse_fetch(&mut self) -> Result<Fetch, ParserError> {
+        self.expect_one_of_keywords(&[Keyword::FIRST, Keyword::NEXT])?;
+        let (quantity, percent) = if self
+            .parse_one_of_keywords(&[Keyword::ROW, Keyword::ROWS])
+            .is_some()
+        {
+            (None, false)
+        } else {
+            let quantity = Expr::Value(self.parse_value()?);
+            let percent = self.parse_keyword(Keyword::PERCENT);
+            self.expect_one_of_keywords(&[Keyword::ROW, Keyword::ROWS])?;
+            (Some(quantity), percent)
+        };
+        let with_ties = if self.parse_keyword(Keyword::ONLY) {
+            false
+        } else if self.parse_keywords(&[Keyword::WITH, Keyword::TIES]) {
+            true
+        } else {
+            return self.expected("one of ONLY or WITH TIES", self.peek_token());
+        };
+        Ok(Fetch {
+            with_ties,
+            percent,
+            quantity,
+        })
+    }
+
+    pub fn parse_function_args(&mut self) -> Result<FunctionArg, ParserError> {
+        let arg = if self.dialect.supports_named_fn_args_with_expr_name() {
+            self.maybe_parse(|p| {
+                let name = p.parse_expr()?;
+                let operator = p.parse_function_named_arg_operator()?;
+                let arg = p.parse_wildcard_expr()?.into();
+                Ok(FunctionArg::ExprNamed {
+                    name,
+                    arg,
+                    operator,
+                })
+            })?
+        } else {
+            self.maybe_parse(|p| {
+                let name = p.parse_identifier(false)?;
+                let operator = p.parse_function_named_arg_operator()?;
+                let arg = p.parse_wildcard_expr()?.into();
+                Ok(FunctionArg::Named {
+                    name,
+                    arg,
+                    operator,
+                })
+            })?
+        };
+        if let Some(arg) = arg {
+            return Ok(arg);
+        }
+        Ok(FunctionArg::Unnamed(self.parse_wildcard_expr()?.into()))
+    }
+
+    // Parse an INTERPOLATE expression (ClickHouse dialect)
+    pub fn parse_interpolation(&mut self) -> Result<InterpolateExpr, ParserError> {
+        let column = self.parse_identifier(false)?;
+        let expr = if self.parse_keyword(Keyword::AS) {
+            Some(self.parse_expr()?)
+        } else {
+            None
+        };
+        Ok(InterpolateExpr { column, expr })
+    }
+
+    // Parse a set of comma-separated INTERPOLATE expressions (ClickHouse dialect)
+    // that follow the INTERPOLATE keyword in an ORDER BY clause with the WITH FILL modifier
+    pub fn parse_interpolations(&mut self) -> Result<Option<Interpolate>, ParserError> {
+        if !self.parse_keyword(Keyword::INTERPOLATE) {
+            return Ok(None);
+        }
+
+        if self.consume_token(&Token::LParen) {
+            let interpolations =
+                self.parse_comma_separated0(|p| p.parse_interpolation(), Token::RParen)?;
+            self.expect_token(&Token::RParen)?;
+            // INTERPOLATE () and INTERPOLATE ( ... ) variants
+            return Ok(Some(Interpolate {
+                exprs: Some(interpolations),
+            }));
+        }
+
+        // INTERPOLATE
+        Ok(Some(Interpolate { exprs: None }))
+    }
+
+    pub fn parse_join_constraint(&mut self, natural: bool) -> Result<JoinConstraint, ParserError> {
+        if natural {
+            Ok(JoinConstraint::Natural)
+        } else if self.parse_keyword(Keyword::ON) {
+            let constraint = self.parse_expr()?;
+            Ok(JoinConstraint::On(constraint))
+        } else if self.parse_keyword(Keyword::USING) {
+            let columns = self.parse_parenthesized_column_list(Mandatory, false)?;
+            Ok(JoinConstraint::Using(columns))
+        } else {
+            Ok(JoinConstraint::None)
+            //self.expected("ON, or USING after JOIN", self.peek_token())
+        }
+    }
+
+    /// Parses MySQL's JSON_TABLE column definition.
+ /// For example: `id INT EXISTS PATH '$' DEFAULT '0' ON EMPTY ERROR ON ERROR` + pub fn parse_json_table_column_def(&mut self) -> Result { + if self.parse_keyword(Keyword::NESTED) { + let _has_path_keyword = self.parse_keyword(Keyword::PATH); + let path = self.parse_value()?; + self.expect_keyword(Keyword::COLUMNS)?; + let columns = self.parse_parenthesized(|p| { + p.parse_comma_separated(Self::parse_json_table_column_def) + })?; + return Ok(JsonTableColumn::Nested(JsonTableNestedColumn { + path, + columns, + })); + } + let name = self.parse_identifier(false)?; + if self.parse_keyword(Keyword::FOR) { + self.expect_keyword(Keyword::ORDINALITY)?; + return Ok(JsonTableColumn::ForOrdinality(name)); + } + let r#type = self.parse_data_type()?; + let exists = self.parse_keyword(Keyword::EXISTS); + self.expect_keyword(Keyword::PATH)?; + let path = self.parse_value()?; + let mut on_empty = None; + let mut on_error = None; + while let Some(error_handling) = self.parse_json_table_column_error_handling()? { + if self.parse_keyword(Keyword::EMPTY) { + on_empty = Some(error_handling); + } else { + self.expect_keyword(Keyword::ERROR)?; + on_error = Some(error_handling); + } + } + Ok(JsonTableColumn::Named(JsonTableNamedColumn { + name, + r#type, + path, + exists, + on_empty, + on_error, + })) + } + + /// Parse a LIMIT clause + pub fn parse_limit(&mut self) -> Result, ParserError> { + if self.parse_keyword(Keyword::ALL) { + Ok(None) + } else { + Ok(Some(self.parse_expr()?)) + } + } + + /// Parse a FOR UPDATE/FOR SHARE clause + pub fn parse_lock(&mut self) -> Result { + let lock_type = match self.expect_one_of_keywords(&[Keyword::UPDATE, Keyword::SHARE])? { + Keyword::UPDATE => LockType::Update, + Keyword::SHARE => LockType::Share, + _ => unreachable!(), + }; + let of = if self.parse_keyword(Keyword::OF) { + Some(self.parse_object_name(false)?) + } else { + None + }; + let nonblock = if self.parse_keyword(Keyword::NOWAIT) { + Some(NonBlock::Nowait) + } else if self.parse_keywords(&[Keyword::SKIP, Keyword::LOCKED]) { + Some(NonBlock::SkipLocked) + } else { + None + }; + Ok(LockClause { + lock_type, + of, + nonblock, + }) + } + + /// Parses MSSQL's `OPENJSON WITH` column definition. + /// + /// ```sql + /// colName type [ column_path ] [ AS JSON ] + /// ``` + /// + /// Reference: + pub fn parse_openjson_table_column_def(&mut self) -> Result { + let name = self.parse_identifier(false)?; + let r#type = self.parse_data_type()?; + let path = if let Token::SingleQuotedString(path) = self.peek_token().token { + self.next_token(); + Some(path) + } else { + None + }; + let as_json = self.parse_keyword(Keyword::AS); + if as_json { + self.expect_keyword(Keyword::JSON)?; + } + Ok(OpenJsonTableColumn { + name, + r#type, + path, + as_json, + }) + } + + pub fn parse_optional_order_by(&mut self) -> Result, ParserError> { + if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + let order_by_exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?; + let interpolate = if dialect_of!(self is ClickHouseDialect | GenericDialect) { + self.parse_interpolations()? + } else { + None + }; + + Ok(Some(OrderBy { + exprs: order_by_exprs, + interpolate, + })) + } else { + Ok(None) + } + } + + /// Parse an [`Except`](ExceptSelectItem) information for wildcard select items. + /// + /// If it is not possible to parse it, will return an option. 
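+    // Illustrative aside (not part of this diff): in dialects where
+    // supports_select_wildcard_except() is true, this handles e.g.
+    //
+    //     SELECT * EXCEPT (department_id, salary) FROM employees
+    //
+    // as well as ClickHouse's unparenthesized `SELECT * EXCEPT salary FROM t`.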
+ pub fn parse_optional_select_item_except( + &mut self, + ) -> Result, ParserError> { + let opt_except = if self.parse_keyword(Keyword::EXCEPT) { + if self.peek_token().token == Token::LParen { + let idents = self.parse_parenthesized_column_list(Mandatory, false)?; + match &idents[..] { + [] => { + return self.expected( + "at least one column should be parsed by the expect clause", + self.peek_token(), + )?; + } + [first, idents @ ..] => Some(ExceptSelectItem { + first_element: first.clone(), + additional_elements: idents.to_vec(), + }), + } + } else { + // Clickhouse allows EXCEPT column_name + let ident = self.parse_identifier(false)?; + Some(ExceptSelectItem { + first_element: ident, + additional_elements: vec![], + }) + } + } else { + None + }; + + Ok(opt_except) + } + + /// Parse an [`Exclude`](ExcludeSelectItem) information for wildcard select items. + /// + /// If it is not possible to parse it, will return an option. + pub fn parse_optional_select_item_exclude( + &mut self, + ) -> Result, ParserError> { + let opt_exclude = if self.parse_keyword(Keyword::EXCLUDE) { + if self.consume_token(&Token::LParen) { + let columns = + self.parse_comma_separated(|parser| parser.parse_identifier(false))?; + self.expect_token(&Token::RParen)?; + Some(ExcludeSelectItem::Multiple(columns)) + } else { + let column = self.parse_identifier(false)?; + Some(ExcludeSelectItem::Single(column)) + } + } else { + None + }; + + Ok(opt_exclude) + } + + /// Parse an [`Ilike`](IlikeSelectItem) information for wildcard select items. + /// + /// If it is not possible to parse it, will return an option. + pub fn parse_optional_select_item_ilike( + &mut self, + ) -> Result, ParserError> { + let opt_ilike = if self.parse_keyword(Keyword::ILIKE) { + let next_token = self.next_token(); + let pattern = match next_token.token { + Token::SingleQuotedString(s) => s, + _ => return self.expected("ilike pattern", next_token), + }; + Some(IlikeSelectItem { pattern }) + } else { + None + }; + Ok(opt_ilike) + } + + /// Parse a [`Rename`](RenameSelectItem) information for wildcard select items. + pub fn parse_optional_select_item_rename( + &mut self, + ) -> Result, ParserError> { + let opt_rename = if self.parse_keyword(Keyword::RENAME) { + if self.consume_token(&Token::LParen) { + let idents = + self.parse_comma_separated(|parser| parser.parse_identifier_with_alias())?; + self.expect_token(&Token::RParen)?; + Some(RenameSelectItem::Multiple(idents)) + } else { + let ident = self.parse_identifier_with_alias()?; + Some(RenameSelectItem::Single(ident)) + } + } else { + None + }; + + Ok(opt_rename) + } + + /// Parse a [`Replace`](ReplaceSelectItem) information for wildcard select items. + pub fn parse_optional_select_item_replace( + &mut self, + ) -> Result, ParserError> { + let opt_replace = if self.parse_keyword(Keyword::REPLACE) { + if self.consume_token(&Token::LParen) { + let items = self.parse_comma_separated(|parser| { + Ok(Box::new(parser.parse_replace_elements()?)) + })?; + self.expect_token(&Token::RParen)?; + Some(ReplaceSelectItem { items }) + } else { + let tok = self.next_token(); + return self.expected("( after REPLACE but", tok); + } + } else { + None + }; + + Ok(opt_replace) + } + + /// Parse `AS identifier` when the AS is describing a table-valued object, + /// like in `... FROM generate_series(1, 10) AS t (col)`. In this case + /// the alias is allowed to optionally name the columns in the table, in + /// addition to the table itself. 
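+    // Illustrative aside (not part of this diff): e.g. in
+    //
+    //     SELECT n FROM generate_series(1, 3) AS t (n)
+    //
+    // the alias names the table `t` and its single output column `n`.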
+ pub fn parse_optional_table_alias( + &mut self, + reserved_kwds: &[Keyword], + ) -> Result, ParserError> { + match self.parse_optional_alias(reserved_kwds)? { + Some(name) => { + let columns = self.parse_table_alias_column_defs()?; + Ok(Some(TableAlias { name, columns })) + } + None => Ok(None), + } + } + + /// Parse an expression, optionally followed by ASC or DESC (used in ORDER BY) + pub fn parse_order_by_expr(&mut self) -> Result { + let expr = self.parse_expr()?; + + let asc = self.parse_asc_desc(); + + let nulls_first = if self.parse_keywords(&[Keyword::NULLS, Keyword::FIRST]) { + Some(true) + } else if self.parse_keywords(&[Keyword::NULLS, Keyword::LAST]) { + Some(false) + } else { + None + }; + + let with_fill = if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keywords(&[Keyword::WITH, Keyword::FILL]) + { + Some(self.parse_with_fill()?) + } else { + None + }; + + Ok(OrderByExpr { + expr, + asc, + nulls_first, + with_fill, + }) + } + + /// Parse a comma-separated list of 1+ SelectItem + pub fn parse_projection(&mut self) -> Result, ParserError> { + // BigQuery and Snowflake allow trailing commas, but only in project lists + // e.g. `SELECT 1, 2, FROM t` + // https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#trailing_commas + // https://docs.snowflake.com/en/release-notes/2024/8_11#select-supports-trailing-commas + + let trailing_commas = + self.options.trailing_commas | self.dialect.supports_projection_trailing_commas(); + + self.parse_comma_separated_with_trailing_commas(|p| p.parse_select_item(), trailing_commas) + } + + pub fn parse_replace_elements(&mut self) -> Result { + let expr = self.parse_expr()?; + let as_keyword = self.parse_keyword(Keyword::AS); + let ident = self.parse_identifier(false)?; + Ok(ReplaceSelectElement { + expr, + column_name: ident, + as_keyword, + }) + } + + /// Parse a comma-delimited list of projections after SELECT + pub fn parse_select_item(&mut self) -> Result { + match self.parse_wildcard_expr()? { + Expr::QualifiedWildcard(prefix, token) => Ok(SelectItem::QualifiedWildcard( + prefix, + self.parse_wildcard_additional_options(token.0)?, + )), + Expr::Wildcard(token) => Ok(SelectItem::Wildcard( + self.parse_wildcard_additional_options(token.0)?, + )), + Expr::Identifier(v) if v.value.to_lowercase() == "from" && v.quote_style.is_none() => { + parser_err!( + format!("Expected an expression, found: {}", v), + self.peek_token().span.start + ) + } + Expr::BinaryOp { + left, + op: BinaryOperator::Eq, + right, + } if self.dialect.supports_eq_alias_assignment() + && matches!(left.as_ref(), Expr::Identifier(_)) => + { + let Expr::Identifier(alias) = *left else { + return parser_err!( + "BUG: expected identifier expression as alias", + self.peek_token().span.start + ); + }; + Ok(SelectItem::ExprWithAlias { + expr: *right, + alias, + }) + } + expr => self + .parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS) + .map(|alias| match alias { + Some(alias) => SelectItem::ExprWithAlias { expr, alias }, + None => SelectItem::UnnamedExpr(expr), + }), + } + } + + pub fn parse_table_and_joins(&mut self) -> Result { + let relation = self.parse_table_factor()?; + // Note that for keywords to be properly handled here, they need to be + // added to `RESERVED_FOR_TABLE_ALIAS`, otherwise they may be parsed as + // a table alias. 
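+        // Illustrative aside (not part of this diff): each loop iteration consumes one
+        // join, so `FROM a NATURAL JOIN b LEFT JOIN c ON a.id = c.id` produces a single
+        // TableWithJoins { relation: a, joins: [NATURAL JOIN b, LEFT OUTER JOIN c] }.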
+ let mut joins = vec![]; + loop { + let global = self.parse_keyword(Keyword::GLOBAL); + let join = if self.parse_keyword(Keyword::CROSS) { + let join_operator = if self.parse_keyword(Keyword::JOIN) { + JoinOperator::CrossJoin + } else if self.parse_keyword(Keyword::APPLY) { + // MSSQL extension, similar to CROSS JOIN LATERAL + JoinOperator::CrossApply + } else { + return self.expected("JOIN or APPLY after CROSS", self.peek_token()); + }; + Join { + relation: self.parse_table_factor()?, + global, + join_operator, + } + } else if self.parse_keyword(Keyword::OUTER) { + // MSSQL extension, similar to LEFT JOIN LATERAL .. ON 1=1 + self.expect_keyword(Keyword::APPLY)?; + Join { + relation: self.parse_table_factor()?, + global, + join_operator: JoinOperator::OuterApply, + } + } else if self.parse_keyword(Keyword::ASOF) { + self.expect_keyword(Keyword::JOIN)?; + let relation = self.parse_table_factor()?; + self.expect_keyword(Keyword::MATCH_CONDITION)?; + let match_condition = self.parse_parenthesized(Self::parse_expr)?; + Join { + relation, + global, + join_operator: JoinOperator::AsOf { + match_condition, + constraint: self.parse_join_constraint(false)?, + }, + } + } else { + let natural = self.parse_keyword(Keyword::NATURAL); + let peek_keyword = if let Token::Word(w) = self.peek_token().token { + w.keyword + } else { + Keyword::NoKeyword + }; + + let join_operator_type = match peek_keyword { + Keyword::INNER | Keyword::JOIN => { + let _ = self.parse_keyword(Keyword::INNER); // [ INNER ] + self.expect_keyword(Keyword::JOIN)?; + JoinOperator::Inner + } + kw @ Keyword::LEFT | kw @ Keyword::RIGHT => { + let _ = self.next_token(); // consume LEFT/RIGHT + let is_left = kw == Keyword::LEFT; + let join_type = self.parse_one_of_keywords(&[ + Keyword::OUTER, + Keyword::SEMI, + Keyword::ANTI, + Keyword::JOIN, + ]); + match join_type { + Some(Keyword::OUTER) => { + self.expect_keyword(Keyword::JOIN)?; + if is_left { + JoinOperator::LeftOuter + } else { + JoinOperator::RightOuter + } + } + Some(Keyword::SEMI) => { + self.expect_keyword(Keyword::JOIN)?; + if is_left { + JoinOperator::LeftSemi + } else { + JoinOperator::RightSemi + } + } + Some(Keyword::ANTI) => { + self.expect_keyword(Keyword::JOIN)?; + if is_left { + JoinOperator::LeftAnti + } else { + JoinOperator::RightAnti + } + } + Some(Keyword::JOIN) => { + if is_left { + JoinOperator::LeftOuter + } else { + JoinOperator::RightOuter + } + } + _ => { + return Err(ParserError::ParserError(format!( + "expected OUTER, SEMI, ANTI or JOIN after {kw:?}" + ))) + } + } + } + Keyword::ANTI => { + let _ = self.next_token(); // consume ANTI + self.expect_keyword(Keyword::JOIN)?; + JoinOperator::Anti + } + Keyword::SEMI => { + let _ = self.next_token(); // consume SEMI + self.expect_keyword(Keyword::JOIN)?; + JoinOperator::Semi + } + Keyword::FULL => { + let _ = self.next_token(); // consume FULL + let _ = self.parse_keyword(Keyword::OUTER); // [ OUTER ] + self.expect_keyword(Keyword::JOIN)?; + JoinOperator::FullOuter + } + Keyword::OUTER => { + return self.expected("LEFT, RIGHT, or FULL", self.peek_token()); + } + _ if natural => { + return self.expected("a join type after NATURAL", self.peek_token()); + } + _ => break, + }; + let relation = self.parse_table_factor()?; + let join_constraint = self.parse_join_constraint(natural)?; + Join { + relation, + global, + join_operator: join_operator_type(join_constraint), + } + }; + joins.push(join); + } + Ok(TableWithJoins { relation, joins }) + } + + /// Parse a given table version specifier. 
+ /// + /// For now it only supports timestamp versioning for BigQuery and MSSQL dialects. + pub fn parse_table_version(&mut self) -> Result, ParserError> { + if dialect_of!(self is BigQueryDialect | MsSqlDialect) + && self.parse_keywords(&[Keyword::FOR, Keyword::SYSTEM_TIME, Keyword::AS, Keyword::OF]) + { + let expr = self.parse_expr()?; + Ok(Some(TableVersion::ForSystemTimeAsOf(expr))) + } else { + Ok(None) + } + } + + /// Parse an [`WildcardAdditionalOptions`] information for wildcard select items. + /// + /// If it is not possible to parse it, will return an option. + pub fn parse_wildcard_additional_options( + &mut self, + wildcard_token: TokenWithSpan, + ) -> Result { + let opt_ilike = if dialect_of!(self is GenericDialect | SnowflakeDialect) { + self.parse_optional_select_item_ilike()? + } else { + None + }; + let opt_exclude = if opt_ilike.is_none() + && dialect_of!(self is GenericDialect | DuckDbDialect | SnowflakeDialect) + { + self.parse_optional_select_item_exclude()? + } else { + None + }; + let opt_except = if self.dialect.supports_select_wildcard_except() { + self.parse_optional_select_item_except()? + } else { + None + }; + let opt_replace = if dialect_of!(self is GenericDialect | BigQueryDialect | ClickHouseDialect | DuckDbDialect | SnowflakeDialect) + { + self.parse_optional_select_item_replace()? + } else { + None + }; + let opt_rename = if dialect_of!(self is GenericDialect | SnowflakeDialect) { + self.parse_optional_select_item_rename()? + } else { + None + }; + + Ok(WildcardAdditionalOptions { + wildcard_token: wildcard_token.into(), + opt_ilike, + opt_exclude, + opt_except, + opt_rename, + opt_replace, + }) + } + + // Parse a WITH FILL clause (ClickHouse dialect) + // that follow the WITH FILL keywords in a ORDER BY clause + pub fn parse_with_fill(&mut self) -> Result { + let from = if self.parse_keyword(Keyword::FROM) { + Some(self.parse_expr()?) + } else { + None + }; + + let to = if self.parse_keyword(Keyword::TO) { + Some(self.parse_expr()?) + } else { + None + }; + + let step = if self.parse_keyword(Keyword::STEP) { + Some(self.parse_expr()?) + } else { + None + }; + + Ok(WithFill { from, to, step }) + } + + fn parse_settings(&mut self) -> Result>, ParserError> { + let settings = if dialect_of!(self is ClickHouseDialect|GenericDialect) + && self.parse_keyword(Keyword::SETTINGS) + { + let key_values = self.parse_comma_separated(|p| { + let key = p.parse_identifier(false)?; + p.expect_token(&Token::Eq)?; + let value = p.parse_value()?; + Ok(Setting { key, value }) + })?; + Some(key_values) + } else { + None + }; + Ok(settings) + } + + /// Parse an UPDATE statement, returning a `Box`ed SetExpr + /// + /// This is used to reduce the size of the stack frames in debug builds + fn parse_update_setexpr_boxed(&mut self) -> Result, ParserError> { + Ok(Box::new(SetExpr::Update(self.parse_update()?))) + } + + /// Parse a "query body", which is an expression with roughly the + /// following grammar: + /// ```sql + /// query_body ::= restricted_select | '(' subquery ')' | set_operation + /// restricted_select ::= 'SELECT' [expr_list] [ from ] [ where ] [ groupby_having ] + /// subquery ::= query_body [ order_by_limit ] + /// set_operation ::= query_body { 'UNION' | 'EXCEPT' | 'INTERSECT' } [ 'ALL' ] query_body + /// ``` + pub fn parse_query_body(&mut self, precedence: u8) -> Result, ParserError> { + // We parse the expression using a Pratt parser, as in `parse_expr()`. 
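+        // Illustrative aside (not part of this diff): with UNION/EXCEPT bound at
+        // precedence 10 and INTERSECT at 20 (see parse_remaining_set_exprs below),
+        // `SELECT 1 UNION SELECT 2 INTERSECT SELECT 3` groups as
+        // `SELECT 1 UNION (SELECT 2 INTERSECT SELECT 3)`.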
+ // Start by parsing a restricted SELECT or a `(subquery)`: + let expr = if self.peek_keyword(Keyword::SELECT) { + SetExpr::Select(self.parse_select().map(Box::new)?) + } else if self.consume_token(&Token::LParen) { + // CTEs are not allowed here, but the parser currently accepts them + let subquery = self.parse_query()?; + self.expect_token(&Token::RParen)?; + SetExpr::Query(subquery) + } else if self.parse_keyword(Keyword::VALUES) { + let is_mysql = dialect_of!(self is MySqlDialect); + SetExpr::Values(self.parse_values(is_mysql)?) + } else if self.parse_keyword(Keyword::TABLE) { + SetExpr::Table(Box::new(self.parse_as_table()?)) + } else { + return self.expected( + "SELECT, VALUES, or a subquery in the query body", + self.peek_token(), + ); + }; + + self.parse_remaining_set_exprs(expr, precedence) + } + + /// Parse any extra set expressions that may be present in a query body + /// + /// (this is its own function to reduce required stack size in debug builds) + fn parse_remaining_set_exprs( + &mut self, + mut expr: SetExpr, + precedence: u8, + ) -> Result, ParserError> { + loop { + // The query can be optionally followed by a set operator: + let op = self.parse_set_operator(&self.peek_token().token); + let next_precedence = match op { + // UNION and EXCEPT have the same binding power and evaluate left-to-right + Some(SetOperator::Union) | Some(SetOperator::Except) => 10, + // INTERSECT has higher precedence than UNION/EXCEPT + Some(SetOperator::Intersect) => 20, + // Unexpected token or EOF => stop parsing the query body + None => break, + }; + if precedence >= next_precedence { + break; + } + self.next_token(); // skip past the set operator + let set_quantifier = self.parse_set_quantifier(&op); + expr = SetExpr::SetOperation { + left: Box::new(expr), + op: op.unwrap(), + set_quantifier, + right: self.parse_query_body(next_precedence)?, + }; + } + + Ok(expr.into()) + } + + pub fn parse_set_operator(&mut self, token: &Token) -> Option { + match token { + Token::Word(w) if w.keyword == Keyword::UNION => Some(SetOperator::Union), + Token::Word(w) if w.keyword == Keyword::EXCEPT => Some(SetOperator::Except), + Token::Word(w) if w.keyword == Keyword::INTERSECT => Some(SetOperator::Intersect), + _ => None, + } + } + + pub fn parse_set_quantifier(&mut self, op: &Option) -> SetQuantifier { + match op { + Some(SetOperator::Except | SetOperator::Intersect | SetOperator::Union) => { + if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { + SetQuantifier::DistinctByName + } else if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { + SetQuantifier::ByName + } else if self.parse_keyword(Keyword::ALL) { + if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { + SetQuantifier::AllByName + } else { + SetQuantifier::All + } + } else if self.parse_keyword(Keyword::DISTINCT) { + SetQuantifier::Distinct + } else { + SetQuantifier::None + } + } + _ => SetQuantifier::None, + } + } + + /// Parse a restricted `SELECT` statement (no CTEs / `UNION` / `ORDER BY`) + pub fn parse_select(&mut self) -> Result { + let select_token = self.expect_keyword(Keyword::SELECT)?; + let value_table_mode = + if dialect_of!(self is BigQueryDialect) && self.parse_keyword(Keyword::AS) { + if self.parse_keyword(Keyword::VALUE) { + Some(ValueTableMode::AsValue) + } else if self.parse_keyword(Keyword::STRUCT) { + Some(ValueTableMode::AsStruct) + } else { + self.expected("VALUE or STRUCT", self.peek_token())? 
+ } + } else { + None + }; + + let mut top_before_distinct = false; + let mut top = None; + if self.dialect.supports_top_before_distinct() && self.parse_keyword(Keyword::TOP) { + top = Some(self.parse_top()?); + top_before_distinct = true; + } + let distinct = self.parse_all_or_distinct()?; + if !self.dialect.supports_top_before_distinct() && self.parse_keyword(Keyword::TOP) { + top = Some(self.parse_top()?); + } + + let projection = self.parse_projection()?; + + let into = if self.parse_keyword(Keyword::INTO) { + let temporary = self + .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY]) + .is_some(); + let unlogged = self.parse_keyword(Keyword::UNLOGGED); + let table = self.parse_keyword(Keyword::TABLE); + let name = self.parse_object_name(false)?; + Some(SelectInto { + temporary, + unlogged, + table, + name, + }) + } else { + None + }; + + // Note that for keywords to be properly handled here, they need to be + // added to `RESERVED_FOR_COLUMN_ALIAS` / `RESERVED_FOR_TABLE_ALIAS`, + // otherwise they may be parsed as an alias as part of the `projection` + // or `from`. + + let from = if self.parse_keyword(Keyword::FROM) { + self.parse_comma_separated(Parser::parse_table_and_joins)? + } else { + vec![] + }; + + let mut lateral_views = vec![]; + loop { + if self.parse_keywords(&[Keyword::LATERAL, Keyword::VIEW]) { + let outer = self.parse_keyword(Keyword::OUTER); + let lateral_view = self.parse_expr()?; + let lateral_view_name = self.parse_object_name(false)?; + let lateral_col_alias = self + .parse_comma_separated(|parser| { + parser.parse_optional_alias(&[ + Keyword::WHERE, + Keyword::GROUP, + Keyword::CLUSTER, + Keyword::HAVING, + Keyword::LATERAL, + ]) // This couldn't possibly be a bad idea + })? + .into_iter() + .flatten() + .collect(); + + lateral_views.push(LateralView { + lateral_view, + lateral_view_name, + lateral_col_alias, + outer, + }); + } else { + break; + } + } + + let prewhere = if dialect_of!(self is ClickHouseDialect|GenericDialect) + && self.parse_keyword(Keyword::PREWHERE) + { + Some(self.parse_expr()?) + } else { + None + }; + + let selection = if self.parse_keyword(Keyword::WHERE) { + Some(self.parse_expr()?) + } else { + None + }; + + let group_by = self + .parse_optional_group_by()? + .unwrap_or_else(|| GroupByExpr::Expressions(vec![], vec![])); + + let cluster_by = if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + + let distribute_by = if self.parse_keywords(&[Keyword::DISTRIBUTE, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + + let sort_by = if self.parse_keywords(&[Keyword::SORT, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + + let having = if self.parse_keyword(Keyword::HAVING) { + Some(self.parse_expr()?) + } else { + None + }; + + // Accept QUALIFY and WINDOW in any order and flag accordingly. 
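+        // Illustrative aside (not part of this diff): `window_before_qualify` only
+        // records which clause appeared first, so the AST can be displayed in the
+        // same order it was written (e.g. `... WINDOW w AS (...) QUALIFY rn = 1`).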
+ let (named_windows, qualify, window_before_qualify) = if self.parse_keyword(Keyword::WINDOW) + { + let named_windows = self.parse_comma_separated(Parser::parse_named_window)?; + if self.parse_keyword(Keyword::QUALIFY) { + (named_windows, Some(self.parse_expr()?), true) + } else { + (named_windows, None, true) + } + } else if self.parse_keyword(Keyword::QUALIFY) { + let qualify = Some(self.parse_expr()?); + if self.parse_keyword(Keyword::WINDOW) { + ( + self.parse_comma_separated(Parser::parse_named_window)?, + qualify, + false, + ) + } else { + (Default::default(), qualify, false) + } + } else { + Default::default() + }; + + let connect_by = if self.dialect.supports_connect_by() + && self + .parse_one_of_keywords(&[Keyword::START, Keyword::CONNECT]) + .is_some() + { + self.prev_token(); + Some(self.parse_connect_by()?) + } else { + None + }; + + Ok(Select { + select_token: AttachedToken(select_token), + distinct, + top, + top_before_distinct, + projection, + into, + from, + lateral_views, + prewhere, + selection, + group_by, + cluster_by, + distribute_by, + sort_by, + having, + named_window: named_windows, + window_before_qualify, + qualify, + value_table_mode, + connect_by, + }) + } + + /// Parse an OFFSET clause + pub fn parse_offset(&mut self) -> Result { + let value = self.parse_expr()?; + let rows = if self.parse_keyword(Keyword::ROW) { + OffsetRows::Row + } else if self.parse_keyword(Keyword::ROWS) { + OffsetRows::Rows + } else { + OffsetRows::None + }; + Ok(Offset { value, rows }) + } + + pub fn parse_pivot_table_factor( + &mut self, + table: TableFactor, + ) -> Result { + self.expect_token(&Token::LParen)?; + let aggregate_functions = self.parse_comma_separated(Self::parse_aliased_function_call)?; + self.expect_keyword(Keyword::FOR)?; + let value_column = self.parse_object_name(false)?.0; + self.expect_keyword(Keyword::IN)?; + + self.expect_token(&Token::LParen)?; + let value_source = if self.parse_keyword(Keyword::ANY) { + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_order_by_expr)? + } else { + vec![] + }; + PivotValueSource::Any(order_by) + } else if self.peek_sub_query() { + PivotValueSource::Subquery(self.parse_query()?) + } else { + PivotValueSource::List(self.parse_comma_separated(Self::parse_expr_with_alias)?) + }; + self.expect_token(&Token::RParen)?; + + let default_on_null = + if self.parse_keywords(&[Keyword::DEFAULT, Keyword::ON, Keyword::NULL]) { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Some(expr) + } else { + None + }; + + self.expect_token(&Token::RParen)?; + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::Pivot { + table: Box::new(table), + aggregate_functions, + value_column, + value_source, + default_on_null, + alias, + }) + } + + /// Parse a TOP clause, MSSQL equivalent of LIMIT, + /// that follows after `SELECT [DISTINCT]`. 
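+    // Illustrative aside (not part of this diff): all of the following forms are
+    // accepted by this function:
+    //
+    //     SELECT TOP 10 * FROM t
+    //     SELECT TOP (10) PERCENT * FROM t
+    //     SELECT TOP 10 WITH TIES * FROM t ORDER BY c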
+ pub fn parse_top(&mut self) -> Result { + let quantity = if self.consume_token(&Token::LParen) { + let quantity = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Some(TopQuantity::Expr(quantity)) + } else { + let next_token = self.next_token(); + let quantity = match next_token.token { + Token::Number(s, _) => Self::parse::(s, next_token.span.start)?, + _ => self.expected("literal int", next_token)?, + }; + Some(TopQuantity::Constant(quantity)) + }; + + let percent = self.parse_keyword(Keyword::PERCENT); + + let with_ties = self.parse_keywords(&[Keyword::WITH, Keyword::TIES]); + + Ok(Top { + with_ties, + percent, + quantity, + }) + } + + /// A table name or a parenthesized subquery, followed by optional `[AS] alias` + pub fn parse_table_factor(&mut self) -> Result { + if self.parse_keyword(Keyword::LATERAL) { + // LATERAL must always be followed by a subquery or table function. + if self.consume_token(&Token::LParen) { + self.parse_derived_table_factor(Lateral) + } else { + let name = self.parse_object_name(false)?; + self.expect_token(&Token::LParen)?; + let args = self.parse_optional_args()?; + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::Function { + lateral: true, + name, + args, + alias, + }) + } + } else if self.parse_keyword(Keyword::TABLE) { + // parse table function (SELECT * FROM TABLE () [ AS ]) + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::TableFunction { expr, alias }) + } else if self.consume_token(&Token::LParen) { + // A left paren introduces either a derived table (i.e., a subquery) + // or a nested join. It's nearly impossible to determine ahead of + // time which it is... so we just try to parse both. + // + // Here's an example that demonstrates the complexity: + // /-------------------------------------------------------\ + // | /-----------------------------------\ | + // SELECT * FROM ( ( ( (SELECT 1) UNION (SELECT 2) ) AS t1 NATURAL JOIN t2 ) ) + // ^ ^ ^ ^ + // | | | | + // | | | | + // | | | (4) belongs to a SetExpr::Query inside the subquery + // | | (3) starts a derived table (subquery) + // | (2) starts a nested join + // (1) an additional set of parens around a nested join + // + + // If the recently consumed '(' starts a derived table, the call to + // `parse_derived_table_factor` below will return success after parsing the + // subquery, followed by the closing ')', and the alias of the derived table. + // In the example above this is case (3). + if let Some(mut table) = + self.maybe_parse(|parser| parser.parse_derived_table_factor(NotLateral))? + { + while let Some(kw) = self.parse_one_of_keywords(&[Keyword::PIVOT, Keyword::UNPIVOT]) + { + table = match kw { + Keyword::PIVOT => self.parse_pivot_table_factor(table)?, + Keyword::UNPIVOT => self.parse_unpivot_table_factor(table)?, + _ => unreachable!(), + } + } + return Ok(table); + } + + // A parsing error from `parse_derived_table_factor` indicates that the '(' we've + // recently consumed does not start a derived table (cases 1, 2, or 4). + // `maybe_parse` will ignore such an error and rewind to be after the opening '('. + + // Inside the parentheses we expect to find an (A) table factor + // followed by some joins or (B) another level of nesting. 
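+            // Illustrative aside (not part of this diff): for example,
+            // `FROM ((SELECT 1) AS t1 JOIN t2 ON true)` fails the derived-table
+            // attempt above (the subquery is followed by JOIN, not ')'), so it is
+            // re-parsed here as a table factor plus joins, i.e. case (A).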
+ let mut table_and_joins = self.parse_table_and_joins()?; + + #[allow(clippy::if_same_then_else)] + if !table_and_joins.joins.is_empty() { + self.expect_token(&Token::RParen)?; + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::NestedJoin { + table_with_joins: Box::new(table_and_joins), + alias, + }) // (A) + } else if let TableFactor::NestedJoin { + table_with_joins: _, + alias: _, + } = &table_and_joins.relation + { + // (B): `table_and_joins` (what we found inside the parentheses) + // is a nested join `(foo JOIN bar)`, not followed by other joins. + self.expect_token(&Token::RParen)?; + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::NestedJoin { + table_with_joins: Box::new(table_and_joins), + alias, + }) + } else if dialect_of!(self is SnowflakeDialect | GenericDialect) { + // Dialect-specific behavior: Snowflake diverges from the + // standard and from most of the other implementations by + // allowing extra parentheses not only around a join (B), but + // around lone table names (e.g. `FROM (mytable [AS alias])`) + // and around derived tables (e.g. `FROM ((SELECT ...) + // [AS alias])`) as well. + self.expect_token(&Token::RParen)?; + + if let Some(outer_alias) = + self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)? + { + // Snowflake also allows specifying an alias *after* parens + // e.g. `FROM (mytable) AS alias` + match &mut table_and_joins.relation { + TableFactor::Derived { alias, .. } + | TableFactor::Table { alias, .. } + | TableFactor::Function { alias, .. } + | TableFactor::UNNEST { alias, .. } + | TableFactor::JsonTable { alias, .. } + | TableFactor::OpenJsonTable { alias, .. } + | TableFactor::TableFunction { alias, .. } + | TableFactor::Pivot { alias, .. } + | TableFactor::Unpivot { alias, .. } + | TableFactor::MatchRecognize { alias, .. } + | TableFactor::NestedJoin { alias, .. } => { + // but not `FROM (mytable AS alias1) AS alias2`. + if let Some(inner_alias) = alias { + return Err(ParserError::ParserError(format!( + "duplicate alias {inner_alias}" + ))); + } + // Act as if the alias was specified normally next + // to the table name: `(mytable) AS alias` -> + // `(mytable AS alias)` + alias.replace(outer_alias); + } + }; + } + // Do not store the extra set of parens in the AST + Ok(table_and_joins.relation) + } else { + // The SQL spec prohibits derived tables and bare tables from + // appearing alone in parentheses (e.g. `FROM (mytable)`) + self.expected("joined table", self.peek_token()) + } + } else if dialect_of!(self is SnowflakeDialect | DatabricksDialect | GenericDialect) + && matches!( + self.peek_tokens(), + [ + Token::Word(Word { + keyword: Keyword::VALUES, + .. + }), + Token::LParen + ] + ) + { + self.expect_keyword(Keyword::VALUES)?; + + // Snowflake and Databricks allow syntax like below: + // SELECT * FROM VALUES (1, 'a'), (2, 'b') AS t (col1, col2) + // where there are no parentheses around the VALUES clause. 
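+            // Illustrative aside (not part of this diff): the missing parentheses are
+            // normalized away here; the factor built below is essentially the same
+            // Derived subquery the parser would produce for
+            // `FROM (VALUES (1, 'a'), (2, 'b')) AS t (col1, col2)`.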
+ let values = SetExpr::Values(self.parse_values(false)?); + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::Derived { + lateral: false, + subquery: Box::new(Query { + with: None, + body: Box::new(values), + order_by: None, + limit: None, + limit_by: vec![], + offset: None, + fetch: None, + locks: vec![], + for_clause: None, + settings: None, + format_clause: None, + }), + alias, + }) + } else if dialect_of!(self is BigQueryDialect | PostgreSqlDialect | GenericDialect) + && self.parse_keyword(Keyword::UNNEST) + { + self.expect_token(&Token::LParen)?; + let array_exprs = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + + let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); + let alias = match self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS) { + Ok(Some(alias)) => Some(alias), + Ok(None) => None, + Err(e) => return Err(e), + }; + + let with_offset = match self.expect_keywords(&[Keyword::WITH, Keyword::OFFSET]) { + Ok(()) => true, + Err(_) => false, + }; + + let with_offset_alias = if with_offset { + match self.parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS) { + Ok(Some(alias)) => Some(alias), + Ok(None) => None, + Err(e) => return Err(e), + } + } else { + None + }; + + Ok(TableFactor::UNNEST { + alias, + array_exprs, + with_offset, + with_offset_alias, + with_ordinality, + }) + } else if self.parse_keyword_with_tokens(Keyword::JSON_TABLE, &[Token::LParen]) { + let json_expr = self.parse_expr()?; + self.expect_token(&Token::Comma)?; + let json_path = self.parse_value()?; + self.expect_keyword(Keyword::COLUMNS)?; + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(Parser::parse_json_table_column_def)?; + self.expect_token(&Token::RParen)?; + self.expect_token(&Token::RParen)?; + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::JsonTable { + json_expr, + json_path, + columns, + alias, + }) + } else if self.parse_keyword_with_tokens(Keyword::OPENJSON, &[Token::LParen]) { + self.prev_token(); + self.parse_open_json_table_factor() + } else { + let name = self.parse_object_name(true)?; + + let json_path = match self.peek_token().token { + Token::LBracket if self.dialect.supports_partiql() => Some(self.parse_json_path()?), + _ => None, + }; + + let partitions: Vec = if dialect_of!(self is MySqlDialect | GenericDialect) + && self.parse_keyword(Keyword::PARTITION) + { + self.parse_parenthesized_identifiers()? + } else { + vec![] + }; + + // Parse potential version qualifier + let version = self.parse_table_version()?; + + // Postgres, MSSQL, ClickHouse: table-valued functions: + let args = if self.consume_token(&Token::LParen) { + Some(self.parse_table_function_args()?) 
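+                // Illustrative aside (not part of this diff): this branch is what lets
+                // `SELECT * FROM generate_series(1, 10)` parse as a table-valued
+                // function; a plain `FROM t` falls through to the None arm below.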
+ } else { + None + }; + + let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); + + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + + // MSSQL-specific table hints: + let mut with_hints = vec![]; + if self.parse_keyword(Keyword::WITH) { + if self.consume_token(&Token::LParen) { + with_hints = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + } else { + // rewind, as WITH may belong to the next statement's CTE + self.prev_token(); + } + }; + + let mut table = TableFactor::Table { + name, + alias, + args, + with_hints, + version, + partitions, + with_ordinality, + json_path, + }; + + while let Some(kw) = self.parse_one_of_keywords(&[Keyword::PIVOT, Keyword::UNPIVOT]) { + table = match kw { + Keyword::PIVOT => self.parse_pivot_table_factor(table)?, + Keyword::UNPIVOT => self.parse_unpivot_table_factor(table)?, + _ => unreachable!(), + } + } + + if self.dialect.supports_match_recognize() + && self.parse_keyword(Keyword::MATCH_RECOGNIZE) + { + table = self.parse_match_recognize(table)?; + } + + Ok(table) + } + } + + pub fn parse_unpivot_table_factor( + &mut self, + table: TableFactor, + ) -> Result { + self.expect_token(&Token::LParen)?; + let value = self.parse_identifier(false)?; + self.expect_keyword(Keyword::FOR)?; + let name = self.parse_identifier(false)?; + self.expect_keyword(Keyword::IN)?; + let columns = self.parse_parenthesized_column_list(Mandatory, false)?; + self.expect_token(&Token::RParen)?; + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::Unpivot { + table: Box::new(table), + value, + name, + columns, + alias, + }) + } + + /// Parses `OPENJSON( jsonExpression [ , path ] ) [ ]` clause, + /// assuming the `OPENJSON` keyword was already consumed. + fn parse_open_json_table_factor(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let json_expr = self.parse_expr()?; + let json_path = if self.consume_token(&Token::Comma) { + Some(self.parse_value()?) + } else { + None + }; + self.expect_token(&Token::RParen)?; + let columns = if self.parse_keyword(Keyword::WITH) { + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(Parser::parse_openjson_table_column_def)?; + self.expect_token(&Token::RParen)?; + columns + } else { + Vec::new() + }; + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::OpenJsonTable { + json_expr, + json_path, + columns, + alias, + }) + } + + fn parse_match_recognize(&mut self, table: TableFactor) -> Result { + self.expect_token(&Token::LParen)?; + + let partition_by = if self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_order_by_expr)? + } else { + vec![] + }; + + let measures = if self.parse_keyword(Keyword::MEASURES) { + self.parse_comma_separated(|p| { + let expr = p.parse_expr()?; + let _ = p.parse_keyword(Keyword::AS); + let alias = p.parse_identifier(false)?; + Ok(Measure { expr, alias }) + })? 
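+        // Illustrative aside (not part of this diff): AS is optional above, so
+        // `MEASURES MATCH_NUMBER() AS mno` and `MEASURES MATCH_NUMBER() mno`
+        // both yield Measure { expr: MATCH_NUMBER(), alias: mno }.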
+ } else { + vec![] + }; + + let rows_per_match = + if self.parse_keywords(&[Keyword::ONE, Keyword::ROW, Keyword::PER, Keyword::MATCH]) { + Some(RowsPerMatch::OneRow) + } else if self.parse_keywords(&[ + Keyword::ALL, + Keyword::ROWS, + Keyword::PER, + Keyword::MATCH, + ]) { + Some(RowsPerMatch::AllRows( + if self.parse_keywords(&[Keyword::SHOW, Keyword::EMPTY, Keyword::MATCHES]) { + Some(EmptyMatchesMode::Show) + } else if self.parse_keywords(&[ + Keyword::OMIT, + Keyword::EMPTY, + Keyword::MATCHES, + ]) { + Some(EmptyMatchesMode::Omit) + } else if self.parse_keywords(&[ + Keyword::WITH, + Keyword::UNMATCHED, + Keyword::ROWS, + ]) { + Some(EmptyMatchesMode::WithUnmatched) + } else { + None + }, + )) + } else { + None + }; + + let after_match_skip = + if self.parse_keywords(&[Keyword::AFTER, Keyword::MATCH, Keyword::SKIP]) { + if self.parse_keywords(&[Keyword::PAST, Keyword::LAST, Keyword::ROW]) { + Some(AfterMatchSkip::PastLastRow) + } else if self.parse_keywords(&[Keyword::TO, Keyword::NEXT, Keyword::ROW]) { + Some(AfterMatchSkip::ToNextRow) + } else if self.parse_keywords(&[Keyword::TO, Keyword::FIRST]) { + Some(AfterMatchSkip::ToFirst(self.parse_identifier(false)?)) + } else if self.parse_keywords(&[Keyword::TO, Keyword::LAST]) { + Some(AfterMatchSkip::ToLast(self.parse_identifier(false)?)) + } else { + let found = self.next_token(); + return self.expected("after match skip option", found); + } + } else { + None + }; + + self.expect_keyword(Keyword::PATTERN)?; + let pattern = self.parse_parenthesized(Self::parse_pattern)?; + + self.expect_keyword(Keyword::DEFINE)?; + + let symbols = self.parse_comma_separated(|p| { + let symbol = p.parse_identifier(false)?; + p.expect_keyword(Keyword::AS)?; + let definition = p.parse_expr()?; + Ok(SymbolDefinition { symbol, definition }) + })?; + + self.expect_token(&Token::RParen)?; + + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + + Ok(TableFactor::MatchRecognize { + table: Box::new(table), + partition_by, + order_by, + measures, + rows_per_match, + after_match_skip, + pattern, + symbols, + alias, + }) + } + + fn parse_base_pattern(&mut self) -> Result { + match self.next_token().token { + Token::Caret => Ok(MatchRecognizePattern::Symbol(MatchRecognizeSymbol::Start)), + Token::Placeholder(s) if s == "$" => { + Ok(MatchRecognizePattern::Symbol(MatchRecognizeSymbol::End)) + } + Token::LBrace => { + self.expect_token(&Token::Minus)?; + let symbol = self + .parse_identifier(false) + .map(MatchRecognizeSymbol::Named)?; + self.expect_token(&Token::Minus)?; + self.expect_token(&Token::RBrace)?; + Ok(MatchRecognizePattern::Exclude(symbol)) + } + Token::Word(Word { + value, + quote_style: None, + .. 
+ }) if value == "PERMUTE" => { + self.expect_token(&Token::LParen)?; + let symbols = self.parse_comma_separated(|p| { + p.parse_identifier(false).map(MatchRecognizeSymbol::Named) + })?; + self.expect_token(&Token::RParen)?; + Ok(MatchRecognizePattern::Permute(symbols)) + } + Token::LParen => { + let pattern = self.parse_pattern()?; + self.expect_token(&Token::RParen)?; + Ok(MatchRecognizePattern::Group(Box::new(pattern))) + } + _ => { + self.prev_token(); + self.parse_identifier(false) + .map(MatchRecognizeSymbol::Named) + .map(MatchRecognizePattern::Symbol) + } + } + } + + fn parse_json_table_column_error_handling( + &mut self, + ) -> Result, ParserError> { + let res = if self.parse_keyword(Keyword::NULL) { + JsonTableColumnErrorHandling::Null + } else if self.parse_keyword(Keyword::ERROR) { + JsonTableColumnErrorHandling::Error + } else if self.parse_keyword(Keyword::DEFAULT) { + JsonTableColumnErrorHandling::Default(self.parse_value()?) + } else { + return Ok(None); + }; + self.expect_keyword(Keyword::ON)?; + Ok(Some(res)) + } + + fn parse_repetition_pattern(&mut self) -> Result { + let mut pattern = self.parse_base_pattern()?; + loop { + let token = self.next_token(); + let quantifier = match token.token { + Token::Mul => RepetitionQuantifier::ZeroOrMore, + Token::Plus => RepetitionQuantifier::OneOrMore, + Token::Placeholder(s) if s == "?" => RepetitionQuantifier::AtMostOne, + Token::LBrace => { + // quantifier is a range like {n} or {n,} or {,m} or {n,m} + let token = self.next_token(); + match token.token { + Token::Comma => { + let next_token = self.next_token(); + let Token::Number(n, _) = next_token.token else { + return self.expected("literal number", next_token); + }; + self.expect_token(&Token::RBrace)?; + RepetitionQuantifier::AtMost(Self::parse(n, token.span.start)?) + } + Token::Number(n, _) if self.consume_token(&Token::Comma) => { + let next_token = self.next_token(); + match next_token.token { + Token::Number(m, _) => { + self.expect_token(&Token::RBrace)?; + RepetitionQuantifier::Range( + Self::parse(n, token.span.start)?, + Self::parse(m, token.span.start)?, + ) + } + Token::RBrace => { + RepetitionQuantifier::AtLeast(Self::parse(n, token.span.start)?) + } + _ => { + return self.expected("} or upper bound", next_token); + } + } + } + Token::Number(n, _) => { + self.expect_token(&Token::RBrace)?; + RepetitionQuantifier::Exactly(Self::parse(n, token.span.start)?) + } + _ => return self.expected("quantifier range", token), + } + } + _ => { + self.prev_token(); + break; + } + }; + pattern = MatchRecognizePattern::Repetition(Box::new(pattern), quantifier); + } + Ok(pattern) + } + + fn parse_concat_pattern(&mut self) -> Result { + let mut patterns = vec![self.parse_repetition_pattern()?]; + while !matches!(self.peek_token().token, Token::RParen | Token::Pipe) { + patterns.push(self.parse_repetition_pattern()?); + } + match <[MatchRecognizePattern; 1]>::try_from(patterns) { + Ok([pattern]) => Ok(pattern), + Err(patterns) => Ok(MatchRecognizePattern::Concat(patterns)), + } + } + + fn parse_pattern(&mut self) -> Result { + let pattern = self.parse_concat_pattern()?; + if self.consume_token(&Token::Pipe) { + match self.parse_pattern()? 
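+        // Illustrative aside (not part of this diff): together with the recursion,
+        // the flattening below turns `A | B | C` into Alternation([A, B, C])
+        // instead of the right-nested Alternation([A, Alternation([B, C])]).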
{ + // flatten nested alternations + MatchRecognizePattern::Alternation(mut patterns) => { + patterns.insert(0, pattern); + Ok(MatchRecognizePattern::Alternation(patterns)) + } + next => Ok(MatchRecognizePattern::Alternation(vec![pattern, next])), + } + } else { + Ok(pattern) + } + } + + pub fn parse_optional_args(&mut self) -> Result, ParserError> { + if self.consume_token(&Token::RParen) { + Ok(vec![]) + } else { + let args = self.parse_comma_separated(Parser::parse_function_args)?; + self.expect_token(&Token::RParen)?; + Ok(args) + } + } + + fn parse_aliased_function_call(&mut self) -> Result { + let function_name = match self.next_token().token { + Token::Word(w) => Ok(w.value), + _ => self.expected("a function identifier", self.peek_token()), + }?; + let expr = self.parse_function(ObjectName(vec![Ident::new(function_name)]))?; + let alias = if self.parse_keyword(Keyword::AS) { + Some(self.parse_identifier(false)?) + } else { + None + }; + + Ok(ExprWithAlias { expr, alias }) + } + + fn parse_function_named_arg_operator(&mut self) -> Result { + if self.parse_keyword(Keyword::VALUE) { + return Ok(FunctionArgOperator::Value); + } + let tok = self.next_token(); + match tok.token { + Token::RArrow if self.dialect.supports_named_fn_args_with_rarrow_operator() => { + Ok(FunctionArgOperator::RightArrow) + } + Token::Eq if self.dialect.supports_named_fn_args_with_eq_operator() => { + Ok(FunctionArgOperator::Equals) + } + Token::Assignment + if self + .dialect + .supports_named_fn_args_with_assignment_operator() => + { + Ok(FunctionArgOperator::Assignment) + } + Token::Colon if self.dialect.supports_named_fn_args_with_colon_operator() => { + Ok(FunctionArgOperator::Colon) + } + _ => { + self.prev_token(); + self.expected("argument operator", tok) + } + } + } + + fn parse_table_function_args(&mut self) -> Result { + if self.consume_token(&Token::RParen) { + return Ok(TableFunctionArgs { + args: vec![], + settings: None, + }); + } + let mut args = vec![]; + let settings = loop { + if let Some(settings) = self.parse_settings()? { + break Some(settings); + } + args.push(self.parse_function_args()?); + if self.is_parse_comma_separated_end() { + break None; + } + }; + self.expect_token(&Token::RParen)?; + Ok(TableFunctionArgs { args, settings }) + } + + /// Parse a parenthesized comma-separated list of table alias column definitions. + fn parse_table_alias_column_defs(&mut self) -> Result, ParserError> { + if self.consume_token(&Token::LParen) { + let cols = self.parse_comma_separated(|p| { + let name = p.parse_identifier(false)?; + let data_type = p.maybe_parse(|p| p.parse_data_type())?; + Ok(TableAliasColumnDef { name, data_type }) + })?; + self.expect_token(&Token::RParen)?; + Ok(cols) + } else { + Ok(vec![]) + } + } + + /// Invoke `f` after first setting the parser's `ParserState` to `state`. + /// + /// Upon return, restores the parser's state to what it started at. 
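+    // Illustrative aside (not part of this diff): the previous state is restored
+    // even when `f` returns Err, because the result is captured in `res` and the
+    // state reset happens before `res` is returned.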
+ fn with_state(&mut self, state: ParserState, mut f: F) -> Result + where + F: FnMut(&mut Parser) -> Result, + { + let current_state = self.state; + self.state = state; + let res = f(self); + self.state = current_state; + res + } +} diff --git a/src/parser/set.rs b/src/parser/set.rs new file mode 100644 index 000000000..df312fbfb --- /dev/null +++ b/src/parser/set.rs @@ -0,0 +1,126 @@ +use crate::parser::*; + +impl Parser<'_> { + pub fn parse_set(&mut self) -> Result { + let modifier = + self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::HIVEVAR]); + if let Some(Keyword::HIVEVAR) = modifier { + self.expect_token(&Token::Colon)?; + } else if let Some(set_role_stmt) = + self.maybe_parse(|parser| parser.parse_set_role(modifier))? + { + return Ok(set_role_stmt); + } + + let variables = if self.parse_keywords(&[Keyword::TIME, Keyword::ZONE]) { + OneOrManyWithParens::One(ObjectName(vec!["TIMEZONE".into()])) + } else if self.dialect.supports_parenthesized_set_variables() + && self.consume_token(&Token::LParen) + { + let variables = OneOrManyWithParens::Many( + self.parse_comma_separated(|parser: &mut Parser<'_>| { + parser.parse_identifier(false) + })? + .into_iter() + .map(|ident| ObjectName(vec![ident])) + .collect(), + ); + self.expect_token(&Token::RParen)?; + variables + } else { + OneOrManyWithParens::One(self.parse_object_name(false)?) + }; + + if matches!(&variables, OneOrManyWithParens::One(variable) if variable.to_string().eq_ignore_ascii_case("NAMES") + && dialect_of!(self is MySqlDialect | GenericDialect)) + { + if self.parse_keyword(Keyword::DEFAULT) { + return Ok(Statement::SetNamesDefault {}); + } + + let charset_name = self.parse_literal_string()?; + let collation_name = if self.parse_one_of_keywords(&[Keyword::COLLATE]).is_some() { + Some(self.parse_literal_string()?) + } else { + None + }; + + return Ok(Statement::SetNames { + charset_name, + collation_name, + }); + } + + let parenthesized_assignment = matches!(&variables, OneOrManyWithParens::Many(_)); + + if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { + if parenthesized_assignment { + self.expect_token(&Token::LParen)?; + } + + let mut values = vec![]; + loop { + let value = if let Some(expr) = self.try_parse_expr_sub_query()? { + expr + } else if let Ok(expr) = self.parse_expr() { + expr + } else { + self.expected("variable value", self.peek_token())? + }; + + values.push(value); + if self.consume_token(&Token::Comma) { + continue; + } + + if parenthesized_assignment { + self.expect_token(&Token::RParen)?; + } + return Ok(Statement::SetVariable { + local: modifier == Some(Keyword::LOCAL), + hivevar: Some(Keyword::HIVEVAR) == modifier, + variables, + value: values, + }); + } + } + + let OneOrManyWithParens::One(variable) = variables else { + return self.expected("set variable", self.peek_token()); + }; + + if variable.to_string().eq_ignore_ascii_case("TIMEZONE") { + // for some db (e.g. 
postgresql), SET TIME ZONE is an alias for SET TIMEZONE [TO|=] + match self.parse_expr() { + Ok(expr) => Ok(Statement::SetTimeZone { + local: modifier == Some(Keyword::LOCAL), + value: expr, + }), + _ => self.expected("timezone value", self.peek_token())?, + } + } else if variable.to_string() == "CHARACTERISTICS" { + self.expect_keywords(&[Keyword::AS, Keyword::TRANSACTION])?; + Ok(Statement::SetTransaction { + modes: self.parse_transaction_modes()?, + snapshot: None, + session: true, + }) + } else if variable.to_string() == "TRANSACTION" && modifier.is_none() { + if self.parse_keyword(Keyword::SNAPSHOT) { + let snapshot_id = self.parse_value()?; + return Ok(Statement::SetTransaction { + modes: vec![], + snapshot: Some(snapshot_id), + session: false, + }); + } + Ok(Statement::SetTransaction { + modes: self.parse_transaction_modes()?, + snapshot: None, + session: false, + }) + } else { + self.expected("equals sign or TO", self.peek_token()) + } + } +} diff --git a/src/parser/show.rs b/src/parser/show.rs new file mode 100644 index 000000000..f8b5f3a9a --- /dev/null +++ b/src/parser/show.rs @@ -0,0 +1,299 @@ +use super::*; + +impl Parser<'_> { + pub fn parse_show(&mut self) -> Result { + let terse = self.parse_keyword(Keyword::TERSE); + let extended = self.parse_keyword(Keyword::EXTENDED); + let full = self.parse_keyword(Keyword::FULL); + let session = self.parse_keyword(Keyword::SESSION); + let global = self.parse_keyword(Keyword::GLOBAL); + let external = self.parse_keyword(Keyword::EXTERNAL); + if self + .parse_one_of_keywords(&[Keyword::COLUMNS, Keyword::FIELDS]) + .is_some() + { + Ok(self.parse_show_columns(extended, full)?) + } else if self.parse_keyword(Keyword::TABLES) { + Ok(self.parse_show_tables(terse, extended, full, external)?) + } else if self.parse_keywords(&[Keyword::MATERIALIZED, Keyword::VIEWS]) { + Ok(self.parse_show_views(terse, true)?) + } else if self.parse_keyword(Keyword::VIEWS) { + Ok(self.parse_show_views(terse, false)?) + } else if self.parse_keyword(Keyword::FUNCTIONS) { + Ok(self.parse_show_functions()?) + } else if extended || full { + Err(ParserError::ParserError( + "EXTENDED/FULL are not supported with this type of SHOW query".to_string(), + )) + } else if self.parse_one_of_keywords(&[Keyword::CREATE]).is_some() { + Ok(self.parse_show_create()?) + } else if self.parse_keyword(Keyword::COLLATION) { + Ok(self.parse_show_collation()?) 
+ } else if self.parse_keyword(Keyword::VARIABLES) + && dialect_of!(self is MySqlDialect | GenericDialect) + { + Ok(Statement::ShowVariables { + filter: self.parse_show_statement_filter()?, + session, + global, + }) + } else if self.parse_keyword(Keyword::STATUS) + && dialect_of!(self is MySqlDialect | GenericDialect) + { + Ok(Statement::ShowStatus { + filter: self.parse_show_statement_filter()?, + session, + global, + }) + } else if self.parse_keyword(Keyword::DATABASES) { + self.parse_show_databases(terse) + } else if self.parse_keyword(Keyword::SCHEMAS) { + self.parse_show_schemas(terse) + } else { + Ok(Statement::ShowVariable { + variable: self.parse_identifiers()?, + }) + } + } + + fn parse_show_databases(&mut self, terse: bool) -> Result { + let history = self.parse_keyword(Keyword::HISTORY); + let show_options = self.parse_show_stmt_options()?; + Ok(Statement::ShowDatabases { + terse, + history, + show_options, + }) + } + + fn parse_show_schemas(&mut self, terse: bool) -> Result { + let history = self.parse_keyword(Keyword::HISTORY); + let show_options = self.parse_show_stmt_options()?; + Ok(Statement::ShowSchemas { + terse, + history, + show_options, + }) + } + + pub fn parse_show_create(&mut self) -> Result { + let obj_type = match self.expect_one_of_keywords(&[ + Keyword::TABLE, + Keyword::TRIGGER, + Keyword::FUNCTION, + Keyword::PROCEDURE, + Keyword::EVENT, + Keyword::VIEW, + ])? { + Keyword::TABLE => Ok(ShowCreateObject::Table), + Keyword::TRIGGER => Ok(ShowCreateObject::Trigger), + Keyword::FUNCTION => Ok(ShowCreateObject::Function), + Keyword::PROCEDURE => Ok(ShowCreateObject::Procedure), + Keyword::EVENT => Ok(ShowCreateObject::Event), + Keyword::VIEW => Ok(ShowCreateObject::View), + keyword => Err(ParserError::ParserError(format!( + "Unable to map keyword to ShowCreateObject: {keyword:?}" + ))), + }?; + + let obj_name = self.parse_object_name(false)?; + + Ok(Statement::ShowCreate { obj_type, obj_name }) + } + + pub fn parse_show_columns( + &mut self, + extended: bool, + full: bool, + ) -> Result { + let show_options = self.parse_show_stmt_options()?; + Ok(Statement::ShowColumns { + extended, + full, + show_options, + }) + } + + fn parse_show_tables( + &mut self, + terse: bool, + extended: bool, + full: bool, + external: bool, + ) -> Result { + let history = !external && self.parse_keyword(Keyword::HISTORY); + let show_options = self.parse_show_stmt_options()?; + Ok(Statement::ShowTables { + terse, + history, + extended, + full, + external, + show_options, + }) + } + + fn parse_show_views( + &mut self, + terse: bool, + materialized: bool, + ) -> Result { + let show_options = self.parse_show_stmt_options()?; + Ok(Statement::ShowViews { + materialized, + terse, + show_options, + }) + } + + pub fn parse_show_functions(&mut self) -> Result { + let filter = self.parse_show_statement_filter()?; + Ok(Statement::ShowFunctions { filter }) + } + + pub fn parse_show_collation(&mut self) -> Result { + let filter = self.parse_show_statement_filter()?; + Ok(Statement::ShowCollation { filter }) + } + + pub fn parse_show_statement_filter( + &mut self, + ) -> Result, ParserError> { + if self.parse_keyword(Keyword::LIKE) { + Ok(Some(ShowStatementFilter::Like( + self.parse_literal_string()?, + ))) + } else if self.parse_keyword(Keyword::ILIKE) { + Ok(Some(ShowStatementFilter::ILike( + self.parse_literal_string()?, + ))) + } else if self.parse_keyword(Keyword::WHERE) { + Ok(Some(ShowStatementFilter::Where(self.parse_expr()?))) + } else { + self.maybe_parse(|parser| -> Result { + 
parser.parse_literal_string() + })? + .map_or(Ok(None), |filter| { + Ok(Some(ShowStatementFilter::NoKeyword(filter))) + }) + } + } + + fn parse_show_stmt_options(&mut self) -> Result { + let show_in; + let mut filter_position = None; + if self.dialect.supports_show_like_before_in() { + if let Some(filter) = self.parse_show_statement_filter()? { + filter_position = Some(ShowStatementFilterPosition::Infix(filter)); + } + show_in = self.maybe_parse_show_stmt_in()?; + } else { + show_in = self.maybe_parse_show_stmt_in()?; + if let Some(filter) = self.parse_show_statement_filter()? { + filter_position = Some(ShowStatementFilterPosition::Suffix(filter)); + } + } + let starts_with = self.maybe_parse_show_stmt_starts_with()?; + let limit = self.maybe_parse_show_stmt_limit()?; + let from = self.maybe_parse_show_stmt_from()?; + Ok(ShowStatementOptions { + filter_position, + show_in, + starts_with, + limit, + limit_from: from, + }) + } + + fn maybe_parse_show_stmt_in(&mut self) -> Result, ParserError> { + let clause = match self.parse_one_of_keywords(&[Keyword::FROM, Keyword::IN]) { + Some(Keyword::FROM) => ShowStatementInClause::FROM, + Some(Keyword::IN) => ShowStatementInClause::IN, + None => return Ok(None), + _ => return self.expected("FROM or IN", self.peek_token()), + }; + + let (parent_type, parent_name) = match self.parse_one_of_keywords(&[ + Keyword::ACCOUNT, + Keyword::DATABASE, + Keyword::SCHEMA, + Keyword::TABLE, + Keyword::VIEW, + ]) { + // If we see these next keywords it means we don't have a parent name + Some(Keyword::DATABASE) + if self.peek_keywords(&[Keyword::STARTS, Keyword::WITH]) + | self.peek_keyword(Keyword::LIMIT) => + { + (Some(ShowStatementInParentType::Database), None) + } + Some(Keyword::SCHEMA) + if self.peek_keywords(&[Keyword::STARTS, Keyword::WITH]) + | self.peek_keyword(Keyword::LIMIT) => + { + (Some(ShowStatementInParentType::Schema), None) + } + Some(parent_kw) => { + // The parent name here is still optional, for example: + // SHOW TABLES IN ACCOUNT, so parsing the object name + // may fail because the statement ends. + let parent_name = self.maybe_parse(|p| p.parse_object_name(false))?; + match parent_kw { + Keyword::ACCOUNT => (Some(ShowStatementInParentType::Account), parent_name), + Keyword::DATABASE => (Some(ShowStatementInParentType::Database), parent_name), + Keyword::SCHEMA => (Some(ShowStatementInParentType::Schema), parent_name), + Keyword::TABLE => (Some(ShowStatementInParentType::Table), parent_name), + Keyword::VIEW => (Some(ShowStatementInParentType::View), parent_name), + _ => { + return self.expected( + "one of ACCOUNT, DATABASE, SCHEMA, TABLE or VIEW", + self.peek_token(), + ) + } + } + } + None => { + // Parsing MySQL style FROM tbl_name FROM db_name + // which is equivalent to FROM tbl_name.db_name + let mut parent_name = self.parse_object_name(false)?; + if self + .parse_one_of_keywords(&[Keyword::FROM, Keyword::IN]) + .is_some() + { + parent_name.0.insert(0, self.parse_identifier(false)?); + } + (None, Some(parent_name)) + } + }; + + Ok(Some(ShowStatementIn { + clause, + parent_type, + parent_name, + })) + } + + fn maybe_parse_show_stmt_starts_with(&mut self) -> Result, ParserError> { + if self.parse_keywords(&[Keyword::STARTS, Keyword::WITH]) { + Ok(Some(self.parse_value()?)) + } else { + Ok(None) + } + } + + fn maybe_parse_show_stmt_limit(&mut self) -> Result, ParserError> { + if self.parse_keyword(Keyword::LIMIT) { + Ok(self.parse_limit()?) 
+ } else { + Ok(None) + } + } + + fn maybe_parse_show_stmt_from(&mut self) -> Result, ParserError> { + if self.parse_keyword(Keyword::FROM) { + Ok(Some(self.parse_value()?)) + } else { + Ok(None) + } + } +} diff --git a/src/parser/start.rs b/src/parser/start.rs new file mode 100644 index 000000000..e0081c689 --- /dev/null +++ b/src/parser/start.rs @@ -0,0 +1,74 @@ +use crate::parser::*; + +impl Parser<'_> { + pub fn parse_start_transaction(&mut self) -> Result { + self.expect_keyword(Keyword::TRANSACTION)?; + Ok(Statement::StartTransaction { + modes: self.parse_transaction_modes()?, + begin: false, + transaction: Some(BeginTransactionKind::Transaction), + modifier: None, + }) + } + + pub fn parse_begin(&mut self) -> Result { + let modifier = if !self.dialect.supports_start_transaction_modifier() { + None + } else if self.parse_keyword(Keyword::DEFERRED) { + Some(TransactionModifier::Deferred) + } else if self.parse_keyword(Keyword::IMMEDIATE) { + Some(TransactionModifier::Immediate) + } else if self.parse_keyword(Keyword::EXCLUSIVE) { + Some(TransactionModifier::Exclusive) + } else { + None + }; + let transaction = match self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]) { + Some(Keyword::TRANSACTION) => Some(BeginTransactionKind::Transaction), + Some(Keyword::WORK) => Some(BeginTransactionKind::Work), + _ => None, + }; + Ok(Statement::StartTransaction { + modes: self.parse_transaction_modes()?, + begin: true, + transaction, + modifier, + }) + } + + pub fn parse_transaction_modes(&mut self) -> Result, ParserError> { + let mut modes = vec![]; + let mut required = false; + loop { + let mode = if self.parse_keywords(&[Keyword::ISOLATION, Keyword::LEVEL]) { + let iso_level = if self.parse_keywords(&[Keyword::READ, Keyword::UNCOMMITTED]) { + TransactionIsolationLevel::ReadUncommitted + } else if self.parse_keywords(&[Keyword::READ, Keyword::COMMITTED]) { + TransactionIsolationLevel::ReadCommitted + } else if self.parse_keywords(&[Keyword::REPEATABLE, Keyword::READ]) { + TransactionIsolationLevel::RepeatableRead + } else if self.parse_keyword(Keyword::SERIALIZABLE) { + TransactionIsolationLevel::Serializable + } else { + self.expected("isolation level", self.peek_token())? + }; + TransactionMode::IsolationLevel(iso_level) + } else if self.parse_keywords(&[Keyword::READ, Keyword::ONLY]) { + TransactionMode::AccessMode(TransactionAccessMode::ReadOnly) + } else if self.parse_keywords(&[Keyword::READ, Keyword::WRITE]) { + TransactionMode::AccessMode(TransactionAccessMode::ReadWrite) + } else if required { + self.expected("transaction mode", self.peek_token())? + } else { + break; + }; + modes.push(mode); + // ANSI requires a comma after each transaction mode, but + // PostgreSQL, for historical reasons, does not. We follow + // PostgreSQL in making the comma optional, since that is strictly + // more general. 
+ required = self.consume_token(&Token::Comma); + } + Ok(modes) + } +} diff --git a/src/parser/tests.rs b/src/parser/tests.rs new file mode 100644 index 000000000..d7d06f139 --- /dev/null +++ b/src/parser/tests.rs @@ -0,0 +1,695 @@ +use crate::test_utils::{all_dialects, TestedDialects}; + +use super::*; + +#[test] +fn test_prev_index() { + let sql = "SELECT version"; + all_dialects().run_parser_method(sql, |parser| { + assert_eq!(parser.peek_token(), Token::make_keyword("SELECT")); + assert_eq!(parser.next_token(), Token::make_keyword("SELECT")); + parser.prev_token(); + assert_eq!(parser.next_token(), Token::make_keyword("SELECT")); + assert_eq!(parser.next_token(), Token::make_word("version", None)); + parser.prev_token(); + assert_eq!(parser.peek_token(), Token::make_word("version", None)); + assert_eq!(parser.next_token(), Token::make_word("version", None)); + assert_eq!(parser.peek_token(), Token::EOF); + parser.prev_token(); + assert_eq!(parser.next_token(), Token::make_word("version", None)); + assert_eq!(parser.next_token(), Token::EOF); + assert_eq!(parser.next_token(), Token::EOF); + parser.prev_token(); + }); +} + +#[test] +fn test_peek_tokens() { + all_dialects().run_parser_method("SELECT foo AS bar FROM baz", |parser| { + assert!(matches!( + parser.peek_tokens(), + [Token::Word(Word { + keyword: Keyword::SELECT, + .. + })] + )); + + assert!(matches!( + parser.peek_tokens(), + [ + Token::Word(Word { + keyword: Keyword::SELECT, + .. + }), + Token::Word(_), + Token::Word(Word { + keyword: Keyword::AS, + .. + }), + ] + )); + + for _ in 0..4 { + parser.next_token(); + } + + assert!(matches!( + parser.peek_tokens(), + [ + Token::Word(Word { + keyword: Keyword::FROM, + .. + }), + Token::Word(_), + Token::EOF, + Token::EOF, + ] + )) + }) +} + +#[cfg(test)] +mod test_parse_data_type { + use crate::ast::{ + CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, ObjectName, TimezoneInfo, + }; + use crate::dialect::{AnsiDialect, GenericDialect}; + use crate::test_utils::TestedDialects; + + macro_rules! test_parse_data_type { + ($dialect:expr, $input:expr, $expected_type:expr $(,)?) 
=> {{ + $dialect.run_parser_method(&*$input, |parser| { + let data_type = parser.parse_data_type().unwrap(); + assert_eq!($expected_type, data_type); + assert_eq!($input.to_string(), data_type.to_string()); + }); + }}; + } + + #[test] + fn test_ansii_character_string_types() { + // Character string types: + let dialect = + TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})]); + + test_parse_data_type!(dialect, "CHARACTER", DataType::Character(None)); + + test_parse_data_type!( + dialect, + "CHARACTER(20)", + DataType::Character(Some(CharacterLength::IntegerLength { + length: 20, + unit: None + })) + ); + + test_parse_data_type!( + dialect, + "CHARACTER(20 CHARACTERS)", + DataType::Character(Some(CharacterLength::IntegerLength { + length: 20, + unit: Some(CharLengthUnits::Characters) + })) + ); + + test_parse_data_type!( + dialect, + "CHARACTER(20 OCTETS)", + DataType::Character(Some(CharacterLength::IntegerLength { + length: 20, + unit: Some(CharLengthUnits::Octets) + })) + ); + + test_parse_data_type!(dialect, "CHAR", DataType::Char(None)); + + test_parse_data_type!( + dialect, + "CHAR(20)", + DataType::Char(Some(CharacterLength::IntegerLength { + length: 20, + unit: None + })) + ); + + test_parse_data_type!( + dialect, + "CHAR(20 CHARACTERS)", + DataType::Char(Some(CharacterLength::IntegerLength { + length: 20, + unit: Some(CharLengthUnits::Characters) + })) + ); + + test_parse_data_type!( + dialect, + "CHAR(20 OCTETS)", + DataType::Char(Some(CharacterLength::IntegerLength { + length: 20, + unit: Some(CharLengthUnits::Octets) + })) + ); + + test_parse_data_type!( + dialect, + "CHARACTER VARYING(20)", + DataType::CharacterVarying(Some(CharacterLength::IntegerLength { + length: 20, + unit: None + })) + ); + + test_parse_data_type!( + dialect, + "CHARACTER VARYING(20 CHARACTERS)", + DataType::CharacterVarying(Some(CharacterLength::IntegerLength { + length: 20, + unit: Some(CharLengthUnits::Characters) + })) + ); + + test_parse_data_type!( + dialect, + "CHARACTER VARYING(20 OCTETS)", + DataType::CharacterVarying(Some(CharacterLength::IntegerLength { + length: 20, + unit: Some(CharLengthUnits::Octets) + })) + ); + + test_parse_data_type!( + dialect, + "CHAR VARYING(20)", + DataType::CharVarying(Some(CharacterLength::IntegerLength { + length: 20, + unit: None + })) + ); + + test_parse_data_type!( + dialect, + "CHAR VARYING(20 CHARACTERS)", + DataType::CharVarying(Some(CharacterLength::IntegerLength { + length: 20, + unit: Some(CharLengthUnits::Characters) + })) + ); + + test_parse_data_type!( + dialect, + "CHAR VARYING(20 OCTETS)", + DataType::CharVarying(Some(CharacterLength::IntegerLength { + length: 20, + unit: Some(CharLengthUnits::Octets) + })) + ); + + test_parse_data_type!( + dialect, + "VARCHAR(20)", + DataType::Varchar(Some(CharacterLength::IntegerLength { + length: 20, + unit: None + })) + ); + } + + #[test] + fn test_ansii_character_large_object_types() { + // Character large object types: + let dialect = + TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})]); + + test_parse_data_type!( + dialect, + "CHARACTER LARGE OBJECT", + DataType::CharacterLargeObject(None) + ); + test_parse_data_type!( + dialect, + "CHARACTER LARGE OBJECT(20)", + DataType::CharacterLargeObject(Some(20)) + ); + + test_parse_data_type!( + dialect, + "CHAR LARGE OBJECT", + DataType::CharLargeObject(None) + ); + test_parse_data_type!( + dialect, + "CHAR LARGE OBJECT(20)", + DataType::CharLargeObject(Some(20)) + ); + + test_parse_data_type!(dialect, "CLOB", 
DataType::Clob(None)); + test_parse_data_type!(dialect, "CLOB(20)", DataType::Clob(Some(20))); + } + + #[test] + fn test_parse_custom_types() { + let dialect = + TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})]); + + test_parse_data_type!( + dialect, + "GEOMETRY", + DataType::Custom(ObjectName(vec!["GEOMETRY".into()]), vec![]) + ); + + test_parse_data_type!( + dialect, + "GEOMETRY(POINT)", + DataType::Custom( + ObjectName(vec!["GEOMETRY".into()]), + vec!["POINT".to_string()] + ) + ); + + test_parse_data_type!( + dialect, + "GEOMETRY(POINT, 4326)", + DataType::Custom( + ObjectName(vec!["GEOMETRY".into()]), + vec!["POINT".to_string(), "4326".to_string()] + ) + ); + } + + #[test] + fn test_ansii_exact_numeric_types() { + // Exact numeric types: + let dialect = + TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})]); + + test_parse_data_type!(dialect, "NUMERIC", DataType::Numeric(ExactNumberInfo::None)); + + test_parse_data_type!( + dialect, + "NUMERIC(2)", + DataType::Numeric(ExactNumberInfo::Precision(2)) + ); + + test_parse_data_type!( + dialect, + "NUMERIC(2,10)", + DataType::Numeric(ExactNumberInfo::PrecisionAndScale(2, 10)) + ); + + test_parse_data_type!(dialect, "DECIMAL", DataType::Decimal(ExactNumberInfo::None)); + + test_parse_data_type!( + dialect, + "DECIMAL(2)", + DataType::Decimal(ExactNumberInfo::Precision(2)) + ); + + test_parse_data_type!( + dialect, + "DECIMAL(2,10)", + DataType::Decimal(ExactNumberInfo::PrecisionAndScale(2, 10)) + ); + + test_parse_data_type!(dialect, "DEC", DataType::Dec(ExactNumberInfo::None)); + + test_parse_data_type!( + dialect, + "DEC(2)", + DataType::Dec(ExactNumberInfo::Precision(2)) + ); + + test_parse_data_type!( + dialect, + "DEC(2,10)", + DataType::Dec(ExactNumberInfo::PrecisionAndScale(2, 10)) + ); + } + + #[test] + fn test_ansii_date_type() { + // Datetime types: + let dialect = + TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})]); + + test_parse_data_type!(dialect, "DATE", DataType::Date); + + test_parse_data_type!(dialect, "TIME", DataType::Time(None, TimezoneInfo::None)); + + test_parse_data_type!( + dialect, + "TIME(6)", + DataType::Time(Some(6), TimezoneInfo::None) + ); + + test_parse_data_type!( + dialect, + "TIME WITH TIME ZONE", + DataType::Time(None, TimezoneInfo::WithTimeZone) + ); + + test_parse_data_type!( + dialect, + "TIME(6) WITH TIME ZONE", + DataType::Time(Some(6), TimezoneInfo::WithTimeZone) + ); + + test_parse_data_type!( + dialect, + "TIME WITHOUT TIME ZONE", + DataType::Time(None, TimezoneInfo::WithoutTimeZone) + ); + + test_parse_data_type!( + dialect, + "TIME(6) WITHOUT TIME ZONE", + DataType::Time(Some(6), TimezoneInfo::WithoutTimeZone) + ); + + test_parse_data_type!( + dialect, + "TIMESTAMP", + DataType::Timestamp(None, TimezoneInfo::None) + ); + + test_parse_data_type!( + dialect, + "TIMESTAMP(22)", + DataType::Timestamp(Some(22), TimezoneInfo::None) + ); + + test_parse_data_type!( + dialect, + "TIMESTAMP(22) WITH TIME ZONE", + DataType::Timestamp(Some(22), TimezoneInfo::WithTimeZone) + ); + + test_parse_data_type!( + dialect, + "TIMESTAMP(33) WITHOUT TIME ZONE", + DataType::Timestamp(Some(33), TimezoneInfo::WithoutTimeZone) + ); + } +} + +#[test] +fn test_parse_schema_name() { + // The expected name should be identical as the input name, that's why I don't receive both + macro_rules! test_parse_schema_name { + ($input:expr, $expected_name:expr $(,)?) 
=> {{ + all_dialects().run_parser_method(&*$input, |parser| { + let schema_name = parser.parse_schema_name().unwrap(); + // Validate that the structure is the same as expected + assert_eq!(schema_name, $expected_name); + // Validate that the input and the expected structure serialization are the same + assert_eq!(schema_name.to_string(), $input.to_string()); + }); + }}; + } + + let dummy_name = ObjectName(vec![Ident::new("dummy_name")]); + let dummy_authorization = Ident::new("dummy_authorization"); + + test_parse_schema_name!( + format!("{dummy_name}"), + SchemaName::Simple(dummy_name.clone()) + ); + + test_parse_schema_name!( + format!("AUTHORIZATION {dummy_authorization}"), + SchemaName::UnnamedAuthorization(dummy_authorization.clone()), + ); + test_parse_schema_name!( + format!("{dummy_name} AUTHORIZATION {dummy_authorization}"), + SchemaName::NamedAuthorization(dummy_name.clone(), dummy_authorization.clone()), + ); +} + +#[test] +fn mysql_parse_index_table_constraint() { + macro_rules! test_parse_table_constraint { + ($dialect:expr, $input:expr, $expected:expr $(,)?) => {{ + $dialect.run_parser_method(&*$input, |parser| { + let constraint = parser.parse_optional_table_constraint().unwrap().unwrap(); + // Validate that the structure is the same as expected + assert_eq!(constraint, $expected); + // Validate that the input and the expected structure serialization are the same + assert_eq!(constraint.to_string(), $input.to_string()); + }); + }}; + } + + let dialect = TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(MySqlDialect {})]); + + test_parse_table_constraint!( + dialect, + "INDEX (c1)", + TableConstraint::Index { + display_as_key: false, + name: None, + index_type: None, + columns: vec![Ident::new("c1")], + } + ); + + test_parse_table_constraint!( + dialect, + "KEY (c1)", + TableConstraint::Index { + display_as_key: true, + name: None, + index_type: None, + columns: vec![Ident::new("c1")], + } + ); + + test_parse_table_constraint!( + dialect, + "INDEX 'index' (c1, c2)", + TableConstraint::Index { + display_as_key: false, + name: Some(Ident::with_quote('\'', "index")), + index_type: None, + columns: vec![Ident::new("c1"), Ident::new("c2")], + } + ); + + test_parse_table_constraint!( + dialect, + "INDEX USING BTREE (c1)", + TableConstraint::Index { + display_as_key: false, + name: None, + index_type: Some(IndexType::BTree), + columns: vec![Ident::new("c1")], + } + ); + + test_parse_table_constraint!( + dialect, + "INDEX USING HASH (c1)", + TableConstraint::Index { + display_as_key: false, + name: None, + index_type: Some(IndexType::Hash), + columns: vec![Ident::new("c1")], + } + ); + + test_parse_table_constraint!( + dialect, + "INDEX idx_name USING BTREE (c1)", + TableConstraint::Index { + display_as_key: false, + name: Some(Ident::new("idx_name")), + index_type: Some(IndexType::BTree), + columns: vec![Ident::new("c1")], + } + ); + + test_parse_table_constraint!( + dialect, + "INDEX idx_name USING HASH (c1)", + TableConstraint::Index { + display_as_key: false, + name: Some(Ident::new("idx_name")), + index_type: Some(IndexType::Hash), + columns: vec![Ident::new("c1")], + } + ); +} + +#[test] +fn test_tokenizer_error_loc() { + let sql = "foo '"; + let ast = Parser::parse_sql(&GenericDialect, sql); + assert_eq!( + ast, + Err(ParserError::TokenizerError( + "Unterminated string literal at Line: 1, Column: 5".to_string() + )) + ); +} + +#[test] +fn test_parser_error_loc() { + let sql = "SELECT this is a syntax error"; + let ast = Parser::parse_sql(&GenericDialect, sql); + 
assert_eq!( + ast, + Err(ParserError::ParserError( + "Expected: [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a at Line: 1, Column: 16" + .to_string() + )) + ); +} + +#[test] +fn test_nested_explain_error() { + let sql = "EXPLAIN EXPLAIN SELECT 1"; + let ast = Parser::parse_sql(&GenericDialect, sql); + assert_eq!( + ast, + Err(ParserError::ParserError( + "Explain must be root of the plan".to_string() + )) + ); +} + +#[test] +fn test_parse_multipart_identifier_positive() { + let dialect = TestedDialects::new(vec![Box::new(GenericDialect {})]); + + // parse multipart with quotes + let expected = vec![ + Ident { + value: "CATALOG".to_string(), + quote_style: None, + span: Span::empty(), + }, + Ident { + value: "F(o)o. \"bar".to_string(), + quote_style: Some('"'), + span: Span::empty(), + }, + Ident { + value: "table".to_string(), + quote_style: None, + span: Span::empty(), + }, + ]; + dialect.run_parser_method(r#"CATALOG."F(o)o. ""bar".table"#, |parser| { + let actual = parser.parse_multipart_identifier().unwrap(); + assert_eq!(expected, actual); + }); + + // allow whitespace between ident parts + let expected = vec![ + Ident { + value: "CATALOG".to_string(), + quote_style: None, + span: Span::empty(), + }, + Ident { + value: "table".to_string(), + quote_style: None, + span: Span::empty(), + }, + ]; + dialect.run_parser_method("CATALOG . table", |parser| { + let actual = parser.parse_multipart_identifier().unwrap(); + assert_eq!(expected, actual); + }); +} + +#[test] +fn test_parse_multipart_identifier_negative() { + macro_rules! test_parse_multipart_identifier_error { + ($input:expr, $expected_err:expr $(,)?) => {{ + all_dialects().run_parser_method(&*$input, |parser| { + let actual_err = parser.parse_multipart_identifier().unwrap_err(); + assert_eq!(actual_err.to_string(), $expected_err); + }); + }}; + } + + test_parse_multipart_identifier_error!( + "", + "sql parser error: Empty input when parsing identifier", + ); + + test_parse_multipart_identifier_error!( + "*schema.table", + "sql parser error: Unexpected token in identifier: *", + ); + + test_parse_multipart_identifier_error!( + "schema.table*", + "sql parser error: Unexpected token in identifier: *", + ); + + test_parse_multipart_identifier_error!( + "schema.table.", + "sql parser error: Trailing period in identifier", + ); + + test_parse_multipart_identifier_error!( + "schema.*", + "sql parser error: Unexpected token following period in identifier: *", + ); +} + +#[test] +fn test_mysql_partition_selection() { + let sql = "SELECT * FROM employees PARTITION (p0, p2)"; + let expected = vec!["p0", "p2"]; + + let ast: Vec = Parser::parse_sql(&MySqlDialect {}, sql).unwrap(); + assert_eq!(ast.len(), 1); + if let Statement::Query(v) = &ast[0] { + if let SetExpr::Select(select) = &*v.body { + assert_eq!(select.from.len(), 1); + let from: &TableWithJoins = &select.from[0]; + let table_factor = &from.relation; + if let TableFactor::Table { partitions, .. } = table_factor { + let actual: Vec<&str> = partitions + .iter() + .map(|ident| ident.value.as_str()) + .collect(); + assert_eq!(expected, actual); + } + } + } else { + panic!("fail to parse mysql partition selection"); + } +} + +#[test] +fn test_replace_into_placeholders() { + let sql = "REPLACE INTO t (a) VALUES (&a)"; + + assert!(Parser::parse_sql(&GenericDialect {}, sql).is_err()); +} + +#[test] +fn test_replace_into_set() { + // NOTE: This is actually valid MySQL syntax, REPLACE and INSERT, + // but the parser does not yet support it. 
+ // https://dev.mysql.com/doc/refman/8.3/en/insert.html + let sql = "REPLACE INTO t SET a='1'"; + + assert!(Parser::parse_sql(&MySqlDialect {}, sql).is_err()); +} + +#[test] +fn test_replace_into_set_placeholder() { + let sql = "REPLACE INTO t SET ?"; + + assert!(Parser::parse_sql(&GenericDialect {}, sql).is_err()); +} + +#[test] +fn test_replace_incomplete() { + let sql = r#"REPLACE"#; + + assert!(Parser::parse_sql(&MySqlDialect {}, sql).is_err()); +} diff --git a/src/parser/tokens.rs b/src/parser/tokens.rs new file mode 100644 index 000000000..21363206e --- /dev/null +++ b/src/parser/tokens.rs @@ -0,0 +1,359 @@ +use super::*; + +use crate::parser_err; + +impl Parser<'_> { + /// Consume the parser and return its underlying token buffer + pub fn into_tokens(self) -> Vec { + self.tokens + } + + /// Get the precedence of the next token + pub fn get_next_precedence(&self) -> Result { + self.dialect.get_next_precedence_default(self) + } + + /// Return the first non-whitespace token that has not yet been processed + /// (or None if reached end-of-file) + pub fn peek_token(&self) -> TokenWithSpan { + self.peek_nth_token(0) + } + + /// Returns the `N` next non-whitespace tokens that have not yet been + /// processed. + /// + /// Example: + /// ```rust + /// # use sqlparser::dialect::GenericDialect; + /// # use sqlparser::parser::Parser; + /// # use sqlparser::keywords::Keyword; + /// # use sqlparser::tokenizer::{Token, Word}; + /// let dialect = GenericDialect {}; + /// let mut parser = Parser::new(&dialect).try_with_sql("ORDER BY foo, bar").unwrap(); + /// + /// // Note that Rust infers the number of tokens to peek based on the + /// // length of the slice pattern! + /// assert!(matches!( + /// parser.peek_tokens(), + /// [ + /// Token::Word(Word { keyword: Keyword::ORDER, .. }), + /// Token::Word(Word { keyword: Keyword::BY, .. }), + /// ] + /// )); + /// ``` + pub fn peek_tokens(&self) -> [Token; N] { + self.peek_tokens_with_location() + .map(|with_loc| with_loc.token) + } + + /// Returns the `N` next non-whitespace tokens with locations that have not + /// yet been processed. + /// + /// See [`Self::peek_token`] for an example. + pub fn peek_tokens_with_location(&self) -> [TokenWithSpan; N] { + let mut index = self.index; + core::array::from_fn(|_| loop { + let token = self.tokens.get(index); + index += 1; + if let Some(TokenWithSpan { + token: Token::Whitespace(_), + span: _, + }) = token + { + continue; + } + break token.cloned().unwrap_or(TokenWithSpan { + token: Token::EOF, + span: Span::empty(), + }); + }) + } + + /// Return nth non-whitespace token that has not yet been processed + pub fn peek_nth_token(&self, mut n: usize) -> TokenWithSpan { + let mut index = self.index; + loop { + index += 1; + match self.tokens.get(index - 1) { + Some(TokenWithSpan { + token: Token::Whitespace(_), + span: _, + }) => continue, + non_whitespace => { + if n == 0 { + return non_whitespace.cloned().unwrap_or(TokenWithSpan { + token: Token::EOF, + span: Span::empty(), + }); + } + n -= 1; + } + } + } + } + + /// Return the first token, possibly whitespace, that has not yet been processed + /// (or None if reached end-of-file). + pub fn peek_token_no_skip(&self) -> TokenWithSpan { + self.peek_nth_token_no_skip(0) + } + + /// Return nth token, possibly whitespace, that has not yet been processed. 
+ pub fn peek_nth_token_no_skip(&self, n: usize) -> TokenWithSpan { + self.tokens + .get(self.index + n) + .cloned() + .unwrap_or(TokenWithSpan { + token: Token::EOF, + span: Span::empty(), + }) + } + + /// Look for all of the expected keywords in sequence, without consuming them + pub fn peek_keywords(&mut self, expected: &[Keyword]) -> bool { + let index = self.index; + let matched = self.parse_keywords(expected); + self.index = index; + matched + } + + /// Return the first non-whitespace token that has not yet been processed + /// (or None if reached end-of-file) and mark it as processed. OK to call + /// repeatedly after reaching EOF. + pub fn next_token(&mut self) -> TokenWithSpan { + loop { + self.index += 1; + match self.tokens.get(self.index - 1) { + Some(TokenWithSpan { + token: Token::Whitespace(_), + span: _, + }) => continue, + token => { + return token + .cloned() + .unwrap_or_else(|| TokenWithSpan::wrap(Token::EOF)) + } + } + } + } + + /// Return the first unprocessed token, possibly whitespace. + pub fn next_token_no_skip(&mut self) -> Option<&TokenWithSpan> { + self.index += 1; + self.tokens.get(self.index - 1) + } + + /// Push back the last one non-whitespace token. Must be called after + /// `next_token()`, otherwise might panic. OK to call after + /// `next_token()` indicates an EOF. + pub fn prev_token(&mut self) { + loop { + assert!(self.index > 0); + self.index -= 1; + if let Some(TokenWithSpan { + token: Token::Whitespace(_), + span: _, + }) = self.tokens.get(self.index) + { + continue; + } + return; + } + } + + /// Report `found` was encountered instead of `expected` + pub fn expected(&self, expected: &str, found: TokenWithSpan) -> Result { + parser_err!( + format!("Expected: {expected}, found: {found}"), + found.span.start + ) + } + + /// If the current token is the `expected` keyword, consume it and returns + /// true. Otherwise, no tokens are consumed and returns false. + #[must_use] + pub fn parse_keyword(&mut self, expected: Keyword) -> bool { + self.parse_keyword_token(expected).is_some() + } + + #[must_use] + pub fn parse_keyword_token(&mut self, expected: Keyword) -> Option { + match self.peek_token().token { + Token::Word(w) if expected == w.keyword => Some(self.next_token()), + _ => None, + } + } + + #[must_use] + pub fn peek_keyword(&mut self, expected: Keyword) -> bool { + matches!(self.peek_token().token, Token::Word(w) if expected == w.keyword) + } + + /// If the current token is the `expected` keyword followed by + /// specified tokens, consume them and returns true. + /// Otherwise, no tokens are consumed and returns false. + /// + /// Note that if the length of `tokens` is too long, this function will + /// not be efficient as it does a loop on the tokens with `peek_nth_token` + /// each time. + pub fn parse_keyword_with_tokens(&mut self, expected: Keyword, tokens: &[Token]) -> bool { + match self.peek_token().token { + Token::Word(w) if expected == w.keyword => { + for (idx, token) in tokens.iter().enumerate() { + if self.peek_nth_token(idx + 1).token != *token { + return false; + } + } + // consume all tokens + for _ in 0..(tokens.len() + 1) { + self.next_token(); + } + true + } + _ => false, + } + } + + /// If the current and subsequent tokens exactly match the `keywords` + /// sequence, consume them and returns true. 
Otherwise, no tokens are + /// consumed and returns false + #[must_use] + pub fn parse_keywords(&mut self, keywords: &[Keyword]) -> bool { + let index = self.index; + for &keyword in keywords { + if !self.parse_keyword(keyword) { + // println!("parse_keywords aborting .. did not find {:?}", keyword); + // reset index and return immediately + self.index = index; + return false; + } + } + true + } + + /// If the current token is one of the given `keywords`, consume the token + /// and return the keyword that matches. Otherwise, no tokens are consumed + /// and returns [`None`]. + #[must_use] + pub fn parse_one_of_keywords(&mut self, keywords: &[Keyword]) -> Option { + match self.peek_token().token { + Token::Word(w) => { + keywords + .iter() + .find(|keyword| **keyword == w.keyword) + .map(|keyword| { + self.next_token(); + *keyword + }) + } + _ => None, + } + } + + /// If the current token is one of the expected keywords, consume the token + /// and return the keyword that matches. Otherwise, return an error. + pub fn expect_one_of_keywords(&mut self, keywords: &[Keyword]) -> Result { + if let Some(keyword) = self.parse_one_of_keywords(keywords) { + Ok(keyword) + } else { + let keywords: Vec = keywords.iter().map(|x| format!("{x:?}")).collect(); + self.expected( + &format!("one of {}", keywords.join(" or ")), + self.peek_token(), + ) + } + } + + /// If the current token is the `expected` keyword, consume the token. + /// Otherwise, return an error. + pub fn expect_keyword(&mut self, expected: Keyword) -> Result { + if let Some(token) = self.parse_keyword_token(expected) { + Ok(token) + } else { + self.expected(format!("{:?}", &expected).as_str(), self.peek_token()) + } + } + + /// If the current and subsequent tokens exactly match the `keywords` + /// sequence, consume them and returns Ok. Otherwise, return an Error. + pub fn expect_keywords(&mut self, expected: &[Keyword]) -> Result<(), ParserError> { + for &kw in expected { + self.expect_keyword(kw)?; + } + Ok(()) + } + + /// Consume the next token if it matches the expected token, otherwise return false + #[must_use] + pub fn consume_token(&mut self, expected: &Token) -> bool { + if self.peek_token() == *expected { + self.next_token(); + true + } else { + false + } + } + + /// If the current and subsequent tokens exactly match the `tokens` + /// sequence, consume them and returns true. Otherwise, no tokens are + /// consumed and returns false + #[must_use] + pub fn consume_tokens(&mut self, tokens: &[Token]) -> bool { + let index = self.index; + for token in tokens { + if !self.consume_token(token) { + self.index = index; + return false; + } + } + true + } + + /// Bail out if the current token is not an expected keyword, or consume it if it is + pub fn expect_token(&mut self, expected: &Token) -> Result { + if self.peek_token() == *expected { + Ok(self.next_token()) + } else { + self.expected(&expected.to_string(), self.peek_token()) + } + } + + /// Peek at the next token and determine if it is a temporal unit + /// like `second`. 
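+    ///
+    /// A short sketch of the intended use, mirroring the doc example on
+    /// [`Self::peek_tokens`]; note that this method only peeks at the next
+    /// token and does not consume it:
+    ///
+    /// ```rust
+    /// # use sqlparser::dialect::GenericDialect;
+    /// # use sqlparser::parser::Parser;
+    /// let dialect = GenericDialect {};
+    /// let mut parser = Parser::new(&dialect).try_with_sql("MINUTE").unwrap();
+    /// assert!(parser.next_token_is_temporal_unit());
+    /// ```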
+ pub fn next_token_is_temporal_unit(&mut self) -> bool { + if let Token::Word(word) = self.peek_token().token { + matches!( + word.keyword, + Keyword::YEAR + | Keyword::MONTH + | Keyword::WEEK + | Keyword::DAY + | Keyword::HOUR + | Keyword::MINUTE + | Keyword::SECOND + | Keyword::CENTURY + | Keyword::DECADE + | Keyword::DOW + | Keyword::DOY + | Keyword::EPOCH + | Keyword::ISODOW + | Keyword::ISOYEAR + | Keyword::JULIAN + | Keyword::MICROSECOND + | Keyword::MICROSECONDS + | Keyword::MILLENIUM + | Keyword::MILLENNIUM + | Keyword::MILLISECOND + | Keyword::MILLISECONDS + | Keyword::NANOSECOND + | Keyword::NANOSECONDS + | Keyword::QUARTER + | Keyword::TIMEZONE + | Keyword::TIMEZONE_HOUR + | Keyword::TIMEZONE_MINUTE + ) + } else { + false + } + } +} diff --git a/src/parser/truncate.rs b/src/parser/truncate.rs new file mode 100644 index 000000000..8eccdaa32 --- /dev/null +++ b/src/parser/truncate.rs @@ -0,0 +1,54 @@ +use crate::parser::*; + +impl Parser<'_> { + pub fn parse_truncate(&mut self) -> Result { + let table = self.parse_keyword(Keyword::TABLE); + let only = self.parse_keyword(Keyword::ONLY); + + let table_names = self + .parse_comma_separated(|p| p.parse_object_name(false))? + .into_iter() + .map(|n| TruncateTableTarget { name: n }) + .collect(); + + let mut partitions = None; + if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + partitions = Some(self.parse_comma_separated(Parser::parse_expr)?); + self.expect_token(&Token::RParen)?; + } + + let mut identity = None; + let mut cascade = None; + + if dialect_of!(self is PostgreSqlDialect | GenericDialect) { + identity = if self.parse_keywords(&[Keyword::RESTART, Keyword::IDENTITY]) { + Some(TruncateIdentityOption::Restart) + } else if self.parse_keywords(&[Keyword::CONTINUE, Keyword::IDENTITY]) { + Some(TruncateIdentityOption::Continue) + } else { + None + }; + + cascade = if self.parse_keyword(Keyword::CASCADE) { + Some(TruncateCascadeOption::Cascade) + } else if self.parse_keyword(Keyword::RESTRICT) { + Some(TruncateCascadeOption::Restrict) + } else { + None + }; + }; + + let on_cluster = self.parse_optional_on_cluster()?; + + Ok(Statement::Truncate { + table_names, + partitions, + table, + only, + identity, + cascade, + on_cluster, + }) + } +} diff --git a/src/parser/uncache.rs b/src/parser/uncache.rs new file mode 100644 index 000000000..bca640a49 --- /dev/null +++ b/src/parser/uncache.rs @@ -0,0 +1,14 @@ +use crate::parser::*; + +impl Parser<'_> { + /// Parse a UNCACHE TABLE statement + pub fn parse_uncache_table(&mut self) -> Result { + self.expect_keyword(Keyword::TABLE)?; + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let table_name = self.parse_object_name(false)?; + Ok(Statement::UNCache { + table_name, + if_exists, + }) + } +} diff --git a/src/parser/unlisten.rs b/src/parser/unlisten.rs new file mode 100644 index 000000000..29e321f6b --- /dev/null +++ b/src/parser/unlisten.rs @@ -0,0 +1,18 @@ +use crate::parser::*; + +impl Parser<'_> { + pub fn parse_unlisten(&mut self) -> Result { + let channel = if self.consume_token(&Token::Mul) { + Ident::new(Expr::Wildcard(AttachedToken::empty()).to_string()) + } else { + match self.parse_identifier(false) { + Ok(expr) => expr, + _ => { + self.prev_token(); + return self.expected("wildcard or identifier", self.peek_token()); + } + } + }; + Ok(Statement::UNLISTEN { channel }) + } +} diff --git a/src/parser/unload.rs b/src/parser/unload.rs new file mode 100644 index 000000000..a696404e5 --- /dev/null +++ b/src/parser/unload.rs @@ 
-0,0 +1,20 @@ +use crate::parser::*; + +impl Parser<'_> { + pub fn parse_unload(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let query = self.parse_query()?; + self.expect_token(&Token::RParen)?; + + self.expect_keyword(Keyword::TO)?; + let to = self.parse_identifier(false)?; + + let with_options = self.parse_options(Keyword::WITH)?; + + Ok(Statement::Unload { + query, + to, + with: with_options, + }) + } +} diff --git a/src/parser/update.rs b/src/parser/update.rs new file mode 100644 index 000000000..5ccbcd126 --- /dev/null +++ b/src/parser/update.rs @@ -0,0 +1,35 @@ +use crate::parser::*; + +impl Parser<'_> { + pub fn parse_update(&mut self) -> Result { + let or = self.parse_conflict_clause(); + let table = self.parse_table_and_joins()?; + self.expect_keyword(Keyword::SET)?; + let assignments = self.parse_comma_separated(Parser::parse_assignment)?; + let from = if self.parse_keyword(Keyword::FROM) + && dialect_of!(self is GenericDialect | PostgreSqlDialect | DuckDbDialect | BigQueryDialect | SnowflakeDialect | RedshiftSqlDialect | MsSqlDialect | SQLiteDialect ) + { + Some(self.parse_table_and_joins()?) + } else { + None + }; + let selection = if self.parse_keyword(Keyword::WHERE) { + Some(self.parse_expr()?) + } else { + None + }; + let returning = if self.parse_keyword(Keyword::RETURNING) { + Some(self.parse_comma_separated(Parser::parse_select_item)?) + } else { + None + }; + Ok(Statement::Update { + table, + assignments, + from, + selection, + returning, + or, + }) + } +} diff --git a/src/parser/use.rs b/src/parser/use.rs new file mode 100644 index 000000000..2605adf01 --- /dev/null +++ b/src/parser/use.rs @@ -0,0 +1,77 @@ +use crate::parser::*; + +impl Parser<'_> { + pub fn parse_use(&mut self) -> Result { + // Determine which keywords are recognized by the current dialect + let parsed_keyword = if dialect_of!(self is HiveDialect) { + // HiveDialect accepts USE DEFAULT; statement without any db specified + if self.parse_keyword(Keyword::DEFAULT) { + return Ok(Statement::Use(Use::Default)); + } + None // HiveDialect doesn't expect any other specific keyword after `USE` + } else if dialect_of!(self is DatabricksDialect) { + self.parse_one_of_keywords(&[Keyword::CATALOG, Keyword::DATABASE, Keyword::SCHEMA]) + } else if dialect_of!(self is SnowflakeDialect) { + self.parse_one_of_keywords(&[ + Keyword::DATABASE, + Keyword::SCHEMA, + Keyword::WAREHOUSE, + Keyword::ROLE, + Keyword::SECONDARY, + ]) + } else { + None // No specific keywords for other dialects, including GenericDialect + }; + + let result = if matches!(parsed_keyword, Some(Keyword::SECONDARY)) { + self.parse_secondary_roles()? + } else { + let obj_name = self.parse_object_name(false)?; + match parsed_keyword { + Some(Keyword::CATALOG) => Use::Catalog(obj_name), + Some(Keyword::DATABASE) => Use::Database(obj_name), + Some(Keyword::SCHEMA) => Use::Schema(obj_name), + Some(Keyword::WAREHOUSE) => Use::Warehouse(obj_name), + Some(Keyword::ROLE) => Use::Role(obj_name), + _ => Use::Object(obj_name), + } + }; + + Ok(Statement::Use(result)) + } + + fn parse_secondary_roles(&mut self) -> Result { + self.expect_keyword(Keyword::ROLES)?; + if self.parse_keyword(Keyword::NONE) { + Ok(Use::SecondaryRoles(SecondaryRoles::None)) + } else if self.parse_keyword(Keyword::ALL) { + Ok(Use::SecondaryRoles(SecondaryRoles::All)) + } else { + let roles = self.parse_comma_separated(|parser| parser.parse_identifier(false))?; + Ok(Use::SecondaryRoles(SecondaryRoles::List(roles))) + } + } + + /// Parse a `SET ROLE` statement. 
Expects SET to be consumed already. + pub(crate) fn parse_set_role( + &mut self, + modifier: Option, + ) -> Result { + self.expect_keyword(Keyword::ROLE)?; + let context_modifier = match modifier { + Some(Keyword::LOCAL) => ContextModifier::Local, + Some(Keyword::SESSION) => ContextModifier::Session, + _ => ContextModifier::None, + }; + + let role_name = if self.parse_keyword(Keyword::NONE) { + None + } else { + Some(self.parse_identifier(false)?) + }; + Ok(Statement::SetRole { + context_modifier, + role_name, + }) + } +} diff --git a/src/parser/value.rs b/src/parser/value.rs new file mode 100644 index 000000000..369da917c --- /dev/null +++ b/src/parser/value.rs @@ -0,0 +1,756 @@ +use super::*; + +use crate::parser_err; + +impl Parser<'_> { + /// Parse a literal value (numbers, strings, date/time, booleans) + pub fn parse_value(&mut self) -> Result { + let next_token = self.next_token(); + let span = next_token.span; + match next_token.token { + Token::Word(w) => match w.keyword { + Keyword::TRUE if self.dialect.supports_boolean_literals() => { + Ok(Value::Boolean(true)) + } + Keyword::FALSE if self.dialect.supports_boolean_literals() => { + Ok(Value::Boolean(false)) + } + Keyword::NULL => Ok(Value::Null), + Keyword::NoKeyword if w.quote_style.is_some() => match w.quote_style { + Some('"') => Ok(Value::DoubleQuotedString(w.value)), + Some('\'') => Ok(Value::SingleQuotedString(w.value)), + _ => self.expected( + "A value?", + TokenWithSpan { + token: Token::Word(w), + span, + }, + )?, + }, + _ => self.expected( + "a concrete value", + TokenWithSpan { + token: Token::Word(w), + span, + }, + ), + }, + // The call to n.parse() returns a bigdecimal when the + // bigdecimal feature is enabled, and is otherwise a no-op + // (i.e., it returns the input string). 
+            Token::Number(n, l) => Ok(Value::Number(Self::parse(n, span.start)?, l)),
+            Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())),
+            Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())),
+            Token::TripleSingleQuotedString(ref s) => {
+                Ok(Value::TripleSingleQuotedString(s.to_string()))
+            }
+            Token::TripleDoubleQuotedString(ref s) => {
+                Ok(Value::TripleDoubleQuotedString(s.to_string()))
+            }
+            Token::DollarQuotedString(ref s) => Ok(Value::DollarQuotedString(s.clone())),
+            Token::SingleQuotedByteStringLiteral(ref s) => {
+                Ok(Value::SingleQuotedByteStringLiteral(s.clone()))
+            }
+            Token::DoubleQuotedByteStringLiteral(ref s) => {
+                Ok(Value::DoubleQuotedByteStringLiteral(s.clone()))
+            }
+            Token::TripleSingleQuotedByteStringLiteral(ref s) => {
+                Ok(Value::TripleSingleQuotedByteStringLiteral(s.clone()))
+            }
+            Token::TripleDoubleQuotedByteStringLiteral(ref s) => {
+                Ok(Value::TripleDoubleQuotedByteStringLiteral(s.clone()))
+            }
+            Token::SingleQuotedRawStringLiteral(ref s) => {
+                Ok(Value::SingleQuotedRawStringLiteral(s.clone()))
+            }
+            Token::DoubleQuotedRawStringLiteral(ref s) => {
+                Ok(Value::DoubleQuotedRawStringLiteral(s.clone()))
+            }
+            Token::TripleSingleQuotedRawStringLiteral(ref s) => {
+                Ok(Value::TripleSingleQuotedRawStringLiteral(s.clone()))
+            }
+            Token::TripleDoubleQuotedRawStringLiteral(ref s) => {
+                Ok(Value::TripleDoubleQuotedRawStringLiteral(s.clone()))
+            }
+            Token::NationalStringLiteral(ref s) => Ok(Value::NationalStringLiteral(s.to_string())),
+            Token::EscapedStringLiteral(ref s) => Ok(Value::EscapedStringLiteral(s.to_string())),
+            Token::UnicodeStringLiteral(ref s) => Ok(Value::UnicodeStringLiteral(s.to_string())),
+            Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())),
+            Token::Placeholder(ref s) => Ok(Value::Placeholder(s.to_string())),
+            tok @ Token::Colon | tok @ Token::AtSign => {
+                // Not calling self.parse_identifier(false)? because only in
+                // placeholders do we want to accept numbers as identifiers:
+                // Snowflake allows numbers as placeholders.
+                let next_token = self.next_token();
+                let ident = match next_token.token {
+                    Token::Word(w) => Ok(w.to_ident(next_token.span)),
+                    Token::Number(w, false) => Ok(Ident::new(w)),
+                    _ => self.expected("placeholder", next_token),
+                }?;
+                let placeholder = tok.to_string() + &ident.value;
+                Ok(Value::Placeholder(placeholder))
+            }
+            unexpected => self.expected(
+                "a value",
+                TokenWithSpan {
+                    token: unexpected,
+                    span,
+                },
+            ),
+        }
+    }
+
+    pub fn parse_values(&mut self, allow_empty: bool) -> Result<Values, ParserError> {
+        let mut explicit_row = false;
+
+        let rows = self.parse_comma_separated(|parser| {
+            if parser.parse_keyword(Keyword::ROW) {
+                explicit_row = true;
+            }
+
+            parser.expect_token(&Token::LParen)?;
+            if allow_empty && parser.peek_token().token == Token::RParen {
+                parser.next_token();
+                Ok(vec![])
+            } else {
+                let exprs = parser.parse_comma_separated(Parser::parse_expr)?;
+                parser.expect_token(&Token::RParen)?;
+                Ok(exprs)
+            }
+        })?;
+        Ok(Values { explicit_row, rows })
+    }
+
+    /// Parse an unsigned numeric literal
+    pub fn parse_number_value(&mut self) -> Result<Value, ParserError> {
+        match self.parse_value()? {
+            v @ Value::Number(_, _) => Ok(v),
+            v @ Value::Placeholder(_) => Ok(v),
+            _ => {
+                self.prev_token();
+                self.expected("literal number", self.peek_token())
+            }
+        }
+    }
+
+    /// Parse a numeric literal as an expression.
Returns a [`Expr::UnaryOp`] if the number is signed, + /// otherwise returns a [`Expr::Value`] + pub fn parse_number(&mut self) -> Result { + let next_token = self.next_token(); + match next_token.token { + Token::Plus => Ok(Expr::UnaryOp { + op: UnaryOperator::Plus, + expr: Box::new(Expr::Value(self.parse_number_value()?)), + }), + Token::Minus => Ok(Expr::UnaryOp { + op: UnaryOperator::Minus, + expr: Box::new(Expr::Value(self.parse_number_value()?)), + }), + _ => { + self.prev_token(); + Ok(Expr::Value(self.parse_number_value()?)) + } + } + } + + pub(crate) fn parse_introduced_string_value(&mut self) -> Result { + let next_token = self.next_token(); + let span = next_token.span; + match next_token.token { + Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())), + Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())), + Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())), + unexpected => self.expected( + "a string value", + TokenWithSpan { + token: unexpected, + span, + }, + ), + } + } + + /// Parse an unsigned literal integer/long + pub fn parse_literal_uint(&mut self) -> Result { + let next_token = self.next_token(); + match next_token.token { + Token::Number(s, _) => Self::parse::(s, next_token.span.start), + _ => self.expected("literal int", next_token), + } + } + + /// Parse a literal string + pub fn parse_literal_string(&mut self) -> Result { + let next_token = self.next_token(); + match next_token.token { + Token::Word(Word { + value, + keyword: Keyword::NoKeyword, + .. + }) => Ok(value), + Token::SingleQuotedString(s) => Ok(s), + Token::DoubleQuotedString(s) => Ok(s), + Token::EscapedStringLiteral(s) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { + Ok(s) + } + Token::UnicodeStringLiteral(s) => Ok(s), + _ => self.expected("literal string", next_token), + } + } + + pub fn parse_enum_values(&mut self) -> Result, ParserError> { + self.expect_token(&Token::LParen)?; + let values = self.parse_comma_separated(|parser| { + let name = parser.parse_literal_string()?; + let e = if parser.consume_token(&Token::Eq) { + let value = parser.parse_number()?; + EnumMember::NamedValue(name, value) + } else { + EnumMember::Name(name) + }; + Ok(e) + })?; + self.expect_token(&Token::RParen)?; + + Ok(values) + } + + /// Parse datetime64 [1] + /// Syntax + /// ```sql + /// DateTime64(precision[, timezone]) + /// ``` + /// + /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/datetime64 + pub fn parse_datetime_64(&mut self) -> Result<(u64, Option), ParserError> { + self.expect_keyword(Keyword::DATETIME64)?; + self.expect_token(&Token::LParen)?; + let precision = self.parse_literal_uint()?; + let time_zone = if self.consume_token(&Token::Comma) { + Some(self.parse_literal_string()?) 
+ } else { + None + }; + self.expect_token(&Token::RParen)?; + Ok((precision, time_zone)) + } + + /// Parse a SQL datatype (in the context of a CREATE TABLE statement for example) + pub fn parse_data_type(&mut self) -> Result { + let (ty, trailing_bracket) = self.parse_data_type_helper()?; + if trailing_bracket.0 { + return parser_err!( + format!("unmatched > after parsing data type {ty}"), + self.peek_token() + ); + } + + Ok(ty) + } + + pub(crate) fn parse_data_type_helper( + &mut self, + ) -> Result<(DataType, MatchedTrailingBracket), ParserError> { + let next_token = self.next_token(); + let mut trailing_bracket: MatchedTrailingBracket = false.into(); + let mut data = match next_token.token { + Token::Word(w) => match w.keyword { + Keyword::BOOLEAN => Ok(DataType::Boolean), + Keyword::BOOL => Ok(DataType::Bool), + Keyword::FLOAT => Ok(DataType::Float(self.parse_optional_precision()?)), + Keyword::REAL => Ok(DataType::Real), + Keyword::FLOAT4 => Ok(DataType::Float4), + Keyword::FLOAT32 => Ok(DataType::Float32), + Keyword::FLOAT64 => Ok(DataType::Float64), + Keyword::FLOAT8 => Ok(DataType::Float8), + Keyword::DOUBLE => { + if self.parse_keyword(Keyword::PRECISION) { + Ok(DataType::DoublePrecision) + } else { + Ok(DataType::Double) + } + } + Keyword::TINYINT => { + let optional_precision = self.parse_optional_precision(); + if self.parse_keyword(Keyword::UNSIGNED) { + Ok(DataType::UnsignedTinyInt(optional_precision?)) + } else { + Ok(DataType::TinyInt(optional_precision?)) + } + } + Keyword::INT2 => { + let optional_precision = self.parse_optional_precision(); + if self.parse_keyword(Keyword::UNSIGNED) { + Ok(DataType::UnsignedInt2(optional_precision?)) + } else { + Ok(DataType::Int2(optional_precision?)) + } + } + Keyword::SMALLINT => { + let optional_precision = self.parse_optional_precision(); + if self.parse_keyword(Keyword::UNSIGNED) { + Ok(DataType::UnsignedSmallInt(optional_precision?)) + } else { + Ok(DataType::SmallInt(optional_precision?)) + } + } + Keyword::MEDIUMINT => { + let optional_precision = self.parse_optional_precision(); + if self.parse_keyword(Keyword::UNSIGNED) { + Ok(DataType::UnsignedMediumInt(optional_precision?)) + } else { + Ok(DataType::MediumInt(optional_precision?)) + } + } + Keyword::INT => { + let optional_precision = self.parse_optional_precision(); + if self.parse_keyword(Keyword::UNSIGNED) { + Ok(DataType::UnsignedInt(optional_precision?)) + } else { + Ok(DataType::Int(optional_precision?)) + } + } + Keyword::INT4 => { + let optional_precision = self.parse_optional_precision(); + if self.parse_keyword(Keyword::UNSIGNED) { + Ok(DataType::UnsignedInt4(optional_precision?)) + } else { + Ok(DataType::Int4(optional_precision?)) + } + } + Keyword::INT8 => { + let optional_precision = self.parse_optional_precision(); + if self.parse_keyword(Keyword::UNSIGNED) { + Ok(DataType::UnsignedInt8(optional_precision?)) + } else { + Ok(DataType::Int8(optional_precision?)) + } + } + Keyword::INT16 => Ok(DataType::Int16), + Keyword::INT32 => Ok(DataType::Int32), + Keyword::INT64 => Ok(DataType::Int64), + Keyword::INT128 => Ok(DataType::Int128), + Keyword::INT256 => Ok(DataType::Int256), + Keyword::INTEGER => { + let optional_precision = self.parse_optional_precision(); + if self.parse_keyword(Keyword::UNSIGNED) { + Ok(DataType::UnsignedInteger(optional_precision?)) + } else { + Ok(DataType::Integer(optional_precision?)) + } + } + Keyword::BIGINT => { + let optional_precision = self.parse_optional_precision(); + if self.parse_keyword(Keyword::UNSIGNED) { + 
+                Keyword::BIGINT => {
+                    let optional_precision = self.parse_optional_precision();
+                    if self.parse_keyword(Keyword::UNSIGNED) {
+                        Ok(DataType::UnsignedBigInt(optional_precision?))
+                    } else {
+                        Ok(DataType::BigInt(optional_precision?))
+                    }
+                }
+                Keyword::UINT8 => Ok(DataType::UInt8),
+                Keyword::UINT16 => Ok(DataType::UInt16),
+                Keyword::UINT32 => Ok(DataType::UInt32),
+                Keyword::UINT64 => Ok(DataType::UInt64),
+                Keyword::UINT128 => Ok(DataType::UInt128),
+                Keyword::UINT256 => Ok(DataType::UInt256),
+                Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_character_length()?)),
+                Keyword::NVARCHAR => {
+                    Ok(DataType::Nvarchar(self.parse_optional_character_length()?))
+                }
+                Keyword::CHARACTER => {
+                    if self.parse_keyword(Keyword::VARYING) {
+                        Ok(DataType::CharacterVarying(
+                            self.parse_optional_character_length()?,
+                        ))
+                    } else if self.parse_keywords(&[Keyword::LARGE, Keyword::OBJECT]) {
+                        Ok(DataType::CharacterLargeObject(
+                            self.parse_optional_precision()?,
+                        ))
+                    } else {
+                        Ok(DataType::Character(self.parse_optional_character_length()?))
+                    }
+                }
+                Keyword::CHAR => {
+                    if self.parse_keyword(Keyword::VARYING) {
+                        Ok(DataType::CharVarying(
+                            self.parse_optional_character_length()?,
+                        ))
+                    } else if self.parse_keywords(&[Keyword::LARGE, Keyword::OBJECT]) {
+                        Ok(DataType::CharLargeObject(self.parse_optional_precision()?))
+                    } else {
+                        Ok(DataType::Char(self.parse_optional_character_length()?))
+                    }
+                }
+                Keyword::CLOB => Ok(DataType::Clob(self.parse_optional_precision()?)),
+                Keyword::BINARY => Ok(DataType::Binary(self.parse_optional_precision()?)),
+                Keyword::VARBINARY => Ok(DataType::Varbinary(self.parse_optional_precision()?)),
+                Keyword::BLOB => Ok(DataType::Blob(self.parse_optional_precision()?)),
+                Keyword::TINYBLOB => Ok(DataType::TinyBlob),
+                Keyword::MEDIUMBLOB => Ok(DataType::MediumBlob),
+                Keyword::LONGBLOB => Ok(DataType::LongBlob),
+                Keyword::BYTES => Ok(DataType::Bytes(self.parse_optional_precision()?)),
+                Keyword::BIT => {
+                    if self.parse_keyword(Keyword::VARYING) {
+                        Ok(DataType::BitVarying(self.parse_optional_precision()?))
+                    } else {
+                        Ok(DataType::Bit(self.parse_optional_precision()?))
+                    }
+                }
+                Keyword::UUID => Ok(DataType::Uuid),
+                Keyword::DATE => Ok(DataType::Date),
+                Keyword::DATE32 => Ok(DataType::Date32),
+                Keyword::DATETIME => Ok(DataType::Datetime(self.parse_optional_precision()?)),
+                Keyword::DATETIME64 => {
+                    self.prev_token();
+                    let (precision, time_zone) = self.parse_datetime_64()?;
+                    Ok(DataType::Datetime64(precision, time_zone))
+                }
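+                // TIMESTAMP and TIME take an optional precision and an optional
+                // WITH/WITHOUT TIME ZONE qualifier, e.g. `TIMESTAMP(3) WITH TIME ZONE`
+                // parses to `Timestamp(Some(3), TimezoneInfo::WithTimeZone)`; the
+                // Postgres shorthands TIMESTAMPTZ and TIMETZ map to `TimezoneInfo::Tz`.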
+                Keyword::TIMESTAMP => {
+                    let precision = self.parse_optional_precision()?;
+                    let tz = if self.parse_keyword(Keyword::WITH) {
+                        self.expect_keywords(&[Keyword::TIME, Keyword::ZONE])?;
+                        TimezoneInfo::WithTimeZone
+                    } else if self.parse_keyword(Keyword::WITHOUT) {
+                        self.expect_keywords(&[Keyword::TIME, Keyword::ZONE])?;
+                        TimezoneInfo::WithoutTimeZone
+                    } else {
+                        TimezoneInfo::None
+                    };
+                    Ok(DataType::Timestamp(precision, tz))
+                }
+                Keyword::TIMESTAMPTZ => Ok(DataType::Timestamp(
+                    self.parse_optional_precision()?,
+                    TimezoneInfo::Tz,
+                )),
+                Keyword::TIME => {
+                    let precision = self.parse_optional_precision()?;
+                    let tz = if self.parse_keyword(Keyword::WITH) {
+                        self.expect_keywords(&[Keyword::TIME, Keyword::ZONE])?;
+                        TimezoneInfo::WithTimeZone
+                    } else if self.parse_keyword(Keyword::WITHOUT) {
+                        self.expect_keywords(&[Keyword::TIME, Keyword::ZONE])?;
+                        TimezoneInfo::WithoutTimeZone
+                    } else {
+                        TimezoneInfo::None
+                    };
+                    Ok(DataType::Time(precision, tz))
+                }
+                Keyword::TIMETZ => Ok(DataType::Time(
+                    self.parse_optional_precision()?,
+                    TimezoneInfo::Tz,
+                )),
+                // Interval types can be followed by a complicated interval
+                // qualifier that we don't currently support. See
+                // parse_interval for a taste.
+                Keyword::INTERVAL => Ok(DataType::Interval),
+                Keyword::JSON => Ok(DataType::JSON),
+                Keyword::JSONB => Ok(DataType::JSONB),
+                Keyword::REGCLASS => Ok(DataType::Regclass),
+                Keyword::STRING => Ok(DataType::String(self.parse_optional_precision()?)),
+                Keyword::FIXEDSTRING => {
+                    self.expect_token(&Token::LParen)?;
+                    let character_length = self.parse_literal_uint()?;
+                    self.expect_token(&Token::RParen)?;
+                    Ok(DataType::FixedString(character_length))
+                }
+                Keyword::TEXT => Ok(DataType::Text),
+                Keyword::TINYTEXT => Ok(DataType::TinyText),
+                Keyword::MEDIUMTEXT => Ok(DataType::MediumText),
+                Keyword::LONGTEXT => Ok(DataType::LongText),
+                Keyword::BYTEA => Ok(DataType::Bytea),
+                Keyword::NUMERIC => Ok(DataType::Numeric(
+                    self.parse_exact_number_optional_precision_scale()?,
+                )),
+                Keyword::DECIMAL => Ok(DataType::Decimal(
+                    self.parse_exact_number_optional_precision_scale()?,
+                )),
+                Keyword::DEC => Ok(DataType::Dec(
+                    self.parse_exact_number_optional_precision_scale()?,
+                )),
+                Keyword::BIGNUMERIC => Ok(DataType::BigNumeric(
+                    self.parse_exact_number_optional_precision_scale()?,
+                )),
+                Keyword::BIGDECIMAL => Ok(DataType::BigDecimal(
+                    self.parse_exact_number_optional_precision_scale()?,
+                )),
+                Keyword::ENUM => Ok(DataType::Enum(self.parse_enum_values()?, None)),
+                Keyword::ENUM8 => Ok(DataType::Enum(self.parse_enum_values()?, Some(8))),
+                Keyword::ENUM16 => Ok(DataType::Enum(self.parse_enum_values()?, Some(16))),
+                Keyword::SET => Ok(DataType::Set(self.parse_string_values()?)),
+                Keyword::ARRAY => {
+                    if dialect_of!(self is SnowflakeDialect) {
+                        Ok(DataType::Array(ArrayElemTypeDef::None))
+                    } else if dialect_of!(self is ClickHouseDialect) {
+                        Ok(self.parse_sub_type(|internal_type| {
+                            DataType::Array(ArrayElemTypeDef::Parenthesis(internal_type))
+                        })?)
+                    } else {
+                        self.expect_token(&Token::Lt)?;
+                        let (inside_type, _trailing_bracket) = self.parse_data_type_helper()?;
+                        trailing_bracket = self.expect_closing_angle_bracket(_trailing_bracket)?;
+                        Ok(DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
+                            inside_type,
+                        ))))
+                    }
+                }
+                Keyword::STRUCT if dialect_of!(self is DuckDbDialect) => {
+                    self.prev_token();
+                    let field_defs = self.parse_duckdb_struct_type_def()?;
+                    Ok(DataType::Struct(field_defs, StructBracketKind::Parentheses))
+                }
+                Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => {
+                    self.prev_token();
+                    let (field_defs, _trailing_bracket) =
+                        self.parse_struct_type_def(Self::parse_struct_field_def)?;
+                    trailing_bracket = _trailing_bracket;
+                    Ok(DataType::Struct(
+                        field_defs,
+                        StructBracketKind::AngleBrackets,
+                    ))
+                }
+                Keyword::UNION if dialect_of!(self is DuckDbDialect | GenericDialect) => {
+                    self.prev_token();
+                    let fields = self.parse_union_type_def()?;
+                    Ok(DataType::Union(fields))
+                }
+                Keyword::NULLABLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => {
+                    Ok(self.parse_sub_type(DataType::Nullable)?)
+                }
+                Keyword::LOWCARDINALITY if dialect_of!(self is ClickHouseDialect | GenericDialect) => {
+                    Ok(self.parse_sub_type(DataType::LowCardinality)?)
+                }
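+                // ClickHouse composite types: MAP and TUPLE push the keyword back
+                // with prev_token() so their dedicated parsers can re-read the full
+                // definition, while NESTED consumes it and parses the parenthesized
+                // column list directly.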
+                Keyword::MAP if dialect_of!(self is ClickHouseDialect | GenericDialect) => {
+                    self.prev_token();
+                    let (key_data_type, value_data_type) = self.parse_click_house_map_def()?;
+                    Ok(DataType::Map(
+                        Box::new(key_data_type),
+                        Box::new(value_data_type),
+                    ))
+                }
+                Keyword::NESTED if dialect_of!(self is ClickHouseDialect | GenericDialect) => {
+                    self.expect_token(&Token::LParen)?;
+                    let field_defs = self.parse_comma_separated(Parser::parse_column_def)?;
+                    self.expect_token(&Token::RParen)?;
+                    Ok(DataType::Nested(field_defs))
+                }
+                Keyword::TUPLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => {
+                    self.prev_token();
+                    let field_defs = self.parse_click_house_tuple_def()?;
+                    Ok(DataType::Tuple(field_defs))
+                }
+                Keyword::TRIGGER => Ok(DataType::Trigger),
+                _ => {
+                    self.prev_token();
+                    let type_name = self.parse_object_name(false)?;
+                    if let Some(modifiers) = self.parse_optional_type_modifiers()? {
+                        Ok(DataType::Custom(type_name, modifiers))
+                    } else {
+                        Ok(DataType::Custom(type_name, vec![]))
+                    }
+                }
+            },
+            _ => self.expected("a data type name", next_token),
+        }?;
+
+        // Parse array data types. Note: this is postgresql-specific and different from
+        // Keyword::ARRAY syntax from above, e.g. `INT[]` or `INT[3]`.
+        while self.consume_token(&Token::LBracket) {
+            let size = if dialect_of!(self is GenericDialect | DuckDbDialect | PostgreSqlDialect) {
+                self.maybe_parse(|p| p.parse_literal_uint())?
+            } else {
+                None
+            };
+            self.expect_token(&Token::RBracket)?;
+            data = DataType::Array(ArrayElemTypeDef::SquareBracket(Box::new(data), size))
+        }
+        Ok((data, trailing_bracket))
+    }
+
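+    /// Parse the optional `(precision [, scale])` suffix of an exact-number
+    /// type: `NUMERIC` yields `ExactNumberInfo::None`, `NUMERIC(10)` yields
+    /// `Precision(10)`, and `NUMERIC(10, 2)` yields `PrecisionAndScale(10, 2)`.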
+    pub fn parse_exact_number_optional_precision_scale(
+        &mut self,
+    ) -> Result<ExactNumberInfo, ParserError> {
+        if self.consume_token(&Token::LParen) {
+            let precision = self.parse_literal_uint()?;
+            let scale = if self.consume_token(&Token::Comma) {
+                Some(self.parse_literal_uint()?)
+            } else {
+                None
+            };
+
+            self.expect_token(&Token::RParen)?;
+
+            match scale {
+                None => Ok(ExactNumberInfo::Precision(precision)),
+                Some(scale) => Ok(ExactNumberInfo::PrecisionAndScale(precision, scale)),
+            }
+        } else {
+            Ok(ExactNumberInfo::None)
+        }
+    }
+
+    pub fn parse_optional_character_length(
+        &mut self,
+    ) -> Result<Option<CharacterLength>, ParserError> {
+        if self.consume_token(&Token::LParen) {
+            let character_length = self.parse_character_length()?;
+            self.expect_token(&Token::RParen)?;
+            Ok(Some(character_length))
+        } else {
+            Ok(None)
+        }
+    }
+
+    pub fn parse_optional_precision(&mut self) -> Result<Option<u64>, ParserError> {
+        if self.consume_token(&Token::LParen) {
+            let n = self.parse_literal_uint()?;
+            self.expect_token(&Token::RParen)?;
+            Ok(Some(n))
+        } else {
+            Ok(None)
+        }
+    }
+
+    pub fn parse_optional_type_modifiers(&mut self) -> Result<Option<Vec<String>>, ParserError> {
+        if self.consume_token(&Token::LParen) {
+            let mut modifiers = Vec::new();
+            loop {
+                let next_token = self.next_token();
+                match next_token.token {
+                    Token::Word(w) => modifiers.push(w.to_string()),
+                    Token::Number(n, _) => modifiers.push(n),
+                    Token::SingleQuotedString(s) => modifiers.push(s),
+
+                    Token::Comma => {
+                        continue;
+                    }
+                    Token::RParen => {
+                        break;
+                    }
+                    _ => self.expected("type modifiers", next_token)?,
+                }
+            }
+
+            Ok(Some(modifiers))
+        } else {
+            Ok(None)
+        }
+    }
+
+    pub fn parse_string_values(&mut self) -> Result<Vec<String>, ParserError> {
+        self.expect_token(&Token::LParen)?;
+        let mut values = Vec::new();
+        loop {
+            let next_token = self.next_token();
+            match next_token.token {
+                Token::SingleQuotedString(value) => values.push(value),
+                _ => self.expected("a string", next_token)?,
+            }
+            let next_token = self.next_token();
+            match next_token.token {
+                Token::Comma => (),
+                Token::RParen => break,
+                _ => self.expected(", or )", next_token)?,
+            }
+        }
+        Ok(values)
+    }
+
+    /// Parse a field definition in a [struct] or [tuple].
+    /// Syntax:
+    ///
+    /// ```sql
+    /// [field_name] field_type
+    /// ```
+    ///
+    /// [struct]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#declaring_a_struct_type
+    /// [tuple]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple
+    pub(crate) fn parse_struct_field_def(
+        &mut self,
+    ) -> Result<(StructField, MatchedTrailingBracket), ParserError> {
+        // Look beyond the next item to infer whether both field name
+        // and type are specified.
+        let is_anonymous_field = !matches!(
+            (self.peek_nth_token(0).token, self.peek_nth_token(1).token),
+            (Token::Word(_), Token::Word(_))
+        );
+
+        let field_name = if is_anonymous_field {
+            None
+        } else {
+            Some(self.parse_identifier(false)?)
+        };
+
+        let (field_type, trailing_bracket) = self.parse_data_type_helper()?;
+
+        Ok((
+            StructField {
+                field_name,
+                field_type,
+            },
+            trailing_bracket,
+        ))
+    }
+
+    /// Parse a parenthesized sub data type
+    fn parse_sub_type<F>(&mut self, parent_type: F) -> Result<DataType, ParserError>
+    where
+        F: FnOnce(Box<DataType>) -> DataType,
+    {
+        self.expect_token(&Token::LParen)?;
+        let inside_type = self.parse_data_type()?;
+        self.expect_token(&Token::RParen)?;
+        Ok(parent_type(inside_type.into()))
+    }
+
+    /// For nested types that use the angle bracket syntax, this matches either
+    /// `>`, `>>` or nothing depending on which variant is expected (specified by the previously
+    /// matched `trailing_bracket` argument). It returns whether there is a trailing `>`
+    /// left to be matched - (i.e. if '>>' was matched).
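+    ///
+    /// For example, while parsing `ARRAY<ARRAY<INT>>` the inner call consumes
+    /// the `>>` token and reports a trailing `>`, which the outer call then
+    /// treats as already matched.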
+    pub(crate) fn expect_closing_angle_bracket(
+        &mut self,
+        trailing_bracket: MatchedTrailingBracket,
+    ) -> Result<MatchedTrailingBracket, ParserError> {
+        let trailing_bracket = if !trailing_bracket.0 {
+            match self.peek_token().token {
+                Token::Gt => {
+                    self.next_token();
+                    false.into()
+                }
+                Token::ShiftRight => {
+                    self.next_token();
+                    true.into()
+                }
+                _ => return self.expected(">", self.peek_token()),
+            }
+        } else {
+            false.into()
+        };
+
+        Ok(trailing_bracket)
+    }
+
+    /// DuckDB specific: Parse a Union type definition as a sequence of field-value pairs.
+    ///
+    /// Syntax:
+    ///
+    /// ```sql
+    /// UNION(field_name field_type[,...])
+    /// ```
+    ///
+    /// [1]: https://duckdb.org/docs/sql/data_types/union.html
+    fn parse_union_type_def(&mut self) -> Result<Vec<UnionField>, ParserError> {
+        self.expect_keyword(Keyword::UNION)?;
+
+        self.expect_token(&Token::LParen)?;
+
+        let fields = self.parse_comma_separated(|p| {
+            Ok(UnionField {
+                field_name: p.parse_identifier(false)?,
+                field_type: p.parse_data_type()?,
+            })
+        })?;
+
+        self.expect_token(&Token::RParen)?;
+
+        Ok(fields)
+    }
+}
diff --git a/src/parser/window.rs b/src/parser/window.rs
new file mode 100644
index 000000000..9f6130b15
--- /dev/null
+++ b/src/parser/window.rs
@@ -0,0 +1,105 @@
+use crate::parser::*;
+
+impl Parser<'_> {
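+    /// Parse one named window definition from a `WINDOW` clause, e.g. the
+    /// `w AS (PARTITION BY a)` item in
+    /// `SELECT ... FROM t WINDOW w AS (PARTITION BY a)`. Dialects whose
+    /// supports_window_clause_named_window_reference() returns true
+    /// (BigQuery, for instance) also accept a bare reference such as `w2 AS w`.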
+    pub fn parse_named_window(&mut self) -> Result<NamedWindowDefinition, ParserError> {
+        let ident = self.parse_identifier(false)?;
+        self.expect_keyword(Keyword::AS)?;
+
+        let window_expr = if self.consume_token(&Token::LParen) {
+            NamedWindowExpr::WindowSpec(self.parse_window_spec()?)
+        } else if self.dialect.supports_window_clause_named_window_reference() {
+            NamedWindowExpr::NamedWindow(self.parse_identifier(false)?)
+        } else {
+            return self.expected("(", self.peek_token());
+        };
+
+        Ok(NamedWindowDefinition(ident, window_expr))
+    }
+
+    pub fn parse_window_spec(&mut self) -> Result<WindowSpec, ParserError> {
+        let window_name = match self.peek_token().token {
+            Token::Word(word) if word.keyword == Keyword::NoKeyword => {
+                self.parse_optional_indent()?
+            }
+            _ => None,
+        };
+
+        let partition_by = if self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) {
+            self.parse_comma_separated(Parser::parse_expr)?
+        } else {
+            vec![]
+        };
+        let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) {
+            self.parse_comma_separated(Parser::parse_order_by_expr)?
+        } else {
+            vec![]
+        };
+
+        let window_frame = if !self.consume_token(&Token::RParen) {
+            let window_frame = self.parse_window_frame()?;
+            self.expect_token(&Token::RParen)?;
+            Some(window_frame)
+        } else {
+            None
+        };
+        Ok(WindowSpec {
+            window_name,
+            partition_by,
+            order_by,
+            window_frame,
+        })
+    }
+
+    pub fn parse_window_frame(&mut self) -> Result<WindowFrame, ParserError> {
+        let units = self.parse_window_frame_units()?;
+        let (start_bound, end_bound) = if self.parse_keyword(Keyword::BETWEEN) {
+            let start_bound = self.parse_window_frame_bound()?;
+            self.expect_keyword(Keyword::AND)?;
+            let end_bound = Some(self.parse_window_frame_bound()?);
+            (start_bound, end_bound)
+        } else {
+            (self.parse_window_frame_bound()?, None)
+        };
+        Ok(WindowFrame {
+            units,
+            start_bound,
+            end_bound,
+        })
+    }
+
+    pub fn parse_window_frame_units(&mut self) -> Result<WindowFrameUnits, ParserError> {
+        let next_token = self.next_token();
+        match &next_token.token {
+            Token::Word(w) => match w.keyword {
+                Keyword::ROWS => Ok(WindowFrameUnits::Rows),
+                Keyword::RANGE => Ok(WindowFrameUnits::Range),
+                Keyword::GROUPS => Ok(WindowFrameUnits::Groups),
+                _ => self.expected("ROWS, RANGE, GROUPS", next_token)?,
+            },
+            _ => self.expected("ROWS, RANGE, GROUPS", next_token),
+        }
+    }
+
+    /// Parse `CURRENT ROW` or `{ <positive number> | UNBOUNDED } { PRECEDING | FOLLOWING }`
+    pub fn parse_window_frame_bound(&mut self) -> Result<WindowFrameBound, ParserError> {
+        if self.parse_keywords(&[Keyword::CURRENT, Keyword::ROW]) {
+            Ok(WindowFrameBound::CurrentRow)
+        } else {
+            let rows = if self.parse_keyword(Keyword::UNBOUNDED) {
+                None
+            } else {
+                Some(Box::new(match self.peek_token().token {
+                    Token::SingleQuotedString(_) => self.parse_interval()?,
+                    _ => self.parse_expr()?,
+                }))
+            };
+            if self.parse_keyword(Keyword::PRECEDING) {
+                Ok(WindowFrameBound::Preceding(rows))
+            } else if self.parse_keyword(Keyword::FOLLOWING) {
+                Ok(WindowFrameBound::Following(rows))
+            } else {
+                self.expected("PRECEDING or FOLLOWING", self.peek_token())
+            }
+        }
+    }
+}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index aacfc16fa..cf4bb819f 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -40,7 +40,7 @@ use serde::{Deserialize, Serialize};
 #[cfg(feature = "visitor")]
 use sqlparser_derive::{Visit, VisitMut};

-use crate::ast::DollarQuotedString;
+use crate::ast::{DollarQuotedString, Ident};
 use crate::dialect::Dialect;
 use crate::dialect::{
     BigQueryDialect, DuckDbDialect, GenericDialect, MySqlDialect, PostgreSqlDialect,
@@ -388,6 +388,14 @@ impl fmt::Display for Word {
 }

 impl Word {
+    pub fn to_ident(&self, span: Span) -> Ident {
+        Ident {
+            value: self.value.clone(),
+            quote_style: self.quote_style,
+            span,
+        }
+    }
+
     fn matching_end_quote(ch: char) -> char {
         match ch {
             '"' => '"', // ANSI and most dialects