Skip to content

Commit

Permalink
Parse SETTINGS clause for ClickHouse table-valued functions (apache#1358
Browse files Browse the repository at this point in the history
)
  • Loading branch information
Jesse-Bakker authored Aug 1, 2024
1 parent a692ba5 commit d49acc6
Show file tree
Hide file tree
Showing 4 changed files with 167 additions and 34 deletions.
4 changes: 2 additions & 2 deletions src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ pub use self::query::{
OffsetRows, OrderBy, OrderByExpr, PivotValueSource, Query, RenameSelectItem,
RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select,
SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table,
TableAlias, TableFactor, TableVersion, TableWithJoins, Top, TopQuantity, ValueTableMode,
Values, WildcardAdditionalOptions, With, WithFill,
TableAlias, TableFactor, TableFunctionArgs, TableVersion, TableWithJoins, Top, TopQuantity,
ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill,
};
pub use self::value::{
escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString,
Expand Down
25 changes: 23 additions & 2 deletions src/ast/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -899,6 +899,19 @@ impl fmt::Display for ExprWithAlias {
}
}

/// Arguments to a table-valued function, i.e. everything written between the
/// parentheses of a `table_function(...)` call used as a table factor.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct TableFunctionArgs {
/// The (possibly empty) list of function arguments that precede any
/// `SETTINGS` clause.
pub args: Vec<FunctionArg>,
/// ClickHouse-specific SETTINGS clause.
/// For example,
/// `SELECT * FROM executable('generate_random.py', TabSeparated, 'id UInt32, random String', SETTINGS send_chunk_header = false, pool_size = 16)`
/// [`executable` table function](https://clickhouse.com/docs/en/engines/table-functions/executable)
///
/// `None` when the call carries no `SETTINGS` clause.
pub settings: Option<Vec<Setting>>,
}

/// A table name or a parenthesized subquery with an optional alias
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
Expand All @@ -916,7 +929,7 @@ pub enum TableFactor {
/// This field's value is `Some(v)`, where `v` holds the (possibly empty)
/// vector of arguments plus any trailing `SETTINGS` clause, in the case of a
/// table-valued function call, whereas it's `None` in the case of a regular
/// table name.
args: Option<Vec<FunctionArg>>,
args: Option<TableFunctionArgs>,
/// MSSQL-specific `WITH (...)` hints such as NOLOCK.
with_hints: Vec<Expr>,
/// Optional version qualifier to facilitate table time-travel, as
Expand Down Expand Up @@ -1314,7 +1327,15 @@ impl fmt::Display for TableFactor {
write!(f, "PARTITION ({})", display_comma_separated(partitions))?;
}
if let Some(args) = args {
write!(f, "({})", display_comma_separated(args))?;
write!(f, "(")?;
write!(f, "{}", display_comma_separated(&args.args))?;
if let Some(ref settings) = args.settings {
if !args.args.is_empty() {
write!(f, ", ")?;
}
write!(f, "SETTINGS {}", display_comma_separated(settings))?;
}
write!(f, ")")?;
}
if *with_ordinality {
write!(f, " WITH ORDINALITY")?;
Expand Down
95 changes: 65 additions & 30 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3430,6 +3430,29 @@ impl<'a> Parser<'a> {
Ok(values)
}

/// Consume the separator of a comma-separated syntax element and report
/// whether the list has ended.
///
/// Returns `true` when there is NO further element to parse, i.e. when:
/// - the next token is not a comma (nothing is consumed in that case), or
/// - a comma was consumed, `self.options.trailing_commas` is enabled, and the
///   token following it is a keyword reserved for column aliases or one of
///   `)`, `;`, end of input, `]`, `}` (the comma was a trailing comma).
///
/// Returns `false` when a comma was consumed and another element is expected
/// to follow it.
fn is_parse_comma_separated_end(&mut self) -> bool {
    // No comma at all: the list stops here.
    if !self.consume_token(&Token::Comma) {
        return true;
    }
    // Without trailing-comma support a consumed comma always announces
    // another element.
    if !self.options.trailing_commas {
        return false;
    }
    // A comma was consumed; decide whether it was a trailing one by looking
    // at (but not consuming) the token that follows it.
    match self.peek_token().token {
        Token::Word(ref kw) => keywords::RESERVED_FOR_COLUMN_ALIAS.contains(&kw.keyword),
        Token::RParen | Token::SemiColon | Token::EOF | Token::RBracket | Token::RBrace => true,
        _ => false,
    }
}

/// Parse a comma-separated list of 1+ items accepted by `F`
pub fn parse_comma_separated<T, F>(&mut self, mut f: F) -> Result<Vec<T>, ParserError>
where
Expand All @@ -3438,22 +3461,8 @@ impl<'a> Parser<'a> {
let mut values = vec![];
loop {
values.push(f(self)?);
if !self.consume_token(&Token::Comma) {
if self.is_parse_comma_separated_end() {
break;
} else if self.options.trailing_commas {
match self.peek_token().token {
Token::Word(kw)
if keywords::RESERVED_FOR_COLUMN_ALIAS.contains(&kw.keyword) =>
{
break;
}
Token::RParen
| Token::SemiColon
| Token::EOF
| Token::RBracket
| Token::RBrace => break,
_ => continue,
}
}
}
Ok(values)
Expand Down Expand Up @@ -8104,19 +8113,7 @@ impl<'a> Parser<'a> {
vec![]
};

let settings = if dialect_of!(self is ClickHouseDialect|GenericDialect)
&& self.parse_keyword(Keyword::SETTINGS)
{
let key_values = self.parse_comma_separated(|p| {
let key = p.parse_identifier(false)?;
p.expect_token(&Token::Eq)?;
let value = p.parse_value()?;
Ok(Setting { key, value })
})?;
Some(key_values)
} else {
None
};
let settings = self.parse_settings()?;

let fetch = if self.parse_keyword(Keyword::FETCH) {
Some(self.parse_fetch()?)
Expand Down Expand Up @@ -8163,6 +8160,23 @@ impl<'a> Parser<'a> {
}
}

/// Parse an optional `SETTINGS key = value [, ...]` clause.
///
/// The clause is only recognised for the ClickHouse and generic dialects.
/// Returns `Ok(None)` (consuming nothing) when the dialect does not apply or
/// the next token is not the `SETTINGS` keyword; otherwise returns the parsed
/// key/value pairs, or an error if any pair is malformed.
fn parse_settings(&mut self) -> Result<Option<Vec<Setting>>, ParserError> {
    let dialect_has_settings = dialect_of!(self is ClickHouseDialect|GenericDialect);
    // The keyword is only probed (and consumed) when the dialect supports it.
    if !(dialect_has_settings && self.parse_keyword(Keyword::SETTINGS)) {
        return Ok(None);
    }

    let pairs = self.parse_comma_separated(|parser| {
        let key = parser.parse_identifier(false)?;
        parser.expect_token(&Token::Eq)?;
        let value = parser.parse_value()?;
        Ok(Setting { key, value })
    })?;
    Ok(Some(pairs))
}

/// Parse a mssql `FOR [XML | JSON | BROWSE]` clause
pub fn parse_for_clause(&mut self) -> Result<Option<ForClause>, ParserError> {
if self.parse_keyword(Keyword::XML) {
Expand Down Expand Up @@ -9382,9 +9396,9 @@ impl<'a> Parser<'a> {
// Parse potential version qualifier
let version = self.parse_table_version()?;

// Postgres, MSSQL: table-valued functions:
// Postgres, MSSQL, ClickHouse: table-valued functions:
let args = if self.consume_token(&Token::LParen) {
Some(self.parse_optional_args()?)
Some(self.parse_table_function_args()?)
} else {
None
};
Expand Down Expand Up @@ -10327,6 +10341,27 @@ impl<'a> Parser<'a> {
}
}

/// Parse the arguments of a table-valued function call, including the
/// closing parenthesis (the opening one must already have been consumed).
///
/// The argument list may be empty and may end with a `SETTINGS` clause;
/// once a `SETTINGS` clause is parsed no further arguments are accepted.
fn parse_table_function_args(&mut self) -> Result<TableFunctionArgs, ParserError> {
    let mut args = Vec::new();
    let mut settings = None;

    // `f()`: nothing between the parentheses.
    if self.consume_token(&Token::RParen) {
        return Ok(TableFunctionArgs { args, settings });
    }

    loop {
        // A SETTINGS clause may appear in place of any argument and ends the list.
        settings = self.parse_settings()?;
        if settings.is_some() {
            break;
        }
        args.push(self.parse_function_args()?);
        if self.is_parse_comma_separated_end() {
            break;
        }
    }

    self.expect_token(&Token::RParen)?;
    Ok(TableFunctionArgs { args, settings })
}

/// Parses a potentially empty list of arguments to a window function
/// (including the closing parenthesis).
///
Expand Down
77 changes: 77 additions & 0 deletions tests/sqlparser_clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1151,6 +1151,83 @@ fn parse_create_table_on_commit_and_as_query() {
}
}

#[test]
fn parse_select_table_function_settings() {
    /// Round-trips `sql` through the ClickHouse and generic dialects and asserts that
    /// its single `FROM` relation is a table function call whose arguments equal `expected`.
    fn check_settings(sql: &str, expected: &TableFunctionArgs) {
        let query = match clickhouse_and_generic().verified_stmt(sql) {
            Statement::Query(query) => query,
            _ => unreachable!(),
        };
        let from = &query.body.as_select().unwrap().from;
        assert_eq!(from.len(), 1);
        assert_eq!(from[0].joins, vec![]);
        match &from[0].relation {
            Table { args, .. } => assert_eq!(args.as_ref().unwrap(), expected),
            _ => unreachable!(),
        }
    }

    /// The single unnamed argument `arg`.
    fn single_arg() -> Vec<FunctionArg> {
        vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(
            Expr::Identifier("arg".into()),
        ))]
    }

    /// The settings list `s0 = 3, s1 = 's'`.
    fn s0_s1_settings() -> Option<Vec<Setting>> {
        Some(vec![
            Setting {
                key: "s0".into(),
                value: Value::Number("3".parse().unwrap(), false),
            },
            Setting {
                key: "s1".into(),
                value: Value::SingleQuotedString("s".into()),
            },
        ])
    }

    // Arguments followed by a SETTINGS clause.
    check_settings(
        "SELECT * FROM table_function(arg, SETTINGS s0 = 3, s1 = 's')",
        &TableFunctionArgs {
            args: single_arg(),
            settings: s0_s1_settings(),
        },
    );
    // Arguments only.
    check_settings(
        r#"SELECT * FROM table_function(arg)"#,
        &TableFunctionArgs {
            args: single_arg(),
            settings: None,
        },
    );
    // SETTINGS clause only.
    check_settings(
        "SELECT * FROM table_function(SETTINGS s0 = 3, s1 = 's')",
        &TableFunctionArgs {
            args: vec![],
            settings: s0_s1_settings(),
        },
    );

    // Malformed SETTINGS clauses must be rejected by the parser.
    for sql in [
        "SELECT * FROM t(SETTINGS a)",
        "SELECT * FROM t(SETTINGS a=)",
        "SELECT * FROM t(SETTINGS a=1, b)",
        "SELECT * FROM t(SETTINGS a=1, b=)",
        "SELECT * FROM t(SETTINGS a=1, b=c)",
    ] {
        clickhouse_and_generic()
            .parse_sql_statements(sql)
            .expect_err("Expected: SETTINGS key = value, found: ");
    }
}

fn clickhouse() -> TestedDialects {
TestedDialects {
dialects: vec![Box::new(ClickHouseDialect {})],
Expand Down

0 comments on commit d49acc6

Please sign in to comment.