From 4739c45a5892c36fb1b8dda16887e81c6a7acda9 Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Sun, 25 May 2025 20:53:34 +0200 Subject: [PATCH 1/9] Support TABLESAMPLE pipe operator --- src/ast/query.rs | 8 ++++++++ src/parser/mod.rs | 7 +++++++ tests/sqlparser_common.rs | 6 ++++++ 3 files changed, 21 insertions(+) diff --git a/src/ast/query.rs b/src/ast/query.rs index 5b784b199..5f6adec33 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2680,6 +2680,10 @@ pub enum PipeOperator { full_table_exprs: Vec, group_by_expr: Vec, }, + /// Selects a random sample of rows from the input table. + /// Syntax: `|> TABLESAMPLE ( {ROWS | PERCENT})` + /// See more at + TableSample { sample: Box }, } impl fmt::Display for PipeOperator { @@ -2731,6 +2735,10 @@ impl fmt::Display for PipeOperator { PipeOperator::OrderBy { exprs } => { write!(f, "ORDER BY {}", display_comma_separated(exprs.as_slice())) } + + PipeOperator::TableSample { sample } => { + write!(f, " {}", sample) + } } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 4299d1566..71d698b43 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11047,6 +11047,7 @@ impl<'a> Parser<'a> { Keyword::LIMIT, Keyword::AGGREGATE, Keyword::ORDER, + Keyword::TABLESAMPLE, ])?; match kw { Keyword::SELECT => { @@ -11109,6 +11110,12 @@ impl<'a> Parser<'a> { let exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?; pipe_operators.push(PipeOperator::OrderBy { exprs }) } + Keyword::TABLESAMPLE => { + if let Some(sample) = self.maybe_parse_table_sample()? { + pipe_operators.push(PipeOperator::TableSample { sample }); + }; + + } unhandled => { return Err(ParserError::ParserError(format!( "`expect_one_of_keywords` further up allowed unhandled keyword: {unhandled:?}" diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 86c473d7d..93c9566cd 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -15155,6 +15155,12 @@ fn parse_pipeline_operator() { dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC"); dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC, name ASC"); + // tablesample pipe operator + dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE BERNOULLI (50)"); + dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE SYSTEM (50)"); + // TODO: Technically, REPEATABLE is not available in BigQuery, but it is used with TABLESAMPLE in other dialects + dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10)"); + // many pipes dialects.verified_stmt( "SELECT * FROM CustomerOrders |> AGGREGATE SUM(cost) AS total_cost GROUP BY customer_id, state, item_type |> EXTEND COUNT(*) OVER (PARTITION BY customer_id) AS num_orders |> WHERE num_orders > 1 |> AGGREGATE AVG(total_cost) AS average GROUP BY state DESC, item_type ASC", From caacef8037c5f3f7a7db0bf77d627030a71fe78f Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Mon, 26 May 2025 08:37:25 +0200 Subject: [PATCH 2/9] Implement tablesample --- src/ast/query.rs | 22 +++++++++++----------- src/parser/mod.rs | 15 ++++++++------- tests/sqlparser_common.rs | 6 +++--- 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 5f6adec33..2f393bec2 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -104,7 +104,7 @@ impl fmt::Display for Query { format.fmt(f)?; } for pipe_operator in &self.pipe_operators { - f.write_str(" |> ")?; + f.write_str(" |>")?; pipe_operator.fmt(f)?; } Ok(()) @@ -2690,22 +2690,22 @@ impl fmt::Display for PipeOperator { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { PipeOperator::Select { exprs } => { - write!(f, "SELECT {}", display_comma_separated(exprs.as_slice())) + write!(f, " SELECT {}", display_comma_separated(exprs.as_slice())) } PipeOperator::Extend { exprs } => { - write!(f, "EXTEND {}", display_comma_separated(exprs.as_slice())) + write!(f, " EXTEND {}", display_comma_separated(exprs.as_slice())) } PipeOperator::Set { assignments } => { - write!(f, "SET {}", display_comma_separated(assignments.as_slice())) + write!(f, " SET {}", display_comma_separated(assignments.as_slice())) } PipeOperator::Drop { columns } => { - write!(f, "DROP {}", display_comma_separated(columns.as_slice())) + write!(f, " DROP {}", display_comma_separated(columns.as_slice())) } PipeOperator::As { alias } => { - write!(f, "AS {}", alias) + write!(f, " AS {}", alias) } PipeOperator::Limit { expr, offset } => { - write!(f, "LIMIT {}", expr)?; + write!(f, " LIMIT {}", expr)?; if let Some(offset) = offset { write!(f, " OFFSET {}", offset)?; } @@ -2715,7 +2715,7 @@ impl fmt::Display for PipeOperator { full_table_exprs, group_by_expr, } => { - write!(f, "AGGREGATE")?; + write!(f, " AGGREGATE")?; if !full_table_exprs.is_empty() { write!( f, @@ -2730,14 +2730,14 @@ impl fmt::Display for PipeOperator { } PipeOperator::Where { expr } => { - write!(f, "WHERE {}", expr) + write!(f, " WHERE {}", expr) } PipeOperator::OrderBy { exprs } => { - write!(f, "ORDER BY {}", display_comma_separated(exprs.as_slice())) + write!(f, " ORDER BY {}", display_comma_separated(exprs.as_slice())) } PipeOperator::TableSample { sample } => { - write!(f, " {}", sample) + write!(f, "{}", sample) } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 71d698b43..b54209fd3 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11111,10 +11111,8 @@ impl<'a> Parser<'a> { pipe_operators.push(PipeOperator::OrderBy { exprs }) } Keyword::TABLESAMPLE => { - if let Some(sample) = self.maybe_parse_table_sample()? { - pipe_operators.push(PipeOperator::TableSample { sample }); - }; - + let sample = self.parse_table_sample(TableSampleModifier::TableSample)?; + pipe_operators.push(PipeOperator::TableSample { sample }); } unhandled => { return Err(ParserError::ParserError(format!( @@ -12760,8 +12758,11 @@ impl<'a> Parser<'a> { } else { return Ok(None); }; + self.parse_table_sample(modifier).map(|sample| Some(sample)) + } - let name = match self.parse_one_of_keywords(&[ + fn parse_table_sample(&mut self, modifier: TableSampleModifier ) -> Result, ParserError> { +let name = match self.parse_one_of_keywords(&[ Keyword::BERNOULLI, Keyword::ROW, Keyword::SYSTEM, @@ -12842,14 +12843,14 @@ impl<'a> Parser<'a> { None }; - Ok(Some(Box::new(TableSample { + Ok(Box::new(TableSample { modifier, name, quantity, seed, bucket, offset, - }))) + })) } fn parse_table_sample_seed( diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 93c9566cd..aec9b9d8f 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -15156,10 +15156,10 @@ fn parse_pipeline_operator() { dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC, name ASC"); // tablesample pipe operator - dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE BERNOULLI (50)"); - dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE SYSTEM (50)"); + dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE BERNOULLI (50)"); + dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50)"); // TODO: Technically, REPEATABLE is not available in BigQuery, but it is used with TABLESAMPLE in other dialects - dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10)"); + dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50) REPEATABLE (10)"); // many pipes dialects.verified_stmt( From b08d9b1cf55d4ce1d21e2467a913959a08aca1ae Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Wed, 28 May 2025 10:17:35 +0200 Subject: [PATCH 3/9] Undo display changes for other operators --- src/ast/query.rs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 2f393bec2..4957e3d0b 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -104,7 +104,7 @@ impl fmt::Display for Query { format.fmt(f)?; } for pipe_operator in &self.pipe_operators { - f.write_str(" |>")?; + f.write_str(" |> ")?; pipe_operator.fmt(f)?; } Ok(()) @@ -2690,24 +2690,24 @@ impl fmt::Display for PipeOperator { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { PipeOperator::Select { exprs } => { - write!(f, " SELECT {}", display_comma_separated(exprs.as_slice())) + write!(f, "SELECT {}", display_comma_separated(exprs.as_slice())) } PipeOperator::Extend { exprs } => { - write!(f, " EXTEND {}", display_comma_separated(exprs.as_slice())) + write!(f, "EXTEND {}", display_comma_separated(exprs.as_slice())) } PipeOperator::Set { assignments } => { - write!(f, " SET {}", display_comma_separated(assignments.as_slice())) + write!(f, "SET {}", display_comma_separated(assignments.as_slice())) } PipeOperator::Drop { columns } => { - write!(f, " DROP {}", display_comma_separated(columns.as_slice())) + write!(f, "DROP {}", display_comma_separated(columns.as_slice())) } PipeOperator::As { alias } => { - write!(f, " AS {}", alias) + write!(f, "AS {}", alias) } PipeOperator::Limit { expr, offset } => { - write!(f, " LIMIT {}", expr)?; + write!(f, "LIMIT {}", expr)?; if let Some(offset) = offset { - write!(f, " OFFSET {}", offset)?; + write!(f, "OFFSET {}", offset)?; } Ok(()) } @@ -2715,25 +2715,25 @@ impl fmt::Display for PipeOperator { full_table_exprs, group_by_expr, } => { - write!(f, " AGGREGATE")?; + write!(f, "AGGREGATE")?; if !full_table_exprs.is_empty() { write!( f, - " {}", + "{}", display_comma_separated(full_table_exprs.as_slice()) )?; } if !group_by_expr.is_empty() { - write!(f, " GROUP BY {}", display_comma_separated(group_by_expr))?; + write!(f, "GROUP BY {}", display_comma_separated(group_by_expr))?; } Ok(()) } PipeOperator::Where { expr } => { - write!(f, " WHERE {}", expr) + write!(f, "WHERE {}", expr) } PipeOperator::OrderBy { exprs } => { - write!(f, " ORDER BY {}", display_comma_separated(exprs.as_slice())) + write!(f, "ORDER BY {}", display_comma_separated(exprs.as_slice())) } PipeOperator::TableSample { sample } => { From 9b6157e1aa00df0726cb1aef627f2fd1e0a44430 Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Wed, 28 May 2025 10:18:54 +0200 Subject: [PATCH 4/9] Undo display changes for other operators --- src/ast/query.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 4957e3d0b..47c994881 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2707,7 +2707,7 @@ impl fmt::Display for PipeOperator { PipeOperator::Limit { expr, offset } => { write!(f, "LIMIT {}", expr)?; if let Some(offset) = offset { - write!(f, "OFFSET {}", offset)?; + write!(f, " OFFSET {}", offset)?; } Ok(()) } @@ -2719,12 +2719,12 @@ impl fmt::Display for PipeOperator { if !full_table_exprs.is_empty() { write!( f, - "{}", + " {}", display_comma_separated(full_table_exprs.as_slice()) )?; } if !group_by_expr.is_empty() { - write!(f, "GROUP BY {}", display_comma_separated(group_by_expr))?; + write!(f, " GROUP BY {}", display_comma_separated(group_by_expr))?; } Ok(()) } From ed4cf2ffbfbde1bd7b37b9c308eae459262bdbfa Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Wed, 28 May 2025 10:19:59 +0200 Subject: [PATCH 5/9] Simplify syntax comment --- src/ast/query.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 47c994881..b93e20109 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2681,7 +2681,7 @@ pub enum PipeOperator { group_by_expr: Vec, }, /// Selects a random sample of rows from the input table. - /// Syntax: `|> TABLESAMPLE ( {ROWS | PERCENT})` + /// Syntax: `|> TABLESAMPLE SYSTEM (10 PERCENT) /// See more at TableSample { sample: Box }, } From c2d713fb6efd4435f216dbd0677509e85de8a7ea Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Wed, 28 May 2025 10:22:44 +0200 Subject: [PATCH 6/9] Fix formatting of tablesample --- src/ast/query.rs | 6 +++--- tests/sqlparser_common.rs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index b93e20109..06e3347fc 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -1559,7 +1559,7 @@ impl fmt::Display for TableSampleBucket { } impl fmt::Display for TableSample { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, " {}", self.modifier)?; + write!(f, "{}", self.modifier)?; if let Some(name) = &self.name { write!(f, " {}", name)?; } @@ -1862,7 +1862,7 @@ impl fmt::Display for TableFactor { write!(f, " WITH ORDINALITY")?; } if let Some(TableSampleKind::BeforeTableAlias(sample)) = sample { - write!(f, "{sample}")?; + write!(f, " {sample}")?; } if let Some(alias) = alias { write!(f, " AS {alias}")?; @@ -1877,7 +1877,7 @@ impl fmt::Display for TableFactor { write!(f, "{version}")?; } if let Some(TableSampleKind::AfterTableAlias(sample)) = sample { - write!(f, "{sample}")?; + write!(f, " {sample}")?; } Ok(()) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index aec9b9d8f..500acbfc6 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -15157,7 +15157,7 @@ fn parse_pipeline_operator() { // tablesample pipe operator dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE BERNOULLI (50)"); - dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50)"); + dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50 PERCENT)"); // TODO: Technically, REPEATABLE is not available in BigQuery, but it is used with TABLESAMPLE in other dialects dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50) REPEATABLE (10)"); From 89f51d397962036bbfed67de49a81db11a0873ff Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Wed, 28 May 2025 10:26:27 +0200 Subject: [PATCH 7/9] Remove TODO --- tests/sqlparser_common.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 500acbfc6..a638f4793 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -15158,7 +15158,6 @@ fn parse_pipeline_operator() { // tablesample pipe operator dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE BERNOULLI (50)"); dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50 PERCENT)"); - // TODO: Technically, REPEATABLE is not available in BigQuery, but it is used with TABLESAMPLE in other dialects dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50) REPEATABLE (10)"); // many pipes From 2e33f91fad5313b33254c81e092a63732763fff3 Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Thu, 29 May 2025 16:41:33 +0200 Subject: [PATCH 8/9] fmt --- src/ast/query.rs | 2 +- src/parser/mod.rs | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 06e3347fc..ffe1e4023 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2683,7 +2683,7 @@ pub enum PipeOperator { /// Selects a random sample of rows from the input table. /// Syntax: `|> TABLESAMPLE SYSTEM (10 PERCENT) /// See more at - TableSample { sample: Box }, + TableSample { sample: Box }, } impl fmt::Display for PipeOperator { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b54209fd3..679246747 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -12758,11 +12758,14 @@ impl<'a> Parser<'a> { } else { return Ok(None); }; - self.parse_table_sample(modifier).map(|sample| Some(sample)) + self.parse_table_sample(modifier).map(|sample| Some(sample)) } - fn parse_table_sample(&mut self, modifier: TableSampleModifier ) -> Result, ParserError> { -let name = match self.parse_one_of_keywords(&[ + fn parse_table_sample( + &mut self, + modifier: TableSampleModifier, + ) -> Result, ParserError> { + let name = match self.parse_one_of_keywords(&[ Keyword::BERNOULLI, Keyword::ROW, Keyword::SYSTEM, From 075484aa82bb26329b48b32529ce768fc0db07b2 Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Thu, 29 May 2025 16:45:33 +0200 Subject: [PATCH 9/9] clippy --- src/parser/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 679246747..391203ab9 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -12758,7 +12758,7 @@ impl<'a> Parser<'a> { } else { return Ok(None); }; - self.parse_table_sample(modifier).map(|sample| Some(sample)) + self.parse_table_sample(modifier).map(Some) } fn parse_table_sample(