Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Update for API changes
Browse files Browse the repository at this point in the history
alamb committed Jan 19, 2025
1 parent 65f92e4 commit 4b06bc8
Showing 11 changed files with 214 additions and 104 deletions.
1 change: 1 addition & 0 deletions datafusion/expr/src/logical_plan/statement.rs
Original file line number Diff line number Diff line change
@@ -153,6 +153,7 @@ pub enum TransactionIsolationLevel {
ReadCommitted,
RepeatableRead,
Serializable,
Snapshot,
}

/// Indicator that the following statements should be committed or rolled back atomically
219 changes: 138 additions & 81 deletions datafusion/sql/src/expr/mod.rs
Original file line number Diff line number Diff line change
@@ -21,14 +21,14 @@ use datafusion_expr::planner::{
PlannerResult, RawBinaryExpr, RawDictionaryExpr, RawFieldAccessExpr,
};
use sqlparser::ast::{
BinaryOperator, CastFormat, CastKind, DataType as SQLDataType, DictionaryField,
Expr as SQLExpr, ExprWithAlias as SQLExprWithAlias, MapEntry, StructField, Subscript,
TrimWhereField, Value,
AccessExpr, BinaryOperator, CastFormat, CastKind, DataType as SQLDataType,
DictionaryField, Expr as SQLExpr, ExprWithAlias as SQLExprWithAlias, MapEntry,
StructField, Subscript, TrimWhereField, Value,
};

use datafusion_common::{
internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, Result,
ScalarValue,
internal_datafusion_err, internal_err, not_impl_err, plan_err, Column, DFSchema,
Result, ScalarValue,
};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::expr::{InList, WildcardOptions};
@@ -236,14 +236,14 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
self.sql_identifier_to_expr(id, schema, planner_context)
}

SQLExpr::MapAccess { .. } => {
not_impl_err!("Map Access")
}

// <expr>["foo"], <expr>[4] or <expr>[4:5]
SQLExpr::Subscript { expr, subscript } => {
self.sql_subscript_to_expr(*expr, subscript, schema, planner_context)
}
SQLExpr::CompoundFieldAccess { root, access_chain } => self
.sql_compound_field_access_to_expr(
*root,
access_chain,
schema,
planner_context,
),

SQLExpr::CompoundIdentifier(ids) => {
self.sql_compound_identifier_to_expr(ids, schema, planner_context)
@@ -984,84 +984,141 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
Ok(Expr::Cast(Cast::new(Box::new(expr), dt)))
}

fn sql_subscript_to_expr(
fn sql_compound_field_access_to_expr(
&self,
expr: SQLExpr,
subscript: Box<Subscript>,
root: SQLExpr,
access_chain: Vec<AccessExpr>,
schema: &DFSchema,
planner_context: &mut PlannerContext,
) -> Result<Expr> {
let expr = self.sql_expr_to_logical_expr(expr, schema, planner_context)?;

let field_access = match *subscript {
Subscript::Index { index } => {
// index can be a name, in which case it is a named field access
match index {
SQLExpr::Value(
Value::SingleQuotedString(s) | Value::DoubleQuotedString(s),
) => GetFieldAccess::NamedStructField {
name: ScalarValue::from(s),
},
SQLExpr::JsonAccess { .. } => {
return not_impl_err!("JsonAccess");
let mut root = self.sql_expr_to_logical_expr(root, schema, planner_context)?;
let fields = access_chain
.into_iter()
.map(|field| match field {
AccessExpr::Subscript(subscript) => {
match subscript {
Subscript::Index { index } => {
// index can be a name, in which case it is a named field access
match index {
SQLExpr::Value(
Value::SingleQuotedString(s)
| Value::DoubleQuotedString(s),
) => Ok(Some(GetFieldAccess::NamedStructField {
name: ScalarValue::from(s),
})),
SQLExpr::JsonAccess { .. } => {
not_impl_err!("JsonAccess")
}
// otherwise treat like a list index
_ => Ok(Some(GetFieldAccess::ListIndex {
key: Box::new(self.sql_expr_to_logical_expr(
index,
schema,
planner_context,
)?),
})),
}
}
Subscript::Slice {
lower_bound,
upper_bound,
stride,
} => {
// A missing lower bound (access like `[:2]`) is not supported
let lower_bound = if let Some(lower_bound) = lower_bound {
self.sql_expr_to_logical_expr(
lower_bound,
schema,
planner_context,
)
} else {
not_impl_err!("Slice subscript requires a lower bound")
}?;

// A missing upper bound (access like `[2:]`) is not supported
let upper_bound = if let Some(upper_bound) = upper_bound {
self.sql_expr_to_logical_expr(
upper_bound,
schema,
planner_context,
)
} else {
not_impl_err!("Slice subscript requires an upper bound")
}?;

// stride, default to 1
let stride = if let Some(stride) = stride {
self.sql_expr_to_logical_expr(
stride,
schema,
planner_context,
)?
} else {
lit(1i64)
};

Ok(Some(GetFieldAccess::ListRange {
start: Box::new(lower_bound),
stop: Box::new(upper_bound),
stride: Box::new(stride),
}))
}
}
// otherwise treat like a list index
_ => GetFieldAccess::ListIndex {
key: Box::new(self.sql_expr_to_logical_expr(
index,
schema,
planner_context,
)?),
},
}
}
Subscript::Slice {
lower_bound,
upper_bound,
stride,
} => {
// Means access like [:2]
let lower_bound = if let Some(lower_bound) = lower_bound {
self.sql_expr_to_logical_expr(lower_bound, schema, planner_context)
} else {
not_impl_err!("Slice subscript requires a lower bound")
}?;

// means access like [2:]
let upper_bound = if let Some(upper_bound) = upper_bound {
self.sql_expr_to_logical_expr(upper_bound, schema, planner_context)
} else {
not_impl_err!("Slice subscript requires an upper bound")
}?;

// stride, default to 1
let stride = if let Some(stride) = stride {
self.sql_expr_to_logical_expr(stride, schema, planner_context)?
} else {
lit(1i64)
};

GetFieldAccess::ListRange {
start: Box::new(lower_bound),
stop: Box::new(upper_bound),
stride: Box::new(stride),
AccessExpr::Dot(expr) => {
let expr =
self.sql_expr_to_logical_expr(expr, schema, planner_context)?;
match expr {
Expr::Column(Column { name, relation }) => {
if let Some(relation) = &relation {
// The dot access resolved to a qualified column. Its table name must
// equal the schema name of the root expression: when it does, the root
// expression is replaced with this column reference and no field access
// is emitted; when it does not, planning fails with a table name mismatch
// error. (Unqualified columns are handled below as named field accesses.)
if relation.table() == root.schema_name().to_string() {
root = Expr::Column(Column {
name,
relation: Some(relation.clone()),
});
Ok(None)
} else {
plan_err!(
"table name mismatch: {} != {}",
relation.table(),
root.schema_name()
)
}
} else {
Ok(Some(GetFieldAccess::NamedStructField {
name: ScalarValue::from(name),
}))
}
}
_ => not_impl_err!(
"Dot access not supported for non-column expr: {expr:?}"
),
}
}
}
};
})
.collect::<Result<Vec<_>>>()?;

let mut field_access_expr = RawFieldAccessExpr { expr, field_access };
for planner in self.context_provider.get_expr_planners() {
match planner.plan_field_access(field_access_expr, schema)? {
PlannerResult::Planned(expr) => return Ok(expr),
PlannerResult::Original(expr) => {
field_access_expr = expr;
fields
.into_iter()
.flatten()
.try_fold(root, |expr, field_access| {
let mut field_access_expr = RawFieldAccessExpr { expr, field_access };
for planner in self.context_provider.get_expr_planners() {
match planner.plan_field_access(field_access_expr, schema)? {
PlannerResult::Planned(expr) => return Ok(expr),
PlannerResult::Original(expr) => {
field_access_expr = expr;
}
}
}
}
}

not_impl_err!(
"GetFieldAccess not supported by ExprPlanner: {field_access_expr:?}"
)
not_impl_err!(
"GetFieldAccess not supported by ExprPlanner: {field_access_expr:?}"
)
})
}
}

6 changes: 3 additions & 3 deletions datafusion/sql/src/parser.rs
Original file line number Diff line number Diff line change
@@ -563,7 +563,7 @@ impl<'a> DFParser<'a> {

loop {
if let Token::Word(_) = self.parser.peek_token().token {
let identifier = self.parser.parse_identifier(false)?;
let identifier = self.parser.parse_identifier()?;
partitions.push(identifier.to_string());
} else {
return self.expected("partition name", self.parser.peek_token());
@@ -666,7 +666,7 @@ impl<'a> DFParser<'a> {
}

fn parse_column_def(&mut self) -> Result<ColumnDef, ParserError> {
let name = self.parser.parse_identifier(false)?;
let name = self.parser.parse_identifier()?;
let data_type = self.parser.parse_data_type()?;
let collation = if self.parser.parse_keyword(Keyword::COLLATE) {
Some(self.parser.parse_object_name(false)?)
@@ -676,7 +676,7 @@ impl<'a> DFParser<'a> {
let mut options = vec![];
loop {
if self.parser.parse_keyword(Keyword::CONSTRAINT) {
let name = Some(self.parser.parse_identifier(false)?);
let name = Some(self.parser.parse_identifier()?);
if let Some(option) = self.parser.parse_optional_column_option()? {
options.push(ColumnOptionDef { name, option });
} else {
9 changes: 7 additions & 2 deletions datafusion/sql/src/planner.rs
Original file line number Diff line number Diff line change
@@ -430,7 +430,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
SQLDataType::UnsignedBigInt(_) | SQLDataType::UnsignedInt8(_) => Ok(DataType::UInt64),
SQLDataType::Float(_) => Ok(DataType::Float32),
SQLDataType::Real | SQLDataType::Float4 => Ok(DataType::Float32),
SQLDataType::Double | SQLDataType::DoublePrecision | SQLDataType::Float8 => Ok(DataType::Float64),
SQLDataType::Double(ExactNumberInfo::None) | SQLDataType::DoublePrecision | SQLDataType::Float8 => Ok(DataType::Float64),
SQLDataType::Double(ExactNumberInfo::Precision(_)|ExactNumberInfo::PrecisionAndScale(_, _)) => {
not_impl_err!("Unsupported SQL type (precision/scale not supported) {sql_type}")
}
SQLDataType::Char(_)
| SQLDataType::Text
| SQLDataType::String(_) => Ok(DataType::Utf8),
@@ -566,7 +569,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
| SQLDataType::MediumText
| SQLDataType::LongText
| SQLDataType::Bit(_)
|SQLDataType::BitVarying(_)
| SQLDataType::BitVarying(_)
// BigQuery UDFs
| SQLDataType::AnyType
=> not_impl_err!(
"Unsupported SQL type {sql_type:?}"
),
8 changes: 7 additions & 1 deletion datafusion/sql/src/relation/join.rs
Original file line number Diff line number Diff line change
@@ -123,7 +123,13 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
.join_on(right, join_type, Some(expr))?
.build()
}
JoinConstraint::Using(idents) => {
JoinConstraint::Using(mut object_names) => {
if object_names.len() != 1 {
return not_impl_err!(
"Only one column name is supported in USING clause"
);
};
let idents = object_names.pop().unwrap().0;
let keys: Vec<Column> = idents
.into_iter()
.map(|x| Column::from_name(self.ident_normalizer.normalize(x)))
3 changes: 3 additions & 0 deletions datafusion/sql/src/set_expr.rs
Original file line number Diff line number Diff line change
@@ -88,6 +88,9 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
(SetOperator::Except, false) => {
LogicalPlanBuilder::except(left_plan, right_plan, false)
}
(SetOperator::Minus, _) => {
not_impl_err!("MINUS Set Operator not implemented")
}
}
}
}
51 changes: 44 additions & 7 deletions datafusion/sql/src/statement.rs
Original file line number Diff line number Diff line change
@@ -56,7 +56,7 @@ use datafusion_expr::{
};
use sqlparser::ast::{
self, BeginTransactionKind, NullsDistinctOption, ShowStatementIn,
ShowStatementOptions, SqliteOnConflict,
ShowStatementOptions, SqliteOnConflict, TableObject, UpdateTableFromKind,
};
use sqlparser::ast::{
Assignment, AssignmentTarget, ColumnDef, CreateIndex, CreateTable,
@@ -497,6 +497,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
if_not_exists,
temporary,
to,
params,
} => {
if materialized {
return not_impl_err!("Materialized views not supported")?;
@@ -532,6 +533,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
if_not_exists,
temporary,
to,
params,
};
let sql = stmt.to_string();
let Statement::CreateView {
@@ -818,7 +820,6 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
Statement::Insert(Insert {
or,
into,
table_name,
columns,
overwrite,
source,
@@ -832,7 +833,17 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
mut replace_into,
priority,
insert_alias,
assignments,
has_table_keyword,
settings,
format_clause,
}) => {
let table_name = match table {
TableObject::TableName(table_name) => table_name,
TableObject::TableFunction(_) => {
return not_impl_err!("INSERT INTO Table functions not supported")
}
};
if let Some(or) = or {
match or {
SqliteOnConflict::Replace => replace_into = true,
@@ -845,9 +856,6 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
if !after_columns.is_empty() {
plan_err!("After-columns clause not supported")?;
}
if table {
plan_err!("Table clause not supported")?;
}
if on.is_some() {
plan_err!("Insert-on clause not supported")?;
}
@@ -873,7 +881,18 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
if insert_alias.is_some() {
plan_err!("Inserts with an alias not supported")?;
}
let _ = into; // optional keyword doesn't change behavior
if !assignments.is_empty() {
plan_err!("Inserts with assignments not supported")?;
}
if settings.is_some() {
plan_err!("Inserts with settings not supported")?;
}
if format_clause.is_some() {
plan_err!("Inserts with format clause not supported")?;
}
// optional keywords don't change behavior
let _ = into;
let _ = has_table_keyword;
self.insert_to_plan(table_name, columns, source, overwrite, replace_into)
}
Statement::Update {
@@ -884,6 +903,11 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
returning,
or,
} => {
let from =
from.map(|update_table_from_kind| match update_table_from_kind {
UpdateTableFromKind::BeforeSet(from) => from,
UpdateTableFromKind::AfterSet(from) => from,
});
if returning.is_some() {
plan_err!("Update-returning clause not yet supported")?;
}
@@ -969,6 +993,9 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
ast::TransactionIsolationLevel::Serializable => {
TransactionIsolationLevel::Serializable
}
ast::TransactionIsolationLevel::Snapshot => {
TransactionIsolationLevel::Snapshot
}
};
let access_mode = match access_mode {
ast::TransactionAccessMode::ReadOnly => {
@@ -984,7 +1011,17 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
});
Ok(LogicalPlan::Statement(statement))
}
Statement::Commit { chain } => {
Statement::Commit {
chain,
end,
modifier,
} => {
if end {
return not_impl_err!("COMMIT AND END not supported");
};
if let Some(modifier) = modifier {
return not_impl_err!("COMMIT {modifier} not supported");
};
let statement = PlanStatement::TransactionEnd(TransactionEnd {
conclusion: TransactionConclusion::Commit,
chain,
1 change: 1 addition & 0 deletions datafusion/sql/src/unparser/ast.rs
Original file line number Diff line number Diff line change
@@ -466,6 +466,7 @@ impl TableRelationBuilder {
partitions: self.partitions.clone(),
with_ordinality: false,
json_path: None,
sample: None,
})
}
fn create_empty() -> Self {
9 changes: 4 additions & 5 deletions datafusion/sql/src/unparser/dialect.rs
Original file line number Diff line number Diff line change
@@ -17,6 +17,7 @@

use std::{collections::HashMap, sync::Arc};

use super::{utils::character_length_to_sql, utils::date_part_to_sql, Unparser};
use arrow_schema::TimeUnit;
use datafusion_common::Result;
use datafusion_expr::Expr;
@@ -27,8 +28,6 @@ use sqlparser::{
keywords::ALL_KEYWORDS,
};

use super::{utils::character_length_to_sql, utils::date_part_to_sql, Unparser};

pub type ScalarFnToSqlHandler =
Box<dyn Fn(&Unparser, &[Expr]) -> Result<Option<ast::Expr>> + Send + Sync>;

@@ -63,7 +62,7 @@ pub trait Dialect: Send + Sync {
/// Does the dialect use DOUBLE PRECISION to represent Float64 rather than DOUBLE?
/// E.g. Postgres uses DOUBLE PRECISION instead of DOUBLE
fn float64_ast_dtype(&self) -> ast::DataType {
ast::DataType::Double
ast::DataType::Double(ast::ExactNumberInfo::None)
}

/// The SQL type to use for Arrow Utf8 unparsing
@@ -511,7 +510,7 @@ impl Default for CustomDialect {
supports_nulls_first_in_sort: true,
use_timestamp_for_date64: false,
interval_style: IntervalStyle::SQLStandard,
float64_ast_dtype: ast::DataType::Double,
float64_ast_dtype: ast::DataType::Double(ast::ExactNumberInfo::None),
utf8_cast_dtype: ast::DataType::Varchar(None),
large_utf8_cast_dtype: ast::DataType::Text,
date_field_extract_style: DateFieldExtractStyle::DatePart,
@@ -692,7 +691,7 @@ impl CustomDialectBuilder {
supports_nulls_first_in_sort: true,
use_timestamp_for_date64: false,
interval_style: IntervalStyle::PostgresVerbose,
float64_ast_dtype: ast::DataType::Double,
float64_ast_dtype: ast::DataType::Double(ast::ExactNumberInfo::None),
utf8_cast_dtype: ast::DataType::Varchar(None),
large_utf8_cast_dtype: ast::DataType::Text,
date_field_extract_style: DateFieldExtractStyle::DatePart,
9 changes: 5 additions & 4 deletions datafusion/sql/src/unparser/expr.rs
Original file line number Diff line number Diff line change
@@ -523,9 +523,9 @@ impl Unparser<'_> {
}
let array = self.expr_to_sql(&args[0])?;
let index = self.expr_to_sql(&args[1])?;
Ok(ast::Expr::Subscript {
expr: Box::new(array),
subscript: Box::new(Subscript::Index { index }),
Ok(ast::Expr::CompoundFieldAccess {
root: Box::new(array),
access_chain: vec![ast::AccessExpr::Subscript(Subscript::Index { index })],
})
}

@@ -1633,6 +1633,7 @@ mod tests {
use datafusion_functions_nested::expr_fn::{array_element, make_array};
use datafusion_functions_nested::map::map;
use datafusion_functions_window::row_number::row_number_udwf;
use sqlparser::ast::ExactNumberInfo;

use crate::unparser::dialect::{
CharacterLengthStyle, CustomDialect, CustomDialectBuilder, DateFieldExtractStyle,
@@ -2123,7 +2124,7 @@ mod tests {
#[test]
fn custom_dialect_float64_ast_dtype() -> Result<()> {
for (float64_ast_dtype, identifier) in [
(ast::DataType::Double, "DOUBLE"),
(ast::DataType::Double(ExactNumberInfo::None), "DOUBLE"),
(ast::DataType::DoublePrecision, "DOUBLE PRECISION"),
] {
let dialect = CustomDialectBuilder::new()
2 changes: 1 addition & 1 deletion datafusion/sql/src/unparser/plan.rs
Original file line number Diff line number Diff line change
@@ -1109,7 +1109,7 @@ impl Unparser<'_> {
_ => return None,
}
}
Some(ast::JoinConstraint::Using(idents))
Some(ast::JoinConstraint::Using(vec![ast::ObjectName(idents)]))
}

/// Convert a join constraint and associated conditions and filter to a SQL AST node

0 comments on commit 4b06bc8

Please sign in to comment.