From 5dae401dbdcf4077cbdf03628bec4088f2d34902 Mon Sep 17 00:00:00 2001
From: Ian Joiner <14581281+iajoiner@users.noreply.github.com>
Date: Mon, 4 Nov 2024 12:24:10 -0500
Subject: [PATCH] feat: add basic sqlparser adaptions

---
 crates/proof-of-sql-parser/Cargo.toml       |   8 +-
 crates/proof-of-sql-parser/src/lib.rs       |   2 +
 crates/proof-of-sql-parser/src/sqlparser.rs | 242 ++++++++++++++++++++
 3 files changed, 250 insertions(+), 2 deletions(-)
 create mode 100644 crates/proof-of-sql-parser/src/sqlparser.rs

diff --git a/crates/proof-of-sql-parser/Cargo.toml b/crates/proof-of-sql-parser/Cargo.toml
index 29c23bc56..c447fd2b2 100644
--- a/crates/proof-of-sql-parser/Cargo.toml
+++ b/crates/proof-of-sql-parser/Cargo.toml
@@ -16,12 +16,16 @@ test = true
 
 [dependencies]
 arrayvec = { workspace = true, features = ["serde"] }
-bigdecimal = { workspace = true }
+bigdecimal = { workspace = true, default-features = false }
 chrono = { workspace = true, features = ["serde"] }
 lalrpop-util = { workspace = true, features = ["lexer", "unicode"] }
 serde = { workspace = true, features = ["serde_derive", "alloc"] }
 snafu = { workspace = true }
-sqlparser = { workspace = true }
+sqlparser = { workspace = true, default-features = false, features = ["bigdecimal"] }
+
+[dependencies.num-traits]
+version = "0.2"
+default-features = false
 
 [build-dependencies]
 lalrpop = { workspace = true }
diff --git a/crates/proof-of-sql-parser/src/lib.rs b/crates/proof-of-sql-parser/src/lib.rs
index a600d6b97..ae9181132 100644
--- a/crates/proof-of-sql-parser/src/lib.rs
+++ b/crates/proof-of-sql-parser/src/lib.rs
@@ -32,6 +32,8 @@ pub use identifier::Identifier;
 pub mod resource_id;
 pub use resource_id::ResourceId;
 
+pub mod sqlparser;
+
 // lalrpop-generated code is not clippy-compliant
 lalrpop_mod!(#[allow(clippy::all, missing_docs, clippy::missing_docs_in_private_items, clippy::pedantic, clippy::missing_panics_doc)] pub sql);
 
diff --git a/crates/proof-of-sql-parser/src/sqlparser.rs b/crates/proof-of-sql-parser/src/sqlparser.rs
new file mode 100644
index 000000000..0ec0b0677
--- /dev/null
+++ b/crates/proof-of-sql-parser/src/sqlparser.rs
@@ -0,0 +1,242 @@
+//! This module exists to adapt the current parser to `sqlparser`.
+use crate::{
+    intermediate_ast::{
+        AliasedResultExpr, BinaryOperator as PoSqlBinaryOperator, Expression, Literal,
+        OrderBy as PoSqlOrderBy, OrderByDirection, SelectResultExpr, SetExpression,
+        TableExpression, UnaryOperator as PoSqlUnaryOperator,
+    },
+    Identifier, ResourceId, SelectStatement,
+};
+use alloc::{boxed::Box, string::ToString, vec};
+use bigdecimal::BigDecimal;
+use sqlparser::ast::{
+    BinaryOperator, Expr, Function, FunctionArg, FunctionArgExpr, GroupByExpr, Ident, ObjectName,
+    Offset, OffsetRows, OrderByExpr, Query, Select, SelectItem, SetExpr, TableFactor,
+    TableWithJoins, UnaryOperator, Value, WildcardAdditionalOptions,
+};
+
+/// Convert a number into a [`Expr`].
+fn number<T>(val: T) -> Expr
+where
+    T: Into<BigDecimal>,
+{
+    Expr::Value(Value::Number(val.into(), false))
+}
+
+/// Convert an [`Identifier`] into a [`Expr`].
+fn id(id: Identifier) -> Expr {
+    Expr::Identifier(id.into())
+}
+
+/// Converts an [`Identifier`] into an [`Ident`] by passing its string form to [`Ident::new`].
+impl From<Identifier> for Ident {
+    fn from(id: Identifier) -> Self {
+        Ident::new(id.as_str())
+    }
+}
+
+/// Converts a [`ResourceId`] into a two-part [`ObjectName`]: schema first, then object name.
+impl From<ResourceId> for ObjectName {
+    fn from(id: ResourceId) -> Self {
+        ObjectName(vec![id.schema().into(), id.object_name().into()])
+    }
+}
+
+/// Converts a [`TableExpression`] into a [`TableFactor::Table`] with no alias, arguments, hints,
+/// version or partitions; the schema is prepended to the name only when it is present.
+impl From<TableExpression> for TableFactor {
+    fn from(table: TableExpression) -> Self {
+        match table {
+            TableExpression::Named { table, schema } => {
+                let object_name = if let Some(schema) = schema {
+                    ObjectName(vec![schema.into(), table.into()])
+                } else {
+                    ObjectName(vec![table.into()])
+                };
+                TableFactor::Table {
+                    name: object_name,
+                    alias: None,
+                    args: None,
+                    with_hints: vec![],
+                    version: None,
+                    partitions: vec![],
+                }
+            }
+        }
+    }
+}
+
+/// Converts a [`Literal`] into a [`Value`].
+///
+/// # Panics
+/// Panics on [`Literal::Timestamp`], whose conversion is not implemented yet (`todo!()`).
+impl From<Literal> for Value {
+    fn from(literal: Literal) -> Self {
+        match literal {
+            Literal::VarChar(s) => Value::SingleQuotedString(s),
+            Literal::BigInt(n) => Value::Number(BigDecimal::from(n), false),
+            Literal::Int128(n) => Value::Number(BigDecimal::from(n), false),
+            Literal::Decimal(n) => Value::Number(n, false),
+            Literal::Boolean(b) => Value::Boolean(b),
+            Literal::Timestamp(_ts) => todo!(),
+        }
+    }
+}
+
+/// Maps each PoSQL binary operator onto its `sqlparser` counterpart.
+impl From<PoSqlBinaryOperator> for BinaryOperator {
+    fn from(op: PoSqlBinaryOperator) -> Self {
+        match op {
+            PoSqlBinaryOperator::And => BinaryOperator::And,
+            PoSqlBinaryOperator::Or => BinaryOperator::Or,
+            PoSqlBinaryOperator::Equal => BinaryOperator::Eq,
+            PoSqlBinaryOperator::LessThanOrEqual => BinaryOperator::LtEq,
+            PoSqlBinaryOperator::GreaterThanOrEqual => BinaryOperator::GtEq,
+            PoSqlBinaryOperator::Add => BinaryOperator::Plus,
+            PoSqlBinaryOperator::Subtract => BinaryOperator::Minus,
+            PoSqlBinaryOperator::Multiply => BinaryOperator::Multiply,
+            PoSqlBinaryOperator::Division => BinaryOperator::Divide,
+        }
+    }
+}
+
+/// Maps each PoSQL unary operator onto its `sqlparser` counterpart.
+impl From<PoSqlUnaryOperator> for UnaryOperator {
+    fn from(op: PoSqlUnaryOperator) -> Self {
+        match op {
+            PoSqlUnaryOperator::Not => UnaryOperator::Not,
+        }
+    }
+}
+
+/// Converts a PoSQL `ORDER BY` item into an [`OrderByExpr`] with an explicit direction and no
+/// `NULLS FIRST`/`NULLS LAST` setting.
+impl From<PoSqlOrderBy> for OrderByExpr {
+    fn from(order_by: PoSqlOrderBy) -> Self {
+        let asc = match order_by.direction {
+            OrderByDirection::Asc => Some(true),
+            OrderByDirection::Desc => Some(false),
+        };
+        OrderByExpr {
+            expr: id(order_by.expr),
+            asc,
+            nulls_first: None,
+        }
+    }
+}
+
+/// Recursively converts an [`Expression`] into an [`Expr`].
+///
+/// Aggregations become a call to a function named after the operator's `to_string()` output,
+/// taking the aggregated expression as its single unnamed argument.
+///
+/// # Panics
+/// Panics if the expression contains a [`Literal::Timestamp`], whose conversion is a `todo!()`.
+impl From<Expression> for Expr {
+    fn from(expr: Expression) -> Self {
+        match expr {
+            Expression::Literal(literal) => Expr::Value(literal.into()),
+            Expression::Column(identifier) => id(identifier),
+            Expression::Unary { op, expr } => Expr::UnaryOp {
+                op: op.into(),
+                expr: Box::new((*expr).into()),
+            },
+            Expression::Binary { op, left, right } => Expr::BinaryOp {
+                left: Box::new((*left).into()),
+                op: op.into(),
+                right: Box::new((*right).into()),
+            },
+            Expression::Wildcard => Expr::Wildcard,
+            Expression::Aggregation { op, expr } => Expr::Function(Function {
+                name: ObjectName(vec![Ident::new(op.to_string())]),
+                args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr((*expr).into()))],
+                filter: None,
+                null_treatment: None,
+                over: None,
+                distinct: false,
+                special: false,
+                order_by: vec![],
+            }),
+        }
+    }
+}
+
+/// Converts a result column: `ALL` becomes a bare wildcard, anything else an aliased expression.
+impl From<SelectResultExpr> for SelectItem {
+    fn from(select: SelectResultExpr) -> Self {
+        match select {
+            SelectResultExpr::ALL => SelectItem::Wildcard(WildcardAdditionalOptions {
+                opt_exclude: None,
+                opt_except: None,
+                opt_rename: None,
+                opt_replace: None,
+            }),
+            SelectResultExpr::AliasedResultExpr(AliasedResultExpr { expr, alias }) => {
+                SelectItem::ExprWithAlias {
+                    expr: (*expr).into(),
+                    alias: alias.into(),
+                }
+            }
+        }
+    }
+}
+
+/// Converts a [`SetExpression::Query`] into a [`Select`]: every `FROM` table becomes a join-less
+/// relation and every `GROUP BY` identifier a plain column expression.
+impl From<SetExpression> for Select {
+    fn from(select: SetExpression) -> Self {
+        match select {
+            SetExpression::Query {
+                result_exprs,
+                from,
+                where_expr,
+                group_by,
+            } => Select {
+                distinct: None,
+                top: None,
+                projection: result_exprs.into_iter().map(SelectItem::from).collect(),
+                into: None,
+                from: from
+                    .into_iter()
+                    .map(|table_expression| TableWithJoins {
+                        relation: (*table_expression).into(),
+                        joins: vec![],
+                    })
+                    .collect(),
+                lateral_views: vec![],
+                selection: where_expr.map(|expr| (*expr).into()),
+                group_by: GroupByExpr::Expressions(group_by.into_iter().map(id).collect()),
+                cluster_by: vec![],
+                distribute_by: vec![],
+                sort_by: vec![],
+                having: None,
+                named_window: vec![],
+                qualify: None,
+                value_table_mode: None,
+            },
+        }
+    }
+}
+
+/// Converts a [`SelectStatement`] into a [`Query`].
+///
+/// When a slice is present, `LIMIT` is taken from `number_rows` and `OFFSET` from `offset_value`.
+impl From<SelectStatement> for Query {
+    fn from(select: SelectStatement) -> Self {
+        Query {
+            with: None,
+            body: Box::new(SetExpr::Select(Box::new((*select.expr).into()))),
+            order_by: select.order_by.into_iter().map(OrderByExpr::from).collect(),
+            limit: select.slice.clone().map(|slice| number(slice.number_rows)),
+            limit_by: vec![],
+            offset: select.slice.map(|slice| Offset {
+                // Rows to skip, not the row count that feeds `LIMIT`.
+                value: number(slice.offset_value),
+                rows: OffsetRows::None,
+            }),
+            fetch: None,
+            locks: vec![],
+            for_clause: None,
+        }
+    }
+}