Skip to content

Commit

Permalink
Add unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
xinlifoobar committed Jul 10, 2024
1 parent c97323e commit 47f0595
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 11 deletions.
3 changes: 2 additions & 1 deletion datafusion/core/src/execution/session_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -633,7 +633,8 @@ impl SessionState {
ParserOptions {
parse_float_as_decimal: sql_parser_options.parse_float_as_decimal,
enable_ident_normalization: sql_parser_options.enable_ident_normalization,
enable_options_value_normalization: sql_parser_options.enable_options_value_normalization,
enable_options_value_normalization: sql_parser_options
.enable_options_value_normalization,
support_varchar_with_length: sql_parser_options.support_varchar_with_length,
}
}
Expand Down
12 changes: 8 additions & 4 deletions datafusion/sql/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ use datafusion_common::{
field_not_found, internal_err, plan_datafusion_err, DFSchemaRef, SchemaError,
};
use datafusion_expr::planner::UserDefinedSQLPlanner;
use sqlparser::ast::{TimezoneInfo, Value};
use sqlparser::ast::{ArrayElemTypeDef, ExactNumberInfo};
use sqlparser::ast::{ColumnDef as SQLColumnDef, ColumnOption};
use sqlparser::ast::{DataType as SQLDataType, Ident, ObjectName, TableAlias};
use sqlparser::ast::{TimezoneInfo, Value};

use datafusion_common::TableReference;
use datafusion_common::{
Expand Down Expand Up @@ -296,8 +296,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
let default_expr = self
.sql_to_expr(default_sql_expr.clone(), &empty_schema, planner_context)
.map_err(error_desc)?;
column_defaults
.push((self.ident_normalizer.normalize(column.name.clone()), default_expr));
column_defaults.push((
self.ident_normalizer.normalize(column.name.clone()),
default_expr,
));
}
}
Ok(column_defaults)
Expand All @@ -312,7 +314,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
let plan = self.apply_expr_alias(plan, alias.columns)?;

LogicalPlanBuilder::from(plan)
.alias(TableReference::bare(self.ident_normalizer.normalize(alias.name)))?
.alias(TableReference::bare(
self.ident_normalizer.normalize(alias.name),
))?
.build()
}

Expand Down
11 changes: 7 additions & 4 deletions datafusion/sql/src/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -856,7 +856,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
}
};

let options_map: HashMap<String, String> = self.parse_options_map(statement.options)?;
let options_map: HashMap<String, String> =
self.parse_options_map(statement.options, true)?;

let maybe_file_type = if let Some(stored_as) = &statement.stored_as {
if let Ok(ext_file_type) = self.context_provider.get_file_type(stored_as) {
Expand Down Expand Up @@ -964,7 +965,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
let inline_constraints = calc_inline_constraints_from_columns(&columns);
all_constraints.extend(inline_constraints);

let options_map = self.parse_options_map(options)?;
let options_map = self.parse_options_map(options, false)?;

let compression = options_map
.get("format.compression")
Expand Down Expand Up @@ -1018,10 +1019,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {

fn parse_options_map(
&self,
options: Vec<(String, Value)>) -> Result<HashMap<String, String>> {
options: Vec<(String, Value)>,
allow_duplicates: bool,
) -> Result<HashMap<String, String>> {
let mut options_map = HashMap::new();
for (key, value) in options {
if options_map.contains_key(&key) {
if !allow_duplicates && options_map.contains_key(&key) {
return plan_err!("Option {key} is specified multiple times");
}

Expand Down
66 changes: 65 additions & 1 deletion datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,11 @@ use datafusion_common::{
assert_contains, DataFusionError, ParamValues, Result, ScalarValue,
};
use datafusion_expr::{
dml::CopyTo,
logical_plan::{LogicalPlan, Prepare},
test::function_stub::sum_udaf,
ColumnarValue, ScalarUDF, ScalarUDFImpl, Signature, Volatility,
ColumnarValue, CreateExternalTable, DdlStatement, ScalarUDF, ScalarUDFImpl,
Signature, Volatility,
};
use datafusion_functions::{string, unicode};
use datafusion_sql::{
Expand Down Expand Up @@ -151,6 +153,68 @@ fn parse_ident_normalization() {
}
}

#[test]
fn test_parse_options_value_normalization() {
    // Each case is (SQL, expected plan debug string, expected option key/values,
    // value of the `enable_options_value_normalization` parser option).
    //
    // With normalization enabled the option value 'LoCaTiOn' must be lowered to
    // "location"; with it disabled the original casing must be preserved.
    let test_data = [
        (
            "CREATE EXTERNAL TABLE test OPTIONS ('location' 'LoCaTiOn') STORED AS PARQUET LOCATION 'fake_location'",
            "CreateExternalTable: Bare { table: \"test\" }",
            HashMap::from([("format.location", "LoCaTiOn")]),
            false,
        ),
        (
            "CREATE EXTERNAL TABLE test OPTIONS ('location' 'LoCaTiOn') STORED AS PARQUET LOCATION 'fake_location'",
            "CreateExternalTable: Bare { table: \"test\" }",
            HashMap::from([("format.location", "location")]),
            true,
        ),
        (
            "COPY test TO 'fake_location' STORED AS PARQUET OPTIONS ('location' 'LoCaTiOn')",
            "CopyTo: format=csv output_url=fake_location options: (format.location LoCaTiOn)\n  TableScan: test",
            HashMap::from([("format.location", "LoCaTiOn")]),
            false,
        ),
        (
            "COPY test TO 'fake_location' STORED AS PARQUET OPTIONS ('location' 'LoCaTiOn')",
            "COPY test TO 'fake_location' STORED AS PARQUET OPTIONS ('location' 'LoCaTiOn')",
            HashMap::from([("format.location", "location")]),
            true,
        ),
    ];

    for (sql, expected_plan, expected_options, enable_options_value_normalization) in
        test_data
    {
        let plan = logical_plan_with_options(
            sql,
            ParserOptions {
                parse_float_as_decimal: false,
                enable_ident_normalization: false,
                support_varchar_with_length: false,
                enable_options_value_normalization,
            },
        );
        match plan {
            Ok(plan) => {
                // Planning succeeded: the plan itself and its recorded options
                // must both match expectations.
                assert_eq!(expected_plan, format!("{plan:?}"));

                match plan {
                    LogicalPlan::Ddl(DdlStatement::CreateExternalTable(
                        CreateExternalTable { options, .. },
                    ))
                    | LogicalPlan::Copy(CopyTo { options, .. }) => {
                        expected_options.iter().for_each(|(k, v)| {
                            assert_eq!(Some(&v.to_string()), options.get(*k));
                        });
                    }
                    // NOTE: the arm above accepts both CreateExternalTable and
                    // CopyTo, so the failure message names both (the original
                    // message mentioned only CreateExternalTable).
                    _ => panic!(
                        "Expected CreateExternalTable or CopyTo plan, got {plan:?}"
                    ),
                }
            }
            // Planning failed: the expected string is the error message.
            Err(err) => assert_eq!(expected_plan, err.strip_backtrace()),
        }
    }
}

#[test]
fn select_no_relation() {
quick_test(
Expand Down
2 changes: 2 additions & 0 deletions datafusion/sqllogictest/test_files/information_schema.slt
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,7 @@ datafusion.optimizer.skip_failed_rules false
datafusion.optimizer.top_down_join_key_reordering true
datafusion.sql_parser.dialect generic
datafusion.sql_parser.enable_ident_normalization true
datafusion.sql_parser.enable_options_value_normalization true
datafusion.sql_parser.parse_float_as_decimal false
datafusion.sql_parser.support_varchar_with_length true

Expand Down Expand Up @@ -322,6 +323,7 @@ datafusion.optimizer.skip_failed_rules false When set to true, the logical plan
datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys
datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi.
datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted)
datafusion.sql_parser.enable_options_value_normalization true When set to true, SQL parser will normalize options value (convert value to lowercase)
datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type
datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits.

Expand Down
3 changes: 2 additions & 1 deletion docs/source/user-guide/configs.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus
| datafusion.explain.show_sizes | true | When set to true, the explain statement will print the partition sizes |
| datafusion.explain.show_schema | false | When set to true, the explain statement will print schema information |
| datafusion.sql_parser.parse_float_as_decimal | false | When set to true, SQL parser will parse float as decimal type |
| datafusion.sql_parser.enable_ident_normalization | true | When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) |
| datafusion.sql_parser.enable_options_value_normalization | true | When set to true, SQL parser will normalize options value (convert value to lowercase) |
| datafusion.sql_parser.dialect | generic | Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. |
| datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. |

0 comments on commit 47f0595

Please sign in to comment.