Skip to content

Commit

Permalink
feat(sql): improve interval expression, support shortened version (#4182)
Browse files Browse the repository at this point in the history

* feat(sql): improve interval expression, support shortened version

* fix(sql): remove accidental change of sqlness assertion

* fix(sql): address CR feedback, add more tests

* chore(sql): add more tests
  • Loading branch information
etolbakov authored Jun 24, 2024
1 parent 4b42c7b commit cdd4baf
Show file tree
Hide file tree
Showing 5 changed files with 306 additions and 1 deletion.
4 changes: 4 additions & 0 deletions src/sql/src/statements/transform.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,17 @@ use sqlparser::ast::{visit_expressions_mut, Expr};

use crate::error::Result;
use crate::statements::statement::Statement;
mod expand_interval;
mod type_alias;

use expand_interval::ExpandIntervalTransformRule;
pub use type_alias::get_data_type_by_alias_name;
use type_alias::TypeAliasTransformRule;

lazy_static! {
// NOTE(review): the code that walks `RULES` is outside this view; presumably the rules are
// applied in the order listed below — confirm before relying on that ordering.
/// [TransformRule] registry.
///
/// Holds one shared instance each of [ExpandIntervalTransformRule] and
/// [TypeAliasTransformRule], in that order.
static ref RULES: Vec<Arc<dyn TransformRule>> = vec![
Arc::new(ExpandIntervalTransformRule{}),
Arc::new(TypeAliasTransformRule{}),
];
}
Expand Down
249 changes: 249 additions & 0 deletions src/sql/src/statements/transform/expand_interval.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::HashMap;
use std::ops::ControlFlow;

use itertools::Itertools;
use lazy_static::lazy_static;
use regex::Regex;
use sqlparser::ast::{Expr, Interval, Value};

use crate::statements::transform::TransformRule;

lazy_static! {
/// Tokenizer for a compact interval literal: every match is either a run of digits with an
/// optional leading minus sign (`-?\d+`) or a run of non-digit characters (`\D+`).
/// Because `\D` also matches `-`, a minus sign that directly follows non-digit characters is
/// consumed by the non-digit run (e.g. `y-` in `1y-2mon`) rather than attached to the number.
static ref INTERVAL_SHORT_NAME_PATTERN: Regex = Regex::new(r"(-?\d+|\D+)").unwrap();

/// Maps each supported unit abbreviation to the full unit name it is expanded to.
/// Keys are looked up by exact string equality, so the abbreviations are case-sensitive.
// NOTE(review): `ms` expands to `microseconds`, although `ms` conventionally denotes
// milliseconds (`us` already covers microseconds). The rule's doc comment and the unit tests in
// this file pin the current behaviour, so they must be updated together if this is corrected.
static ref INTERVAL_SHORT_NAME_MAPPING: HashMap<&'static str, &'static str> = HashMap::from([
("y","years"),
("mon","months"),
("w","weeks"),
("d","days"),
("h","hours"),
("m","minutes"),
("s","seconds"),
("millis","milliseconds"),
("mils","milliseconds"),
("ms","microseconds"),
("us","microseconds"),
("ns","nanoseconds"),
]);
}

/// `Interval` expression transformer: rewrites the shortened form of an interval literal
/// into its full form.
///
/// Supported abbreviations:
/// - `y` for `years`
/// - `mon` for `months`
/// - `w` for `weeks`
/// - `d` for `days`
/// - `h` for `hours`
/// - `m` for `minutes`
/// - `s` for `seconds`
/// - `millis` for `milliseconds`
/// - `mils` for `milliseconds`
/// - `ms` for `microseconds` (note: not milliseconds)
/// - `us` for `microseconds`
/// - `ns` for `nanoseconds`
///
/// Required for use cases that use the shortened version of Interval declaration,
/// e.g. `select interval '1h'` or `select interval '3w'`.
pub(crate) struct ExpandIntervalTransformRule;

impl TransformRule for ExpandIntervalTransformRule {
/// Applies transform rule for `Interval` type by extending the shortened version (e.g. '1h', '2d')
/// In case when `Interval` has `BinaryOp` value (e.g. query like `SELECT INTERVAL '2h' - INTERVAL '1h'`)
/// it's AST has `left` part of type `Value::SingleQuotedString` which needs to be handled specifically.
/// To handle the `right` part which is `Interval` no extra steps are needed.
fn visit_expr(&self, expr: &mut Expr) -> ControlFlow<()> {
if let Expr::Interval(interval) = expr {
match *interval.value.clone() {
Expr::Value(Value::SingleQuotedString(value))
| Expr::Value(Value::DoubleQuotedString(value)) => {
if let Some(data) = expand_interval_name(&value) {
*expr = create_interval_with_expanded_name(
interval,
single_quoted_string_expr(data),
);
}
}
Expr::BinaryOp { left, op, right } => match *left {
Expr::Value(Value::SingleQuotedString(value))
| Expr::Value(Value::DoubleQuotedString(value)) => {
if let Some(data) = expand_interval_name(&value) {
let new_value = Box::new(Expr::BinaryOp {
left: single_quoted_string_expr(data),
op,
right,
});
*expr = create_interval_with_expanded_name(interval, new_value);
}
}
_ => {}
},
_ => {}
}
}
ControlFlow::<()>::Continue(())
}
}

/// Wraps `data` in a boxed single-quoted string literal expression.
fn single_quoted_string_expr(data: String) -> Box<Expr> {
    let literal = Value::SingleQuotedString(data);
    Box::new(Expr::Value(literal))
}

/// Builds a new `Expr::Interval` that uses `new_value` as its value and copies the
/// leading/last field and precision settings from `interval`.
fn create_interval_with_expanded_name(interval: &Interval, new_value: Box<Expr>) -> Expr {
    let Interval {
        leading_field,
        leading_precision,
        last_field,
        fractional_seconds_precision,
        ..
    } = interval;

    let expanded = Interval {
        value: new_value,
        leading_field: leading_field.clone(),
        leading_precision: *leading_precision,
        last_field: last_field.clone(),
        fractional_seconds_precision: *fractional_seconds_precision,
    };
    Expr::Interval(expanded)
}

/// Expands the shortened unit names inside an interval string to their full names.
///
/// The input is split by `INTERVAL_SHORT_NAME_PATTERN` into numeric and non-numeric tokens.
/// Tokens found in `INTERVAL_SHORT_NAME_MAPPING` are replaced by the mapped full name
/// (e.g. "years", "hours", "minutes"), every other token is kept as is, and the tokens are
/// joined with single spaces: `"2y4w1h"` becomes `"2 years 4 weeks 1 hours"`.
///
/// Returns `None` when `interval_str` contains any whitespace: such a string is considered to
/// be in the full form already. As a consequence the hybrid format "1y 2 days 3h" is not
/// supported.
fn expand_interval_name(interval_str: &str) -> Option<String> {
    if interval_str.contains(char::is_whitespace) {
        return None;
    }

    let expanded = INTERVAL_SHORT_NAME_PATTERN
        .find_iter(interval_str)
        .map(|mat| {
            let token = mat.as_str();
            // Unknown tokens (numbers, unrecognised unit names) pass through unchanged.
            INTERVAL_SHORT_NAME_MAPPING
                .get(token)
                .copied()
                .unwrap_or(token)
        })
        .join(" ");
    Some(expanded)
}

#[cfg(test)]
mod tests {
    use std::ops::ControlFlow;

    use sqlparser::ast::{BinaryOperator, Expr, Interval, Value};

    use crate::statements::transform::expand_interval::{
        expand_interval_name, single_quoted_string_expr, ExpandIntervalTransformRule,
    };
    use crate::statements::transform::TransformRule;

    /// Builds an `Expr::Interval` around `value` with every optional qualifier unset.
    fn bare_interval(value: Box<Expr>) -> Expr {
        Expr::Interval(Interval {
            value,
            leading_field: None,
            leading_precision: None,
            last_field: None,
            fractional_seconds_precision: None,
        })
    }

    #[test]
    fn test_transform_interval_conversions() {
        // Shortened inputs (no whitespace) are expanded token by token.
        let expanded_cases = [
            ("1y", "1 years"),
            ("4mon", "4 months"),
            ("-3w", "-3 weeks"),
            ("55h", "55 hours"),
            ("3d", "3 days"),
            ("5s", "5 seconds"),
            ("2m", "2 minutes"),
            ("100millis", "100 milliseconds"),
            ("150mils", "150 milliseconds"),
            ("200ms", "200 microseconds"),
            ("350us", "350 microseconds"),
            ("400ns", "400 nanoseconds"),
            ("2y4w1h", "2 years 4 weeks 1 hours"),
        ];
        for (input, expected) in expanded_cases {
            let expanded = expand_interval_name(input).unwrap();
            assert_eq!(expanded, expected);
        }

        // Inputs containing whitespace are treated as already being in the full form.
        let untouched_cases = ["1 year 2 months 3 days 4 hours", "-2 months"];
        for input in untouched_cases {
            assert_eq!(expand_interval_name(input), None);
        }
    }

    #[test]
    fn test_visit_expr_when_interval_is_single_quoted_string_expr() {
        let rule = ExpandIntervalTransformRule {};
        let mut actual = bare_interval(single_quoted_string_expr("5y".to_string()));

        let control_flow = rule.visit_expr(&mut actual);

        assert_eq!(control_flow, ControlFlow::Continue(()));
        // The expected literal is spelled out by hand so the helper is checked as well.
        let expected = bare_interval(Box::new(Expr::Value(Value::SingleQuotedString(
            "5 years".to_string(),
        ))));
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_visit_expr_when_interval_is_binary_op() {
        let rule = ExpandIntervalTransformRule {};

        // `left_literal - INTERVAL '1d'` wrapped in an outer interval.
        let difference_of = |left_literal: &str| {
            bare_interval(Box::new(Expr::BinaryOp {
                left: single_quoted_string_expr(left_literal.to_string()),
                op: BinaryOperator::Minus,
                right: Box::new(bare_interval(single_quoted_string_expr("1d".to_string()))),
            }))
        };
        let mut actual = difference_of("2d");

        let control_flow = rule.visit_expr(&mut actual);

        assert_eq!(control_flow, ControlFlow::Continue(()));
        // Only the left operand is expanded by this call; the nested interval keeps "1d".
        assert_eq!(actual, difference_of("2 days"));
    }
}
2 changes: 1 addition & 1 deletion src/sql/src/statements/transform/type_alias.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ impl TransformRule for TypeAliasTransformRule {

fn replace_type_alias(data_type: &mut DataType) {
match data_type {
// TODO(dennis): The sqlparser latest version contains the Int8 alias for postres Bigint.
// TODO(dennis): The sqlparser latest version contains the Int8 alias for Postgres Bigint.
// Which means 8 bytes in postgres (not 8 bits). If we upgrade the sqlparser, need to process it.
// See https://docs.rs/sqlparser/latest/sqlparser/ast/enum.DataType.html#variant.Int8
DataType::Custom(name, tokens) if name.0.len() == 1 && tokens.is_empty() => {
Expand Down
41 changes: 41 additions & 0 deletions tests/cases/standalone/common/types/interval/interval.result
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,47 @@ SELECT TIMESTAMP '1992-09-20 11:30:00.123456' - interval_value as new_value from
| 1980-09-07T23:17:48.111443988 |
+-------------------------------+

-- Interval shortened names
SELECT INTERVAL '55h';

+--------------------------------------------------------+
| IntervalMonthDayNano("198000000000000") |
+--------------------------------------------------------+
| 0 years 0 mons 0 days 55 hours 0 mins 0.000000000 secs |
+--------------------------------------------------------+

SELECT INTERVAL '-2mon';

+---------------------------------------------------------+
| IntervalMonthDayNano("-158456325028528675187087900672") |
+---------------------------------------------------------+
| 0 years -2 mons 0 days 0 hours 0 mins 0.000000000 secs |
+---------------------------------------------------------+

SELECT INTERVAL '1y2w3d4h';

+---------------------------------------------------------+
| IntervalMonthDayNano("950737950484766714775589781504") |
+---------------------------------------------------------+
| 0 years 12 mons 17 days 4 hours 0 mins 0.000000000 secs |
+---------------------------------------------------------+

SELECT INTERVAL '7 days' - INTERVAL '1d';

+----------------------------------------------------------------------------------------------+
| IntervalMonthDayNano("129127208515966861312") - IntervalMonthDayNano("18446744073709551616") |
+----------------------------------------------------------------------------------------------+
| 0 years 0 mons 6 days 0 hours 0 mins 0.000000000 secs |
+----------------------------------------------------------------------------------------------+

SELECT INTERVAL '2h' + INTERVAL '1h';

+-------------------------------------------------------------------------------+
| IntervalMonthDayNano("7200000000000") + IntervalMonthDayNano("3600000000000") |
+-------------------------------------------------------------------------------+
| 0 years 0 mons 0 days 3 hours 0 mins 0.000000000 secs |
+-------------------------------------------------------------------------------+

-- Interval type does not support aggregation functions.
SELECT MIN(interval_value) from intervals;

Expand Down
11 changes: 11 additions & 0 deletions tests/cases/standalone/common/types/interval/interval.sql
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,17 @@ SELECT TIMESTAMP '1992-09-20 11:30:00.123456' + interval_value as new_value from
-- TIMESTAMP CONSTANT - INTERVAL
SELECT TIMESTAMP '1992-09-20 11:30:00.123456' - interval_value as new_value from intervals;

-- Interval shortened names
SELECT INTERVAL '55h';

SELECT INTERVAL '-2mon';

SELECT INTERVAL '1y2w3d4h';

SELECT INTERVAL '7 days' - INTERVAL '1d';

SELECT INTERVAL '2h' + INTERVAL '1h';


-- Interval type does not support aggregation functions.
SELECT MIN(interval_value) from intervals;
Expand Down

0 comments on commit cdd4baf

Please sign in to comment.