From c964a2f29d3d55400dae3ee522e287c240287a16 Mon Sep 17 00:00:00 2001 From: Seth Paydar <29551413+spaydar@users.noreply.github.com> Date: Thu, 2 Nov 2023 22:32:20 -0700 Subject: [PATCH 1/8] feat: test queries for to_timestamp(float) WIP --- .../pg_compat/pg_compat_timestamps.slt | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 datafusion/sqllogictest/test_files/pg_compat/pg_compat_timestamps.slt diff --git a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_timestamps.slt b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_timestamps.slt new file mode 100644 index 000000000000..6ee9ae12182f --- /dev/null +++ b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_timestamps.slt @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# to_timestamp float inputs +query +SELECT to_timestamp(1.1); + +query +SELECT to_timestamp(-1.1); + +query +SELECT to_timestamp(0.0); + +query +SELECT to_timestamp(1.23456789); + +query +SELECT to_timestamp(123456789.123456789); From 34c72b9d5d753b14be78c048f285c5b8783d77ad Mon Sep 17 00:00:00 2001 From: Seth Paydar <29551413+spaydar@users.noreply.github.com> Date: Mon, 13 Nov 2023 20:51:33 -0800 Subject: [PATCH 2/8] feat: Float64 input for to_timestamp --- datafusion/expr/src/built_in_function.rs | 1 + datafusion/physical-expr/src/functions.rs | 15 +++++++++ .../pg_compat/pg_compat_timestamps.slt | 32 ------------------- .../sqllogictest/test_files/timestamps.slt | 29 +++++++++++++++++ 4 files changed, 45 insertions(+), 32 deletions(-) delete mode 100644 datafusion/sqllogictest/test_files/pg_compat/pg_compat_timestamps.slt diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 4db565abfcf7..a7d00387a55e 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -970,6 +970,7 @@ impl BuiltinScalarFunction { 1, vec![ Int64, + Float64, Timestamp(Nanosecond, None), Timestamp(Microsecond, None), Timestamp(Millisecond, None), diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index 8422862043ae..474294db53bc 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -81,6 +81,21 @@ pub fn create_physical_expr( None, ) }, + Ok(DataType::Float64) => |col_values: &[ColumnarValue]| { + if let ColumnarValue::Scalar(ScalarValue::Float64(Some(float_ts))) = &col_values[0] { + cast_column( + &ColumnarValue::Scalar(ScalarValue::Int64(Some((float_ts * 1_000_000_000 as f64).trunc() as i64))), + &DataType::Timestamp(TimeUnit::Nanosecond, None), + None, + ) + } else { + cast_column( + &col_values[0], + &DataType::Timestamp(TimeUnit::Nanosecond, None), + None, + ) + } + }, Ok(DataType::Timestamp(_, None)) => |col_values: &[ColumnarValue]| { cast_column( &col_values[0], diff --git a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_timestamps.slt b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_timestamps.slt deleted file mode 100644 index 6ee9ae12182f..000000000000 --- a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_timestamps.slt +++ /dev/null @@ -1,32 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# to_timestamp float inputs -query -SELECT to_timestamp(1.1); - -query -SELECT to_timestamp(-1.1); - -query -SELECT to_timestamp(0.0); - -query -SELECT to_timestamp(1.23456789); - -query -SELECT to_timestamp(123456789.123456789); diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index e186aa12f7a9..bec6280f9fcb 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -291,6 +291,35 @@ SELECT COUNT(*) FROM ts_data_secs where ts > to_timestamp_seconds('2020-09-08T12 ---- 2 + +# to_timestamp float inputs + +query P +SELECT to_timestamp(1.1); +---- +1970-01-01T00:00:01.100 + +query P +SELECT to_timestamp(-1.1); +---- +1969-12-31T23:59:58.900 + +query P +SELECT to_timestamp(0.0); +---- +1970-01-01T00:00:00 + +query P +SELECT to_timestamp(1.23456789); +---- +1970-01-01T00:00:01.234567890 + +query P +SELECT to_timestamp(123456789.123456789); +---- +1973-11-29T21:33:09.123456784 + + # from_unixtime # 1599566400 is '2020-09-08T12:00:00+00:00' From 9256de42c0743778d54282ef55d1f93d1315591c Mon Sep 17 00:00:00 2001 From: Seth Paydar <29551413+spaydar@users.noreply.github.com> Date: Mon, 13 Nov 2023 20:58:52 -0800 Subject: [PATCH 3/8] cargo fmt --- datafusion/physical-expr/src/functions.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index 474294db53bc..afc80941b56a 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -82,9 +82,13 @@ pub fn create_physical_expr( ) }, Ok(DataType::Float64) => |col_values: &[ColumnarValue]| { - if let ColumnarValue::Scalar(ScalarValue::Float64(Some(float_ts))) = &col_values[0] { + if let ColumnarValue::Scalar(ScalarValue::Float64(Some(float_ts))) = + &col_values[0] + { cast_column( - &ColumnarValue::Scalar(ScalarValue::Int64(Some((float_ts * 1_000_000_000 as f64).trunc() as i64))), + &ColumnarValue::Scalar(ScalarValue::Int64(Some( + (float_ts * 1_000_000_000 as f64).trunc() as i64, + ))), &DataType::Timestamp(TimeUnit::Nanosecond, None), None, ) From e489e930971c2b749407f2e7f4bf285d56f41ede Mon Sep 17 00:00:00 2001 From: Seth Paydar <29551413+spaydar@users.noreply.github.com> Date: Mon, 13 Nov 2023 21:18:39 -0800 Subject: [PATCH 4/8] clippy --- datafusion/physical-expr/src/functions.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index afc80941b56a..474091840311 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -87,7 +87,7 @@ pub fn create_physical_expr( { cast_column( &ColumnarValue::Scalar(ScalarValue::Int64(Some( - (float_ts * 1_000_000_000 as f64).trunc() as i64, + (float_ts * 1_000_000_000_f64).trunc() as i64, ))), &DataType::Timestamp(TimeUnit::Nanosecond, None), None, From c784ca9afa9b0d70bd97dacaf612fea51159c8f0 Mon Sep 17 00:00:00 2001 From: Seth Paydar <29551413+spaydar@users.noreply.github.com> Date: Mon, 13 Nov 2023 22:13:54 -0800 Subject: [PATCH 5/8] docs: double input type for to_timestamp --- docs/source/user-guide/sql/scalar_functions.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index b7426baea3da..509f096558ac 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -1392,8 +1392,8 @@ extract(field FROM source) ### `to_timestamp` Converts a value to RFC3339 nanosecond timestamp format (`YYYY-MM-DDT00:00:00Z`). -Supports timestamp, integer, and unsigned integer types as input. -Integers and unsigned integers are parsed as Unix second timestamps and +Supports timestamp, integer, unsigned integer, and double types as input. +Integers, unsigned integers, and doubles are parsed as Unix second timestamps and return the corresponding RFC3339 timestamp. ``` From d5c908ccddc311742129cc577526a0940ec4eec5 Mon Sep 17 00:00:00 2001 From: Seth Paydar <29551413+spaydar@users.noreply.github.com> Date: Tue, 28 Nov 2023 21:16:47 -0800 Subject: [PATCH 6/8] feat: cast floats to timestamp --- .../physical-expr/src/datetime_expressions.rs | 21 ++----------- .../physical-expr/src/expressions/cast.rs | 12 ++++++-- .../sqllogictest/test_files/timestamps.slt | 30 +++++++++---------- 3 files changed, 28 insertions(+), 35 deletions(-) diff --git a/datafusion/physical-expr/src/datetime_expressions.rs b/datafusion/physical-expr/src/datetime_expressions.rs index 4e7c27ec1482..f343646b6927 100644 --- a/datafusion/physical-expr/src/datetime_expressions.rs +++ b/datafusion/physical-expr/src/datetime_expressions.rs @@ -968,25 +968,10 @@ pub fn to_timestamp_invoke(args: &[ColumnarValue]) -> Result { match args[0].data_type() { DataType::Int64 => { cast_column(&args[0], &DataType::Timestamp(TimeUnit::Second, None), None) - } + }, DataType::Float64 => { - if let ColumnarValue::Scalar(ScalarValue::Float64(Some(float_ts))) = &args[0] - { - cast_column( - &ColumnarValue::Scalar(ScalarValue::Int64(Some( - (float_ts * 1_000_000_000_f64).trunc() as i64, - ))), - &DataType::Timestamp(TimeUnit::Nanosecond, None), - None, - ) - } else { - cast_column( - &args[0], - &DataType::Timestamp(TimeUnit::Nanosecond, None), - None, - ) - } - } + cast_column(&args[0], &DataType::Timestamp(TimeUnit::Nanosecond, None), None) + }, DataType::Timestamp(_, None) => cast_column( &args[0], &DataType::Timestamp(TimeUnit::Nanosecond, None), diff --git a/datafusion/physical-expr/src/expressions/cast.rs b/datafusion/physical-expr/src/expressions/cast.rs index 780e042156b8..a2b745fcda2e 100644 --- a/datafusion/physical-expr/src/expressions/cast.rs +++ b/datafusion/physical-expr/src/expressions/cast.rs @@ -178,7 +178,13 @@ pub fn cast_column( kernels::cast::cast_with_options(array, cast_type, &cast_options)?, )), ColumnarValue::Scalar(scalar) => { - let scalar_array = scalar.to_array()?; + let scalar_array = if let ScalarValue::Float64(Some(float_ts)) = scalar { + ScalarValue::Int64(Some( + (float_ts * 1_000_000_000_f64).trunc() as i64, + )).to_array()? + } else { + scalar.to_array()? + }; let cast_array = kernels::cast::cast_with_options( &scalar_array, cast_type, @@ -203,7 +209,9 @@ pub fn cast_with_options( let expr_type = expr.data_type(input_schema)?; if expr_type == cast_type { Ok(expr.clone()) - } else if can_cast_types(&expr_type, &cast_type) { + } else if can_cast_types(&expr_type, &cast_type) || + (expr_type == DataType::Float64 && cast_type == DataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, None)) + { Ok(Arc::new(CastExpr::new(expr, cast_type, cast_options))) } else { not_impl_err!("Unsupported CAST from {expr_type:?} to {cast_type:?}") diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index bec6280f9fcb..317b7ed48fd4 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -294,30 +294,30 @@ SELECT COUNT(*) FROM ts_data_secs where ts > to_timestamp_seconds('2020-09-08T12 # to_timestamp float inputs -query P -SELECT to_timestamp(1.1); +query PPP +SELECT to_timestamp(1.1) as c1, cast(1.1 as timestamp) as c2, 1.1::timestamp as c3; ---- -1970-01-01T00:00:01.100 +1970-01-01T00:00:01.100 1970-01-01T00:00:01.100 1970-01-01T00:00:01.100 -query P -SELECT to_timestamp(-1.1); +query PPP +SELECT to_timestamp(-1.1) as c1, cast(-1.1 as timestamp) as c2, (-1.1)::timestamp as c3; ---- -1969-12-31T23:59:58.900 +1969-12-31T23:59:58.900 1969-12-31T23:59:58.900 1969-12-31T23:59:58.900 -query P -SELECT to_timestamp(0.0); +query PPP +SELECT to_timestamp(0.0) as c1, cast(0.0 as timestamp) as c2, 0.0::timestamp as c3; ---- -1970-01-01T00:00:00 +1970-01-01T00:00:00 1970-01-01T00:00:00 1970-01-01T00:00:00 -query P -SELECT to_timestamp(1.23456789); +query PPP +SELECT to_timestamp(1.23456789) as c1, cast(1.23456789 as timestamp) as c2, 1.23456789::timestamp as c3; ---- -1970-01-01T00:00:01.234567890 +1970-01-01T00:00:01.234567890 1970-01-01T00:00:01.234567890 1970-01-01T00:00:01.234567890 -query P -SELECT to_timestamp(123456789.123456789); +query PPP +SELECT to_timestamp(123456789.123456789) as c1, cast(123456789.123456789 as timestamp) as c2, 123456789.123456789::timestamp as c3; ---- -1973-11-29T21:33:09.123456784 +1973-11-29T21:33:09.123456784 1973-11-29T21:33:09.123456784 1973-11-29T21:33:09.123456784 # from_unixtime From 340688fe84c822850e07fb15ef1fc1da76434989 Mon Sep 17 00:00:00 2001 From: Seth Paydar <29551413+spaydar@users.noreply.github.com> Date: Tue, 28 Nov 2023 21:20:26 -0800 Subject: [PATCH 7/8] style: cargo fmt --- datafusion/physical-expr/src/datetime_expressions.rs | 10 ++++++---- datafusion/physical-expr/src/expressions/cast.rs | 10 +++++----- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/datafusion/physical-expr/src/datetime_expressions.rs b/datafusion/physical-expr/src/datetime_expressions.rs index f343646b6927..b358887b9f5f 100644 --- a/datafusion/physical-expr/src/datetime_expressions.rs +++ b/datafusion/physical-expr/src/datetime_expressions.rs @@ -968,10 +968,12 @@ pub fn to_timestamp_invoke(args: &[ColumnarValue]) -> Result { match args[0].data_type() { DataType::Int64 => { cast_column(&args[0], &DataType::Timestamp(TimeUnit::Second, None), None) - }, - DataType::Float64 => { - cast_column(&args[0], &DataType::Timestamp(TimeUnit::Nanosecond, None), None) - }, + } + DataType::Float64 => cast_column( + &args[0], + &DataType::Timestamp(TimeUnit::Nanosecond, None), + None, + ), DataType::Timestamp(_, None) => cast_column( &args[0], &DataType::Timestamp(TimeUnit::Nanosecond, None), diff --git a/datafusion/physical-expr/src/expressions/cast.rs b/datafusion/physical-expr/src/expressions/cast.rs index a2b745fcda2e..416701be71ae 100644 --- a/datafusion/physical-expr/src/expressions/cast.rs +++ b/datafusion/physical-expr/src/expressions/cast.rs @@ -179,9 +179,8 @@ pub fn cast_column( )), ColumnarValue::Scalar(scalar) => { let scalar_array = if let ScalarValue::Float64(Some(float_ts)) = scalar { - ScalarValue::Int64(Some( - (float_ts * 1_000_000_000_f64).trunc() as i64, - )).to_array()? + ScalarValue::Int64(Some((float_ts * 1_000_000_000_f64).trunc() as i64)) + .to_array()? } else { scalar.to_array()? }; @@ -209,8 +208,9 @@ pub fn cast_with_options( let expr_type = expr.data_type(input_schema)?; if expr_type == cast_type { Ok(expr.clone()) - } else if can_cast_types(&expr_type, &cast_type) || - (expr_type == DataType::Float64 && cast_type == DataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, None)) + } else if can_cast_types(&expr_type, &cast_type) + || (expr_type == DataType::Float64 + && cast_type == DataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, None)) { Ok(Arc::new(CastExpr::new(expr, cast_type, cast_options))) } else { From 55ffc5c83cdfb52e89459870082f02b116f69b85 Mon Sep 17 00:00:00 2001 From: Seth Paydar <29551413+spaydar@users.noreply.github.com> Date: Tue, 28 Nov 2023 22:07:46 -0800 Subject: [PATCH 8/8] fix: float64 cast for timestamp nanos only --- datafusion/physical-expr/src/expressions/cast.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/datafusion/physical-expr/src/expressions/cast.rs b/datafusion/physical-expr/src/expressions/cast.rs index abb423130e77..b3ca95292a37 100644 --- a/datafusion/physical-expr/src/expressions/cast.rs +++ b/datafusion/physical-expr/src/expressions/cast.rs @@ -176,9 +176,17 @@ pub fn cast_column( kernels::cast::cast_with_options(array, cast_type, &cast_options)?, )), ColumnarValue::Scalar(scalar) => { - let scalar_array = if let ScalarValue::Float64(Some(float_ts)) = scalar { - ScalarValue::Int64(Some((float_ts * 1_000_000_000_f64).trunc() as i64)) + let scalar_array = if cast_type + == &DataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, None) + { + if let ScalarValue::Float64(Some(float_ts)) = scalar { + ScalarValue::Int64( + Some((float_ts * 1_000_000_000_f64).trunc() as i64), + ) .to_array()? + } else { + scalar.to_array()? + } } else { scalar.to_array()? };