diff --git a/datafusion-examples/examples/advanced_udf.rs b/datafusion-examples/examples/advanced_udf.rs index 9a3ee9c8ebcd..aee3be6c9285 100644 --- a/datafusion-examples/examples/advanced_udf.rs +++ b/datafusion-examples/examples/advanced_udf.rs @@ -91,7 +91,11 @@ impl ScalarUDFImpl for PowUdf { /// /// However, it also means the implementation is more complex than when /// using `create_udf`. - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { // DataFusion has arranged for the correct inputs to be passed to this // function, but we check again to make sure assert_eq!(args.len(), 2); diff --git a/datafusion-examples/examples/function_factory.rs b/datafusion-examples/examples/function_factory.rs index b42f25437d77..b2771149aae5 100644 --- a/datafusion-examples/examples/function_factory.rs +++ b/datafusion-examples/examples/function_factory.rs @@ -26,7 +26,9 @@ use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{exec_err, internal_err, DataFusionError}; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; -use datafusion_expr::{CreateFunction, Expr, ScalarUDF, ScalarUDFImpl, Signature}; +use datafusion_expr::{ + CreateFunction, Expr, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, +}; /// This example shows how to utilize [FunctionFactory] to implement simple /// SQL-macro like functions using a `CREATE FUNCTION` statement. 
The same @@ -132,9 +134,9 @@ impl ScalarUDFImpl for ScalarFunctionWrapper { Ok(self.return_type.clone()) } - fn invoke( + fn invoke_with_args( &self, - _args: &[datafusion_expr::ColumnarValue], + _args: ScalarFunctionArgs, ) -> Result { // Since this function is always simplified to another expression, it // should never actually be invoked diff --git a/datafusion-examples/examples/optimizer_rule.rs b/datafusion-examples/examples/optimizer_rule.rs index e0b552620a9a..0f28a1670252 100644 --- a/datafusion-examples/examples/optimizer_rule.rs +++ b/datafusion-examples/examples/optimizer_rule.rs @@ -205,7 +205,11 @@ impl ScalarUDFImpl for MyEq { Ok(DataType::Boolean) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { // this example simply returns "true" which is not what a real // implementation would do. Ok(ColumnarValue::Scalar(ScalarValue::from(true))) diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs index 2c2ff6d48aec..3ac40bfb62ea 100644 --- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs @@ -1382,7 +1382,11 @@ mod tests { Ok(DataType::Int32) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { unimplemented!("DummyUDF::invoke") } } diff --git a/datafusion/core/tests/fuzz_cases/equivalence/utils.rs b/datafusion/core/tests/fuzz_cases/equivalence/utils.rs index 262f68079f3f..881949047bff 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/utils.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/utils.rs @@ -581,7 +581,11 @@ impl ScalarUDFImpl for TestScalarUDF { Ok(input[0].sort_properties) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: 
&[ColumnarValue], + _number_rows: usize, + ) -> Result { let args = ColumnarValue::values_to_arrays(args)?; let arr: ArrayRef = match args[0].data_type() { diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index cf403e5d640f..a59394f90814 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -520,10 +520,6 @@ impl ScalarUDFImpl for AddIndexToStringVolatileScalarUDF { Ok(self.return_type.clone()) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { - not_impl_err!("index_with_offset function does not accept arguments") - } - fn invoke_batch( &self, args: &[ColumnarValue], @@ -720,7 +716,11 @@ impl ScalarUDFImpl for CastToI64UDF { Ok(ExprSimplifyResult::Simplified(new_expr)) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { unimplemented!("Function should have been simplified prior to evaluation") } } @@ -848,7 +848,11 @@ impl ScalarUDFImpl for TakeUDF { } // The actual implementation - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let take_idx = match &args[2] { ColumnarValue::Scalar(ScalarValue::Int64(Some(v))) if v < &2 => *v as usize, _ => unreachable!(), @@ -956,7 +960,11 @@ impl ScalarUDFImpl for ScalarFunctionWrapper { Ok(self.return_type.clone()) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { internal_err!("This function should not get invoked!") } @@ -1240,7 +1248,11 @@ impl ScalarUDFImpl for MyRegexUdf { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> 
Result { match args { [ColumnarValue::Scalar(ScalarValue::Utf8(value))] => { Ok(ColumnarValue::Scalar(ScalarValue::Boolean( diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 8490c08a70bb..b442a94bdccf 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -2389,7 +2389,7 @@ mod test { use crate::expr_fn::col; use crate::{ case, lit, qualified_wildcard, wildcard, wildcard_with_options, ColumnarValue, - ScalarUDF, ScalarUDFImpl, Volatility, + ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Volatility, }; use sqlparser::ast; use sqlparser::ast::{Ident, IdentWithAlias}; @@ -2518,7 +2518,10 @@ mod test { Ok(DataType::Utf8) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_with_args( + &self, + _args: ScalarFunctionArgs, + ) -> Result { Ok(ColumnarValue::Scalar(ScalarValue::from("a"))) } } diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 7fd4e64e0e62..60cd3f911e13 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -462,7 +462,11 @@ impl ScalarUDFImpl for SimpleScalarUDF { Ok(self.return_type.clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { (self.fun)(args) } } diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 57b8d9c6b02e..3c4a98445a2e 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -213,13 +213,11 @@ impl ScalarUDF { self.inner.is_nullable(args, schema) } - #[deprecated(since = "43.0.0", note = "Use `invoke_with_args` instead")] pub fn invoke_batch( &self, args: &[ColumnarValue], number_rows: usize, ) -> Result { - #[allow(deprecated)] self.inner.invoke_batch(args, number_rows) } @@ -545,7 +543,6 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { /// [`ColumnarValue::values_to_arrays`] can be used to convert the arguments /// to arrays, which will likely be simpler code, but be 
slower. fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { - #[allow(deprecated)] self.invoke_batch(args.args, args.number_rows) } diff --git a/datafusion/functions-nested/benches/map.rs b/datafusion/functions-nested/benches/map.rs index 3c4a09c65992..0f1d9ed50636 100644 --- a/datafusion/functions-nested/benches/map.rs +++ b/datafusion/functions-nested/benches/map.rs @@ -96,9 +96,9 @@ fn criterion_benchmark(c: &mut Criterion) { b.iter(|| { black_box( - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args map_udf() - .invoke(&[keys.clone(), values.clone()]) + .invoke_batch(&[keys.clone(), values.clone()], 1) .expect("map should work on valid values"), ); }); diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index fe1d05199e80..d9eefae7ff46 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -98,7 +98,11 @@ impl ScalarUDFImpl for ArrayHas { Ok(DataType::Boolean) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match &args[1] { ColumnarValue::Array(array_needle) => { // the needle is already an array, convert the haystack to an array of the same length @@ -322,7 +326,11 @@ impl ScalarUDFImpl for ArrayHasAll { Ok(DataType::Boolean) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_has_all_inner)(args) } @@ -403,7 +411,11 @@ impl ScalarUDFImpl for ArrayHasAny { Ok(DataType::Boolean) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_has_any_inner)(args) } diff --git a/datafusion/functions-nested/src/cardinality.rs b/datafusion/functions-nested/src/cardinality.rs index 
b6661e0807f4..5f7c5ef2d9d1 100644 --- a/datafusion/functions-nested/src/cardinality.rs +++ b/datafusion/functions-nested/src/cardinality.rs @@ -83,7 +83,11 @@ impl ScalarUDFImpl for Cardinality { }) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(cardinality_inner)(args) } diff --git a/datafusion/functions-nested/src/concat.rs b/datafusion/functions-nested/src/concat.rs index 4aa6bb5da9b2..1895f5c94479 100644 --- a/datafusion/functions-nested/src/concat.rs +++ b/datafusion/functions-nested/src/concat.rs @@ -86,7 +86,11 @@ impl ScalarUDFImpl for ArrayAppend { Ok(arg_types[0].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_append_inner)(args) } @@ -182,7 +186,11 @@ impl ScalarUDFImpl for ArrayPrepend { Ok(arg_types[1].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_prepend_inner)(args) } @@ -302,7 +310,11 @@ impl ScalarUDFImpl for ArrayConcat { Ok(expr_type) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_concat_inner)(args) } diff --git a/datafusion/functions-nested/src/dimension.rs b/datafusion/functions-nested/src/dimension.rs index 7df0ed2b40bd..d91484cece44 100644 --- a/datafusion/functions-nested/src/dimension.rs +++ b/datafusion/functions-nested/src/dimension.rs @@ -81,7 +81,11 @@ impl ScalarUDFImpl for ArrayDims { }) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_dims_inner)(args) } @@ -166,7 +170,11 @@ impl 
ScalarUDFImpl for ArrayNdims { }) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_ndims_inner)(args) } diff --git a/datafusion/functions-nested/src/distance.rs b/datafusion/functions-nested/src/distance.rs index 4f890e4166e9..2f8eeba6477e 100644 --- a/datafusion/functions-nested/src/distance.rs +++ b/datafusion/functions-nested/src/distance.rs @@ -96,7 +96,11 @@ impl ScalarUDFImpl for ArrayDistance { Ok(result) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_distance_inner)(args) } diff --git a/datafusion/functions-nested/src/empty.rs b/datafusion/functions-nested/src/empty.rs index 5d310eb23952..ccdae97ad9e2 100644 --- a/datafusion/functions-nested/src/empty.rs +++ b/datafusion/functions-nested/src/empty.rs @@ -73,7 +73,11 @@ impl ScalarUDFImpl for ArrayEmpty { }) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_empty_inner)(args) } diff --git a/datafusion/functions-nested/src/except.rs b/datafusion/functions-nested/src/except.rs index 100fb587d642..41d93f3a0b24 100644 --- a/datafusion/functions-nested/src/except.rs +++ b/datafusion/functions-nested/src/except.rs @@ -73,7 +73,11 @@ impl ScalarUDFImpl for ArrayExcept { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_except_inner)(args) } diff --git a/datafusion/functions-nested/src/extract.rs b/datafusion/functions-nested/src/extract.rs index 275095832edb..f2e39f2a6213 100644 --- a/datafusion/functions-nested/src/extract.rs +++ b/datafusion/functions-nested/src/extract.rs @@ -143,7 +143,11 @@ impl 
ScalarUDFImpl for ArrayElement { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_element_inner)(args) } @@ -347,7 +351,11 @@ impl ScalarUDFImpl for ArraySlice { Ok(arg_types[0].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_slice_inner)(args) } @@ -656,7 +664,11 @@ impl ScalarUDFImpl for ArrayPopFront { Ok(arg_types[0].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_pop_front_inner)(args) } @@ -762,7 +774,11 @@ impl ScalarUDFImpl for ArrayPopBack { Ok(arg_types[0].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_pop_back_inner)(args) } @@ -877,7 +893,11 @@ impl ScalarUDFImpl for ArrayAnyValue { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_any_value_inner)(args) } fn aliases(&self) -> &[String] { diff --git a/datafusion/functions-nested/src/flatten.rs b/datafusion/functions-nested/src/flatten.rs index 4fe631517b09..5b405a8a30b3 100644 --- a/datafusion/functions-nested/src/flatten.rs +++ b/datafusion/functions-nested/src/flatten.rs @@ -91,7 +91,11 @@ impl ScalarUDFImpl for Flatten { Ok(data_type) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(flatten_inner)(args) } diff --git a/datafusion/functions-nested/src/length.rs b/datafusion/functions-nested/src/length.rs index 
3e039f286421..179906d661f1 100644 --- a/datafusion/functions-nested/src/length.rs +++ b/datafusion/functions-nested/src/length.rs @@ -77,7 +77,11 @@ impl ScalarUDFImpl for ArrayLength { }) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_length_inner)(args) } diff --git a/datafusion/functions-nested/src/map.rs b/datafusion/functions-nested/src/map.rs index 73aad10a8e26..728ce060009b 100644 --- a/datafusion/functions-nested/src/map.rs +++ b/datafusion/functions-nested/src/map.rs @@ -238,7 +238,11 @@ impl ScalarUDFImpl for MapFunc { )) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_map_batch(args) } diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index d2bb6595fe76..9efcd563051e 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -85,7 +85,11 @@ impl ScalarUDFImpl for MapExtract { )))) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(map_extract_inner)(args) } diff --git a/datafusion/functions-nested/src/map_keys.rs b/datafusion/functions-nested/src/map_keys.rs index 03e381e372f6..e418918ea164 100644 --- a/datafusion/functions-nested/src/map_keys.rs +++ b/datafusion/functions-nested/src/map_keys.rs @@ -79,7 +79,11 @@ impl ScalarUDFImpl for MapKeysFunc { )))) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(map_keys_inner)(args) } diff --git a/datafusion/functions-nested/src/map_values.rs b/datafusion/functions-nested/src/map_values.rs index dc7d9c9db8ee..62df9def0a13 100644 
--- a/datafusion/functions-nested/src/map_values.rs +++ b/datafusion/functions-nested/src/map_values.rs @@ -79,7 +79,11 @@ impl ScalarUDFImpl for MapValuesFunc { )))) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(map_values_inner)(args) } diff --git a/datafusion/functions-nested/src/position.rs b/datafusion/functions-nested/src/position.rs index adb45141601d..9fb9e75cdec1 100644 --- a/datafusion/functions-nested/src/position.rs +++ b/datafusion/functions-nested/src/position.rs @@ -82,7 +82,11 @@ impl ScalarUDFImpl for ArrayPosition { Ok(UInt64) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_position_inner)(args) } @@ -253,7 +257,11 @@ impl ScalarUDFImpl for ArrayPositions { Ok(List(Arc::new(Field::new("item", UInt64, true)))) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_positions_inner)(args) } diff --git a/datafusion/functions-nested/src/range.rs b/datafusion/functions-nested/src/range.rs index ddc56b1e4ee8..cf741b9f1e78 100644 --- a/datafusion/functions-nested/src/range.rs +++ b/datafusion/functions-nested/src/range.rs @@ -117,7 +117,11 @@ impl ScalarUDFImpl for Range { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.iter().any(|arg| arg.data_type().is_null()) { return Ok(ColumnarValue::Array(Arc::new(NullArray::new(1)))); } @@ -255,7 +259,11 @@ impl ScalarUDFImpl for GenSeries { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.iter().any(|arg| 
arg.data_type().is_null()) { return Ok(ColumnarValue::Array(Arc::new(NullArray::new(1)))); } diff --git a/datafusion/functions-nested/src/remove.rs b/datafusion/functions-nested/src/remove.rs index dc1ed4833c67..cc111a2ffe6d 100644 --- a/datafusion/functions-nested/src/remove.rs +++ b/datafusion/functions-nested/src/remove.rs @@ -74,7 +74,11 @@ impl ScalarUDFImpl for ArrayRemove { Ok(arg_types[0].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_remove_inner)(args) } @@ -160,7 +164,11 @@ impl ScalarUDFImpl for ArrayRemoveN { Ok(arg_types[0].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_remove_n_inner)(args) } @@ -248,7 +256,11 @@ impl ScalarUDFImpl for ArrayRemoveAll { Ok(arg_types[0].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_remove_all_inner)(args) } diff --git a/datafusion/functions-nested/src/repeat.rs b/datafusion/functions-nested/src/repeat.rs index 55584c143a54..c02508485865 100644 --- a/datafusion/functions-nested/src/repeat.rs +++ b/datafusion/functions-nested/src/repeat.rs @@ -79,7 +79,11 @@ impl ScalarUDFImpl for ArrayRepeat { )))) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_repeat_inner)(args) } diff --git a/datafusion/functions-nested/src/replace.rs b/datafusion/functions-nested/src/replace.rs index 1d0a1d1f2815..d5272d9a0b4f 100644 --- a/datafusion/functions-nested/src/replace.rs +++ b/datafusion/functions-nested/src/replace.rs @@ -90,7 +90,11 @@ impl ScalarUDFImpl for ArrayReplace { Ok(args[0].clone()) } - fn 
invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_replace_inner)(args) } @@ -172,7 +176,11 @@ impl ScalarUDFImpl for ArrayReplaceN { Ok(args[0].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_replace_n_inner)(args) } @@ -256,7 +264,11 @@ impl ScalarUDFImpl for ArrayReplaceAll { Ok(args[0].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_replace_all_inner)(args) } diff --git a/datafusion/functions-nested/src/resize.rs b/datafusion/functions-nested/src/resize.rs index b0255e7be2a3..88329e452b00 100644 --- a/datafusion/functions-nested/src/resize.rs +++ b/datafusion/functions-nested/src/resize.rs @@ -80,7 +80,11 @@ impl ScalarUDFImpl for ArrayResize { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_resize_inner)(args) } diff --git a/datafusion/functions-nested/src/reverse.rs b/datafusion/functions-nested/src/reverse.rs index 1ecf7f848468..46d4a01959c1 100644 --- a/datafusion/functions-nested/src/reverse.rs +++ b/datafusion/functions-nested/src/reverse.rs @@ -72,7 +72,11 @@ impl ScalarUDFImpl for ArrayReverse { Ok(arg_types[0].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_reverse_inner)(args) } diff --git a/datafusion/functions-nested/src/set_ops.rs b/datafusion/functions-nested/src/set_ops.rs index ce8d248319fe..79a10ff8c352 100644 --- a/datafusion/functions-nested/src/set_ops.rs +++ 
b/datafusion/functions-nested/src/set_ops.rs @@ -98,7 +98,11 @@ impl ScalarUDFImpl for ArrayUnion { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_union_inner)(args) } @@ -186,7 +190,11 @@ impl ScalarUDFImpl for ArrayIntersect { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_intersect_inner)(args) } @@ -282,7 +290,11 @@ impl ScalarUDFImpl for ArrayDistinct { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_distinct_inner)(args) } diff --git a/datafusion/functions-nested/src/sort.rs b/datafusion/functions-nested/src/sort.rs index b29c187f0679..f8db54910a4c 100644 --- a/datafusion/functions-nested/src/sort.rs +++ b/datafusion/functions-nested/src/sort.rs @@ -86,7 +86,11 @@ impl ScalarUDFImpl for ArraySort { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_sort_inner)(args) } diff --git a/datafusion/functions-nested/src/string.rs b/datafusion/functions-nested/src/string.rs index da4ab2bed49a..78cd96614a96 100644 --- a/datafusion/functions-nested/src/string.rs +++ b/datafusion/functions-nested/src/string.rs @@ -159,7 +159,11 @@ impl ScalarUDFImpl for ArrayToString { }) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(array_to_string_inner)(args) } @@ -260,7 +264,11 @@ impl ScalarUDFImpl for StringToArray { }) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: 
usize, + ) -> Result { match args[0].data_type() { Utf8 | Utf8View => make_scalar_function(string_to_array_inner::)(args), LargeUtf8 => make_scalar_function(string_to_array_inner::)(args), diff --git a/datafusion/functions/benches/character_length.rs b/datafusion/functions/benches/character_length.rs index 9ba16807de01..b3fdb8dc8561 100644 --- a/datafusion/functions/benches/character_length.rs +++ b/datafusion/functions/benches/character_length.rs @@ -86,8 +86,8 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("character_length_StringArray_ascii_str_len_{}", str_len), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(character_length.invoke(&args_string_ascii)) + // TODO use invoke_with_args + black_box(character_length.invoke_batch(&args_string_ascii, n_rows)) }) }, ); @@ -98,8 +98,8 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("character_length_StringArray_utf8_str_len_{}", str_len), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(character_length.invoke(&args_string_utf8)) + // TODO use invoke_with_args + black_box(character_length.invoke_batch(&args_string_utf8, n_rows)) }) }, ); @@ -110,8 +110,10 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("character_length_StringViewArray_ascii_str_len_{}", str_len), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(character_length.invoke(&args_string_view_ascii)) + // TODO use invoke_with_args + black_box( + character_length.invoke_batch(&args_string_view_ascii, n_rows), + ) }) }, ); @@ -122,8 +124,10 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("character_length_StringViewArray_utf8_str_len_{}", str_len), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(character_length.invoke(&args_string_view_utf8)) + // TODO use invoke_with_args + black_box( + character_length.invoke_batch(&args_string_view_utf8, n_rows), + ) }) }, ); diff --git 
a/datafusion/functions/benches/concat.rs b/datafusion/functions/benches/concat.rs index 280819778f93..0f287ab36dad 100644 --- a/datafusion/functions/benches/concat.rs +++ b/datafusion/functions/benches/concat.rs @@ -39,8 +39,8 @@ fn criterion_benchmark(c: &mut Criterion) { let mut group = c.benchmark_group("concat function"); group.bench_function(BenchmarkId::new("concat", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - criterion::black_box(concat().invoke(&args).unwrap()) + // TODO use invoke_with_args + criterion::black_box(concat().invoke_batch(&args, size).unwrap()) }) }); group.finish(); diff --git a/datafusion/functions/benches/cot.rs b/datafusion/functions/benches/cot.rs index a33f00b4b73e..bb0585a2de9b 100644 --- a/datafusion/functions/benches/cot.rs +++ b/datafusion/functions/benches/cot.rs @@ -34,16 +34,16 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("cot f32 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(cot_fn.invoke(&f32_args).unwrap()) + // TODO use invoke_with_args + black_box(cot_fn.invoke_batch(&f32_args, size).unwrap()) }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("cot f64 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(cot_fn.invoke(&f64_args).unwrap()) + // TODO use invoke_with_args + black_box(cot_fn.invoke_batch(&f64_args, size).unwrap()) }) }); } diff --git a/datafusion/functions/benches/date_bin.rs b/datafusion/functions/benches/date_bin.rs index 4a8682c42f94..aa7c7710617d 100644 --- a/datafusion/functions/benches/date_bin.rs +++ b/datafusion/functions/benches/date_bin.rs @@ -19,7 +19,7 @@ extern crate criterion; use std::sync::Arc; -use arrow::array::{ArrayRef, TimestampSecondArray}; +use arrow::array::{Array, ArrayRef, 
TimestampSecondArray}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use datafusion_common::ScalarValue; use rand::rngs::ThreadRng; @@ -40,14 +40,16 @@ fn timestamps(rng: &mut ThreadRng) -> TimestampSecondArray { fn criterion_benchmark(c: &mut Criterion) { c.bench_function("date_bin_1000", |b| { let mut rng = rand::thread_rng(); + let timestamps_array = Arc::new(timestamps(&mut rng)) as ArrayRef; + let batch_len = timestamps_array.len(); let interval = ColumnarValue::Scalar(ScalarValue::new_interval_dt(0, 1_000_000)); - let timestamps = ColumnarValue::Array(Arc::new(timestamps(&mut rng)) as ArrayRef); + let timestamps = ColumnarValue::Array(timestamps_array); let udf = date_bin(); b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( - udf.invoke(&[interval.clone(), timestamps.clone()]) + udf.invoke_batch(&[interval.clone(), timestamps.clone()], batch_len) .expect("date_bin should work on valid values"), ) }) diff --git a/datafusion/functions/benches/encoding.rs b/datafusion/functions/benches/encoding.rs index 0615091e90d4..e37842a62b4a 100644 --- a/datafusion/functions/benches/encoding.rs +++ b/datafusion/functions/benches/encoding.rs @@ -29,29 +29,35 @@ fn criterion_benchmark(c: &mut Criterion) { let str_array = Arc::new(create_string_array_with_len::(size, 0.2, 32)); c.bench_function(&format!("base64_decode/{size}"), |b| { let method = ColumnarValue::Scalar("base64".into()); - #[allow(deprecated)] // TODO use invoke_batch + // TODO: use invoke_with_args let encoded = encoding::encode() - .invoke(&[ColumnarValue::Array(str_array.clone()), method.clone()]) + .invoke_batch( + &[ColumnarValue::Array(str_array.clone()), method.clone()], + size, + ) .unwrap(); let args = vec![encoded, method]; b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(decode.invoke(&args).unwrap()) + // TODO use invoke_with_args + black_box(decode.invoke_batch(&args, size).unwrap()) }) }); 
c.bench_function(&format!("hex_decode/{size}"), |b| { let method = ColumnarValue::Scalar("hex".into()); - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args let encoded = encoding::encode() - .invoke(&[ColumnarValue::Array(str_array.clone()), method.clone()]) + .invoke_batch( + &[ColumnarValue::Array(str_array.clone()), method.clone()], + size, + ) .unwrap(); let args = vec![encoded, method]; b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(decode.invoke(&args).unwrap()) + // TODO use invoke_with_args + black_box(decode.invoke_batch(&args, size).unwrap()) }) }); } diff --git a/datafusion/functions/benches/isnan.rs b/datafusion/functions/benches/isnan.rs index 3e50de658b36..605a520715f4 100644 --- a/datafusion/functions/benches/isnan.rs +++ b/datafusion/functions/benches/isnan.rs @@ -33,16 +33,16 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("isnan f32 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(isnan.invoke(&f32_args).unwrap()) + // TODO use invoke_with_args + black_box(isnan.invoke_batch(&f32_args, size).unwrap()) }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("isnan f64 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(isnan.invoke(&f64_args).unwrap()) + // TODO use invoke_with_args + black_box(isnan.invoke_batch(&f64_args, size).unwrap()) }) }); } diff --git a/datafusion/functions/benches/iszero.rs b/datafusion/functions/benches/iszero.rs index 3e6ac97063ca..48fb6fbed9c3 100644 --- a/datafusion/functions/benches/iszero.rs +++ b/datafusion/functions/benches/iszero.rs @@ -30,19 +30,21 @@ fn criterion_benchmark(c: &mut Criterion) { let iszero = iszero(); for size in [1024, 4096, 8192] { let f32_array = 
Arc::new(create_primitive_array::(size, 0.2)); + let batch_len = f32_array.len(); let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("iszero f32 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(iszero.invoke(&f32_args).unwrap()) + // TODO use invoke_with_args + black_box(iszero.invoke_batch(&f32_args, batch_len).unwrap()) }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); + let batch_len = f64_array.len(); let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("iszero f64 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(iszero.invoke(&f64_args).unwrap()) + // TODO use invoke_with_args + black_box(iszero.invoke_batch(&f64_args, batch_len).unwrap()) }) }); } diff --git a/datafusion/functions/benches/lower.rs b/datafusion/functions/benches/lower.rs index 6cc67791464f..114ac4a16fe5 100644 --- a/datafusion/functions/benches/lower.rs +++ b/datafusion/functions/benches/lower.rs @@ -125,8 +125,8 @@ fn criterion_benchmark(c: &mut Criterion) { let args = create_args1(size, 32); c.bench_function(&format!("lower_all_values_are_ascii: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(lower.invoke(&args)) + // TODO use invoke_with_args + black_box(lower.invoke_batch(&args, size)) }) }); @@ -135,8 +135,8 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("lower_the_first_value_is_nonascii: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(lower.invoke(&args)) + // TODO use invoke_with_args + black_box(lower.invoke_batch(&args, size)) }) }, ); @@ -146,8 +146,8 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("lower_the_middle_value_is_nonascii: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(lower.invoke(&args)) + // TODO use invoke_with_args + black_box(lower.invoke_batch(&args, 
size)) }) }, ); @@ -167,8 +167,8 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("lower_all_values_are_ascii_string_views: size: {}, str_len: {}, null_density: {}, mixed: {}", size, str_len, null_density, mixed), |b| b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(lower.invoke(&args)) + // TODO use invoke_with_args + black_box(lower.invoke_batch(&args, size)) }), ); @@ -177,8 +177,8 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("lower_all_values_are_ascii_string_views: size: {}, str_len: {}, null_density: {}, mixed: {}", size, str_len, null_density, mixed), |b| b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(lower.invoke(&args)) + // TODO use invoke_with_args + black_box(lower.invoke_batch(&args, size)) }), ); @@ -187,8 +187,8 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("lower_some_values_are_nonascii_string_views: size: {}, str_len: {}, non_ascii_density: {}, null_density: {}, mixed: {}", size, str_len, 0.1, null_density, mixed), |b| b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(lower.invoke(&args)) + // TODO use invoke_with_args + black_box(lower.invoke_batch(&args, size)) }), ); } diff --git a/datafusion/functions/benches/ltrim.rs b/datafusion/functions/benches/ltrim.rs index 4f94729b6fef..fed455eeac91 100644 --- a/datafusion/functions/benches/ltrim.rs +++ b/datafusion/functions/benches/ltrim.rs @@ -141,8 +141,8 @@ fn run_with_string_type( ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(ltrim.invoke(&args)) + // TODO use invoke_with_args + black_box(ltrim.invoke_batch(&args, size)) }) }, ); diff --git a/datafusion/functions/benches/make_date.rs b/datafusion/functions/benches/make_date.rs index a9844e4b2541..d9309bcd3db2 100644 --- a/datafusion/functions/benches/make_date.rs +++ b/datafusion/functions/benches/make_date.rs @@ -19,7 +19,7 @@ extern crate criterion; use std::sync::Arc; -use arrow::array::{ArrayRef, Int32Array}; 
+use arrow::array::{Array, ArrayRef, Int32Array}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use rand::rngs::ThreadRng; use rand::Rng; @@ -57,15 +57,20 @@ fn days(rng: &mut ThreadRng) -> Int32Array { fn criterion_benchmark(c: &mut Criterion) { c.bench_function("make_date_col_col_col_1000", |b| { let mut rng = rand::thread_rng(); - let years = ColumnarValue::Array(Arc::new(years(&mut rng)) as ArrayRef); + let years_array = Arc::new(years(&mut rng)) as ArrayRef; + let batch_len = years_array.len(); + let years = ColumnarValue::Array(years_array); let months = ColumnarValue::Array(Arc::new(months(&mut rng)) as ArrayRef); let days = ColumnarValue::Array(Arc::new(days(&mut rng)) as ArrayRef); b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( make_date() - .invoke(&[years.clone(), months.clone(), days.clone()]) + .invoke_batch( + &[years.clone(), months.clone(), days.clone()], + batch_len, + ) .expect("make_date should work on valid values"), ) }) @@ -74,14 +79,19 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("make_date_scalar_col_col_1000", |b| { let mut rng = rand::thread_rng(); let year = ColumnarValue::Scalar(ScalarValue::Int32(Some(2025))); - let months = ColumnarValue::Array(Arc::new(months(&mut rng)) as ArrayRef); + let months_arr = Arc::new(months(&mut rng)) as ArrayRef; + let batch_len = months_arr.len(); + let months = ColumnarValue::Array(months_arr); let days = ColumnarValue::Array(Arc::new(days(&mut rng)) as ArrayRef); b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( make_date() - .invoke(&[year.clone(), months.clone(), days.clone()]) + .invoke_batch( + &[year.clone(), months.clone(), days.clone()], + batch_len, + ) .expect("make_date should work on valid values"), ) }) @@ -91,13 +101,15 @@ fn criterion_benchmark(c: &mut Criterion) { let mut rng = rand::thread_rng(); let year = 
ColumnarValue::Scalar(ScalarValue::Int32(Some(2025))); let month = ColumnarValue::Scalar(ScalarValue::Int32(Some(11))); - let days = ColumnarValue::Array(Arc::new(days(&mut rng)) as ArrayRef); + let day_arr = Arc::new(days(&mut rng)); + let batch_len = day_arr.len(); + let days = ColumnarValue::Array(day_arr); b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( make_date() - .invoke(&[year.clone(), month.clone(), days.clone()]) + .invoke_batch(&[year.clone(), month.clone(), days.clone()], batch_len) .expect("make_date should work on valid values"), ) }) @@ -109,10 +121,10 @@ fn criterion_benchmark(c: &mut Criterion) { let day = ColumnarValue::Scalar(ScalarValue::Int32(Some(26))); b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( make_date() - .invoke(&[year.clone(), month.clone(), day.clone()]) + .invoke_batch(&[year.clone(), month.clone(), day.clone()], 1) .expect("make_date should work on valid values"), ) }) diff --git a/datafusion/functions/benches/nullif.rs b/datafusion/functions/benches/nullif.rs index 6e1154cf182a..e29fd03aa819 100644 --- a/datafusion/functions/benches/nullif.rs +++ b/datafusion/functions/benches/nullif.rs @@ -34,8 +34,8 @@ fn criterion_benchmark(c: &mut Criterion) { ]; c.bench_function(&format!("nullif scalar array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(nullif.invoke(&args).unwrap()) + // TODO use invoke_with_args + black_box(nullif.invoke_batch(&args, size).unwrap()) }) }); } diff --git a/datafusion/functions/benches/pad.rs b/datafusion/functions/benches/pad.rs index 4b21ca373047..6f267b350a35 100644 --- a/datafusion/functions/benches/pad.rs +++ b/datafusion/functions/benches/pad.rs @@ -102,24 +102,24 @@ fn criterion_benchmark(c: &mut Criterion) { let args = create_args::(size, 32, false); group.bench_function(BenchmarkId::new("utf8 type", size), |b| { b.iter(|| { - #[allow(deprecated)] 
// TODO use invoke_batch - criterion::black_box(lpad().invoke(&args).unwrap()) + // TODO use invoke_with_args + criterion::black_box(lpad().invoke_batch(&args, size).unwrap()) }) }); let args = create_args::(size, 32, false); group.bench_function(BenchmarkId::new("largeutf8 type", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - criterion::black_box(lpad().invoke(&args).unwrap()) + // TODO use invoke_with_args + criterion::black_box(lpad().invoke_batch(&args, size).unwrap()) }) }); let args = create_args::(size, 32, true); group.bench_function(BenchmarkId::new("stringview type", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - criterion::black_box(lpad().invoke(&args).unwrap()) + // TODO use invoke_with_args + criterion::black_box(lpad().invoke_batch(&args, size).unwrap()) }) }); @@ -130,16 +130,16 @@ fn criterion_benchmark(c: &mut Criterion) { let args = create_args::(size, 32, false); group.bench_function(BenchmarkId::new("utf8 type", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - criterion::black_box(rpad().invoke(&args).unwrap()) + // TODO use invoke_with_args + criterion::black_box(rpad().invoke_batch(&args, size).unwrap()) }) }); let args = create_args::(size, 32, false); group.bench_function(BenchmarkId::new("largeutf8 type", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - criterion::black_box(rpad().invoke(&args).unwrap()) + // TODO use invoke_with_args + criterion::black_box(rpad().invoke_batch(&args, size).unwrap()) }) }); @@ -147,8 +147,8 @@ fn criterion_benchmark(c: &mut Criterion) { let args = create_args::(size, 32, true); group.bench_function(BenchmarkId::new("stringview type", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - criterion::black_box(rpad().invoke(&args).unwrap()) + // TODO use invoke_with_args + criterion::black_box(rpad().invoke_batch(&args, size).unwrap()) }) }); diff --git 
a/datafusion/functions/benches/repeat.rs b/datafusion/functions/benches/repeat.rs index 6e54c92b9b26..e7e3c634ea82 100644 --- a/datafusion/functions/benches/repeat.rs +++ b/datafusion/functions/benches/repeat.rs @@ -73,8 +73,8 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(repeat.invoke(&args)) + // TODO use invoke_with_args + black_box(repeat.invoke_batch(&args, repeat_times as usize)) }) }, ); @@ -87,8 +87,8 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(repeat.invoke(&args)) + // TODO use invoke_with_args + black_box(repeat.invoke_batch(&args, repeat_times as usize)) }) }, ); @@ -101,8 +101,8 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(repeat.invoke(&args)) + // TODO use invoke_with_args + black_box(repeat.invoke_batch(&args, repeat_times as usize)) }) }, ); @@ -124,8 +124,8 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(repeat.invoke(&args)) + // TODO use invoke_with_args + black_box(repeat.invoke_batch(&args, repeat_times as usize)) }) }, ); @@ -138,8 +138,8 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(repeat.invoke(&args)) + // TODO use invoke_with_args + black_box(repeat.invoke_batch(&args, size)) }) }, ); @@ -152,8 +152,8 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(repeat.invoke(&args)) + // TODO use invoke_with_args + black_box(repeat.invoke_batch(&args, repeat_times as usize)) }) }, ); diff --git a/datafusion/functions/benches/signum.rs b/datafusion/functions/benches/signum.rs index ea1f5433df4e..a51b2ebe5ab7 100644 --- a/datafusion/functions/benches/signum.rs +++ 
b/datafusion/functions/benches/signum.rs @@ -30,19 +30,22 @@ fn criterion_benchmark(c: &mut Criterion) { let signum = signum(); for size in [1024, 4096, 8192] { let f32_array = Arc::new(create_primitive_array::(size, 0.2)); + let batch_len = f32_array.len(); let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("signum f32 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(signum.invoke(&f32_args).unwrap()) + // TODO use invoke_with_args + black_box(signum.invoke_batch(&f32_args, batch_len).unwrap()) }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); + let batch_len = f64_array.len(); + let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("signum f64 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(signum.invoke(&f64_args).unwrap()) + // TODO use invoke_with_args + black_box(signum.invoke_batch(&f64_args, batch_len).unwrap()) }) }); } diff --git a/datafusion/functions/benches/strpos.rs b/datafusion/functions/benches/strpos.rs index 31ca61e34c3a..f4962380dfbf 100644 --- a/datafusion/functions/benches/strpos.rs +++ b/datafusion/functions/benches/strpos.rs @@ -114,8 +114,8 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("strpos_StringArray_ascii_str_len_{}", str_len), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(strpos.invoke(&args_string_ascii)) + // TODO use invoke_with_args + black_box(strpos.invoke_batch(&args_string_ascii, n_rows)) }) }, ); @@ -126,8 +126,8 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("strpos_StringArray_utf8_str_len_{}", str_len), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(strpos.invoke(&args_string_utf8)) + // TODO use invoke_with_args + black_box(strpos.invoke_batch(&args_string_utf8, n_rows)) }) }, ); @@ -138,8 +138,8 @@ fn criterion_benchmark(c: &mut Criterion) { 
&format!("strpos_StringViewArray_ascii_str_len_{}", str_len), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(strpos.invoke(&args_string_view_ascii)) + // TODO use invoke_with_args + black_box(strpos.invoke_batch(&args_string_view_ascii, n_rows)) }) }, ); @@ -150,8 +150,8 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("strpos_StringViewArray_utf8_str_len_{}", str_len), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(strpos.invoke(&args_string_view_utf8)) + // TODO use invoke_with_args + black_box(strpos.invoke_batch(&args_string_view_utf8, n_rows)) }) }, ); diff --git a/datafusion/functions/benches/substr.rs b/datafusion/functions/benches/substr.rs index 21020dad31a4..8b8e8dbc4279 100644 --- a/datafusion/functions/benches/substr.rs +++ b/datafusion/functions/benches/substr.rs @@ -109,8 +109,8 @@ fn criterion_benchmark(c: &mut Criterion) { format!("substr_string_view [size={}, strlen={}]", size, len), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(substr.invoke(&args)) + // TODO use invoke_with_args + black_box(substr.invoke_batch(&args, size)) }) }, ); @@ -120,8 +120,8 @@ fn criterion_benchmark(c: &mut Criterion) { format!("substr_string [size={}, strlen={}]", size, len), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(substr.invoke(&args)) + // TODO use invoke_with_args + black_box(substr.invoke_batch(&args, size)) }) }, ); @@ -131,8 +131,8 @@ fn criterion_benchmark(c: &mut Criterion) { format!("substr_large_string [size={}, strlen={}]", size, len), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(substr.invoke(&args)) + // TODO use invoke_with_args + black_box(substr.invoke_batch(&args, size)) }) }, ); @@ -154,8 +154,8 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(substr.invoke(&args)) + // TODO use invoke_with_args + 
black_box(substr.invoke_batch(&args, size)) }) }, ); @@ -168,8 +168,8 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(substr.invoke(&args)) + // TODO use invoke_with_args + black_box(substr.invoke_batch(&args, size)) }) }, ); @@ -182,8 +182,8 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(substr.invoke(&args)) + // TODO use invoke_with_args + black_box(substr.invoke_batch(&args, size)) }) }, ); @@ -205,8 +205,8 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(substr.invoke(&args)) + // TODO use invoke_with_args + black_box(substr.invoke_batch(&args, size)) }) }, ); @@ -219,8 +219,8 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(substr.invoke(&args)) + // TODO use invoke_with_args + black_box(substr.invoke_batch(&args, size)) }) }, ); @@ -233,8 +233,8 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(substr.invoke(&args)) + // TODO use invoke_with_args + black_box(substr.invoke_batch(&args, size)) }) }, ); diff --git a/datafusion/functions/benches/substr_index.rs b/datafusion/functions/benches/substr_index.rs index 1e793cf4db8c..1ea8e2606f0d 100644 --- a/datafusion/functions/benches/substr_index.rs +++ b/datafusion/functions/benches/substr_index.rs @@ -84,16 +84,17 @@ fn data() -> (StringArray, StringArray, Int64Array) { fn criterion_benchmark(c: &mut Criterion) { c.bench_function("substr_index_array_array_1000", |b| { let (strings, delimiters, counts) = data(); + let batch_len = counts.len(); let strings = ColumnarValue::Array(Arc::new(strings) as ArrayRef); let delimiters = ColumnarValue::Array(Arc::new(delimiters) as ArrayRef); let counts = 
ColumnarValue::Array(Arc::new(counts) as ArrayRef); let args = [strings, delimiters, counts]; b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + #[allow(deprecated)] // TODO use invoke_with_args black_box( substr_index() - .invoke(&args) + .invoke_batch(&args, batch_len) .expect("substr_index should work on valid values"), ) }) diff --git a/datafusion/functions/benches/to_char.rs b/datafusion/functions/benches/to_char.rs index 09032fdf2de1..72eae45b1e1b 100644 --- a/datafusion/functions/benches/to_char.rs +++ b/datafusion/functions/benches/to_char.rs @@ -82,14 +82,16 @@ fn patterns(rng: &mut ThreadRng) -> StringArray { fn criterion_benchmark(c: &mut Criterion) { c.bench_function("to_char_array_array_1000", |b| { let mut rng = rand::thread_rng(); - let data = ColumnarValue::Array(Arc::new(data(&mut rng)) as ArrayRef); + let data_arr = data(&mut rng); + let batch_len = data_arr.len(); + let data = ColumnarValue::Array(Arc::new(data_arr) as ArrayRef); let patterns = ColumnarValue::Array(Arc::new(patterns(&mut rng)) as ArrayRef); b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( to_char() - .invoke(&[data.clone(), patterns.clone()]) + .invoke_batch(&[data.clone(), patterns.clone()], batch_len) .expect("to_char should work on valid values"), ) }) @@ -97,15 +99,17 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("to_char_array_scalar_1000", |b| { let mut rng = rand::thread_rng(); - let data = ColumnarValue::Array(Arc::new(data(&mut rng)) as ArrayRef); + let data_arr = data(&mut rng); + let batch_len = data_arr.len(); + let data = ColumnarValue::Array(Arc::new(data_arr) as ArrayRef); let patterns = ColumnarValue::Scalar(ScalarValue::Utf8(Some("%Y-%m-%d".to_string()))); b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( to_char() - .invoke(&[data.clone(), patterns.clone()]) + .invoke_batch(&[data.clone(), patterns.clone()], batch_len)
.expect("to_char should work on valid values"), ) }) @@ -126,10 +130,10 @@ fn criterion_benchmark(c: &mut Criterion) { ))); b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( to_char() - .invoke(&[data.clone(), pattern.clone()]) + .invoke_batch(&[data.clone(), pattern.clone()], 1) .expect("to_char should work on valid values"), ) }) diff --git a/datafusion/functions/benches/to_timestamp.rs b/datafusion/functions/benches/to_timestamp.rs index 11816fe9c64f..9f5f6661f998 100644 --- a/datafusion/functions/benches/to_timestamp.rs +++ b/datafusion/functions/benches/to_timestamp.rs @@ -20,7 +20,7 @@ extern crate criterion; use std::sync::Arc; use arrow::array::builder::StringBuilder; -use arrow::array::{ArrayRef, StringArray}; +use arrow::array::{Array, ArrayRef, StringArray}; use arrow::compute::cast; use arrow::datatypes::DataType; use criterion::{black_box, criterion_group, criterion_main, Criterion}; @@ -110,13 +110,15 @@ fn data_with_formats() -> (StringArray, StringArray, StringArray, StringArray) { } fn criterion_benchmark(c: &mut Criterion) { c.bench_function("to_timestamp_no_formats_utf8", |b| { - let string_array = ColumnarValue::Array(Arc::new(data()) as ArrayRef); + let arr_data = data(); + let batch_len = arr_data.len(); + let string_array = ColumnarValue::Array(Arc::new(arr_data) as ArrayRef); b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( to_timestamp() - .invoke(&[string_array.clone()]) + .invoke_batch(&[string_array.clone()], batch_len) .expect("to_timestamp should work on valid values"), ) }) @@ -124,13 +126,14 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("to_timestamp_no_formats_largeutf8", |b| { let data = cast(&data(), &DataType::LargeUtf8).unwrap(); + let batch_len = data.len(); let string_array = ColumnarValue::Array(Arc::new(data) as ArrayRef); b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use 
invoke_with_args black_box( to_timestamp() - .invoke(&[string_array.clone()]) + .invoke_batch(&[string_array.clone()], batch_len) .expect("to_timestamp should work on valid values"), ) }) @@ -138,13 +141,14 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("to_timestamp_no_formats_utf8view", |b| { let data = cast(&data(), &DataType::Utf8View).unwrap(); + let batch_len = data.len(); let string_array = ColumnarValue::Array(Arc::new(data) as ArrayRef); b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( to_timestamp() - .invoke(&[string_array.clone()]) + .invoke_batch(&[string_array.clone()], batch_len) .expect("to_timestamp should work on valid values"), ) }) @@ -152,6 +156,7 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("to_timestamp_with_formats_utf8", |b| { let (inputs, format1, format2, format3) = data_with_formats(); + let batch_len = inputs.len(); let args = [ ColumnarValue::Array(Arc::new(inputs) as ArrayRef), @@ -160,10 +165,10 @@ fn criterion_benchmark(c: &mut Criterion) { ColumnarValue::Array(Arc::new(format3) as ArrayRef), ]; b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( to_timestamp() - .invoke(&args.clone()) + .invoke_batch(&args.clone(), batch_len) .expect("to_timestamp should work on valid values"), ) }) @@ -171,6 +176,7 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("to_timestamp_with_formats_largeutf8", |b| { let (inputs, format1, format2, format3) = data_with_formats(); + let batch_len = inputs.len(); let args = [ ColumnarValue::Array( @@ -187,10 +193,10 @@ fn criterion_benchmark(c: &mut Criterion) { ), ]; b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( to_timestamp() - .invoke(&args.clone()) + .invoke_batch(&args.clone(), batch_len) .expect("to_timestamp should work on valid values"), ) }) @@ -199,6 +205,8 @@ fn criterion_benchmark(c: &mut 
Criterion) { c.bench_function("to_timestamp_with_formats_utf8view", |b| { let (inputs, format1, format2, format3) = data_with_formats(); + let batch_len = inputs.len(); + let args = [ ColumnarValue::Array( Arc::new(cast(&inputs, &DataType::Utf8View).unwrap()) as ArrayRef @@ -214,10 +222,10 @@ fn criterion_benchmark(c: &mut Criterion) { ), ]; b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch + // TODO use invoke_with_args black_box( to_timestamp() - .invoke(&args.clone()) + .invoke_batch(&args.clone(), batch_len) .expect("to_timestamp should work on valid values"), ) }) diff --git a/datafusion/functions/benches/trunc.rs b/datafusion/functions/benches/trunc.rs index 07ce522eb913..83d5b761e809 100644 --- a/datafusion/functions/benches/trunc.rs +++ b/datafusion/functions/benches/trunc.rs @@ -34,16 +34,16 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("trunc f32 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(trunc.invoke(&f32_args).unwrap()) + // TODO use invoke_with_args + black_box(trunc.invoke_batch(&f32_args, size).unwrap()) }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("trunc f64 array: {}", size), |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - black_box(trunc.invoke(&f64_args).unwrap()) + // TODO use invoke_with_args + black_box(trunc.invoke_batch(&f64_args, size).unwrap()) }) }); } diff --git a/datafusion/functions/benches/upper.rs b/datafusion/functions/benches/upper.rs index ac4ecacff941..9b41a15b11c7 100644 --- a/datafusion/functions/benches/upper.rs +++ b/datafusion/functions/benches/upper.rs @@ -38,8 +38,8 @@ fn criterion_benchmark(c: &mut Criterion) { let args = create_args(size, 32); c.bench_function("upper_all_values_are_ascii", |b| { b.iter(|| { - #[allow(deprecated)] // TODO use invoke_batch - 
black_box(upper.invoke(&args)) + // TODO use invoke_with_args + black_box(upper.invoke_batch(&args, size)) }) }); } diff --git a/datafusion/functions/src/core/arrow_cast.rs b/datafusion/functions/src/core/arrow_cast.rs index a3e3feaa17e3..37a811f55494 100644 --- a/datafusion/functions/src/core/arrow_cast.rs +++ b/datafusion/functions/src/core/arrow_cast.rs @@ -104,7 +104,11 @@ impl ScalarUDFImpl for ArrowCastFunc { data_type_from_args(args) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { internal_err!("arrow_cast should have been simplified to cast") } diff --git a/datafusion/functions/src/core/arrowtypeof.rs b/datafusion/functions/src/core/arrowtypeof.rs index a425aff6caad..f6351e71e277 100644 --- a/datafusion/functions/src/core/arrowtypeof.rs +++ b/datafusion/functions/src/core/arrowtypeof.rs @@ -58,7 +58,11 @@ impl ScalarUDFImpl for ArrowTypeOfFunc { Ok(DataType::Utf8) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.len() != 1 { return exec_err!( "arrow_typeof function requires 1 arguments, got {}", diff --git a/datafusion/functions/src/core/coalesce.rs b/datafusion/functions/src/core/coalesce.rs index a05f3f08232c..da9742ef66de 100644 --- a/datafusion/functions/src/core/coalesce.rs +++ b/datafusion/functions/src/core/coalesce.rs @@ -74,7 +74,11 @@ impl ScalarUDFImpl for CoalesceFunc { } /// coalesce evaluates to the first value which is not NULL - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { // do not accept 0 arguments. 
if args.is_empty() { return exec_err!( diff --git a/datafusion/functions/src/core/getfield.rs b/datafusion/functions/src/core/getfield.rs index c0af4d35966b..aa3b15a46b50 100644 --- a/datafusion/functions/src/core/getfield.rs +++ b/datafusion/functions/src/core/getfield.rs @@ -160,7 +160,11 @@ impl ScalarUDFImpl for GetFieldFunc { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.len() != 2 { return exec_err!( "get_field function requires 2 arguments, got {}", diff --git a/datafusion/functions/src/core/named_struct.rs b/datafusion/functions/src/core/named_struct.rs index 0211ed3fe691..a6452b32289a 100644 --- a/datafusion/functions/src/core/named_struct.rs +++ b/datafusion/functions/src/core/named_struct.rs @@ -158,7 +158,11 @@ impl ScalarUDFImpl for NamedStructFunc { Ok(DataType::Struct(Fields::from(return_fields))) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { named_struct_expr(args) } diff --git a/datafusion/functions/src/core/nullif.rs b/datafusion/functions/src/core/nullif.rs index 05af8d3f589e..8327bc1e708d 100644 --- a/datafusion/functions/src/core/nullif.rs +++ b/datafusion/functions/src/core/nullif.rs @@ -75,7 +75,11 @@ impl ScalarUDFImpl for NullIfFunc { Ok(arg_types[0].to_owned()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { nullif_func(args) } diff --git a/datafusion/functions/src/core/nvl.rs b/datafusion/functions/src/core/nvl.rs index 24b6f5fc14fe..612772ec0eef 100644 --- a/datafusion/functions/src/core/nvl.rs +++ b/datafusion/functions/src/core/nvl.rs @@ -88,7 +88,11 @@ impl ScalarUDFImpl for NVLFunc { Ok(arg_types[0].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + 
_number_rows: usize, + ) -> Result { nvl_func(args) } diff --git a/datafusion/functions/src/core/nvl2.rs b/datafusion/functions/src/core/nvl2.rs index cfcdb4480787..de35d3ee6409 100644 --- a/datafusion/functions/src/core/nvl2.rs +++ b/datafusion/functions/src/core/nvl2.rs @@ -63,7 +63,11 @@ impl ScalarUDFImpl for NVL2Func { Ok(arg_types[1].clone()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { nvl2_func(args) } diff --git a/datafusion/functions/src/core/struct.rs b/datafusion/functions/src/core/struct.rs index 75d1d4eca698..d68d5570cdec 100644 --- a/datafusion/functions/src/core/struct.rs +++ b/datafusion/functions/src/core/struct.rs @@ -101,7 +101,11 @@ impl ScalarUDFImpl for StructFunc { Ok(DataType::Struct(Fields::from(return_fields))) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { struct_expr(args) } diff --git a/datafusion/functions/src/crypto/digest.rs b/datafusion/functions/src/crypto/digest.rs index f738c6e3e40f..5e5c1e68584b 100644 --- a/datafusion/functions/src/crypto/digest.rs +++ b/datafusion/functions/src/crypto/digest.rs @@ -69,7 +69,11 @@ impl ScalarUDFImpl for DigestFunc { fn return_type(&self, arg_types: &[DataType]) -> Result { utf8_or_binary_to_binary_type(&arg_types[0], self.name()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { digest(args) } diff --git a/datafusion/functions/src/crypto/md5.rs b/datafusion/functions/src/crypto/md5.rs index 0e8ff1cd3192..1ccd55e342dc 100644 --- a/datafusion/functions/src/crypto/md5.rs +++ b/datafusion/functions/src/crypto/md5.rs @@ -85,7 +85,11 @@ impl ScalarUDFImpl for Md5Func { } }) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + 
_number_rows: usize, + ) -> Result { md5(args) } diff --git a/datafusion/functions/src/crypto/sha224.rs b/datafusion/functions/src/crypto/sha224.rs index f0bfcb9fab3b..fa0b677e0124 100644 --- a/datafusion/functions/src/crypto/sha224.rs +++ b/datafusion/functions/src/crypto/sha224.rs @@ -91,7 +91,11 @@ impl ScalarUDFImpl for SHA224Func { utf8_or_binary_to_binary_type(&arg_types[0], self.name()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { sha224(args) } diff --git a/datafusion/functions/src/crypto/sha256.rs b/datafusion/functions/src/crypto/sha256.rs index 0a0044f72206..39012f8663c4 100644 --- a/datafusion/functions/src/crypto/sha256.rs +++ b/datafusion/functions/src/crypto/sha256.rs @@ -65,7 +65,11 @@ impl ScalarUDFImpl for SHA256Func { utf8_or_binary_to_binary_type(&arg_types[0], self.name()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { sha256(args) } diff --git a/datafusion/functions/src/crypto/sha384.rs b/datafusion/functions/src/crypto/sha384.rs index 7f8220e5f9d5..4939b32570d7 100644 --- a/datafusion/functions/src/crypto/sha384.rs +++ b/datafusion/functions/src/crypto/sha384.rs @@ -65,7 +65,11 @@ impl ScalarUDFImpl for SHA384Func { utf8_or_binary_to_binary_type(&arg_types[0], self.name()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { sha384(args) } diff --git a/datafusion/functions/src/crypto/sha512.rs b/datafusion/functions/src/crypto/sha512.rs index d2d51bfa53ab..0aeedfd591cf 100644 --- a/datafusion/functions/src/crypto/sha512.rs +++ b/datafusion/functions/src/crypto/sha512.rs @@ -65,7 +65,11 @@ impl ScalarUDFImpl for SHA512Func { utf8_or_binary_to_binary_type(&arg_types[0], self.name()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn 
invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { sha512(args) } diff --git a/datafusion/functions/src/datetime/current_date.rs b/datafusion/functions/src/datetime/current_date.rs index 3b819c470d1e..49f7cf968c3b 100644 --- a/datafusion/functions/src/datetime/current_date.rs +++ b/datafusion/functions/src/datetime/current_date.rs @@ -73,7 +73,11 @@ impl ScalarUDFImpl for CurrentDateFunc { Ok(Date32) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { internal_err!( "invoke should not be called on a simplified current_date() function" ) diff --git a/datafusion/functions/src/datetime/current_time.rs b/datafusion/functions/src/datetime/current_time.rs index ca591f922305..57270752d1c0 100644 --- a/datafusion/functions/src/datetime/current_time.rs +++ b/datafusion/functions/src/datetime/current_time.rs @@ -70,7 +70,11 @@ impl ScalarUDFImpl for CurrentTimeFunc { Ok(Time64(Nanosecond)) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { internal_err!( "invoke should not be called on a simplified current_time() function" ) diff --git a/datafusion/functions/src/datetime/date_bin.rs b/datafusion/functions/src/datetime/date_bin.rs index 671967a89325..760585559b2a 100644 --- a/datafusion/functions/src/datetime/date_bin.rs +++ b/datafusion/functions/src/datetime/date_bin.rs @@ -133,7 +133,11 @@ impl ScalarUDFImpl for DateBinFunc { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.len() == 2 { // Default to unix EPOCH let origin = ColumnarValue::Scalar(ScalarValue::TimestampNanosecond( @@ -515,6 +519,7 @@ mod tests { use chrono::TimeDelta; #[test] + #[allow(deprecated)] // TODO migrate UDF invoke from invoke_batch fn test_date_bin() { 
let res = DateBinFunc::new().invoke_batch( &[ @@ -532,7 +537,7 @@ mod tests { assert!(res.is_ok()); let timestamps = Arc::new((1..6).map(Some).collect::()); - let batch_size = timestamps.len(); + let batch_len = timestamps.len(); let res = DateBinFunc::new().invoke_batch( &[ ColumnarValue::Scalar(ScalarValue::IntervalDayTime(Some( @@ -544,7 +549,7 @@ mod tests { ColumnarValue::Array(timestamps), ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1), None)), ], - batch_size, + batch_len, ); assert!(res.is_ok()); @@ -720,14 +725,13 @@ mod tests { }) .collect::(), ); - let batch_size = intervals.len(); let res = DateBinFunc::new().invoke_batch( &[ ColumnarValue::Array(intervals), ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1), None)), ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1), None)), ], - batch_size, + 1, ); assert_eq!( res.err().unwrap().strip_backtrace(), @@ -736,7 +740,7 @@ mod tests { // unsupported array type for origin let timestamps = Arc::new((1..6).map(Some).collect::()); - let batch_size = timestamps.len(); + let batch_len = timestamps.len(); let res = DateBinFunc::new().invoke_batch( &[ ColumnarValue::Scalar(ScalarValue::IntervalDayTime(Some( @@ -748,7 +752,7 @@ mod tests { ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1), None)), ColumnarValue::Array(timestamps), ], - batch_size, + batch_len, ); assert_eq!( res.err().unwrap().strip_backtrace(), @@ -864,7 +868,8 @@ mod tests { .map(|s| Some(string_to_timestamp_nanos(s).unwrap())) .collect::() .with_timezone_opt(tz_opt.clone()); - let batch_size = input.len(); + let batch_len = input.len(); + #[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch let result = DateBinFunc::new() .invoke_batch( &[ @@ -875,7 +880,7 @@ mod tests { tz_opt.clone(), )), ], - batch_size, + batch_len, ) .unwrap(); if let ColumnarValue::Array(result) = result { diff --git a/datafusion/functions/src/datetime/date_part.rs 
b/datafusion/functions/src/datetime/date_part.rs index 98c6b659a824..2fbc1952718e 100644 --- a/datafusion/functions/src/datetime/date_part.rs +++ b/datafusion/functions/src/datetime/date_part.rs @@ -165,7 +165,11 @@ impl ScalarUDFImpl for DatePartFunc { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.len() != 2 { return exec_err!("Expected two arguments in DATE_PART"); } diff --git a/datafusion/functions/src/datetime/date_trunc.rs b/datafusion/functions/src/datetime/date_trunc.rs index 5ec308ef9c81..36c7432f3834 100644 --- a/datafusion/functions/src/datetime/date_trunc.rs +++ b/datafusion/functions/src/datetime/date_trunc.rs @@ -137,7 +137,11 @@ impl ScalarUDFImpl for DateTruncFunc { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let (granularity, array) = (&args[0], &args[1]); let granularity = if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) = @@ -724,14 +728,15 @@ mod tests { .map(|s| Some(string_to_timestamp_nanos(s).unwrap())) .collect::() .with_timezone_opt(tz_opt.clone()); - let batch_size = input.len(); + let batch_len = input.len(); + #[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch let result = DateTruncFunc::new() .invoke_batch( &[ ColumnarValue::Scalar(ScalarValue::from("day")), ColumnarValue::Array(Arc::new(input)), ], - batch_size, + batch_len, ) .unwrap(); if let ColumnarValue::Array(result) = result { @@ -886,14 +891,15 @@ mod tests { .map(|s| Some(string_to_timestamp_nanos(s).unwrap())) .collect::() .with_timezone_opt(tz_opt.clone()); - let batch_size = input.len(); + let batch_len = input.len(); + #[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch let result = DateTruncFunc::new() .invoke_batch( &[ ColumnarValue::Scalar(ScalarValue::from("hour")), ColumnarValue::Array(Arc::new(input)), ], - 
batch_size, + batch_len, ) .unwrap(); if let ColumnarValue::Array(result) = result { diff --git a/datafusion/functions/src/datetime/from_unixtime.rs b/datafusion/functions/src/datetime/from_unixtime.rs index 29b2f29b14c2..177f437f2e19 100644 --- a/datafusion/functions/src/datetime/from_unixtime.rs +++ b/datafusion/functions/src/datetime/from_unixtime.rs @@ -88,7 +88,11 @@ impl ScalarUDFImpl for FromUnixtimeFunc { internal_err!("call return_type_from_exprs instead") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let len = args.len(); if len != 1 && len != 2 { return exec_err!( @@ -162,8 +166,8 @@ mod test { fn test_without_timezone() { let args = [ColumnarValue::Scalar(Int64(Some(1729900800)))]; - #[allow(deprecated)] // TODO use invoke_batch - let result = FromUnixtimeFunc::new().invoke(&args).unwrap(); + // TODO use invoke_with_args + let result = FromUnixtimeFunc::new().invoke_batch(&args, 1).unwrap(); match result { ColumnarValue::Scalar(ScalarValue::TimestampSecond(Some(sec), None)) => { @@ -182,8 +186,8 @@ mod test { ))), ]; - #[allow(deprecated)] // TODO use invoke_batch - let result = FromUnixtimeFunc::new().invoke(&args).unwrap(); + // TODO use invoke_with_args + let result = FromUnixtimeFunc::new().invoke_batch(&args, 2).unwrap(); match result { ColumnarValue::Scalar(ScalarValue::TimestampSecond(Some(sec), Some(tz))) => { diff --git a/datafusion/functions/src/datetime/make_date.rs b/datafusion/functions/src/datetime/make_date.rs index a13511f33398..9019200b715d 100644 --- a/datafusion/functions/src/datetime/make_date.rs +++ b/datafusion/functions/src/datetime/make_date.rs @@ -72,7 +72,11 @@ impl ScalarUDFImpl for MakeDateFunc { Ok(Date32) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.len() != 3 { return exec_err!( "make_date function requires 3 
arguments, got {}", @@ -234,6 +238,7 @@ mod tests { #[test] fn test_make_date() { + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let res = MakeDateFunc::new() .invoke_batch( &[ @@ -251,6 +256,7 @@ mod tests { panic!("Expected a scalar value") } + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let res = MakeDateFunc::new() .invoke_batch( &[ @@ -268,6 +274,7 @@ mod tests { panic!("Expected a scalar value") } + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let res = MakeDateFunc::new() .invoke_batch( &[ @@ -288,7 +295,8 @@ mod tests { let years = Arc::new((2021..2025).map(Some).collect::()); let months = Arc::new((1..5).map(Some).collect::()); let days = Arc::new((11..15).map(Some).collect::()); - let batch_size = years.len(); + let batch_len = years.len(); + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let res = MakeDateFunc::new() .invoke_batch( &[ @@ -296,7 +304,7 @@ mod tests { ColumnarValue::Array(months), ColumnarValue::Array(days), ], - batch_size, + batch_len, ) .expect("that make_date parsed values without error"); @@ -317,6 +325,7 @@ mod tests { // // invalid number of arguments + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let res = MakeDateFunc::new() .invoke_batch(&[ColumnarValue::Scalar(ScalarValue::Int32(Some(1)))], 1); assert_eq!( @@ -325,6 +334,7 @@ mod tests { ); // invalid type + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let res = MakeDateFunc::new().invoke_batch( &[ ColumnarValue::Scalar(ScalarValue::IntervalYearMonth(Some(1))), @@ -339,6 +349,7 @@ mod tests { ); // overflow of month + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let res = MakeDateFunc::new().invoke_batch( &[ ColumnarValue::Scalar(ScalarValue::Int32(Some(2023))), @@ -353,6 +364,7 @@ mod tests { ); // overflow of day + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let res = 
MakeDateFunc::new().invoke_batch( &[ ColumnarValue::Scalar(ScalarValue::Int32(Some(2023))), diff --git a/datafusion/functions/src/datetime/now.rs b/datafusion/functions/src/datetime/now.rs index cadc4fce04f1..3e79d43546e1 100644 --- a/datafusion/functions/src/datetime/now.rs +++ b/datafusion/functions/src/datetime/now.rs @@ -72,7 +72,11 @@ impl ScalarUDFImpl for NowFunc { Ok(Timestamp(Nanosecond, Some("+00:00".into()))) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { internal_err!("invoke should not be called on a simplified now() function") } diff --git a/datafusion/functions/src/datetime/to_char.rs b/datafusion/functions/src/datetime/to_char.rs index dd4ae7b8464e..a6a1e606c116 100644 --- a/datafusion/functions/src/datetime/to_char.rs +++ b/datafusion/functions/src/datetime/to_char.rs @@ -107,7 +107,11 @@ impl ScalarUDFImpl for ToCharFunc { Ok(Utf8) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.len() != 2 { return exec_err!( "to_char function requires 2 arguments, got {}", @@ -384,6 +388,7 @@ mod tests { ]; for (value, format, expected) in scalar_data { + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let result = ToCharFunc::new() .invoke_batch( &[ColumnarValue::Scalar(value), ColumnarValue::Scalar(format)], @@ -461,14 +466,15 @@ mod tests { ]; for (value, format, expected) in scalar_array_data { - let batch_size = format.len(); + let batch_len = format.len(); + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let result = ToCharFunc::new() .invoke_batch( &[ ColumnarValue::Scalar(value), ColumnarValue::Array(Arc::new(format) as ArrayRef), ], - batch_size, + batch_len, ) .expect("that to_char parsed values without error"); @@ -590,14 +596,15 @@ mod tests { ]; for (value, format, expected) in array_scalar_data { 
- let batch_size = value.len(); + let batch_len = value.len(); + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let result = ToCharFunc::new() .invoke_batch( &[ ColumnarValue::Array(value as ArrayRef), ColumnarValue::Scalar(format), ], - batch_size, + batch_len, ) .expect("that to_char parsed values without error"); @@ -610,14 +617,15 @@ mod tests { } for (value, format, expected) in array_array_data { - let batch_size = value.len(); + let batch_len = value.len(); + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let result = ToCharFunc::new() .invoke_batch( &[ ColumnarValue::Array(value), ColumnarValue::Array(Arc::new(format) as ArrayRef), ], - batch_size, + batch_len, ) .expect("that to_char parsed values without error"); @@ -634,6 +642,7 @@ mod tests { // // invalid number of arguments + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let result = ToCharFunc::new() .invoke_batch(&[ColumnarValue::Scalar(ScalarValue::Int32(Some(1)))], 1); assert_eq!( @@ -642,6 +651,7 @@ mod tests { ); // invalid type + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let result = ToCharFunc::new().invoke_batch( &[ ColumnarValue::Scalar(ScalarValue::Int32(Some(1))), diff --git a/datafusion/functions/src/datetime/to_date.rs b/datafusion/functions/src/datetime/to_date.rs index ff322ce31960..77dbcade56df 100644 --- a/datafusion/functions/src/datetime/to_date.rs +++ b/datafusion/functions/src/datetime/to_date.rs @@ -140,7 +140,11 @@ impl ScalarUDFImpl for ToDateFunc { Ok(Date32) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.is_empty() { return exec_err!("to_date function requires 1 or more arguments, got 0"); } @@ -213,6 +217,7 @@ mod tests { } fn test_scalar(sv: ScalarValue, tc: &TestCase) { + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let to_date_result = 
ToDateFunc::new().invoke_batch(&[ColumnarValue::Scalar(sv)], 1); @@ -234,9 +239,10 @@ mod tests { A: From> + Array + 'static, { let date_array = A::from(vec![tc.date_str]); - let batch_size = date_array.len(); + let batch_len = date_array.len(); + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let to_date_result = ToDateFunc::new() - .invoke_batch(&[ColumnarValue::Array(Arc::new(date_array))], batch_size); + .invoke_batch(&[ColumnarValue::Array(Arc::new(date_array))], batch_len); match to_date_result { Ok(ColumnarValue::Array(a)) => { @@ -325,6 +331,7 @@ mod tests { fn test_scalar(sv: ScalarValue, tc: &TestCase) { let format_scalar = ScalarValue::Utf8(Some(tc.format_str.to_string())); + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let to_date_result = ToDateFunc::new().invoke_batch( &[ ColumnarValue::Scalar(sv), @@ -351,14 +358,15 @@ mod tests { { let date_array = A::from(vec![tc.formatted_date]); let format_array = A::from(vec![tc.format_str]); + let batch_len = date_array.len(); - let batch_size = date_array.len(); + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let to_date_result = ToDateFunc::new().invoke_batch( &[ ColumnarValue::Array(Arc::new(date_array)), ColumnarValue::Array(Arc::new(format_array)), ], - batch_size, + batch_len, ); match to_date_result { @@ -391,6 +399,7 @@ mod tests { let format1_scalar = ScalarValue::Utf8(Some("%Y-%m-%d".into())); let format2_scalar = ScalarValue::Utf8(Some("%Y/%m/%d".into())); + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let to_date_result = ToDateFunc::new().invoke_batch( &[ ColumnarValue::Scalar(formatted_date_scalar), @@ -422,6 +431,7 @@ mod tests { for date_str in test_cases { let formatted_date_scalar = ScalarValue::Utf8(Some(date_str.into())); + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let to_date_result = ToDateFunc::new() .invoke_batch(&[ColumnarValue::Scalar(formatted_date_scalar)], 1); @@ 
-440,6 +450,7 @@ mod tests { let date_str = "20241231"; let date_scalar = ScalarValue::Utf8(Some(date_str.into())); + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let to_date_result = ToDateFunc::new().invoke_batch(&[ColumnarValue::Scalar(date_scalar)], 1); @@ -461,6 +472,7 @@ mod tests { let date_str = "202412311"; let date_scalar = ScalarValue::Utf8(Some(date_str.into())); + #[allow(deprecated)] // TODO migrate UDF to invoke from invoke_batch let to_date_result = ToDateFunc::new().invoke_batch(&[ColumnarValue::Scalar(date_scalar)], 1); diff --git a/datafusion/functions/src/datetime/to_local_time.rs b/datafusion/functions/src/datetime/to_local_time.rs index 5048b8fd47ec..3ee316c28cee 100644 --- a/datafusion/functions/src/datetime/to_local_time.rs +++ b/datafusion/functions/src/datetime/to_local_time.rs @@ -320,7 +320,11 @@ impl ScalarUDFImpl for ToLocalTimeFunc { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.len() != 1 { return exec_err!( "to_local_time function requires 1 argument, got {:?}", diff --git a/datafusion/functions/src/datetime/to_timestamp.rs b/datafusion/functions/src/datetime/to_timestamp.rs index 78a7bf505dac..a0cb35cc28c7 100644 --- a/datafusion/functions/src/datetime/to_timestamp.rs +++ b/datafusion/functions/src/datetime/to_timestamp.rs @@ -150,7 +150,11 @@ impl ScalarUDFImpl for ToTimestampFunc { Ok(return_type_for(&arg_types[0], Nanosecond)) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.is_empty() { return exec_err!( "to_timestamp function requires 1 or more arguments, got {}", @@ -247,7 +251,11 @@ impl ScalarUDFImpl for ToTimestampSecondsFunc { Ok(return_type_for(&arg_types[0], Second)) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + 
_number_rows: usize, + ) -> Result { if args.is_empty() { return exec_err!( "to_timestamp_seconds function requires 1 or more arguments, got {}", @@ -335,7 +343,11 @@ impl ScalarUDFImpl for ToTimestampMillisFunc { Ok(return_type_for(&arg_types[0], Millisecond)) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.is_empty() { return exec_err!( "to_timestamp_millis function requires 1 or more arguments, got {}", @@ -425,7 +437,11 @@ impl ScalarUDFImpl for ToTimestampMicrosFunc { Ok(return_type_for(&arg_types[0], Microsecond)) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.is_empty() { return exec_err!( "to_timestamp_micros function requires 1 or more arguments, got {}", @@ -515,7 +531,11 @@ impl ScalarUDFImpl for ToTimestampNanosFunc { Ok(return_type_for(&arg_types[0], Nanosecond)) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.is_empty() { return exec_err!( "to_timestamp_nanos function requires 1 or more arguments, got {}", diff --git a/datafusion/functions/src/encoding/inner.rs b/datafusion/functions/src/encoding/inner.rs index 0649c7cbb5c0..b8219d6c18db 100644 --- a/datafusion/functions/src/encoding/inner.rs +++ b/datafusion/functions/src/encoding/inner.rs @@ -90,7 +90,11 @@ impl ScalarUDFImpl for EncodeFunc { Ok(arg_types[0].to_owned()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { encode(args) } @@ -177,7 +181,11 @@ impl ScalarUDFImpl for DecodeFunc { Ok(arg_types[0].to_owned()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { decode(args) 
} diff --git a/datafusion/functions/src/macros.rs b/datafusion/functions/src/macros.rs index 9bc038e71edc..bc6685589091 100644 --- a/datafusion/functions/src/macros.rs +++ b/datafusion/functions/src/macros.rs @@ -208,7 +208,11 @@ macro_rules! make_math_unary_udf { $EVALUATE_BOUNDS(inputs) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let args = ColumnarValue::values_to_arrays(args)?; let arr: ArrayRef = match args[0].data_type() { DataType::Float64 => Arc::new( @@ -316,7 +320,11 @@ macro_rules! make_math_binary_udf { $OUTPUT_ORDERING(input) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let args = ColumnarValue::values_to_arrays(args)?; let arr: ArrayRef = match args[0].data_type() { DataType::Float64 => { diff --git a/datafusion/functions/src/math/abs.rs b/datafusion/functions/src/math/abs.rs index 798939162a63..70796ba22063 100644 --- a/datafusion/functions/src/math/abs.rs +++ b/datafusion/functions/src/math/abs.rs @@ -160,7 +160,11 @@ impl ScalarUDFImpl for AbsFunc { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let args = ColumnarValue::values_to_arrays(args)?; if args.len() != 1 { diff --git a/datafusion/functions/src/math/cot.rs b/datafusion/functions/src/math/cot.rs index eded50a20d8d..2daea09172ae 100644 --- a/datafusion/functions/src/math/cot.rs +++ b/datafusion/functions/src/math/cot.rs @@ -95,7 +95,11 @@ impl ScalarUDFImpl for CotFunc { Some(get_cot_doc()) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(cot, vec![])(args) } } diff --git a/datafusion/functions/src/math/factorial.rs b/datafusion/functions/src/math/factorial.rs 
index bacdf47524f4..9f72c2bcff67 100644 --- a/datafusion/functions/src/math/factorial.rs +++ b/datafusion/functions/src/math/factorial.rs @@ -68,7 +68,11 @@ impl ScalarUDFImpl for FactorialFunc { Ok(Int64) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(factorial, vec![])(args) } diff --git a/datafusion/functions/src/math/gcd.rs b/datafusion/functions/src/math/gcd.rs index f4edef3acca3..3579dd2c6d46 100644 --- a/datafusion/functions/src/math/gcd.rs +++ b/datafusion/functions/src/math/gcd.rs @@ -68,7 +68,11 @@ impl ScalarUDFImpl for GcdFunc { Ok(Int64) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(gcd, vec![])(args) } diff --git a/datafusion/functions/src/math/iszero.rs b/datafusion/functions/src/math/iszero.rs index 7e5d4fe77ffa..7da926ef07da 100644 --- a/datafusion/functions/src/math/iszero.rs +++ b/datafusion/functions/src/math/iszero.rs @@ -71,7 +71,11 @@ impl ScalarUDFImpl for IsZeroFunc { Ok(Boolean) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(iszero, vec![])(args) } diff --git a/datafusion/functions/src/math/lcm.rs b/datafusion/functions/src/math/lcm.rs index 64b07ce606f2..1f3c19c09ffa 100644 --- a/datafusion/functions/src/math/lcm.rs +++ b/datafusion/functions/src/math/lcm.rs @@ -69,7 +69,11 @@ impl ScalarUDFImpl for LcmFunc { Ok(Int64) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(lcm, vec![])(args) } diff --git a/datafusion/functions/src/math/log.rs b/datafusion/functions/src/math/log.rs index 14b6dc3e054e..3c134bded4ef 100644 --- 
a/datafusion/functions/src/math/log.rs +++ b/datafusion/functions/src/math/log.rs @@ -125,7 +125,11 @@ impl ScalarUDFImpl for LogFunc { } // Support overloaded log(base, x) and log(x) which defaults to log(10, x) - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let args = ColumnarValue::values_to_arrays(args)?; let mut base = ColumnarValue::Scalar(ScalarValue::Float32(Some(10.0))); diff --git a/datafusion/functions/src/math/nans.rs b/datafusion/functions/src/math/nans.rs index c1dd1aacc35a..68a73772f964 100644 --- a/datafusion/functions/src/math/nans.rs +++ b/datafusion/functions/src/math/nans.rs @@ -69,7 +69,11 @@ impl ScalarUDFImpl for IsNanFunc { Ok(DataType::Boolean) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let args = ColumnarValue::values_to_arrays(args)?; let arr: ArrayRef = match args[0].data_type() { diff --git a/datafusion/functions/src/math/nanvl.rs b/datafusion/functions/src/math/nanvl.rs index cfd21256dd96..5a4ab91f37fd 100644 --- a/datafusion/functions/src/math/nanvl.rs +++ b/datafusion/functions/src/math/nanvl.rs @@ -73,7 +73,11 @@ impl ScalarUDFImpl for NanvlFunc { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(nanvl, vec![])(args) } diff --git a/datafusion/functions/src/math/pi.rs b/datafusion/functions/src/math/pi.rs index 70cc76f03c58..a3bc0a930a24 100644 --- a/datafusion/functions/src/math/pi.rs +++ b/datafusion/functions/src/math/pi.rs @@ -20,7 +20,7 @@ use std::sync::OnceLock; use arrow::datatypes::DataType; use arrow::datatypes::DataType::Float64; -use datafusion_common::{internal_err, not_impl_err, Result, ScalarValue}; +use datafusion_common::{internal_err, Result, ScalarValue}; use 
datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion_expr::{ @@ -63,10 +63,6 @@ impl ScalarUDFImpl for PiFunc { Ok(Float64) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { - not_impl_err!("{} function does not accept arguments", self.name()) - } - fn invoke_batch( &self, args: &[ColumnarValue], diff --git a/datafusion/functions/src/math/power.rs b/datafusion/functions/src/math/power.rs index acf5f84df92b..da2b5779d110 100644 --- a/datafusion/functions/src/math/power.rs +++ b/datafusion/functions/src/math/power.rs @@ -84,7 +84,11 @@ impl ScalarUDFImpl for PowerFunc { &self.aliases } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let args = ColumnarValue::values_to_arrays(args)?; let arr: ArrayRef = match args[0].data_type() { diff --git a/datafusion/functions/src/math/round.rs b/datafusion/functions/src/math/round.rs index 6000e5d765de..6e7e1095e29d 100644 --- a/datafusion/functions/src/math/round.rs +++ b/datafusion/functions/src/math/round.rs @@ -80,7 +80,11 @@ impl ScalarUDFImpl for RoundFunc { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(round, vec![])(args) } diff --git a/datafusion/functions/src/math/signum.rs b/datafusion/functions/src/math/signum.rs index 33ff630f309f..eafec66461be 100644 --- a/datafusion/functions/src/math/signum.rs +++ b/datafusion/functions/src/math/signum.rs @@ -80,7 +80,11 @@ impl ScalarUDFImpl for SignumFunc { Ok(input[0].sort_properties) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(signum, vec![])(args) } diff --git a/datafusion/functions/src/math/trunc.rs 
b/datafusion/functions/src/math/trunc.rs index 9a05684d238e..df9794898753 100644 --- a/datafusion/functions/src/math/trunc.rs +++ b/datafusion/functions/src/math/trunc.rs @@ -85,7 +85,11 @@ impl ScalarUDFImpl for TruncFunc { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(trunc, vec![])(args) } diff --git a/datafusion/functions/src/regex/regexpcount.rs b/datafusion/functions/src/regex/regexpcount.rs index 819463795b7f..6b35330201e7 100644 --- a/datafusion/functions/src/regex/regexpcount.rs +++ b/datafusion/functions/src/regex/regexpcount.rs @@ -81,7 +81,11 @@ impl ScalarUDFImpl for RegexpCountFunc { Ok(Int64) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let len = args .iter() .fold(Option::::None, |acc, arg| match arg { diff --git a/datafusion/functions/src/regex/regexplike.rs b/datafusion/functions/src/regex/regexplike.rs index 13de7888aa5f..be8f96461ec7 100644 --- a/datafusion/functions/src/regex/regexplike.rs +++ b/datafusion/functions/src/regex/regexplike.rs @@ -135,7 +135,11 @@ impl ScalarUDFImpl for RegexpLikeFunc { }) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let len = args .iter() .fold(Option::::None, |acc, arg| match arg { diff --git a/datafusion/functions/src/regex/regexpmatch.rs b/datafusion/functions/src/regex/regexpmatch.rs index 019666bd7b2d..fcd9a7891233 100644 --- a/datafusion/functions/src/regex/regexpmatch.rs +++ b/datafusion/functions/src/regex/regexpmatch.rs @@ -83,7 +83,11 @@ impl ScalarUDFImpl for RegexpMatchFunc { other => DataType::List(Arc::new(Field::new("item", other.clone(), true))), }) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + 
_number_rows: usize, + ) -> Result { let len = args .iter() .fold(Option::::None, |acc, arg| match arg { diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index 4d8e5e5fe3e3..1f988fa55dcd 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -106,7 +106,11 @@ impl ScalarUDFImpl for RegexpReplaceFunc { } }) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let len = args .iter() .fold(Option::::None, |acc, arg| match arg { diff --git a/datafusion/functions/src/string/ascii.rs b/datafusion/functions/src/string/ascii.rs index b76d70d7e9d2..5422958f493d 100644 --- a/datafusion/functions/src/string/ascii.rs +++ b/datafusion/functions/src/string/ascii.rs @@ -64,7 +64,11 @@ impl ScalarUDFImpl for AsciiFunc { Ok(Int32) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(ascii, vec![])(args) } diff --git a/datafusion/functions/src/string/bit_length.rs b/datafusion/functions/src/string/bit_length.rs index cb815df15e4b..d46a7625ad8e 100644 --- a/datafusion/functions/src/string/bit_length.rs +++ b/datafusion/functions/src/string/bit_length.rs @@ -62,7 +62,11 @@ impl ScalarUDFImpl for BitLengthFunc { utf8_to_int_type(&arg_types[0], "bit_length") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.len() != 1 { return exec_err!( "bit_length function requires 1 argument, got {}", diff --git a/datafusion/functions/src/string/btrim.rs b/datafusion/functions/src/string/btrim.rs index e215b18d9c3c..e5afc827dc41 100644 --- a/datafusion/functions/src/string/btrim.rs +++ b/datafusion/functions/src/string/btrim.rs @@ -80,7 +80,11 @@ impl 
ScalarUDFImpl for BTrimFunc { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { DataType::Utf8 | DataType::Utf8View => make_scalar_function( btrim::, diff --git a/datafusion/functions/src/string/chr.rs b/datafusion/functions/src/string/chr.rs index 0d94cab08d91..3a51b7881081 100644 --- a/datafusion/functions/src/string/chr.rs +++ b/datafusion/functions/src/string/chr.rs @@ -96,7 +96,11 @@ impl ScalarUDFImpl for ChrFunc { Ok(Utf8) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(chr, vec![])(args) } diff --git a/datafusion/functions/src/string/concat.rs b/datafusion/functions/src/string/concat.rs index d49a2777b4ff..0b77dd5b5157 100644 --- a/datafusion/functions/src/string/concat.rs +++ b/datafusion/functions/src/string/concat.rs @@ -85,7 +85,11 @@ impl ScalarUDFImpl for ConcatFunc { /// Concatenates the text representations of all the arguments. NULL arguments are ignored. 
/// concat('abcde', 2, NULL, 22) = 'abcde222' - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let mut return_datatype = DataType::Utf8; args.iter().for_each(|col| { if col.data_type() == DataType::Utf8View { @@ -472,6 +476,7 @@ mod tests { ]))); let args = &[c0, c1, c2, c3, c4]; + #[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch let result = ConcatFunc::new().invoke_batch(args, 3)?; let expected = Arc::new(StringViewArray::from(vec!["foo,x,a", "bar,,", "baz,z,b"])) diff --git a/datafusion/functions/src/string/concat_ws.rs b/datafusion/functions/src/string/concat_ws.rs index 98a75f121c35..3b0a7adf7901 100644 --- a/datafusion/functions/src/string/concat_ws.rs +++ b/datafusion/functions/src/string/concat_ws.rs @@ -75,7 +75,11 @@ impl ScalarUDFImpl for ConcatWsFunc { /// Concatenates all but the first argument, with separators. The first argument is used as the separator string, and should not be NULL. Other NULL arguments are ignored. /// concat_ws(',', 'abcde', 2, NULL, 22) = 'abcde,2,22' - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { // do not accept 0 arguments. 
if args.len() < 2 { return exec_err!( @@ -467,6 +471,7 @@ mod tests { ]))); let args = &[c0, c1, c2]; + #[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch let result = ConcatWsFunc::new().invoke_batch(args, 3)?; let expected = Arc::new(StringArray::from(vec!["foo,x", "bar", "baz,z"])) as ArrayRef; @@ -492,6 +497,7 @@ mod tests { ]))); let args = &[c0, c1, c2]; + #[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch let result = ConcatWsFunc::new().invoke_batch(args, 3)?; let expected = Arc::new(StringArray::from(vec![Some("foo,x"), None, Some("baz+z")])) diff --git a/datafusion/functions/src/string/contains.rs b/datafusion/functions/src/string/contains.rs index 3acd2464524d..9728457de8fa 100644 --- a/datafusion/functions/src/string/contains.rs +++ b/datafusion/functions/src/string/contains.rs @@ -66,7 +66,11 @@ impl ScalarUDFImpl for ContainsFunc { Ok(Boolean) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(contains, vec![])(args) } @@ -145,6 +149,7 @@ mod test { Some("yyy?()"), ]))); let scalar = ColumnarValue::Scalar(ScalarValue::Utf8(Some("x?(".to_string()))); + #[allow(deprecated)] // TODO migrate UDF to invoke let actual = udf.invoke_batch(&[array, scalar], 2).unwrap(); let expect = ColumnarValue::Array(Arc::new(BooleanArray::from(vec![ Some(true), diff --git a/datafusion/functions/src/string/ends_with.rs b/datafusion/functions/src/string/ends_with.rs index 88978a35c0b7..9a134183a034 100644 --- a/datafusion/functions/src/string/ends_with.rs +++ b/datafusion/functions/src/string/ends_with.rs @@ -63,7 +63,11 @@ impl ScalarUDFImpl for EndsWithFunc { Ok(DataType::Boolean) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { DataType::Utf8View | DataType::Utf8 | DataType::LargeUtf8 => { 
make_scalar_function(ends_with, vec![])(args) diff --git a/datafusion/functions/src/string/initcap.rs b/datafusion/functions/src/string/initcap.rs index 5fd1e7929881..d7e8db31b745 100644 --- a/datafusion/functions/src/string/initcap.rs +++ b/datafusion/functions/src/string/initcap.rs @@ -64,7 +64,11 @@ impl ScalarUDFImpl for InitcapFunc { utf8_to_str_type(&arg_types[0], "initcap") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { DataType::Utf8 => make_scalar_function(initcap::, vec![])(args), DataType::LargeUtf8 => make_scalar_function(initcap::, vec![])(args), diff --git a/datafusion/functions/src/string/levenshtein.rs b/datafusion/functions/src/string/levenshtein.rs index 558e71239f84..70a8340f9e1f 100644 --- a/datafusion/functions/src/string/levenshtein.rs +++ b/datafusion/functions/src/string/levenshtein.rs @@ -65,7 +65,11 @@ impl ScalarUDFImpl for LevenshteinFunc { utf8_to_int_type(&arg_types[0], "levenshtein") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { DataType::Utf8View | DataType::Utf8 => { make_scalar_function(levenshtein::, vec![])(args) diff --git a/datafusion/functions/src/string/lower.rs b/datafusion/functions/src/string/lower.rs index 78887fde0a8e..e59f5db46dfb 100644 --- a/datafusion/functions/src/string/lower.rs +++ b/datafusion/functions/src/string/lower.rs @@ -62,7 +62,11 @@ impl ScalarUDFImpl for LowerFunc { utf8_to_str_type(&arg_types[0], "lower") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { to_lower(args, "lower") } @@ -99,14 +103,15 @@ fn get_lower_doc() -> &'static Documentation { #[cfg(test)] mod tests { use super::*; - use arrow::array::{ArrayRef, StringArray}; + use 
arrow::array::{Array, ArrayRef, StringArray}; use std::sync::Arc; fn to_lower(input: ArrayRef, expected: ArrayRef) -> Result<()> { let func = LowerFunc::new(); - let batch_size = input.len(); + let batch_len = input.len(); let args = vec![ColumnarValue::Array(input)]; - let result = match func.invoke_batch(&args, batch_size)? { + #[allow(deprecated)] // TODO migrate UDF to invoke + let result = match func.invoke_batch(&args, batch_len)? { ColumnarValue::Array(result) => result, _ => unreachable!("lower"), }; diff --git a/datafusion/functions/src/string/ltrim.rs b/datafusion/functions/src/string/ltrim.rs index 0b4c197646b6..0de5ca317ef8 100644 --- a/datafusion/functions/src/string/ltrim.rs +++ b/datafusion/functions/src/string/ltrim.rs @@ -78,7 +78,11 @@ impl ScalarUDFImpl for LtrimFunc { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { DataType::Utf8 | DataType::Utf8View => make_scalar_function( ltrim::, diff --git a/datafusion/functions/src/string/octet_length.rs b/datafusion/functions/src/string/octet_length.rs index 89f71d457199..42db548bce9c 100644 --- a/datafusion/functions/src/string/octet_length.rs +++ b/datafusion/functions/src/string/octet_length.rs @@ -62,7 +62,11 @@ impl ScalarUDFImpl for OctetLengthFunc { utf8_to_int_type(&arg_types[0], "octet_length") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { if args.len() != 1 { return exec_err!( "octet_length function requires 1 argument, got {}", diff --git a/datafusion/functions/src/string/overlay.rs b/datafusion/functions/src/string/overlay.rs index 796776304f4a..fa4ff3953080 100644 --- a/datafusion/functions/src/string/overlay.rs +++ b/datafusion/functions/src/string/overlay.rs @@ -77,7 +77,11 @@ impl ScalarUDFImpl for OverlayFunc { utf8_to_str_type(&arg_types[0], "overlay") } - 
fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { DataType::Utf8View | DataType::Utf8 => { make_scalar_function(overlay::, vec![])(args) diff --git a/datafusion/functions/src/string/repeat.rs b/datafusion/functions/src/string/repeat.rs index 249ce15d6dbe..d4662ec293c0 100644 --- a/datafusion/functions/src/string/repeat.rs +++ b/datafusion/functions/src/string/repeat.rs @@ -72,7 +72,11 @@ impl ScalarUDFImpl for RepeatFunc { utf8_to_str_type(&arg_types[0], "repeat") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(repeat, vec![])(args) } diff --git a/datafusion/functions/src/string/replace.rs b/datafusion/functions/src/string/replace.rs index 91abc39da058..51d06bb53769 100644 --- a/datafusion/functions/src/string/replace.rs +++ b/datafusion/functions/src/string/replace.rs @@ -64,7 +64,11 @@ impl ScalarUDFImpl for ReplaceFunc { utf8_to_str_type(&arg_types[0], "replace") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { DataType::Utf8 => make_scalar_function(replace::, vec![])(args), DataType::LargeUtf8 => make_scalar_function(replace::, vec![])(args), diff --git a/datafusion/functions/src/string/rtrim.rs b/datafusion/functions/src/string/rtrim.rs index e934147efbbe..d36e906934a4 100644 --- a/datafusion/functions/src/string/rtrim.rs +++ b/datafusion/functions/src/string/rtrim.rs @@ -78,7 +78,11 @@ impl ScalarUDFImpl for RtrimFunc { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { DataType::Utf8 | DataType::Utf8View => make_scalar_function( rtrim::, diff --git 
a/datafusion/functions/src/string/split_part.rs b/datafusion/functions/src/string/split_part.rs index ea01cb1f56f9..934d6fc2ad6d 100644 --- a/datafusion/functions/src/string/split_part.rs +++ b/datafusion/functions/src/string/split_part.rs @@ -81,7 +81,11 @@ impl ScalarUDFImpl for SplitPartFunc { utf8_to_str_type(&arg_types[0], "split_part") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { // First, determine if any of the arguments is an Array let len = args.iter().find_map(|arg| match arg { ColumnarValue::Array(a) => Some(a.len()), diff --git a/datafusion/functions/src/string/starts_with.rs b/datafusion/functions/src/string/starts_with.rs index dce161a2e14b..cbb50e014671 100644 --- a/datafusion/functions/src/string/starts_with.rs +++ b/datafusion/functions/src/string/starts_with.rs @@ -70,7 +70,11 @@ impl ScalarUDFImpl for StartsWithFunc { Ok(DataType::Boolean) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { DataType::Utf8View | DataType::Utf8 | DataType::LargeUtf8 => { make_scalar_function(starts_with, vec![])(args) diff --git a/datafusion/functions/src/string/to_hex.rs b/datafusion/functions/src/string/to_hex.rs index e0033d2d1cb0..635e3b18e576 100644 --- a/datafusion/functions/src/string/to_hex.rs +++ b/datafusion/functions/src/string/to_hex.rs @@ -103,7 +103,11 @@ impl ScalarUDFImpl for ToHexFunc { }) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { DataType::Int32 => make_scalar_function(to_hex::, vec![])(args), DataType::Int64 => make_scalar_function(to_hex::, vec![])(args), diff --git a/datafusion/functions/src/string/upper.rs b/datafusion/functions/src/string/upper.rs index 5039d094f2d6..b7b44806def5 
100644 --- a/datafusion/functions/src/string/upper.rs +++ b/datafusion/functions/src/string/upper.rs @@ -61,7 +61,11 @@ impl ScalarUDFImpl for UpperFunc { utf8_to_str_type(&arg_types[0], "upper") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { to_upper(args, "upper") } @@ -99,14 +103,15 @@ fn get_upper_doc() -> &'static Documentation { #[cfg(test)] mod tests { use super::*; - use arrow::array::{ArrayRef, StringArray}; + use arrow::array::{Array, ArrayRef, StringArray}; use std::sync::Arc; fn to_upper(input: ArrayRef, expected: ArrayRef) -> Result<()> { let func = UpperFunc::new(); - let batch_size = input.len(); + let batch_len = input.len(); let args = vec![ColumnarValue::Array(input)]; - let result = match func.invoke_batch(&args, batch_size)? { + #[allow(deprecated)] // TODO migrate UDF to invoke + let result = match func.invoke_batch(&args, batch_len)? { ColumnarValue::Array(result) => result, _ => unreachable!("upper"), }; diff --git a/datafusion/functions/src/unicode/character_length.rs b/datafusion/functions/src/unicode/character_length.rs index eca8d3fd493d..39bea9acdd75 100644 --- a/datafusion/functions/src/unicode/character_length.rs +++ b/datafusion/functions/src/unicode/character_length.rs @@ -72,7 +72,11 @@ impl ScalarUDFImpl for CharacterLengthFunc { utf8_to_int_type(&arg_types[0], "character_length") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(character_length, vec![])(args) } diff --git a/datafusion/functions/src/unicode/find_in_set.rs b/datafusion/functions/src/unicode/find_in_set.rs index cad860e41088..16794a2ac01c 100644 --- a/datafusion/functions/src/unicode/find_in_set.rs +++ b/datafusion/functions/src/unicode/find_in_set.rs @@ -76,7 +76,11 @@ impl ScalarUDFImpl for FindInSetFunc { utf8_to_int_type(&arg_types[0], 
"find_in_set") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(find_in_set, vec![])(args) } diff --git a/datafusion/functions/src/unicode/left.rs b/datafusion/functions/src/unicode/left.rs index a6c2b9768f0b..ca178024bd9f 100644 --- a/datafusion/functions/src/unicode/left.rs +++ b/datafusion/functions/src/unicode/left.rs @@ -81,7 +81,11 @@ impl ScalarUDFImpl for LeftFunc { utf8_to_str_type(&arg_types[0], "left") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { DataType::Utf8 | DataType::Utf8View => { make_scalar_function(left::, vec![])(args) diff --git a/datafusion/functions/src/unicode/lpad.rs b/datafusion/functions/src/unicode/lpad.rs index a639bcedcd1f..a2c65bf53ea0 100644 --- a/datafusion/functions/src/unicode/lpad.rs +++ b/datafusion/functions/src/unicode/lpad.rs @@ -90,7 +90,11 @@ impl ScalarUDFImpl for LPadFunc { utf8_to_str_type(&arg_types[0], "lpad") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { Utf8 | Utf8View => make_scalar_function(lpad::, vec![])(args), LargeUtf8 => make_scalar_function(lpad::, vec![])(args), diff --git a/datafusion/functions/src/unicode/reverse.rs b/datafusion/functions/src/unicode/reverse.rs index baf3b56636e2..fd3ba1c8b724 100644 --- a/datafusion/functions/src/unicode/reverse.rs +++ b/datafusion/functions/src/unicode/reverse.rs @@ -72,7 +72,11 @@ impl ScalarUDFImpl for ReverseFunc { utf8_to_str_type(&arg_types[0], "reverse") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { Utf8 | Utf8View => make_scalar_function(reverse::, 
vec![])(args), LargeUtf8 => make_scalar_function(reverse::, vec![])(args), diff --git a/datafusion/functions/src/unicode/right.rs b/datafusion/functions/src/unicode/right.rs index ab3b7ba1a27e..523508016860 100644 --- a/datafusion/functions/src/unicode/right.rs +++ b/datafusion/functions/src/unicode/right.rs @@ -81,7 +81,11 @@ impl ScalarUDFImpl for RightFunc { utf8_to_str_type(&arg_types[0], "right") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match args[0].data_type() { DataType::Utf8 | DataType::Utf8View => { make_scalar_function(right::, vec![])(args) diff --git a/datafusion/functions/src/unicode/rpad.rs b/datafusion/functions/src/unicode/rpad.rs index bd9d625105e9..a88f4725359b 100644 --- a/datafusion/functions/src/unicode/rpad.rs +++ b/datafusion/functions/src/unicode/rpad.rs @@ -89,7 +89,11 @@ impl ScalarUDFImpl for RPadFunc { utf8_to_str_type(&arg_types[0], "rpad") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { match ( args.len(), args[0].data_type(), diff --git a/datafusion/functions/src/unicode/strpos.rs b/datafusion/functions/src/unicode/strpos.rs index 9c84590f7f94..de2dbfc78a52 100644 --- a/datafusion/functions/src/unicode/strpos.rs +++ b/datafusion/functions/src/unicode/strpos.rs @@ -66,7 +66,11 @@ impl ScalarUDFImpl for StrposFunc { utf8_to_int_type(&arg_types[0], "strpos/instr/position") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(strpos, vec![])(args) } diff --git a/datafusion/functions/src/unicode/substr.rs b/datafusion/functions/src/unicode/substr.rs index edfe57210b71..cfe49fc86ab6 100644 --- a/datafusion/functions/src/unicode/substr.rs +++ b/datafusion/functions/src/unicode/substr.rs @@ -75,7 +75,11 @@ impl 
ScalarUDFImpl for SubstrFunc { } } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(substr, vec![])(args) } diff --git a/datafusion/functions/src/unicode/substrindex.rs b/datafusion/functions/src/unicode/substrindex.rs index c04839783f58..01b9ad0f0e8a 100644 --- a/datafusion/functions/src/unicode/substrindex.rs +++ b/datafusion/functions/src/unicode/substrindex.rs @@ -78,7 +78,11 @@ impl ScalarUDFImpl for SubstrIndexFunc { utf8_to_str_type(&arg_types[0], "substr_index") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(substr_index, vec![])(args) } diff --git a/datafusion/functions/src/unicode/translate.rs b/datafusion/functions/src/unicode/translate.rs index 845d34c708d4..ae2a7e820faf 100644 --- a/datafusion/functions/src/unicode/translate.rs +++ b/datafusion/functions/src/unicode/translate.rs @@ -76,7 +76,11 @@ impl ScalarUDFImpl for TranslateFunc { utf8_to_str_type(&arg_types[0], "translate") } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { make_scalar_function(invoke_translate, vec![])(args) } diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index b56c2dc604a9..7dddc7cd2cb9 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -1250,7 +1250,11 @@ mod test { Ok(Utf8) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { Ok(ColumnarValue::Scalar(ScalarValue::from("a"))) } } diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs 
index 16a4fa6be38d..0ea2d24effbb 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -1450,7 +1450,11 @@ mod test { Ok(DataType::Float64) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { unimplemented!() } } diff --git a/datafusion/optimizer/src/eliminate_group_by_constant.rs b/datafusion/optimizer/src/eliminate_group_by_constant.rs index 13d03d647fe2..035a1d2da229 100644 --- a/datafusion/optimizer/src/eliminate_group_by_constant.rs +++ b/datafusion/optimizer/src/eliminate_group_by_constant.rs @@ -155,7 +155,11 @@ mod tests { fn return_type(&self, _args: &[DataType]) -> Result { Ok(DataType::Int32) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { unimplemented!() } } diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index 23cd46803c78..29ce1e55a877 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -3301,7 +3301,11 @@ Projection: a, b Ok(DataType::Int32) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { Ok(ColumnarValue::Scalar(ScalarValue::from(1))) } } diff --git a/datafusion/physical-expr/src/scalar_function.rs b/datafusion/physical-expr/src/scalar_function.rs index 74d0ecdadd32..138774d806f2 100644 --- a/datafusion/physical-expr/src/scalar_function.rs +++ b/datafusion/physical-expr/src/scalar_function.rs @@ -140,6 +140,11 @@ impl PhysicalExpr for ScalarFunctionExpr { .map(|e| e.evaluate(batch)) .collect::>>()?; + let input_empty = inputs.is_empty(); + let input_all_scalar = inputs + .iter() + .all(|arg| matches!(arg, ColumnarValue::Scalar(_))); + // evaluate the function 
let output = self.fun.invoke_with_args(ScalarFunctionArgs { args: inputs.as_slice(), @@ -151,11 +156,8 @@ impl PhysicalExpr for ScalarFunctionExpr { if array.len() != batch.num_rows() { // If the arguments are a non-empty slice of scalar values, we can assume that // returning a one-element array is equivalent to returning a scalar. - let preserve_scalar = array.len() == 1 - && !inputs.is_empty() - && inputs - .iter() - .all(|arg| matches!(arg, ColumnarValue::Scalar(_))); + let preserve_scalar = + array.len() == 1 && !input_empty && input_all_scalar; return if preserve_scalar { ScalarValue::try_from_array(array, 0).map(ColumnarValue::Scalar) } else { diff --git a/datafusion/physical-expr/src/utils/mod.rs b/datafusion/physical-expr/src/utils/mod.rs index 1abb11137a52..e4b8b133a315 100644 --- a/datafusion/physical-expr/src/utils/mod.rs +++ b/datafusion/physical-expr/src/utils/mod.rs @@ -311,7 +311,11 @@ pub(crate) mod tests { Ok(input[0].sort_properties) } - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { let args = ColumnarValue::values_to_arrays(args)?; let arr: ArrayRef = match args[0].data_type() { diff --git a/datafusion/proto/tests/cases/mod.rs b/datafusion/proto/tests/cases/mod.rs index 4d69ca075483..f36b7178313a 100644 --- a/datafusion/proto/tests/cases/mod.rs +++ b/datafusion/proto/tests/cases/mod.rs @@ -69,9 +69,10 @@ impl ScalarUDFImpl for MyRegexUdf { plan_err!("regex_udf only accepts Utf8 arguments") } } - fn invoke( + fn invoke_batch( &self, _args: &[ColumnarValue], + _number_rows: usize, ) -> datafusion_common::Result { unimplemented!() } diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index ae2607de00a2..f9c6397f594a 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -1621,7 +1621,11 @@ mod tests { Ok(DataType::Int32) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { 
+ fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { unimplemented!("DummyUDF::invoke") } } diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index ab7e6c8d0bb7..8f2325fa2d78 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -2684,7 +2684,11 @@ impl ScalarUDFImpl for DummyUDF { Ok(self.return_type.clone()) } - fn invoke(&self, _args: &[ColumnarValue]) -> Result { + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { unimplemented!("DummyUDF::invoke") } } diff --git a/docs/source/library-user-guide/adding-udfs.md b/docs/source/library-user-guide/adding-udfs.md index fe3990b90c3c..2044cfaa70fc 100644 --- a/docs/source/library-user-guide/adding-udfs.md +++ b/docs/source/library-user-guide/adding-udfs.md @@ -30,7 +30,8 @@ This page covers how to add UDFs to DataFusion. In particular, it covers how to | Aggregate | A function that takes a group of rows and returns a single value. | [simple_udaf.rs][3] | | Table | A function that takes parameters and returns a `TableProvider` to be used in an query plan. | [simple_udtf.rs][4] | -First we'll talk about adding an Scalar UDF end-to-end, then we'll talk about the differences between the different types of UDFs. +First we'll talk about adding an Scalar UDF end-to-end, then we'll talk about the differences between the different +types of UDFs. ## Adding a Scalar UDF @@ -40,12 +41,14 @@ an Arrow Array with the same number of rows as output. To create a Scalar UDF, you -1. Implement the `ScalarUDFImpl` trait to tell DataFusion about your function such as what types of arguments it takes and how to calculate the results. -2. Create a `ScalarUDF` and register it with `SessionContext::register_udf` so it can be invoked by name. +1. 
Implement the `ScalarUDFImpl` trait to tell DataFusion about your function such as what types of arguments it takes + and how to calculate the results. +2. Create a `ScalarUDF` and register it with `SessionContext::register_udf` so it can be invoked by name. In the following example, we will add a function takes a single i64 and returns a single i64 with 1 added to it: -For brevity, we'll skipped some error handling, but e.g. you may want to check that `args.len()` is the expected number of arguments. +For brevity, we'll skip some error handling, but e.g. you may want to check that `args.len()` is the expected number +of arguments. ### Adding by `impl ScalarUDFImpl` @@ -77,20 +80,20 @@ impl ScalarUDFImpl for AddOne { fn name(&self) -> &str { "add_one" } fn signature(&self) -> &Signature { &self.signature } fn return_type(&self, args: &[DataType]) -> Result { - if !matches!(args.get(0), Some(&DataType::Int32)) { - return plan_err!("add_one only accepts Int32 arguments"); - } - Ok(DataType::Int32) + if !matches!(args.get(0), Some(&DataType::Int32)) { + return plan_err!("add_one only accepts Int32 arguments"); + } + Ok(DataType::Int32) } // The actual implementation would add one to the argument - fn invoke(&self, args: &[ColumnarValue]) -> Result { + fn invoke_batch(&self, args: &[ColumnarValue], _number_rows: usize) -> Result { let args = columnar_values_to_array(args)?; let i64s = as_int64_array(&args[0])?; let new_array = i64s - .iter() - .map(|array_elem| array_elem.map(|value| value + 1)) - .collect::(); + .iter() + .map(|array_elem| array_elem.map(|value| value + 1)) + .collect::(); Ok(Arc::new(new_array)) } } @@ -130,31 +133,34 @@ pub fn add_one(args: &[ColumnarValue]) -> Result { let i64s = as_int64_array(&args[0])?; let new_array = i64s - .iter() - .map(|array_elem| array_elem.map(|value| value + 1)) - .collect::(); + .iter() + .map(|array_elem| array_elem.map(|value| value + 1)) + .collect::(); Ok(Arc::new(new_array)) } ``` -This "works" in isolation,
i.e. if you have a slice of `ArrayRef`s, you can call `add_one` and it will return a new `ArrayRef` with 1 added to each value. +This "works" in isolation, i.e. if you have a slice of `ArrayRef`s, you can call `add_one` and it will return a new +`ArrayRef` with 1 added to each value. ```rust let input = vec![Some(1), None, Some(3)]; let input = Arc::new(Int64Array::from(input)) as ArrayRef; -let result = add_one(&[input]).unwrap(); +let result = add_one(&[input]).unwrap(); let result = result.as_any().downcast_ref::().unwrap(); assert_eq!(result, &Int64Array::from(vec![Some(2), None, Some(4)])); ``` -The challenge however is that DataFusion doesn't know about this function. We need to register it with DataFusion so that it can be used in the context of a query. +The challenge however is that DataFusion doesn't know about this function. We need to register it with DataFusion so +that it can be used in the context of a query. #### Registering a Scalar UDF -To register a Scalar UDF, you need to wrap the function implementation in a [`ScalarUDF`] struct and then register it with the `SessionContext`. +To register a Scalar UDF, you need to wrap the function implementation in a [`ScalarUDF`] struct and then register it +with the `SessionContext`. DataFusion provides the [`create_udf`] and helper functions to make this easier. ```rust @@ -163,11 +169,11 @@ use datafusion::arrow::datatypes::DataType; use std::sync::Arc; let udf = create_udf( - "add_one", - vec![DataType::Int64], - Arc::new(DataType::Int64), - Volatility::Immutable, - Arc::new(add_one), +"add_one", +vec![DataType::Int64], +Arc::new(DataType::Int64), +Volatility::Immutable, +Arc::new(add_one), ); ``` @@ -179,9 +185,13 @@ let udf = create_udf( A few things to note: - The first argument is the name of the function. This is the name that will be used in SQL queries. -- The second argument is a vector of `DataType`s. This is the list of argument types that the function accepts. I.e.
in this case, the function accepts a single `Int64` argument. +- The second argument is a vector of `DataType`s. This is the list of argument types that the function accepts. I.e. in + this case, the function accepts a single `Int64` argument. - The third argument is the return type of the function. I.e. in this case, the function returns an `Int64`. -- The fourth argument is the volatility of the function. In short, this is used to determine if the function's performance can be optimized in some situations. In this case, the function is `Immutable` because it always returns the same value for the same input. A random number generator would be `Volatile` because it returns a different value for the same input. +- The fourth argument is the volatility of the function. In short, this is used to determine if the function's + performance can be optimized in some situations. In this case, the function is `Immutable` because it always returns + the same value for the same input. A random number generator would be `Volatile` because it returns a different value + for the same input. - The fifth argument is the function implementation. This is the function that we defined above. That gives us a `ScalarUDF` that we can register with the `SessionContext`: @@ -199,12 +209,13 @@ At this point, you can use the `add_one` function in your query: ```rust let sql = "SELECT add_one(1)"; -let df = ctx.sql(&sql).await.unwrap(); +let df = ctx.sql(&sql).await.unwrap(); ``` ## Adding a Window UDF -Scalar UDFs are functions that take a row of data and return a single value. Window UDFs are similar, but they also have access to the rows around them. Access to the proximal rows is helpful, but adds some complexity to the implementation. +Scalar UDFs are functions that take a row of data and return a single value. Window UDFs are similar, but they also have +access to the rows around them. Access to the proximal rows is helpful, but adds some complexity to the implementation.
For example, we will declare a user defined window function that computes a moving average. @@ -277,7 +288,8 @@ fn make_partition_evaluator() -> Result> { ### Registering a Window UDF -To register a Window UDF, you need to wrap the function implementation in a [`WindowUDF`] struct and then register it with the `SessionContext`. DataFusion provides the [`create_udwf`] helper functions to make this easier. +To register a Window UDF, you need to wrap the function implementation in a [`WindowUDF`] struct and then register it +with the `SessionContext`. DataFusion provides the [`create_udwf`] helper functions to make this easier. There is a lower level API with more functionality but is more complex, that is documented in [`advanced_udwf.rs`]. ```rust @@ -287,11 +299,11 @@ use std::sync::Arc; // here is where we define the UDWF. We also declare its signature: let smooth_it = create_udwf( - "smooth_it", - DataType::Float64, - Arc::new(DataType::Float64), - Volatility::Immutable, - Arc::new(make_partition_evaluator), +"smooth_it", +DataType::Float64, +Arc::new(DataType::Float64), +Volatility::Immutable, +Arc::new(make_partition_evaluator), ); ``` @@ -302,9 +314,13 @@ let smooth_it = create_udwf( The `create_udwf` has five arguments to check: - The first argument is the name of the function. This is the name that will be used in SQL queries. -- **The second argument** is the `DataType` of input array (attention: this is not a list of arrays). I.e. in this case, the function accepts `Float64` as argument. +- **The second argument** is the `DataType` of input array (attention: this is not a list of arrays). I.e. in this case, + the function accepts `Float64` as argument. - The third argument is the return type of the function. I.e. in this case, the function returns an `Float64`. -- The fourth argument is the volatility of the function. In short, this is used to determine if the function's performance can be optimized in some situations. 
In this case, the function is `Immutable` because it always returns the same value for the same input. A random number generator would be `Volatile` because it returns a different value for the same input. +- The fourth argument is the volatility of the function. In short, this is used to determine if the function's + performance can be optimized in some situations. In this case, the function is `Immutable` because it always returns + the same value for the same input. A random number generator would be `Volatile` because it returns a different value + for the same input. - **The fifth argument** is the function implementation. This is the function that we defined above. That gives us a `WindowUDF` that we can register with the `SessionContext`: @@ -319,7 +335,8 @@ ctx.register_udwf(smooth_it); At this point, you can use the `smooth_it` function in your query: -For example, if we have a [`cars.csv`](https://github.com/apache/datafusion/blob/main/datafusion/core/tests/data/cars.csv) whose contents look like +For example, if we have a +[`cars.csv`](https://github.com/apache/datafusion/blob/main/datafusion/core/tests/data/cars.csv) whose contents look like ``` car,speed,time @@ -336,11 +353,11 @@ Then, we can query like below: use datafusion::datasource::file_format::options::CsvReadOptions; // register csv table first let csv_path = "cars.csv".to_string(); -ctx.register_csv("cars", &csv_path, CsvReadOptions::default().has_header(true)).await?; +ctx.register_csv("cars", & csv_path, CsvReadOptions::default ().has_header(true)).await?; // do query with smooth_it let df = ctx - .sql( - "SELECT \ +.sql( +"SELECT \ car, \ speed, \ smooth_it(speed) OVER (PARTITION BY car ORDER BY time) as smooth_speed,\ @@ -348,8 +365,8 @@ let df = ctx from cars \ ORDER BY \ car", - ) - .await?; +) +.await?; // print the results df.show().await?; ``` @@ -379,7 +396,8 @@ the output will be like: ## Adding an Aggregate UDF -Aggregate UDFs are functions that take a group of rows and return a single
value. These are akin to SQL's `SUM` or `COUNT` functions. +Aggregate UDFs are functions that take a group of rows and return a single value. These are akin to SQL's `SUM` or +`COUNT` functions. For example, we will declare a single-type, single return type UDAF that computes the geometric mean. @@ -474,7 +492,8 @@ impl Accumulator for GeometricMean { ### Registering an Aggregate UDF -To register an Aggregate UDF, you need to wrap the function implementation in an [`AggregateUDF`] struct and then register it with the `SessionContext`. DataFusion provides the [`create_udaf`] helper functions to make this easier. +To register an Aggregate UDF, you need to wrap the function implementation in an [`AggregateUDF`] struct and then register +it with the `SessionContext`. DataFusion provides the [`create_udaf`] helper functions to make this easier. There is also a lower-level API that offers more functionality but is more complex; it is documented in [`advanced_udaf.rs`]. ```rust @@ -484,17 +503,17 @@ use std::sync::Arc; // here is where we define the UDAF. We also declare its signature: let geometric_mean = create_udaf( - // the name; used to represent it in plan descriptions and in the registry, to use in SQL. - "geo_mean", - // the input type; DataFusion guarantees that the first entry of `values` in `update` has this type. - vec![DataType::Float64], - // the return type; DataFusion expects this to match the type returned by `evaluate`. - Arc::new(DataType::Float64), - Volatility::Immutable, - // This is the accumulator factory; DataFusion uses it to create new accumulators. - Arc::new(|_| Ok(Box::new(GeometricMean::new()))), - // This is the description of the state. `state()` must match the types here. - Arc::new(vec![DataType::Float64, DataType::UInt32]), +// the name; used to represent it in plan descriptions and in the registry, to use in SQL. +"geo_mean", +// the input type; DataFusion guarantees that the first entry of `values` in `update` has this type.
+vec![DataType::Float64], +// the return type; DataFusion expects this to match the type returned by `evaluate`. +Arc::new(DataType::Float64), +Volatility::Immutable, +// This is the accumulator factory; DataFusion uses it to create new accumulators. +Arc::new( | _ | Ok(Box::new(GeometricMean::new()))), +// This is the description of the state. `state()` must match the types here. +Arc::new(vec![DataType::Float64, DataType::UInt32]), ); ``` @@ -505,9 +524,13 @@ let geometric_mean = create_udaf( The `create_udaf` has six arguments to check: - The first argument is the name of the function. This is the name that will be used in SQL queries. -- The second argument is a vector of `DataType`s. This is the list of argument types that the function accepts. I.e. in this case, the function accepts a single `Float64` argument. +- The second argument is a vector of `DataType`s. This is the list of argument types that the function accepts. I.e. in + this case, the function accepts a single `Float64` argument. - The third argument is the return type of the function. I.e. in this case, the function returns a `Float64`. -- The fourth argument is the volatility of the function. In short, this is used to determine if the function's performance can be optimized in some situations. In this case, the function is `Immutable` because it always returns the same value for the same input. A random number generator would be `Volatile` because it returns a different value for the same input. +- The fourth argument is the volatility of the function. In short, this is used to determine if the function's + performance can be optimized in some situations. In this case, the function is `Immutable` because it always returns + the same value for the same input. A random number generator would be `Volatile` because it returns a different value + for the same input. - The fifth argument is the function implementation. This is the function that we defined above.
- The sixth argument is the description of the state, which will be passed between execution stages. @@ -531,9 +554,14 @@ let df = ctx.sql("SELECT geo_mean(a) FROM t").await?; A User-Defined Table Function (UDTF) is a function that takes parameters and returns a `TableProvider`. -Because we're returning a `TableProvider`, in this example we'll use the `MemTable` data source to represent a table. This is a simple struct that holds a set of RecordBatches in memory and treats them as a table. In your case, this would be replaced with your own struct that implements `TableProvider`. +Because we're returning a `TableProvider`, in this example we'll use the `MemTable` data source to represent a table. +This is a simple struct that holds a set of RecordBatches in memory and treats them as a table. In your case, this would +be replaced with your own struct that implements `TableProvider`. -While this is a simple example for illustrative purposes, UDTFs have a lot of potential use cases and can be particularly useful for reading data from external sources and interactive analysis. For example, see the [example][4] for a working example that reads from a CSV file. As another example, you could use the built-in UDTF `parquet_metadata` in the CLI to read the metadata from a Parquet file. +While this is a simple example for illustrative purposes, UDTFs have a lot of potential use cases and can be +particularly useful for reading data from external sources and interactive analysis. For example, see the [example][4] +for a working example that reads from a CSV file. As another example, you could use the built-in UDTF `parquet_metadata` +in the CLI to read the metadata from a Parquet file.
```console > select filename, row_group_id, row_group_num_rows, row_group_bytes, stats_min, stats_max from parquet_metadata('./benchmarks/data/hits.parquet') where column_id = 17 limit 10; @@ -555,9 +583,12 @@ While this is a simple example for illustrative purposes, UDTFs have a lot of po ### Writing the UDTF -The simple UDTF used here takes a single `Int64` argument and returns a table with a single column with the value of the argument. To create a function in DataFusion, you need to implement the `TableFunctionImpl` trait. This trait has a single method, `call`, that takes a slice of `Expr`s and returns a `Result<Arc<dyn TableProvider>>`. +The simple UDTF used here takes a single `Int64` argument and returns a table with a single column with the value of the +argument. To create a function in DataFusion, you need to implement the `TableFunctionImpl` trait. This trait has a +single method, `call`, that takes a slice of `Expr`s and returns a `Result<Arc<dyn TableProvider>>`. -In the `call` method, you parse the input `Expr`s and return a `TableProvider`. You might also want to do some validation of the input `Expr`s, e.g. checking that the number of arguments is correct. +In the `call` method, you parse the input `Expr`s and return a `TableProvider`. You might also want to do some +validation of the input `Expr`s, e.g. checking that the number of arguments is correct. ```rust use datafusion::common::plan_err; @@ -600,7 +631,7 @@ use datafusion::execution::context::SessionContext; let ctx = SessionContext::new(); -ctx.register_udtf("echo", Arc::new(EchoFunction::default())); +ctx.register_udtf("echo", Arc::new(EchoFunction::default ())); ``` And if all goes well, you can use it in your query: @@ -611,7 +642,7 @@ use datafusion::arrow::util::pretty; let df = ctx.sql("SELECT * FROM echo(1)").await?; let results = df.collect().await?; -pretty::print_batches(&results)?; +pretty::print_batches( & results) ?; // +---+ // | a | // +---+