Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ji/add invoke fix tests #2

Draft
wants to merge 34 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
ae73371
Added support for `ScalarUDFImpl::invoke_with_return_type` where the …
joseph-isaacs Nov 19, 2024
6b3db8c
Move from invoke to invoke batch
joseph-isaacs Nov 19, 2024
1e99688
ex
joseph-isaacs Nov 19, 2024
8c98325
of
joseph-isaacs Nov 19, 2024
9110ca9
docs
joseph-isaacs Nov 19, 2024
9877079
fx
joseph-isaacs Nov 19, 2024
a2695ff
fx
joseph-isaacs Nov 20, 2024
912f691
fx
joseph-isaacs Nov 20, 2024
423b260
fix
joseph-isaacs Nov 20, 2024
b04840a
fix
joseph-isaacs Nov 20, 2024
88e5608
Do not yet deprecate invoke_batch, add docs to invoke_with_args
alamb Nov 21, 2024
9ec7e4a
add ticket reference
alamb Nov 21, 2024
99a65b5
Merge pull request #3 from alamb/alamb/docs_and_dep
joseph-isaacs Nov 21, 2024
47ee14f
Merge branch 'ji/add-invoke-with-return' into ji/add-invoke-fix-tests
joseph-isaacs Nov 21, 2024
a1b266e
fix
joseph-isaacs Nov 21, 2024
9f40559
fix
joseph-isaacs Nov 22, 2024
e54519f
fix
joseph-isaacs Nov 22, 2024
1fbcf2e
Merge branch 'main' into ji/add-invoke-fix-tests
joseph-isaacs Nov 22, 2024
dc3fee6
fix
joseph-isaacs Nov 22, 2024
41ada4c
fmt
joseph-isaacs Nov 22, 2024
9a6059a
fmt
joseph-isaacs Nov 22, 2024
52b33c9
remove invoke
joseph-isaacs Nov 22, 2024
c159c26
fix agg
joseph-isaacs Nov 22, 2024
11d0ee9
unused
joseph-isaacs Nov 22, 2024
33923c7
update func docs
joseph-isaacs Nov 22, 2024
51140b8
update tests and remove deprecation
joseph-isaacs Nov 22, 2024
f997f1c
remove dep
joseph-isaacs Nov 22, 2024
5b04445
oops
joseph-isaacs Nov 22, 2024
40b1860
internal as vec
joseph-isaacs Nov 22, 2024
609cee2
dep
joseph-isaacs Nov 22, 2024
b9835b9
fixup
joseph-isaacs Nov 25, 2024
55101eb
fixup
joseph-isaacs Nov 25, 2024
153b186
fix
joseph-isaacs Nov 25, 2024
73629d6
fix
joseph-isaacs Nov 25, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion datafusion-examples/examples/advanced_udf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,11 @@ impl ScalarUDFImpl for PowUdf {
///
/// However, it also means the implementation is more complex than when
/// using `create_udf`.
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
fn invoke_batch(
&self,
args: &[ColumnarValue],
_number_rows: usize,
) -> Result<ColumnarValue> {
// DataFusion has arranged for the correct inputs to be passed to this
// function, but we check again to make sure
assert_eq!(args.len(), 2);
Expand Down
8 changes: 5 additions & 3 deletions datafusion-examples/examples/function_factory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ use datafusion_common::tree_node::{Transformed, TreeNode};
use datafusion_common::{exec_err, internal_err, DataFusionError};
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
use datafusion_expr::{CreateFunction, Expr, ScalarUDF, ScalarUDFImpl, Signature};
use datafusion_expr::{
CreateFunction, Expr, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature,
};

/// This example shows how to utilize [FunctionFactory] to implement simple
/// SQL-macro like functions using a `CREATE FUNCTION` statement. The same
Expand Down Expand Up @@ -132,9 +134,9 @@ impl ScalarUDFImpl for ScalarFunctionWrapper {
Ok(self.return_type.clone())
}

fn invoke(
fn invoke_with_args(
&self,
_args: &[datafusion_expr::ColumnarValue],
_args: ScalarFunctionArgs,
) -> Result<datafusion_expr::ColumnarValue> {
// Since this function is always simplified to another expression, it
// should never actually be invoked
Expand Down
6 changes: 5 additions & 1 deletion datafusion-examples/examples/optimizer_rule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,11 @@ impl ScalarUDFImpl for MyEq {
Ok(DataType::Boolean)
}

fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
fn invoke_batch(
&self,
_args: &[ColumnarValue],
_number_rows: usize,
) -> Result<ColumnarValue> {
// this example simply returns "true" which is not what a real
// implementation would do.
Ok(ColumnarValue::Scalar(ScalarValue::from(true)))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1382,7 +1382,11 @@ mod tests {
Ok(DataType::Int32)
}

fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
fn invoke_batch(
&self,
_args: &[ColumnarValue],
_number_rows: usize,
) -> Result<ColumnarValue> {
unimplemented!("DummyUDF::invoke")
}
}
Expand Down
6 changes: 5 additions & 1 deletion datafusion/core/tests/fuzz_cases/equivalence/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -581,7 +581,11 @@ impl ScalarUDFImpl for TestScalarUDF {
Ok(input[0].sort_properties)
}

fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
fn invoke_batch(
&self,
args: &[ColumnarValue],
_number_rows: usize,
) -> Result<ColumnarValue> {
let args = ColumnarValue::values_to_arrays(args)?;

let arr: ArrayRef = match args[0].data_type() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -520,10 +520,6 @@ impl ScalarUDFImpl for AddIndexToStringVolatileScalarUDF {
Ok(self.return_type.clone())
}

fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
not_impl_err!("index_with_offset function does not accept arguments")
}

fn invoke_batch(
&self,
args: &[ColumnarValue],
Expand Down Expand Up @@ -720,7 +716,11 @@ impl ScalarUDFImpl for CastToI64UDF {
Ok(ExprSimplifyResult::Simplified(new_expr))
}

fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
fn invoke_batch(
&self,
_args: &[ColumnarValue],
_number_rows: usize,
) -> Result<ColumnarValue> {
unimplemented!("Function should have been simplified prior to evaluation")
}
}
Expand Down Expand Up @@ -848,7 +848,11 @@ impl ScalarUDFImpl for TakeUDF {
}

// The actual implementation
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
fn invoke_batch(
&self,
args: &[ColumnarValue],
_number_rows: usize,
) -> Result<ColumnarValue> {
let take_idx = match &args[2] {
ColumnarValue::Scalar(ScalarValue::Int64(Some(v))) if v < &2 => *v as usize,
_ => unreachable!(),
Expand Down Expand Up @@ -956,7 +960,11 @@ impl ScalarUDFImpl for ScalarFunctionWrapper {
Ok(self.return_type.clone())
}

fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
fn invoke_batch(
&self,
_args: &[ColumnarValue],
_number_rows: usize,
) -> Result<ColumnarValue> {
internal_err!("This function should not get invoked!")
}

Expand Down Expand Up @@ -1240,7 +1248,11 @@ impl ScalarUDFImpl for MyRegexUdf {
}
}

fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
fn invoke_batch(
&self,
args: &[ColumnarValue],
_number_rows: usize,
) -> Result<ColumnarValue> {
match args {
[ColumnarValue::Scalar(ScalarValue::Utf8(value))] => {
Ok(ColumnarValue::Scalar(ScalarValue::Boolean(
Expand Down
7 changes: 5 additions & 2 deletions datafusion/expr/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2389,7 +2389,7 @@ mod test {
use crate::expr_fn::col;
use crate::{
case, lit, qualified_wildcard, wildcard, wildcard_with_options, ColumnarValue,
ScalarUDF, ScalarUDFImpl, Volatility,
ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Volatility,
};
use sqlparser::ast;
use sqlparser::ast::{Ident, IdentWithAlias};
Expand Down Expand Up @@ -2518,7 +2518,10 @@ mod test {
Ok(DataType::Utf8)
}

fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
fn invoke_with_args(
&self,
_args: ScalarFunctionArgs,
) -> Result<ColumnarValue> {
Ok(ColumnarValue::Scalar(ScalarValue::from("a")))
}
}
Expand Down
6 changes: 5 additions & 1 deletion datafusion/expr/src/expr_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,11 @@ impl ScalarUDFImpl for SimpleScalarUDF {
Ok(self.return_type.clone())
}

fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
fn invoke_batch(
&self,
args: &[ColumnarValue],
_number_rows: usize,
) -> Result<ColumnarValue> {
(self.fun)(args)
}
}
Expand Down
3 changes: 0 additions & 3 deletions datafusion/expr/src/udf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,13 +213,11 @@ impl ScalarUDF {
self.inner.is_nullable(args, schema)
}

#[deprecated(since = "43.0.0", note = "Use `invoke_with_args` instead")]
pub fn invoke_batch(
&self,
args: &[ColumnarValue],
number_rows: usize,
) -> Result<ColumnarValue> {
#[allow(deprecated)]
self.inner.invoke_batch(args, number_rows)
}

Expand Down Expand Up @@ -545,7 +543,6 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
/// [`ColumnarValue::values_to_arrays`] can be used to convert the arguments
/// to arrays, which will likely be simpler code, but be slower.
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
#[allow(deprecated)]
self.invoke_batch(args.args, args.number_rows)
}

Expand Down
4 changes: 2 additions & 2 deletions datafusion/functions-nested/benches/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,9 @@ fn criterion_benchmark(c: &mut Criterion) {

b.iter(|| {
black_box(
#[allow(deprecated)] // TODO use invoke_batch
// TODO use invoke_with_args
map_udf()
.invoke(&[keys.clone(), values.clone()])
.invoke_batch(&[keys.clone(), values.clone()], 1)
.expect("map should work on valid values"),
);
});
Expand Down
18 changes: 15 additions & 3 deletions datafusion/functions-nested/src/array_has.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,11 @@ impl ScalarUDFImpl for ArrayHas {
Ok(DataType::Boolean)
}

fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
fn invoke_batch(
&self,
args: &[ColumnarValue],
_number_rows: usize,
) -> Result<ColumnarValue> {
match &args[1] {
ColumnarValue::Array(array_needle) => {
// the needle is already an array, convert the haystack to an array of the same length
Expand Down Expand Up @@ -322,7 +326,11 @@ impl ScalarUDFImpl for ArrayHasAll {
Ok(DataType::Boolean)
}

fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
fn invoke_batch(
&self,
args: &[ColumnarValue],
_number_rows: usize,
) -> Result<ColumnarValue> {
make_scalar_function(array_has_all_inner)(args)
}

Expand Down Expand Up @@ -403,7 +411,11 @@ impl ScalarUDFImpl for ArrayHasAny {
Ok(DataType::Boolean)
}

fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
fn invoke_batch(
&self,
args: &[ColumnarValue],
_number_rows: usize,
) -> Result<ColumnarValue> {
make_scalar_function(array_has_any_inner)(args)
}

Expand Down
6 changes: 5 additions & 1 deletion datafusion/functions-nested/src/cardinality.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,11 @@ impl ScalarUDFImpl for Cardinality {
})
}

fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
fn invoke_batch(
&self,
args: &[ColumnarValue],
_number_rows: usize,
) -> Result<ColumnarValue> {
make_scalar_function(cardinality_inner)(args)
}

Expand Down
18 changes: 15 additions & 3 deletions datafusion/functions-nested/src/concat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,11 @@ impl ScalarUDFImpl for ArrayAppend {
Ok(arg_types[0].clone())
}

fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
fn invoke_batch(
&self,
args: &[ColumnarValue],
_number_rows: usize,
) -> Result<ColumnarValue> {
make_scalar_function(array_append_inner)(args)
}

Expand Down Expand Up @@ -182,7 +186,11 @@ impl ScalarUDFImpl for ArrayPrepend {
Ok(arg_types[1].clone())
}

fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
fn invoke_batch(
&self,
args: &[ColumnarValue],
_number_rows: usize,
) -> Result<ColumnarValue> {
make_scalar_function(array_prepend_inner)(args)
}

Expand Down Expand Up @@ -302,7 +310,11 @@ impl ScalarUDFImpl for ArrayConcat {
Ok(expr_type)
}

fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
fn invoke_batch(
&self,
args: &[ColumnarValue],
_number_rows: usize,
) -> Result<ColumnarValue> {
make_scalar_function(array_concat_inner)(args)
}

Expand Down
12 changes: 10 additions & 2 deletions datafusion/functions-nested/src/dimension.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,11 @@ impl ScalarUDFImpl for ArrayDims {
})
}

fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
fn invoke_batch(
&self,
args: &[ColumnarValue],
_number_rows: usize,
) -> Result<ColumnarValue> {
make_scalar_function(array_dims_inner)(args)
}

Expand Down Expand Up @@ -166,7 +170,11 @@ impl ScalarUDFImpl for ArrayNdims {
})
}

fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
fn invoke_batch(
&self,
args: &[ColumnarValue],
_number_rows: usize,
) -> Result<ColumnarValue> {
make_scalar_function(array_ndims_inner)(args)
}

Expand Down
6 changes: 5 additions & 1 deletion datafusion/functions-nested/src/distance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,11 @@ impl ScalarUDFImpl for ArrayDistance {
Ok(result)
}

fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
fn invoke_batch(
&self,
args: &[ColumnarValue],
_number_rows: usize,
) -> Result<ColumnarValue> {
make_scalar_function(array_distance_inner)(args)
}

Expand Down
6 changes: 5 additions & 1 deletion datafusion/functions-nested/src/empty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,11 @@ impl ScalarUDFImpl for ArrayEmpty {
})
}

fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
fn invoke_batch(
&self,
args: &[ColumnarValue],
_number_rows: usize,
) -> Result<ColumnarValue> {
make_scalar_function(array_empty_inner)(args)
}

Expand Down
6 changes: 5 additions & 1 deletion datafusion/functions-nested/src/except.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,11 @@ impl ScalarUDFImpl for ArrayExcept {
}
}

fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
fn invoke_batch(
&self,
args: &[ColumnarValue],
_number_rows: usize,
) -> Result<ColumnarValue> {
make_scalar_function(array_except_inner)(args)
}

Expand Down
Loading