Skip to content

Commit

Permalink
fix(udf): udf call with empty table and batch size (#3604)
Browse files Browse the repository at this point in the history
  • Loading branch information
kevinzwang authored Dec 19, 2024
1 parent 7f2d3b1 commit c30f6a8
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 1 deletion.
2 changes: 1 addition & 1 deletion daft/udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def get_args_for_slice(start: int, end: int):

return args, kwargs

if batch_size is None:
if batch_size is None or len(evaluated_expressions[0]) <= batch_size:
args, kwargs = get_args_for_slice(0, len(evaluated_expressions[0]))
try:
results = [func(*args, **kwargs)]
Expand Down
16 changes: 16 additions & 0 deletions tests/expressions/test_udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,3 +423,19 @@ def noop(data):

with pytest.raises(OverflowError):
table.eval_expression_list([noop.override_options(batch_size=-1)(col("a"))])


@pytest.mark.parametrize("batch_size", [None, 1, 2])
@pytest.mark.parametrize("use_actor_pool", [False, True])
def test_udf_empty(batch_size, use_actor_pool):
df = daft.from_pydict({"a": []})

@udf(return_dtype=DataType.int64(), batch_size=batch_size)
def identity(data):
return data

if use_actor_pool:
identity = identity.with_concurrency(2)

result = df.select(identity(col("a")))
assert result.to_pydict() == {"a": []}

0 comments on commit c30f6a8

Please sign in to comment.