Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(rust, python): allow arithmetic operations between numeric Series and list Series #18901

Closed
wants to merge 14 commits into from
Closed
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 30 additions & 6 deletions crates/polars-core/src/series/arithmetic/borrowed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -392,23 +392,35 @@ pub(crate) fn coerce_lhs_rhs<'a>(
if let Some(result) = coerce_time_units(lhs, rhs) {
return Ok(result);
}
let dtype = match (lhs.dtype(), rhs.dtype()) {
let (left_dtype, right_dtype) = (lhs.dtype(), rhs.dtype());
let leaf_super_dtype = match (left_dtype, right_dtype) {
#[cfg(feature = "dtype-struct")]
(DataType::Struct(_), DataType::Struct(_)) => {
return Ok((Cow::Borrowed(lhs), Cow::Borrowed(rhs)))
},
_ => try_get_supertype(lhs.dtype(), rhs.dtype())?,
_ => try_get_supertype(left_dtype.leaf_dtype(), right_dtype.leaf_dtype())?,
};

let left = if lhs.dtype() == &dtype {
let mut new_left_dtype = left_dtype.cast_leaf(leaf_super_dtype.clone());
let mut new_right_dtype = right_dtype.cast_leaf(leaf_super_dtype);

// If we have e.g. Array and List, we want to convert those too.
if (left_dtype.is_list() && right_dtype.is_array())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we should do that. I want those casts to be explicit for now.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By explicit do you mean "explicitly cast list to array" and vice versa as needed? This section is definitely necessary in some form, if I remove it:

[gw4] linux -- Python 3.12.3 /home/itamarst/devel/polars/.venv/bin/python3
tests/unit/datatypes/test_array.py:106: in test_array_list_supertype
    result = s1 == s2
polars/series/series.py:773: in __eq__
    return self._comp(other, "eq")
polars/series/series.py:753: in _comp
    return self._from_pyseries(getattr(self._s, op)(other._s))
E   pyo3_runtime.PanicException: called `Result::unwrap()` on an `Err` value: SchemaMismatch(ErrString("invalid series dtype: expected `FixedSizeList`, got `list[f64]`"))
-------------------------------- Captured stderr call --------------------------------thread '<unnamed>' panicked at crates/polars-core/src/series/comparison.rs:132:9:
called `Result::unwrap()` on an `Err` value: SchemaMismatch(ErrString("invalid series dtype: expected `FixedSizeList`, got `list[f64]`"))
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
_______________________________ test_eq_array_cmp_list _______________________________[gw10] linux -- Python 3.12.3 /home/itamarst/devel/polars/.venv/bin/python3
tests/unit/series/test_equals.py:57: in test_eq_array_cmp_list
    result = s == [1, 2]
polars/series/series.py:773: in __eq__
    return self._comp(other, "eq")
polars/series/series.py:753: in _comp
    return self._from_pyseries(getattr(self._s, op)(other._s))
E   pyo3_runtime.PanicException: called `Result::unwrap()` on an `Err` value: SchemaMismatch(ErrString("invalid series dtype: expected `FixedSizeList`, got `list[i64]`"))

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the user should cast explicitly. And we shuold return an error instead of panicking if they pass wrong types.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can do that, but given this will involving changing the behavior of those two tests (from e.g. "these two things are equal" to "this now errors asking for a cast"), this is a backwards incompatible change. So as long as that's OK.

|| (left_dtype.is_array() && right_dtype.is_list())
{
new_left_dtype = try_get_supertype(&new_left_dtype, &new_right_dtype)?;
new_right_dtype = new_left_dtype.clone();
}

let left = if lhs.dtype() == &new_left_dtype {
Cow::Borrowed(lhs)
} else {
Cow::Owned(lhs.cast(&dtype)?)
Cow::Owned(lhs.cast(&new_left_dtype)?)
};
let right = if rhs.dtype() == &dtype {
let right = if rhs.dtype() == &new_right_dtype {
Cow::Borrowed(rhs)
} else {
Cow::Owned(rhs.cast(&dtype)?)
Cow::Owned(rhs.cast(&new_right_dtype)?)
};
Ok((left, right))
}
Expand Down Expand Up @@ -522,6 +534,12 @@ impl Add for &Series {
(DataType::Struct(_), DataType::Struct(_)) => {
_struct_arithmetic(self, rhs, |a, b| a.add(b))
},
(left_dtype, DataType::List(_)) if left_dtype.is_numeric() => {
// Lists have implementation logic for rhs numeric:
let mut result = (rhs + self)?;
result.rename(self.name().clone());
Ok(result)
},
_ => {
let (lhs, rhs) = coerce_lhs_rhs(self, rhs)?;
lhs.add_to(rhs.as_ref())
Expand Down Expand Up @@ -574,6 +592,12 @@ impl Mul for &Series {
let out = rhs.multiply(self)?;
Ok(out.with_name(self.name().clone()))
},
(left_dtype, DataType::List(_)) if left_dtype.is_numeric() => {
// Lists have implementation logic for rhs numeric:
let mut result = (rhs * self)?;
result.rename(self.name().clone());
Ok(result)
},
_ => {
let (lhs, rhs) = coerce_lhs_rhs(self, rhs)?;
lhs.multiply(rhs.as_ref())
Expand Down
116 changes: 102 additions & 14 deletions crates/polars-core/src/series/arithmetic/list_borrowed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,23 +53,112 @@ fn lists_same_shapes(left: &ArrayRef, right: &ArrayRef) -> bool {
}
}

/// Arithmetic operations that can be applied to a Series
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there some existing code that does this? There's a whole bunch of similar things but I may've missed an actual implementation.

#[derive(Clone, Copy)]
enum Op {
Add,
Subtract,
Multiply,
Divide,
Remainder,
}

impl Op {
/// Apply the operation to a pair of Series.
fn apply_with_series(&self, lhs: &Series, rhs: &Series) -> PolarsResult<Series> {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to do these two functions as a single generic function and failed...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why exactly? Seems this is possible with generics.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I couldn't figure out the constraints that work. I guess I can try another round and report back when I get stuck.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the issue is that when you add two &Series, you get a PolarsResult<Series>, and when you add a &Series and u8 you get Series. Since the return types don't match, the constraints conflict (the former wants T: std:ops::Add<Output=PolarsResult<Series>>, the latter wants T: std:ops::Add<Output=Series>).

use Op::*;

match self {
Add => lhs + rhs,
Subtract => lhs - rhs,
Multiply => lhs * rhs,
Divide => lhs / rhs,
Remainder => lhs % rhs,
}
}

/// Apply the operation to a Series and scalar.
fn apply_with_scalar<T: Num + NumCast>(&self, lhs: &Series, rhs: T) -> Series {
use Op::*;

match self {
Add => lhs + rhs,
Subtract => lhs - rhs,
Multiply => lhs * rhs,
Divide => lhs / rhs,
Remainder => lhs % rhs,
}
}
}

impl ListChunked {
/// Helper function for NumOpsDispatchInner implementation for ListChunked.
///
/// Run the given `op` on `self` and `rhs`, for cases where `rhs` has a
/// primitive numeric dtype.
fn arithm_helper_numeric(&self, rhs: &Series, op: Op) -> PolarsResult<Series> {
let mut result = AnonymousListBuilder::new(
self.name().clone(),
self.len(),
Some(self.inner_dtype().clone()),
);
macro_rules! combine {
($ca:expr) => {{
self.amortized_iter()
.zip($ca.iter())
.map(|(a, b)| {
let (Some(a_owner), Some(b)) = (a, b) else {
// Operations with nulls always result in nulls:
return Ok(None);
};
let a = a_owner.as_ref().rechunk();
let leaf_result = op.apply_with_scalar(&a.get_leaf_array(), b);
let result =
reshape_list_based_on(&leaf_result.chunks()[0], &a.chunks()[0]);
Ok(Some(result))
})
.collect::<PolarsResult<Vec<Option<Box<dyn Array>>>>>()?
}};
}
let combined = downcast_as_macro_arg_physical!(rhs, combine);
for arr in combined.iter() {
if let Some(arr) = arr {
result.append_array(arr.as_ref());
} else {
result.append_null();
}
}
Ok(result.finish().into())
}

/// Helper function for NumOpsDispatchInner implementation for ListChunked.
///
/// Run the given `op` on `self` and `rhs`.
fn arithm_helper(
&self,
rhs: &Series,
op: &dyn Fn(&Series, &Series) -> PolarsResult<Series>,
has_nulls: Option<bool>,
) -> PolarsResult<Series> {
fn arithm_helper(&self, rhs: &Series, op: Op, has_nulls: Option<bool>) -> PolarsResult<Series> {
polars_ensure!(
self.dtype().leaf_dtype().is_numeric() && rhs.dtype().leaf_dtype().is_numeric(),
InvalidOperation: "List Series can only do arithmetic operations if they and other Series are numeric, left and right dtypes are {:?} and {:?}",
self.dtype(),
rhs.dtype()
);
polars_ensure!(
self.len() == rhs.len(),
InvalidOperation: "can only do arithmetic operations on Series of the same size; got {} and {}",
self.len(),
rhs.len()
);

if rhs.dtype().is_numeric() {
return self.arithm_helper_numeric(rhs, op);
}

polars_ensure!(
self.dtype() == rhs.dtype(),
InvalidOperation: "List Series doing arithmetic operations to each other should have same dtype; got {:?} and {:?}",
self.dtype(),
rhs.dtype()
);

let mut has_nulls = has_nulls.unwrap_or(false);
if !has_nulls {
for chunk in self.chunks().iter() {
Expand Down Expand Up @@ -118,7 +207,7 @@ impl ListChunked {
// along.
a_listchunked.arithm_helper(b, op, Some(true))
} else {
op(a, b)
op.apply_with_series(a, b)
};
chunk_result.map(Some)
}).collect::<PolarsResult<Vec<Option<Series>>>>()?;
Expand All @@ -139,8 +228,7 @@ impl ListChunked {
InvalidOperation: "can only do arithmetic operations on lists of the same size"
);

let result = op(&l_leaf_array, &r_leaf_array)?;

let result = op.apply_with_series(&l_leaf_array, &r_leaf_array)?;
// We now need to wrap the Arrow arrays with the metadata that turns
// them into lists:
// TODO is there a way to do this without cloning the underlying data?
Expand All @@ -160,18 +248,18 @@ impl ListChunked {

impl NumOpsDispatchInner for ListType {
fn add_to(lhs: &ListChunked, rhs: &Series) -> PolarsResult<Series> {
lhs.arithm_helper(rhs, &|l, r| l.add_to(r), None)
lhs.arithm_helper(rhs, Op::Add, None)
}
fn subtract(lhs: &ListChunked, rhs: &Series) -> PolarsResult<Series> {
lhs.arithm_helper(rhs, &|l, r| l.subtract(r), None)
lhs.arithm_helper(rhs, Op::Subtract, None)
}
fn multiply(lhs: &ListChunked, rhs: &Series) -> PolarsResult<Series> {
lhs.arithm_helper(rhs, &|l, r| l.multiply(r), None)
lhs.arithm_helper(rhs, Op::Multiply, None)
}
fn divide(lhs: &ListChunked, rhs: &Series) -> PolarsResult<Series> {
lhs.arithm_helper(rhs, &|l, r| l.divide(r), None)
lhs.arithm_helper(rhs, Op::Divide, None)
}
fn remainder(lhs: &ListChunked, rhs: &Series) -> PolarsResult<Series> {
lhs.arithm_helper(rhs, &|l, r| l.remainder(r), None)
lhs.arithm_helper(rhs, Op::Remainder, None)
}
}
28 changes: 28 additions & 0 deletions crates/polars-plan/src/plans/aexpr/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,15 @@ fn func_args_to_fields(
.collect()
}

/// Left is List(_), right is numeric primitive, we want to cast left's leaf
/// dtype.
fn try_get_list_super_type(left: &DataType, right: &DataType) -> PolarsResult<DataType> {
debug_assert!(left.is_list());
debug_assert!(right.is_numeric());
let super_type = try_get_supertype(left.leaf_dtype(), right)?;
Ok(left.cast_leaf(super_type))
}

fn get_arithmetic_field(
left: Node,
right: Node,
Expand Down Expand Up @@ -355,6 +364,9 @@ fn get_arithmetic_field(
(_, Time) | (Time, _) => {
polars_bail!(InvalidOperation: "{} not allowed on {} and {}", op, left_field.dtype, right_type)
},
(List(_), _) if right_type.is_numeric() => {
try_get_list_super_type(&left_field.dtype, &right_type)?
},
(left, right) => try_get_supertype(left, right)?,
}
},
Expand All @@ -380,6 +392,12 @@ fn get_arithmetic_field(
polars_bail!(InvalidOperation: "{} not allowed on {} and {}", op, left_field.dtype, right_type)
},
(Boolean, Boolean) => IDX_DTYPE,
(List(_), _) if right_type.is_numeric() => {
try_get_list_super_type(&left_field.dtype, &right_type)?
},
(_, List(_)) if left_field.dtype.is_numeric() => {
try_get_supertype(&left_field.dtype, right_type.leaf_dtype())?
},
(left, right) => try_get_supertype(left, right)?,
}
},
Expand Down Expand Up @@ -412,6 +430,16 @@ fn get_arithmetic_field(
polars_bail!(InvalidOperation: "{} not allowed on {} and {}", op, left_field.dtype, right_type)
},
},
(List(_), _) if right_type.is_numeric() => {
let dtype = try_get_list_super_type(&left_field.dtype, &right_type)?;
left_field.coerce(dtype);
return Ok(left_field);
},
(_, List(_)) if left_field.dtype.is_numeric() => {
let dtype = try_get_supertype(&left_field.dtype, right_type.leaf_dtype())?;
left_field.coerce(dtype);
return Ok(left_field);
},
_ => {
// Avoid needlessly type casting numeric columns during arithmetic
// with literals.
Expand Down
14 changes: 12 additions & 2 deletions crates/polars-plan/src/plans/conversion/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,14 @@ fn process_list_arithmetic(
(DataType::List(_), _) => {
let leaf = type_left.leaf_dtype();
if type_right != *leaf {
let new_dtype = if type_right.is_nested() {
type_left.cast_leaf(leaf.clone())
} else {
leaf.clone()
};
let new_node_right = expr_arena.add(AExpr::Cast {
expr: node_right,
dtype: type_left.cast_leaf(leaf.clone()),
dtype: new_dtype,
options: CastOptions::NonStrict,
});

Expand All @@ -77,9 +82,14 @@ fn process_list_arithmetic(
(_, DataType::List(_)) => {
let leaf = type_right.leaf_dtype();
if type_left != *leaf {
let new_dtype = if type_left.is_nested() {
type_right.cast_leaf(leaf.clone())
} else {
leaf.clone()
};
let new_node_left = expr_arena.add(AExpr::Cast {
expr: node_left,
dtype: type_right.cast_leaf(leaf.clone()),
dtype: new_dtype,
options: CastOptions::NonStrict,
});

Expand Down
Loading