From 36b124458beefd0d2d9cfebb61294674b44128b5 Mon Sep 17 00:00:00 2001 From: Gijs Burghoorn Date: Fri, 6 Dec 2024 08:14:16 +0100 Subject: [PATCH] fix: Invalid `bitwise_xor` for ScalarColumn (#20140) --- crates/polars-core/src/frame/column/mod.rs | 19 ++++++++----------- crates/polars-core/src/frame/column/scalar.rs | 16 ++++++++++------ .../tests/unit/operations/test_bitwise.py | 13 +++++++++++++ 3 files changed, 31 insertions(+), 17 deletions(-) diff --git a/crates/polars-core/src/frame/column/mod.rs b/crates/polars-core/src/frame/column/mod.rs index 39f363f6463d..6a94750e425c 100644 --- a/crates/polars-core/src/frame/column/mod.rs +++ b/crates/polars-core/src/frame/column/mod.rs @@ -1196,19 +1196,13 @@ impl Column { } pub fn bitand(&self, rhs: &Self) -> PolarsResult { - // @partition-opt - // @scalar-opt - (self.as_materialized_series() & rhs.as_materialized_series()).map(Column::from) + self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l & r) } pub fn bitor(&self, rhs: &Self) -> PolarsResult { - // @partition-opt - // @scalar-opt - (self.as_materialized_series() | rhs.as_materialized_series()).map(Column::from) + self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l | r) } pub fn bitxor(&self, rhs: &Self) -> PolarsResult { - // @partition-opt - // @scalar-opt - (self.as_materialized_series() ^ rhs.as_materialized_series()).map(Column::from) + self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l ^ r) } pub fn try_add_owned(self, other: Self) -> PolarsResult { @@ -1341,7 +1335,11 @@ impl Column { Column::Scalar(s) => { // We don't really want to deal with handling the full semantics here so we just // cast to a single value series. This is a tiny bit wasteful, but probably fine. - s.as_single_value_series().xor_reduce() + // + // We have to deal with the fact that xor is 0 if there is an even number of + // elements and the value if there is an odd number of elements. If there are zero + // elements the result should be `null`. + s.as_n_values_series(2 - s.len() % 2).xor_reduce() }, } } @@ -1349,7 +1347,6 @@ impl Column { match self { Column::Series(s) => s.n_unique(), Column::Partitioned(s) => s.partitions().n_unique(), - // @scalar-opt Column::Scalar(s) => s.as_single_value_series().n_unique(), } } diff --git a/crates/polars-core/src/frame/column/scalar.rs b/crates/polars-core/src/frame/column/scalar.rs index aabadf5ab933..cc33697a42ff 100644 --- a/crates/polars-core/src/frame/column/scalar.rs +++ b/crates/polars-core/src/frame/column/scalar.rs @@ -108,13 +108,17 @@ impl ScalarColumn { /// /// If the [`ScalarColumn`] has `length=0` the resulting `Series` will also have `length=0`. pub fn as_single_value_series(&self) -> Series { + self.as_n_values_series(1) + } + + /// Take the [`ScalarColumn`] as a series with a `n` values. + /// + /// If the [`ScalarColumn`] has `length=0` the resulting `Series` will also have `length=0`. + pub fn as_n_values_series(&self, n: usize) -> Series { + let length = usize::min(n, self.length); match self.materialized.get() { - Some(s) => s.head(Some(1)), - None => Self::_to_series( - self.name.clone(), - self.scalar.clone(), - usize::min(1, self.length), - ), + Some(s) => s.head(Some(length)), + None => Self::_to_series(self.name.clone(), self.scalar.clone(), length), } } diff --git a/py-polars/tests/unit/operations/test_bitwise.py b/py-polars/tests/unit/operations/test_bitwise.py index f701e64b1c70..ae56c44d8b12 100644 --- a/py-polars/tests/unit/operations/test_bitwise.py +++ b/py-polars/tests/unit/operations/test_bitwise.py @@ -205,3 +205,16 @@ def test_bit_group_by(dtype: pl.DataType) -> None: ), check_row_order=False, ) + + +def test_scalar_bitwise_xor() -> None: + df = pl.select( + pl.repeat(pl.lit(0x80, pl.UInt8), i).bitwise_xor().alias(f"l{i}") + for i in range(5) + ).transpose() + + assert_series_equal( + df.to_series(), + pl.Series("x", [None, 0x80, 0x00, 0x80, 0x00], pl.UInt8), + check_names=False, + )