From cfd71715890a7ce9eef9700c5712045467dfa06e Mon Sep 17 00:00:00 2001 From: coastalwhite Date: Sat, 28 Sep 2024 13:22:11 +0200 Subject: [PATCH 1/9] feat: Bitwise operations / aggregations This implements the `bitwise` namespace for expressions that allows performing bitwise operations on booleans, integers and floating point values. This also allows for bitwise aggregations. List of implemented functions: - [x] `bitwise.count_ones` - [x] `bitwise.count_zeros` - [x] `bitwise.leading_ones` - [x] `bitwise.leading_zeros` - [x] `bitwise.trailing_ones` - [x] `bitwise.trailing_zeros` - [ ] `bitwise.and` - [ ] `bitwise.or` - [ ] `bitwise.xor` [skip ci] --- crates/polars-compute/src/lib.rs | 1 + crates/polars-core/src/frame/column/mod.rs | 24 +++++++++++++++++++ crates/polars-ops/src/series/ops/mod.rs | 2 ++ .../polars-plan/src/dsl/function_expr/mod.rs | 9 +++++++ .../src/dsl/function_expr/schema.rs | 1 + crates/polars-plan/src/dsl/mod.rs | 7 ++++++ crates/polars-python/src/expr/mod.rs | 1 + .../src/lazyframe/visitor/expr_nodes.rs | 3 +++ py-polars/polars/expr/expr.py | 10 ++++++++ 9 files changed, 58 insertions(+) diff --git a/crates/polars-compute/src/lib.rs b/crates/polars-compute/src/lib.rs index df8e1df6585a..a89303ff8f7f 100644 --- a/crates/polars-compute/src/lib.rs +++ b/crates/polars-compute/src/lib.rs @@ -9,6 +9,7 @@ use arrow::types::NativeType; pub mod arithmetic; pub mod arity; +pub mod bitwise; pub mod comparisons; pub mod filter; pub mod float_sum; diff --git a/crates/polars-core/src/frame/column/mod.rs b/crates/polars-core/src/frame/column/mod.rs index 3a6343415a6a..7f1e0eb93cd6 100644 --- a/crates/polars-core/src/frame/column/mod.rs +++ b/crates/polars-core/src/frame/column/mod.rs @@ -1003,6 +1003,30 @@ impl Column { }, } } + + pub fn apply_unary_elementwise(&self, f: impl Fn(&Series) -> Series) -> Column { + match self { + Column::Series(s) => f(s).into(), + Column::Scalar(s) => { + ScalarColumn::from_single_value_series(f(&s.as_single_value_series()), 
s.len()) + .into() + }, + } + } + + pub fn try_apply_unary_elementwise( + &self, + f: impl Fn(&Series) -> PolarsResult, + ) -> PolarsResult { + match self { + Column::Series(s) => f(s).map(Column::from), + Column::Scalar(s) => Ok(ScalarColumn::from_single_value_series( + f(&s.as_single_value_series())?, + s.len(), + ) + .into()), + } + } } impl Default for Column { diff --git a/crates/polars-ops/src/series/ops/mod.rs b/crates/polars-ops/src/series/ops/mod.rs index ed4a446f3cca..88b509ff6450 100644 --- a/crates/polars-ops/src/series/ops/mod.rs +++ b/crates/polars-ops/src/series/ops/mod.rs @@ -5,6 +5,7 @@ mod approx_algo; #[cfg(feature = "approx_unique")] mod approx_unique; mod arg_min_max; +mod bitwise; #[cfg(feature = "business")] mod business; mod clip; @@ -71,6 +72,7 @@ pub use approx_algo::*; #[cfg(feature = "approx_unique")] pub use approx_unique::*; pub use arg_min_max::ArgAgg; +pub use bitwise::*; #[cfg(feature = "business")] pub use business::*; pub use clip::*; diff --git a/crates/polars-plan/src/dsl/function_expr/mod.rs b/crates/polars-plan/src/dsl/function_expr/mod.rs index 6347f6cee7b4..d64adc740f3a 100644 --- a/crates/polars-plan/src/dsl/function_expr/mod.rs +++ b/crates/polars-plan/src/dsl/function_expr/mod.rs @@ -5,6 +5,7 @@ mod arg_where; #[cfg(feature = "dtype-array")] mod array; mod binary; +mod bitwise; mod boolean; mod bounds; #[cfg(feature = "business")] @@ -89,6 +90,7 @@ use schema::FieldsMapper; use serde::{Deserialize, Serialize}; pub(crate) use self::binary::BinaryFunction; +pub use self::bitwise::BitwiseFunction; pub use self::boolean::BooleanFunction; #[cfg(feature = "business")] pub(super) use self::business::BusinessFunction; @@ -127,6 +129,7 @@ pub enum FunctionExpr { StructExpr(StructFunction), #[cfg(feature = "temporal")] TemporalExpr(TemporalFunction), + Bitwise(BitwiseFunction), // Other expressions Boolean(BooleanFunction), @@ -376,6 +379,7 @@ impl Hash for FunctionExpr { StructExpr(f) => f.hash(state), #[cfg(feature = "temporal")] 
TemporalExpr(f) => f.hash(state), + Bitwise(f) => f.hash(state), // Other expressions Boolean(f) => f.hash(state), @@ -602,6 +606,10 @@ impl Display for FunctionExpr { StructExpr(func) => return write!(f, "{func}"), #[cfg(feature = "temporal")] TemporalExpr(func) => return write!(f, "{func}"), + Bitwise(func) => { + f.write_str("bitwise.")?; + return Display::fmt(func, f); + }, // Other expressions Boolean(func) => return write!(f, "{func}"), @@ -871,6 +879,7 @@ impl From for SpecialEq> { StructExpr(func) => func.into(), #[cfg(feature = "temporal")] TemporalExpr(func) => func.into(), + Bitwise(func) => func.into(), // Other expressions Boolean(func) => func.into(), diff --git a/crates/polars-plan/src/dsl/function_expr/schema.rs b/crates/polars-plan/src/dsl/function_expr/schema.rs index 11b190b41d50..48eeea1ad073 100644 --- a/crates/polars-plan/src/dsl/function_expr/schema.rs +++ b/crates/polars-plan/src/dsl/function_expr/schema.rs @@ -26,6 +26,7 @@ impl FunctionExpr { StructExpr(s) => s.get_field(mapper), #[cfg(feature = "temporal")] TemporalExpr(fun) => fun.get_field(mapper), + Bitwise(fun) => fun.get_field(mapper), // Other expressions Boolean(func) => func.get_field(mapper), diff --git a/crates/polars-plan/src/dsl/mod.rs b/crates/polars-plan/src/dsl/mod.rs index 0d591ce81313..c543f63f0824 100644 --- a/crates/polars-plan/src/dsl/mod.rs +++ b/crates/polars-plan/src/dsl/mod.rs @@ -13,6 +13,7 @@ mod arity; #[cfg(feature = "dtype-array")] mod array; pub mod binary; +mod bitwise; #[cfg(feature = "temporal")] pub mod dt; mod expr; @@ -44,6 +45,7 @@ pub use arity::*; #[cfg(feature = "dtype-array")] pub use array::*; use arrow::legacy::prelude::QuantileInterpolOptions; +pub use bitwise::*; pub use expr::*; pub use function_expr::schema::FieldsMapper; pub use function_expr::*; @@ -1929,6 +1931,11 @@ impl Expr { list::ListNameSpace(self) } + /// Get the [`bitwise::BitwiseNameSpace`] + pub fn bitwise(self) -> bitwise::BitwiseNameSpace { + bitwise::BitwiseNameSpace(self) + } 
+ /// Get the [`name::ExprNameNameSpace`] pub fn name(self) -> name::ExprNameNameSpace { name::ExprNameNameSpace(self) diff --git a/crates/polars-python/src/expr/mod.rs b/crates/polars-python/src/expr/mod.rs index 85d44fefbf98..ab9bcf4eeadd 100644 --- a/crates/polars-python/src/expr/mod.rs +++ b/crates/polars-python/src/expr/mod.rs @@ -2,6 +2,7 @@ mod array; #[cfg(feature = "pymethods")] mod binary; +mod bitwise; #[cfg(feature = "pymethods")] mod categorical; #[cfg(feature = "pymethods")] diff --git a/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs b/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs index a1e5b26f1e27..5445bbabeda9 100644 --- a/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs +++ b/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs @@ -758,6 +758,9 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult { FunctionExpr::ListExpr(_) => { return Err(PyNotImplementedError::new_err("list expr")) }, + FunctionExpr::Bitwise(_) => { + return Err(PyNotImplementedError::new_err("bitwise expr")) + }, FunctionExpr::StringExpr(strfun) => match strfun { StringFunction::ConcatHorizontal { delimiter, diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index 341f99876b21..ac4870c6d621 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -47,6 +47,7 @@ from polars.exceptions import CustomUFuncWarning, PolarsInefficientMapWarning from polars.expr.array import ExprArrayNameSpace from polars.expr.binary import ExprBinaryNameSpace +from polars.expr.bitwise import ExprBitwiseNameSpace from polars.expr.categorical import ExprCatNameSpace from polars.expr.datetime import ExprDateTimeNameSpace from polars.expr.list import ExprListNameSpace @@ -10621,6 +10622,15 @@ def dt(self) -> ExprDateTimeNameSpace: # Keep the `list` and `str` properties below at the end of the definition of Expr, # as to not confuse mypy with the type annotation `str` and `list` + @property + def bitwise(self) 
-> ExprBitwiseNameSpace: + """ + Create an object namespace of all bitwise related methods. + + See the individual method pages for full details. + """ + return ExprBitwiseNameSpace(self) + @property def list(self) -> ExprListNameSpace: """ From 1b32b3879c382d9624c4016e0e831b6510a0ee27 Mon Sep 17 00:00:00 2001 From: coastalwhite Date: Mon, 30 Sep 2024 10:38:07 +0200 Subject: [PATCH 2/9] fully implement bitwise operations --- crates/polars-compute/src/bitwise/mod.rs | 285 ++++++++++++++++++ .../src/chunked_array/ops/bitwise_reduce.rs | 80 +++++ .../polars-core/src/chunked_array/ops/mod.rs | 10 + .../frame/group_by/aggregations/boolean.rs | 46 +++ .../src/frame/group_by/aggregations/mod.rs | 69 +++++ crates/polars-core/src/frame/group_by/mod.rs | 23 ++ .../src/series/implementations/boolean.rs | 59 ++++ .../src/series/implementations/floats.rs | 82 ++++- .../src/series/implementations/mod.rs | 58 ++++ crates/polars-core/src/series/series_trait.rs | 30 ++ .../src/expressions/aggregation.rs | 26 ++ crates/polars-expr/src/planner.rs | 1 + crates/polars-ops/src/series/ops/bitwise.rs | 57 ++++ crates/polars-plan/src/dsl/bitwise.rs | 50 +++ crates/polars-plan/src/dsl/expr.rs | 3 + .../src/dsl/function_expr/bitwise.rs | 114 +++++++ .../polars-plan/src/dsl/function_expr/mod.rs | 2 +- crates/polars-plan/src/dsl/mod.rs | 6 - crates/polars-plan/src/plans/aexpr/mod.rs | 4 + crates/polars-plan/src/plans/aexpr/schema.rs | 6 + .../polars-plan/src/plans/aexpr/traverse.rs | 2 + .../src/plans/conversion/expr_to_ir.rs | 4 + .../src/plans/conversion/ir_to_dsl.rs | 4 + crates/polars-plan/src/plans/format.rs | 9 + crates/polars-plan/src/plans/ir/format.rs | 9 + crates/polars-plan/src/plans/iterator.rs | 1 + crates/polars-plan/src/plans/visitor/expr.rs | 1 + crates/polars-python/src/expr/bitwise.rs | 42 +++ .../src/lazyframe/visitor/expr_nodes.rs | 10 + .../src/physical_plan/lower_expr.rs | 3 +- py-polars/polars/expr/expr.py | 64 +++- 31 files changed, 1139 insertions(+), 21 deletions(-) 
create mode 100644 crates/polars-compute/src/bitwise/mod.rs create mode 100644 crates/polars-core/src/chunked_array/ops/bitwise_reduce.rs create mode 100644 crates/polars-ops/src/series/ops/bitwise.rs create mode 100644 crates/polars-plan/src/dsl/bitwise.rs create mode 100644 crates/polars-plan/src/dsl/function_expr/bitwise.rs create mode 100644 crates/polars-python/src/expr/bitwise.rs diff --git a/crates/polars-compute/src/bitwise/mod.rs b/crates/polars-compute/src/bitwise/mod.rs new file mode 100644 index 000000000000..7717d2606bef --- /dev/null +++ b/crates/polars-compute/src/bitwise/mod.rs @@ -0,0 +1,285 @@ +use std::convert::identity; + +use arrow::array::{BooleanArray, PrimitiveArray}; +use arrow::datatypes::ArrowDataType; +use arrow::legacy::utils::CustomIterTools; +use bytemuck::Zeroable; + +pub trait BitwiseKernel { + type Scalar; + + fn count_ones(&self) -> PrimitiveArray; + fn count_zeros(&self) -> PrimitiveArray; + + fn leading_ones(&self) -> PrimitiveArray; + fn leading_zeros(&self) -> PrimitiveArray; + + fn trailing_ones(&self) -> PrimitiveArray; + fn trailing_zeros(&self) -> PrimitiveArray; + + fn reduce_and(&self) -> Option; + fn reduce_or(&self) -> Option; + fn reduce_xor(&self) -> Option; + + fn bit_and(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar; + fn bit_or(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar; + fn bit_xor(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar; +} + +macro_rules! impl_bitwise_kernel { + ($(($T:ty, $to_bits:expr, $from_bits:expr)),+ $(,)?) 
=> { + $( + impl BitwiseKernel for PrimitiveArray<$T> { + type Scalar = $T; + + #[inline(never)] + fn count_ones(&self) -> PrimitiveArray { + PrimitiveArray::new( + ArrowDataType::UInt8, + self.values() + .iter() + .map(|&v| ($to_bits(v).count_ones() & 0xFF) as u8) + .collect_trusted::>() + .into(), + self.validity().cloned(), + ) + } + + #[inline(never)] + fn count_zeros(&self) -> PrimitiveArray { + PrimitiveArray::new( + ArrowDataType::UInt8, + self + .values() + .iter() + .map(|&v| ($to_bits(v).count_zeros() & 0xFF) as u8) + .collect_trusted::>() + .into(), + self.validity().cloned(), + ) + } + + #[inline(never)] + fn leading_ones(&self) -> PrimitiveArray { + PrimitiveArray::new( + ArrowDataType::UInt8, + self.values() + .iter() + .map(|&v| ($to_bits(v).leading_ones() & 0xFF) as u8) + .collect_trusted::>() + .into(), + self.validity().cloned(), + ) + } + + #[inline(never)] + fn leading_zeros(&self) -> PrimitiveArray { + PrimitiveArray::new( + ArrowDataType::UInt8, + self.values() + .iter() + .map(|&v| ($to_bits(v).leading_zeros() & 0xFF) as u8) + .collect_trusted::>() + .into(), + self.validity().cloned(), + ) + } + + #[inline(never)] + fn trailing_ones(&self) -> PrimitiveArray { + PrimitiveArray::new( + ArrowDataType::UInt8, + self.values() + .iter() + .map(|&v| ($to_bits(v).trailing_ones() & 0xFF) as u8) + .collect_trusted::>() + .into(), + self.validity().cloned(), + ) + } + + #[inline(never)] + fn trailing_zeros(&self) -> PrimitiveArray { + PrimitiveArray::new( + ArrowDataType::UInt8, + self.values().iter() + .map(|&v| ($to_bits(v).trailing_zeros() & 0xFF) as u8) + .collect_trusted::>() + .into(), + self.validity().cloned(), + ) + } + + #[inline(never)] + fn reduce_and(&self) -> Option { + if self.validity().map_or(false, |v| v.unset_bits() > 0) { + return None; + } + + let values = self.values(); + + if values.is_empty() { + return None; + } + + Some($from_bits(values.iter().fold(!$to_bits(<$T>::zeroed()), |a, &b| a & $to_bits(b)))) + } + + #[inline(never)] 
+ fn reduce_or(&self) -> Option { + if self.validity().map_or(false, |v| v.unset_bits() > 0) { + return None; + } + + let values = self.values(); + + if values.is_empty() { + return None; + } + + Some($from_bits(values.iter().fold($to_bits(<$T>::zeroed()), |a, &b| a | $to_bits(b)))) + } + + #[inline(never)] + fn reduce_xor(&self) -> Option { + if self.validity().map_or(false, |v| v.unset_bits() > 0) { + return None; + } + + let values = self.values(); + + if values.is_empty() { + return None; + } + + Some($from_bits(values.iter().fold($to_bits(<$T>::zeroed()), |a, &b| a ^ $to_bits(b)))) + } + + fn bit_and(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar { + $from_bits($to_bits(lhs) & $to_bits(rhs)) + } + fn bit_or(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar { + $from_bits($to_bits(lhs) | $to_bits(rhs)) + } + fn bit_xor(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar { + $from_bits($to_bits(lhs) ^ $to_bits(rhs)) + } + } + )+ + }; +} + +impl_bitwise_kernel! { + (i8, identity, identity), + (i16, identity, identity), + (i32, identity, identity), + (i64, identity, identity), + (u8, identity, identity), + (u16, identity, identity), + (u32, identity, identity), + (u64, identity, identity), + (f32, f32::to_bits, f32::from_bits), + (f64, f64::to_bits, f64::from_bits), +} + +impl BitwiseKernel for BooleanArray { + type Scalar = bool; + + #[inline(never)] + fn count_ones(&self) -> PrimitiveArray { + PrimitiveArray::new( + ArrowDataType::UInt8, + self.values() + .iter() + .map(u8::from) + .collect_trusted::>() + .into(), + self.validity().cloned(), + ) + } + + #[inline(never)] + fn count_zeros(&self) -> PrimitiveArray { + PrimitiveArray::new( + ArrowDataType::UInt8, + self.values() + .iter() + .map(|v| u8::from(!v)) + .collect_trusted::>() + .into(), + self.validity().cloned(), + ) + } + + #[inline(always)] + fn leading_ones(&self) -> PrimitiveArray { + self.count_ones() + } + + #[inline(always)] + fn leading_zeros(&self) -> PrimitiveArray { + 
self.count_zeros() + } + + #[inline(always)] + fn trailing_ones(&self) -> PrimitiveArray { + self.count_ones() + } + + #[inline(always)] + fn trailing_zeros(&self) -> PrimitiveArray { + self.count_zeros() + } + + fn reduce_and(&self) -> Option { + if self.validity().map_or(false, |v| v.unset_bits() > 0) { + return None; + } + + let values = self.values(); + + if values.is_empty() { + return None; + } + + Some(values.unset_bits() == 0) + } + + fn reduce_or(&self) -> Option { + if self.validity().map_or(false, |v| v.unset_bits() > 0) { + return None; + } + + let values = self.values(); + + if values.is_empty() { + return None; + } + + Some(values.set_bits() > 0) + } + + fn reduce_xor(&self) -> Option { + if self.validity().map_or(false, |v| v.unset_bits() > 0) { + return None; + } + + let values = self.values(); + + if values.is_empty() { + return None; + } + + Some(values.set_bits() % 2 == 1) + } + + fn bit_and(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar { + lhs & rhs + } + fn bit_or(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar { + lhs | rhs + } + fn bit_xor(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar { + lhs ^ rhs + } +} diff --git a/crates/polars-core/src/chunked_array/ops/bitwise_reduce.rs b/crates/polars-core/src/chunked_array/ops/bitwise_reduce.rs new file mode 100644 index 000000000000..5e033b53ab5d --- /dev/null +++ b/crates/polars-core/src/chunked_array/ops/bitwise_reduce.rs @@ -0,0 +1,80 @@ +use arrow::array::{Array, PrimitiveArray}; +use arrow::types::NativeType; +use polars_compute::bitwise::BitwiseKernel; + +use super::{BooleanType, ChunkBitwiseReduce, ChunkedArray, PolarsNumericType}; + +impl ChunkBitwiseReduce for ChunkedArray +where + T: PolarsNumericType, + T::Native: NativeType, + PrimitiveArray: BitwiseKernel, +{ + type Physical = T::Native; + + fn and_reduce(&self) -> Option { + if self.null_count() > 0 { + return None; + } + + self.downcast_iter() + .filter(|arr| !arr.is_empty()) + .map(|arr| 
BitwiseKernel::reduce_and(arr).unwrap()) + .reduce( as BitwiseKernel>::bit_and) + } + fn or_reduce(&self) -> Option { + if self.null_count() > 0 { + return None; + } + + self.downcast_iter() + .filter(|arr| !arr.is_empty()) + .map(|arr| BitwiseKernel::reduce_or(arr).unwrap()) + .reduce( as BitwiseKernel>::bit_or) + } + fn xor_reduce(&self) -> Option { + if self.null_count() > 0 { + return None; + } + + self.downcast_iter() + .filter(|arr| !arr.is_empty()) + .map(|arr| BitwiseKernel::reduce_xor(arr).unwrap()) + .reduce( as BitwiseKernel>::bit_xor) + } +} + +impl ChunkBitwiseReduce for ChunkedArray { + type Physical = bool; + + fn and_reduce(&self) -> Option { + if self.null_count() > 0 { + return None; + } + + self.downcast_iter() + .filter(|arr| !arr.is_empty()) + .map(|arr| BitwiseKernel::reduce_and(arr).unwrap()) + .reduce(|a, b| a & b) + } + fn or_reduce(&self) -> Option { + if self.null_count() > 0 { + return None; + } + + self.downcast_iter() + .filter(|arr| !arr.is_empty()) + .map(|arr| BitwiseKernel::reduce_or(arr).unwrap()) + .reduce(|a, b| a | b) + } + fn xor_reduce(&self) -> Option { + if self.null_count() > 0 { + return None; + } + + self.downcast_iter() + .filter(|arr| !arr.is_empty()) + .map(|arr| BitwiseKernel::reduce_xor(arr).unwrap()) + .reduce(|a, b| a ^ b) + } +} diff --git a/crates/polars-core/src/chunked_array/ops/mod.rs b/crates/polars-core/src/chunked_array/ops/mod.rs index 2bc1337e598f..9633dad9ae69 100644 --- a/crates/polars-core/src/chunked_array/ops/mod.rs +++ b/crates/polars-core/src/chunked_array/ops/mod.rs @@ -9,6 +9,7 @@ pub(crate) mod append; mod apply; pub mod arity; mod bit_repr; +mod bitwise_reduce; pub(crate) mod chunkops; pub(crate) mod compare_inner; #[cfg(feature = "dtype-decimal")] @@ -295,6 +296,15 @@ pub trait ChunkVar { } } +/// Bitwise Reduction Operations. 
+pub trait ChunkBitwiseReduce { + type Physical; + + fn and_reduce(&self) -> Option; + fn or_reduce(&self) -> Option; + fn xor_reduce(&self) -> Option; +} + /// Compare [`Series`] and [`ChunkedArray`]'s and get a `boolean` mask that /// can be used to filter rows. /// diff --git a/crates/polars-core/src/frame/group_by/aggregations/boolean.rs b/crates/polars-core/src/frame/group_by/aggregations/boolean.rs index fd7e537dc0ab..74f65b7ce344 100644 --- a/crates/polars-core/src/frame/group_by/aggregations/boolean.rs +++ b/crates/polars-core/src/frame/group_by/aggregations/boolean.rs @@ -17,7 +17,53 @@ where ca.into_series() } +unsafe fn bitwise_agg( + ca: &BooleanChunked, + groups: &GroupsProxy, + f: fn(&BooleanChunked) -> Option, +) -> Series { + // Prevent a rechunk for every individual group. + let s = if groups.len() > 1 { + ca.rechunk() + } else { + ca.clone() + }; + + match groups { + GroupsProxy::Idx(groups) => _agg_helper_idx_bool::<_>(groups, |(_, idx)| { + debug_assert!(idx.len() <= s.len()); + if idx.is_empty() { + None + } else { + let take = s.take_unchecked(idx); + f(&take) + } + }), + GroupsProxy::Slice { groups, .. 
} => _agg_helper_slice_bool::<_>(groups, |[first, len]| { + debug_assert!(len <= s.len() as IdxSize); + if len == 0 { + None + } else { + let take = _slice_from_offsets(&s, first, len); + f(&take) + } + }), + } +} + impl BooleanChunked { + pub(crate) unsafe fn agg_and(&self, groups: &GroupsProxy) -> Series { + bitwise_agg(self, groups, ChunkBitwiseReduce::and_reduce) + } + + pub(crate) unsafe fn agg_or(&self, groups: &GroupsProxy) -> Series { + bitwise_agg(self, groups, ChunkBitwiseReduce::or_reduce) + } + + pub(crate) unsafe fn agg_xor(&self, groups: &GroupsProxy) -> Series { + bitwise_agg(self, groups, ChunkBitwiseReduce::xor_reduce) + } + pub(crate) unsafe fn agg_min(&self, groups: &GroupsProxy) -> Series { // faster paths match (self.is_sorted_flag(), self.null_count()) { diff --git a/crates/polars-core/src/frame/group_by/aggregations/mod.rs b/crates/polars-core/src/frame/group_by/aggregations/mod.rs index fa7cd62df9a5..824a94fb3ec1 100644 --- a/crates/polars-core/src/frame/group_by/aggregations/mod.rs +++ b/crates/polars-core/src/frame/group_by/aggregations/mod.rs @@ -455,6 +455,75 @@ where } } +/// # Safety +/// +/// No bounds checks on `groups`. +unsafe fn bitwise_agg( + ca: &ChunkedArray, + groups: &GroupsProxy, + f: fn(&ChunkedArray) -> Option, +) -> Series +where + ChunkedArray: + ChunkTakeUnchecked<[IdxSize]> + ChunkBitwiseReduce + IntoSeries, +{ + // Prevent a rechunk for every individual group. + let s = if groups.len() > 1 { + ca.rechunk() + } else { + ca.clone() + }; + + match groups { + GroupsProxy::Idx(groups) => agg_helper_idx_on_all::(groups, |idx| { + debug_assert!(idx.len() <= s.len()); + if idx.is_empty() { + None + } else { + let take = unsafe { s.take_unchecked(idx) }; + f(&take) + } + }), + GroupsProxy::Slice { groups, .. 
} => _agg_helper_slice::(groups, |[first, len]| { + debug_assert!(len <= s.len() as IdxSize); + if len == 0 { + None + } else { + let take = _slice_from_offsets(&s, first, len); + f(&take) + } + }), + } +} + +impl ChunkedArray +where + T: PolarsNumericType, + ChunkedArray: + ChunkTakeUnchecked<[IdxSize]> + ChunkBitwiseReduce + IntoSeries, +{ + /// # Safety + /// + /// No bounds checks on `groups`. + pub(crate) unsafe fn agg_and(&self, groups: &GroupsProxy) -> Series { + unsafe { bitwise_agg(self, groups, ChunkBitwiseReduce::and_reduce) } + } + + /// # Safety + /// + /// No bounds checks on `groups`. + pub(crate) unsafe fn agg_or(&self, groups: &GroupsProxy) -> Series { + unsafe { bitwise_agg(self, groups, ChunkBitwiseReduce::or_reduce) } + } + + /// # Safety + /// + /// No bounds checks on `groups`. + pub(crate) unsafe fn agg_xor(&self, groups: &GroupsProxy) -> Series { + unsafe { bitwise_agg(self, groups, ChunkBitwiseReduce::xor_reduce) } + } +} + impl ChunkedArray where T: PolarsNumericType + Sync, diff --git a/crates/polars-core/src/frame/group_by/mod.rs b/crates/polars-core/src/frame/group_by/mod.rs index e2fbb90d6e74..aad0dbed9c12 100644 --- a/crates/polars-core/src/frame/group_by/mod.rs +++ b/crates/polars-core/src/frame/group_by/mod.rs @@ -873,6 +873,14 @@ pub enum GroupByMethod { Implode, Std(u8), Var(u8), + Bitwise(GroupByBitwiseMethod), +} + +#[derive(Copy, Clone, Debug)] +pub enum GroupByBitwiseMethod { + And, + Or, + Xor, } impl Display for GroupByMethod { @@ -895,11 +903,25 @@ impl Display for GroupByMethod { Implode => "list", Std(_) => "std", Var(_) => "var", + Bitwise(t) => { + f.write_str("bitwise_")?; + return Display::fmt(t, f); + }, }; write!(f, "{s}") } } +impl Display for GroupByBitwiseMethod { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Self::And => f.write_str("and"), + Self::Or => f.write_str("or"), + Self::Xor => f.write_str("xor"), + } + } +} + // Formatting functions used in eager and lazy code for 
renaming grouped columns pub fn fmt_group_by_column(name: &str, method: GroupByMethod) -> PlSmallStr { use GroupByMethod::*; @@ -920,6 +942,7 @@ pub fn fmt_group_by_column(name: &str, method: GroupByMethod) -> PlSmallStr { Quantile(quantile, _interpol) => format_pl_smallstr!("{name}_quantile_{quantile:.2}"), Std(_) => format_pl_smallstr!("{name}_agg_std"), Var(_) => format_pl_smallstr!("{name}_agg_var"), + Bitwise(_) => format_pl_smallstr!("{name}_agg_var"), } } diff --git a/crates/polars-core/src/series/implementations/boolean.rs b/crates/polars-core/src/series/implementations/boolean.rs index a4e9d662226d..1d5369b0e275 100644 --- a/crates/polars-core/src/series/implementations/boolean.rs +++ b/crates/polars-core/src/series/implementations/boolean.rs @@ -84,6 +84,16 @@ impl private::PrivateSeries for SeriesWrap { .agg_var(groups, _ddof) } + unsafe fn agg_and(&self, groups: &GroupsProxy) -> Series { + self.0.agg_and(groups) + } + unsafe fn agg_or(&self, groups: &GroupsProxy) -> Series { + self.0.agg_or(groups) + } + unsafe fn agg_xor(&self, groups: &GroupsProxy) -> Series { + self.0.agg_xor(groups) + } + #[cfg(feature = "algorithm_group_by")] fn group_tuples(&self, multithreaded: bool, sorted: bool) -> PolarsResult { IntoGroupsProxy::group_tuples(&self.0, multithreaded, sorted) @@ -308,6 +318,55 @@ impl SeriesTrait for SeriesWrap { let v = sc.value().cast(&DataType::Float64); Ok(Scalar::new(DataType::Float64, v)) } + fn and_reduce(&self) -> PolarsResult { + let dt = DataType::Boolean; + if self.0.null_count() > 0 { + return Ok(Scalar::new(dt, AnyValue::Null)); + } + + Ok(Scalar::new( + dt, + self.0 + .downcast_iter() + .filter(|arr| !arr.is_empty()) + .map(|arr| polars_compute::bitwise::BitwiseKernel::reduce_and(arr).unwrap()) + .reduce(|a, b| a & b) + .map_or(AnyValue::Null, Into::into), + )) + } + fn or_reduce(&self) -> PolarsResult { + let dt = DataType::Boolean; + if self.0.null_count() > 0 { + return Ok(Scalar::new(dt, AnyValue::Null)); + } + + Ok(Scalar::new( 
+ dt, + self.0 + .downcast_iter() + .filter(|arr| !arr.is_empty()) + .map(|arr| polars_compute::bitwise::BitwiseKernel::reduce_or(arr).unwrap()) + .reduce(|a, b| a | b) + .map_or(AnyValue::Null, Into::into), + )) + } + fn xor_reduce(&self) -> PolarsResult { + let dt = DataType::Boolean; + if self.0.null_count() > 0 { + return Ok(Scalar::new(dt, AnyValue::Null)); + } + + Ok(Scalar::new( + dt, + self.0 + .downcast_iter() + .filter(|arr| !arr.is_empty()) + .map(|arr| polars_compute::bitwise::BitwiseKernel::reduce_xor(arr).unwrap()) + .reduce(|a, b| a ^ b) + .map_or(AnyValue::Null, Into::into), + )) + } + fn clone_inner(&self) -> Arc { Arc::new(SeriesWrap(Clone::clone(&self.0))) } diff --git a/crates/polars-core/src/series/implementations/floats.rs b/crates/polars-core/src/series/implementations/floats.rs index 6f83811561e3..445b45a5c64e 100644 --- a/crates/polars-core/src/series/implementations/floats.rs +++ b/crates/polars-core/src/series/implementations/floats.rs @@ -5,7 +5,7 @@ use crate::frame::group_by::*; use crate::prelude::*; macro_rules! impl_dyn_series { - ($ca: ident) => { + ($ca: ident, $pdt:ident) => { impl private::PrivateSeries for SeriesWrap<$ca> { fn compute_len(&mut self) { self.0.compute_len() @@ -96,6 +96,16 @@ macro_rules! impl_dyn_series { self.0.agg_list(groups) } + unsafe fn agg_and(&self, groups: &GroupsProxy) -> Series { + self.0.agg_and(groups) + } + unsafe fn agg_or(&self, groups: &GroupsProxy) -> Series { + self.0.agg_or(groups) + } + unsafe fn agg_xor(&self, groups: &GroupsProxy) -> Series { + self.0.agg_xor(groups) + } + fn subtract(&self, rhs: &Series) -> PolarsResult { polars_ensure!( self.dtype() == rhs.dtype(), @@ -356,6 +366,72 @@ macro_rules! 
impl_dyn_series { ) -> PolarsResult { QuantileAggSeries::quantile_reduce(&self.0, quantile, interpol) } + fn and_reduce(&self) -> PolarsResult { + let dt = <$pdt as PolarsDataType>::get_dtype(); + if self.0.null_count() > 0 { + return Ok(Scalar::new(dt, AnyValue::Null)); + } + + Ok(Scalar::new( + dt, + self.0 + .downcast_iter() + .filter(|arr| !arr.is_empty()) + .map(|arr| { + polars_compute::bitwise::BitwiseKernel::reduce_and(arr) + .unwrap() + .to_bits() + }) + .reduce(|a, b| a & b) + .map_or(AnyValue::Null, |v| { + <$pdt as PolarsDataType>::Physical::from_bits(v).into() + }), + )) + } + fn or_reduce(&self) -> PolarsResult { + let dt = <$pdt as PolarsDataType>::get_dtype(); + if self.0.null_count() > 0 { + return Ok(Scalar::new(dt, AnyValue::Null)); + } + + Ok(Scalar::new( + dt, + self.0 + .downcast_iter() + .filter(|arr| !arr.is_empty()) + .map(|arr| { + polars_compute::bitwise::BitwiseKernel::reduce_or(arr) + .unwrap() + .to_bits() + }) + .reduce(|a, b| a | b) + .map_or(AnyValue::Null, |v| { + <$pdt as PolarsDataType>::Physical::from_bits(v).into() + }), + )) + } + fn xor_reduce(&self) -> PolarsResult { + let dt = <$pdt as PolarsDataType>::get_dtype(); + if self.0.null_count() > 0 { + return Ok(Scalar::new(dt, AnyValue::Null)); + } + + Ok(Scalar::new( + dt, + self.0 + .downcast_iter() + .filter(|arr| !arr.is_empty()) + .map(|arr| { + polars_compute::bitwise::BitwiseKernel::reduce_xor(arr) + .unwrap() + .to_bits() + }) + .reduce(|a, b| a ^ b) + .map_or(AnyValue::Null, |v| { + <$pdt as PolarsDataType>::Physical::from_bits(v).into() + }), + )) + } fn clone_inner(&self) -> Arc { Arc::new(SeriesWrap(Clone::clone(&self.0))) @@ -372,5 +448,5 @@ macro_rules! 
impl_dyn_series { }; } -impl_dyn_series!(Float32Chunked); -impl_dyn_series!(Float64Chunked); +impl_dyn_series!(Float32Chunked, Float32Type); +impl_dyn_series!(Float64Chunked, Float64Type); diff --git a/crates/polars-core/src/series/implementations/mod.rs b/crates/polars-core/src/series/implementations/mod.rs index 6094dff6a838..8e211a0ce70c 100644 --- a/crates/polars-core/src/series/implementations/mod.rs +++ b/crates/polars-core/src/series/implementations/mod.rs @@ -169,6 +169,16 @@ macro_rules! impl_dyn_series { self.0.agg_list(groups) } + unsafe fn agg_and(&self, groups: &GroupsProxy) -> Series { + self.0.agg_and(groups) + } + unsafe fn agg_or(&self, groups: &GroupsProxy) -> Series { + self.0.agg_or(groups) + } + unsafe fn agg_xor(&self, groups: &GroupsProxy) -> Series { + self.0.agg_xor(groups) + } + fn subtract(&self, rhs: &Series) -> PolarsResult { polars_ensure!( self.dtype() == rhs.dtype(), @@ -459,6 +469,54 @@ macro_rules! impl_dyn_series { ) -> PolarsResult { QuantileAggSeries::quantile_reduce(&self.0, quantile, interpol) } + fn and_reduce(&self) -> PolarsResult { + let dt = <$pdt as PolarsDataType>::get_dtype(); + if self.0.null_count() > 0 { + return Ok(Scalar::new(dt, AnyValue::Null)); + } + + Ok(Scalar::new( + dt, + self.0 + .downcast_iter() + .filter(|arr| !arr.is_empty()) + .map(|arr| polars_compute::bitwise::BitwiseKernel::reduce_and(arr).unwrap()) + .reduce(|a, b| a & b) + .map_or(AnyValue::Null, Into::into), + )) + } + fn or_reduce(&self) -> PolarsResult { + let dt = <$pdt as PolarsDataType>::get_dtype(); + if self.0.null_count() > 0 { + return Ok(Scalar::new(dt, AnyValue::Null)); + } + + Ok(Scalar::new( + dt, + self.0 + .downcast_iter() + .filter(|arr| !arr.is_empty()) + .map(|arr| polars_compute::bitwise::BitwiseKernel::reduce_or(arr).unwrap()) + .reduce(|a, b| a | b) + .map_or(AnyValue::Null, Into::into), + )) + } + fn xor_reduce(&self) -> PolarsResult { + let dt = <$pdt as PolarsDataType>::get_dtype(); + if self.0.null_count() > 0 { + return 
Ok(Scalar::new(dt, AnyValue::Null)); + } + + Ok(Scalar::new( + dt, + self.0 + .downcast_iter() + .filter(|arr| !arr.is_empty()) + .map(|arr| polars_compute::bitwise::BitwiseKernel::reduce_xor(arr).unwrap()) + .reduce(|a, b| a ^ b) + .map_or(AnyValue::Null, Into::into), + )) + } fn clone_inner(&self) -> Arc { Arc::new(SeriesWrap(Clone::clone(&self.0))) diff --git a/crates/polars-core/src/series/series_trait.rs b/crates/polars-core/src/series/series_trait.rs index d9e11e5c5e8c..677aa65472d8 100644 --- a/crates/polars-core/src/series/series_trait.rs +++ b/crates/polars-core/src/series/series_trait.rs @@ -147,6 +147,24 @@ pub(crate) mod private { unsafe fn agg_list(&self, groups: &GroupsProxy) -> Series { Series::full_null(self._field().name().clone(), groups.len(), self._dtype()) } + /// # Safety + /// + /// Does no bounds checks, groups must be correct. + unsafe fn agg_and(&self, groups: &GroupsProxy) -> Series { + Series::full_null(self._field().name().clone(), groups.len(), self._dtype()) + } + /// # Safety + /// + /// Does no bounds checks, groups must be correct. + unsafe fn agg_or(&self, groups: &GroupsProxy) -> Series { + Series::full_null(self._field().name().clone(), groups.len(), self._dtype()) + } + /// # Safety + /// + /// Does no bounds checks, groups must be correct. 
+ unsafe fn agg_xor(&self, groups: &GroupsProxy) -> Series { + Series::full_null(self._field().name().clone(), groups.len(), self._dtype()) + } fn subtract(&self, _rhs: &Series) -> PolarsResult { polars_bail!(opq = subtract, self._dtype()); @@ -481,6 +499,18 @@ pub trait SeriesTrait: ) -> PolarsResult { polars_bail!(opq = quantile, self._dtype()); } + /// Get the bitwise AND of the Series as a new Series of length 1. + fn and_reduce(&self) -> PolarsResult { + polars_bail!(opq = and_reduce, self._dtype()); + } + /// Get the bitwise OR of the Series as a new Series of length 1. + fn or_reduce(&self) -> PolarsResult { + polars_bail!(opq = or_reduce, self._dtype()); + } + /// Get the bitwise XOR of the Series as a new Series of length 1. + fn xor_reduce(&self) -> PolarsResult { + polars_bail!(opq = xor_reduce, self._dtype()); + } /// Clone inner ChunkedArray and wrap in a new Arc fn clone_inner(&self) -> Arc; diff --git a/crates/polars-expr/src/expressions/aggregation.rs b/crates/polars-expr/src/expressions/aggregation.rs index e41886a29590..407047e6cd6c 100644 --- a/crates/polars-expr/src/expressions/aggregation.rs +++ b/crates/polars-expr/src/expressions/aggregation.rs @@ -175,6 +175,23 @@ impl PhysicalExpr for AggregationExpr { .var_reduce(ddof) .map(|sc| sc.into_series(s.name().clone())), GroupByMethod::Quantile(_, _) => unimplemented!(), + GroupByMethod::Bitwise(f) => match f { + GroupByBitwiseMethod::And => parallel_op_series( + |s| s.and_reduce().map(|sc| sc.into_series(s.name().clone())), + s, + allow_threading, + ), + GroupByBitwiseMethod::Or => parallel_op_series( + |s| s.or_reduce().map(|sc| sc.into_series(s.name().clone())), + s, + allow_threading, + ), + GroupByBitwiseMethod::Xor => parallel_op_series( + |s| s.xor_reduce().map(|sc| sc.into_series(s.name().clone())), + s, + allow_threading, + ), + }, } } #[allow(clippy::ptr_arg)] @@ -407,6 +424,15 @@ impl PhysicalExpr for AggregationExpr { // implemented explicitly in AggQuantile struct unimplemented!() }, +
GroupByMethod::Bitwise(f) => { + let (s, groups) = ac.get_final_aggregation(); + let agg_s = match f { + GroupByBitwiseMethod::And => s.agg_and(&groups), + GroupByBitwiseMethod::Or => s.agg_or(&groups), + GroupByBitwiseMethod::Xor => s.agg_xor(&groups), + }; + AggregatedScalar(rename_series(agg_s, keep_name)) + }, GroupByMethod::NanMin => { #[cfg(feature = "propagate_nans")] { diff --git a/crates/polars-expr/src/planner.rs b/crates/polars-expr/src/planner.rs index c7208b10d63f..2e21d14f13d5 100644 --- a/crates/polars-expr/src/planner.rs +++ b/crates/polars-expr/src/planner.rs @@ -386,6 +386,7 @@ fn create_physical_expr_inner( }, I::Std(_, ddof) => GBM::Std(*ddof), I::Var(_, ddof) => GBM::Var(*ddof), + I::Bitwise(_, f) => GBM::Bitwise((*f).into()), I::AggGroups(_) => { polars_bail!(InvalidOperation: "agg groups expression only supported in aggregation context") }, diff --git a/crates/polars-ops/src/series/ops/bitwise.rs b/crates/polars-ops/src/series/ops/bitwise.rs new file mode 100644 index 000000000000..42d07835c051 --- /dev/null +++ b/crates/polars-ops/src/series/ops/bitwise.rs @@ -0,0 +1,57 @@ +use polars_core::chunked_array::ops::arity::unary_mut_values; +use polars_core::chunked_array::ChunkedArray; +use polars_core::prelude::DataType; +use polars_core::series::Series; +use polars_core::{with_match_physical_float_polars_type, with_match_physical_integer_polars_type}; +use polars_error::{polars_bail, PolarsResult}; + +use super::*; + +macro_rules! apply_bitwise_op { + ($($op:ident),+ $(,)?) 
=> { + $( + pub fn $op(s: &Series) -> PolarsResult { + match s.dtype() { + DataType::Boolean => { + let ca: &ChunkedArray = s.as_any().downcast_ref().unwrap(); + Ok(unary_mut_values::( + ca, + |a| polars_compute::bitwise::BitwiseKernel::$op(a), + ).into_series()) + }, + dt if dt.is_integer() => { + with_match_physical_integer_polars_type!(dt, |$T| { + let ca: &ChunkedArray<$T> = s.as_any().downcast_ref().unwrap(); + Ok(unary_mut_values::<$T, UInt8Type, _, _>( + ca, + |a| polars_compute::bitwise::BitwiseKernel::$op(a), + ).into_series()) + }) + }, + dt if dt.is_float() => { + with_match_physical_float_polars_type!(dt, |$T| { + let ca: &ChunkedArray<$T> = s.as_any().downcast_ref().unwrap(); + Ok(unary_mut_values::<$T, UInt8Type, _, _>( + ca, + |a| polars_compute::bitwise::BitwiseKernel::$op(a), + ).into_series()) + }) + }, + dt => { + polars_bail!(InvalidOperation: "dtype {:?} not supported in '{}' operation", dt, stringify!($op)) + }, + } + } + )+ + + }; +} + +apply_bitwise_op! { + count_ones, + count_zeros, + leading_ones, + leading_zeros, + trailing_ones, + trailing_zeros, +} diff --git a/crates/polars-plan/src/dsl/bitwise.rs b/crates/polars-plan/src/dsl/bitwise.rs new file mode 100644 index 000000000000..b9e66c2c632e --- /dev/null +++ b/crates/polars-plan/src/dsl/bitwise.rs @@ -0,0 +1,50 @@ +use std::sync::Arc; + +use super::{AggExpr, BitwiseAggFunction, BitwiseFunction, Expr, FunctionExpr}; + +impl Expr { + /// Evaluate the number of set bits. + pub fn bitwise_count_ones(self) -> Self { + self.apply_private(FunctionExpr::Bitwise(BitwiseFunction::CountOnes)) + } + + /// Evaluate the number of unset bits. + pub fn bitwise_count_zeros(self) -> Self { + self.apply_private(FunctionExpr::Bitwise(BitwiseFunction::CountZeros)) + } + + /// Evaluate the number of most-significant set bits before seeing an unset bit.
+ pub fn bitwise_leading_ones(self) -> Self { + self.apply_private(FunctionExpr::Bitwise(BitwiseFunction::LeadingOnes)) + } + + /// Evaluate the number of most-significant unset bits before seeing a set bit. + pub fn bitwise_leading_zeros(self) -> Self { + self.apply_private(FunctionExpr::Bitwise(BitwiseFunction::LeadingZeros)) + } + + /// Evaluate the number of least-significant set bits before seeing an unset bit. + pub fn bitwise_trailing_ones(self) -> Self { + self.apply_private(FunctionExpr::Bitwise(BitwiseFunction::TrailingOnes)) + } + + /// Evaluate the number of least-significant unset bits before seeing a set bit. + pub fn bitwise_trailing_zeros(self) -> Self { + self.apply_private(FunctionExpr::Bitwise(BitwiseFunction::TrailingZeros)) + } + + /// Perform an aggregation of bitwise ANDs. + pub fn bitwise_and(self) -> Self { + Expr::Agg(AggExpr::Bitwise(Arc::new(self), BitwiseAggFunction::And)) + } + + /// Perform an aggregation of bitwise ORs. + pub fn bitwise_or(self) -> Self { + Expr::Agg(AggExpr::Bitwise(Arc::new(self), BitwiseAggFunction::Or)) + } + + /// Perform an aggregation of bitwise XORs. + pub fn bitwise_xor(self) -> Self { + Expr::Agg(AggExpr::Bitwise(Arc::new(self), BitwiseAggFunction::Xor)) + } +} diff --git a/crates/polars-plan/src/dsl/expr.rs b/crates/polars-plan/src/dsl/expr.rs index 0bbecd7e1d77..f3e31b07c4a7 100644 --- a/crates/polars-plan/src/dsl/expr.rs +++ b/crates/polars-plan/src/dsl/expr.rs @@ -7,6 +7,7 @@ use polars_core::prelude::*; use serde::{Deserialize, Serialize}; pub use super::expr_dyn_fn::*; +use super::function_expr::BitwiseAggFunction; use crate::prelude::*; #[derive(PartialEq, Clone, Hash)] @@ -37,6 +38,7 @@ pub enum AggExpr { AggGroups(Arc), Std(Arc, u8), Var(Arc, u8), + Bitwise(Arc, BitwiseAggFunction), } impl AsRef for AggExpr { @@ -57,6 +59,7 @@ impl AsRef for AggExpr { AggGroups(e) => e, Std(e, _) => e, Var(e, _) => e, + Bitwise(e, _) => e, } } } diff --git a/crates/polars-plan/src/dsl/function_expr/bitwise.rs
b/crates/polars-plan/src/dsl/function_expr/bitwise.rs new file mode 100644 index 000000000000..3e9ea4f662f1 --- /dev/null +++ b/crates/polars-plan/src/dsl/function_expr/bitwise.rs @@ -0,0 +1,114 @@ +use std::fmt; +use std::sync::Arc; + +use polars_core::prelude::*; + +use super::{ColumnsUdf, SpecialEq}; +use crate::dsl::FieldsMapper; +use crate::map; + +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[derive(Clone, Copy, PartialEq, Debug, Eq, Hash)] +pub enum BitwiseFunction { + CountOnes, + CountZeros, + + LeadingOnes, + LeadingZeros, + + TrailingOnes, + TrailingZeros, +} + +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[derive(Clone, Copy, PartialEq, Debug, Eq, Hash)] +pub enum BitwiseAggFunction { + And, + Or, + Xor, +} + +impl fmt::Display for BitwiseFunction { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result { + use BitwiseFunction as B; + + let s = match self { + B::CountOnes => "count_ones", + B::CountZeros => "count_zeros", + B::LeadingOnes => "leading_ones", + B::LeadingZeros => "leading_zeros", + B::TrailingOnes => "trailing_ones", + B::TrailingZeros => "trailing_zeros", + }; + + f.write_str(s) + } +} + +impl From for SpecialEq> { + fn from(func: BitwiseFunction) -> Self { + use BitwiseFunction as B; + + match func { + B::CountOnes => map!(count_ones), + B::CountZeros => map!(count_zeros), + B::LeadingOnes => map!(leading_ones), + B::LeadingZeros => map!(leading_zeros), + B::TrailingOnes => map!(trailing_ones), + B::TrailingZeros => map!(trailing_zeros), + } + } +} + +impl From for GroupByBitwiseMethod { + fn from(value: BitwiseAggFunction) -> Self { + match value { + BitwiseAggFunction::And => Self::And, + BitwiseAggFunction::Or => Self::Or, + BitwiseAggFunction::Xor => Self::Xor, + } + } +} + +impl BitwiseFunction { + pub(super) fn get_field(&self, mapper: FieldsMapper) -> PolarsResult { + mapper.try_map_dtype(|dtype| { + let is_valid = match dtype { + DataType::Boolean => 
true, + dt if dt.is_integer() => true, + dt if dt.is_float() => true, + _ => false, + }; + + if !is_valid { + polars_bail!(InvalidOperation: "dtype {} not supported in '{}' operation", dtype, self); + } + + Ok(DataType::UInt8) + }) + } +} + +fn count_ones(c: &Column) -> PolarsResult { + c.try_apply_unary_elementwise(polars_ops::series::count_ones) +} + +fn count_zeros(c: &Column) -> PolarsResult { + c.try_apply_unary_elementwise(polars_ops::series::count_zeros) +} + +fn leading_ones(c: &Column) -> PolarsResult { + c.try_apply_unary_elementwise(polars_ops::series::leading_ones) +} + +fn leading_zeros(c: &Column) -> PolarsResult { + c.try_apply_unary_elementwise(polars_ops::series::leading_zeros) +} + +fn trailing_ones(c: &Column) -> PolarsResult { + c.try_apply_unary_elementwise(polars_ops::series::trailing_ones) +} + +fn trailing_zeros(c: &Column) -> PolarsResult { + c.try_apply_unary_elementwise(polars_ops::series::trailing_zeros) +} diff --git a/crates/polars-plan/src/dsl/function_expr/mod.rs b/crates/polars-plan/src/dsl/function_expr/mod.rs index d64adc740f3a..2fee12fbbedd 100644 --- a/crates/polars-plan/src/dsl/function_expr/mod.rs +++ b/crates/polars-plan/src/dsl/function_expr/mod.rs @@ -90,7 +90,7 @@ use schema::FieldsMapper; use serde::{Deserialize, Serialize}; pub(crate) use self::binary::BinaryFunction; -pub use self::bitwise::BitwiseFunction; +pub use self::bitwise::{BitwiseAggFunction, BitwiseFunction}; pub use self::boolean::BooleanFunction; #[cfg(feature = "business")] pub(super) use self::business::BusinessFunction; diff --git a/crates/polars-plan/src/dsl/mod.rs b/crates/polars-plan/src/dsl/mod.rs index c543f63f0824..37102de5cbcf 100644 --- a/crates/polars-plan/src/dsl/mod.rs +++ b/crates/polars-plan/src/dsl/mod.rs @@ -45,7 +45,6 @@ pub use arity::*; #[cfg(feature = "dtype-array")] pub use array::*; use arrow::legacy::prelude::QuantileInterpolOptions; -pub use bitwise::*; pub use expr::*; pub use function_expr::schema::FieldsMapper; pub use
function_expr::*; @@ -1931,11 +1930,6 @@ impl Expr { list::ListNameSpace(self) } - /// Get the [`bitwise::BitwiseNameSpace`] - pub fn bitwise(self) -> bitwise::BitwiseNameSpace { - bitwise::BitwiseNameSpace(self) - } - /// Get the [`name::ExprNameNameSpace`] pub fn name(self) -> name::ExprNameNameSpace { name::ExprNameNameSpace(self) diff --git a/crates/polars-plan/src/plans/aexpr/mod.rs b/crates/polars-plan/src/plans/aexpr/mod.rs index 42bfff7cabab..14d650611431 100644 --- a/crates/polars-plan/src/plans/aexpr/mod.rs +++ b/crates/polars-plan/src/plans/aexpr/mod.rs @@ -50,6 +50,7 @@ pub enum IRAggExpr { Count(Node, bool), Std(Node, u8), Var(Node, u8), + Bitwise(Node, BitwiseAggFunction), AggGroups(Node), } @@ -62,6 +63,7 @@ impl Hash for IRAggExpr { }, Self::Quantile { interpol, .. } => interpol.hash(state), Self::Std(_, v) | Self::Var(_, v) => v.hash(state), + Self::Bitwise(_, f) => f.hash(state), _ => {}, } } @@ -91,6 +93,7 @@ impl IRAggExpr { (Quantile { interpol: l, .. }, Quantile { interpol: r, .. }) => l == r, (Std(_, l), Std(_, r)) => l == r, (Var(_, l), Var(_, r)) => l == r, + (Bitwise(_, l), Bitwise(_, r)) => l == r, _ => std::mem::discriminant(self) == std::mem::discriminant(other), } } @@ -124,6 +127,7 @@ impl From for GroupByMethod { Count(_, include_nulls) => GroupByMethod::Count { include_nulls }, Std(_, ddof) => GroupByMethod::Std(ddof), Var(_, ddof) => GroupByMethod::Var(ddof), + Bitwise(_, f) => GroupByMethod::Bitwise(f.into()), AggGroups(_) => GroupByMethod::Groups, Quantile { .. 
} => unreachable!(), } diff --git a/crates/polars-plan/src/plans/aexpr/schema.rs b/crates/polars-plan/src/plans/aexpr/schema.rs index 0145776684f4..1b59db205161 100644 --- a/crates/polars-plan/src/plans/aexpr/schema.rs +++ b/crates/polars-plan/src/plans/aexpr/schema.rs @@ -217,6 +217,12 @@ impl AExpr { float_type(&mut field); Ok(field) }, + Bitwise(expr, _) => { + *nested = nested.saturating_sub(1); + let field = arena.get(*expr).to_field_impl(schema, arena, nested)?; + // @Q? Do we need to coerce here? + Ok(field) + }, } }, Cast { expr, dtype, .. } => { diff --git a/crates/polars-plan/src/plans/aexpr/traverse.rs b/crates/polars-plan/src/plans/aexpr/traverse.rs index 29999ef6995f..f6f337e23acb 100644 --- a/crates/polars-plan/src/plans/aexpr/traverse.rs +++ b/crates/polars-plan/src/plans/aexpr/traverse.rs @@ -197,6 +197,7 @@ impl IRAggExpr { Std(input, _) => Single(*input), Var(input, _) => Single(*input), AggGroups(input) => Single(*input), + Bitwise(input, _) => Single(*input), } } pub fn set_input(&mut self, input: Node) { @@ -216,6 +217,7 @@ impl IRAggExpr { Std(input, _) => input, Var(input, _) => input, AggGroups(input) => input, + Bitwise(input, _) => input, }; *node = input; } diff --git a/crates/polars-plan/src/plans/conversion/expr_to_ir.rs b/crates/polars-plan/src/plans/conversion/expr_to_ir.rs index fe13dd1d3592..74b2c01539f9 100644 --- a/crates/polars-plan/src/plans/conversion/expr_to_ir.rs +++ b/crates/polars-plan/src/plans/conversion/expr_to_ir.rs @@ -260,6 +260,10 @@ pub(super) fn to_aexpr_impl( AggExpr::AggGroups(expr) => { IRAggExpr::AggGroups(to_aexpr_impl_materialized_lit(owned(expr), arena, state)?) 
}, + AggExpr::Bitwise(expr, f) => IRAggExpr::Bitwise( + to_aexpr_impl_materialized_lit(owned(expr), arena, state)?, + f, + ), }; AExpr::Agg(a_agg) }, diff --git a/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs b/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs index c90590914e47..e677d5ba1b30 100644 --- a/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs +++ b/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs @@ -160,6 +160,10 @@ pub fn node_to_expr(node: Node, expr_arena: &Arena) -> Expr { let expr = node_to_expr(expr, expr_arena); AggExpr::Count(Arc::new(expr), include_nulls).into() }, + IRAggExpr::Bitwise(expr, f) => { + let expr = node_to_expr(expr, expr_arena); + AggExpr::Bitwise(Arc::new(expr), f).into() + }, }, AExpr::Ternary { predicate, diff --git a/crates/polars-plan/src/plans/format.rs b/crates/polars-plan/src/plans/format.rs index d39f3dd35cc9..f38b6bb627ab 100644 --- a/crates/polars-plan/src/plans/format.rs +++ b/crates/polars-plan/src/plans/format.rs @@ -120,6 +120,15 @@ impl fmt::Debug for Expr { Var(expr, _) => write!(f, "{expr:?}.var()"), Std(expr, _) => write!(f, "{expr:?}.std()"), Quantile { expr, .. } => write!(f, "{expr:?}.quantile()"), + Bitwise(expr, t) => { + let t = match t { + BitwiseAggFunction::And => "and", + BitwiseAggFunction::Or => "or", + BitwiseAggFunction::Xor => "xor", + }; + + write!(f, "{expr:?}.bitwise.{t}()") + }, } }, Cast { diff --git a/crates/polars-plan/src/plans/ir/format.rs b/crates/polars-plan/src/plans/ir/format.rs index 76de9f3beb24..b49c589027a1 100644 --- a/crates/polars-plan/src/plans/ir/format.rs +++ b/crates/polars-plan/src/plans/ir/format.rs @@ -587,6 +587,15 @@ impl<'a> Display for ExprIRDisplay<'a> { Var(expr, _) => write!(f, "{}.var()", self.with_root(expr)), Std(expr, _) => write!(f, "{}.std()", self.with_root(expr)), Quantile { expr, .. 
} => write!(f, "{}.quantile()", self.with_root(expr)), + Bitwise(expr, t) => { + let t = match t { + BitwiseAggFunction::And => "and", + BitwiseAggFunction::Or => "or", + BitwiseAggFunction::Xor => "xor", + }; + + write!(f, "{}.bitwise.{t}()", self.with_root(expr)) + }, } }, Cast { diff --git a/crates/polars-plan/src/plans/iterator.rs b/crates/polars-plan/src/plans/iterator.rs index 2dc13870b553..f879fe9255f9 100644 --- a/crates/polars-plan/src/plans/iterator.rs +++ b/crates/polars-plan/src/plans/iterator.rs @@ -56,6 +56,7 @@ macro_rules! push_expr { AggGroups(e) => $push($c, e), Std(e, _) => $push($c, e), Var(e, _) => $push($c, e), + Bitwise(e, _) => $push($c, e), } }, Ternary { diff --git a/crates/polars-plan/src/plans/visitor/expr.rs b/crates/polars-plan/src/plans/visitor/expr.rs index 2f5fce9bc283..a4089e7af060 100644 --- a/crates/polars-plan/src/plans/visitor/expr.rs +++ b/crates/polars-plan/src/plans/visitor/expr.rs @@ -72,6 +72,7 @@ impl TreeWalker for Expr { AggGroups(x) => AggGroups(am(x, f)?), Std(x, ddf) => Std(am(x, f)?, ddf), Var(x, ddf) => Var(am(x, f)?, ddf), + Bitwise(x, t) => Bitwise(am(x, f)?, t), }), Ternary { predicate, truthy, falsy } => Ternary { predicate: am(predicate, &mut f)?, truthy: am(truthy, &mut f)?, falsy: am(falsy, f)? 
}, Function { input, function, options } => Function { input: input.into_iter().map(f).collect::>()?, function, options }, diff --git a/crates/polars-python/src/expr/bitwise.rs b/crates/polars-python/src/expr/bitwise.rs new file mode 100644 index 000000000000..550b1930942e --- /dev/null +++ b/crates/polars-python/src/expr/bitwise.rs @@ -0,0 +1,42 @@ +use pyo3::prelude::*; + +use crate::PyExpr; + +#[pymethods] +impl PyExpr { + fn bitwise_count_ones(&self) -> Self { + self.inner.clone().bitwise_count_ones().into() + } + + fn bitwise_count_zeros(&self) -> Self { + self.inner.clone().bitwise_count_zeros().into() + } + + fn bitwise_leading_ones(&self) -> Self { + self.inner.clone().bitwise_leading_ones().into() + } + + fn bitwise_leading_zeros(&self) -> Self { + self.inner.clone().bitwise_leading_zeros().into() + } + + fn bitwise_trailing_ones(&self) -> Self { + self.inner.clone().bitwise_trailing_ones().into() + } + + fn bitwise_trailing_zeros(&self) -> Self { + self.inner.clone().bitwise_trailing_zeros().into() + } + + fn bitwise_and(&self) -> Self { + self.inner.clone().bitwise_and().into() + } + + fn bitwise_or(&self) -> Self { + self.inner.clone().bitwise_or().into() + } + + fn bitwise_xor(&self) -> Self { + self.inner.clone().bitwise_xor().into() + } +} diff --git a/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs b/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs index 5445bbabeda9..a05ea6891e15 100644 --- a/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs +++ b/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs @@ -723,6 +723,16 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult { arguments: vec![n.0], options: py.None(), }, + IRAggExpr::Bitwise(n, f) => Agg { + name: "bitwise".to_object(py), + arguments: vec![n.0], + options: match f { + polars::prelude::BitwiseAggFunction::And => "and", + polars::prelude::BitwiseAggFunction::Or => "or", + polars::prelude::BitwiseAggFunction::Xor => "xor", + } + .to_object(py), + }, 
} .into_py(py), AExpr::Ternary { diff --git a/crates/polars-stream/src/physical_plan/lower_expr.rs b/crates/polars-stream/src/physical_plan/lower_expr.rs index 39493af054c2..7ee0b16fda7e 100644 --- a/crates/polars-stream/src/physical_plan/lower_expr.rs +++ b/crates/polars-stream/src/physical_plan/lower_expr.rs @@ -598,7 +598,8 @@ fn lower_exprs_with_ctx( | IRAggExpr::Count(_, _) | IRAggExpr::Std(_, _) | IRAggExpr::Var(_, _) - | IRAggExpr::AggGroups(_) => { + | IRAggExpr::AggGroups(_) + | IRAggExpr::Bitwise(_, _) => { let out_name = unique_column_name(); fallback_subset.push(ExprIR::new(expr, OutputName::Alias(out_name.clone()))); transformed_exprs.push(ctx.expr_arena.add(AExpr::Column(out_name))); diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index ac4870c6d621..36e95cedbe01 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -47,7 +47,6 @@ from polars.exceptions import CustomUFuncWarning, PolarsInefficientMapWarning from polars.expr.array import ExprArrayNameSpace from polars.expr.binary import ExprBinaryNameSpace -from polars.expr.bitwise import ExprBitwiseNameSpace from polars.expr.categorical import ExprCatNameSpace from polars.expr.datetime import ExprDateTimeNameSpace from polars.expr.list import ExprListNameSpace @@ -10473,6 +10472,60 @@ def replace_strict( self._pyexpr.replace_strict(old, new, default, return_dtype) ) + def bitwise_count_ones(self) -> Expr: + """ + Evaluate the number of set bits. + """ + return self._from_pyexpr(self._pyexpr.bitwise_count_ones()) + + def bitwise_count_zeros(self) -> Expr: + """ + Evaluate the number of unset bits. + """ + return self._from_pyexpr(self._pyexpr.bitwise_count_zeros()) + + def bitwise_leading_ones(self) -> Expr: + """ + Evaluate the number most-significant set bits before seeing an unset bit. 
+ """ + return self._from_pyexpr(self._pyexpr.bitwise_leading_ones()) + + def bitwise_leading_zeros(self) -> Expr: + """ + Evaluate the number most-significant unset bits before seeing a set bit. + """ + return self._from_pyexpr(self._pyexpr.bitwise_leading_zeros()) + + def bitwise_trailing_ones(self) -> Expr: + """ + Evaluate the number least-significant set bits before seeing an unset bit. + """ + return self._from_pyexpr(self._pyexpr.bitwise_trailing_ones()) + + def bitwise_trailing_zeros(self) -> Expr: + """ + Evaluate the number least-significant unset bits before seeing a set bit. + """ + return self._from_pyexpr(self._pyexpr.bitwise_trailing_zeros()) + + def bitwise_and(self) -> Expr: + """ + Perform an aggregation of bitwise ANDs + """ + return self._from_pyexpr(self._pyexpr.bitwise_and()) + + def bitwise_or(self) -> Expr: + """ + Perform an aggregation of bitwise ORs + """ + return self._from_pyexpr(self._pyexpr.bitwise_or()) + + def bitwise_xor(self) -> Expr: + """ + Perform an aggregation of bitwise XORs + """ + return self._from_pyexpr(self._pyexpr.bitwise_xor()) + @deprecate_function( "Use `polars.plugins.register_plugin_function` instead.", version="0.20.16" ) @@ -10622,15 +10675,6 @@ def dt(self) -> ExprDateTimeNameSpace: # Keep the `list` and `str` properties below at the end of the definition of Expr, # as to not confuse mypy with the type annotation `str` and `list` - @property - def bitwise(self) -> ExprBitwiseNameSpace: - """ - Create an object namespace of all bitwise related methods. - - See the individual method pages for full details. 
- """ - return ExprBitwiseNameSpace(self) - @property def list(self) -> ExprListNameSpace: """ From f3670061d8e1562fb081c47b7a9e6b024bc66fed Mon Sep 17 00:00:00 2001 From: coastalwhite Date: Mon, 30 Sep 2024 10:42:40 +0200 Subject: [PATCH 3/9] pyfmt --- py-polars/polars/expr/expr.py | 36 +++++++++-------------------------- 1 file changed, 9 insertions(+), 27 deletions(-) diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index 36e95cedbe01..95c0ed2e6cb1 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -10473,57 +10473,39 @@ def replace_strict( ) def bitwise_count_ones(self) -> Expr: - """ - Evaluate the number of set bits. - """ + """Evaluate the number of set bits.""" return self._from_pyexpr(self._pyexpr.bitwise_count_ones()) def bitwise_count_zeros(self) -> Expr: - """ - Evaluate the number of unset bits. - """ + """Evaluate the number of unset bits.""" return self._from_pyexpr(self._pyexpr.bitwise_count_zeros()) def bitwise_leading_ones(self) -> Expr: - """ - Evaluate the number most-significant set bits before seeing an unset bit. - """ + """Evaluate the number most-significant set bits before seeing an unset bit.""" return self._from_pyexpr(self._pyexpr.bitwise_leading_ones()) def bitwise_leading_zeros(self) -> Expr: - """ - Evaluate the number most-significant unset bits before seeing a set bit. - """ + """Evaluate the number most-significant unset bits before seeing a set bit.""" return self._from_pyexpr(self._pyexpr.bitwise_leading_zeros()) def bitwise_trailing_ones(self) -> Expr: - """ - Evaluate the number least-significant set bits before seeing an unset bit. - """ + """Evaluate the number least-significant set bits before seeing an unset bit.""" return self._from_pyexpr(self._pyexpr.bitwise_trailing_ones()) def bitwise_trailing_zeros(self) -> Expr: - """ - Evaluate the number least-significant unset bits before seeing a set bit. 
- """ + """Evaluate the number least-significant unset bits before seeing a set bit.""" return self._from_pyexpr(self._pyexpr.bitwise_trailing_zeros()) def bitwise_and(self) -> Expr: - """ - Perform an aggregation of bitwise ANDs - """ + """Perform an aggregation of bitwise ANDs.""" return self._from_pyexpr(self._pyexpr.bitwise_and()) def bitwise_or(self) -> Expr: - """ - Perform an aggregation of bitwise ORs - """ + """Perform an aggregation of bitwise ORs.""" return self._from_pyexpr(self._pyexpr.bitwise_or()) def bitwise_xor(self) -> Expr: - """ - Perform an aggregation of bitwise XORs - """ + """Perform an aggregation of bitwise XORs.""" return self._from_pyexpr(self._pyexpr.bitwise_xor()) @deprecate_function( From 0c45d90196a6ea8c36e7d83d1a7af19a7bfd254a Mon Sep 17 00:00:00 2001 From: coastalwhite Date: Mon, 30 Sep 2024 14:12:03 +0200 Subject: [PATCH 4/9] finish --- crates/polars-compute/src/bitwise/mod.rs | 68 +++---- crates/polars-core/Cargo.toml | 1 + .../polars-core/src/chunked_array/ops/mod.rs | 2 + crates/polars-core/src/frame/column/mod.rs | 4 + .../frame/group_by/aggregations/boolean.rs | 4 + .../src/frame/group_by/aggregations/mod.rs | 2 + crates/polars-core/src/frame/group_by/mod.rs | 11 +- .../src/series/implementations/boolean.rs | 3 + .../src/series/implementations/floats.rs | 72 ++----- .../src/series/implementations/mod.rs | 51 ++--- crates/polars-core/src/series/series_trait.rs | 3 + crates/polars-core/src/utils/mod.rs | 8 +- crates/polars-expr/Cargo.toml | 1 + .../src/expressions/aggregation.rs | 2 + crates/polars-expr/src/planner.rs | 1 + crates/polars-lazy/Cargo.toml | 1 + crates/polars-ops/Cargo.toml | 1 + crates/polars-ops/src/series/ops/bitwise.rs | 6 +- crates/polars-plan/Cargo.toml | 2 + crates/polars-plan/src/dsl/expr.rs | 5 +- .../src/dsl/function_expr/bitwise.rs | 4 +- .../polars-plan/src/dsl/function_expr/mod.rs | 11 +- .../src/dsl/function_expr/schema.rs | 1 + crates/polars-plan/src/dsl/mod.rs | 1 + 
crates/polars-plan/src/plans/aexpr/mod.rs | 4 + crates/polars-plan/src/plans/aexpr/schema.rs | 1 + .../polars-plan/src/plans/aexpr/traverse.rs | 2 + .../src/plans/conversion/expr_to_ir.rs | 1 + .../src/plans/conversion/ir_to_dsl.rs | 1 + crates/polars-plan/src/plans/format.rs | 1 + crates/polars-plan/src/plans/ir/format.rs | 1 + crates/polars-plan/src/plans/iterator.rs | 1 + crates/polars-plan/src/plans/visitor/expr.rs | 1 + crates/polars-python/Cargo.toml | 3 +- crates/polars-python/src/expr/mod.rs | 1 + crates/polars-stream/Cargo.toml | 1 + .../src/physical_plan/lower_expr.rs | 9 +- crates/polars/Cargo.toml | 1 + py-polars/polars/series/series.py | 27 +++ .../tests/unit/operations/test_bitwise.py | 183 ++++++++++++++++++ 40 files changed, 356 insertions(+), 147 deletions(-) diff --git a/crates/polars-compute/src/bitwise/mod.rs b/crates/polars-compute/src/bitwise/mod.rs index 7717d2606bef..578acb91b1ac 100644 --- a/crates/polars-compute/src/bitwise/mod.rs +++ b/crates/polars-compute/src/bitwise/mod.rs @@ -8,14 +8,14 @@ use bytemuck::Zeroable; pub trait BitwiseKernel { type Scalar; - fn count_ones(&self) -> PrimitiveArray; - fn count_zeros(&self) -> PrimitiveArray; + fn count_ones(&self) -> PrimitiveArray; + fn count_zeros(&self) -> PrimitiveArray; - fn leading_ones(&self) -> PrimitiveArray; - fn leading_zeros(&self) -> PrimitiveArray; + fn leading_ones(&self) -> PrimitiveArray; + fn leading_zeros(&self) -> PrimitiveArray; - fn trailing_ones(&self) -> PrimitiveArray; - fn trailing_zeros(&self) -> PrimitiveArray; + fn trailing_ones(&self) -> PrimitiveArray; + fn trailing_zeros(&self) -> PrimitiveArray; fn reduce_and(&self) -> Option; fn reduce_or(&self) -> Option; @@ -33,12 +33,12 @@ macro_rules! 
impl_bitwise_kernel { type Scalar = $T; #[inline(never)] - fn count_ones(&self) -> PrimitiveArray { + fn count_ones(&self) -> PrimitiveArray { PrimitiveArray::new( - ArrowDataType::UInt8, + ArrowDataType::UInt32, self.values() .iter() - .map(|&v| ($to_bits(v).count_ones() & 0xFF) as u8) + .map(|&v| $to_bits(v).count_ones()) .collect_trusted::>() .into(), self.validity().cloned(), @@ -46,13 +46,13 @@ macro_rules! impl_bitwise_kernel { } #[inline(never)] - fn count_zeros(&self) -> PrimitiveArray { + fn count_zeros(&self) -> PrimitiveArray { PrimitiveArray::new( - ArrowDataType::UInt8, + ArrowDataType::UInt32, self .values() .iter() - .map(|&v| ($to_bits(v).count_zeros() & 0xFF) as u8) + .map(|&v| $to_bits(v).count_zeros()) .collect_trusted::>() .into(), self.validity().cloned(), @@ -60,12 +60,12 @@ macro_rules! impl_bitwise_kernel { } #[inline(never)] - fn leading_ones(&self) -> PrimitiveArray { + fn leading_ones(&self) -> PrimitiveArray { PrimitiveArray::new( - ArrowDataType::UInt8, + ArrowDataType::UInt32, self.values() .iter() - .map(|&v| ($to_bits(v).leading_ones() & 0xFF) as u8) + .map(|&v| $to_bits(v).leading_ones()) .collect_trusted::>() .into(), self.validity().cloned(), @@ -73,12 +73,12 @@ macro_rules! impl_bitwise_kernel { } #[inline(never)] - fn leading_zeros(&self) -> PrimitiveArray { + fn leading_zeros(&self) -> PrimitiveArray { PrimitiveArray::new( - ArrowDataType::UInt8, + ArrowDataType::UInt32, self.values() .iter() - .map(|&v| ($to_bits(v).leading_zeros() & 0xFF) as u8) + .map(|&v| $to_bits(v).leading_zeros()) .collect_trusted::>() .into(), self.validity().cloned(), @@ -86,12 +86,12 @@ macro_rules! 
impl_bitwise_kernel { } #[inline(never)] - fn trailing_ones(&self) -> PrimitiveArray { + fn trailing_ones(&self) -> PrimitiveArray { PrimitiveArray::new( - ArrowDataType::UInt8, + ArrowDataType::UInt32, self.values() .iter() - .map(|&v| ($to_bits(v).trailing_ones() & 0xFF) as u8) + .map(|&v| $to_bits(v).trailing_ones()) .collect_trusted::>() .into(), self.validity().cloned(), @@ -99,11 +99,11 @@ macro_rules! impl_bitwise_kernel { } #[inline(never)] - fn trailing_zeros(&self) -> PrimitiveArray { + fn trailing_zeros(&self) -> PrimitiveArray { PrimitiveArray::new( - ArrowDataType::UInt8, + ArrowDataType::UInt32, self.values().iter() - .map(|&v| ($to_bits(v).trailing_zeros() & 0xFF) as u8) + .map(|&v| $to_bits(v).trailing_zeros()) .collect_trusted::>() .into(), self.validity().cloned(), @@ -186,12 +186,12 @@ impl BitwiseKernel for BooleanArray { type Scalar = bool; #[inline(never)] - fn count_ones(&self) -> PrimitiveArray { + fn count_ones(&self) -> PrimitiveArray { PrimitiveArray::new( - ArrowDataType::UInt8, + ArrowDataType::UInt32, self.values() .iter() - .map(u8::from) + .map(u32::from) .collect_trusted::>() .into(), self.validity().cloned(), @@ -199,12 +199,12 @@ impl BitwiseKernel for BooleanArray { } #[inline(never)] - fn count_zeros(&self) -> PrimitiveArray { + fn count_zeros(&self) -> PrimitiveArray { PrimitiveArray::new( - ArrowDataType::UInt8, + ArrowDataType::UInt32, self.values() .iter() - .map(|v| u8::from(!v)) + .map(|v| u32::from(!v)) .collect_trusted::>() .into(), self.validity().cloned(), @@ -212,22 +212,22 @@ impl BitwiseKernel for BooleanArray { } #[inline(always)] - fn leading_ones(&self) -> PrimitiveArray { + fn leading_ones(&self) -> PrimitiveArray { self.count_ones() } #[inline(always)] - fn leading_zeros(&self) -> PrimitiveArray { + fn leading_zeros(&self) -> PrimitiveArray { self.count_zeros() } #[inline(always)] - fn trailing_ones(&self) -> PrimitiveArray { + fn trailing_ones(&self) -> PrimitiveArray { self.count_ones() } #[inline(always)] - 
fn trailing_zeros(&self) -> PrimitiveArray { + fn trailing_zeros(&self) -> PrimitiveArray { self.count_zeros() } diff --git a/crates/polars-core/Cargo.toml b/crates/polars-core/Cargo.toml index a3f477e84dd6..a3c5b26e8386 100644 --- a/crates/polars-core/Cargo.toml +++ b/crates/polars-core/Cargo.toml @@ -75,6 +75,7 @@ fmt_no_tty = ["comfy-table"] rows = [] # operations +bitwise = ["algorithm_group_by"] zip_with = [] round_series = [] checked_arithmetic = [] diff --git a/crates/polars-core/src/chunked_array/ops/mod.rs b/crates/polars-core/src/chunked_array/ops/mod.rs index 9633dad9ae69..56e6145f2ef4 100644 --- a/crates/polars-core/src/chunked_array/ops/mod.rs +++ b/crates/polars-core/src/chunked_array/ops/mod.rs @@ -9,6 +9,7 @@ pub(crate) mod append; mod apply; pub mod arity; mod bit_repr; +#[cfg(feature = "bitwise")] mod bitwise_reduce; pub(crate) mod chunkops; pub(crate) mod compare_inner; @@ -297,6 +298,7 @@ pub trait ChunkVar { } /// Bitwise Reduction Operations. +#[cfg(feature = "bitwise")] pub trait ChunkBitwiseReduce { type Physical; diff --git a/crates/polars-core/src/frame/column/mod.rs b/crates/polars-core/src/frame/column/mod.rs index 7f1e0eb93cd6..e1a447d437bb 100644 --- a/crates/polars-core/src/frame/column/mod.rs +++ b/crates/polars-core/src/frame/column/mod.rs @@ -525,6 +525,7 @@ impl Column { /// # Safety /// /// Does no bounds checks, groups must be correct. + #[cfg(feature = "algorithm_group_by")] pub unsafe fn agg_first(&self, groups: &GroupsProxy) -> Self { // @scalar-opt unsafe { self.as_materialized_series().agg_first(groups) }.into() @@ -533,6 +534,7 @@ impl Column { /// # Safety /// /// Does no bounds checks, groups must be correct. + #[cfg(feature = "algorithm_group_by")] pub unsafe fn agg_last(&self, groups: &GroupsProxy) -> Self { // @scalar-opt unsafe { self.as_materialized_series().agg_last(groups) }.into() @@ -541,6 +543,7 @@ impl Column { /// # Safety /// /// Does no bounds checks, groups must be correct. 
+ #[cfg(feature = "algorithm_group_by")] pub unsafe fn agg_n_unique(&self, groups: &GroupsProxy) -> Self { // @scalar-opt unsafe { self.as_materialized_series().agg_n_unique(groups) }.into() @@ -549,6 +552,7 @@ impl Column { /// # Safety /// /// Does no bounds checks, groups must be correct. + #[cfg(feature = "algorithm_group_by")] pub unsafe fn agg_quantile( &self, groups: &GroupsProxy, diff --git a/crates/polars-core/src/frame/group_by/aggregations/boolean.rs b/crates/polars-core/src/frame/group_by/aggregations/boolean.rs index 74f65b7ce344..36cd8e9a8d41 100644 --- a/crates/polars-core/src/frame/group_by/aggregations/boolean.rs +++ b/crates/polars-core/src/frame/group_by/aggregations/boolean.rs @@ -17,6 +17,7 @@ where ca.into_series() } +#[cfg(feature = "bitwise")] unsafe fn bitwise_agg( ca: &BooleanChunked, groups: &GroupsProxy, @@ -51,6 +52,7 @@ unsafe fn bitwise_agg( } } +#[cfg(feature = "bitwise")] impl BooleanChunked { pub(crate) unsafe fn agg_and(&self, groups: &GroupsProxy) -> Series { bitwise_agg(self, groups, ChunkBitwiseReduce::and_reduce) @@ -63,7 +65,9 @@ impl BooleanChunked { pub(crate) unsafe fn agg_xor(&self, groups: &GroupsProxy) -> Series { bitwise_agg(self, groups, ChunkBitwiseReduce::xor_reduce) } +} +impl BooleanChunked { pub(crate) unsafe fn agg_min(&self, groups: &GroupsProxy) -> Series { // faster paths match (self.is_sorted_flag(), self.null_count()) { diff --git a/crates/polars-core/src/frame/group_by/aggregations/mod.rs b/crates/polars-core/src/frame/group_by/aggregations/mod.rs index 824a94fb3ec1..092d660fb4d2 100644 --- a/crates/polars-core/src/frame/group_by/aggregations/mod.rs +++ b/crates/polars-core/src/frame/group_by/aggregations/mod.rs @@ -458,6 +458,7 @@ where /// # Safety /// /// No bounds checks on `groups`. 
+#[cfg(feature = "bitwise")] unsafe fn bitwise_agg( ca: &ChunkedArray, groups: &GroupsProxy, @@ -496,6 +497,7 @@ where } } +#[cfg(feature = "bitwise")] impl ChunkedArray where T: PolarsNumericType, diff --git a/crates/polars-core/src/frame/group_by/mod.rs b/crates/polars-core/src/frame/group_by/mod.rs index aad0dbed9c12..89c72f5a0eac 100644 --- a/crates/polars-core/src/frame/group_by/mod.rs +++ b/crates/polars-core/src/frame/group_by/mod.rs @@ -869,13 +869,17 @@ pub enum GroupByMethod { Groups, NUnique, Quantile(f64, QuantileInterpolOptions), - Count { include_nulls: bool }, + Count { + include_nulls: bool, + }, Implode, Std(u8), Var(u8), + #[cfg(feature = "bitwise")] Bitwise(GroupByBitwiseMethod), } +#[cfg(feature = "bitwise")] #[derive(Copy, Clone, Debug)] pub enum GroupByBitwiseMethod { And, @@ -903,6 +907,7 @@ impl Display for GroupByMethod { Implode => "list", Std(_) => "std", Var(_) => "var", + #[cfg(feature = "bitwise")] Bitwise(t) => { f.write_str("bitwise_")?; return Display::fmt(t, f); @@ -912,6 +917,7 @@ impl Display for GroupByMethod { } } +#[cfg(feature = "bitwise")] impl Display for GroupByBitwiseMethod { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { @@ -942,7 +948,8 @@ pub fn fmt_group_by_column(name: &str, method: GroupByMethod) -> PlSmallStr { Quantile(quantile, _interpol) => format_pl_smallstr!("{name}_quantile_{quantile:.2}"), Std(_) => format_pl_smallstr!("{name}_agg_std"), Var(_) => format_pl_smallstr!("{name}_agg_var"), - Bitwise(_) => format_pl_smallstr!("{name}_agg_var"), + #[cfg(feature = "bitwise")] + Bitwise(f) => format_pl_smallstr!("{name}_agg_bitwise_{f}"), } } diff --git a/crates/polars-core/src/series/implementations/boolean.rs b/crates/polars-core/src/series/implementations/boolean.rs index 1d5369b0e275..14409fbdb91c 100644 --- a/crates/polars-core/src/series/implementations/boolean.rs +++ b/crates/polars-core/src/series/implementations/boolean.rs @@ -84,12 +84,15 @@ impl private::PrivateSeries for 
SeriesWrap { .agg_var(groups, _ddof) } + #[cfg(feature = "bitwise")] unsafe fn agg_and(&self, groups: &GroupsProxy) -> Series { self.0.agg_and(groups) } + #[cfg(feature = "bitwise")] unsafe fn agg_or(&self, groups: &GroupsProxy) -> Series { self.0.agg_or(groups) } + #[cfg(feature = "bitwise")] unsafe fn agg_xor(&self, groups: &GroupsProxy) -> Series { self.0.agg_xor(groups) } diff --git a/crates/polars-core/src/series/implementations/floats.rs b/crates/polars-core/src/series/implementations/floats.rs index 445b45a5c64e..eeba7c9b0a6b 100644 --- a/crates/polars-core/src/series/implementations/floats.rs +++ b/crates/polars-core/src/series/implementations/floats.rs @@ -96,12 +96,15 @@ macro_rules! impl_dyn_series { self.0.agg_list(groups) } + #[cfg(feature = "bitwise")] unsafe fn agg_and(&self, groups: &GroupsProxy) -> Series { self.0.agg_and(groups) } + #[cfg(feature = "bitwise")] unsafe fn agg_or(&self, groups: &GroupsProxy) -> Series { self.0.agg_or(groups) } + #[cfg(feature = "bitwise")] unsafe fn agg_xor(&self, groups: &GroupsProxy) -> Series { self.0.agg_xor(groups) } @@ -366,71 +369,26 @@ macro_rules! 
impl_dyn_series { ) -> PolarsResult { QuantileAggSeries::quantile_reduce(&self.0, quantile, interpol) } + #[cfg(feature = "bitwise")] fn and_reduce(&self) -> PolarsResult { let dt = <$pdt as PolarsDataType>::get_dtype(); - if self.0.null_count() > 0 { - return Ok(Scalar::new(dt, AnyValue::Null)); - } - - Ok(Scalar::new( - dt, - self.0 - .downcast_iter() - .filter(|arr| !arr.is_empty()) - .map(|arr| { - polars_compute::bitwise::BitwiseKernel::reduce_and(arr) - .unwrap() - .to_bits() - }) - .reduce(|a, b| a & b) - .map_or(AnyValue::Null, |v| { - <$pdt as PolarsDataType>::Physical::from_bits(v).into() - }), - )) + let av = self.0.and_reduce().map_or(AnyValue::Null, Into::into); + + Ok(Scalar::new(dt, av)) } + #[cfg(feature = "bitwise")] fn or_reduce(&self) -> PolarsResult { let dt = <$pdt as PolarsDataType>::get_dtype(); - if self.0.null_count() > 0 { - return Ok(Scalar::new(dt, AnyValue::Null)); - } - - Ok(Scalar::new( - dt, - self.0 - .downcast_iter() - .filter(|arr| !arr.is_empty()) - .map(|arr| { - polars_compute::bitwise::BitwiseKernel::reduce_or(arr) - .unwrap() - .to_bits() - }) - .reduce(|a, b| a | b) - .map_or(AnyValue::Null, |v| { - <$pdt as PolarsDataType>::Physical::from_bits(v).into() - }), - )) + let av = self.0.or_reduce().map_or(AnyValue::Null, Into::into); + + Ok(Scalar::new(dt, av)) } + #[cfg(feature = "bitwise")] fn xor_reduce(&self) -> PolarsResult { let dt = <$pdt as PolarsDataType>::get_dtype(); - if self.0.null_count() > 0 { - return Ok(Scalar::new(dt, AnyValue::Null)); - } - - Ok(Scalar::new( - dt, - self.0 - .downcast_iter() - .filter(|arr| !arr.is_empty()) - .map(|arr| { - polars_compute::bitwise::BitwiseKernel::reduce_xor(arr) - .unwrap() - .to_bits() - }) - .reduce(|a, b| a ^ b) - .map_or(AnyValue::Null, |v| { - <$pdt as PolarsDataType>::Physical::from_bits(v).into() - }), - )) + let av = self.0.xor_reduce().map_or(AnyValue::Null, Into::into); + + Ok(Scalar::new(dt, av)) } fn clone_inner(&self) -> Arc { diff --git 
a/crates/polars-core/src/series/implementations/mod.rs b/crates/polars-core/src/series/implementations/mod.rs index 8e211a0ce70c..8d4d2caa3ddf 100644 --- a/crates/polars-core/src/series/implementations/mod.rs +++ b/crates/polars-core/src/series/implementations/mod.rs @@ -169,12 +169,15 @@ macro_rules! impl_dyn_series { self.0.agg_list(groups) } + #[cfg(feature = "bitwise")] unsafe fn agg_and(&self, groups: &GroupsProxy) -> Series { self.0.agg_and(groups) } + #[cfg(feature = "bitwise")] unsafe fn agg_or(&self, groups: &GroupsProxy) -> Series { self.0.agg_or(groups) } + #[cfg(feature = "bitwise")] unsafe fn agg_xor(&self, groups: &GroupsProxy) -> Series { self.0.agg_xor(groups) } @@ -469,53 +472,29 @@ macro_rules! impl_dyn_series { ) -> PolarsResult { QuantileAggSeries::quantile_reduce(&self.0, quantile, interpol) } + + #[cfg(feature = "bitwise")] fn and_reduce(&self) -> PolarsResult { let dt = <$pdt as PolarsDataType>::get_dtype(); - if self.0.null_count() > 0 { - return Ok(Scalar::new(dt, AnyValue::Null)); - } + let av = self.0.and_reduce().map_or(AnyValue::Null, Into::into); - Ok(Scalar::new( - dt, - self.0 - .downcast_iter() - .filter(|arr| !arr.is_empty()) - .map(|arr| polars_compute::bitwise::BitwiseKernel::reduce_and(arr).unwrap()) - .reduce(|a, b| a & b) - .map_or(AnyValue::Null, Into::into), - )) + Ok(Scalar::new(dt, av)) } + + #[cfg(feature = "bitwise")] fn or_reduce(&self) -> PolarsResult { let dt = <$pdt as PolarsDataType>::get_dtype(); - if self.0.null_count() > 0 { - return Ok(Scalar::new(dt, AnyValue::Null)); - } + let av = self.0.or_reduce().map_or(AnyValue::Null, Into::into); - Ok(Scalar::new( - dt, - self.0 - .downcast_iter() - .filter(|arr| !arr.is_empty()) - .map(|arr| polars_compute::bitwise::BitwiseKernel::reduce_or(arr).unwrap()) - .reduce(|a, b| a | b) - .map_or(AnyValue::Null, Into::into), - )) + Ok(Scalar::new(dt, av)) } + + #[cfg(feature = "bitwise")] fn xor_reduce(&self) -> PolarsResult { let dt = <$pdt as PolarsDataType>::get_dtype(); - 
if self.0.null_count() > 0 { - return Ok(Scalar::new(dt, AnyValue::Null)); - } + let av = self.0.xor_reduce().map_or(AnyValue::Null, Into::into); - Ok(Scalar::new( - dt, - self.0 - .downcast_iter() - .filter(|arr| !arr.is_empty()) - .map(|arr| polars_compute::bitwise::BitwiseKernel::reduce_xor(arr).unwrap()) - .reduce(|a, b| a ^ b) - .map_or(AnyValue::Null, Into::into), - )) + Ok(Scalar::new(dt, av)) } fn clone_inner(&self) -> Arc { diff --git a/crates/polars-core/src/series/series_trait.rs b/crates/polars-core/src/series/series_trait.rs index 677aa65472d8..46b45633b74e 100644 --- a/crates/polars-core/src/series/series_trait.rs +++ b/crates/polars-core/src/series/series_trait.rs @@ -150,18 +150,21 @@ pub(crate) mod private { /// # Safety /// /// Does no bounds checks, groups must be correct. + #[cfg(feature = "bitwise")] unsafe fn agg_and(&self, groups: &GroupsProxy) -> Series { Series::full_null(self._field().name().clone(), groups.len(), self._dtype()) } /// # Safety /// /// Does no bounds checks, groups must be correct. + #[cfg(feature = "bitwise")] unsafe fn agg_or(&self, groups: &GroupsProxy) -> Series { Series::full_null(self._field().name().clone(), groups.len(), self._dtype()) } /// # Safety /// /// Does no bounds checks, groups must be correct. + #[cfg(feature = "bitwise")] unsafe fn agg_xor(&self, groups: &GroupsProxy) -> Series { Series::full_null(self._field().name().clone(), groups.len(), self._dtype()) } diff --git a/crates/polars-core/src/utils/mod.rs b/crates/polars-core/src/utils/mod.rs index c123072d8c36..08b33b06c3b1 100644 --- a/crates/polars-core/src/utils/mod.rs +++ b/crates/polars-core/src/utils/mod.rs @@ -521,15 +521,15 @@ macro_rules! with_match_physical_integer_polars_type {( use $crate::datatypes::DataType::*; use $crate::datatypes::*; match $key_type { - #[cfg(feature = "dtype-i8")] + #[cfg(feature = "dtype-i8")] Int8 => __with_ty__! { Int8Type }, - #[cfg(feature = "dtype-i16")] + #[cfg(feature = "dtype-i16")] Int16 => __with_ty__! 
{ Int16Type }, Int32 => __with_ty__! { Int32Type }, Int64 => __with_ty__! { Int64Type }, - #[cfg(feature = "dtype-u8")] + #[cfg(feature = "dtype-u8")] UInt8 => __with_ty__! { UInt8Type }, - #[cfg(feature = "dtype-u16")] + #[cfg(feature = "dtype-u16")] UInt16 => __with_ty__! { UInt16Type }, UInt32 => __with_ty__! { UInt32Type }, UInt64 => __with_ty__! { UInt64Type }, diff --git a/crates/polars-expr/Cargo.toml b/crates/polars-expr/Cargo.toml index 7a1f974b41ff..1b2b6063de9b 100644 --- a/crates/polars-expr/Cargo.toml +++ b/crates/polars-expr/Cargo.toml @@ -68,6 +68,7 @@ dtype-u8 = ["polars-plan/dtype-u8"] approx_unique = ["polars-plan/approx_unique"] is_in = ["polars-plan/is_in", "polars-ops/is_in"] +bitwise = ["polars-core/bitwise", "polars-plan/bitwise"] round_series = ["polars-plan/round_series", "polars-ops/round_series"] is_between = ["polars-plan/is_between"] dynamic_group_by = ["polars-plan/dynamic_group_by", "polars-time", "temporal"] diff --git a/crates/polars-expr/src/expressions/aggregation.rs b/crates/polars-expr/src/expressions/aggregation.rs index 407047e6cd6c..e1d2a1e716ab 100644 --- a/crates/polars-expr/src/expressions/aggregation.rs +++ b/crates/polars-expr/src/expressions/aggregation.rs @@ -175,6 +175,7 @@ impl PhysicalExpr for AggregationExpr { .var_reduce(ddof) .map(|sc| sc.into_series(s.name().clone())), GroupByMethod::Quantile(_, _) => unimplemented!(), + #[cfg(feature = "bitwise")] GroupByMethod::Bitwise(f) => match f { GroupByBitwiseMethod::And => parallel_op_series( |s| s.and_reduce().map(|sc| sc.into_series(s.name().clone())), @@ -424,6 +425,7 @@ impl PhysicalExpr for AggregationExpr { // implemented explicitly in AggQuantile struct unimplemented!() }, + #[cfg(feature = "bitwise")] GroupByMethod::Bitwise(f) => { let (s, groups) = ac.get_final_aggregation(); let agg_s = match f { diff --git a/crates/polars-expr/src/planner.rs b/crates/polars-expr/src/planner.rs index 2e21d14f13d5..b771a717050d 100644 --- a/crates/polars-expr/src/planner.rs +++ 
b/crates/polars-expr/src/planner.rs @@ -386,6 +386,7 @@ fn create_physical_expr_inner( }, I::Std(_, ddof) => GBM::Std(*ddof), I::Var(_, ddof) => GBM::Var(*ddof), + #[cfg(feature = "bitwise")] I::Bitwise(_, f) => GBM::Bitwise((*f).into()), I::AggGroups(_) => { polars_bail!(InvalidOperation: "agg groups expression only supported in aggregation context") diff --git a/crates/polars-lazy/Cargo.toml b/crates/polars-lazy/Cargo.toml index 333fdc1211d2..79bd3f72b785 100644 --- a/crates/polars-lazy/Cargo.toml +++ b/crates/polars-lazy/Cargo.toml @@ -159,6 +159,7 @@ true_div = ["polars-plan/true_div"] extract_jsonpath = ["polars-plan/extract_jsonpath", "polars-ops/extract_jsonpath"] # operations +bitwise = ["polars-plan/bitwise", "polars-expr/bitwise", "polars-core/bitwise", "polars-stream/bitwise", "polars-ops/bitwise"] approx_unique = ["polars-plan/approx_unique"] is_in = ["polars-plan/is_in", "polars-ops/is_in", "polars-expr/is_in"] repeat_by = ["polars-plan/repeat_by"] diff --git a/crates/polars-ops/Cargo.toml b/crates/polars-ops/Cargo.toml index 2f37857c9cd2..027d846b485e 100644 --- a/crates/polars-ops/Cargo.toml +++ b/crates/polars-ops/Cargo.toml @@ -90,6 +90,7 @@ binary_encoding = ["base64", "hex"] string_encoding = ["base64", "hex"] # ops +bitwise = ["polars-core/bitwise"] to_dummies = [] interpolate = [] interpolate_by = [] diff --git a/crates/polars-ops/src/series/ops/bitwise.rs b/crates/polars-ops/src/series/ops/bitwise.rs index 42d07835c051..1471e35feb63 100644 --- a/crates/polars-ops/src/series/ops/bitwise.rs +++ b/crates/polars-ops/src/series/ops/bitwise.rs @@ -14,7 +14,7 @@ macro_rules! apply_bitwise_op { match s.dtype() { DataType::Boolean => { let ca: &ChunkedArray = s.as_any().downcast_ref().unwrap(); - Ok(unary_mut_values::( + Ok(unary_mut_values::( ca, |a| polars_compute::bitwise::BitwiseKernel::$op(a), ).into_series()) @@ -22,7 +22,7 @@ macro_rules! 
apply_bitwise_op { dt if dt.is_integer() => { with_match_physical_integer_polars_type!(dt, |$T| { let ca: &ChunkedArray<$T> = s.as_any().downcast_ref().unwrap(); - Ok(unary_mut_values::<$T, UInt8Type, _, _>( + Ok(unary_mut_values::<$T, UInt32Type, _, _>( ca, |a| polars_compute::bitwise::BitwiseKernel::$op(a), ).into_series()) @@ -31,7 +31,7 @@ macro_rules! apply_bitwise_op { dt if dt.is_float() => { with_match_physical_float_polars_type!(dt, |$T| { let ca: &ChunkedArray<$T> = s.as_any().downcast_ref().unwrap(); - Ok(unary_mut_values::<$T, UInt8Type, _, _>( + Ok(unary_mut_values::<$T, UInt32Type, _, _>( ca, |a| polars_compute::bitwise::BitwiseKernel::$op(a), ).into_series()) diff --git a/crates/polars-plan/Cargo.toml b/crates/polars-plan/Cargo.toml index 7edc15ea8616..d41be032bcf0 100644 --- a/crates/polars-plan/Cargo.toml +++ b/crates/polars-plan/Cargo.toml @@ -106,6 +106,7 @@ nightly = ["polars-utils/nightly", "polars-ops/nightly"] extract_jsonpath = ["polars-ops/extract_jsonpath"] # operations +bitwise = ["polars-core/bitwise", "polars-ops/bitwise"] approx_unique = ["polars-ops/approx_unique"] is_in = ["polars-ops/is_in"] repeat_by = ["polars-ops/repeat_by"] @@ -192,6 +193,7 @@ panic_on_schema = [] [package.metadata.docs.rs] features = [ + "bitwise", "temporal", "serde", "rolling_window", diff --git a/crates/polars-plan/src/dsl/expr.rs b/crates/polars-plan/src/dsl/expr.rs index f3e31b07c4a7..2b9dff7cb5f2 100644 --- a/crates/polars-plan/src/dsl/expr.rs +++ b/crates/polars-plan/src/dsl/expr.rs @@ -7,7 +7,6 @@ use polars_core::prelude::*; use serde::{Deserialize, Serialize}; pub use super::expr_dyn_fn::*; -use super::function_expr::BitwiseAggFunction; use crate::prelude::*; #[derive(PartialEq, Clone, Hash)] @@ -38,7 +37,8 @@ pub enum AggExpr { AggGroups(Arc), Std(Arc, u8), Var(Arc, u8), - Bitwise(Arc, BitwiseAggFunction), + #[cfg(feature = "bitwise")] + Bitwise(Arc, super::function_expr::BitwiseAggFunction), } impl AsRef for AggExpr { @@ -59,6 +59,7 @@ impl AsRef 
for AggExpr { AggGroups(e) => e, Std(e, _) => e, Var(e, _) => e, + #[cfg(feature = "bitwise")] Bitwise(e, _) => e, } } diff --git a/crates/polars-plan/src/dsl/function_expr/bitwise.rs b/crates/polars-plan/src/dsl/function_expr/bitwise.rs index 3e9ea4f662f1..2d4dd779cff0 100644 --- a/crates/polars-plan/src/dsl/function_expr/bitwise.rs +++ b/crates/polars-plan/src/dsl/function_expr/bitwise.rs @@ -84,7 +84,7 @@ impl BitwiseFunction { polars_bail!(InvalidOperation: "dtype {} not supported in '{}' operation", dtype, self); } - Ok(DataType::UInt8) + Ok(DataType::UInt32) }) } } @@ -106,7 +106,7 @@ fn leading_zeros(c: &Column) -> PolarsResult { } fn trailing_ones(c: &Column) -> PolarsResult { - c.try_apply_unary_elementwise(polars_ops::series::trailing_zeros) + c.try_apply_unary_elementwise(polars_ops::series::trailing_ones) } fn trailing_zeros(c: &Column) -> PolarsResult { diff --git a/crates/polars-plan/src/dsl/function_expr/mod.rs b/crates/polars-plan/src/dsl/function_expr/mod.rs index 2fee12fbbedd..0458b2b4a1d0 100644 --- a/crates/polars-plan/src/dsl/function_expr/mod.rs +++ b/crates/polars-plan/src/dsl/function_expr/mod.rs @@ -5,6 +5,7 @@ mod arg_where; #[cfg(feature = "dtype-array")] mod array; mod binary; +#[cfg(feature = "bitwise")] mod bitwise; mod boolean; mod bounds; @@ -90,6 +91,7 @@ use schema::FieldsMapper; use serde::{Deserialize, Serialize}; pub(crate) use self::binary::BinaryFunction; +#[cfg(feature = "bitwise")] pub use self::bitwise::{BitwiseAggFunction, BitwiseFunction}; pub use self::boolean::BooleanFunction; #[cfg(feature = "business")] @@ -129,6 +131,7 @@ pub enum FunctionExpr { StructExpr(StructFunction), #[cfg(feature = "temporal")] TemporalExpr(TemporalFunction), + #[cfg(feature = "bitwise")] Bitwise(BitwiseFunction), // Other expressions @@ -379,6 +382,7 @@ impl Hash for FunctionExpr { StructExpr(f) => f.hash(state), #[cfg(feature = "temporal")] TemporalExpr(f) => f.hash(state), + #[cfg(feature = "bitwise")] Bitwise(f) => f.hash(state), // Other 
expressions @@ -606,10 +610,8 @@ impl Display for FunctionExpr { StructExpr(func) => return write!(f, "{func}"), #[cfg(feature = "temporal")] TemporalExpr(func) => return write!(f, "{func}"), - Bitwise(func) => { - f.write_str("bitwise.")?; - return Display::fmt(func, f); - }, + #[cfg(feature = "bitwise")] + Bitwise(func) => return write!(f, "bitwise_{func}"), // Other expressions Boolean(func) => return write!(f, "{func}"), @@ -879,6 +881,7 @@ impl From for SpecialEq> { StructExpr(func) => func.into(), #[cfg(feature = "temporal")] TemporalExpr(func) => func.into(), + #[cfg(feature = "bitwise")] Bitwise(func) => func.into(), // Other expressions diff --git a/crates/polars-plan/src/dsl/function_expr/schema.rs b/crates/polars-plan/src/dsl/function_expr/schema.rs index 48eeea1ad073..7cc5b8c5c7ad 100644 --- a/crates/polars-plan/src/dsl/function_expr/schema.rs +++ b/crates/polars-plan/src/dsl/function_expr/schema.rs @@ -26,6 +26,7 @@ impl FunctionExpr { StructExpr(s) => s.get_field(mapper), #[cfg(feature = "temporal")] TemporalExpr(fun) => fun.get_field(mapper), + #[cfg(feature = "bitwise")] Bitwise(fun) => fun.get_field(mapper), // Other expressions diff --git a/crates/polars-plan/src/dsl/mod.rs b/crates/polars-plan/src/dsl/mod.rs index 37102de5cbcf..85393de3f4ce 100644 --- a/crates/polars-plan/src/dsl/mod.rs +++ b/crates/polars-plan/src/dsl/mod.rs @@ -13,6 +13,7 @@ mod arity; #[cfg(feature = "dtype-array")] mod array; pub mod binary; +#[cfg(feature = "bitwise")] mod bitwise; #[cfg(feature = "temporal")] pub mod dt; diff --git a/crates/polars-plan/src/plans/aexpr/mod.rs b/crates/polars-plan/src/plans/aexpr/mod.rs index 14d650611431..53bf24ff838e 100644 --- a/crates/polars-plan/src/plans/aexpr/mod.rs +++ b/crates/polars-plan/src/plans/aexpr/mod.rs @@ -50,6 +50,7 @@ pub enum IRAggExpr { Count(Node, bool), Std(Node, u8), Var(Node, u8), + #[cfg(feature = "bitwise")] Bitwise(Node, BitwiseAggFunction), AggGroups(Node), } @@ -63,6 +64,7 @@ impl Hash for IRAggExpr { }, 
Self::Quantile { interpol, .. } => interpol.hash(state), Self::Std(_, v) | Self::Var(_, v) => v.hash(state), + #[cfg(feature = "bitwise")] Self::Bitwise(_, f) => f.hash(state), _ => {}, } @@ -93,6 +95,7 @@ impl IRAggExpr { (Quantile { interpol: l, .. }, Quantile { interpol: r, .. }) => l == r, (Std(_, l), Std(_, r)) => l == r, (Var(_, l), Var(_, r)) => l == r, + #[cfg(feature = "bitwise")] (Bitwise(_, l), Bitwise(_, r)) => l == r, _ => std::mem::discriminant(self) == std::mem::discriminant(other), } @@ -127,6 +130,7 @@ impl From for GroupByMethod { Count(_, include_nulls) => GroupByMethod::Count { include_nulls }, Std(_, ddof) => GroupByMethod::Std(ddof), Var(_, ddof) => GroupByMethod::Var(ddof), + #[cfg(feature = "bitwise")] Bitwise(_, f) => GroupByMethod::Bitwise(f.into()), AggGroups(_) => GroupByMethod::Groups, Quantile { .. } => unreachable!(), diff --git a/crates/polars-plan/src/plans/aexpr/schema.rs b/crates/polars-plan/src/plans/aexpr/schema.rs index 1b59db205161..b002ccf19504 100644 --- a/crates/polars-plan/src/plans/aexpr/schema.rs +++ b/crates/polars-plan/src/plans/aexpr/schema.rs @@ -217,6 +217,7 @@ impl AExpr { float_type(&mut field); Ok(field) }, + #[cfg(feature = "bitwise")] Bitwise(expr, _) => { *nested = nested.saturating_sub(1); let field = arena.get(*expr).to_field_impl(schema, arena, nested)?; diff --git a/crates/polars-plan/src/plans/aexpr/traverse.rs b/crates/polars-plan/src/plans/aexpr/traverse.rs index f6f337e23acb..7163e18de165 100644 --- a/crates/polars-plan/src/plans/aexpr/traverse.rs +++ b/crates/polars-plan/src/plans/aexpr/traverse.rs @@ -197,6 +197,7 @@ impl IRAggExpr { Std(input, _) => Single(*input), Var(input, _) => Single(*input), AggGroups(input) => Single(*input), + #[cfg(feature = "bitwise")] Bitwise(input, _) => Single(*input), } } @@ -217,6 +218,7 @@ impl IRAggExpr { Std(input, _) => input, Var(input, _) => input, AggGroups(input) => input, + #[cfg(feature = "bitwise")] Bitwise(input, _) => input, }; *node = input; diff --git 
a/crates/polars-plan/src/plans/conversion/expr_to_ir.rs b/crates/polars-plan/src/plans/conversion/expr_to_ir.rs index 74b2c01539f9..95eca45a9bf6 100644 --- a/crates/polars-plan/src/plans/conversion/expr_to_ir.rs +++ b/crates/polars-plan/src/plans/conversion/expr_to_ir.rs @@ -260,6 +260,7 @@ pub(super) fn to_aexpr_impl( AggExpr::AggGroups(expr) => { IRAggExpr::AggGroups(to_aexpr_impl_materialized_lit(owned(expr), arena, state)?) }, + #[cfg(feature = "bitwise")] AggExpr::Bitwise(expr, f) => IRAggExpr::Bitwise( to_aexpr_impl_materialized_lit(owned(expr), arena, state)?, f, diff --git a/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs b/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs index e677d5ba1b30..5d2e4c373b30 100644 --- a/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs +++ b/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs @@ -160,6 +160,7 @@ pub fn node_to_expr(node: Node, expr_arena: &Arena) -> Expr { let expr = node_to_expr(expr, expr_arena); AggExpr::Count(Arc::new(expr), include_nulls).into() }, + #[cfg(feature = "bitwise")] IRAggExpr::Bitwise(expr, f) => { let expr = node_to_expr(expr, expr_arena); AggExpr::Bitwise(Arc::new(expr), f).into() diff --git a/crates/polars-plan/src/plans/format.rs b/crates/polars-plan/src/plans/format.rs index f38b6bb627ab..c72cfad20e39 100644 --- a/crates/polars-plan/src/plans/format.rs +++ b/crates/polars-plan/src/plans/format.rs @@ -120,6 +120,7 @@ impl fmt::Debug for Expr { Var(expr, _) => write!(f, "{expr:?}.var()"), Std(expr, _) => write!(f, "{expr:?}.std()"), Quantile { expr, .. 
} => write!(f, "{expr:?}.quantile()"), + #[cfg(feature = "bitwise")] Bitwise(expr, t) => { let t = match t { BitwiseAggFunction::And => "and", diff --git a/crates/polars-plan/src/plans/ir/format.rs b/crates/polars-plan/src/plans/ir/format.rs index b49c589027a1..36eeee98d98d 100644 --- a/crates/polars-plan/src/plans/ir/format.rs +++ b/crates/polars-plan/src/plans/ir/format.rs @@ -587,6 +587,7 @@ impl<'a> Display for ExprIRDisplay<'a> { Var(expr, _) => write!(f, "{}.var()", self.with_root(expr)), Std(expr, _) => write!(f, "{}.std()", self.with_root(expr)), Quantile { expr, .. } => write!(f, "{}.quantile()", self.with_root(expr)), + #[cfg(feature = "bitwise")] Bitwise(expr, t) => { let t = match t { BitwiseAggFunction::And => "and", diff --git a/crates/polars-plan/src/plans/iterator.rs b/crates/polars-plan/src/plans/iterator.rs index f879fe9255f9..997e38fa9d12 100644 --- a/crates/polars-plan/src/plans/iterator.rs +++ b/crates/polars-plan/src/plans/iterator.rs @@ -56,6 +56,7 @@ macro_rules! push_expr { AggGroups(e) => $push($c, e), Std(e, _) => $push($c, e), Var(e, _) => $push($c, e), + #[cfg(feature = "bitwise")] Bitwise(e, _) => $push($c, e), } }, diff --git a/crates/polars-plan/src/plans/visitor/expr.rs b/crates/polars-plan/src/plans/visitor/expr.rs index a4089e7af060..71b287d03b85 100644 --- a/crates/polars-plan/src/plans/visitor/expr.rs +++ b/crates/polars-plan/src/plans/visitor/expr.rs @@ -72,6 +72,7 @@ impl TreeWalker for Expr { AggGroups(x) => AggGroups(am(x, f)?), Std(x, ddf) => Std(am(x, f)?, ddf), Var(x, ddf) => Var(am(x, f)?, ddf), + #[cfg(feature = "bitwise")] Bitwise(x, t) => Bitwise(am(x, f)?, t), }), Ternary { predicate, truthy, falsy } => Ternary { predicate: am(predicate, &mut f)?, truthy: am(truthy, &mut f)?, falsy: am(falsy, f)? 
}, diff --git a/crates/polars-python/Cargo.toml b/crates/polars-python/Cargo.toml index 9ed35648c89f..1df741a4f51f 100644 --- a/crates/polars-python/Cargo.toml +++ b/crates/polars-python/Cargo.toml @@ -13,7 +13,7 @@ polars-core = { workspace = true, features = ["python"] } polars-error = { workspace = true } polars-io = { workspace = true } polars-lazy = { workspace = true, features = ["python"] } -polars-ops = { workspace = true } +polars-ops = { workspace = true, features = ["bitwise"] } polars-parquet = { workspace = true, optional = true } polars-plan = { workspace = true } polars-time = { workspace = true } @@ -48,6 +48,7 @@ features = [ "approx_unique", "array_any_all", "arg_where", + "bitwise", "business", "concat_str", "cum_agg", diff --git a/crates/polars-python/src/expr/mod.rs b/crates/polars-python/src/expr/mod.rs index ab9bcf4eeadd..93a00018a683 100644 --- a/crates/polars-python/src/expr/mod.rs +++ b/crates/polars-python/src/expr/mod.rs @@ -2,6 +2,7 @@ mod array; #[cfg(feature = "pymethods")] mod binary; +#[cfg(feature = "pymethods")] mod bitwise; #[cfg(feature = "pymethods")] mod categorical; diff --git a/crates/polars-stream/Cargo.toml b/crates/polars-stream/Cargo.toml index b1a70997a3c3..78cdbc9115d0 100644 --- a/crates/polars-stream/Cargo.toml +++ b/crates/polars-stream/Cargo.toml @@ -37,3 +37,4 @@ version_check = { workspace = true } [features] nightly = [] +bitwise = ["polars-core/bitwise", "polars-plan/bitwise"] diff --git a/crates/polars-stream/src/physical_plan/lower_expr.rs b/crates/polars-stream/src/physical_plan/lower_expr.rs index 7ee0b16fda7e..618ec358f209 100644 --- a/crates/polars-stream/src/physical_plan/lower_expr.rs +++ b/crates/polars-stream/src/physical_plan/lower_expr.rs @@ -598,8 +598,13 @@ fn lower_exprs_with_ctx( | IRAggExpr::Count(_, _) | IRAggExpr::Std(_, _) | IRAggExpr::Var(_, _) - | IRAggExpr::AggGroups(_) - | IRAggExpr::Bitwise(_, _) => { + | IRAggExpr::AggGroups(_) => { + let out_name = unique_column_name(); + 
fallback_subset.push(ExprIR::new(expr, OutputName::Alias(out_name.clone()))); transformed_exprs.push(ctx.expr_arena.add(AExpr::Column(out_name))); + }, + #[cfg(feature = "bitwise")] + IRAggExpr::Bitwise(_, _) => { let out_name = unique_column_name(); fallback_subset.push(ExprIR::new(expr, OutputName::Alias(out_name.clone()))); transformed_exprs.push(ctx.expr_arena.add(AExpr::Column(out_name))); diff --git a/crates/polars/Cargo.toml b/crates/polars/Cargo.toml index b858dbc36678..a23d23c7c1ae 100644 --- a/crates/polars/Cargo.toml +++ b/crates/polars/Cargo.toml @@ -132,6 +132,7 @@ array_any_all = ["polars-lazy?/array_any_all", "dtype-array"] asof_join = ["polars-lazy?/asof_join", "polars-ops/asof_join"] iejoin = ["polars-lazy?/iejoin"] binary_encoding = ["polars-ops/binary_encoding", "polars-lazy?/binary_encoding", "polars-sql?/binary_encoding"] +bitwise = ["polars-core/bitwise", "polars-plan?/bitwise", "polars-ops/bitwise", "polars-lazy?/bitwise"] business = ["polars-lazy?/business", "polars-ops/business"] checked_arithmetic = ["polars-core/checked_arithmetic"] chunked_ids = ["polars-ops?/chunked_ids"] diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index be3734f8e7ca..92edbba20e33 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -7364,6 +7364,33 @@ def implode(self) -> Self: ] """ + def bitwise_count_ones(self) -> Self: + """Evaluate the number of set bits.""" + + def bitwise_count_zeros(self) -> Self: + """Evaluate the number of unset bits.""" + + def bitwise_leading_ones(self) -> Self: + """Evaluate the number of most-significant set bits before seeing an unset bit.""" + + def bitwise_leading_zeros(self) -> Self: + """Evaluate the number of most-significant unset bits before seeing a set bit.""" + + def bitwise_trailing_ones(self) -> Self: + """Evaluate the number of least-significant set bits before seeing an unset bit.""" + + def bitwise_trailing_zeros(self) -> Self: + """Evaluate the number of
least-significant unset bits before seeing a set bit.""" + + def bitwise_and(self) -> Self: + """Perform an aggregation of bitwise ANDs.""" + + def bitwise_or(self) -> Self: + """Perform an aggregation of bitwise ORs.""" + + def bitwise_xor(self) -> Self: + """Perform an aggregation of bitwise XORs.""" + # Keep the `list` and `str` properties below at the end of the definition of Series, # as to not confuse mypy with the type annotation `str` and `list` diff --git a/py-polars/tests/unit/operations/test_bitwise.py b/py-polars/tests/unit/operations/test_bitwise.py index e69fe2b218bd..8c6b7955e60b 100644 --- a/py-polars/tests/unit/operations/test_bitwise.py +++ b/py-polars/tests/unit/operations/test_bitwise.py @@ -1,6 +1,9 @@ +from __future__ import annotations + import pytest import polars as pl +from polars.testing import assert_frame_equal, assert_series_equal @pytest.mark.parametrize("op", ["and_", "or_"]) @@ -17,3 +20,183 @@ def test_bitwise_single_null_value_schema(op: str) -> None: result_schema = q.collect_schema() assert result_schema.len() == 1 assert "a" in result_schema + + +def leading_zeros(v: int | None, nb: int) -> int | None: + if v is None: + return None + + b = bin(v)[2:] + blen = len(b) - len(b.lstrip("0")) + if blen == len(b): + return nb + else: + return nb - len(b) + blen + + +def leading_ones(v: int | None, nb: int) -> int | None: + if v is None: + return None + + b = bin(v)[2:] + if len(b) < nb: + return 0 + else: + return len(b) - len(b.lstrip("1")) + + +def trailing_zeros(v: int | None, nb: int) -> int | None: + if v is None: + return None + + b = bin(v)[2:] + blen = len(b) - len(b.rstrip("0")) + if blen == len(b): + return nb + else: + return blen + + +def trailing_ones(v: int | None) -> int | None: + if v is None: + return None + + b = bin(v)[2:] + return len(b) - len(b.rstrip("1")) + + +@pytest.mark.parametrize( + "value", + [ + 0x00, + 0x01, + 0xFCEF_0123, + 0xFFFF_FFFF, + 0xFFF0_FFE1_ABCD_EF01, + 0xAAAA_AAAA_AAAA_AAAA, + None, + ], +) 
+@pytest.mark.parametrize( + "dtype", + [ + pl.Int8, + pl.Int16, + pl.Int32, + pl.Int64, + pl.UInt8, + pl.UInt16, + pl.UInt32, + pl.UInt64, + pl.Boolean, + ], +) +def test_bit_counts(value: int, dtype: pl.DataType) -> None: + bitsize = 8 + if "Boolean" in str(dtype): + bitsize = 1 + if "16" in str(dtype): + bitsize = 16 + elif "32" in str(dtype): + bitsize = 32 + elif "64" in str(dtype): + bitsize = 64 + + if bitsize == 1 and value is not None: + value = value & 1 != 0 + + co = 1 if value else 0 + cz = 0 if value else 1 + elif value is not None: + value = value & ((1 << bitsize) - 1) + + if dtype.is_signed_integer() and value >> (bitsize - 1) > 0: + value = value - pow(2, bitsize - 1) + + co = value.bit_count() + cz = bitsize - co + else: + co = None + cz = None + + assert_series_equal( + pl.Series("a", [value], dtype).bitwise_count_ones(), + pl.Series("a", [co], pl.UInt32), + ) + assert_series_equal( + pl.Series("a", [value], dtype).bitwise_count_zeros(), + pl.Series("a", [cz], pl.UInt32), + ) + assert_series_equal( + pl.Series("a", [value], dtype).bitwise_leading_ones(), + pl.Series("a", [leading_ones(value, bitsize)], pl.UInt32), + ) + assert_series_equal( + pl.Series("a", [value], dtype).bitwise_leading_zeros(), + pl.Series("a", [leading_zeros(value, bitsize)], pl.UInt32), + ) + assert_series_equal( + pl.Series("a", [value], dtype).bitwise_trailing_ones(), + pl.Series("a", [trailing_ones(value)], pl.UInt32), + ) + assert_series_equal( + pl.Series("a", [value], dtype).bitwise_trailing_zeros(), + pl.Series("a", [trailing_zeros(value, bitsize)], pl.UInt32), + ) + + +@pytest.mark.parametrize( + "dtype", + [pl.Int8, pl.Int16, pl.Int32, pl.Int64, pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64], +) +def test_bit_aggregations(dtype: pl.DataType) -> None: + s = pl.Series("a", [0x74, 0x1C, 0x05], dtype) + + df = s.to_frame().select( + AND=pl.col.a.bitwise_and(), + OR=pl.col.a.bitwise_or(), + XOR=pl.col.a.bitwise_xor(), + ) + + assert_frame_equal( + df, + pl.DataFrame( + [ + 
pl.Series("AND", [0x04], dtype), + pl.Series("OR", [0x7D], dtype), + pl.Series("XOR", [0x6D], dtype), + ] + ), + ) + + +@pytest.mark.parametrize( + "dtype", + [pl.Int8, pl.Int16, pl.Int32, pl.Int64, pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64], +) +def test_bit_group_by(dtype: pl.DataType) -> None: + df = pl.DataFrame( + [ + pl.Series("g", [1, 1, 2, 3, 2, 4, 4], pl.Int8), + pl.Series("a", [0x74, 0x1C, 0x05, None, 0x70, 0x01, None], dtype), + ] + ) + + df = df.group_by("g").agg( + AND=pl.col.a.bitwise_and(), + OR=pl.col.a.bitwise_or(), + XOR=pl.col.a.bitwise_xor(), + ) + + assert_frame_equal( + df, + pl.DataFrame( + [ + pl.Series("g", [1, 2, 3, 4], pl.Int8), + pl.Series("AND", [0x74 & 0x1C, 0x05 & 0x70, None, None], dtype), + pl.Series("OR", [0x74 | 0x1C, 0x05 | 0x70, None, None], dtype), + pl.Series("XOR", [0x74 ^ 0x1C, 0x05 ^ 0x70, None, None], dtype), + ] + ), + check_row_order=False, + ) From 008d6aaae2a8b65ae8cf6292ea7a8c25947f7b51 Mon Sep 17 00:00:00 2001 From: coastalwhite Date: Mon, 30 Sep 2024 14:14:07 +0200 Subject: [PATCH 5/9] dprint --- crates/polars-lazy/Cargo.toml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/crates/polars-lazy/Cargo.toml b/crates/polars-lazy/Cargo.toml index 79bd3f72b785..2dfd642cde1f 100644 --- a/crates/polars-lazy/Cargo.toml +++ b/crates/polars-lazy/Cargo.toml @@ -159,7 +159,13 @@ true_div = ["polars-plan/true_div"] extract_jsonpath = ["polars-plan/extract_jsonpath", "polars-ops/extract_jsonpath"] # operations -bitwise = ["polars-plan/bitwise", "polars-expr/bitwise", "polars-core/bitwise", "polars-stream/bitwise", "polars-ops/bitwise"] +bitwise = [ + "polars-plan/bitwise", + "polars-expr/bitwise", + "polars-core/bitwise", + "polars-stream/bitwise", + "polars-ops/bitwise", +] approx_unique = ["polars-plan/approx_unique"] is_in = ["polars-plan/is_in", "polars-ops/is_in", "polars-expr/is_in"] repeat_by = ["polars-plan/repeat_by"] From b3d75f01dbaf30289699b18fe2834f5d63c1a4e9 Mon Sep 17 00:00:00 2001 From: 
ritchie Date: Tue, 1 Oct 2024 15:38:14 +0200 Subject: [PATCH 6/9] skip test on 3.9 --- py-polars/tests/unit/operations/test_bitwise.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/py-polars/tests/unit/operations/test_bitwise.py b/py-polars/tests/unit/operations/test_bitwise.py index 8c6b7955e60b..859263b707a3 100644 --- a/py-polars/tests/unit/operations/test_bitwise.py +++ b/py-polars/tests/unit/operations/test_bitwise.py @@ -1,5 +1,7 @@ from __future__ import annotations +import sys + import pytest import polars as pl @@ -91,6 +93,7 @@ def trailing_ones(v: int | None) -> int | None: pl.Boolean, ], ) +@pytest.mark.skipif(sys.version_info < (3, 10), reason="bit_count introduced in 3.10") def test_bit_counts(value: int, dtype: pl.DataType) -> None: bitsize = 8 if "Boolean" in str(dtype): From 359a97bcc3bfacc0bba3585e882b0eee6e96e087 Mon Sep 17 00:00:00 2001 From: ritchie Date: Tue, 1 Oct 2024 15:48:26 +0200 Subject: [PATCH 7/9] mypi --- py-polars/tests/unit/operations/test_bitwise.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py-polars/tests/unit/operations/test_bitwise.py b/py-polars/tests/unit/operations/test_bitwise.py index 859263b707a3..65c51886e40d 100644 --- a/py-polars/tests/unit/operations/test_bitwise.py +++ b/py-polars/tests/unit/operations/test_bitwise.py @@ -116,7 +116,7 @@ def test_bit_counts(value: int, dtype: pl.DataType) -> None: if dtype.is_signed_integer() and value >> (bitsize - 1) > 0: value = value - pow(2, bitsize - 1) - co = value.bit_count() + co = value.bit_count() # type: ignore[attr-defined] cz = bitsize - co else: co = None From 81d634c98714b2dca41f20d2ca0fce7e44979600 Mon Sep 17 00:00:00 2001 From: ritchie Date: Tue, 1 Oct 2024 16:32:48 +0200 Subject: [PATCH 8/9] fix: liniting --- py-polars/tests/unit/operations/test_bitwise.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/py-polars/tests/unit/operations/test_bitwise.py b/py-polars/tests/unit/operations/test_bitwise.py index 
65c51886e40d..f9efb52a40f0 100644 --- a/py-polars/tests/unit/operations/test_bitwise.py +++ b/py-polars/tests/unit/operations/test_bitwise.py @@ -1,6 +1,7 @@ from __future__ import annotations import sys +import typing import pytest @@ -94,6 +95,7 @@ def trailing_ones(v: int | None) -> int | None: ], ) @pytest.mark.skipif(sys.version_info < (3, 10), reason="bit_count introduced in 3.10") +@typing.no_type_check() def test_bit_counts(value: int, dtype: pl.DataType) -> None: bitsize = 8 if "Boolean" in str(dtype): @@ -116,7 +118,7 @@ def test_bit_counts(value: int, dtype: pl.DataType) -> None: if dtype.is_signed_integer() and value >> (bitsize - 1) > 0: value = value - pow(2, bitsize - 1) - co = value.bit_count() # type: ignore[attr-defined] + co = value.bit_count() cz = bitsize - co else: co = None From 995526d3dc271207c888365d4504afcc9bdb3d9e Mon Sep 17 00:00:00 2001 From: ritchie Date: Tue, 1 Oct 2024 16:59:05 +0200 Subject: [PATCH 9/9] skip and docs --- .../docs/source/reference/expressions/computation.rst | 8 ++++++++ py-polars/docs/source/reference/series/computation.rst | 8 ++++++++ py-polars/tests/unit/operations/test_bitwise.py | 2 +- 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/py-polars/docs/source/reference/expressions/computation.rst b/py-polars/docs/source/reference/expressions/computation.rst index 46dba474834f..4ad8e68a1bfd 100644 --- a/py-polars/docs/source/reference/expressions/computation.rst +++ b/py-polars/docs/source/reference/expressions/computation.rst @@ -15,6 +15,14 @@ Computation Expr.arctan Expr.arctanh Expr.arg_unique + Expr.bitwise_count_ones + Expr.bitwise_count_zeros + Expr.bitwise_leading_ones + Expr.bitwise_trailing_ones + Expr.bitwise_trailing_zeros + Expr.bitwise_and + Expr.bitwise_or + Expr.bitwise_xor Expr.cbrt Expr.cos Expr.cosh diff --git a/py-polars/docs/source/reference/series/computation.rst b/py-polars/docs/source/reference/series/computation.rst index 9e3edb3ac0f6..887fed5b0ec2 100644 --- 
a/py-polars/docs/source/reference/series/computation.rst +++ b/py-polars/docs/source/reference/series/computation.rst @@ -15,6 +15,14 @@ Computation Series.arctanh Series.arg_true Series.arg_unique + Series.bitwise_count_ones + Series.bitwise_count_zeros + Series.bitwise_leading_ones + Series.bitwise_trailing_ones + Series.bitwise_trailing_zeros + Series.bitwise_and + Series.bitwise_or + Series.bitwise_xor Series.cbrt Series.cos Series.cosh diff --git a/py-polars/tests/unit/operations/test_bitwise.py b/py-polars/tests/unit/operations/test_bitwise.py index f9efb52a40f0..e7a957fb534b 100644 --- a/py-polars/tests/unit/operations/test_bitwise.py +++ b/py-polars/tests/unit/operations/test_bitwise.py @@ -95,7 +95,7 @@ def trailing_ones(v: int | None) -> int | None: ], ) @pytest.mark.skipif(sys.version_info < (3, 10), reason="bit_count introduced in 3.10") -@typing.no_type_check() +@typing.no_type_check def test_bit_counts(value: int, dtype: pl.DataType) -> None: bitsize = 8 if "Boolean" in str(dtype):