Skip to content

Commit

Permalink
feat: Bitwise operations / aggregations (#18994)
Browse files Browse the repository at this point in the history
Co-authored-by: ritchie <[email protected]>
  • Loading branch information
coastalwhite and ritchie46 authored Oct 1, 2024
1 parent becead9 commit a2d00f1
Show file tree
Hide file tree
Showing 49 changed files with 1,403 additions and 9 deletions.
285 changes: 285 additions & 0 deletions crates/polars-compute/src/bitwise/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,285 @@
use std::convert::identity;

use arrow::array::{BooleanArray, PrimitiveArray};
use arrow::datatypes::ArrowDataType;
use arrow::legacy::utils::CustomIterTools;
use bytemuck::Zeroable;

/// Bit-level kernels over a single array.
///
/// The element-wise methods return a `UInt32` array of per-element bit counts.
/// The implementations in this file copy the validity of the input array onto
/// the result unchanged.
///
/// The `reduce_*` methods fold the whole array into one scalar. The
/// implementations in this file return `None` when the array contains at
/// least one null or when it holds no values at all.
pub trait BitwiseKernel {
/// The scalar type produced by the reductions and consumed by the
/// `bit_*` combinators.
type Scalar;

/// Number of set bits in each element.
fn count_ones(&self) -> PrimitiveArray<u32>;
/// Number of unset bits in each element.
fn count_zeros(&self) -> PrimitiveArray<u32>;

/// Number of consecutive set bits in each element, counted from the most
/// significant bit.
fn leading_ones(&self) -> PrimitiveArray<u32>;
/// Number of consecutive unset bits in each element, counted from the most
/// significant bit.
fn leading_zeros(&self) -> PrimitiveArray<u32>;

/// Number of consecutive set bits in each element, counted from the least
/// significant bit.
fn trailing_ones(&self) -> PrimitiveArray<u32>;
/// Number of consecutive unset bits in each element, counted from the least
/// significant bit.
fn trailing_zeros(&self) -> PrimitiveArray<u32>;

/// Bitwise AND of all elements.
fn reduce_and(&self) -> Option<Self::Scalar>;
/// Bitwise OR of all elements.
fn reduce_or(&self) -> Option<Self::Scalar>;
/// Bitwise XOR of all elements.
fn reduce_xor(&self) -> Option<Self::Scalar>;

/// Bitwise AND of two scalars; used to combine partial reductions.
fn bit_and(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar;
/// Bitwise OR of two scalars; used to combine partial reductions.
fn bit_or(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar;
/// Bitwise XOR of two scalars; used to combine partial reductions.
fn bit_xor(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar;
}

/// Implements [`BitwiseKernel`] for `PrimitiveArray<$T>`.
///
/// Each entry is a `(type, to_bits, from_bits)` tuple. `to_bits` converts a
/// value of `$T` into the representation the bit operations run on, and
/// `from_bits` converts a result of that representation back into `$T`.
macro_rules! impl_bitwise_kernel {
    ($(($T:ty, $to_bits:expr, $from_bits:expr)),+ $(,)?) => {
        $(
            impl BitwiseKernel for PrimitiveArray<$T> {
                type Scalar = $T;

                /// Per-element count of set bits; validity is carried over as-is.
                #[inline(never)]
                fn count_ones(&self) -> PrimitiveArray<u32> {
                    let counts: Vec<u32> = self
                        .values()
                        .iter()
                        .map(|&value| $to_bits(value).count_ones())
                        .collect_trusted();
                    PrimitiveArray::new(
                        ArrowDataType::UInt32,
                        counts.into(),
                        self.validity().cloned(),
                    )
                }

                /// Per-element count of unset bits; validity is carried over as-is.
                #[inline(never)]
                fn count_zeros(&self) -> PrimitiveArray<u32> {
                    let counts: Vec<u32> = self
                        .values()
                        .iter()
                        .map(|&value| $to_bits(value).count_zeros())
                        .collect_trusted();
                    PrimitiveArray::new(
                        ArrowDataType::UInt32,
                        counts.into(),
                        self.validity().cloned(),
                    )
                }

                /// Per-element count of leading set bits; validity is carried over as-is.
                #[inline(never)]
                fn leading_ones(&self) -> PrimitiveArray<u32> {
                    let counts: Vec<u32> = self
                        .values()
                        .iter()
                        .map(|&value| $to_bits(value).leading_ones())
                        .collect_trusted();
                    PrimitiveArray::new(
                        ArrowDataType::UInt32,
                        counts.into(),
                        self.validity().cloned(),
                    )
                }

                /// Per-element count of leading unset bits; validity is carried over as-is.
                #[inline(never)]
                fn leading_zeros(&self) -> PrimitiveArray<u32> {
                    let counts: Vec<u32> = self
                        .values()
                        .iter()
                        .map(|&value| $to_bits(value).leading_zeros())
                        .collect_trusted();
                    PrimitiveArray::new(
                        ArrowDataType::UInt32,
                        counts.into(),
                        self.validity().cloned(),
                    )
                }

                /// Per-element count of trailing set bits; validity is carried over as-is.
                #[inline(never)]
                fn trailing_ones(&self) -> PrimitiveArray<u32> {
                    let counts: Vec<u32> = self
                        .values()
                        .iter()
                        .map(|&value| $to_bits(value).trailing_ones())
                        .collect_trusted();
                    PrimitiveArray::new(
                        ArrowDataType::UInt32,
                        counts.into(),
                        self.validity().cloned(),
                    )
                }

                /// Per-element count of trailing unset bits; validity is carried over as-is.
                #[inline(never)]
                fn trailing_zeros(&self) -> PrimitiveArray<u32> {
                    let counts: Vec<u32> = self
                        .values()
                        .iter()
                        .map(|&value| $to_bits(value).trailing_zeros())
                        .collect_trusted();
                    PrimitiveArray::new(
                        ArrowDataType::UInt32,
                        counts.into(),
                        self.validity().cloned(),
                    )
                }

                /// Bitwise AND over all values.
                ///
                /// Returns `None` if any element is null or the array is empty.
                #[inline(never)]
                fn reduce_and(&self) -> Option<Self::Scalar> {
                    let has_nulls = self
                        .validity()
                        .is_some_and(|bitmap| bitmap.unset_bits() > 0);
                    let values = self.values();
                    if has_nulls || values.is_empty() {
                        return None;
                    }

                    // AND starts from the all-ones pattern, its neutral element.
                    let all_ones = !$to_bits(<$T>::zeroed());
                    let folded = values
                        .iter()
                        .fold(all_ones, |acc, &value| acc & $to_bits(value));
                    Some($from_bits(folded))
                }

                /// Bitwise OR over all values.
                ///
                /// Returns `None` if any element is null or the array is empty.
                #[inline(never)]
                fn reduce_or(&self) -> Option<Self::Scalar> {
                    let has_nulls = self
                        .validity()
                        .is_some_and(|bitmap| bitmap.unset_bits() > 0);
                    let values = self.values();
                    if has_nulls || values.is_empty() {
                        return None;
                    }

                    // OR starts from the all-zeros pattern, its neutral element.
                    let all_zeros = $to_bits(<$T>::zeroed());
                    let folded = values
                        .iter()
                        .fold(all_zeros, |acc, &value| acc | $to_bits(value));
                    Some($from_bits(folded))
                }

                /// Bitwise XOR over all values.
                ///
                /// Returns `None` if any element is null or the array is empty.
                #[inline(never)]
                fn reduce_xor(&self) -> Option<Self::Scalar> {
                    let has_nulls = self
                        .validity()
                        .is_some_and(|bitmap| bitmap.unset_bits() > 0);
                    let values = self.values();
                    if has_nulls || values.is_empty() {
                        return None;
                    }

                    // XOR starts from the all-zeros pattern, its neutral element.
                    let all_zeros = $to_bits(<$T>::zeroed());
                    let folded = values
                        .iter()
                        .fold(all_zeros, |acc, &value| acc ^ $to_bits(value));
                    Some($from_bits(folded))
                }

                /// Bitwise AND of two scalars, computed on their bit representation.
                fn bit_and(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar {
                    let (left, right) = ($to_bits(lhs), $to_bits(rhs));
                    $from_bits(left & right)
                }

                /// Bitwise OR of two scalars, computed on their bit representation.
                fn bit_or(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar {
                    let (left, right) = ($to_bits(lhs), $to_bits(rhs));
                    $from_bits(left | right)
                }

                /// Bitwise XOR of two scalars, computed on their bit representation.
                fn bit_xor(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar {
                    let (left, right) = ($to_bits(lhs), $to_bits(rhs));
                    $from_bits(left ^ right)
                }
            }
        )+
    };
}

// Instantiate the kernel for every supported primitive type. Each entry is
// `(type, to_bits, from_bits)`. Integer types are operated on directly, so
// both conversions are `identity`. Floats are converted with `to_bits` before
// the bit operations and converted back with `from_bits` afterwards.
impl_bitwise_kernel! {
(i8, identity, identity),
(i16, identity, identity),
(i32, identity, identity),
(i64, identity, identity),
(u8, identity, identity),
(u16, identity, identity),
(u32, identity, identity),
(u64, identity, identity),
(f32, f32::to_bits, f32::from_bits),
(f64, f64::to_bits, f64::from_bits),
}

impl BitwiseKernel for BooleanArray {
type Scalar = bool;

#[inline(never)]
fn count_ones(&self) -> PrimitiveArray<u32> {
PrimitiveArray::new(
ArrowDataType::UInt32,
self.values()
.iter()
.map(u32::from)
.collect_trusted::<Vec<_>>()
.into(),
self.validity().cloned(),
)
}

#[inline(never)]
fn count_zeros(&self) -> PrimitiveArray<u32> {
PrimitiveArray::new(
ArrowDataType::UInt32,
self.values()
.iter()
.map(|v| u32::from(!v))
.collect_trusted::<Vec<_>>()
.into(),
self.validity().cloned(),
)
}

#[inline(always)]
fn leading_ones(&self) -> PrimitiveArray<u32> {
self.count_ones()
}

#[inline(always)]
fn leading_zeros(&self) -> PrimitiveArray<u32> {
self.count_zeros()
}

#[inline(always)]
fn trailing_ones(&self) -> PrimitiveArray<u32> {
self.count_ones()
}

#[inline(always)]
fn trailing_zeros(&self) -> PrimitiveArray<u32> {
self.count_zeros()
}

fn reduce_and(&self) -> Option<Self::Scalar> {
if self.validity().map_or(false, |v| v.unset_bits() > 0) {
return None;
}

let values = self.values();

if values.is_empty() {
return None;
}

Some(values.unset_bits() == 0)
}

fn reduce_or(&self) -> Option<Self::Scalar> {
if self.validity().map_or(false, |v| v.unset_bits() > 0) {
return None;
}

let values = self.values();

if values.is_empty() {
return None;
}

Some(values.set_bits() > 0)
}

fn reduce_xor(&self) -> Option<Self::Scalar> {
if self.validity().map_or(false, |v| v.unset_bits() > 0) {
return None;
}

let values = self.values();

if values.is_empty() {
return None;
}

Some(values.set_bits() % 2 == 1)
}

fn bit_and(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar {
lhs & rhs
}
fn bit_or(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar {
lhs | rhs
}
fn bit_xor(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar {
lhs ^ rhs
}
}
1 change: 1 addition & 0 deletions crates/polars-compute/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use arrow::types::NativeType;

pub mod arithmetic;
pub mod arity;
pub mod bitwise;
pub mod comparisons;
pub mod filter;
pub mod float_sum;
Expand Down
1 change: 1 addition & 0 deletions crates/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ fmt_no_tty = ["comfy-table"]
rows = []

# operations
bitwise = ["algorithm_group_by"]
zip_with = []
round_series = []
checked_arithmetic = []
Expand Down
80 changes: 80 additions & 0 deletions crates/polars-core/src/chunked_array/ops/bitwise_reduce.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
use arrow::array::{Array, PrimitiveArray};
use arrow::types::NativeType;
use polars_compute::bitwise::BitwiseKernel;

use super::{BooleanType, ChunkBitwiseReduce, ChunkedArray, PolarsNumericType};

/// Bitwise reductions over all chunks of a numeric `ChunkedArray`.
///
/// Each reduction returns `None` when the array contains a null or when no
/// chunk holds any element. Otherwise every non-empty chunk is reduced with
/// the matching [`BitwiseKernel`] method and the per-chunk results are
/// combined in chunk order.
impl<T> ChunkBitwiseReduce for ChunkedArray<T>
where
    T: PolarsNumericType,
    T::Native: NativeType,
    PrimitiveArray<T::Native>: BitwiseKernel<Scalar = T::Native>,
{
    type Physical = T::Native;

    /// Bitwise AND of every element.
    fn and_reduce(&self) -> Option<Self::Physical> {
        if self.null_count() > 0 {
            return None;
        }

        let mut acc: Option<Self::Physical> = None;
        for arr in self.downcast_iter() {
            if arr.is_empty() {
                continue;
            }
            // Nulls were ruled out above and the chunk is non-empty, so the
            // kernel is expected to yield a value here.
            let chunk = BitwiseKernel::reduce_and(arr).unwrap();
            acc = Some(match acc {
                Some(prev) => <PrimitiveArray<T::Native> as BitwiseKernel>::bit_and(prev, chunk),
                None => chunk,
            });
        }
        acc
    }

    /// Bitwise OR of every element.
    fn or_reduce(&self) -> Option<Self::Physical> {
        if self.null_count() > 0 {
            return None;
        }

        let mut acc: Option<Self::Physical> = None;
        for arr in self.downcast_iter() {
            if arr.is_empty() {
                continue;
            }
            // Nulls were ruled out above and the chunk is non-empty, so the
            // kernel is expected to yield a value here.
            let chunk = BitwiseKernel::reduce_or(arr).unwrap();
            acc = Some(match acc {
                Some(prev) => <PrimitiveArray<T::Native> as BitwiseKernel>::bit_or(prev, chunk),
                None => chunk,
            });
        }
        acc
    }

    /// Bitwise XOR of every element.
    fn xor_reduce(&self) -> Option<Self::Physical> {
        if self.null_count() > 0 {
            return None;
        }

        let mut acc: Option<Self::Physical> = None;
        for arr in self.downcast_iter() {
            if arr.is_empty() {
                continue;
            }
            // Nulls were ruled out above and the chunk is non-empty, so the
            // kernel is expected to yield a value here.
            let chunk = BitwiseKernel::reduce_xor(arr).unwrap();
            acc = Some(match acc {
                Some(prev) => <PrimitiveArray<T::Native> as BitwiseKernel>::bit_xor(prev, chunk),
                None => chunk,
            });
        }
        acc
    }
}

/// Bitwise reductions over all chunks of a boolean `ChunkedArray`.
///
/// Each reduction returns `None` when the array contains a null or when no
/// chunk holds any element. Otherwise every non-empty chunk is reduced with
/// the matching [`BitwiseKernel`] method and the per-chunk results are
/// combined in chunk order.
impl ChunkBitwiseReduce for ChunkedArray<BooleanType> {
    type Physical = bool;

    /// Logical AND of every element.
    fn and_reduce(&self) -> Option<Self::Physical> {
        if self.null_count() > 0 {
            return None;
        }

        let mut acc: Option<bool> = None;
        for arr in self.downcast_iter() {
            if arr.is_empty() {
                continue;
            }
            // Nulls were ruled out above and the chunk is non-empty, so the
            // kernel is expected to yield a value here.
            let chunk = BitwiseKernel::reduce_and(arr).unwrap();
            acc = Some(match acc {
                Some(prev) => prev & chunk,
                None => chunk,
            });
        }
        acc
    }

    /// Logical OR of every element.
    fn or_reduce(&self) -> Option<Self::Physical> {
        if self.null_count() > 0 {
            return None;
        }

        let mut acc: Option<bool> = None;
        for arr in self.downcast_iter() {
            if arr.is_empty() {
                continue;
            }
            // Nulls were ruled out above and the chunk is non-empty, so the
            // kernel is expected to yield a value here.
            let chunk = BitwiseKernel::reduce_or(arr).unwrap();
            acc = Some(match acc {
                Some(prev) => prev | chunk,
                None => chunk,
            });
        }
        acc
    }

    /// Logical XOR of every element.
    fn xor_reduce(&self) -> Option<Self::Physical> {
        if self.null_count() > 0 {
            return None;
        }

        let mut acc: Option<bool> = None;
        for arr in self.downcast_iter() {
            if arr.is_empty() {
                continue;
            }
            // Nulls were ruled out above and the chunk is non-empty, so the
            // kernel is expected to yield a value here.
            let chunk = BitwiseKernel::reduce_xor(arr).unwrap();
            acc = Some(match acc {
                Some(prev) => prev ^ chunk,
                None => chunk,
            });
        }
        acc
    }
}
Loading

0 comments on commit a2d00f1

Please sign in to comment.