Skip to content

Commit

Permalink
chore: Remove apply_generic, use unary_elementwise (pola-rs#17902)
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli authored Jul 30, 2024
1 parent aa1950c commit fae85ff
Show file tree
Hide file tree
Showing 20 changed files with 133 additions and 156 deletions.
3 changes: 2 additions & 1 deletion crates/polars-core/src/chunked_array/arithmetic/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use arrow::compute::utils::combine_validities_and;
use num_traits::{Num, NumCast, ToPrimitive};
pub use numeric::ArithmeticChunked;

use crate::prelude::arity::unary_elementwise_values;
use crate::prelude::*;

#[inline]
Expand Down Expand Up @@ -135,7 +136,7 @@ impl Add for &BooleanChunked {
if rhs.len() == 1 {
let rhs = rhs.get(0);
return match rhs {
Some(rhs) => self.apply_values_generic(|v| v as IdxSize + rhs as IdxSize),
Some(rhs) => unary_elementwise_values(self, |v| v as IdxSize + rhs as IdxSize),
None => IdxCa::full_null(self.name(), self.len()),
};
}
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-core/src/chunked_array/float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use arrow::legacy::kernels::set::set_at_nulls;
use num_traits::Float;
use polars_utils::total_ord::{canonical_f32, canonical_f64};

use crate::prelude::arity::unary_elementwise_values;
use crate::prelude::*;

impl<T> ChunkedArray<T>
Expand Down Expand Up @@ -57,6 +58,6 @@ where
T::Native: Float + Canonical,
{
pub fn to_canonical(&self) -> Self {
self.apply_values_generic(|v| v.canonical())
unary_elementwise_values(self, |v| v.canonical())
}
}
4 changes: 2 additions & 2 deletions crates/polars-core/src/chunked_array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ pub type ChunkLenIter<'a> = std::iter::Map<std::slice::Iter<'a, ArrayRef>, fn(&A
///
/// ```rust
/// # use polars_core::prelude::*;
/// fn apply_cosine_and_cast(ca: &Float32Chunked) -> Float64Chunked {
/// ca.apply_values_generic(|v| v.cos() as f64)
/// fn apply_cosine(ca: &Float32Chunked) -> Float32Chunked {
/// ca.apply_values(|v| v.cos())
/// }
/// ```
///
Expand Down
4 changes: 3 additions & 1 deletion crates/polars-core/src/chunked_array/ops/aggregate/var.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use arity::unary_elementwise_values;

use super::*;

pub trait VarAggSeries {
Expand All @@ -19,7 +21,7 @@ where
}

let mean = self.mean()?;
let squared: Float64Chunked = ChunkedArray::apply_values_generic(self, |value| {
let squared: Float64Chunked = unary_elementwise_values(self, |value| {
let tmp = value.to_f64().unwrap() - mean;
tmp * tmp
});
Expand Down
61 changes: 6 additions & 55 deletions crates/polars-core/src/chunked_array/ops/apply.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! Implementations of the ChunkApply Trait.
use std::borrow::Cow;

use crate::chunked_array::arity::{unary_elementwise, unary_elementwise_values};
use crate::chunked_array::cast::CastOptions;
use crate::prelude::*;
use crate::series::IsSorted;
Expand All @@ -9,23 +10,6 @@ impl<T> ChunkedArray<T>
where
T: PolarsDataType,
{
/// Maps `op` over the raw values of every chunk, including the slots that
/// are masked as null, and then re-attaches each source chunk's validity
/// mask to the freshly built output chunk.
///
/// The result keeps the name of `self`.
pub fn apply_values_generic<'a, U, K, F>(&'a self, mut op: F) -> ChunkedArray<U>
where
    U: PolarsDataType,
    F: FnMut(T::Physical<'a>) -> K,
    U::Array: ArrayFromIter<K>,
{
    let name = self.name();
    let chunks = self.downcast_iter().map(|arr| {
        // The null mask is taken verbatim from the input chunk.
        let validity = arr.validity().cloned();
        let values: U::Array = arr.values_iter().map(&mut op).collect_arr();
        values.with_validity_typed(validity)
    });

    ChunkedArray::from_chunk_iter(name, chunks)
}

/// Applies a function only to the non-null elements, propagating nulls.
pub fn apply_nonnull_values_generic<'a, U, K, F>(
&'a self,
Expand Down Expand Up @@ -83,39 +67,6 @@ where
ChunkedArray::try_from_chunk_iter(self.name(), iter)
}

/// Applies `op` to every element, handing it `Some(value)` for valid slots
/// and `None` for null slots, and collects the returned `Option<K>` values
/// into a new `ChunkedArray<U>` carrying the name of `self`.
///
/// When `self.null_count()` is zero the values are iterated directly and
/// wrapped in `Some`, so the per-element validity lookup is skipped.
pub fn apply_generic<'a, U, K, F>(&'a self, mut op: F) -> ChunkedArray<U>
where
    U: PolarsDataType,
    F: FnMut(Option<T::Physical<'a>>) -> Option<K>,
    U::Array: ArrayFromIter<Option<K>>,
{
    let name = self.name();

    // Nullable path: iterate `Option` items so `op` sees the nulls.
    if self.null_count() != 0 {
        let chunks = self
            .downcast_iter()
            .map(|arr| arr.iter().map(&mut op).collect_arr());
        return ChunkedArray::from_chunk_iter(name, chunks);
    }

    // No nulls anywhere: every value is valid, so wrap it in `Some` directly.
    let chunks = self
        .downcast_iter()
        .map(|arr| arr.values_iter().map(|value| op(Some(value))).collect_arr());
    ChunkedArray::from_chunk_iter(name, chunks)
}

/// Fallible counterpart of `apply_generic`: applies `op` to every element
/// (valid slots arrive as `Some(value)`, null slots as `None`) and collects
/// the returned `Option<K>` values into a new `ChunkedArray<U>` carrying the
/// name of `self`.
///
/// The output validity is exactly what `op` returns: a `None` result yields
/// a null slot and a `Some` result yields a valid slot. The input validity
/// mask is deliberately NOT re-applied to the output. Doing so would discard
/// nulls that `op` produces for valid inputs (or un-null them entirely when
/// the input has no validity mask), and would disagree with `apply_generic`.
///
/// # Errors
///
/// An `Err` returned by `op` is propagated to the caller.
pub fn try_apply_generic<'a, U, K, F, E>(&'a self, op: F) -> Result<ChunkedArray<U>, E>
where
    U: PolarsDataType,
    F: FnMut(Option<T::Physical<'a>>) -> Result<Option<K>, E> + Copy,
    U::Array: ArrayFromIter<Option<K>>,
{
    // `op` is `Copy`, so each chunk's inner `map` receives its own copy.
    let iter = self.downcast_iter().map(|arr| {
        let array: U::Array = arr.iter().map(op).try_collect_arr()?;
        Ok(array)
    });

    ChunkedArray::try_from_chunk_iter(self.name(), iter)
}

pub fn apply_into_string_amortized<'a, F>(&'a self, mut f: F) -> StringChunked
where
F: FnMut(T::Physical<'a>, &mut String),
Expand Down Expand Up @@ -329,7 +280,7 @@ impl<'a> ChunkApply<'a, bool> for BooleanChunked {
where
F: Fn(Option<bool>) -> Option<bool> + Copy,
{
self.apply_generic(f)
unary_elementwise(self, f)
}

fn apply_to_slice<F, T>(&'a self, f: F, slice: &mut [T])
Expand Down Expand Up @@ -386,14 +337,14 @@ impl<'a> ChunkApply<'a, &'a str> for StringChunked {
where
F: Fn(&'a str) -> Cow<'a, str> + Copy,
{
ChunkedArray::apply_values_generic(self, f)
unary_elementwise_values(self, f)
}

fn apply<F>(&'a self, f: F) -> Self
where
F: Fn(Option<&'a str>) -> Option<Cow<'a, str>> + Copy,
{
self.apply_generic(f)
unary_elementwise(self, f)
}

fn apply_to_slice<F, T>(&'a self, f: F, slice: &mut [T])
Expand Down Expand Up @@ -422,14 +373,14 @@ impl<'a> ChunkApply<'a, &'a [u8]> for BinaryChunked {
where
F: Fn(&'a [u8]) -> Cow<'a, [u8]> + Copy,
{
self.apply_values_generic(f)
unary_elementwise_values(self, f)
}

fn apply<F>(&'a self, f: F) -> Self
where
F: Fn(Option<&'a [u8]>) -> Option<Cow<'a, [u8]>> + Copy,
{
self.apply_generic(f)
unary_elementwise(self, f)
}

fn apply_to_slice<F, T>(&'a self, f: F, slice: &mut [T])
Expand Down
15 changes: 11 additions & 4 deletions crates/polars-core/src/chunked_array/ops/arity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,17 @@ where
F: UnaryFnMut<Option<T::Physical<'a>>>,
V::Array: ArrayFromIter<<F as UnaryFnMut<Option<T::Physical<'a>>>>::Ret>,
{
let iter = ca
.downcast_iter()
.map(|arr| arr.iter().map(&mut op).collect_arr());
ChunkedArray::from_chunk_iter(ca.name(), iter)
if ca.has_nulls() {
let iter = ca
.downcast_iter()
.map(|arr| arr.iter().map(&mut op).collect_arr());
ChunkedArray::from_chunk_iter(ca.name(), iter)
} else {
let iter = ca
.downcast_iter()
.map(|arr| arr.values_iter().map(|x| op(Some(x))).collect_arr());
ChunkedArray::from_chunk_iter(ca.name(), iter)
}
}

#[inline]
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-ops/src/chunked_array/binary/namespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use base64::engine::general_purpose;
#[cfg(feature = "binary_encoding")]
use base64::Engine as _;
use memchr::memmem::find;
use polars_core::prelude::arity::broadcast_binary_elementwise_values;
use polars_core::prelude::arity::{broadcast_binary_elementwise_values, unary_elementwise_values};

use super::*;

Expand All @@ -15,7 +15,7 @@ pub trait BinaryNameSpaceImpl: AsBinary {
fn contains(&self, lit: &[u8]) -> BooleanChunked {
let ca = self.as_binary();
let f = |s: &[u8]| find(s, lit).is_some();
ca.apply_values_generic(f)
unary_elementwise_values(ca, f)
}

fn contains_chunked(&self, lit: &BinaryChunked) -> BooleanChunked {
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-ops/src/chunked_array/strings/concat.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use arrow::array::{Utf8Array, ValueSize};
use arrow::compute::cast::utf8_to_utf8view;
use polars_core::prelude::arity::unary_elementwise;
use polars_core::prelude::*;

// Vertically concatenate all strings in a StringChunked.
Expand Down Expand Up @@ -67,7 +68,7 @@ pub fn hor_str_concat(
return if !ignore_nulls || ca.null_count() == 0 {
Ok(ca.clone())
} else {
Ok(ca.apply_generic(|val| Some(val.unwrap_or(""))))
Ok(unary_elementwise(ca, |val| Some(val.unwrap_or(""))))
};
}

Expand Down
9 changes: 7 additions & 2 deletions crates/polars-ops/src/chunked_array/strings/find_many.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
use arrow::array::Utf8ViewArray;
use polars_core::prelude::arity::unary_elementwise;
use polars_core::prelude::*;
use polars_core::utils::align_chunks_binary;

Expand Down Expand Up @@ -27,7 +28,9 @@ pub fn contains_any(
) -> PolarsResult<BooleanChunked> {
let ac = build_ac(patterns, ascii_case_insensitive)?;

Ok(ca.apply_generic(|opt_val| opt_val.map(|val| ac.find(val).is_some())))
Ok(unary_elementwise(ca, |opt_val| {
opt_val.map(|val| ac.find(val).is_some())
}))
}

pub fn replace_all(
Expand All @@ -52,7 +55,9 @@ pub fn replace_all(

let ac = build_ac(patterns, ascii_case_insensitive)?;

Ok(ca.apply_generic(|opt_val| opt_val.map(|val| ac.replace_all(val, replace_with.as_slice()))))
Ok(unary_elementwise(ca, |opt_val| {
opt_val.map(|val| ac.replace_all(val, replace_with.as_slice()))
}))
}

fn push(val: &str, builder: &mut ListStringChunkedBuilder, ac: &AhoCorasick, overlapping: bool) {
Expand Down
20 changes: 10 additions & 10 deletions crates/polars-ops/src/chunked_array/strings/namespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ pub trait StringNameSpaceImpl: AsString {
let res_reg = Regex::new(pat);
let opt_reg = if strict { Some(res_reg?) } else { res_reg.ok() };
let out: BooleanChunked = if let Some(reg) = opt_reg {
ca.apply_values_generic(|s| reg.is_match(s))
unary_elementwise_values(ca, |s| reg.is_match(s))
} else {
BooleanChunked::full_null(ca.name(), ca.len())
};
Expand All @@ -289,11 +289,9 @@ pub trait StringNameSpaceImpl: AsString {
fn find(&self, pat: &str, strict: bool) -> PolarsResult<UInt32Chunked> {
let ca = self.as_string();
match Regex::new(pat) {
Ok(rx) => {
Ok(ca.apply_generic(|opt_s| {
opt_s.and_then(|s| rx.find(s)).map(|m| m.start() as u32)
}))
},
Ok(rx) => Ok(unary_elementwise(ca, |opt_s| {
opt_s.and_then(|s| rx.find(s)).map(|m| m.start() as u32)
})),
Err(_) if !strict => Ok(UInt32Chunked::full_null(ca.name(), ca.len())),
Err(e) => Err(PolarsError::ComputeError(
format!("Invalid regular expression: {}", e).into(),
Expand Down Expand Up @@ -419,7 +417,7 @@ pub trait StringNameSpaceImpl: AsString {
fn strip_chars(&self, pat: &Series) -> PolarsResult<StringChunked> {
let ca = self.as_string();
if pat.dtype() == &DataType::Null {
Ok(ca.apply_generic(|opt_s| opt_s.map(|s| s.trim())))
Ok(unary_elementwise(ca, |opt_s| opt_s.map(|s| s.trim())))
} else {
Ok(strip_chars(ca, pat.str()?))
}
Expand All @@ -428,7 +426,7 @@ pub trait StringNameSpaceImpl: AsString {
fn strip_chars_start(&self, pat: &Series) -> PolarsResult<StringChunked> {
let ca = self.as_string();
if pat.dtype() == &DataType::Null {
return Ok(ca.apply_generic(|opt_s| opt_s.map(|s| s.trim_start())));
return Ok(unary_elementwise(ca, |opt_s| opt_s.map(|s| s.trim_start())));
} else {
Ok(strip_chars_start(ca, pat.str()?))
}
Expand All @@ -437,7 +435,7 @@ pub trait StringNameSpaceImpl: AsString {
fn strip_chars_end(&self, pat: &Series) -> PolarsResult<StringChunked> {
let ca = self.as_string();
if pat.dtype() == &DataType::Null {
return Ok(ca.apply_generic(|opt_s| opt_s.map(|s| s.trim_end())));
return Ok(unary_elementwise(ca, |opt_s| opt_s.map(|s| s.trim_end())));
} else {
Ok(strip_chars_end(ca, pat.str()?))
}
Expand Down Expand Up @@ -524,7 +522,9 @@ pub trait StringNameSpaceImpl: AsString {
Regex::new(pat)?
};

Ok(ca.apply_generic(|opt_s| opt_s.map(|s| reg.find_iter(s).count() as u32)))
Ok(unary_elementwise(ca, |opt_s| {
opt_s.map(|s| reg.find_iter(s).count() as u32)
}))
}

/// Count all successive non-overlapping regex matches.
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-ops/src/chunked_array/strings/reverse.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use polars_core::prelude::arity::unary_elementwise;
use polars_core::prelude::StringChunked;
use unicode_reverse::reverse_grapheme_clusters_in_place;

Expand All @@ -10,5 +11,5 @@ fn to_reverse_helper(s: Option<&str>) -> Option<String> {
}

pub fn reverse(ca: &StringChunked) -> StringChunked {
ca.apply_generic(to_reverse_helper)
unary_elementwise(ca, to_reverse_helper)
}
Loading

0 comments on commit fae85ff

Please sign in to comment.