Skip to content

Commit

Permalink
chore(rust): Move mode operation from core to ops crate (#11543)
Browse files Browse the repository at this point in the history
  • Loading branch information
c-peters authored Oct 6, 2023
1 parent 3b01e2a commit aafdb55
Show file tree
Hide file tree
Showing 21 changed files with 143 additions and 168 deletions.
2 changes: 0 additions & 2 deletions crates/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ dot_product = []
row_hash = []
reinterpret = []
take_opt_iter = []
mode = []
# allow group_by operation on list type
group_by_list = []
# cumsum, cummin, etc.
Expand Down Expand Up @@ -150,7 +149,6 @@ docs-selection = [
"asof_join",
"dot_product",
"row_hash",
"mode",
"cum_agg",
"rolling_window",
"diff",
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-core/src/chunked_array/ops/bit_repr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ impl UInt32Chunked {
/// Used to save compilation paths. Use carefully. Although this is safe,
/// if misused it can lead to incorrect results.
impl Float32Chunked {
pub(crate) fn apply_as_ints<F>(&self, f: F) -> Series
pub fn apply_as_ints<F>(&self, f: F) -> Series
where
F: Fn(&Series) -> Series,
{
Expand All @@ -257,7 +257,7 @@ impl Float32Chunked {
}
}
impl Float64Chunked {
pub(crate) fn apply_as_ints<F>(&self, f: F) -> Series
pub fn apply_as_ints<F>(&self, f: F) -> Series
where
F: Fn(&Series) -> Series,
{
Expand Down
6 changes: 0 additions & 6 deletions crates/polars-core/src/chunked_array/ops/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -381,12 +381,6 @@ pub trait ChunkUnique<T: PolarsDataType> {
fn n_unique(&self) -> PolarsResult<usize> {
self.arg_unique().map(|v| v.len())
}

/// Returns the most frequently occurring value(s).
///
/// More than one value can be returned when several values tie for the highest count.
/// This default body unconditionally bails via `polars_bail!(opq = mode, ..)` with the
/// dtype of `T` (presumably an "operation not supported" error — confirm against the
/// macro definition); implementors override it where `mode` is available.
#[cfg(feature = "mode")]
fn mode(&self) -> PolarsResult<ChunkedArray<T>> {
polars_bail!(opq = mode, T::get_dtype());
}
}

#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)]
Expand Down
96 changes: 0 additions & 96 deletions crates/polars-core/src/chunked_array/ops/unique/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ use arrow::bitmap::MutableBitmap;
use crate::datatypes::ObjectType;
use crate::datatypes::PlHashSet;
use crate::frame::group_by::GroupsProxy;
#[cfg(feature = "mode")]
use crate::frame::group_by::IntoGroupsProxy;
use crate::hashing::_HASHMAP_INIT_SIZE;
use crate::prelude::*;
use crate::series::IsSorted;
Expand Down Expand Up @@ -74,54 +72,6 @@ where
unique
}

/// Returns the first index of every group whose size equals the largest group size.
///
/// Several indices are returned when multiple groups tie for the maximum. An empty
/// `groups` yields an empty vector instead of panicking. The order of the returned
/// indices follows the order of `groups`.
#[cfg(feature = "mode")]
fn mode_indices(groups: GroupsProxy) -> Vec<IdxSize> {
    match groups {
        GroupsProxy::Idx(groups) => {
            let groups = groups.into_iter().collect_trusted::<Vec<_>>();
            // Only the maximum group length is needed, so a linear scan replaces a full sort.
            let max_occur = groups.iter().map(|g| g.1.len()).max().unwrap_or(0);
            groups
                .iter()
                .filter(|g| g.1.len() == max_occur)
                .map(|g| g.0)
                .collect()
        },
        GroupsProxy::Slice { groups, .. } => {
            // Each entry is a `[first, len]` pair. Nothing orders these pairs by length,
            // so the largest group is not necessarily the last one: scan all of them.
            let max_occur = groups.iter().map(|g| g[1]).max().unwrap_or(0);
            groups
                .iter()
                .filter(|g| g[1] == max_occur)
                .map(|g| g[0])
                .collect()
        },
    }
}

/// Gathers the most frequent value(s) of `ca`.
///
/// An empty array is returned as a clone of the input. Otherwise the values are grouped
/// with `group_tuples(true, false)`, the winning group indices are picked by
/// `mode_indices`, and the corresponding values are taken from `ca`.
#[cfg(feature = "mode")]
fn mode<T: PolarsDataType>(ca: &ChunkedArray<T>) -> ChunkedArray<T>
where
    ChunkedArray<T>: IntoGroupsProxy + ChunkTake<[IdxSize]>,
{
    if ca.is_empty() {
        // Nothing to group: hand back the (empty) input as-is.
        ca.clone()
    } else {
        let winners = mode_indices(ca.group_tuples(true, false).unwrap());

        // Safety:
        // group indices are in bounds
        unsafe { ca.take_unchecked(&winners[..]) }
    }
}

macro_rules! arg_unique_ca {
($ca:expr) => {{
match $ca.has_validity() {
Expand Down Expand Up @@ -219,11 +169,6 @@ where
},
}
}

#[cfg(feature = "mode")]
fn mode(&self) -> PolarsResult<Self> {
Ok(mode(self))
}
}

impl ChunkUnique<Utf8Type> for Utf8Chunked {
Expand All @@ -239,12 +184,6 @@ impl ChunkUnique<Utf8Type> for Utf8Chunked {
fn n_unique(&self) -> PolarsResult<usize> {
self.as_binary().n_unique()
}

/// Computes the mode by delegating to the binary view of this array and converting
/// the result back to UTF-8.
#[cfg(feature = "mode")]
fn mode(&self) -> PolarsResult<Self> {
// Reuse the `BinaryChunked` implementation on `self.as_binary()`.
let out = self.as_binary().mode()?;
// NOTE(review): `out` derives from the binary view of a UTF-8 array; presumably `mode`
// only selects existing values, so the bytes stay valid UTF-8 — confirm before relying on it.
Ok(unsafe { out.to_utf8() })
}
}

impl ChunkUnique<BinaryType> for BinaryChunked {
Expand Down Expand Up @@ -293,11 +232,6 @@ impl ChunkUnique<BinaryType> for BinaryChunked {
Ok(set.len())
}
}

#[cfg(feature = "mode")]
fn mode(&self) -> PolarsResult<Self> {
Ok(mode(self))
}
}

impl ChunkUnique<BooleanType> for BooleanChunked {
Expand Down Expand Up @@ -330,12 +264,6 @@ impl ChunkUnique<Float32Type> for Float32Chunked {
fn arg_unique(&self) -> PolarsResult<IdxCa> {
self.bit_repr_small().arg_unique()
}
/// Computes the mode of this `f32` array by running `mode` inside `apply_as_ints`.
#[cfg(feature = "mode")]
fn mode(&self) -> PolarsResult<ChunkedArray<Float32Type>> {
// The closure passed to `apply_as_ints` must return a plain `Series`, so a failure of
// the inner `mode` cannot be propagated with `?`; the `unwrap` panics instead.
let s = self.apply_as_ints(|v| v.mode().unwrap());
// Extract the `f32` array from the returned `Series`; this `unwrap` panics if `f32()` fails.
let ca = s.f32().unwrap().clone();
Ok(ca)
}
}

impl ChunkUnique<Float64Type> for Float64Chunked {
Expand All @@ -348,12 +276,6 @@ impl ChunkUnique<Float64Type> for Float64Chunked {
fn arg_unique(&self) -> PolarsResult<IdxCa> {
self.bit_repr_large().arg_unique()
}
/// Computes the mode of this `f64` array by running `mode` inside `apply_as_ints`.
#[cfg(feature = "mode")]
fn mode(&self) -> PolarsResult<ChunkedArray<Float64Type>> {
// The closure passed to `apply_as_ints` must return a plain `Series`, so a failure of
// the inner `mode` cannot be propagated with `?`; the `unwrap` panics instead.
let s = self.apply_as_ints(|v| v.mode().unwrap());
// Extract the `f64` array from the returned `Series`; this `unwrap` panics if `f64()` fails.
let ca = s.f64().unwrap().clone();
Ok(ca)
}
}

#[cfg(test)]
Expand Down Expand Up @@ -392,22 +314,4 @@ mod test {
vec![Some(0), Some(1), Some(4)]
);
}

#[test]
#[cfg(feature = "mode")]
fn mode() {
    // Collects the mode values and sorts them so the assertions do not depend on output order.
    let sorted_modes = |ca: &Int32Chunked| {
        let mut modes = Vec::from(&ca.mode().unwrap());
        modes.sort_by_key(|v| v.unwrap());
        modes
    };

    // Three values tie for the highest count.
    let tied = Int32Chunked::from_slice("a", &[0, 1, 2, 3, 4, 4, 5, 6, 5, 0]);
    assert_eq!(&sorted_modes(&tied), &[Some(0), Some(4), Some(5)]);

    // A single distinct value is its own mode.
    let uniform = Int32Chunked::from_slice("b", &[1, 1]);
    assert_eq!(&sorted_modes(&uniform), &[Some(1)]);

    // An empty array has no mode values.
    let empty = Int32Chunked::from_slice("c", &[]);
    let empty_modes = Vec::from(&empty.mode().unwrap());
    assert_eq!(empty_modes, &[]);
}
}
5 changes: 0 additions & 5 deletions crates/polars-core/src/series/implementations/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -246,9 +246,4 @@ impl SeriesTrait for SeriesWrap<BinaryChunked> {
fn clone_inner(&self) -> Arc<dyn SeriesTrait> {
Arc::new(SeriesWrap(Clone::clone(&self.0)))
}

#[cfg(feature = "mode")]
fn mode(&self) -> PolarsResult<Series> {
Ok(self.0.mode()?.into_series())
}
}
5 changes: 0 additions & 5 deletions crates/polars-core/src/series/implementations/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -314,9 +314,4 @@ impl SeriesTrait for SeriesWrap<BooleanChunked> {
fn clone_inner(&self) -> Arc<dyn SeriesTrait> {
Arc::new(SeriesWrap(Clone::clone(&self.0)))
}

#[cfg(feature = "mode")]
fn mode(&self) -> PolarsResult<Series> {
Ok(self.0.mode()?.into_series())
}
}
6 changes: 0 additions & 6 deletions crates/polars-core/src/series/implementations/categorical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -317,12 +317,6 @@ impl SeriesTrait for SeriesWrap<CategoricalChunked> {
fn clone_inner(&self) -> Arc<dyn SeriesTrait> {
Arc::new(SeriesWrap(Clone::clone(&self.0)))
}

#[cfg(feature = "mode")]
fn mode(&self) -> PolarsResult<Series> {
let cats = self.0.logical().mode()?;
Ok(self.finish_with_state(false, cats).into_series())
}
}

impl private::PrivateSeriesNumeric for SeriesWrap<CategoricalChunked> {
Expand Down
5 changes: 0 additions & 5 deletions crates/polars-core/src/series/implementations/dates_time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -401,11 +401,6 @@ macro_rules! impl_dyn_series {
fn clone_inner(&self) -> Arc<dyn SeriesTrait> {
Arc::new(SeriesWrap(Clone::clone(&self.0)))
}

#[cfg(feature = "mode")]
fn mode(&self) -> PolarsResult<Series> {
self.0.mode().map(|ca| ca.$into_logical().into_series())
}
}
};
}
Expand Down
8 changes: 0 additions & 8 deletions crates/polars-core/src/series/implementations/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -409,12 +409,4 @@ impl SeriesTrait for SeriesWrap<DatetimeChunked> {
fn clone_inner(&self) -> Arc<dyn SeriesTrait> {
Arc::new(SeriesWrap(Clone::clone(&self.0)))
}

#[cfg(feature = "mode")]
fn mode(&self) -> PolarsResult<Series> {
self.0.mode().map(|ca| {
ca.into_datetime(self.0.time_unit(), self.0.time_zone().clone())
.into_series()
})
}
}
7 changes: 0 additions & 7 deletions crates/polars-core/src/series/implementations/duration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -446,11 +446,4 @@ impl SeriesTrait for SeriesWrap<DurationChunked> {
fn clone_inner(&self) -> Arc<dyn SeriesTrait> {
Arc::new(SeriesWrap(Clone::clone(&self.0)))
}

#[cfg(feature = "mode")]
fn mode(&self) -> PolarsResult<Series> {
self.0
.mode()
.map(|ca| ca.into_duration(self.0.time_unit()).into_series())
}
}
5 changes: 0 additions & 5 deletions crates/polars-core/src/series/implementations/floats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -339,11 +339,6 @@ macro_rules! impl_dyn_series {
fn checked_div(&self, rhs: &Series) -> PolarsResult<Series> {
self.0.checked_div(rhs)
}

#[cfg(feature = "mode")]
fn mode(&self) -> PolarsResult<Series> {
Ok(self.0.mode()?.into_series())
}
}
};
}
Expand Down
4 changes: 0 additions & 4 deletions crates/polars-core/src/series/implementations/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -443,10 +443,6 @@ macro_rules! impl_dyn_series {
fn as_any(&self) -> &dyn Any {
&self.0
}
#[cfg(feature = "mode")]
fn mode(&self) -> PolarsResult<Series> {
Ok(self.0.mode()?.into_series())
}

fn tile(&self, n: usize) -> Series {
self.0.tile(n).into_series()
Expand Down
6 changes: 3 additions & 3 deletions crates/polars-core/src/series/implementations/utf8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,8 +264,8 @@ impl SeriesTrait for SeriesWrap<Utf8Chunked> {
Arc::new(SeriesWrap(Clone::clone(&self.0)))
}

#[cfg(feature = "mode")]
fn mode(&self) -> PolarsResult<Series> {
Ok(self.0.mode()?.into_series())
#[cfg(feature = "concat_str")]
fn str_concat(&self, delimiter: &str) -> Utf8Chunked {
self.0.str_concat(delimiter)
}
}
6 changes: 0 additions & 6 deletions crates/polars-core/src/series/series_trait.rs
Original file line number Diff line number Diff line change
Expand Up @@ -483,12 +483,6 @@ pub trait SeriesTrait:
polars_bail!(opq = checked_div, self._dtype());
}

#[cfg(feature = "mode")]
/// Compute the most occurring element in the array.
fn mode(&self) -> PolarsResult<Series> {
polars_bail!(opq = mode, self._dtype());
}

#[cfg(feature = "rolling_window")]
/// Apply a custom function over a rolling/ moving window of the array.
/// This has quite some dynamic dispatch, so prefer rolling_min, max, mean, sum over this.
Expand Down
1 change: 1 addition & 0 deletions crates/polars-ops/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ hash = []
group_by_list = ["polars-core/group_by_list"]
rolling_window = ["polars-core/rolling_window"]
moment = ["polars-core/moment"]
mode = []
search_sorted = []
merge_sorted = []
top_k = []
Expand Down
3 changes: 3 additions & 0 deletions crates/polars-ops/src/chunked_array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ mod sum;
#[cfg(feature = "top_k")]
mod top_k;

#[cfg(feature = "mode")]
pub mod mode;

pub mod gather_skip_nulls;
#[cfg(feature = "repeat_by")]
mod repeat_by;
Expand Down
Loading

0 comments on commit aafdb55

Please sign in to comment.