Skip to content

Commit

Permalink
refactor!: remove <Series>$compare() (#1272)
Browse files Browse the repository at this point in the history
  • Loading branch information
eitsupi authored Nov 2, 2024
1 parent 343e37f commit e4ea2c5
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 215 deletions.
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## Polars R Package (development version)

### Breaking changes

- `<Series>$compare()` is removed. (#1272)

### Bug fixes

- Maintain level order when converting Enums to factors (#1252, @andyquinterom).
Expand Down
2 changes: 0 additions & 2 deletions R/extendr-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -1358,8 +1358,6 @@ RPolarsSeries$get_fmt <- function(index, str_length) .Call(wrap__RPolarsSeries__

RPolarsSeries$to_fmt_char <- function(str_length) {
  # Thin wrapper: hands `self` and `str_length` to the native symbol
  # `wrap__RPolarsSeries__to_fmt_char` via `.Call`.
  .Call(wrap__RPolarsSeries__to_fmt_char, self, str_length)
}

RPolarsSeries$compare <- function(other, op) {
  # Thin wrapper: hands `self`, `other` and `op` to the native symbol
  # `wrap__RPolarsSeries__compare` via `.Call`.
  .Call(wrap__RPolarsSeries__compare, self, other, op)
}

RPolarsSeries$rep <- function(n, rechunk) {
  # Thin wrapper: hands `self`, `n` and `rechunk` to the native symbol
  # `wrap__RPolarsSeries__rep` via `.Call`.
  .Call(wrap__RPolarsSeries__rep, self, n, rechunk)
}

RPolarsSeries$shape <- function() {
  # Thin wrapper: hands `self` to the native symbol
  # `wrap__RPolarsSeries__shape` via `.Call`.
  .Call(wrap__RPolarsSeries__shape, self)
}
Expand Down
66 changes: 22 additions & 44 deletions R/series__series.R
Original file line number Diff line number Diff line change
Expand Up @@ -475,57 +475,35 @@ Series_pow = function(exponent) {
self$to_frame()$select(pl$col(self$name)$pow(as_polars_series(exponent)))$to_series(0)
}


#' Compare Series
#'
#' Check the (in)equality of two Series.
#'
#' @param other A Series or something a Series can be created from
#' @param op The chosen operator, must be one of `"equal"`, `"not_equal"`,
#' `"lt"`, `"gt"`, `"lt_eq"` or `"gt_eq"`
#' @return [Series][Series_class]
#' @examples
#' # We can either use `compare()`...
#' as_polars_series(1:5)$compare(as_polars_series(c(1:3, NA_integer_, 10L)), op = "equal")
#'
#' # ... or the more classic way
#' as_polars_series(1:5) == as_polars_series(c(1:3, NA_integer_, 10L))
Series_compare = function(other, op) {
other_s = as_polars_series(other)
s_len = self$len()
o_len = other_s$len()
if (
s_len != o_len &&
o_len != 1 &&
s_len != 1
) {
stop("in compare Series: not same length or either of length 1.")
}
.pr$Series$compare(self, as_polars_series(other), op) |>
unwrap(paste0("in $compare() with operator `", op, "`:"))
#' @export
`==.RPolarsSeries` = function(s1, s2) {
  # `==` for Series: both operands become literal expressions, the right-hand
  # side is cast to the dtype of `s1`, and the comparison is evaluated through
  # `pl$select()` and returned via `$to_series()`.
  # NOTE(review): `s1$dtype` presumes `s1` is a Series -- confirm the case
  # where only `s2` is one.
  lhs = pl$lit(s1)
  rhs = pl$lit(as_polars_series(s2))$cast(s1$dtype)
  pl$select(lhs$eq(rhs))$to_series()
}


#' @export
#' @rdname Series_compare
#' @param s1 lhs Series
#' @param s2 rhs Series or any into Series
"==.RPolarsSeries" = function(s1, s2) as_polars_series(s1)$compare(s2, "equal")
#' @export
#' @rdname Series_compare
"!=.RPolarsSeries" = function(s1, s2) as_polars_series(s1)$compare(s2, "not_equal")
`!=.RPolarsSeries` = function(s1, s2) {
  # `!=` for Series: both operands become literal expressions, the right-hand
  # side is cast to the dtype of `s1`, and the comparison is evaluated through
  # `pl$select()` and returned via `$to_series()`.
  #
  # The left operand is `s1`. This is an S3 operator method, so there is no
  # `self` binding here; referring to `self` would fail or pick up an
  # unrelated object.
  # NOTE(review): `s1$dtype` presumes `s1` is a Series -- confirm the case
  # where only `s2` is one.
  lhs = pl$lit(s1)
  rhs = pl$lit(as_polars_series(s2))$cast(s1$dtype)
  pl$select(lhs$neq(rhs))$to_series()
}

#' @export
#' @rdname Series_compare
"<.RPolarsSeries" = function(s1, s2) as_polars_series(s1)$compare(s2, "lt")
`<.RPolarsSeries` = function(s1, s2) {
  # `<` for Series: both operands become literal expressions, the right-hand
  # side is cast to the dtype of `s1`, and the comparison is evaluated through
  # `pl$select()` and returned via `$to_series()`.
  # NOTE(review): `s1$dtype` presumes `s1` is a Series -- confirm the case
  # where only `s2` is one.
  lhs = pl$lit(s1)
  rhs = pl$lit(as_polars_series(s2))$cast(s1$dtype)
  pl$select(lhs$lt(rhs))$to_series()
}

#' @export
#' @rdname Series_compare
">.RPolarsSeries" = function(s1, s2) as_polars_series(s1)$compare(s2, "gt")
`>.RPolarsSeries` = function(s1, s2) {
  # `>` for Series: both operands become literal expressions, the right-hand
  # side is cast to the dtype of `s1`, and the comparison is evaluated through
  # `pl$select()` and returned via `$to_series()`.
  # NOTE(review): `s1$dtype` presumes `s1` is a Series -- confirm the case
  # where only `s2` is one.
  lhs = pl$lit(s1)
  rhs = pl$lit(as_polars_series(s2))$cast(s1$dtype)
  pl$select(lhs$gt(rhs))$to_series()
}

#' @export
#' @rdname Series_compare
"<=.RPolarsSeries" = function(s1, s2) as_polars_series(s1)$compare(s2, "lt_eq")
`<=.RPolarsSeries` = function(s1, s2) {
  # `<=` for Series: both operands become literal expressions, the right-hand
  # side is cast to the dtype of `s1`, and the comparison is evaluated through
  # `pl$select()` and returned via `$to_series()`.
  # NOTE(review): `s1$dtype` presumes `s1` is a Series -- confirm the case
  # where only `s2` is one.
  lhs = pl$lit(s1)
  rhs = pl$lit(as_polars_series(s2))$cast(s1$dtype)
  pl$select(lhs$lt_eq(rhs))$to_series()
}

#' @export
#' @rdname Series_compare
">=.RPolarsSeries" = function(s1, s2) as_polars_series(s1)$compare(s2, "gt_eq")
`>=.RPolarsSeries` = function(s1, s2) {
  # `>=` for Series: both operands become literal expressions, the right-hand
  # side is cast to the dtype of `s1`, and the comparison is evaluated through
  # `pl$select()` and returned via `$to_series()`.
  # NOTE(review): `s1$dtype` presumes `s1` is a Series -- confirm the case
  # where only `s2` is one.
  lhs = pl$lit(s1)
  rhs = pl$lit(as_polars_series(s2))$cast(s1$dtype)
  pl$select(lhs$gt_eq(rhs))$to_series()
}


#' Convert Series to R vector or list
Expand Down
49 changes: 0 additions & 49 deletions man/Series_compare.Rd

This file was deleted.

4 changes: 0 additions & 4 deletions src/rust/src/rdataframe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -226,10 +226,6 @@ impl RPolarsDataFrame {
l
}

// fn compare_other_(&self) -> bool {
// self.0.compare
// }

pub fn to_list(&self, int64_conversion: &str) -> List {
let robj_vec_res: Result<Vec<Robj>, _> = collect_hinted_result(
self.0.width(),
Expand Down
39 changes: 0 additions & 39 deletions src/rust/src/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -251,45 +251,6 @@ impl RPolarsSeries {
res
}

/// Compares this Series element-wise with `other`, using the operator named by `op`.
///
/// `other` is cast to the dtype of `self`; both sides are then downcast to the
/// matching chunked array and compared. `op` must be one of `"equal"`,
/// `"not_equal"`, `"gt"`, `"gt_eq"`, `"lt"` or `"lt_eq"`.
///
/// The outcome is handed to `r_result_list`: the boolean Series on success, or an
/// error message when the cast or downcast of `other` fails, when the dtype of
/// `self` is not one of Int32 / Int64 / Float64 / Boolean / String, or when `op`
/// is not a recognised operator name.
pub fn compare(&self, other: &RPolarsSeries, op: String) -> List {
    // Try to cast `other` to the dtype of `self`, downcast (`$dc`) both sides to a
    // chunked array and compare them element-wise with the operator `$op`.
    // The macro is expanded inside the closure below, so `?` and `return` leave the
    // closure with an `Err(String)` rather than leaving `compare` itself.
    macro_rules! comp {
        ($self:expr, $other:expr, $dc:ident, $op:expr) => {{
            let dtype = self.0.dtype();
            // Cannot fail: each match arm below pairs a dtype with its own downcast.
            let lhs = $self.0.$dc().unwrap().clone();
            let casted_series = $other.0.cast(dtype).map_err(|err| err.to_string())?;
            let rhs = casted_series.$dc().map_err(|err| err.to_string())?;

            let ca_bool = match $op.as_str() {
                "equal" => lhs.equal(rhs),
                "not_equal" => lhs.not_equal(rhs),
                "gt" => lhs.gt(rhs),
                "gt_eq" => lhs.gt_eq(rhs),
                "lt" => lhs.lt(rhs),
                "lt_eq" => lhs.lt_eq(rhs),
                // An unknown operator name is caller input, not a broken invariant:
                // report it like every other failure instead of panicking.
                unsupported => return Err(format!("not supported operator: {}", unsupported)),
            };
            Ok(RPolarsSeries(ca_bool.into_series()))
        }};
    }

    use polars::prelude::ChunkCompare;
    let dtype = self.0.dtype();
    use pl::DataType::*;
    let res = (|| match dtype {
        Int32 => comp!(self, other, i32, op),
        Int64 => comp!(self, other, i64, op),
        Float64 => comp!(self, other, f64, op),
        Boolean => comp!(self, other, bool, op),
        String => comp!(self, other, str, op),
        _ => Err(format!(
            "oups this type: {} is not supported yet, but easily could be",
            dtype
        )),
    })();
    r_result_list(res)
}

// named `rep` rather than `repeat` because `repeat` is a reserved keyword in R
pub fn rep(&self, n: Robj, rechunk: Robj) -> std::result::Result<RPolarsSeries, String> {
use crate::robj_to;
Expand Down
152 changes: 75 additions & 77 deletions tests/testthat/_snaps/after-wrappers.md
Original file line number Diff line number Diff line change
Expand Up @@ -648,62 +648,61 @@
[19] "arr" "backward_fill" "bin"
[22] "bottom_k" "cast" "cat"
[25] "ceil" "chunk_lengths" "clear"
[28] "clip" "clone" "compare"
[31] "cos" "cosh" "count"
[34] "cum_count" "cum_max" "cum_min"
[37] "cum_prod" "cum_sum" "cumulative_eval"
[40] "cut" "diff" "div"
[43] "dot" "drop_nans" "drop_nulls"
[46] "dt" "dtype" "entropy"
[49] "eq" "eq_missing" "equals"
[52] "ewm_mean" "ewm_std" "ewm_var"
[55] "exp" "explode" "extend_constant"
[58] "fill_nan" "fill_null" "filter"
[61] "first" "flags" "flatten"
[64] "floor" "floor_div" "forward_fill"
[67] "gather" "gather_every" "gt"
[70] "gt_eq" "has_nulls" "hash"
[73] "head" "implode" "interpolate"
[76] "is_between" "is_duplicated" "is_finite"
[79] "is_first_distinct" "is_in" "is_infinite"
[82] "is_last_distinct" "is_nan" "is_not_nan"
[85] "is_not_null" "is_null" "is_numeric"
[88] "is_sorted" "is_unique" "item"
[91] "kurtosis" "last" "len"
[94] "limit" "list" "log"
[97] "log10" "lower_bound" "lt"
[100] "lt_eq" "map_batches" "map_elements"
[103] "max" "mean" "median"
[106] "min" "mod" "mode"
[109] "mul" "n_chunks" "n_unique"
[112] "name" "nan_max" "nan_min"
[115] "neq" "neq_missing" "not"
[118] "null_count" "or" "pct_change"
[121] "peak_max" "peak_min" "pow"
[124] "print" "product" "qcut"
[127] "quantile" "rank" "rechunk"
[130] "reinterpret" "rename" "rep"
[133] "repeat_by" "replace" "replace_strict"
[136] "reshape" "reverse" "rle"
[139] "rle_id" "rolling_max" "rolling_max_by"
[142] "rolling_mean" "rolling_mean_by" "rolling_median"
[145] "rolling_median_by" "rolling_min" "rolling_min_by"
[148] "rolling_quantile" "rolling_quantile_by" "rolling_skew"
[151] "rolling_std" "rolling_std_by" "rolling_sum"
[154] "rolling_sum_by" "rolling_var" "rolling_var_by"
[157] "round" "sample" "search_sorted"
[160] "set_sorted" "shape" "shift"
[163] "shrink_dtype" "shuffle" "sign"
[166] "sin" "sinh" "skew"
[169] "slice" "sort" "sort_by"
[172] "sqrt" "std" "str"
[175] "struct" "sub" "sum"
[178] "tail" "tan" "tanh"
[181] "to_frame" "to_list" "to_lit"
[184] "to_physical" "to_r" "to_vector"
[187] "top_k" "unique" "unique_counts"
[190] "upper_bound" "value_counts" "var"
[193] "xor"
[28] "clip" "clone" "cos"
[31] "cosh" "count" "cum_count"
[34] "cum_max" "cum_min" "cum_prod"
[37] "cum_sum" "cumulative_eval" "cut"
[40] "diff" "div" "dot"
[43] "drop_nans" "drop_nulls" "dt"
[46] "dtype" "entropy" "eq"
[49] "eq_missing" "equals" "ewm_mean"
[52] "ewm_std" "ewm_var" "exp"
[55] "explode" "extend_constant" "fill_nan"
[58] "fill_null" "filter" "first"
[61] "flags" "flatten" "floor"
[64] "floor_div" "forward_fill" "gather"
[67] "gather_every" "gt" "gt_eq"
[70] "has_nulls" "hash" "head"
[73] "implode" "interpolate" "is_between"
[76] "is_duplicated" "is_finite" "is_first_distinct"
[79] "is_in" "is_infinite" "is_last_distinct"
[82] "is_nan" "is_not_nan" "is_not_null"
[85] "is_null" "is_numeric" "is_sorted"
[88] "is_unique" "item" "kurtosis"
[91] "last" "len" "limit"
[94] "list" "log" "log10"
[97] "lower_bound" "lt" "lt_eq"
[100] "map_batches" "map_elements" "max"
[103] "mean" "median" "min"
[106] "mod" "mode" "mul"
[109] "n_chunks" "n_unique" "name"
[112] "nan_max" "nan_min" "neq"
[115] "neq_missing" "not" "null_count"
[118] "or" "pct_change" "peak_max"
[121] "peak_min" "pow" "print"
[124] "product" "qcut" "quantile"
[127] "rank" "rechunk" "reinterpret"
[130] "rename" "rep" "repeat_by"
[133] "replace" "replace_strict" "reshape"
[136] "reverse" "rle" "rle_id"
[139] "rolling_max" "rolling_max_by" "rolling_mean"
[142] "rolling_mean_by" "rolling_median" "rolling_median_by"
[145] "rolling_min" "rolling_min_by" "rolling_quantile"
[148] "rolling_quantile_by" "rolling_skew" "rolling_std"
[151] "rolling_std_by" "rolling_sum" "rolling_sum_by"
[154] "rolling_var" "rolling_var_by" "round"
[157] "sample" "search_sorted" "set_sorted"
[160] "shape" "shift" "shrink_dtype"
[163] "shuffle" "sign" "sin"
[166] "sinh" "skew" "slice"
[169] "sort" "sort_by" "sqrt"
[172] "std" "str" "struct"
[175] "sub" "sum" "tail"
[178] "tan" "tanh" "to_frame"
[181] "to_list" "to_lit" "to_physical"
[184] "to_r" "to_vector" "top_k"
[187] "unique" "unique_counts" "upper_bound"
[190] "value_counts" "var" "xor"

---

Expand All @@ -715,27 +714,26 @@
[5] "append_mut" "arg_max"
[7] "arg_min" "can_fast_explode_flag"
[9] "chunk_lengths" "clear"
[11] "clone" "compare"
[13] "div" "dtype"
[15] "equals" "export_stream"
[17] "from_arrow_array_robj" "get_fmt"
[19] "import_stream" "is_sorted"
[21] "is_sorted_ascending_flag" "is_sorted_descending_flag"
[23] "len" "map_elements"
[25] "max" "mean"
[27] "median" "min"
[29] "mul" "n_chunks"
[31] "n_unique" "name"
[33] "new" "panic"
[35] "print" "rem"
[37] "rename_mut" "rep"
[39] "set_sorted_mut" "shape"
[41] "sleep" "sort"
[43] "std" "struct_fields"
[45] "sub" "sum"
[47] "to_fmt_char" "to_frame"
[49] "to_r" "value_counts"
[51] "var"
[11] "clone" "div"
[13] "dtype" "equals"
[15] "export_stream" "from_arrow_array_robj"
[17] "get_fmt" "import_stream"
[19] "is_sorted" "is_sorted_ascending_flag"
[21] "is_sorted_descending_flag" "len"
[23] "map_elements" "max"
[25] "mean" "median"
[27] "min" "mul"
[29] "n_chunks" "n_unique"
[31] "name" "new"
[33] "panic" "print"
[35] "rem" "rename_mut"
[37] "rep" "set_sorted_mut"
[39] "shape" "sleep"
[41] "sort" "std"
[43] "struct_fields" "sub"
[45] "sum" "to_fmt_char"
[47] "to_frame" "to_r"
[49] "value_counts" "var"

# public and private methods of each class RThreadHandle

Expand Down

0 comments on commit e4ea2c5

Please sign in to comment.