Skip to content

Commit

Permalink
Added replace method to Expr - closes #47
Browse files Browse the repository at this point in the history
  • Loading branch information
ankane committed Feb 16, 2024
1 parent 3ac4a96 commit 43299ac
Show file tree
Hide file tree
Showing 5 changed files with 186 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
## 0.9.0 (unreleased)

- Updated Polars to 0.37.0
- Added `replace` method to `Expr`
- Added `schema_overrides` option to `read_database` method
- Fixed error with `BigDecimal` objects

Expand Down
1 change: 1 addition & 0 deletions ext/polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ features = [
"regex",
"reinterpret",
"repeat_by",
"replace",
"rle",
"rolling_window",
"round_series",
Expand Down
18 changes: 18 additions & 0 deletions ext/polars/src/expr/general.rs
Original file line number Diff line number Diff line change
Expand Up @@ -803,4 +803,22 @@ impl RbExpr {
};
self.inner.clone().set_sorted_flag(is_sorted).into()
}

/// Builds a new expression that replaces values of this expression.
///
/// The wrapped expression is cloned and its `replace` method is called with
/// clones of the expressions held by `old` and `new`, the cloned `default`
/// expression when one is given, and the `DataType` unwrapped from
/// `return_dtype` when one is given. `self` is left untouched.
///
/// Returns the resulting expression converted back into the wrapper type.
pub fn replace(
    &self,
    old: &RbExpr,
    new: &RbExpr,
    default: Option<&RbExpr>,
    return_dtype: Option<Wrap<DataType>>,
) -> Self {
    let base = self.inner.clone();
    let old_expr = old.inner.clone();
    let new_expr = new.inner.clone();
    // Both optional arguments stay `None` when the caller omitted them.
    let default_expr = default.map(|fallback| fallback.inner.clone());
    let dtype = return_dtype.map(|wrapped| wrapped.0);

    let replaced = base.replace(old_expr, new_expr, default_expr, dtype);
    replaced.into()
}
}
1 change: 1 addition & 0 deletions ext/polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
class.define_method("entropy", method!(RbExpr::entropy, 2))?;
class.define_method("_hash", method!(RbExpr::hash, 4))?;
class.define_method("set_sorted_flag", method!(RbExpr::set_sorted_flag, 1))?;
class.define_method("replace", method!(RbExpr::replace, 4))?;

// meta
class.define_method("meta_pop", method!(RbExpr::meta_pop, 0))?;
Expand Down
165 changes: 165 additions & 0 deletions lib/polars/expr.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
module Polars
# Expressions that can be used in various contexts.
class Expr
# @private
NO_DEFAULT = Object.new

# @private
attr_accessor :_rbexpr

Expand Down Expand Up @@ -5297,6 +5300,168 @@ def shrink_dtype
wrap_expr(_rbexpr.shrink_dtype)
end

# Replace values by different values.
#
# @param old [Object]
# Value or sequence of values to replace.
# Accepts expression input. Sequences are parsed as Series,
# other non-expression inputs are parsed as literals.
# Also accepts a mapping of values to their replacement.
# @param new [Object]
# Value or sequence of values to replace by.
# Accepts expression input. Sequences are parsed as Series,
# other non-expression inputs are parsed as literals.
# Length must match the length of `old` or have length 1.
# @param default [Object]
# Set values that were not replaced to this value.
# Defaults to keeping the original value.
# Accepts expression input. Non-expression inputs are parsed as literals.
# @param return_dtype [Object]
# The data type of the resulting expression. If set to `nil` (default),
# the data type is determined automatically based on the other inputs.
#
# @return [Expr]
#
# @example Replace a single value by another value. Values that were not replaced remain unchanged.
# df = Polars::DataFrame.new({"a" => [1, 2, 2, 3]})
# df.with_columns(replaced: Polars.col("a").replace(2, 100))
# # =>
# # shape: (4, 2)
# # ┌─────┬──────────┐
# # │ a ┆ replaced │
# # │ --- ┆ --- │
# # │ i64 ┆ i64 │
# # ╞═════╪══════════╡
# # │ 1 ┆ 1 │
# # │ 2 ┆ 100 │
# # │ 2 ┆ 100 │
# # │ 3 ┆ 3 │
# # └─────┴──────────┘
#
# @example Replace multiple values by passing sequences to the `old` and `new` parameters.
# df.with_columns(replaced: Polars.col("a").replace([2, 3], [100, 200]))
# # =>
# # shape: (4, 2)
# # ┌─────┬──────────┐
# # │ a ┆ replaced │
# # │ --- ┆ --- │
# # │ i64 ┆ i64 │
# # ╞═════╪══════════╡
# # │ 1 ┆ 1 │
# # │ 2 ┆ 100 │
# # │ 2 ┆ 100 │
# # │ 3 ┆ 200 │
# # └─────┴──────────┘
#
# @example Passing a mapping with replacements is also supported as syntactic sugar. Specify a default to set all values that were not matched.
# mapping = {2 => 100, 3 => 200}
# df.with_columns(replaced: Polars.col("a").replace(mapping, default: -1))
# # =>
# # shape: (4, 2)
# # ┌─────┬──────────┐
# # │ a ┆ replaced │
# # │ --- ┆ --- │
# # │ i64 ┆ i64 │
# # ╞═════╪══════════╡
# # │ 1 ┆ -1 │
# # │ 2 ┆ 100 │
# # │ 2 ┆ 100 │
# # │ 3 ┆ 200 │
# # └─────┴──────────┘
#
# @example Replacing by values of a different data type sets the return type based on a combination of the `new` data type and either the original data type or the default data type if it was set.
# df = Polars::DataFrame.new({"a" => ["x", "y", "z"]})
# mapping = {"x" => 1, "y" => 2, "z" => 3}
# df.with_columns(replaced: Polars.col("a").replace(mapping))
# # =>
# # shape: (3, 2)
# # ┌─────┬──────────┐
# # │ a ┆ replaced │
# # │ --- ┆ --- │
# # │ str ┆ str │
# # ╞═════╪══════════╡
# # │ x ┆ 1 │
# # │ y ┆ 2 │
# # │ z ┆ 3 │
# # └─────┴──────────┘
#
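# @example Setting `default` (here `nil`) changes the inferred return type, which no longer falls back to the original column's data type.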
# df.with_columns(replaced: Polars.col("a").replace(mapping, default: nil))
# # =>
# # shape: (3, 2)
# # ┌─────┬──────────┐
# # │ a ┆ replaced │
# # │ --- ┆ --- │
# # │ str ┆ i64 │
# # ╞═════╪══════════╡
# # │ x ┆ 1 │
# # │ y ┆ 2 │
# # │ z ┆ 3 │
# # └─────┴──────────┘
#
# @example Set the `return_dtype` parameter to control the resulting data type directly.
# df.with_columns(
# replaced: Polars.col("a").replace(mapping, return_dtype: Polars::UInt8)
# )
# # =>
# # shape: (3, 2)
# # ┌─────┬──────────┐
# # │ a ┆ replaced │
# # │ --- ┆ --- │
# # │ str ┆ u8 │
# # ╞═════╪══════════╡
# # │ x ┆ 1 │
# # │ y ┆ 2 │
# # │ z ┆ 3 │
# # └─────┴──────────┘
#
# @example Expression input is supported for all parameters.
# df = Polars::DataFrame.new({"a" => [1, 2, 2, 3], "b" => [1.5, 2.5, 5.0, 1.0]})
# df.with_columns(
# replaced: Polars.col("a").replace(
# Polars.col("a").max,
# Polars.col("b").sum,
# default: Polars.col("b")
# )
# )
# # =>
# # shape: (4, 3)
# # ┌─────┬─────┬──────────┐
# # │ a ┆ b ┆ replaced │
# # │ --- ┆ --- ┆ --- │
# # │ i64 ┆ f64 ┆ f64 │
# # ╞═════╪═════╪══════════╡
# # │ 1 ┆ 1.5 ┆ 1.5 │
# # │ 2 ┆ 2.5 ┆ 2.5 │
# # │ 2 ┆ 5.0 ┆ 5.0 │
# # │ 3 ┆ 1.0 ┆ 10.0 │
# # └─────┴─────┴──────────┘
def replace(old, new = NO_DEFAULT, default: NO_DEFAULT, return_dtype: nil)
  if new.eql?(NO_DEFAULT)
    # Without `new`, the replacements can only come from a mapping. Fail early
    # with a clear message instead of handing the NO_DEFAULT sentinel object
    # to the expression parser below.
    unless old.is_a?(Hash)
      raise ArgumentError, "`new` argument is required if `old` argument is not a Hash"
    end

    # Mapping form: the keys are the values to replace and the values are
    # their replacements. Read the values before `old` is reassigned.
    new = Series.new(old.values)
    old = Series.new(old.keys)
  else
    # Arrays are converted to Series; every other input is passed to the
    # parser unchanged.
    old = Series.new(old) if old.is_a?(::Array)
    new = Series.new(new) if new.is_a?(::Array)
  end

  old = Utils.parse_as_expression(old, str_as_lit: true)
  new = Utils.parse_as_expression(new, str_as_lit: true)

  # An omitted `default` is forwarded as nil, whereas an explicit
  # `default: nil` is parsed like any other value. The sentinel is what
  # distinguishes the two cases.
  default =
    if default.eql?(NO_DEFAULT)
      nil
    else
      Utils.parse_as_expression(default, str_as_lit: true)
    end

  wrap_expr(_rbexpr.replace(old, new, default, return_dtype))
end

# Create an object namespace of all list related methods.
#
# @return [ListExpr]
Expand Down

0 comments on commit 43299ac

Please sign in to comment.