Skip to content

Commit

Permalink
working without test and without escape_regex as function
Browse files Browse the repository at this point in the history
  • Loading branch information
barak1412 committed Oct 16, 2024
1 parent d89fdcd commit 9b42c1f
Show file tree
Hide file tree
Showing 8 changed files with 71 additions and 1 deletion.
11 changes: 11 additions & 0 deletions crates/polars-ops/src/chunked_array/strings/escape_regex.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
use polars_core::prelude::arity::unary_elementwise;
use polars_core::prelude::StringChunked;
use regex::escape;

fn escape_regex_helper(s: Option<&str>) -> Option<String> {
s.map(escape)
}

/// Builds a new `StringChunked` by running `escape_regex_helper` over `ca`
/// through `unary_elementwise`.
///
/// The helper maps `Some(s)` to `Some(regex::escape(s))` and `None` to `None`,
/// so missing values are presumably carried through unchanged (depends on
/// `unary_elementwise` semantics, not shown here).
pub fn escape_regex(ca: &StringChunked) -> StringChunked {
unary_elementwise(ca, escape_regex_helper)
}
3 changes: 2 additions & 1 deletion crates/polars-ops/src/chunked_array/strings/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ mod case;
#[cfg(feature = "strings")]
mod concat;
#[cfg(feature = "strings")]
mod escape_regex;
#[cfg(feature = "strings")]
mod extract;
#[cfg(feature = "find_many")]
mod find_many;
Expand All @@ -20,7 +22,6 @@ mod split;
mod strip;
#[cfg(feature = "strings")]
mod substring;

#[cfg(all(not(feature = "nightly"), feature = "strings"))]
mod unicode_internals;

Expand Down
6 changes: 6 additions & 0 deletions crates/polars-ops/src/chunked_array/strings/namespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,12 @@ pub trait StringNameSpaceImpl: AsString {

substring::tail(ca, n.i64()?)
}
/// Escapes all regular expression meta characters in the string.
///
/// Delegates to `escape_regex::escape_regex` on the string view of `self`.
#[cfg(feature = "strings")]
fn str_escape_regex(&self) -> StringChunked {
    escape_regex::escape_regex(self.as_string())
}
}

impl StringNameSpaceImpl for StringChunked {}
11 changes: 11 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ pub enum StringFunction {
ascii_case_insensitive: bool,
overlapping: bool,
},
EscapeRegex,
}

impl StringFunction {
Expand Down Expand Up @@ -197,6 +198,8 @@ impl StringFunction {
ReplaceMany { .. } => mapper.with_same_dtype(),
#[cfg(feature = "find_many")]
ExtractMany { .. } => mapper.with_dtype(DataType::List(Box::new(DataType::String))),
#[cfg(feature = "strings")]
EscapeRegex => mapper.with_same_dtype(),
}
}
}
Expand Down Expand Up @@ -285,6 +288,7 @@ impl Display for StringFunction {
ReplaceMany { .. } => "replace_many",
#[cfg(feature = "find_many")]
ExtractMany { .. } => "extract_many",
EscapeRegex => "escape_regex",
};
write!(f, "str.{s}")
}
Expand Down Expand Up @@ -400,6 +404,7 @@ impl From<StringFunction> for SpecialEq<Arc<dyn ColumnsUdf>> {
} => {
map_as_slice!(extract_many, ascii_case_insensitive, overlapping)
},
EscapeRegex => map!(escape_regex),
}
}
}
Expand Down Expand Up @@ -1023,3 +1028,9 @@ pub(super) fn json_path_match(s: &[Column]) -> PolarsResult<Column> {
let pat = s[1].str()?;
Ok(ca.json_path_match(pat)?.into_column())
}

/// Column-level entry point for `StringFunction::EscapeRegex`.
///
/// Views the column as a string array (propagating the error from `str()`
/// if that fails), escapes it with `str_escape_regex`, and converts the
/// result back into a `Column`.
#[cfg(feature = "strings")]
pub(super) fn escape_regex(s: &Column) -> PolarsResult<Column> {
    let strings = s.str()?;
    let escaped = strings.str_escape_regex();
    Ok(escaped.into_column())
}
10 changes: 10 additions & 0 deletions crates/polars-plan/src/dsl/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -592,4 +592,14 @@ impl StringNameSpace {
None,
)
}

/// Builds an expression that applies `StringFunction::EscapeRegex` to this
/// string expression.
#[cfg(feature = "strings")]
pub fn escape_regex(self) -> Expr {
    let function = FunctionExpr::StringExpr(StringFunction::EscapeRegex);
    // The empty slice means no extra argument expressions are passed
    // alongside the wrapped expression.
    self.0.map_many_private(function, &[], false, None)
}
}
5 changes: 5 additions & 0 deletions crates/polars-python/src/expr/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -339,4 +339,9 @@ impl PyExpr {
.extract_many(patterns.inner, ascii_case_insensitive, overlapping)
.into()
}

// Python-facing wrapper: clones the inner expression, applies
// `str().escape_regex()`, and converts the result back into `Self`.
// NOTE(review): this is gated on "regex" while the DSL `escape_regex` it
// calls is gated on "strings" — confirm the feature gates are meant to differ.
#[cfg(feature = "regex")]
fn str_escape_regex(&self) -> Self {
    let expr = self.inner.clone();
    let escaped = expr.str().escape_regex();
    escaped.into()
}
}
4 changes: 4 additions & 0 deletions crates/polars-python/src/lazyframe/visitor/expr_nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ pub enum PyStringFunction {
ZFill,
ContainsMany,
ReplaceMany,
EscapeRegex,
}

#[pymethods]
Expand Down Expand Up @@ -952,6 +953,9 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult<PyObject> {
StringFunction::ExtractMany { .. } => {
return Err(PyNotImplementedError::new_err("extract_many"))
},
StringFunction::EscapeRegex => {
(PyStringFunction::EscapeRegex.into_py(py),).to_object(py)
},
},
FunctionExpr::StructExpr(_) => {
return Err(PyNotImplementedError::new_err("struct expr"))
Expand Down
22 changes: 22 additions & 0 deletions py-polars/polars/expr/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -2781,6 +2781,28 @@ def concat(
delimiter = "-"
return self.join(delimiter, ignore_nulls=ignore_nulls)

def escape_regex(self) -> Expr:
    r"""
    Returns string values with all regular expression meta characters escaped.

    Null values are shown as null in the result (see the example below).

    Examples
    --------
    >>> df = pl.DataFrame({"text": ["abc", "def", None, "abc(\\w+)"]})
    >>> df.with_columns(pl.col("text").str.escape_regex().alias("escaped"))
    shape: (4, 2)
    ┌──────────┬──────────────┐
    │ text     ┆ escaped      │
    │ ---      ┆ ---          │
    │ str      ┆ str          │
    ╞══════════╪══════════════╡
    │ abc      ┆ abc          │
    │ def      ┆ def          │
    │ null     ┆ null         │
    │ abc(\w+) ┆ abc\(\\w\+\) │
    └──────────┴──────────────┘
    """
    # This docstring is raw, so the doctest input above is the Python literal
    # "abc(\\w+)" (i.e. the text `abc(\w+)`), and the table must show the
    # values exactly as printed, without doubling the backslashes again.
    return wrap_expr(self._pyexpr.str_escape_regex())


def _validate_format_argument(format: str | None) -> None:
if format is not None and ".%f" in format:
Expand Down

0 comments on commit 9b42c1f

Please sign in to comment.