diff --git a/crates/polars-ops/src/chunked_array/strings/escape_regex.rs b/crates/polars-ops/src/chunked_array/strings/escape_regex.rs
new file mode 100644
index 000000000000..800e36b3fd9b
--- /dev/null
+++ b/crates/polars-ops/src/chunked_array/strings/escape_regex.rs
@@ -0,0 +1,13 @@
+use polars_core::prelude::arity::unary_elementwise;
+use polars_core::prelude::StringChunked;
+use regex::escape;
+
+/// Escapes the regex meta characters of a single value; a null input stays null.
+fn escape_regex_helper(s: Option<&str>) -> Option<String> {
+    s.map(escape)
+}
+
+/// Builds a new string column by passing every value of `ca` through `escape_regex_helper`.
+pub fn escape_regex(ca: &StringChunked) -> StringChunked {
+    unary_elementwise(ca, escape_regex_helper)
+}
diff --git a/crates/polars-ops/src/chunked_array/strings/mod.rs b/crates/polars-ops/src/chunked_array/strings/mod.rs
index b9149983307b..ea525bb76f82 100644
--- a/crates/polars-ops/src/chunked_array/strings/mod.rs
+++ b/crates/polars-ops/src/chunked_array/strings/mod.rs
@@ -3,6 +3,8 @@ mod case;
 #[cfg(feature = "strings")]
 mod concat;
 #[cfg(feature = "strings")]
+mod escape_regex;
+#[cfg(feature = "strings")]
 mod extract;
 #[cfg(feature = "find_many")]
 mod find_many;
@@ -20,7 +22,6 @@ mod split;
 mod strip;
 #[cfg(feature = "strings")]
 mod substring;
-
 #[cfg(all(not(feature = "nightly"), feature = "strings"))]
 mod unicode_internals;
 
diff --git a/crates/polars-ops/src/chunked_array/strings/namespace.rs b/crates/polars-ops/src/chunked_array/strings/namespace.rs
index 812dfbfcba91..93574e5f3080 100644
--- a/crates/polars-ops/src/chunked_array/strings/namespace.rs
+++ b/crates/polars-ops/src/chunked_array/strings/namespace.rs
@@ -640,6 +640,12 @@ pub trait StringNameSpaceImpl: AsString {
 
         substring::tail(ca, n.i64()?)
     }
+    #[cfg(feature = "strings")]
+    /// Escapes all regular expression meta characters in the string.
+    fn str_escape_regex(&self) -> StringChunked {
+        let ca = self.as_string();
+        escape_regex::escape_regex(ca)
+    }
 }
 
 impl StringNameSpaceImpl for StringChunked {}
diff --git a/crates/polars-plan/src/dsl/function_expr/strings.rs b/crates/polars-plan/src/dsl/function_expr/strings.rs
index ba06dc00e67c..c394b03af62e 100644
--- a/crates/polars-plan/src/dsl/function_expr/strings.rs
+++ b/crates/polars-plan/src/dsl/function_expr/strings.rs
@@ -130,6 +130,7 @@ pub enum StringFunction {
         ascii_case_insensitive: bool,
         overlapping: bool,
     },
+    EscapeRegex,
 }
 
 impl StringFunction {
@@ -197,6 +198,8 @@ impl StringFunction {
             ReplaceMany { .. } => mapper.with_same_dtype(),
             #[cfg(feature = "find_many")]
             ExtractMany { .. } => mapper.with_dtype(DataType::List(Box::new(DataType::String))),
+            #[cfg(feature = "strings")]
+            EscapeRegex => mapper.with_same_dtype(),
         }
     }
 }
@@ -285,6 +288,7 @@ impl Display for StringFunction {
             ReplaceMany { .. } => "replace_many",
             #[cfg(feature = "find_many")]
             ExtractMany { .. } => "extract_many",
+            EscapeRegex => "escape_regex",
         };
         write!(f, "str.{s}")
     }
@@ -400,6 +404,7 @@ impl From<StringFunction> for SpecialEq<Arc<dyn ColumnsUdf>> {
             } => {
                 map_as_slice!(extract_many, ascii_case_insensitive, overlapping)
             },
+            EscapeRegex => map!(escape_regex),
         }
     }
 }
@@ -1023,3 +1028,9 @@ pub(super) fn json_path_match(s: &[Column]) -> PolarsResult<Column> {
     let pat = s[1].str()?;
     Ok(ca.json_path_match(pat)?.into_column())
 }
+
+#[cfg(feature = "strings")]
+pub(super) fn escape_regex(s: &Column) -> PolarsResult<Column> {
+    let ca = s.str()?;
+    Ok(ca.str_escape_regex().into_column())
+}
diff --git a/crates/polars-plan/src/dsl/string.rs b/crates/polars-plan/src/dsl/string.rs
index d392d403d1b6..efa34f59c04c 100644
--- a/crates/polars-plan/src/dsl/string.rs
+++ b/crates/polars-plan/src/dsl/string.rs
@@ -592,4 +592,15 @@ impl StringNameSpace {
             None,
         )
     }
+
+    /// Returns an expression that escapes all regular expression meta characters.
+    #[cfg(feature = "strings")]
+    pub fn escape_regex(self) -> Expr {
+        self.0.map_many_private(
+            FunctionExpr::StringExpr(StringFunction::EscapeRegex),
+            &[],
+            false,
+            None,
+        )
+    }
 }
diff --git a/crates/polars-python/src/expr/string.rs b/crates/polars-python/src/expr/string.rs
index 6f0836ad8d13..87521a2b7aa1 100644
--- a/crates/polars-python/src/expr/string.rs
+++ b/crates/polars-python/src/expr/string.rs
@@ -339,4 +339,9 @@ impl PyExpr {
             .extract_many(patterns.inner, ascii_case_insensitive, overlapping)
             .into()
     }
+
+    #[cfg(feature = "regex")]
+    fn str_escape_regex(&self) -> Self {
+        self.inner.clone().str().escape_regex().into()
+    }
 }
diff --git a/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs b/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs
index 67c25d755084..c8c23f29b092 100644
--- a/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs
+++ b/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs
@@ -174,6 +174,7 @@ pub enum PyStringFunction {
     ZFill,
     ContainsMany,
     ReplaceMany,
+    EscapeRegex,
 }
 
 #[pymethods]
@@ -952,6 +953,9 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult<PyObject> {
                 StringFunction::ExtractMany { .. } => {
                     return Err(PyNotImplementedError::new_err("extract_many"))
                 },
+                StringFunction::EscapeRegex => {
+                    (PyStringFunction::EscapeRegex.into_py(py),).to_object(py)
+                },
             },
             FunctionExpr::StructExpr(_) => {
                 return Err(PyNotImplementedError::new_err("struct expr"))
diff --git a/py-polars/polars/expr/string.py b/py-polars/polars/expr/string.py
index 7582758d5921..be8beba18b8f 100644
--- a/py-polars/polars/expr/string.py
+++ b/py-polars/polars/expr/string.py
@@ -2781,6 +2781,28 @@ def concat(
             delimiter = "-"
         return self.join(delimiter, ignore_nulls=ignore_nulls)
 
+    def escape_regex(self) -> Expr:
+        r"""
+        Returns string values with all regular expression meta characters escaped.
+
+        Examples
+        --------
+        >>> df = pl.DataFrame({"text": ["abc", "def", None, "abc(\\w+)"]})
+        >>> df.with_columns(pl.col("text").str.escape_regex().alias("escaped"))
+        shape: (4, 2)
+        ┌──────────┬──────────────┐
+        │ text     ┆ escaped      │
+        │ ---      ┆ ---          │
+        │ str      ┆ str          │
+        ╞══════════╪══════════════╡
+        │ abc      ┆ abc          │
+        │ def      ┆ def          │
+        │ null     ┆ null         │
+        │ abc(\w+) ┆ abc\(\\w\+\) │
+        └──────────┴──────────────┘
+        """
+        return wrap_expr(self._pyexpr.str_escape_regex())
+
 
 def _validate_format_argument(format: str | None) -> None:
     if format is not None and ".%f" in format: