From 7b6db94238bfa026d01e3bd1cf1c0f0993ef074d Mon Sep 17 00:00:00 2001 From: Dotan Nahum Date: Sun, 6 Nov 2022 18:34:06 +0200 Subject: [PATCH 1/2] regex: add missing functions --- src/builtins/impls/regex.rs | 28 ++++++++++++++++++++++++++++ src/builtins/mod.rs | 7 +++++++ 2 files changed, 35 insertions(+) diff --git a/src/builtins/impls/regex.rs b/src/builtins/impls/regex.rs index bb61ca6..c6293ab 100644 --- a/src/builtins/impls/regex.rs +++ b/src/builtins/impls/regex.rs @@ -49,3 +49,31 @@ pub fn template_match( ) -> Result { bail!("not implemented"); } + +/// Find and replaces the text using the regular expression pattern. +#[tracing::instrument(name = "regex.replace", err)] +pub fn replace(s: String, pattern: String, value: String) -> Result { + bail!("not implemented"); +} + +/// Matches a string against a regular expression. +#[tracing::instrument(name = "regex.match", err)] +pub fn regex_match(pattern: String, value: String) -> Result { + bail!("not implemented"); +} + +/// Checks if a string is a valid regular expression. +#[tracing::instrument(name = "regex.is_valid", err)] +pub fn is_valid(pattern: String) -> Result { + bail!("not implemented"); +} + +/// Returns all successive matches of the expression. +#[tracing::instrument(name = "regex.find_all_string_submatch_n", err)] +pub fn find_all_string_submatch_n( + pattern: String, + value: String, + number: i64, +) -> Result>> { + bail!("not implemented"); +} diff --git a/src/builtins/mod.rs b/src/builtins/mod.rs index 65073c0..654b14b 100644 --- a/src/builtins/mod.rs +++ b/src/builtins/mod.rs @@ -116,6 +116,13 @@ pub fn resolve(name: &str) -> Result>> "regex.globs_match" => Ok(self::impls::regex::globs_match.wrap()), "regex.split" => Ok(self::impls::regex::split.wrap()), "regex.template_match" => Ok(self::impls::regex::template_match.wrap()), + "regex.replace" => Ok(self::impls::regex::replace.wrap()), + "regex.match" => Ok(self::impls::regex::regex_match.wrap()), + "regex.is_valid" => Ok(self::impls::regex::is_valid.wrap()), + "regex.find_all_string_submatch_n" => { + Ok(self::impls::regex::find_all_string_submatch_n.wrap()) + } + "rego.parse_module" => Ok(self::impls::rego::parse_module.wrap()), #[cfg(feature = "semver-builtins")] From affead317e132da3583df42cc90a7e7bed079b7b Mon Sep 17 00:00:00 2001 From: Dotan Nahum Date: Wed, 9 Nov 2022 17:18:48 +0200 Subject: [PATCH 2/2] feat: add regex builtins --- Cargo.toml | 23 ++++++++-- src/builtins/impls/mod.rs | 2 + src/builtins/impls/regex.rs | 62 ++++++++++++++++++++------ src/builtins/mod.rs | 8 ++++ tests/infra-fixtures/test-regex.rego | 51 +++++++++++++++++++++ tests/smoke_test.rs | 2 + tests/snapshots/smoke_test__regex.snap | 46 +++++++++++++++++++ 7 files changed, 177 insertions(+), 17 deletions(-) create mode 100644 tests/infra-fixtures/test-regex.rego create mode 100644 tests/snapshots/smoke_test__regex.snap diff --git a/Cargo.toml b/Cargo.toml index 37f0e27..a7399c6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,18 +14,25 @@ serde_json = "1" thiserror = "1" tokio = { version = "1.5", features = ["sync", "macros"] } tracing = "0.1.27" -wasmtime = { version = ">=0.40.0,<3", default-features = false, features = ["async"] } +wasmtime = { version = ">=0.40.0,<3", default-features = false, features = [ + "async", +] } # Loader tokio-tar = { version = "0.3", optional = true } -async-compression = { version = "0.3", optional = true, features = ["tokio", "gzip"] } +async-compression = { version = "0.3", optional = true, features = [ + "tokio", + "gzip", +] } futures-util = { version = "0.3", optional = true } # CLI camino = { version = "1", optional = true } clap = { version = "4", features = ["derive"], optional = true } tracing-forest = { version = "0.1.4", optional = true } -tracing-subscriber = { version = "0.3", features = ["env-filter"], optional = true } +tracing-subscriber = { version = "0.3", features = [ + "env-filter", +], optional = true } # Builtins base64 = { version = ">=0.5,<0.14", optional = true } @@ -41,13 +48,19 @@ sha2 = { version = "0.10", optional = true } sprintf = { version = "0.1", optional = true } parse-size = { version = "1", features = ["std"], optional = true } serde_yaml = { version = "0.9.1", optional = true } +regex = { version = "1.7.0", optional = true } +route-pattern = { version = "0.1.0", optional = true } +regex-intersect = { version = "1.2.0", optional = true } [dev-dependencies.tokio] version = "1.5" features = ["macros", "fs", "rt", "rt-multi-thread"] [dev-dependencies] -wasmtime= { version = ">=0.40.0,<3", default-features = false, features = ["cranelift", "memory-init-cow"] } +wasmtime = { version = ">=0.40.0,<3", default-features = false, features = [ + "cranelift", + "memory-init-cow", +] } insta = { version = "1", features = ["yaml"] } [build-dependencies] @@ -92,6 +105,7 @@ json-builtins = ["dep:json-patch"] units-builtins = ["dep:parse-size"] rand-builtins = ["rng"] yaml-builtins = ["dep:serde_yaml"] +regex-builtins = ["dep:regex", "dep:route-pattern", "dep:regex-intersect"] all-crypto-builtins = [ "crypto-digest-builtins", @@ -111,6 +125,7 @@ all-builtins = [ "sprintf-builtins", "units-builtins", "yaml-builtins", + "regex-builtins", ] [[test]] diff --git a/src/builtins/impls/mod.rs b/src/builtins/impls/mod.rs index 7c6b17e..de946d3 100644 --- a/src/builtins/impls/mod.rs +++ b/src/builtins/impls/mod.rs @@ -36,7 +36,9 @@ pub mod object; pub mod opa; #[cfg(feature = "rng")] pub mod rand; +#[cfg(feature = "regex-builtins")] pub mod regex; + pub mod rego; #[cfg(feature = "semver-builtins")] pub mod semver; diff --git a/src/builtins/impls/regex.rs b/src/builtins/impls/regex.rs index c6293ab..efa9e70 100644 --- a/src/builtins/impls/regex.rs +++ b/src/builtins/impls/regex.rs @@ -14,13 +14,19 @@ //! Builtins related to regular expressions -use anyhow::{bail, Result}; +use anyhow::{bail, Context, Result}; +use regex::Regex; /// Returns the specified number of matches when matching the input against the /// pattern. #[tracing::instrument(name = "regex.find_n", err)] -pub fn find_n(pattern: String, value: String, number: i64) -> Result> { - bail!("not implemented"); +pub fn find_n(pattern: String, value: String, number: i32) -> Result> { + let re = Regex::new(&pattern)?; + Ok(re + .find_iter(&value) + .take(usize::try_from(number).unwrap_or(usize::MAX)) + .map(|m| m.as_str().to_string()) + .collect::>()) } /// Checks if the intersection of two glob-style regular expressions matches a @@ -30,13 +36,18 @@ pub fn find_n(pattern: String, value: String, number: i64) -> Result /// `[`, `-`, `]` and `\\` are treated as special symbols. #[tracing::instrument(name = "regex.globs_match", err)] pub fn globs_match(glob1: String, glob2: String) -> Result { - bail!("not implemented"); + regex_intersect::non_empty(&glob1, &glob2).context("expressions should parse") } /// Splits the input string by the occurrences of the given pattern. #[tracing::instrument(name = "regex.split", err)] pub fn split(pattern: String, value: String) -> Result> { - bail!("not implemented"); + let re = Regex::new(&pattern)?; + Ok(re + .split(&value) + .into_iter() + .map(ToString::to_string) + .collect::>()) } /// Matches a string against a pattern, where there pattern may be glob-like @@ -47,25 +58,40 @@ pub fn template_match( delimiter_start: String, delimiter_end: String, ) -> Result { - bail!("not implemented"); + if let (Some(dstart), Some(dend)) = + (delimiter_start.chars().next(), delimiter_end.chars().next()) + { + route_pattern::is_match(&pattern, dstart, dend, &value) + .context("route pattern should parse") + } else { + bail!("delimiters must be a single character each"); + } } /// Find and replaces the text using the regular expression pattern. +/// The semantics of `replace` in OPA is actually `replace_all` #[tracing::instrument(name = "regex.replace", err)] pub fn replace(s: String, pattern: String, value: String) -> Result { - bail!("not implemented"); + let re = Regex::new(&pattern)?; + Ok(re.replace_all(&s, &value).to_string()) } /// Matches a string against a regular expression. #[tracing::instrument(name = "regex.match", err)] pub fn regex_match(pattern: String, value: String) -> Result { - bail!("not implemented"); + let re = Regex::new(&pattern)?; + Ok(re.is_match(&value)) } /// Checks if a string is a valid regular expression. -#[tracing::instrument(name = "regex.is_valid", err)] -pub fn is_valid(pattern: String) -> Result { - bail!("not implemented"); +#[tracing::instrument(name = "regex.is_valid")] +pub fn is_valid(pattern: String) -> bool { + // Note: we're using `regex` and not `regex-syntax` directly which may be + // cheaper because `regex-syntax` is considered an implementation detail of + // `regex`. So, since we're going to use `regex` in all other places, it's + // better to use it to validate in case it decides to change its + // implementation of parsing. + Regex::new(&pattern).is_ok() } /// Returns all successive matches of the expression. @@ -73,7 +99,17 @@ pub fn is_valid(pattern: String) -> Result { pub fn find_all_string_submatch_n( pattern: String, value: String, - number: i64, + number: usize, ) -> Result>> { - bail!("not implemented"); + let re = Regex::new(&pattern)?; + Ok(re + .captures_iter(&value) + .take(number) + .map(|m| { + m.iter() + .flatten() + .map(|m| m.as_str().to_string()) + .collect::>() + }) + .collect::>()) } diff --git a/src/builtins/mod.rs b/src/builtins/mod.rs index 654b14b..3e0dc2a 100644 --- a/src/builtins/mod.rs +++ b/src/builtins/mod.rs @@ -112,13 +112,21 @@ pub fn resolve(name: &str) -> Result>> #[cfg(feature = "rng")] "rand.intn" => Ok(self::impls::rand::intn.wrap()), + #[cfg(feature = "regex-builtins")] "regex.find_n" => Ok(self::impls::regex::find_n.wrap()), + #[cfg(feature = "regex-builtins")] "regex.globs_match" => Ok(self::impls::regex::globs_match.wrap()), + #[cfg(feature = "regex-builtins")] "regex.split" => Ok(self::impls::regex::split.wrap()), + #[cfg(feature = "regex-builtins")] "regex.template_match" => Ok(self::impls::regex::template_match.wrap()), + #[cfg(feature = "regex-builtins")] "regex.replace" => Ok(self::impls::regex::replace.wrap()), + #[cfg(feature = "regex-builtins")] "regex.match" => Ok(self::impls::regex::regex_match.wrap()), + #[cfg(feature = "regex-builtins")] "regex.is_valid" => Ok(self::impls::regex::is_valid.wrap()), + #[cfg(feature = "regex-builtins")] "regex.find_all_string_submatch_n" => { Ok(self::impls::regex::find_all_string_submatch_n.wrap()) } diff --git a/tests/infra-fixtures/test-regex.rego b/tests/infra-fixtures/test-regex.rego new file mode 100644 index 0000000..9453651 --- /dev/null +++ b/tests/infra-fixtures/test-regex.rego @@ -0,0 +1,51 @@ +package test + +is_valid_true := regex.is_valid(".*") + +is_valid_false := regex.is_valid("*") + +find_n_all := regex.find_n("[oa]+", "foo bar", -1) + +find_n_few := regex.find_n("[oa]+", "foo bar", 1) + +find_n_none := regex.find_n("[oa]+", "foo bar", 0) + +split_1 := regex.split("/", "foo//bar/baz") + +split_2 := regex.split("/", "") + +split_3 := regex.split("/", "foo-bar-baz") + +globs_match_true := regex.globs_match("a.a.", ".b.b") + +globs_match_false := regex.globs_match("[a-z]+", "[0-9]*") + +template_match_true := regex.template_match("/users/id-{[0-9]{1,4}}/update", "/users/id-123/update", "{", "}") + +template_match_false := regex.template_match("/users/id-{[0-9]{1,4}}/update", "/users/id-123123/update", "{", "}") + +replace_all := regex.replace("abc 123 abcdefg", "[abc]+", "XXX") + +replace_empty := regex.replace("", "abc", "XXX") + +match_true := regex.match(".*", "foobar") + +match_false := regex.match("[0-9]+", "foobar") + +submatch_all := regex.find_all_string_submatch_n( + "([a-z]+)/([a-z]+)", + "home/user ~ home/root ~ home/admin", + -1, +) + +submatch_some := regex.find_all_string_submatch_n( + "([a-z]+)/([a-z]+)", + "home/user ~ home/root ~ home/admin", + 1, +) + +submatch_none := regex.find_all_string_submatch_n( + "([a-z]+)/([a-z]+)", + "home/user ~ home/root ~ home/admin", + 0, +) diff --git a/tests/smoke_test.rs b/tests/smoke_test.rs index 0952b39..f68d995 100644 --- a/tests/smoke_test.rs +++ b/tests/smoke_test.rs @@ -120,6 +120,8 @@ integration_test!(test_units, "test-units"); integration_test!(test_rand, "test-rand"); integration_test!(test_yaml, "test-yaml"); +integration_test!(test_regex, "test-regex"); + /* #[tokio::test] async fn test_uuid() { diff --git a/tests/snapshots/smoke_test__regex.snap b/tests/snapshots/smoke_test__regex.snap new file mode 100644 index 0000000..3c267a7 --- /dev/null +++ b/tests/snapshots/smoke_test__regex.snap @@ -0,0 +1,46 @@ +--- +source: tests/smoke_test.rs +expression: "test_policy(\"test-regex\", None).await.expect(\"error in test suite\")" +--- +- result: + find_n_all: + - oo + - a + find_n_few: + - oo + find_n_none: [] + globs_match_false: false + globs_match_true: true + is_valid_false: false + is_valid_true: true + match_false: false + match_true: true + replace_all: XXX 123 XXXdefg + replace_empty: "" + split_1: + - foo + - "" + - bar + - baz + split_2: + - "" + split_3: + - foo-bar-baz + submatch_all: + - - home/user + - home + - user + - - home/root + - home + - root + - - home/admin + - home + - admin + submatch_none: [] + submatch_some: + - - home/user + - home + - user + template_match_false: false + template_match_true: true +