Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Regex built ins #128

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 19 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,25 @@ serde_json = "1"
thiserror = "1"
tokio = { version = "1.5", features = ["sync", "macros"] }
tracing = "0.1.27"
wasmtime = { version = ">=0.40.0,<3", default-features = false, features = ["async"] }
wasmtime = { version = ">=0.40.0,<3", default-features = false, features = [
"async",
] }

# Loader
tokio-tar = { version = "0.3", optional = true }
async-compression = { version = "0.3", optional = true, features = ["tokio", "gzip"] }
async-compression = { version = "0.3", optional = true, features = [
"tokio",
"gzip",
] }
futures-util = { version = "0.3", optional = true }

# CLI
camino = { version = "1", optional = true }
clap = { version = "4", features = ["derive"], optional = true }
tracing-forest = { version = "0.1.4", optional = true }
tracing-subscriber = { version = "0.3", features = ["env-filter"], optional = true }
tracing-subscriber = { version = "0.3", features = [
"env-filter",
], optional = true }

# Builtins
base64 = { version = ">=0.5,<0.14", optional = true }
Expand All @@ -41,13 +48,19 @@ sha2 = { version = "0.10", optional = true }
sprintf = { version = "0.1", optional = true }
parse-size = { version = "1", features = ["std"], optional = true }
serde_yaml = { version = "0.9.1", optional = true }
regex = { version = "1.7.0", optional = true }
route-pattern = { version = "0.1.0", optional = true }
regex-intersect = { version = "1.2.0", optional = true }

[dev-dependencies.tokio]
version = "1.5"
features = ["macros", "fs", "rt", "rt-multi-thread"]

[dev-dependencies]
wasmtime= { version = ">=0.40.0,<3", default-features = false, features = ["cranelift", "memory-init-cow"] }
wasmtime = { version = ">=0.40.0,<3", default-features = false, features = [
"cranelift",
"memory-init-cow",
] }
insta = { version = "1", features = ["yaml"] }

[build-dependencies]
Expand Down Expand Up @@ -92,6 +105,7 @@ json-builtins = ["dep:json-patch"]
units-builtins = ["dep:parse-size"]
rand-builtins = ["rng"]
yaml-builtins = ["dep:serde_yaml"]
regex-builtins = ["dep:regex", "dep:route-pattern", "dep:regex-intersect"]

all-crypto-builtins = [
"crypto-digest-builtins",
Expand All @@ -111,6 +125,7 @@ all-builtins = [
"sprintf-builtins",
"units-builtins",
"yaml-builtins",
"regex-builtins",
]

[[test]]
Expand Down
2 changes: 2 additions & 0 deletions src/builtins/impls/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ pub mod object;
pub mod opa;
#[cfg(feature = "rng")]
pub mod rand;
#[cfg(feature = "regex-builtins")]
pub mod regex;

pub mod rego;
#[cfg(feature = "semver-builtins")]
pub mod semver;
Expand Down
76 changes: 70 additions & 6 deletions src/builtins/impls/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,19 @@

//! Builtins related to regular expressions

use anyhow::{bail, Result};
use anyhow::{bail, Context, Result};
use regex::Regex;

/// Returns the specified number of matches when matching the input against the
/// pattern.
#[tracing::instrument(name = "regex.find_n", err)]
pub fn find_n(pattern: String, value: String, number: i64) -> Result<Vec<String>> {
bail!("not implemented");
pub fn find_n(pattern: String, value: String, number: i32) -> Result<Vec<String>> {
    let compiled = Regex::new(&pattern)?;
    // A negative `number` cannot be converted to `usize`; in that case the
    // limit falls back to `usize::MAX`, so every match is kept.
    let limit = usize::try_from(number).unwrap_or(usize::MAX);
    let matches: Vec<String> = compiled
        .find_iter(&value)
        .take(limit)
        .map(|found| found.as_str().to_owned())
        .collect();
    Ok(matches)
}

/// Checks if the intersection of two glob-style regular expressions matches a
Expand All @@ -30,13 +36,18 @@ pub fn find_n(pattern: String, value: String, number: i64) -> Result<Vec<String>
/// `[`, `-`, `]` and `\\` are treated as special symbols.
#[tracing::instrument(name = "regex.globs_match", err)]
pub fn globs_match(glob1: String, glob2: String) -> Result<bool> {
bail!("not implemented");
regex_intersect::non_empty(&glob1, &glob2).context("expressions should parse")
}

/// Splits the input string by the occurrences of the given pattern.
#[tracing::instrument(name = "regex.split", err)]
pub fn split(pattern: String, value: String) -> Result<Vec<String>> {
bail!("not implemented");
let re = Regex::new(&pattern)?;
Ok(re
.split(&value)
.into_iter()
.map(ToString::to_string)
.collect::<Vec<_>>())
}

/// Matches a string against a pattern, where the pattern may be glob-like
Expand All @@ -47,5 +58,58 @@ pub fn template_match(
delimiter_start: String,
delimiter_end: String,
) -> Result<bool> {
bail!("not implemented");
if let (Some(dstart), Some(dend)) =
(delimiter_start.chars().next(), delimiter_end.chars().next())
{
route_pattern::is_match(&pattern, dstart, dend, &value)
.context("route pattern should parse")
} else {
bail!("delimiters must be a single character each");
}
}

/// Finds every match of the regular expression `pattern` in `s` and replaces
/// it with `value`.
///
/// OPA's `regex.replace` has "replace all" semantics, which is why this calls
/// `replace_all` rather than performing a single substitution.
///
/// # Errors
///
/// Returns an error when `pattern` does not compile as a regular expression.
#[tracing::instrument(name = "regex.replace", err)]
pub fn replace(s: String, pattern: String, value: String) -> Result<String> {
    let compiled = Regex::new(&pattern)?;
    let replaced = compiled.replace_all(&s, &value);
    Ok(replaced.into_owned())
}

/// Reports whether the regular expression `pattern` matches `value`.
///
/// # Errors
///
/// Returns an error when `pattern` does not compile as a regular expression.
#[tracing::instrument(name = "regex.match", err)]
pub fn regex_match(pattern: String, value: String) -> Result<bool> {
    let is_hit = Regex::new(&pattern)?.is_match(&value);
    Ok(is_hit)
}

/// Reports whether `pattern` compiles as a regular expression.
#[tracing::instrument(name = "regex.is_valid")]
pub fn is_valid(pattern: String) -> bool {
    // Validation deliberately goes through `regex` instead of calling
    // `regex-syntax` directly, even though the latter may be cheaper:
    // `regex-syntax` is considered an implementation detail of `regex`, and
    // every other builtin here compiles patterns with `regex`. Validating with
    // the same entry point keeps this check in step with them should `regex`
    // ever change how it parses.
    let compiled = Regex::new(&pattern);
    compiled.is_ok()
}

/// Returns up to `number` successive matches of `pattern` in `value`, each
/// together with its capture groups.
///
/// Every inner vector has one entry per capture group of the pattern: index 0
/// is the whole match and index `i` is the `i`-th group. A group that did not
/// take part in a given match (for example the unused side of an alternation
/// or an optional group) is reported as an empty string, so positions always
/// line up with group numbers.
///
/// # Errors
///
/// Returns an error when `pattern` does not compile as a regular expression.
#[tracing::instrument(name = "regex.find_all_string_submatch_n", err)]
pub fn find_all_string_submatch_n(
    pattern: String,
    value: String,
    number: usize,
) -> Result<Vec<Vec<String>>> {
    // NOTE(review): the sibling `find_n` takes a signed count and treats a
    // negative value as "no limit"; `number` is unsigned here, so confirm how
    // a `-1` argument is decoded before it reaches this function.
    let re = Regex::new(&pattern)?;
    let submatches = re
        .captures_iter(&value)
        .take(number)
        .map(|caps| {
            caps.iter()
                // Keep a placeholder for non-participating groups instead of
                // dropping them; dropping would shift every later group to a
                // lower index.
                .map(|group| group.map_or_else(String::new, |g| g.as_str().to_string()))
                .collect::<Vec<_>>()
        })
        .collect::<Vec<_>>();
    Ok(submatches)
}
15 changes: 15 additions & 0 deletions src/builtins/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,25 @@ pub fn resolve<C: EvaluationContext>(name: &str) -> Result<Box<dyn Builtin<C>>>
#[cfg(feature = "rng")]
"rand.intn" => Ok(self::impls::rand::intn.wrap()),

#[cfg(feature = "regex-builtins")]
"regex.find_n" => Ok(self::impls::regex::find_n.wrap()),
#[cfg(feature = "regex-builtins")]
"regex.globs_match" => Ok(self::impls::regex::globs_match.wrap()),
#[cfg(feature = "regex-builtins")]
"regex.split" => Ok(self::impls::regex::split.wrap()),
#[cfg(feature = "regex-builtins")]
"regex.template_match" => Ok(self::impls::regex::template_match.wrap()),
#[cfg(feature = "regex-builtins")]
"regex.replace" => Ok(self::impls::regex::replace.wrap()),
#[cfg(feature = "regex-builtins")]
"regex.match" => Ok(self::impls::regex::regex_match.wrap()),
#[cfg(feature = "regex-builtins")]
"regex.is_valid" => Ok(self::impls::regex::is_valid.wrap()),
#[cfg(feature = "regex-builtins")]
"regex.find_all_string_submatch_n" => {
Ok(self::impls::regex::find_all_string_submatch_n.wrap())
}

"rego.parse_module" => Ok(self::impls::rego::parse_module.wrap()),

#[cfg(feature = "semver-builtins")]
Expand Down
51 changes: 51 additions & 0 deletions tests/infra-fixtures/test-regex.rego
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Fixture for the `regex.*` builtins. Each rule below is one test case; the
# values noted in the comments are the ones recorded in the regex snapshot.
package test

# regex.is_valid: ".*" is accepted as a pattern (true).
is_valid_true := regex.is_valid(".*")

# regex.is_valid: a bare "*" is rejected as a pattern (false).
is_valid_false := regex.is_valid("*")

# regex.find_n: a count of -1 returns every match ("oo" and "a").
find_n_all := regex.find_n("[oa]+", "foo bar", -1)

# regex.find_n: a count of 1 returns only the first match ("oo").
find_n_few := regex.find_n("[oa]+", "foo bar", 1)

# regex.find_n: a count of 0 returns an empty list.
find_n_none := regex.find_n("[oa]+", "foo bar", 0)

# regex.split: consecutive separators produce an empty element between them.
split_1 := regex.split("/", "foo//bar/baz")

# regex.split: an empty input yields a single empty string.
split_2 := regex.split("/", "")

# regex.split: without any separator the input comes back as one element.
split_3 := regex.split("/", "foo-bar-baz")

# regex.globs_match: the two expressions share at least one matching string (true).
globs_match_true := regex.globs_match("a.a.", ".b.b")

# regex.globs_match: letters-only versus digits-only expressions do not intersect (false).
globs_match_false := regex.globs_match("[a-z]+", "[0-9]*")

# regex.template_match: "{" and "}" delimit the regex section of the template;
# "123" satisfies [0-9]{1,4} (true).
template_match_true := regex.template_match("/users/id-{[0-9]{1,4}}/update", "/users/id-123/update", "{", "}")

# regex.template_match: "123123" has more digits than [0-9]{1,4} allows (false).
template_match_false := regex.template_match("/users/id-{[0-9]{1,4}}/update", "/users/id-123123/update", "{", "}")

# regex.replace: every match is replaced, not just the first ("XXX 123 XXXdefg").
replace_all := regex.replace("abc 123 abcdefg", "[abc]+", "XXX")

# regex.replace: an empty input stays empty.
replace_empty := regex.replace("", "abc", "XXX")

# regex.match: ".*" matches "foobar" (true).
match_true := regex.match(".*", "foobar")

# regex.match: a digits-only pattern does not match "foobar" (false).
match_false := regex.match("[0-9]+", "foobar")

# regex.find_all_string_submatch_n: a count of -1 returns all three matches,
# each as [whole match, group 1, group 2].
submatch_all := regex.find_all_string_submatch_n(
"([a-z]+)/([a-z]+)",
"home/user ~ home/root ~ home/admin",
-1,
)

# regex.find_all_string_submatch_n: a count of 1 returns only the first match.
submatch_some := regex.find_all_string_submatch_n(
"([a-z]+)/([a-z]+)",
"home/user ~ home/root ~ home/admin",
1,
)

# regex.find_all_string_submatch_n: a count of 0 returns an empty list.
submatch_none := regex.find_all_string_submatch_n(
"([a-z]+)/([a-z]+)",
"home/user ~ home/root ~ home/admin",
0,
)
2 changes: 2 additions & 0 deletions tests/smoke_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ integration_test!(test_units, "test-units");
integration_test!(test_rand, "test-rand");
integration_test!(test_yaml, "test-yaml");

integration_test!(test_regex, "test-regex");

/*
#[tokio::test]
async fn test_uuid() {
Expand Down
46 changes: 46 additions & 0 deletions tests/snapshots/smoke_test__regex.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
---
source: tests/smoke_test.rs
expression: "test_policy(\"test-regex\", None).await.expect(\"error in test suite\")"
---
- result:
find_n_all:
- oo
- a
find_n_few:
- oo
find_n_none: []
globs_match_false: false
globs_match_true: true
is_valid_false: false
is_valid_true: true
match_false: false
match_true: true
replace_all: XXX 123 XXXdefg
replace_empty: ""
split_1:
- foo
- ""
- bar
- baz
split_2:
- ""
split_3:
- foo-bar-baz
submatch_all:
- - home/user
- home
- user
- - home/root
- home
- root
- - home/admin
- home
- admin
submatch_none: []
submatch_some:
- - home/user
- home
- user
template_match_false: false
template_match_true: true