diff --git a/core/src/eval/operation.rs b/core/src/eval/operation.rs index ce8c409ef0..0eeaa1ba3c 100644 --- a/core/src/eval/operation.rs +++ b/core/src/eval/operation.rs @@ -1006,7 +1006,12 @@ impl VirtualMachine { "groups", RichTerm::from(Term::Array( Array::from_iter( - groups.into_iter().map(|s| Term::Str(s).into()) + groups + .into_iter() + // Unmatched groups get turned into empty strings. It + // might be nicer to have a 'Some s / 'None instead, + // but that would be an API break. + .map(|s| Term::Str(s.unwrap_or_default()).into()) ), ArrayAttrs::new().closurized() )) @@ -1029,7 +1034,13 @@ impl VirtualMachine { "groups", RichTerm::from(Term::Array( Array::from_iter( - found.groups.into_iter().map(|s| Term::Str(s).into()) + found + .groups + .into_iter() + // Unmatched groups get turned into empty strings. It + // might be nicer to have a 'Some s / 'None instead, + // but that would be an API break. + .map(|s| Term::Str(s.unwrap_or_default()).into()) ), ArrayAttrs::new().closurized() )) diff --git a/core/src/term/string.rs b/core/src/term/string.rs index aca416a510..5533542d30 100644 --- a/core/src/term/string.rs +++ b/core/src/term/string.rs @@ -316,7 +316,7 @@ impl NickelString { let groups = capt .iter() .skip(1) - .filter_map(|s_opt| s_opt.map(|s| s.as_str().into())) + .map(|s_opt| s_opt.map(|s| s.as_str().into())) .collect(); // The indices returned by the `regex` crate are byte offsets into @@ -357,7 +357,10 @@ impl Default for NickelString { pub struct RegexFindResult { pub matched: NickelString, pub index: Number, - pub groups: Vec, + /// If a capture group didn't match, we store a `None`. These `None` placeholders + /// make the indexing predictable, so it's possible to associate captures with + /// parenthesis groupings in the original regex. + pub groups: Vec>, } /// Errors returned by `NickelString`'s `substring` method. 
@@ -573,9 +576,7 @@ mod grapheme_cluster_preservation { ) -> impl Iterator> { needle.captures_iter(haystack).filter(|c| { c.iter().all(|maybe_match| { - maybe_match - .map(|m| does_match_start_and_end_on_boundary(haystack, &m)) - .unwrap_or(false) + maybe_match.map_or(true, |m| does_match_start_and_end_on_boundary(haystack, &m)) }) }) } diff --git a/core/tests/integration/inputs/stdlib/string_find.ncl b/core/tests/integration/inputs/stdlib/string_find.ncl index 2dd0a7a60d..8324ec5516 100644 --- a/core/tests/integration/inputs/stdlib/string_find.ncl +++ b/core/tests/integration/inputs/stdlib/string_find.ncl @@ -6,6 +6,7 @@ let { string, .. } = std in std.string.find "a" "aaa bbb ccc abc" == { groups = [], index = 0, matched = "a" }, std.string.find "([a-z]+)=([0-9]+)" "one=1, two=2, three=3" == { groups = ["one", "1"], index = 0, matched = "one=1" }, std.string.find "(\\d+)\\.(\\d+)\\.(\\d+)" "1.2.3" == { groups = ["1", "2", "3"], index = 0, matched = "1.2.3" }, - std.string.find "(\\p{Emoji})=(\\w+)" "😀=smiling" == { groups = ["😀", "smiling"], index = 0, matched = "😀=smiling" } + std.string.find "(\\p{Emoji})=(\\w+)" "😀=smiling" == { groups = ["😀", "smiling"], index = 0, matched = "😀=smiling" }, + std.string.find "a(b)?" "ac" == { groups = [""], index = 0, matched = "a" }, ] |> std.test.assert_all