From 286ac5dd22d99623929d78aa5c426fdd1637c3ce Mon Sep 17 00:00:00 2001 From: Ed Page Date: Fri, 19 Apr 2024 16:48:21 -0500 Subject: [PATCH 1/3] fix(sub)!: Abstract away substitution values This will give us more implementation flexibility --- crates/snapbox/src/lib.rs | 1 + crates/snapbox/src/substitutions.rs | 101 ++++++++++++++++++++++------ src/cases.rs | 6 +- 3 files changed, 87 insertions(+), 21 deletions(-) diff --git a/crates/snapbox/src/lib.rs b/crates/snapbox/src/lib.rs index df638c8c..cbdeeea6 100644 --- a/crates/snapbox/src/lib.rs +++ b/crates/snapbox/src/lib.rs @@ -116,6 +116,7 @@ pub use data::Data; pub use data::ToDebug; pub use error::Error; pub use snapbox_macros::debug; +pub use substitutions::SubstitutionValue; pub use substitutions::Substitutions; pub type Result = std::result::Result; diff --git a/crates/snapbox/src/substitutions.rs b/crates/snapbox/src/substitutions.rs index 26c12fc4..bc0af174 100644 --- a/crates/snapbox/src/substitutions.rs +++ b/crates/snapbox/src/substitutions.rs @@ -5,10 +5,13 @@ use std::borrow::Cow; /// Built-in expressions: /// - `...` on a line of its own: match multiple complete lines /// - `[..]`: match multiple characters within a line -#[derive(Default, Clone, Debug, PartialEq, Eq)] +#[derive(Default, Clone, Debug)] pub struct Substitutions { - vars: std::collections::BTreeMap<&'static str, std::collections::BTreeSet>>, - unused: std::collections::BTreeSet<&'static str>, + vars: std::collections::BTreeMap< + &'static str, + std::collections::BTreeSet, + >, + unused: std::collections::BTreeSet, } impl Substitutions { @@ -35,17 +38,14 @@ impl Substitutions { pub fn insert( &mut self, key: &'static str, - value: impl Into>, + value: impl Into, ) -> Result<(), crate::Error> { let key = validate_key(key)?; let value = value.into(); - if value.is_empty() { - self.unused.insert(key); + if let Some(inner) = value.inner { + self.vars.entry(key).or_default().insert(inner); } else { - self.vars - .entry(key) - .or_default() - 
.insert(crate::utils::normalize_text(value.as_ref()).into()); + self.unused.insert(SubstitutionValueInner::Str(key)); } Ok(()) } @@ -55,7 +55,7 @@ impl Substitutions { /// keys must be enclosed in `[` and `]`. pub fn extend( &mut self, - vars: impl IntoIterator>)>, + vars: impl IntoIterator)>, ) -> Result<(), crate::Error> { for (key, value) in vars { self.insert(key, value)?; @@ -88,9 +88,9 @@ impl Substitutions { let mut input = input.to_owned(); replace_many( &mut input, - self.vars.iter().flat_map(|(var, replaces)| { - replaces.iter().map(|replace| (replace.as_ref(), *var)) - }), + self.vars + .iter() + .flat_map(|(var, replaces)| replaces.iter().map(|replace| (replace, *var))), ); input } @@ -98,7 +98,7 @@ impl Substitutions { fn clear<'v>(&self, pattern: &'v str) -> Cow<'v, str> { if !self.unused.is_empty() && pattern.contains('[') { let mut pattern = pattern.to_owned(); - replace_many(&mut pattern, self.unused.iter().map(|var| (*var, ""))); + replace_many(&mut pattern, self.unused.iter().map(|var| (var, ""))); Cow::Owned(pattern) } else { Cow::Borrowed(pattern) @@ -106,17 +106,80 @@ impl Substitutions { } } +#[derive(Clone)] +pub struct SubstitutionValue { + inner: Option, +} + +#[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq)] +enum SubstitutionValueInner { + Str(&'static str), + String(String), +} + +impl SubstitutionValueInner { + fn find_in(&self, buffer: &str) -> Option> { + match self { + Self::Str(s) => buffer.find(s).map(|offset| offset..(offset + s.len())), + Self::String(s) => buffer.find(s).map(|offset| offset..(offset + s.len())), + } + } +} + +impl From<&'static str> for SubstitutionValue { + fn from(inner: &'static str) -> Self { + if inner.is_empty() { + Self { inner: None } + } else { + Self { + inner: Some(SubstitutionValueInner::String( + crate::utils::normalize_text(inner), + )), + } + } + } +} + +impl From for SubstitutionValue { + fn from(inner: String) -> Self { + if inner.is_empty() { + Self { inner: None } + } else { + Self { + 
inner: Some(SubstitutionValueInner::String( + crate::utils::normalize_text(&inner), + )), + } + } + } +} + +impl From<&'_ String> for SubstitutionValue { + fn from(inner: &'_ String) -> Self { + inner.clone().into() + } +} + +impl From> for SubstitutionValue { + fn from(inner: Cow<'static, str>) -> Self { + match inner { + Cow::Borrowed(s) => s.into(), + Cow::Owned(s) => s.into(), + } + } +} + /// Replacements is `(from, to)` fn replace_many<'a>( buffer: &mut String, - replacements: impl IntoIterator, + replacements: impl IntoIterator, ) { for (var, replace) in replacements { let mut index = 0; - while let Some(offset) = buffer[index..].find(var) { - let old_range = (index + offset)..(index + offset + var.len()); + while let Some(offset) = var.find_in(&buffer[index..]) { + let old_range = (index + offset.start)..(index + offset.end); buffer.replace_range(old_range, replace); - index += offset + replace.len(); + index += offset.start + replace.len(); } } } diff --git a/src/cases.rs b/src/cases.rs index 405c425a..22518340 100644 --- a/src/cases.rs +++ b/src/cases.rs @@ -137,7 +137,7 @@ impl TestCases { var: &'static str, value: impl Into>, ) -> Result<&Self, crate::Error> { - self.substitutions.borrow_mut().insert(var, value)?; + self.substitutions.borrow_mut().insert(var, value.into())?; Ok(self) } @@ -148,7 +148,9 @@ impl TestCases { &self, vars: impl IntoIterator>)>, ) -> Result<&Self, crate::Error> { - self.substitutions.borrow_mut().extend(vars)?; + self.substitutions + .borrow_mut() + .extend(vars.into_iter().map(|(v, r)| (v, r.into())))?; Ok(self) } From b5220cd90edbb654f958b74c86a8b94c59c3c90b Mon Sep 17 00:00:00 2001 From: Ed Page Date: Fri, 19 Apr 2024 15:50:52 -0500 Subject: [PATCH 2/3] feat(sub): Allow regexes for substitutions --- Cargo.lock | 38 +++++++++--- crates/snapbox/Cargo.toml | 3 + crates/snapbox/src/substitutions.rs | 90 ++++++++++++++++++++++++++++- 3 files changed, 122 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 
d8098f26..9bb4eba7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,6 +26,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + [[package]] name = "anstream" version = "0.6.7" @@ -370,7 +379,7 @@ version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a1e17342619edbc21a964c2afbeb6c820c6a2560032872f397bb97ea127bd0a" dependencies = [ - "aho-corasick", + "aho-corasick 0.7.18", "bstr", "fnv", "log", @@ -547,9 +556,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.5.0" +version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" [[package]] name = "memoffset" @@ -693,20 +702,32 @@ dependencies = [ [[package]] name = "regex" -version = "1.6.0" +version = "1.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" dependencies = [ - "aho-corasick", + "aho-corasick 1.1.3", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +dependencies = [ + "aho-corasick 1.1.3", "memchr", "regex-syntax", ] [[package]] name = "regex-syntax" -version = "0.6.27" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" +checksum = 
"adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" [[package]] name = "remove_dir_all" @@ -869,6 +890,7 @@ dependencies = [ "libtest-mimic", "normalize-line-endings", "os_pipe", + "regex", "serde_json", "similar", "snapbox-macros", diff --git a/crates/snapbox/Cargo.toml b/crates/snapbox/Cargo.toml index 46ac93e6..954330c3 100644 --- a/crates/snapbox/Cargo.toml +++ b/crates/snapbox/Cargo.toml @@ -42,6 +42,8 @@ path = ["dep:tempfile", "dep:walkdir", "dep:dunce", "detect-encoding", "dep:file cmd = ["dep:os_pipe", "dep:wait-timeout", "dep:libc", "dep:windows-sys"] ## Building of examples for snapshotting examples = ["dep:escargot"] +## Regex text substitutions +regex = ["dep:regex"] ## Snapshotting of json json = ["structured-data", "dep:serde_json"] @@ -94,6 +96,7 @@ document-features = { version = "0.2.6", optional = true } serde_json = { version = "1.0.85", optional = true} anstyle-svg = { version = "0.1.3", optional = true } +regex = { version = "1.10.4", optional = true, default-features = false, features = ["std"] } [target.'cfg(windows)'.dependencies] windows-sys = { version = "0.52.0", features = ["Win32_Foundation"], optional = true } diff --git a/crates/snapbox/src/substitutions.rs b/crates/snapbox/src/substitutions.rs index bc0af174..6c902390 100644 --- a/crates/snapbox/src/substitutions.rs +++ b/crates/snapbox/src/substitutions.rs @@ -35,6 +35,17 @@ impl Substitutions { /// let mut subst = snapbox::Substitutions::new(); /// subst.insert("[EXE]", std::env::consts::EXE_SUFFIX); /// ``` + /// + /// With the `regex` feature, you can define patterns using regexes. + /// You can choose to replace a subset of the regex by giving it the named capture group + /// `replace`. 
+ /// + /// ```rust + /// # #[cfg(feature = "regex")] { + /// let mut subst = snapbox::Substitutions::new(); + /// subst.insert("[OBJECT]", regex::Regex::new("(?<replace>(world|moon))").unwrap()); + /// # } + /// ``` pub fn insert( &mut self, key: &'static str, @@ -111,10 +122,12 @@ pub struct SubstitutionValue { inner: Option, } -#[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq)] +#[derive(Clone, Debug)] enum SubstitutionValueInner { Str(&'static str), String(String), + #[cfg(feature = "regex")] + Regex(regex::Regex), } impl SubstitutionValueInner { @@ -122,6 +135,21 @@ impl SubstitutionValueInner { match self { Self::Str(s) => buffer.find(s).map(|offset| offset..(offset + s.len())), Self::String(s) => buffer.find(s).map(|offset| offset..(offset + s.len())), + #[cfg(feature = "regex")] + Self::Regex(r) => { + let captures = r.captures(buffer)?; + let m = captures.name("replace").or_else(|| captures.get(0))?; + Some(m.range()) + } + } + } + + fn as_cmp(&self) -> &str { + match self { + Self::Str(s) => s, + Self::String(s) => s, + #[cfg(feature = "regex")] + Self::Regex(s) => s.as_str(), } } } @@ -169,6 +197,42 @@ impl From> for SubstitutionValue { } } +#[cfg(feature = "regex")] +impl From for SubstitutionValue { + fn from(inner: regex::Regex) -> Self { + Self { + inner: Some(SubstitutionValueInner::Regex(inner)), + } + } +} + +#[cfg(feature = "regex")] +impl From<&'_ regex::Regex> for SubstitutionValue { + fn from(inner: &'_ regex::Regex) -> Self { + inner.clone().into() + } +} + +impl PartialOrd for SubstitutionValueInner { + fn partial_cmp(&self, other: &Self) -> Option { + self.as_cmp().partial_cmp(other.as_cmp()) + } +} + +impl Ord for SubstitutionValueInner { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.as_cmp().cmp(other.as_cmp()) + } +} + +impl PartialEq for SubstitutionValueInner { + fn eq(&self, other: &Self) -> bool { + self.as_cmp().eq(other.as_cmp()) + } +} + +impl Eq for SubstitutionValueInner {} + /// Replacements is `(from, to)` fn 
replace_many<'a>( buffer: &mut String, @@ -480,4 +544,28 @@ mod test { let actual = normalize(input, pattern, &sub); assert_eq!(actual, pattern); } + + #[test] + #[cfg(feature = "regex")] + fn substitute_regex_unnamed() { + let input = "Hello world!"; + let pattern = "Hello [OBJECT]!"; + let mut sub = Substitutions::new(); + sub.insert("[OBJECT]", regex::Regex::new("world").unwrap()) + .unwrap(); + let actual = normalize(input, pattern, &sub); + assert_eq!(actual, pattern); + } + + #[test] + #[cfg(feature = "regex")] + fn substitute_regex_named() { + let input = "Hello world!"; + let pattern = "Hello [OBJECT]!"; + let mut sub = Substitutions::new(); + sub.insert("[OBJECT]", regex::Regex::new("(?world)!").unwrap()) + .unwrap(); + let actual = normalize(input, pattern, &sub); + assert_eq!(actual, pattern); + } } From 77787d8505b19e1ea8be50f67433d02eb78f730f Mon Sep 17 00:00:00 2001 From: Ed Page Date: Sat, 20 Apr 2024 12:34:20 -0500 Subject: [PATCH 3/3] test(sub): Ensure works on all platforms --- crates/snapbox/src/substitutions.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/snapbox/src/substitutions.rs b/crates/snapbox/src/substitutions.rs index 6c902390..2ff2352a 100644 --- a/crates/snapbox/src/substitutions.rs +++ b/crates/snapbox/src/substitutions.rs @@ -540,7 +540,8 @@ mod test { fn substitute_disabled() { let input = "cargo"; let pattern = "cargo[EXE]"; - let sub = Substitutions::with_exe(); + let mut sub = Substitutions::new(); + sub.insert("[EXE]", "").unwrap(); let actual = normalize(input, pattern, &sub); assert_eq!(actual, pattern); }