diff --git a/CHANGELOG.md b/CHANGELOG.md index 7372b3fb0..05b67c52e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * #644 Add `!^` and `!$` operator * #658 Add unique-strategy config option (`cat` command) * #672 Stabilize `select` command +* #673 Add contains relation matcher (`=?`) ### Changed diff --git a/README.md b/README.md index 3cf7dcd1c..426d0d7f0 100644 --- a/README.md +++ b/README.md @@ -128,10 +128,10 @@ between `/01` and `/10`. Simple subfield filter consists of the subfield code (single alpha-numerical character, ex `0`) a comparison operator (equal `==`, not equal `!=` not equal, starts with prefix `=^`, starts not with -prefix `!^`, ends with suffix `=$`, regex `=~`/`!~`, `in` and `not in`) -and a value enclosed in single quotes. These simple subfield expressions -can be grouped in parentheses and combined with boolean connectives (ex. -`(0 == 'abc' || 0 == 'def')`). +prefix `!^`, ends with suffix `=$`, regex `=~`/`!~`, contains substring +`=?`, `in` and `not in`) and a value enclosed in single quotes. These +simple subfield expressions can be grouped in parentheses and combined +with boolean connectives (ex. `(0 == 'abc' || 0 == 'def')`). A special existence operator can be used to check if a given field (`012A/00?`) or a subfield (`002@.0?` or `002@$0?`) exists. To test for diff --git a/pica-matcher/src/common.rs b/pica-matcher/src/common.rs index e38d46b4c..c3e8eb614 100644 --- a/pica-matcher/src/common.rs +++ b/pica-matcher/src/common.rs @@ -40,6 +40,7 @@ pub(crate) enum RelationalOp { EndsWith, // ends with, "=$" EndsNotWith, // ends not with, "!$" Similar, // similar, "=*" + Contains, // contains, "=?" 
} impl Display for RelationalOp { @@ -56,6 +57,7 @@ impl Display for RelationalOp { RelationalOp::EndsWith => write!(f, "=$"), RelationalOp::EndsNotWith => write!(f, "!$"), RelationalOp::Similar => write!(f, "=*"), + RelationalOp::Contains => write!(f, "=?"), } } } @@ -72,6 +74,7 @@ pub(crate) fn parse_relational_op_str( value(RelationalOp::EndsWith, tag("=$")), value(RelationalOp::EndsNotWith, tag("!$")), value(RelationalOp::Similar, tag("=*")), + value(RelationalOp::Contains, tag("=?")), ))(i) } @@ -238,6 +241,10 @@ mod tests { parse_relational_op_str(b"=*"), RelationalOp::Similar ); + assert_finished_and_eq!( + parse_relational_op_str(b"=?"), + RelationalOp::Contains + ); } #[test] diff --git a/pica-matcher/src/subfield_matcher.rs b/pica-matcher/src/subfield_matcher.rs index 80369daa1..ae086682c 100644 --- a/pica-matcher/src/subfield_matcher.rs +++ b/pica-matcher/src/subfield_matcher.rs @@ -272,6 +272,20 @@ impl RelationMatcher { score > options.strsim_threshold } + + /// Returns `true` if the matcher's value occurs as a substring of + /// the given `value`. If the `case_ignore` flag is set, both strings + /// will be converted to lowercase first.
+ fn contains(&self, value: &[u8], options: &MatcherOptions) -> bool { + if options.case_ignore { + value + .to_lowercase() + .find(self.value.to_lowercase()) + .is_some() + } else { + value.find(&self.value).is_some() + } + } } impl Matcher for RelationMatcher { @@ -307,6 +321,9 @@ impl Matcher for RelationMatcher { RelationalOp::Similar => { self.is_similar(value, options) } + RelationalOp::Contains => { + self.contains(value, options) + } _ => unreachable!(), } }) diff --git a/pica-matcher/tests/subfield_matcher.rs b/pica-matcher/tests/subfield_matcher.rs index 6798df09f..fdfaea5a5 100644 --- a/pica-matcher/tests/subfield_matcher.rs +++ b/pica-matcher/tests/subfield_matcher.rs @@ -277,6 +277,45 @@ fn relational_matcher_similar() -> anyhow::Result<()> { Ok(()) } +#[test] +fn relational_matcher_contains() -> anyhow::Result<()> { + // default options + let matcher = RelationMatcher::new("a =? 'aba'")?; + let options = MatcherOptions::default(); + + assert!(matcher + .is_match(&SubfieldRef::from_bytes(b"\x1faaba")?, &options)); + assert!(matcher + .is_match(&SubfieldRef::from_bytes(b"\x1faxabax")?, &options)); + assert!(!matcher + .is_match(&SubfieldRef::from_bytes(b"\x1faabba")?, &options)); + + // case ignore + let matcher = RelationMatcher::new("a =? 'AbA'")?; + let options = MatcherOptions::default().case_ignore(true); + + assert!(matcher + .is_match(&SubfieldRef::from_bytes(b"\x1faaba")?, &options)); + assert!(matcher + .is_match(&SubfieldRef::from_bytes(b"\x1faxabax")?, &options)); + assert!(!matcher + .is_match(&SubfieldRef::from_bytes(b"\x1faabba")?, &options)); + + // multiple subfields + let matcher = RelationMatcher::new("a =? 
'aba'")?; + let options = MatcherOptions::default(); + + assert!(matcher.is_match( + vec![ + &SubfieldRef::from_bytes(b"\x1faXabbaX")?, + &SubfieldRef::from_bytes(b"\x1faYabaY")?, + ], + &options + )); + + Ok(()) +} + #[test] fn regex_matcher() -> anyhow::Result<()> { // case sensitive diff --git a/pica-toolkit/tests/snapshot/filter/0114-filter-relation-matcher-curly-contains-f.toml b/pica-toolkit/tests/snapshot/filter/0114-filter-relation-matcher-curly-contains-f.toml new file mode 100644 index 000000000..3d1f2c2d8 --- /dev/null +++ b/pica-toolkit/tests/snapshot/filter/0114-filter-relation-matcher-curly-contains-f.toml @@ -0,0 +1,6 @@ +bin.name = "pica" +args = "filter \"012A{0 =? 'xyz' && 0?}\"" +status = "success" +stdin = "012A \u001f0abba\u001e\n" +stdout = "" +stderr = "" diff --git a/pica-toolkit/tests/snapshot/filter/0114-filter-relation-matcher-curly-contains-t.toml b/pica-toolkit/tests/snapshot/filter/0114-filter-relation-matcher-curly-contains-t.toml new file mode 100644 index 000000000..a7d5b1411 --- /dev/null +++ b/pica-toolkit/tests/snapshot/filter/0114-filter-relation-matcher-curly-contains-t.toml @@ -0,0 +1,6 @@ +bin.name = "pica" +args = "filter \"012A{0 =? 'bb' && 0?}\"" +status = "success" +stdin = "012A \u001f0abba\u001e\n" +stdout = "012A \u001f0abba\u001e\n" +stderr = "" diff --git a/pica-toolkit/tests/snapshot/filter/0114-filter-relation-matcher-simple-contains-f.toml b/pica-toolkit/tests/snapshot/filter/0114-filter-relation-matcher-simple-contains-f.toml new file mode 100644 index 000000000..a4c147caf --- /dev/null +++ b/pica-toolkit/tests/snapshot/filter/0114-filter-relation-matcher-simple-contains-f.toml @@ -0,0 +1,6 @@ +bin.name = "pica" +args = "filter \"012A.0 =? 
'xyz'\"" +status = "success" +stdin = "012A \u001f0abba\u001e\n" +stdout = "" +stderr = "" diff --git a/pica-toolkit/tests/snapshot/filter/0114-filter-relation-matcher-simple-contains-t.toml b/pica-toolkit/tests/snapshot/filter/0114-filter-relation-matcher-simple-contains-t.toml new file mode 100644 index 000000000..b1d3e3092 --- /dev/null +++ b/pica-toolkit/tests/snapshot/filter/0114-filter-relation-matcher-simple-contains-t.toml @@ -0,0 +1,6 @@ +bin.name = "pica" +args = "filter \"012A.0 =? 'bb'\"" +status = "success" +stdin = "012A \u001f0abba\u001e\n" +stdout = "012A \u001f0abba\u001e\n" +stderr = ""