Skip to content

Commit

Permalink
Add contains relation matcher (=?) (#673)
Browse files Browse the repository at this point in the history
  • Loading branch information
nwagner84 authored Jul 23, 2023
1 parent 681b079 commit 98c2fea
Show file tree
Hide file tree
Showing 9 changed files with 92 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* #644 Add `!^` and `!$` operator
* #658 Add unique-strategy config option (`cat` command)
* #672 Stabilize `select` command
* #673 Add contains relation matcher (`=?`)

### Changed

Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,10 @@ between `/01` and `/10`.
A simple subfield filter consists of the subfield code (a single
alphanumeric character, e.g. `0`), a comparison operator (equal `==`,
not equal `!=`, starts with prefix `=^`, does not start with
prefix `!^`, ends with suffix `=$`, regex `=~`/`!~`, `in` and `not in`)
and a value enclosed in single quotes. These simple subfield expressions
can be grouped in parentheses and combined with boolean connectives (ex.
`(0 == 'abc' || 0 == 'def')`).
prefix `!^`, ends with suffix `=$`, regex `=~`/`!~`, contains substring
`=?`, `in` and `not in`) and a value enclosed in single quotes. These
simple subfield expressions can be grouped in parentheses and combined
with boolean connectives (ex. `(0 == 'abc' || 0 == 'def')`).

A special existence operator can be used to check if a given field
(`012A/00?`) or a subfield (`002@.0?` or `002@$0?`) exists. To test for
Expand Down
7 changes: 7 additions & 0 deletions pica-matcher/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ pub(crate) enum RelationalOp {
EndsWith, // ends with, "=$"
EndsNotWith, // ends not with, "!$"
Similar, // similar, "=*"
Contains, // contains, "=?"
}

impl Display for RelationalOp {
Expand All @@ -56,6 +57,7 @@ impl Display for RelationalOp {
RelationalOp::EndsWith => write!(f, "=$"),
RelationalOp::EndsNotWith => write!(f, "!$"),
RelationalOp::Similar => write!(f, "=*"),
RelationalOp::Contains => write!(f, "=?"),
}
}
}
Expand All @@ -72,6 +74,7 @@ pub(crate) fn parse_relational_op_str(
value(RelationalOp::EndsWith, tag("=$")),
value(RelationalOp::EndsNotWith, tag("!$")),
value(RelationalOp::Similar, tag("=*")),
value(RelationalOp::Contains, tag("=?")),
))(i)
}

Expand Down Expand Up @@ -238,6 +241,10 @@ mod tests {
parse_relational_op_str(b"=*"),
RelationalOp::Similar
);
assert_finished_and_eq!(
parse_relational_op_str(b"=?"),
RelationalOp::Contains
);
}

#[test]
Expand Down
17 changes: 17 additions & 0 deletions pica-matcher/src/subfield_matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,20 @@ impl RelationMatcher {

score > options.strsim_threshold
}

/// Returns `true` if the matcher's own value occurs as a substring of
/// the given `value`. If the `case_ignore` flag is set in `options`,
/// both byte strings are converted to lowercase before searching.
fn contains(&self, value: &[u8], options: &MatcherOptions) -> bool {
    // Case-sensitive path: search the raw bytes directly.
    if !options.case_ignore {
        return value.find(&self.value).is_some();
    }

    // Case-insensitive path: lowercase the haystack first, then the
    // needle, and search the normalised copies.
    let haystack = value.to_lowercase();
    let needle = self.value.to_lowercase();
    haystack.find(needle).is_some()
}
}

impl Matcher for RelationMatcher {
Expand Down Expand Up @@ -307,6 +321,9 @@ impl Matcher for RelationMatcher {
RelationalOp::Similar => {
self.is_similar(value, options)
}
RelationalOp::Contains => {
self.contains(value, options)
}
_ => unreachable!(),
}
})
Expand Down
39 changes: 39 additions & 0 deletions pica-matcher/tests/subfield_matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,45 @@ fn relational_matcher_similar() -> anyhow::Result<()> {
Ok(())
}

#[test]
fn relational_matcher_contains() -> anyhow::Result<()> {
    // Case-sensitive search with the default matcher options: the
    // needle may be the whole value or embedded inside it.
    let sensitive = RelationMatcher::new("a =? 'aba'")?;
    let default_opts = MatcherOptions::default();

    let exact = SubfieldRef::from_bytes(b"\x1faaba")?;
    assert!(sensitive.is_match(&exact, &default_opts));
    let embedded = SubfieldRef::from_bytes(b"\x1faxabax")?;
    assert!(sensitive.is_match(&embedded, &default_opts));
    let absent = SubfieldRef::from_bytes(b"\x1faabba")?;
    assert!(!sensitive.is_match(&absent, &default_opts));

    // With `case_ignore` enabled, a mixed-case needle must behave like
    // its lowercase form.
    let insensitive = RelationMatcher::new("a =? 'AbA'")?;
    let ignore_case_opts = MatcherOptions::default().case_ignore(true);

    let exact = SubfieldRef::from_bytes(b"\x1faaba")?;
    assert!(insensitive.is_match(&exact, &ignore_case_opts));
    let embedded = SubfieldRef::from_bytes(b"\x1faxabax")?;
    assert!(insensitive.is_match(&embedded, &ignore_case_opts));
    let absent = SubfieldRef::from_bytes(b"\x1faabba")?;
    assert!(!insensitive.is_match(&absent, &ignore_case_opts));

    // Several subfields: the matcher succeeds although only the second
    // subfield contains the needle.
    let multi = RelationMatcher::new("a =? 'aba'")?;
    let multi_opts = MatcherOptions::default();

    let without_needle = SubfieldRef::from_bytes(b"\x1faXabbaX")?;
    let with_needle = SubfieldRef::from_bytes(b"\x1faYabaY")?;
    assert!(multi.is_match(vec![&without_needle, &with_needle], &multi_opts));

    Ok(())
}

#[test]
fn regex_matcher() -> anyhow::Result<()> {
// case sensitive
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# CLI case for the `=?` (contains) operator inside a field group
# (`012A{...}`): the needle 'xyz' does not occur in the value of
# subfield 0 ("abba"), so the record is expected to be dropped
# (empty stdout) while the command still exits successfully.
bin.name = "pica"
args = "filter \"012A{0 =? 'xyz' && 0?}\""
status = "success"
stdin = "012A \u001f0abba\u001e\n"
stdout = ""
stderr = ""
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# CLI case for the `=?` (contains) operator inside a field group
# (`012A{...}`): the needle 'bb' occurs in the value of subfield 0
# ("abba"), so the record is expected to be written to stdout unchanged.
bin.name = "pica"
args = "filter \"012A{0 =? 'bb' && 0?}\""
status = "success"
stdin = "012A \u001f0abba\u001e\n"
stdout = "012A \u001f0abba\u001e\n"
stderr = ""
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# CLI case for the `=?` (contains) operator in the dotted form
# (`012A.0 =? ...`): the needle 'xyz' does not occur in the value of
# subfield 0 ("abba"), so the record is expected to be dropped
# (empty stdout) while the command still exits successfully.
bin.name = "pica"
args = "filter \"012A.0 =? 'xyz'\""
status = "success"
stdin = "012A \u001f0abba\u001e\n"
stdout = ""
stderr = ""
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# CLI case for the `=?` (contains) operator in the dotted form
# (`012A.0 =? ...`): the needle 'bb' occurs in the value of subfield 0
# ("abba"), so the record is expected to be written to stdout unchanged.
bin.name = "pica"
args = "filter \"012A.0 =? 'bb'\""
status = "success"
stdin = "012A \u001f0abba\u001e\n"
stdout = "012A \u001f0abba\u001e\n"
stderr = ""

0 comments on commit 98c2fea

Please sign in to comment.