From 7ceb0f3210a7e22bb70adf029f3e3e958f67d0fe Mon Sep 17 00:00:00 2001 From: Nico Wagner Date: Wed, 21 Jun 2023 13:34:47 +0200 Subject: [PATCH] Allow single-line comments Closes #457 --- pica-matcher/src/common.rs | 19 +++++++++-- pica-matcher/src/field_matcher.rs | 50 ++++++++++++++-------------- pica-matcher/src/subfield_matcher.rs | 40 +++++++++++----------- 3 files changed, 61 insertions(+), 48 deletions(-) diff --git a/pica-matcher/src/common.rs b/pica-matcher/src/common.rs index 54d13e71c..e6819a7f7 100644 --- a/pica-matcher/src/common.rs +++ b/pica-matcher/src/common.rs @@ -3,10 +3,10 @@ use std::fmt::{self, Display}; use nom::branch::alt; use nom::bytes::complete::{is_not, tag}; use nom::character::complete::{char, multispace0, multispace1}; -use nom::combinator::{map, map_res, value, verify}; +use nom::combinator::{map, map_res, opt, value, verify}; use nom::multi::fold_many0; -use nom::sequence::{delimited, preceded}; -use nom::IResult; +use nom::sequence::{delimited, pair, preceded}; +use nom::{IResult, Parser}; use pica_record::parser::ParseResult; /// Boolean Operators. @@ -26,6 +26,19 @@ where delimited(multispace0, inner, multispace0) } +pub(crate) fn comment<'a, O, E: nom::error::ParseError<&'a [u8]>, F>( + mut inner: F, +) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], O, E> +where + F: Parser<&'a [u8], O, E>, +{ + move |i: &'a [u8]| { + let (i, o) = inner.parse(i)?; + let (i, _) = opt(pair(tag("//"), is_not("\n\r")))(i)?; + Ok((i, o)) + } +} + /// Relational Operator #[derive(Debug, Clone, PartialEq, Eq)] pub(crate) enum RelationalOp { diff --git a/pica-matcher/src/field_matcher.rs b/pica-matcher/src/field_matcher.rs index 44d349785..ece3eeef1 100644 --- a/pica-matcher/src/field_matcher.rs +++ b/pica-matcher/src/field_matcher.rs @@ -13,7 +13,7 @@ use pica_record::parser::ParseResult; use pica_record::Field; use crate::common::{ - parse_relational_op_usize, ws, BooleanOp, RelationalOp, + comment, parse_relational_op_usize, ws, BooleanOp, RelationalOp, }; use crate::occurrence_matcher::{ parse_occurrence_matcher, OccurrenceMatcher, @@ -579,21 +579,21 @@ fn parse_field_matcher_not(i: &[u8]) -> ParseResult { fn parse_field_matcher_and(i: &[u8]) -> ParseResult { let (i, (first, remainder)) = tuple(( alt(( - ws(parse_field_matcher_group), - ws(parse_field_matcher_cardinality), - ws(parse_field_matcher_singleton), - ws(parse_field_matcher_not), - ws(parse_field_matcher_exists), + comment(ws(parse_field_matcher_group)), + comment(ws(parse_field_matcher_cardinality)), + comment(ws(parse_field_matcher_singleton)), + comment(ws(parse_field_matcher_not)), + comment(ws(parse_field_matcher_exists)), )), many1(preceded( - ws(tag("&&")), - alt(( - ws(parse_field_matcher_group), - ws(parse_field_matcher_cardinality), - ws(parse_field_matcher_singleton), - ws(parse_field_matcher_not), - ws(parse_field_matcher_exists), - )), + comment(ws(tag("&&"))), + cut(alt(( + comment(ws(parse_field_matcher_group)), + comment(ws(parse_field_matcher_cardinality)), + comment(ws(parse_field_matcher_singleton)), + comment(ws(parse_field_matcher_not)), + comment(ws(parse_field_matcher_exists)), + ))), )), ))(i)?; @@ -606,20 +606,20 @@ fn parse_field_matcher_and(i: &[u8]) -> ParseResult { fn parse_field_matcher_or(i: &[u8]) -> ParseResult { let (i, (first, remainder)) = tuple(( alt(( - ws(parse_field_matcher_group), - ws(parse_field_matcher_and), - ws(parse_field_matcher_cardinality), - ws(parse_field_matcher_singleton), - ws(parse_field_matcher_exists), + comment(ws(parse_field_matcher_group)), + comment(ws(parse_field_matcher_and)), + comment(ws(parse_field_matcher_cardinality)), + comment(ws(parse_field_matcher_singleton)), + comment(ws(parse_field_matcher_exists)), )), many1(preceded( - ws(tag("||")), + comment(ws(tag("||"))), cut(alt(( - ws(parse_field_matcher_group), - ws(parse_field_matcher_and), - ws(parse_field_matcher_cardinality), - ws(parse_field_matcher_singleton), - ws(parse_field_matcher_exists), + comment(ws(parse_field_matcher_group)), + comment(ws(parse_field_matcher_and)), + comment(ws(parse_field_matcher_cardinality)), + comment(ws(parse_field_matcher_singleton)), + comment(ws(parse_field_matcher_exists)), ))), )), ))(i)?; diff --git a/pica-matcher/src/subfield_matcher.rs b/pica-matcher/src/subfield_matcher.rs index 95e94ba02..5e949683a 100644 --- a/pica-matcher/src/subfield_matcher.rs +++ b/pica-matcher/src/subfield_matcher.rs @@ -19,8 +19,8 @@ use regex::Regex; use strsim::normalized_levenshtein; use crate::common::{ - parse_relational_op_str, parse_relational_op_usize, parse_string, - ws, BooleanOp, RelationalOp, + comment, parse_relational_op_str, parse_relational_op_usize, + parse_string, ws, BooleanOp, RelationalOp, }; use crate::{MatcherOptions, ParseMatcherError}; @@ -735,18 +735,18 @@ fn parse_group_matcher(i: &[u8]) -> ParseResult { fn parse_or_matcher(i: &[u8]) -> ParseResult { let (i, (first, remainder)) = tuple(( alt(( - ws(parse_group_matcher), - ws(parse_and_matcher), - ws(parse_subfield_singleton_matcher), - ws(parse_not_matcher), + comment(ws(parse_group_matcher)), + comment(ws(parse_and_matcher)), + comment(ws(parse_subfield_singleton_matcher)), + comment(ws(parse_not_matcher)), )), many1(preceded( - ws(tag("||")), + comment(ws(tag("||"))), cut(alt(( - ws(parse_group_matcher), - ws(parse_and_matcher), - ws(parse_subfield_singleton_matcher), - ws(parse_not_matcher), + comment(ws(parse_group_matcher)), + comment(ws(parse_and_matcher)), + comment(ws(parse_subfield_singleton_matcher)), + comment(ws(parse_not_matcher)), ))), )), ))(i)?; @@ -760,22 +760,22 @@ fn parse_or_matcher(i: &[u8]) -> ParseResult { fn parse_and_matcher(i: &[u8]) -> ParseResult { let (i, (first, remainder)) = tuple(( alt(( - ws(parse_group_matcher), - map( + comment(ws(parse_group_matcher)), + comment(map( ws(parse_singleton_matcher), SubfieldMatcher::Singleton, - ), - ws(parse_not_matcher), + )), + comment(ws(parse_not_matcher)), )), many1(preceded( - ws(tag("&&")), + comment(ws(tag("&&"))), alt(( - ws(parse_group_matcher), - map( + comment(ws(parse_group_matcher)), + comment(map( ws(parse_singleton_matcher), SubfieldMatcher::Singleton, - ), - ws(parse_not_matcher), + )), + comment(ws(parse_not_matcher)), )), )), ))(i)?;