From 5ae2a0824e11db1ad7d296db6310fe4c9489202d Mon Sep 17 00:00:00 2001
From: Nico Wagner
Date: Thu, 29 Aug 2024 13:40:12 +0200
Subject: [PATCH] Add group modifiers (format) (#811)

Signed-off-by: Nico Wagner
---
Review notes (free-form text in this position is not part of the commit):

* Syntax: a group may start with `?` followed by one or more of the codes
  `L` (lowercase), `U` (uppercase), `T` (trim) and `W` (remove spaces),
  placed directly after the opening parenthesis, e.g. `(?U a)`.
* The modifiers are applied to the formatted group in a fixed order --
  trim, remove_ws, lowercase, uppercase -- independent of the order in
  which the codes were written. If both `L` and `U` are given, the
  uppercase conversion runs last and therefore wins.
* `W` is implemented as `replace(' ', "")`, so only U+0020 is removed;
  tabs and other whitespace characters are left in place.
* `acc.replace(' ', "").to_string()` allocates twice: `str::replace`
  already returns a `String`, so the trailing `.to_string()` can be
  dropped in a follow-up.
* `Modifier::trim` is missing the blank line that separates the other
  setters.
* This copy of the patch was reconstructed from a whitespace-collapsed
  dump: indentation follows rustfmt defaults, and the author e-mail
  addresses are missing. `git am` may reject the header for that reason;
  `git apply` does not read it.

 crates/pica-format/src/lib.rs           | 46 +++++++++++++++++++++++++
 crates/pica-format/src/parse.rs         | 39 ++++++++++++++++++---
 crates/pica-format/tests/integration.rs | 41 ++++++++++++++++++++++
 3 files changed, 121 insertions(+), 5 deletions(-)

diff --git a/crates/pica-format/src/lib.rs b/crates/pica-format/src/lib.rs
index 062a6197a..3c8c3c7ff 100644
--- a/crates/pica-format/src/lib.rs
+++ b/crates/pica-format/src/lib.rs
@@ -187,6 +187,36 @@ impl Formatter for Value {
 struct Group {
     fragments: Box<Fragments>,
     bounds: RangeTo<usize>,
+    modifier: Modifier,
+}
+
+#[derive(Debug, Default, Clone, PartialEq)]
+struct Modifier {
+    lowercase: bool,
+    uppercase: bool,
+    remove_ws: bool,
+    trim: bool,
+}
+
+impl Modifier {
+    pub(crate) fn lowercase(&mut self, yes: bool) -> &mut Self {
+        self.lowercase = yes;
+        self
+    }
+
+    pub(crate) fn uppercase(&mut self, yes: bool) -> &mut Self {
+        self.uppercase = yes;
+        self
+    }
+
+    pub(crate) fn remove_ws(&mut self, yes: bool) -> &mut Self {
+        self.remove_ws = yes;
+        self
+    }
+    pub(crate) fn trim(&mut self, yes: bool) -> &mut Self {
+        self.trim = yes;
+        self
+    }
 }
 
 impl Formatter for Group {
@@ -218,6 +248,22 @@ impl Formatter for Group {
             }
         }
 
+        if self.modifier.trim {
+            acc = acc.trim().to_string();
+        }
+
+        if self.modifier.remove_ws {
+            acc = acc.replace(' ', "").to_string();
+        }
+
+        if self.modifier.lowercase {
+            acc = acc.to_lowercase();
+        }
+
+        if self.modifier.uppercase {
+            acc = acc.to_uppercase();
+        }
+
         acc
     }
 }
diff --git a/crates/pica-format/src/parse.rs b/crates/pica-format/src/parse.rs
index b58eab8c4..a4749a129 100644
--- a/crates/pica-format/src/parse.rs
+++ b/crates/pica-format/src/parse.rs
@@ -14,7 +14,7 @@ use winnow::prelude::*;
 use winnow::stream::{AsChar, Compare, Stream, StreamIsPartial};
 use winnow::token::{one_of, take_till};
 
-use crate::{Format, Fragments, Group, List, Value};
+use crate::{Format, Fragments, Group, List, Modifier, Value};
 
 pub fn parse_format(i: &mut &[u8]) -> PResult<Format> {
     (
@@ -40,9 +40,9 @@ pub fn parse_format(i: &mut &[u8]) -> PResult<Format> {
 
 fn parse_fragments(i: &mut &[u8]) -> PResult<Fragments> {
     alt((
-        parse_list.map(Fragments::List),
-        parse_group.map(Fragments::Group),
-        parse_value.map(Fragments::Value),
+        ws(parse_list).map(Fragments::List),
+        ws(parse_group).map(Fragments::Group),
+        ws(parse_value).map(Fragments::Value),
     ))
     .parse_next(i)
 }
@@ -96,9 +96,37 @@ fn decrement_group_level() {
     })
 }
 
+fn parse_modifier(i: &mut &[u8]) -> PResult<Option<Modifier>> {
+    opt(preceded(
+        '?',
+        repeat(1.., alt(('L', 'U', 'T', 'W'))).map(|codes: Vec<_>| {
+            let mut modifier = Modifier::default();
+            if codes.contains(&'L') {
+                modifier.lowercase(true);
+            }
+
+            if codes.contains(&'U') {
+                modifier.uppercase(true);
+            }
+
+            if codes.contains(&'W') {
+                modifier.remove_ws(true);
+            }
+
+            if codes.contains(&'T') {
+                modifier.trim(true);
+            }
+
+            modifier
+        }),
+    ))
+    .parse_next(i)
+}
+
 fn parse_group(i: &mut &[u8]) -> PResult<Group> {
     (
         terminated(ws('('), increment_group_level),
+        parse_modifier,
         parse_fragments,
         ws(')').map(|_| decrement_group_level()),
         alt((
@@ -112,9 +140,10 @@ fn parse_group(i: &mut &[u8]) -> PResult<Group> {
             empty.value(usize::MAX),
         )),
     )
-        .map(|(_, fragments, _, end)| Group {
+        .map(|(_, modifier, fragments, _, end)| Group {
             fragments: Box::new(fragments),
             bounds: RangeTo { end },
+            modifier: modifier.unwrap_or_default(),
         })
         .parse_next(i)
 }
diff --git a/crates/pica-format/tests/integration.rs b/crates/pica-format/tests/integration.rs
index 540215cbe..31329f2fd 100644
--- a/crates/pica-format/tests/integration.rs
+++ b/crates/pica-format/tests/integration.rs
@@ -77,6 +77,47 @@ fn test_format_quantifier() -> TestResult {
     Ok(())
 }
 
+#[test]
+fn test_format_modifier_trim() -> TestResult {
+    let ada = ByteRecord::from_bytes(ada_lovelace()).expect("record");
+    let fmt = Format::from_str("042A{ (?T 'GND-SC: ' a.. ' ') }")?;
+    let result = ada.format(&fmt, &Default::default());
+    assert_eq!(result, vec!["GND-SC: 28p GND-SC: 9.5p"]);
+
+    Ok(())
+}
+
+#[test]
+fn test_format_modifier_remove_ws() -> TestResult {
+    let ada = ByteRecord::from_bytes(ada_lovelace()).expect("record");
+    let fmt = Format::from_str("028A{ (?W d) }")?;
+    let result = ada.format(&fmt, &Default::default());
+    assert_eq!(result, vec!["AdaKing"]);
+
+    Ok(())
+}
+
+#[test]
+fn test_format_modifier_uppercase() -> TestResult {
+    let ada = ByteRecord::from_bytes(ada_lovelace()).expect("record");
+    let fmt =
+        Format::from_str("028A{ (?U a) <$> (?U ', ' d <*> ' ' c) }")?;
+    let result = ada.format(&fmt, &Default::default());
+    assert_eq!(result, vec!["LOVELACE, ADA KING OF".to_string()]);
+
+    Ok(())
+}
+
+#[test]
+fn test_format_modifier_lowercase() -> TestResult {
+    let ada = ByteRecord::from_bytes(ada_lovelace()).expect("record");
+    let fmt = Format::from_str("028A{ a <$> (?L ', ' d <*> ' ' c) }")?;
+    let result = ada.format(&fmt, &Default::default());
+    assert_eq!(result, vec!["Lovelace, ada king of".to_string()]);
+
+    Ok(())
+}
+
 #[test]
 fn test_format_conference() -> TestResult {
     let fmt = Format::new(