diff --git a/src/lang/mod.rs b/src/lang/mod.rs index 75cae079..0829bcd1 100644 --- a/src/lang/mod.rs +++ b/src/lang/mod.rs @@ -164,7 +164,7 @@ impl CharClass { } '(' | '[' | '{' => Self::NewSubclause, ')' | ']' | '}' => self, - '"' | '”' | '“' | '»' | '›' | '«' | '‹' | '‘' => self, + '"' | '”' | '“' | '»' | '›' | '«' | '‹' | '‘' | '\'' => self, ',' | ';' => { if self.is_in_word() { Self::MaybeSubclause @@ -913,6 +913,10 @@ mod tests { let title = case.transform("Around a table: the reason why we just could not care"); assert_eq!("Around a Table: The Reason Why We Just Could Not Care", title); + + let title = + case.transform("'My colleague is a robot' – exploring frontline employees' willingness to work with collaborative service robots"); + assert_eq!("'My Colleague Is a Robot' – Exploring Frontline Employees' Willingness to Work with Collaborative Service Robots", title); } #[test] diff --git a/src/main.rs b/src/main.rs index ad20ec03..4da99c77 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,13 +3,13 @@ use std::fs::{self, read_to_string}; use std::io::ErrorKind as IoErrorKind; use std::path::Path; use std::process::exit; -use std::str::FromStr; use citationberg::taxonomy::Locator; use citationberg::{ IndependentStyle, Locale, LocaleCode, LocaleFile, LongShortForm, Style, }; -use clap::{crate_version, Arg, ArgAction, Command}; +use clap::builder::PossibleValue; +use clap::{crate_version, Arg, ArgAction, Command, ValueEnum}; use strum::VariantNames; use hayagriva::archive::{locales, ArchivedStyle}; @@ -29,18 +29,25 @@ pub enum Format { Yaml, } -impl FromStr for Format { - type Err = &'static str; +impl ValueEnum for Format { + fn value_variants<'a>() -> &'a [Self] { + if cfg!(feature = "biblatex") { + &[Self::Bibtex, Self::Biblatex, Self::Yaml] + } else { + &[Self::Yaml] + } + } - fn from_str(s: &str) -> Result { - match s.to_ascii_lowercase().as_ref() { + fn to_possible_value(&self) -> Option { + let value = match self { #[cfg(feature = "biblatex")] - "bibtex" => Ok(Format::Bibtex), + Format::Bibtex => "bibtex", #[cfg(feature = "biblatex")] - "biblatex" => Ok(Format::Biblatex), - "yaml" => Ok(Format::Yaml), - _ => Err("unknown format"), - } + Format::Biblatex => "biblatex", + Format::Yaml => "yaml", + }; + + Some(PossibleValue::new(value)) } } @@ -59,7 +66,7 @@ fn main() { Arg::new("format") .long("format") .help("What input file format to expect") - .value_parser(clap::builder::PossibleValuesParser::new(Format::VARIANTS)) + .value_parser(clap::value_parser!(Format)) .ignore_case(true) .num_args(1) .global(true), diff --git a/src/types/page.rs b/src/types/page.rs index af792eb0..6251e053 100644 --- a/src/types/page.rs +++ b/src/types/page.rs @@ -1,9 +1,4 @@ -use std::{ - cmp::Ordering, - fmt::Display, - num::{NonZeroUsize, TryFromIntError}, - str::FromStr, -}; +use std::{cmp::Ordering, fmt::Display, num::TryFromIntError, str::FromStr}; use crate::{MaybeTyped, Numeric, NumericError}; @@ -372,15 +367,11 @@ where #[inline] fn next(&mut self) -> Option { - if self.string.is_empty() { - None - } else { - let mut len = 1; - for w in windows(self.string, 2) { - let chars: Vec<_> = w.chars().collect(); - let (c, d) = (chars[0], chars[1]); + if let Some(first_char) = self.string.chars().next() { + let mut len = first_char.len_utf8(); + for (c, d) in self.string.chars().zip(self.string.chars().skip(1)) { if (self.predicate)(c, d) { - len += c.len_utf8(); + len += d.len_utf8(); } else { break; } @@ -388,6 +379,8 @@ where let (head, tail) = self.string.split_at(len); self.string = tail; Some(head) + } else { + None } } @@ -397,52 +390,24 @@ where } } -/// Return an iterator of sliding windows of size `size` over `string`. -/// -/// # Panic -/// -/// Panics if `size` is zero. -pub(crate) fn windows(string: &str, size: usize) -> Windows<'_> { - assert!(size > 0); - Windows::new(string, NonZeroUsize::new(size).unwrap()) -} - -/// An iterator of sliding windows of size `size` over `string`. -/// -/// Each call of `next` advanced the window by one. -pub(crate) struct Windows<'a> { - string: &'a str, - size: NonZeroUsize, -} - -impl<'a> Windows<'a> { - pub(crate) fn new(string: &'a str, size: NonZeroUsize) -> Self { - Self { string, size } - } -} - -impl<'a> Iterator for Windows<'a> { - type Item = &'a str; - - fn next(&mut self) -> Option { - let size = self.size.get(); - if size > self.string.len() { - None - } else { - let mut indices = self.string.char_indices(); - let next = indices.nth(1).unwrap().0; - match indices.nth(size - 2) { - Some((idx, _)) => { - let ret = Some(&self.string[..idx]); - self.string = &self.string[next..]; - ret - } - None => { - let ret = Some(self.string); - self.string = ""; - ret - } - } +#[cfg(test)] +mod test { + #[test] + fn group_by() { + fn group(s: &str) -> Vec<&'_ str> { + super::group_by(s, |c, d| !(c == ',' || c == '&' || d == ',' || d == '&')) + .collect() } + assert_eq!(["a"], group("a").as_slice()); + assert_eq!(["a", ","], group("a,").as_slice()); + assert_eq!([",", "a"], group(",a").as_slice()); + assert_eq!([",", "a", ","], group(",a,").as_slice()); + assert_eq!(["a", ",", "b"], group("a,b").as_slice()); + assert_eq!(["a-"], group("a-").as_slice()); + // characters that are longer than 1 byte + assert_eq!(["a–"], group("a–").as_slice()); + assert_eq!(["–a"], group("–a").as_slice()); + assert_eq!(["–a", ","], group("–a,").as_slice()); + assert_eq!(["a–", ",", "–b"], group("a–,–b").as_slice()); } }