Skip to content

Introduce an InputSplit trait #1453

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 6 additions & 10 deletions src/character/complete.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use crate::lib::std::ops::{Range, RangeFrom, RangeTo};
use crate::traits::{
AsChar, FindToken, InputIter, InputLength, InputTake, InputTakeAtPosition, Slice,
};
use crate::traits::{Compare, CompareResult};
use crate::traits::{Compare, CompareResult, InputSplit};

/// Recognizes one character.
///
Expand Down Expand Up @@ -308,16 +308,12 @@ where
/// ```
pub fn anychar<T, E: ParseError<T>>(input: T) -> IResult<T, char, E>
where
T: InputIter + InputLength + Slice<RangeFrom<usize>>,
<T as InputIter>::Item: AsChar,
T: InputSplit,
<T as InputSplit>::Item: AsChar,
{
let mut it = input.iter_indices();
match it.next() {
None => Err(Err::Error(E::from_error_kind(input, ErrorKind::Eof))),
Some((_, c)) => match it.next() {
None => Ok((input.slice(input.input_len()..), c.as_char())),
Some((idx, _)) => Ok((input.slice(idx..), c.as_char())),
},
match input.split_first() {
Ok((first, tail)) => Ok((tail, first.as_char())),
Err(input) => Err(Err::Error(E::from_error_kind(input, ErrorKind::Eof))),
}
}

Expand Down
17 changes: 7 additions & 10 deletions src/character/streaming.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use crate::lib::std::ops::{Range, RangeFrom, RangeTo};
use crate::traits::{
AsChar, FindToken, InputIter, InputLength, InputTake, InputTakeAtPosition, Slice,
};
use crate::traits::{Compare, CompareResult};
use crate::traits::{Compare, CompareResult, InputSplit};

/// Recognizes one character.
///
Expand Down Expand Up @@ -288,16 +288,13 @@ where
/// ```
pub fn anychar<T, E: ParseError<T>>(input: T) -> IResult<T, char, E>
where
T: InputIter + InputLength + Slice<RangeFrom<usize>>,
<T as InputIter>::Item: AsChar,
T: InputSplit,
<T as InputSplit>::Item: AsChar,
{
let mut it = input.iter_indices();
match it.next() {
None => Err(Err::Incomplete(Needed::new(1))),
Some((_, c)) => match it.next() {
None => Ok((input.slice(input.input_len()..), c.as_char())),
Some((idx, _)) => Ok((input.slice(idx..), c.as_char())),
},
if let Ok((first, tail)) = input.split_first() {
Ok((tail, first.as_char()))
} else {
Err(Err::Incomplete(Needed::new(1)))
}
}

Expand Down
172 changes: 172 additions & 0 deletions src/traits/input_split.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
use crate::Needed;

/// Abstracts split_first
/// Abstracts splitting an input into two parts: either at an index, or into
/// its first/last element and the remainder.
///
/// Every method consumes `self` and, on failure, hands the untouched input
/// back inside the `Err` variant so the caller can keep using it.
pub trait InputSplit: Sized {
    /// The current input type is a sequence of that `Item` type.
    ///
    /// Example: `u8` for `&[u8]` or `char` for `&str`
    type Item;

    /// Divides one input into two at an index.
    ///
    /// Returns the head first, then the tail: `Ok((head, tail))`.
    ///
    /// On failure the original input is returned together with a
    /// [`Needed`] value: `Err((input, needed))`.
    fn split_at(self, mid: usize) -> Result<(Self, Self), (Self, Needed)>;

    /// Returns the first element and all the rest of the input as
    /// `Ok((first, tail))`, or `Err(input)` if the input is empty.
    fn split_first(self) -> Result<(Self::Item, Self), Self>;

    /// Returns all the leading elements and the last element of the input as
    /// `Ok((head, last))`, or `Err(input)` if the input is empty.
    ///
    /// The order of the returned tuple is the opposite of `split_first`.
    /// This reduces potential mistakes and mirrors slice pattern matching
    /// (`[head @ .., last]`).
    fn split_last(self) -> Result<(Self, Self::Item), Self>;
}

/// `InputSplit` for string slices: elements are `char`s, indices are byte
/// offsets.
impl<'a> InputSplit for &'a str {
    type Item = char;

    /// Splits the string at byte offset `mid`.
    ///
    /// Returns `Ok((head, tail))` when `mid <= self.len()`. Otherwise returns
    /// `Err((self, needed))` where `needed` is built from the number of
    /// missing bytes (`mid - self.len()`).
    ///
    /// # Panics
    ///
    /// `str::split_at` panics when `mid` is inside the string but does not
    /// fall on a UTF-8 character boundary.
    fn split_at(self, mid: usize) -> Result<(Self, Self), (Self, Needed)> {
        if mid <= self.len() {
            Ok(str::split_at(self, mid))
        } else {
            Err((self, Needed::new(mid - self.len())))
        }
    }

    /// Returns the first `char` and the remaining string, or `Err(self)` if
    /// the string is empty.
    fn split_first(self) -> Result<(Self::Item, Self), Self> {
        let mut chars = self.chars();
        // Take from the FRONT: after `next()`, `as_str()` is everything that
        // follows the first character.
        match chars.next() {
            Some(first) => Ok((first, chars.as_str())),
            None => Err(self),
        }
    }

    /// Returns the string without its last `char`, and that last `char`, or
    /// `Err(self)` if the string is empty.
    fn split_last(self) -> Result<(Self, Self::Item), Self> {
        let mut chars = self.chars();
        // Take from the BACK: after `next_back()`, `as_str()` is everything
        // that precedes the last character.
        match chars.next_back() {
            Some(last) => Ok((chars.as_str(), last)),
            None => Err(self),
        }
    }
}

/// `InputSplit` for byte slices: elements are `u8`s, indices are byte offsets.
impl<'a> InputSplit for &'a [u8] {
    type Item = u8;

    /// Splits the slice at index `mid`.
    ///
    /// Returns `Ok((head, tail))` when `mid` does not exceed the slice length;
    /// otherwise returns the slice together with a `Needed` built from the
    /// number of missing bytes.
    fn split_at(self, mid: usize) -> Result<(Self, Self), (Self, Needed)> {
        let available = self.len();
        if mid > available {
            return Err((self, Needed::new(mid - available)));
        }
        Ok(<[u8]>::split_at(self, mid))
    }

    /// Returns the first byte and the remaining slice, or `Err(self)` if the
    /// slice is empty.
    fn split_first(self) -> Result<(Self::Item, Self), Self> {
        match <[u8]>::split_first(self) {
            Some((&head, rest)) => Ok((head, rest)),
            None => Err(self),
        }
    }

    /// Returns the slice without its last byte, and that last byte, or
    /// `Err(self)` if the slice is empty.
    fn split_last(self) -> Result<(Self, Self::Item), Self> {
        match <[u8]>::split_last(self) {
            Some((&end, init)) => Ok((init, end)),
            None => Err(self),
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::Needed;
    use core::fmt::Debug;

    use super::InputSplit;

    /// Asserts that `input.split_at(n)` yields exactly `expected`.
    ///
    /// The error side carries the returned input as well as the `Needed`
    /// value, mirroring the signature of `InputSplit::split_at`.
    fn split_at_aux<Input>(
        input: Input,
        n: usize,
        expected: Result<(Input, Input), (Input, Needed)>,
    ) where
        Input: InputSplit + Debug + PartialEq,
    {
        assert_eq!(input.split_at(n), expected);
    }

    #[test]
    fn split_at_slice() {
        let input = &b"abcd"[..];

        // Inclusive upper bound: splitting at `len` is valid and yields an
        // empty tail.
        for n in 0..=input.len() {
            split_at_aux(input, n, Ok((&input[..n], &input[n..])));
        }

        split_at_aux(input, input.len() + 1, Err((input, Needed::new(1))));
        split_at_aux(input, input.len() + 3, Err((input, Needed::new(3))));
    }

    #[test]
    fn split_at_str() {
        let input = "abcd";

        // Inclusive upper bound: splitting at `len` is valid and yields an
        // empty tail.
        for n in 0..=input.len() {
            split_at_aux(input, n, Ok((&input[..n], &input[n..])));
        }

        split_at_aux(input, input.len() + 1, Err((input, Needed::new(1))));
        split_at_aux(input, input.len() + 3, Err((input, Needed::new(3))));
    }

    /// Asserts that `input.split_first()` yields exactly `expected`
    /// (`Ok((first, tail))`, or `Err(input)` for an empty input).
    fn split_first_aux<Input>(
        input: Input,
        expected: Result<(<Input as InputSplit>::Item, Input), Input>,
    ) where
        Input: InputSplit + Debug + PartialEq,
        <Input as InputSplit>::Item: PartialEq + Debug,
    {
        assert_eq!(input.split_first(), expected);
    }

    #[test]
    fn split_first_slice() {
        let input = &b"abcd"[..];
        split_first_aux(input, Ok((b'a', &input[1..])));

        let input = &b""[..];
        split_first_aux(input, Err(input));
    }

    #[test]
    fn split_first_str() {
        let input = "abcd";
        split_first_aux(input, Ok(('a', &input[1..])));

        // A multi-byte first character must be returned whole.
        let input = "éa";
        split_first_aux(input, Ok(('é', "a")));

        let input = "";
        split_first_aux(input, Err(input));
    }

    /// Asserts that `input.split_last()` yields exactly `expected`
    /// (`Ok((head, last))`, or `Err(input)` for an empty input).
    fn split_last_aux<Input>(
        input: Input,
        expected: Result<(Input, <Input as InputSplit>::Item), Input>,
    ) where
        Input: InputSplit + Debug + PartialEq,
        <Input as InputSplit>::Item: PartialEq + Debug,
    {
        assert_eq!(input.split_last(), expected);
    }

    #[test]
    fn split_last_slice() {
        let input = &b"abcd"[..];
        split_last_aux(input, Ok((&input[..3], b'd')));

        let input = &b""[..];
        split_last_aux(input, Err(input));
    }

    #[test]
    fn split_last_str() {
        let input = "abcd";
        split_last_aux(input, Ok((&input[..3], 'd')));

        // A multi-byte last character must be returned whole.
        let input = "aé";
        split_last_aux(input, Ok(("a", 'é')));

        let input = "";
        split_last_aux(input, Err(input));
    }
}
9 changes: 7 additions & 2 deletions src/traits.rs → src/traits/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
//! Traits input types have to implement to work with nom combinators

mod input_split;

pub use input_split::InputSplit;

use crate::error::{ErrorKind, ParseError};
use crate::internal::{Err, IResult, Needed};
use crate::lib::std::iter::{Copied, Enumerate};
Expand Down Expand Up @@ -369,7 +374,7 @@ impl<'a> InputTake for &'a [u8] {
}
#[inline]
fn take_split(&self, count: usize) -> (Self, Self) {
let (prefix, suffix) = self.split_at(count);
let (prefix, suffix) = <[u8]>::split_at(self, count);
(suffix, prefix)
}
}
Expand Down Expand Up @@ -422,7 +427,7 @@ impl<'a> InputTake for &'a str {
// return byte index
#[inline]
fn take_split(&self, count: usize) -> (Self, Self) {
let (prefix, suffix) = self.split_at(count);
let (prefix, suffix) = str::split_at(self, count);
(suffix, prefix)
}
}
Expand Down