diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs index 54ecf4f6..c12ec11c 100644 --- a/src/lazy/text/buffer.rs +++ b/src/lazy/text/buffer.rs @@ -58,11 +58,10 @@ const WHITESPACE_CHARACTERS_AS_STR: &str = " \t\r\n\x09\x0B\x0C"; /// A slice of unsigned bytes that can be cheaply copied and which defines methods for parsing /// the various encoding elements of a text Ion stream. /// -/// Upon success, each parsing method on the `TextBufferView` will return the value that was read -/// and a new copy of the `TextBufferView` that starts _after_ the bytes that were parsed. -/// -/// Methods that begin with `match_` return the input slice that they matched OR a `MatchedValue` -/// that retains additional information found during the matching process. +/// Parsing methods have names that begin with `match_` and each return a `(remaining_input, match)` +/// pair. The `match` may be either the slice of the input that was matched (represented as another +/// `TextBufferView`) or a `MatchedValue` that retains information discovered during parsing that +/// will be useful if the match is later fully materialized into a value. #[derive(PartialEq, Clone, Copy)] pub(crate) struct TextBufferView<'a> { // `data` is a slice of remaining data in the larger input stream. @@ -79,17 +78,21 @@ pub(crate) struct TextBufferView<'a> { pub(crate) type ParseResult<'a, T> = IonResult<(T, TextBufferView<'a>)>; impl<'data> TextBufferView<'data> { - /// Constructs a new `TextBufferView` that wraps `data`. + /// Constructs a new `TextBufferView` that wraps `data`, setting the view's `offset` to zero. #[inline] pub fn new(data: &[u8]) -> TextBufferView { Self::new_with_offset(data, 0) } + /// Constructs a new `TextBufferView` that wraps `data`, setting the view's `offset` to the + /// specified value. This is useful when `data` is a slice from the middle of a larger stream. 
+ /// Note that `offset` is the index of the larger stream at which `data` begins and not an + /// offset _into_ `data`. pub fn new_with_offset(data: &[u8], offset: usize) -> TextBufferView { TextBufferView { data, offset } } - /// Returns a subslice copy of the [`TextBufferView`] that starts at `offset` and continues for + /// Returns a subslice of the [`TextBufferView`] that starts at `offset` and continues for /// `length` bytes. /// /// Note that `offset` is relative to the beginning of the buffer, not the beginning of the @@ -101,7 +104,7 @@ impl<'data> TextBufferView<'data> { } } - /// Returns a subslice copy of the [`TextBufferView`] that starts at `offset` and continues + /// Returns a subslice of the [`TextBufferView`] that starts at `offset` and continues /// to the end. /// /// Note that `offset` is relative to the beginning of the buffer, not the beginning of the @@ -134,48 +137,44 @@ impl<'data> TextBufferView<'data> { self.data.is_empty() } - /// Creates a copy of this `TextBufferView` that begins `num_bytes_to_consume` further into the - /// slice. - #[inline] - pub fn consume(&self, num_bytes_to_consume: usize) -> Self { - // This assertion is always run during testing but is removed in the release build. - debug_assert!(num_bytes_to_consume <= self.len()); - Self { - data: &self.data[num_bytes_to_consume..], - offset: self.offset + num_bytes_to_consume, - } + pub fn match_whitespace(self) -> IonMatchResult<'data> { + is_a(WHITESPACE_CHARACTERS_AS_STR)(self) } - // An adapter for nom::combinator::success. - // Always succeeds and consumes none of the input. Returns an empty slice of the buffer. - pub fn match_nothing(self) -> IonMatchResult<'data> { - // Return an empty slice from the head position + /// Always succeeds and consumes none of the input. Returns an empty slice of the buffer. + // This method is useful for parsers that need to match an optional construct but don't want + // to return an Option<_>. 
For an example, see its use in `match_optional_whitespace`. + fn match_nothing(self) -> IonMatchResult<'data> { + // Use nom's `success` parser to return an empty slice from the head position success(self.slice(0, 0))(self) } - pub fn match_whitespace(self) -> IonMatchResult<'data> { - is_a(WHITESPACE_CHARACTERS_AS_STR)(self) - } - + /// Matches zero or more whitespace characters. pub fn match_optional_whitespace(self) -> IonMatchResult<'data> { // Either match whitespace and return what follows or just return the input as-is. - // This will always return `Ok`, but is packaged as an IonMatchResult for compatability + // This will always return `Ok`, but it is packaged as an IonMatchResult for compatibility + // with other parsers. alt((Self::match_whitespace, Self::match_nothing))(self) } - pub fn read_top_level(self) -> IonParseResult<'data, RawStreamItem<'data, TextEncoding>> { - let (remaining, value) = match self.read_value() { + /// Matches a single top-level scalar value, the beginning of a container, or an IVM. + pub fn match_top_level(self) -> IonParseResult<'data, RawStreamItem<'data, TextEncoding>> { + let (remaining, value) = match self.match_value() { Ok(value) => value, Err(e) => return Err(e), }; + // TODO: Augment this method to take an `is_complete` flag that indicates whether the absence + // of further values should return an `Incomplete` or a `RawStreamItem::EndOfStream`. + // TODO: Check to see if `value` is actually an IVM. // => If it's a symbol, try the IVM parser on it and see if it succeeds. // For now, we just return the value. Ok((remaining, RawStreamItem::Value(value))) } - pub fn read_value(self) -> IonParseResult<'data, LazyRawTextValue<'data>> { + /// Matches a single scalar value or the beginning of a container. + pub fn match_value(self) -> IonParseResult<'data, LazyRawTextValue<'data>> { alt(( // For `null` and `bool`, we use `read_` instead of `match_` because there's no additional // parsing to be done. 
@@ -202,10 +201,12 @@ impl<'data> TextBufferView<'data> { .parse(self) } + /// Matches a boolean value. pub fn match_bool(self) -> IonMatchResult<'data> { recognize(Self::read_bool)(self) } + /// Matches and returns a boolean value. pub fn read_bool(self) -> IonParseResult<'data, bool> { terminated( alt((value(true, tag("true")), value(false, tag("false")))), @@ -213,10 +214,12 @@ impl<'data> TextBufferView<'data> { )(self) } + /// Matches any type of null. (`null`, `null.null`, `null.int`, etc) pub fn match_null(self) -> IonMatchResult<'data> { recognize(Self::read_null)(self) } + /// Matches and returns a null value. pub fn read_null(self) -> IonParseResult<'data, IonType> { delimited( tag("null"), @@ -227,10 +230,7 @@ impl<'data> TextBufferView<'data> { .parse(self) } - fn match_ion_type(self) -> IonMatchResult<'data> { - recognize(Self::read_ion_type)(self) - } - + /// Matches and returns an Ion type. fn read_ion_type(self) -> IonParseResult<'data, IonType> { alt(( value(IonType::Null, tag("null")), @@ -249,10 +249,12 @@ impl<'data> TextBufferView<'data> { ))(self) } + /// Matches any one of Ion's stop characters. fn match_stop_character(self) -> IonMatchResult<'data> { recognize(one_of("{}[](),\"' \t\n\r\u{0b}\u{0c}")).parse(self) } + /// Matches--but does not consume--any one of Ion's stop characters. fn peek_stop_character(self) -> IonMatchResult<'data> { peek(Self::match_stop_character).parse(self) } diff --git a/src/lazy/text/matched.rs b/src/lazy/text/matched.rs index 3f846a38..b3f79056 100644 --- a/src/lazy/text/matched.rs +++ b/src/lazy/text/matched.rs @@ -31,6 +31,7 @@ use std::num::IntErrorKind; /// A partially parsed Ion value. #[derive(Copy, Clone, Debug, PartialEq)] pub(crate) enum MatchedValue { + // `Null` and `Bool` are fully parsed because they only involve matching a keyword. 
Null(IonType), Bool(bool), Int(MatchedInt), diff --git a/src/lazy/text/raw/reader.rs b/src/lazy/text/raw/reader.rs index dfc9f863..22eedf61 100644 --- a/src/lazy/text/raw/reader.rs +++ b/src/lazy/text/raw/reader.rs @@ -3,77 +3,19 @@ use crate::lazy::encoding::TextEncoding; use crate::lazy::raw_stream_item::RawStreamItem; use crate::lazy::text::buffer::TextBufferView; use crate::lazy::text::parse_result::AddContext; -use crate::lazy::text::value::LazyRawTextValue; use crate::result::IonFailure; use crate::IonResult; -/// Wraps a [`TextBufferView`], allowing the reader to advance each time an item is successfully -/// parsed from it. -pub(crate) struct DataSource<'data> { - // The buffer we're reading from +/// A text Ion 1.0 reader that yields [`RawStreamItem`]s representing the top level values found +/// in the provided input stream. +pub struct LazyRawTextReader<'data> { + // The current view of the data we're reading from. buffer: TextBufferView<'data>, // Each time something is parsed from the buffer successfully, the caller will mark the number - // of bytes that may be skipped the next time `advance_to_next_item` is called. + // of bytes that may be skipped the next time the reader advances. bytes_to_skip: usize, } -impl<'data> DataSource<'data> { - pub(crate) fn new(buffer: TextBufferView<'data>) -> DataSource<'data> { - DataSource { - buffer, - bytes_to_skip: 0, - } - } - - pub(crate) fn buffer(&self) -> TextBufferView<'data> { - self.buffer - } - - fn advance_to_next_item(&mut self) -> IonResult> { - if self.buffer.len() < self.bytes_to_skip { - return IonResult::incomplete( - "cannot advance to next item, insufficient data in buffer", - self.buffer.offset(), - ); - } - - if self.bytes_to_skip > 0 { - Ok(self.buffer.consume(self.bytes_to_skip)) - } else { - Ok(self.buffer) - } - } - - /// Runs the provided parsing function on this DataSource's buffer. 
- /// If it succeeds, marks the `DataSource` as ready to advance by the 'n' bytes - /// that were consumed and returns `Some(value)`. - /// If it does not succeed, the `DataSource` remains unchanged. - pub(crate) fn try_parse_next< - F: Fn(TextBufferView<'data>) -> IonResult>>, - >( - &mut self, - parser: F, - ) -> IonResult>> { - let buffer_after = self.advance_to_next_item()?; - - let lazy_value = match parser(buffer_after) { - Ok(Some(output)) => output, - Ok(None) => return Ok(None), - Err(e) => return Err(e), - }; - - self.buffer = buffer_after; - self.bytes_to_skip = lazy_value.encoded_value.total_length(); - Ok(Some(lazy_value)) - } -} - -/// A text Ion 1.0 reader that yields [`LazyRawTextValue`]s representing the top level values found -/// in the provided input stream. -pub struct LazyRawTextReader<'data> { - data: DataSource<'data>, -} - impl<'data> LazyRawTextReader<'data> { /// Constructs a `LazyRawTextReader` positioned at the beginning of the provided input stream. pub fn new(data: &'data [u8]) -> LazyRawTextReader<'data> { @@ -85,15 +27,17 @@ impl<'data> LazyRawTextReader<'data> { /// of a larger data stream. This offset is used for reporting the absolute (stream-level) /// position of values encountered in `data`. 
fn new_with_offset(data: &'data [u8], offset: usize) -> LazyRawTextReader<'data> { - let data = DataSource::new(TextBufferView::new_with_offset(data, offset)); - LazyRawTextReader { data } + LazyRawTextReader { + buffer: TextBufferView::new_with_offset(data, offset), + bytes_to_skip: 0, + } } pub fn next<'top>(&'top mut self) -> IonResult> where 'data: 'top, { - let buffer = self.data.buffer; + let buffer = self.buffer; if buffer.is_empty() { return IonResult::incomplete("reading a top-level value", buffer.offset()); } @@ -101,10 +45,10 @@ impl<'data> LazyRawTextReader<'data> { .match_optional_whitespace() .with_context("skipping whitespace between top-level values", buffer)?; let (remaining, matched) = buffer_after_whitespace - .read_top_level() + .match_top_level() .with_context("reading a top-level value", buffer_after_whitespace)?; // If we successfully moved to the next value, store the remaining buffer view - self.data.buffer = remaining; + self.buffer = remaining; Ok(matched) } }