Skip to content

Commit

Permalink
Improved TextBufferView docs, removed DataSource
Browse files Browse the repository at this point in the history
  • Loading branch information
zslayton committed Jul 25, 2023
1 parent 89f79aa commit 840be4d
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 102 deletions.
70 changes: 36 additions & 34 deletions src/lazy/text/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,10 @@ const WHITESPACE_CHARACTERS_AS_STR: &str = " \t\r\n\x09\x0B\x0C";
/// A slice of unsigned bytes that can be cheaply copied and which defines methods for parsing
/// the various encoding elements of a text Ion stream.
///
/// Upon success, each parsing method on the `TextBufferView` will return the value that was read
/// and a new copy of the `TextBufferView` that starts _after_ the bytes that were parsed.
///
/// Methods that begin with `match_` return the input slice that they matched OR a `MatchedValue`
/// that retains additional information found during the matching process.
/// Parsing methods have names that begin with `match_` and each return a `(match, remaining_input)`
/// pair. The `match` may be either the slice of the input that was matched (represented as another
/// `TextBufferView`) or a `MatchedValue` that retains information discovered during parsing that
/// will be useful if the match is later fully materialized into a value.
#[derive(PartialEq, Clone, Copy)]
pub(crate) struct TextBufferView<'a> {
// `data` is a slice of remaining data in the larger input stream.
Expand All @@ -79,17 +78,21 @@ pub(crate) struct TextBufferView<'a> {
pub(crate) type ParseResult<'a, T> = IonResult<(T, TextBufferView<'a>)>;

impl<'data> TextBufferView<'data> {
/// Constructs a new `TextBufferView` that wraps `data`.
/// Constructs a new `TextBufferView` that wraps `data`, setting the view's `offset` to zero.
#[inline]
pub fn new(data: &[u8]) -> TextBufferView {
Self::new_with_offset(data, 0)
}

/// Constructs a new `TextBufferView` that wraps `data`, setting the view's `offset` to the
/// specified value. This is useful when `data` is a slice from the middle of a larger stream.
/// Note that `offset` is the index of the larger stream at which `data` begins and not an
/// offset _into_ `data`.
pub fn new_with_offset(data: &[u8], offset: usize) -> TextBufferView {
TextBufferView { data, offset }
}

/// Returns a subslice copy of the [`TextBufferView`] that starts at `offset` and continues for
/// Returns a subslice of the [`TextBufferView`] that starts at `offset` and continues for
/// `length` bytes.
///
/// Note that `offset` is relative to the beginning of the buffer, not the beginning of the
Expand All @@ -101,7 +104,7 @@ impl<'data> TextBufferView<'data> {
}
}

/// Returns a subslice copy of the [`TextBufferView`] that starts at `offset` and continues
/// Returns a subslice of the [`TextBufferView`] that starts at `offset` and continues
/// to the end.
///
/// Note that `offset` is relative to the beginning of the buffer, not the beginning of the
Expand Down Expand Up @@ -134,48 +137,44 @@ impl<'data> TextBufferView<'data> {
self.data.is_empty()
}

/// Creates a copy of this `TextBufferView` that begins `num_bytes_to_consume` further into the
/// slice.
#[inline]
pub fn consume(&self, num_bytes_to_consume: usize) -> Self {
// This assertion is always run during testing but is removed in the release build.
debug_assert!(num_bytes_to_consume <= self.len());
Self {
data: &self.data[num_bytes_to_consume..],
offset: self.offset + num_bytes_to_consume,
}
pub fn match_whitespace(self) -> IonMatchResult<'data> {
is_a(WHITESPACE_CHARACTERS_AS_STR)(self)
}

// An adapter for nom::combinator::success.
// Always succeeds and consumes none of the input. Returns an empty slice of the buffer.
pub fn match_nothing(self) -> IonMatchResult<'data> {
// Return an empty slice from the head position
/// Always succeeds and consumes none of the input. Returns an empty slice of the buffer.
// This method is useful for parsers that need to match an optional construct but don't want
// to return an Option<_>. For an example, see its use in `match_optional_whitespace`.
fn match_nothing(self) -> IonMatchResult<'data> {
// Use nom's `success` parser to return an empty slice from the head position
success(self.slice(0, 0))(self)
}

pub fn match_whitespace(self) -> IonMatchResult<'data> {
is_a(WHITESPACE_CHARACTERS_AS_STR)(self)
}

/// Matches zero or more whitespace characters.
pub fn match_optional_whitespace(self) -> IonMatchResult<'data> {
// Either match whitespace and return what follows or just return the input as-is.
// This will always return `Ok`, but is packaged as an IonMatchResult for compatibility
// This will always return `Ok`, but it is packaged as an IonMatchResult for compatibility
// with other parsers.
alt((Self::match_whitespace, Self::match_nothing))(self)
}

pub fn read_top_level(self) -> IonParseResult<'data, RawStreamItem<'data, TextEncoding>> {
let (remaining, value) = match self.read_value() {
/// Matches a single top-level scalar value, the beginning of a container, or an IVM.
pub fn match_top_level(self) -> IonParseResult<'data, RawStreamItem<'data, TextEncoding>> {
let (remaining, value) = match self.match_value() {
Ok(value) => value,
Err(e) => return Err(e),
};

// TODO: Augment this method to take an `is_complete` flag that indicates whether the absence
// of further values should return an `Incomplete` or a `RawStreamItem::EndOfStream`.

// TODO: Check to see if `value` is actually an IVM.
// => If it's a symbol, try the IVM parser on it and see if it succeeds.
// For now, we just return the value.
Ok((remaining, RawStreamItem::Value(value)))
}

pub fn read_value(self) -> IonParseResult<'data, LazyRawTextValue<'data>> {
/// Matches a single scalar value or the beginning of a container.
pub fn match_value(self) -> IonParseResult<'data, LazyRawTextValue<'data>> {
alt((
// For `null` and `bool`, we use `read_` instead of `match_` because there's no additional
// parsing to be done.
Expand All @@ -202,21 +201,25 @@ impl<'data> TextBufferView<'data> {
.parse(self)
}

/// Recognizes a boolean keyword and returns the slice of input that `read_bool` consumed
/// instead of the parsed `bool` itself.
pub fn match_bool(self) -> IonMatchResult<'data> {
    recognize(Self::read_bool).parse(self)
}

/// Matches the keyword `true` or `false` and returns the corresponding `bool`.
///
/// The keyword must be followed by a stop character; that character is only peeked at, so it
/// remains in the input that is handed back to the caller.
pub fn read_bool(self) -> IonParseResult<'data, bool> {
    // Map each keyword onto the value it represents.
    let keyword = alt((value(true, tag("true")), value(false, tag("false"))));
    terminated(keyword, Self::peek_stop_character)(self)
}

/// Recognizes any kind of null (`null`, `null.null`, `null.int`, etc.) and returns the slice
/// of input that `read_null` consumed.
pub fn match_null(self) -> IonMatchResult<'data> {
    recognize(Self::read_null).parse(self)
}

/// Matches and returns a null value.
pub fn read_null(self) -> IonParseResult<'data, IonType> {
delimited(
tag("null"),
Expand All @@ -227,10 +230,7 @@ impl<'data> TextBufferView<'data> {
.parse(self)
}

fn match_ion_type(self) -> IonMatchResult<'data> {
recognize(Self::read_ion_type)(self)
}

/// Matches and returns an Ion type.
fn read_ion_type(self) -> IonParseResult<'data, IonType> {
alt((
value(IonType::Null, tag("null")),
Expand All @@ -249,10 +249,12 @@ impl<'data> TextBufferView<'data> {
))(self)
}

/// Matches exactly one of Ion's stop characters and returns it as a slice of the input.
fn match_stop_character(self) -> IonMatchResult<'data> {
    // The set of characters treated as stop characters by this parser.
    const STOP_CHARACTERS: &str = "{}[](),\"' \t\n\r\u{0b}\u{0c}";
    recognize(one_of(STOP_CHARACTERS)).parse(self)
}

/// Checks that the next character is one of Ion's stop characters without consuming it.
fn peek_stop_character(self) -> IonMatchResult<'data> {
    peek(Self::match_stop_character)(self)
}
Expand Down
1 change: 1 addition & 0 deletions src/lazy/text/matched.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ use std::num::IntErrorKind;
/// A partially parsed Ion value.
#[derive(Copy, Clone, Debug, PartialEq)]
pub(crate) enum MatchedValue {
// `Null` and `Bool` are fully parsed because they only involve matching a keyword.
Null(IonType),
Bool(bool),
Int(MatchedInt),
Expand Down
80 changes: 12 additions & 68 deletions src/lazy/text/raw/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,77 +3,19 @@ use crate::lazy::encoding::TextEncoding;
use crate::lazy::raw_stream_item::RawStreamItem;
use crate::lazy::text::buffer::TextBufferView;
use crate::lazy::text::parse_result::AddContext;
use crate::lazy::text::value::LazyRawTextValue;
use crate::result::IonFailure;
use crate::IonResult;

/// Wraps a [`TextBufferView`], allowing the reader to advance each time an item is successfully
/// parsed from it.
pub(crate) struct DataSource<'data> {
// The buffer we're reading from
/// A text Ion 1.0 reader that yields [`RawStreamItem`]s representing the top level values found
/// in the provided input stream.
pub struct LazyRawTextReader<'data> {
// The current view of the data we're reading from.
buffer: TextBufferView<'data>,
// Each time something is parsed from the buffer successfully, the caller will mark the number
// of bytes that may be skipped the next time `advance_to_next_item` is called.
// of bytes that may be skipped the next time the reader advances.
bytes_to_skip: usize,
}

impl<'data> DataSource<'data> {
    /// Wraps `buffer` in a `DataSource` that has no bytes marked to be skipped.
    pub(crate) fn new(buffer: TextBufferView<'data>) -> DataSource<'data> {
        let bytes_to_skip = 0;
        DataSource {
            buffer,
            bytes_to_skip,
        }
    }

    /// Returns a copy of the buffer view currently held by this `DataSource`.
    pub(crate) fn buffer(&self) -> TextBufferView<'data> {
        self.buffer
    }

    /// Produces a view of the buffer positioned past the `bytes_to_skip` bytes recorded by the
    /// most recent successful parse. The `DataSource` itself is not modified; the caller decides
    /// whether to store the returned view.
    ///
    /// If the buffer holds fewer than `bytes_to_skip` bytes, the result of
    /// `IonResult::incomplete` is returned instead.
    fn advance_to_next_item(&mut self) -> IonResult<TextBufferView<'data>> {
        let pending = self.bytes_to_skip;
        if pending > self.buffer.len() {
            return IonResult::incomplete(
                "cannot advance to next item, insufficient data in buffer",
                self.buffer.offset(),
            );
        }

        // When nothing is pending, the current view is already at the next item.
        let next_view = match pending {
            0 => self.buffer,
            _ => self.buffer.consume(pending),
        };
        Ok(next_view)
    }

    /// Applies `parser` to the buffer as it would look after skipping the previously parsed item.
    ///
    /// When the parser yields `Some(value)`, the `DataSource` adopts the advanced buffer, records
    /// the value's total encoded length as the number of bytes to skip next time, and returns
    /// `Ok(Some(value))`. When the parser yields `None` or an error, that outcome is returned and
    /// the `DataSource` is left exactly as it was.
    pub(crate) fn try_parse_next<F>(
        &mut self,
        parser: F,
    ) -> IonResult<Option<LazyRawTextValue<'data>>>
    where
        F: Fn(TextBufferView<'data>) -> IonResult<Option<LazyRawTextValue<'data>>>,
    {
        let buffer_after = self.advance_to_next_item()?;

        // Bail out before touching `self` unless the parser actually produced a value.
        let lazy_value = match parser(buffer_after)? {
            Some(parsed) => parsed,
            None => return Ok(None),
        };

        self.bytes_to_skip = lazy_value.encoded_value.total_length();
        self.buffer = buffer_after;
        Ok(Some(lazy_value))
    }
}

/// A text Ion 1.0 reader that yields [`LazyRawTextValue`]s representing the top level values found
/// in the provided input stream.
pub struct LazyRawTextReader<'data> {
data: DataSource<'data>,
}

impl<'data> LazyRawTextReader<'data> {
/// Constructs a `LazyRawTextReader` positioned at the beginning of the provided input stream.
pub fn new(data: &'data [u8]) -> LazyRawTextReader<'data> {
Expand All @@ -85,26 +27,28 @@ impl<'data> LazyRawTextReader<'data> {
/// of a larger data stream. This offset is used for reporting the absolute (stream-level)
/// position of values encountered in `data`.
fn new_with_offset(data: &'data [u8], offset: usize) -> LazyRawTextReader<'data> {
let data = DataSource::new(TextBufferView::new_with_offset(data, offset));
LazyRawTextReader { data }
LazyRawTextReader {
buffer: TextBufferView::new_with_offset(data, offset),
bytes_to_skip: 0,
}
}

pub fn next<'top>(&'top mut self) -> IonResult<RawStreamItem<'data, TextEncoding>>
where
'data: 'top,
{
let buffer = self.data.buffer;
let buffer = self.buffer;
if buffer.is_empty() {
return IonResult::incomplete("reading a top-level value", buffer.offset());
}
let (buffer_after_whitespace, _whitespace) = buffer
.match_optional_whitespace()
.with_context("skipping whitespace between top-level values", buffer)?;
let (remaining, matched) = buffer_after_whitespace
.read_top_level()
.match_top_level()
.with_context("reading a top-level value", buffer_after_whitespace)?;
// If we successfully moved to the next value, store the remaining buffer view
self.data.buffer = remaining;
self.buffer = remaining;
Ok(matched)
}
}
Expand Down

0 comments on commit 840be4d

Please sign in to comment.