Skip to content

Commit

Permalink
Adds lazy text floats
Browse files Browse the repository at this point in the history
  • Loading branch information
zslayton committed Jul 27, 2023
1 parent 840be4d commit 5db1ff0
Show file tree
Hide file tree
Showing 6 changed files with 297 additions and 31 deletions.
156 changes: 153 additions & 3 deletions src/lazy/text/buffer.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::lazy::encoding::TextEncoding;
use crate::lazy::raw_stream_item::RawStreamItem;
use crate::lazy::text::encoded_value::EncodedTextValue;
use crate::lazy::text::matched::{MatchedInt, MatchedValue};
use crate::lazy::text::matched::{MatchedFloat, MatchedInt, MatchedValue};
use crate::lazy::text::parse_result::IonParseError;
use crate::lazy::text::parse_result::{IonMatchResult, IonParseResult};
use crate::lazy::text::value::LazyRawTextValue;
Expand All @@ -12,7 +12,7 @@ use nom::character::streaming::{char, digit1, one_of};
use nom::combinator::{map, opt, peek, recognize, success, value};
use nom::error::{ErrorKind, ParseError};
use nom::multi::many0_count;
use nom::sequence::{delimited, pair, preceded, separated_pair, terminated};
use nom::sequence::{delimited, pair, preceded, separated_pair, terminated, tuple};
use nom::{CompareResult, IResult, InputLength, InputTake, Needed, Parser};
use std::fmt::{Debug, Formatter};
use std::iter::{Copied, Enumerate};
Expand Down Expand Up @@ -192,6 +192,12 @@ impl<'data> TextBufferView<'data> {
EncodedTextValue::new(MatchedValue::Int(matched_int), self.offset(), length)
},
),
map(
match_and_length(Self::match_float),
|(matched_float, length)| {
EncodedTextValue::new(MatchedValue::Float(matched_float), self.offset(), length)
},
),
// TODO: The other Ion types
))
.map(|encoded_value| LazyRawTextValue {
Expand Down Expand Up @@ -372,6 +378,111 @@ impl<'data> TextBufferView<'data> {
/// Matches a run of one or more ASCII hexadecimal digits (`0-9`, `a-f`, `A-F`).
fn take_base_16_digits1(self) -> IonMatchResult<'data> {
    let is_hex_digit = |byte: u8| byte.is_ascii_hexdigit();
    take_while1(is_hex_digit)(self)
}

/// Matches an Ion float written in any supported syntax.
///
/// The special values (`nan`, `+inf`, `-inf`) are attempted first; if none of them match,
/// the input is tried as a numeric float.
fn match_float(self) -> IonParseResult<'data, MatchedFloat> {
    let special_value = Self::match_float_special_value;
    let numeric_value = Self::match_float_numeric_value;
    alt((special_value, numeric_value))(self)
}

/// Matches the keywords for the special IEEE-754 values: `nan`, `+inf`, and `-inf`.
///
/// Each keyword maps directly to the corresponding `MatchedFloat` variant.
/// NOTE(review): unlike `match_float_numeric_value`, this matcher does not check that the
/// keyword is followed by a stop character.
fn match_float_special_value(self) -> IonParseResult<'data, MatchedFloat> {
    let not_a_number = value(MatchedFloat::NotANumber, tag("nan"));
    let positive_infinity = value(MatchedFloat::PositiveInfinity, tag("+inf"));
    let negative_infinity = value(MatchedFloat::NegativeInfinity, tag("-inf"));
    alt((not_a_number, positive_infinity, negative_infinity))(self)
}

/// Matches numeric IEEE-754 floating point values.
fn match_float_numeric_value(self) -> IonParseResult<'data, MatchedFloat> {
terminated(
recognize(pair(
Self::match_number_with_optional_dot_and_digits,
Self::match_float_exponent_marker_and_digits,
)),
Self::peek_stop_character,
)
.map(|_matched| MatchedFloat::Numeric)
.parse(self)
}

/// Matches a number that may or may not have a decimal point and fractional digits.
///
/// The match consists of:
///   * an optional leading `-`
///   * the digits before the decimal point (see `match_digits_before_dot`)
///   * optionally, a decimal point and its trailing digits
///
/// The digits after the decimal point are themselves optional (see
/// `match_dot_followed_by_base_10_digits`), so a bare trailing `.` is accepted.
/// For example:
///   1000
///   1000.559
///   -25.2
///   5.
fn match_number_with_optional_dot_and_digits(self) -> IonMatchResult<'data> {
    recognize(tuple((
        opt(tag("-")),
        // Use the leading-digits matcher defined alongside this method so that the
        // "single zero OR non-zero followed by digits" rule is applied.
        Self::match_digits_before_dot,
        opt(Self::match_dot_followed_by_base_10_digits),
    )))(self)
}

/// Matches the digits allowed before the decimal point of a float or decimal.
///
/// That is either a lone `0`, or a non-zero digit followed by any further digits
/// (which may contain underscores, see `match_trailing_digits`).
fn match_digits_before_dot(self) -> IonMatchResult<'data> {
    let lone_zero = tag("0");
    let non_zero_then_digits =
        recognize(pair(Self::match_leading_digit, Self::match_trailing_digits));
    alt((lone_zero, non_zero_then_digits))(self)
}

/// Matches exactly one base 10 digit in the range `1` through `9`.
fn match_leading_digit(self) -> IonMatchResult<'data> {
    let non_zero_digit = one_of("123456789");
    recognize(non_zero_digit)(self)
}

/// Matches zero or more groups of base 10 digits, where each group may be preceded by a
/// single underscore. Because every underscore must be followed by at least one digit,
/// the match can never end with an underscore.
fn match_trailing_digits(self) -> IonMatchResult<'data> {
    let optional_underscore = opt(char('_'));
    let digit_group = preceded(optional_underscore, digit1);
    recognize(many0_count(digit_group))(self)
}

/// Matches a decimal point and whatever base 10 digits follow it.
///
/// The digits are optional: a `.` with nothing after it still matches.
fn match_dot_followed_by_base_10_digits(self) -> IonMatchResult<'data> {
    let decimal_point = tag(".");
    let optional_digits = opt(Self::match_digits_after_dot);
    recognize(preceded(decimal_point, optional_digits))(self)
}

/// Matches one or more base 10 digits with optional single underscores between groups of
/// digits. Unlike `match_digits_before_dot`, leading zeros are permitted.
fn match_digits_after_dot(self) -> IonMatchResult<'data> {
    // Any number of `digits_` groups...
    let underscore_terminated_groups = many0_count(pair(digit1, char('_')));
    // ...which must be followed by a final group of at least one digit.
    recognize(terminated(underscore_terminated_groups, digit1))(self)
}

/// Matches a float exponent: an `e` or `E` marker, then an optional sign (`+` or `-`),
/// then one or more base 10 digits.
///
/// The marker must be present, but the returned match covers only the sign and digits
/// that follow it.
fn match_float_exponent_marker_and_digits(self) -> IonMatchResult<'data> {
    let exponent_marker = one_of("eE");
    preceded(exponent_marker, Self::match_exponent_sign_and_digits)(self)
}

/// Matches the part of an exponent that follows its marker (the `d` of a decimal or the
/// `e` of a float). The match is made up of:
///   * an optional `+` OR `-`
///   * one or more decimal digits, which may:
///     * be separated by underscores: `1_000_000`
///     * begin with one or more zeros: `0005`
fn match_exponent_sign_and_digits(self) -> IonMatchResult<'data> {
    // An absent sign means the exponent is not negative.
    let optional_sign = opt(Self::match_any_sign);
    recognize(pair(optional_sign, Self::match_digits_after_dot))(self)
}

/// Matches a single sign character, either `+` or `-`.
///
/// Intended for exponent signs; elsewhere in Ion a leading `+` is generally not allowed.
pub fn match_any_sign(self) -> IonMatchResult<'data> {
    let plus = tag("+");
    let minus = tag("-");
    alt((plus, minus))(self)
}
}

// === nom trait implementations ===
Expand Down Expand Up @@ -602,7 +713,12 @@ mod tests {
{
let result = self.try_match(parser);
// We expect this to fail for one reason or another
result.unwrap_err();
assert!(
result.is_err(),
"Expected a parse failure for input: {:?}\nResult: {:?}",
self.input,
result
);
}
}

Expand Down Expand Up @@ -729,4 +845,38 @@ mod tests {
mismatch_int(input);
}
}

#[test]
fn test_match_float() {
    // Asserts that `text` is accepted by the float matcher.
    fn assert_float(text: &str) {
        MatchTest::new(text).expect_match(match_length(TextBufferView::match_float));
    }
    // Asserts that `text` is rejected by the float matcher.
    fn assert_not_float(text: &str) {
        MatchTest::new(text).expect_mismatch(match_length(TextBufferView::match_float));
    }

    // Each of these must match both as written and with a leading `-`.
    const ACCEPTED: &[&str] = &[
        "0.0e0", "0E0", "0e0", "305e1", "305e+1", "305e-1", "305e100", "305e-100", "305e+100",
        "305.0e1", "0.279e3", "279e0", "279.5e0", "279.5E0",
    ];
    const REJECTED: &[&str] = &[
        "305",      // Integer
        "305e",     // Has exponent delimiter but no exponent
        ".305e",    // No digits before the decimal point
        "305e0.5",  // Fractional exponent
        "305e-0.5", // Negative fractional exponent
        "0305e1",   // Leading zero
        "+305e1",   // Leading plus sign
        "--305e1",  // Multiple negative signs
    ];

    for &input in ACCEPTED {
        assert_float(input);
        let negated = format!("-{input}");
        assert_float(&negated);
    }

    for &input in REJECTED {
        assert_not_float(input);
    }
}
}
1 change: 1 addition & 0 deletions src/lazy/text/encoded_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ impl EncodedTextValue {
MatchedValue::Null(ion_type) => ion_type,
MatchedValue::Bool(_) => IonType::Bool,
MatchedValue::Int(_) => IonType::Int,
MatchedValue::Float(_) => IonType::Float,
}
}

Expand Down
57 changes: 52 additions & 5 deletions src/lazy/text/matched.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,17 @@
//! use the previously recorded information to minimize the amount of information that needs to be
//! re-discovered.

use crate::lazy::text::as_utf8::AsUtf8;
use crate::lazy::text::buffer::TextBufferView;
use crate::result::IonFailure;
use crate::{Int, IonResult, IonType};
use std::num::IntErrorKind;

use num_bigint::BigInt;
use num_traits::Num;
use smallvec::SmallVec;
use std::num::IntErrorKind;

use crate::lazy::text::as_utf8::AsUtf8;
use crate::lazy::text::buffer::TextBufferView;
use crate::lazy::text::parse_result::InvalidInputError;
use crate::result::IonFailure;
use crate::{Int, IonError, IonResult, IonType};

/// A partially parsed Ion value.
#[derive(Copy, Clone, Debug, PartialEq)]
Expand All @@ -35,6 +38,7 @@ pub(crate) enum MatchedValue {
Null(IonType),
Bool(bool),
Int(MatchedInt),
Float(MatchedFloat),
// TODO: ...the other types
}

Expand Down Expand Up @@ -107,3 +111,46 @@ impl MatchedInt {
Ok(int)
}
}

/// A partially parsed Ion float.
///
/// The variant records which float syntax the matcher found. The special values carry all
/// the information needed to produce an `f64`; for `Numeric`, the matched text is parsed
/// later, when `MatchedFloat::read` is called.
#[derive(Copy, Clone, Debug, PartialEq)]
pub(crate) enum MatchedFloat {
/// The keyword `+inf`
PositiveInfinity,
/// The keyword `-inf`
NegativeInfinity,
/// The keyword `nan`
NotANumber,
/// Any numeric float value; the digits themselves are not stored in this variant
Numeric,
}

impl MatchedFloat {
// Floats that take more than 32 bytes of text to represent will heap allocate a larger buffer.
const STACK_ALLOC_BUFFER_CAPACITY: usize = 32;

pub fn read(&self, matched_input: TextBufferView) -> IonResult<f64> {
use std::str::FromStr;

match self {
MatchedFloat::PositiveInfinity => return Ok(f64::INFINITY),
MatchedFloat::NegativeInfinity => return Ok(f64::NEG_INFINITY),
MatchedFloat::NotANumber => return Ok(f64::NAN),
MatchedFloat::Numeric => {} // fall through
};

let mut sanitized: SmallVec<[u8; Self::STACK_ALLOC_BUFFER_CAPACITY]> =
SmallVec::with_capacity(Self::STACK_ALLOC_BUFFER_CAPACITY);
sanitized.extend(matched_input.bytes().iter().copied().filter(|b| *b != b'_'));

let text = sanitized.as_utf8(matched_input.offset())?;
let float = f64::from_str(text).map_err(|e| {
let error: IonError = InvalidInputError::new(matched_input)
.with_description(format!("encountered an unexpected error ({:?})", e))
.with_label("parsing a float")
.into();
error
})?;
Ok(float)
}
}
68 changes: 45 additions & 23 deletions src/lazy/text/parse_result.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,25 @@ impl<'data> From<InvalidInputError<'data>> for IonParseError<'data> {
}
}

// Converts an `InvalidInputError` into an `IonError::Decoding` whose message combines the
// error's description and label, and whose position is taken from the offending input.
// No equivalent conversion exists for `Incomplete` because it lacks the data required.
impl<'data> From<InvalidInputError<'data>> for IonError {
    fn from(invalid_input_error: InvalidInputError) -> Self {
        // Start from the error's own description, or a generic fallback if it has none.
        let description = invalid_input_error
            .description()
            .unwrap_or("invalid Ion syntax encountered");
        let mut message = String::from(description);

        // Append what the parser was doing at the time, when that is known.
        if let Some(label) = invalid_input_error.label {
            message.push_str(" while ");
            message.push_str(label.as_ref());
        }

        let offset = invalid_input_error.input.offset();
        let length = invalid_input_error.input.len();
        let position = Position::with_offset(offset).with_length(length);
        IonError::Decoding(DecodingError::new(message).with_position(position))
    }
}

impl<'data> From<nom::Err<IonParseError<'data>>> for IonParseError<'data> {
fn from(value: Err<IonParseError<'data>>) -> Self {
match value {
Expand Down Expand Up @@ -200,6 +219,31 @@ pub(crate) trait AddContext<'data, T> {
) -> IonResult<(TextBufferView<'data>, T)>;
}

// Turns a nom error wrapping an `IonParseError` into an `IonResult` by first unwrapping it
// into a plain `IonParseError` and then delegating to that type's `with_context`.
impl<'data, T> AddContext<'data, T> for nom::Err<IonParseError<'data>> {
    fn with_context(
        self,
        label: impl Into<Cow<'static, str>>,
        input: TextBufferView<'data>,
    ) -> IonResult<(TextBufferView<'data>, T)> {
        IonParseError::from(self).with_context(label, input)
    }
}

// Converts an `IonParseError` into a failed `IonResult`:
//   * `Incomplete` becomes an incomplete-data error carrying `label` and the input's offset
//   * `Invalid` is converted into an `IonError` via its `From` impl (`label` is not used)
impl<'data, T> AddContext<'data, T> for IonParseError<'data> {
    fn with_context(
        self,
        label: impl Into<Cow<'static, str>>,
        input: TextBufferView<'data>,
    ) -> IonResult<(TextBufferView<'data>, T)> {
        let invalid_input_error = match self {
            IonParseError::Incomplete => return IonResult::incomplete(label, input.offset()),
            IonParseError::Invalid(invalid_input_error) => invalid_input_error,
        };
        Err(IonError::from(invalid_input_error))
    }
}

impl<'data, T> AddContext<'data, T> for IonParseResult<'data, T> {
fn with_context(
self,
Expand All @@ -209,29 +253,7 @@ impl<'data, T> AddContext<'data, T> for IonParseResult<'data, T> {
match self {
// No change needed in the ok case
Ok(matched) => Ok(matched),
// If the error was an incomplete
Err(e) => {
// Nom error to IonParseError
match IonParseError::from(e) {
IonParseError::Incomplete => IonResult::incomplete(label, input.offset()),
IonParseError::Invalid(invalid_input_error) => {
dbg!(&invalid_input_error.backtrace);
let mut message = String::from(
invalid_input_error
.description()
.unwrap_or("invalid text Ion syntax"),
);
if let Some(label) = invalid_input_error.label {
message.push_str(" while ");
message.push_str(label.as_ref());
}
let position = Position::with_offset(invalid_input_error.input.offset())
.with_length(invalid_input_error.input.len());
let decoding_error = DecodingError::new(message).with_position(position);
Err(IonError::Decoding(decoding_error))
}
}
}
Err(e) => e.with_context(label, input),
}
}
}
Expand Down
Loading

0 comments on commit 5db1ff0

Please sign in to comment.