amazon-ion · zslayton · Aug 22, 2023 · Jul 16, 2023 · Jul 25, 2023 · Jul 25, 2023
diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs
@@ -1,7 +1,7 @@
 use crate::lazy::encoding::TextEncoding;
 use crate::lazy::raw_stream_item::RawStreamItem;
 use crate::lazy::text::encoded_value::EncodedTextValue;
-use crate::lazy::text::matched::{MatchedInt, MatchedValue};
+use crate::lazy::text::matched::{MatchedFloat, MatchedInt, MatchedValue};
 use crate::lazy::text::parse_result::IonParseError;
 use crate::lazy::text::parse_result::{IonMatchResult, IonParseResult};
 use crate::lazy::text::value::LazyRawTextValue;
@@ -12,7 +12,7 @@ use nom::character::streaming::{char, digit1, one_of};
 use nom::combinator::{map, opt, peek, recognize, success, value};
 use nom::error::{ErrorKind, ParseError};
 use nom::multi::many0_count;
-use nom::sequence::{delimited, pair, preceded, separated_pair, terminated};
+use nom::sequence::{delimited, pair, preceded, separated_pair, terminated, tuple};
 use nom::{CompareResult, IResult, InputLength, InputTake, Needed, Parser};
 use std::fmt::{Debug, Formatter};
 use std::iter::{Copied, Enumerate};
@@ -192,6 +192,12 @@ impl<'data> TextBufferView<'data> {
                     EncodedTextValue::new(MatchedValue::Int(matched_int), self.offset(), length)
                 },
             ),
+            map(
+                match_and_length(Self::match_float),
+                |(matched_float, length)| {
+                    EncodedTextValue::new(MatchedValue::Float(matched_float), self.offset(), length)
+                },
+            ),
             // TODO: The other Ion types
         ))
         .map(|encoded_value| LazyRawTextValue {
@@ -372,6 +378,111 @@ impl<'data> TextBufferView<'data> {
     fn take_base_16_digits1(self) -> IonMatchResult<'data> {
         take_while1(|b: u8| b.is_ascii_hexdigit())(self)
     }
+
+    /// Matches an Ion float written in any supported syntax.
+    ///
+    /// The special values (see `match_float_special_value`) are attempted first; if none of them
+    /// match, the numeric syntax (see `match_float_numeric_value`) is attempted.
+    fn match_float(self) -> IonParseResult<'data, MatchedFloat> {
+        let mut special_or_numeric = alt((
+            Self::match_float_special_value,
+            Self::match_float_numeric_value,
+        ));
+        special_or_numeric(self)
+    }
+
+    /// Matches the special IEEE-754 values `nan`, `+inf`, and `-inf`.
+    ///
+    /// The keyword must be followed by a stop character (the same `peek_stop_character` check
+    /// that `match_float_numeric_value` applies). Without it, a longer token that merely begins
+    /// with one of the keywords (for example `nano`) would be reported as a float.
+    fn match_float_special_value(self) -> IonParseResult<'data, MatchedFloat> {
+        terminated(
+            alt((
+                value(MatchedFloat::NotANumber, tag("nan")),
+                value(MatchedFloat::PositiveInfinity, tag("+inf")),
+                value(MatchedFloat::NegativeInfinity, tag("-inf")),
+            )),
+            // The result of the stop character check is discarded by `terminated`.
+            Self::peek_stop_character,
+        )(self)
+    }
+
+    /// Matches numeric IEEE-754 floating point values.
+    ///
+    /// A numeric float is a number (with an optional dot and fractional digits) followed by an
+    /// exponent. Whatever comes next must pass `peek_stop_character`; the result of that check
+    /// is discarded.
+    fn match_float_numeric_value(self) -> IonParseResult<'data, MatchedFloat> {
+        let number_then_exponent = recognize(pair(
+            Self::match_number_with_optional_dot_and_digits,
+            Self::match_float_exponent_marker_and_digits,
+        ));
+        value(
+            MatchedFloat::Numeric,
+            terminated(number_then_exponent, Self::peek_stop_character),
+        )(self)
+    }
+
+    /// Matches a number that may or may not have a decimal point and trailing fractional digits.
+    /// A leading `-` is optional. If a decimal point is present, the digits after it are optional.
+    /// For example:
+    ///   1000
+    ///   1000.559
+    ///   -25.2
+    ///   5.
+    fn match_number_with_optional_dot_and_digits(self) -> IonMatchResult<'data> {
+        recognize(tuple((
+            opt(tag("-")),
+            // Either a lone zero, or a non-zero digit followed by any sequence of digits.
+            Self::match_digits_before_dot,
+            opt(Self::match_dot_followed_by_base_10_digits),
+        )))(self)
+    }
+
+    /// Matches the digits that may appear before the decimal point of a float or decimal:
+    /// either a lone `0`, or a non-zero digit followed by any sequence of digits.
+    fn match_digits_before_dot(self) -> IonMatchResult<'data> {
+        let non_zero_then_digits =
+            recognize(pair(Self::match_leading_digit, Self::match_trailing_digits));
+        alt((tag("0"), non_zero_then_digits))(self)
+    }
+
+    /// Matches exactly one base 10 digit in the range `1` through `9`.
+    fn match_leading_digit(self) -> IonMatchResult<'data> {
+        let mut non_zero_digit = recognize(one_of("123456789"));
+        non_zero_digit(self)
+    }
+
+    /// Matches zero or more base 10 digits. Each run of digits may be preceded by a single
+    /// underscore, so an underscore is never the last character of the match.
+    fn match_trailing_digits(self) -> IonMatchResult<'data> {
+        let underscore_then_digits = preceded(opt(char('_')), digit1);
+        recognize(many0_count(underscore_then_digits))(self)
+    }
+
+    /// Recognizes a decimal point and any base 10 digits that follow it. The digits are
+    /// optional, so a lone `.` is also a match.
+    fn match_dot_followed_by_base_10_digits(self) -> IonMatchResult<'data> {
+        // `recognize` discards the inner outputs, so only the consumed span matters here.
+        recognize(pair(tag("."), opt(Self::match_digits_after_dot)))(self)
+    }
+
+    /// Matches one or more base 10 digits in which single underscores may separate runs of
+    /// digits. Unlike `match_digits_before_dot`, leading zeros are permitted.
+    fn match_digits_after_dot(self) -> IonMatchResult<'data> {
+        // Any number of digit runs that each end in an underscore...
+        let underscore_terminated_runs = many0_count(terminated(digit1, char('_')));
+        // ...followed by a final run of digits that does not.
+        recognize(pair(underscore_terminated_runs, digit1))(self)
+    }
+
+    /// Matches an `e` or `E` followed by an optional sign (`+` or `-`) and one or more base 10
+    /// digits. The marker is consumed, but the returned match holds only the sign and digits.
+    fn match_float_exponent_marker_and_digits(self) -> IonMatchResult<'data> {
+        map(
+            pair(one_of("eE"), Self::match_exponent_sign_and_digits),
+            |(_marker, sign_and_digits)| sign_and_digits,
+        )(self)
+    }
+
+    /// Recognizes the exponent portion of a decimal (everything after the 'd') or float
+    /// (everything after the 'e'): an optional `+` OR `-` followed by decimal digits.
+    /// The digits may:
+    ///    * be separated by underscores: `1_000_000`
+    ///    * begin with one or more zeros: `0005`
+    fn match_exponent_sign_and_digits(self) -> IonMatchResult<'data> {
+        recognize(preceded(
+            // The sign may be omitted entirely.
+            opt(Self::match_any_sign),
+            Self::match_digits_after_dot,
+        ))(self)
+    }
+
+    /// Matches a single sign character: either `+` or `-`.
+    ///
+    /// Intended for exponent signs; most other places in Ion do not allow `+`.
+    pub fn match_any_sign(self) -> IonMatchResult<'data> {
+        let mut plus_or_minus = alt((tag("+"), tag("-")));
+        plus_or_minus(self)
+    }
 }
 
 // === nom trait implementations ===
@@ -602,7 +713,12 @@ mod tests {
         {
             let result = self.try_match(parser);
             // We expect this to fail for one reason or another
-            result.unwrap_err();
+            assert!(
+                result.is_err(),
+                "Expected a parse failure for input: {:?}\nResult: {:?}",
+                self.input,
+                result
+            );
         }
     }
 
@@ -729,4 +845,38 @@ mod tests {
             mismatch_int(input);
         }
     }
+
+    #[test]
+    fn test_match_float() {
+        // Every entry must match as written and again with a leading `-`.
+        const VALID_FLOATS: &[&str] = &[
+            "0.0e0", "0E0", "0e0", "305e1", "305e+1", "305e-1", "305e100", "305e-100", "305e+100",
+            "305.0e1", "0.279e3", "279e0", "279.5e0", "279.5E0",
+        ];
+        // None of these may be accepted by the float matcher.
+        const INVALID_FLOATS: &[&str] = &[
+            "305",      // An integer, not a float
+            "305e",     // Exponent marker without any exponent digits
+            ".305e",    // Nothing before the decimal point
+            "305e0.5",  // Exponent has a fractional part
+            "305e-0.5", // Negative exponent with a fractional part
+            "0305e1",   // Begins with a leading zero
+            "+305e1",   // Begins with a plus sign
+            "--305e1",  // More than one negative sign
+        ];
+
+        fn assert_float(text: &str) {
+            MatchTest::new(text).expect_match(match_length(TextBufferView::match_float));
+        }
+        fn assert_not_float(text: &str) {
+            MatchTest::new(text).expect_mismatch(match_length(TextBufferView::match_float));
+        }
+
+        for &text in VALID_FLOATS {
+            assert_float(text);
+            assert_float(&format!("-{text}"));
+        }
+        for &text in INVALID_FLOATS {
+            assert_not_float(text);
+        }
+    }
 }
diff --git a/src/lazy/text/encoded_value.rs b/src/lazy/text/encoded_value.rs
@@ -114,6 +114,7 @@ impl EncodedTextValue {
             MatchedValue::Null(ion_type) => ion_type,
             MatchedValue::Bool(_) => IonType::Bool,
             MatchedValue::Int(_) => IonType::Int,
+            MatchedValue::Float(_) => IonType::Float,
         }
     }
 

diff --git a/src/lazy/text/matched.rs b/src/lazy/text/matched.rs
@@ -19,14 +19,17 @@
 //! use the previously recorded information to minimize the amount of information that needs to be
 //! re-discovered.
 
-use crate::lazy::text::as_utf8::AsUtf8;
-use crate::lazy::text::buffer::TextBufferView;
-use crate::result::IonFailure;
-use crate::{Int, IonResult, IonType};
+use std::num::IntErrorKind;
+
 use num_bigint::BigInt;
 use num_traits::Num;
 use smallvec::SmallVec;
-use std::num::IntErrorKind;
+
+use crate::lazy::text::as_utf8::AsUtf8;
+use crate::lazy::text::buffer::TextBufferView;
+use crate::lazy::text::parse_result::InvalidInputError;
+use crate::result::IonFailure;
+use crate::{Int, IonError, IonResult, IonType};
 
 /// A partially parsed Ion value.
 #[derive(Copy, Clone, Debug, PartialEq)]
@@ -35,6 +38,7 @@ pub(crate) enum MatchedValue {
     Null(IonType),
     Bool(bool),
     Int(MatchedInt),
+    Float(MatchedFloat),
     // TODO: ...the other types
 }
 
@@ -107,3 +111,46 @@ impl MatchedInt {
         Ok(int)
     }
 }
+
+/// A partially parsed Ion float; numeric text is only converted to an `f64` by `read`.
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub(crate) enum MatchedFloat {
+    /// Positive infinity, written `+inf` in text.
+    PositiveInfinity,
+    /// Negative infinity, written `-inf` in text.
+    NegativeInfinity,
+    /// Not-a-number, written `nan` in text.
+    NotANumber,
+    /// Any other float; `read` parses its text (minus underscores) with `f64::from_str`.
+    Numeric,
+}
+
+impl MatchedFloat {
+    // Inline capacity of the scratch buffer used by `read`. Float text that needs more than this
+    // many bytes causes a larger buffer to be allocated on the heap.
+    const STACK_ALLOC_BUFFER_CAPACITY: usize = 32;
+
+    /// Converts the matched text into an `f64`.
+    ///
+    /// The special values are returned directly. For numeric floats, a copy of `matched_input`
+    /// with every `_` removed is handed to `f64::from_str`.
+    ///
+    /// # Errors
+    /// Returns an error if `as_utf8` rejects the sanitized bytes or if `f64::from_str` cannot
+    /// parse them.
+    pub fn read(&self, matched_input: TextBufferView) -> IonResult<f64> {
+        use std::str::FromStr;
+
+        match self {
+            MatchedFloat::PositiveInfinity => Ok(f64::INFINITY),
+            MatchedFloat::NegativeInfinity => Ok(f64::NEG_INFINITY),
+            MatchedFloat::NotANumber => Ok(f64::NAN),
+            MatchedFloat::Numeric => {
+                // Strip the underscore separators before handing the text to `f64::from_str`.
+                let mut digits: SmallVec<[u8; Self::STACK_ALLOC_BUFFER_CAPACITY]> =
+                    SmallVec::with_capacity(Self::STACK_ALLOC_BUFFER_CAPACITY);
+                digits.extend(matched_input.bytes().iter().copied().filter(|&b| b != b'_'));
+
+                let text = digits.as_utf8(matched_input.offset())?;
+                f64::from_str(text).map_err(|e| {
+                    let description = format!("encountered an unexpected error ({:?})", e);
+                    IonError::from(
+                        InvalidInputError::new(matched_input)
+                            .with_description(description)
+                            .with_label("parsing a float"),
+                    )
+                })
+            }
+        }
+    }
+}
diff --git a/src/lazy/text/parse_result.rs b/src/lazy/text/parse_result.rs
@@ -143,6 +143,25 @@ impl<'data> From<InvalidInputError<'data>> for IonParseError<'data> {
     }
 }
 
+// An analogous impl cannot be provided for `Incomplete` because it is missing necessary data.
+impl<'data> From<InvalidInputError<'data>> for IonError {
+    /// Builds an `IonError::Decoding` whose message is the error's description (or a generic
+    /// fallback), followed by " while <label>" when a label is present, and whose position is
+    /// the offset and length of the offending input.
+    fn from(invalid_input_error: InvalidInputError) -> Self {
+        // Record where the offending input lives before `label` is moved out below.
+        let position = Position::with_offset(invalid_input_error.input.offset())
+            .with_length(invalid_input_error.input.len());
+
+        let description = invalid_input_error
+            .description()
+            .unwrap_or("invalid Ion syntax encountered");
+        let mut message = description.to_owned();
+        if let Some(label) = invalid_input_error.label {
+            message.push_str(" while ");
+            message.push_str(label.as_ref());
+        }
+
+        IonError::Decoding(DecodingError::new(message).with_position(position))
+    }
+}
+
 impl<'data> From<nom::Err<IonParseError<'data>>> for IonParseError<'data> {
     fn from(value: Err<IonParseError<'data>>) -> Self {
         match value {
@@ -200,6 +219,31 @@ pub(crate) trait AddContext<'data, T> {
     ) -> IonResult<(TextBufferView<'data>, T)>;
 }
 
+impl<'data, T> AddContext<'data, T> for nom::Err<IonParseError<'data>> {
+    /// Converts the nom error into an `IonParseError` and delegates to that type's
+    /// `with_context`, passing `label` and `input` through unchanged.
+    fn with_context(
+        self,
+        label: impl Into<Cow<'static, str>>,
+        input: TextBufferView<'data>,
+    ) -> IonResult<(TextBufferView<'data>, T)> {
+        IonParseError::from(self).with_context(label, input)
+    }
+}
+
+// Turns an `IonParseError` into an `IonResult`.
+impl<'data, T> AddContext<'data, T> for IonParseError<'data> {
+    /// `label` and `input` are only consulted for the `Incomplete` case; an `Invalid` error is
+    /// converted using the information it already carries.
+    fn with_context(
+        self,
+        label: impl Into<Cow<'static, str>>,
+        input: TextBufferView<'data>,
+    ) -> IonResult<(TextBufferView<'data>, T)> {
+        let invalid_input_error = match self {
+            IonParseError::Incomplete => return IonResult::incomplete(label, input.offset()),
+            IonParseError::Invalid(invalid_input_error) => invalid_input_error,
+        };
+        Err(IonError::from(invalid_input_error))
+    }
+}
+
 impl<'data, T> AddContext<'data, T> for IonParseResult<'data, T> {
     fn with_context(
         self,
@@ -209,29 +253,7 @@ impl<'data, T> AddContext<'data, T> for IonParseResult<'data, T> {
         match self {
             // No change needed in the ok case
             Ok(matched) => Ok(matched),
-            // If the error was an incomplete
-            Err(e) => {
-                // Nom error to IonParseError
-                match IonParseError::from(e) {
-                    IonParseError::Incomplete => IonResult::incomplete(label, input.offset()),
-                    IonParseError::Invalid(invalid_input_error) => {
-                        dbg!(&invalid_input_error.backtrace);
-                        let mut message = String::from(
-                            invalid_input_error
-                                .description()
-                                .unwrap_or("invalid text Ion syntax"),
-                        );
-                        if let Some(label) = invalid_input_error.label {
-                            message.push_str(" while ");
-                            message.push_str(label.as_ref());
-                        }
-                        let position = Position::with_offset(invalid_input_error.input.offset())
-                            .with_length(invalid_input_error.input.len());
-                        let decoding_error = DecodingError::new(message).with_position(position);
-                        Err(IonError::Decoding(decoding_error))
-                    }
-                }
-            }
+            Err(e) => e.with_context(label, input),
         }
     }
 }