From e436f96733337379efec36fb2e6b4cfe733ba5a0 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Sun, 11 Aug 2024 15:10:34 -0400 Subject: [PATCH] Adds the `(annotate ...)` form and bin read support for `flex_uint` parameters (#801) --- src/ion_data/ion_eq.rs | 2 +- src/ion_hash/type_qualifier.rs | 2 +- src/lazy/binary/encoded_value.rs | 7 + src/lazy/binary/immutable_buffer.rs | 3 + src/lazy/binary/raw/v1_1/e_expression.rs | 30 ++- src/lazy/binary/raw/v1_1/immutable_buffer.rs | 36 ++++ src/lazy/binary/raw/v1_1/value.rs | 51 ++++++ .../encoder/binary/v1_1/container_writers.rs | 9 +- src/lazy/encoder/binary/v1_1/flex_uint.rs | 3 +- src/lazy/encoder/binary/v1_1/value_writer.rs | 7 +- src/lazy/encoder/value_writer.rs | 7 +- src/lazy/encoder/writer.rs | 5 +- src/lazy/expanded/compiler.rs | 70 ++++++- src/lazy/expanded/e_expression.rs | 7 +- src/lazy/expanded/macro_evaluator.rs | 171 +++++++++++++++++- src/lazy/expanded/macro_table.rs | 47 ++++- src/lazy/expanded/mod.rs | 32 ++-- src/lazy/expanded/template.rs | 4 +- src/lazy/streaming_raw_reader.rs | 1 - src/lazy/system_reader.rs | 4 +- src/lazy/value_ref.rs | 6 +- src/lib.rs | 3 +- src/symbol_table.rs | 7 +- src/text/text_formatter.rs | 1 + src/types/decimal/mod.rs | 1 + src/types/integer.rs | 68 +++---- 26 files changed, 472 insertions(+), 112 deletions(-) diff --git a/src/ion_data/ion_eq.rs b/src/ion_data/ion_eq.rs index d47d3fdb..444186f3 100644 --- a/src/ion_data/ion_eq.rs +++ b/src/ion_data/ion_eq.rs @@ -24,7 +24,7 @@ use std::ops::Deref; /// * Decimal `0.0` and `-0.0` are mathematically equivalent but not Ion equivalent. /// * Decimal `0.0` and `0.00` are mathematically equivalent but not Ion equivalent. /// * Timestamps representing the same point in time at different precisions or at different -/// timezone offsets are not Ion equivalent. +/// timezone offsets are not Ion equivalent. pub trait IonEq { fn ion_eq(&self, other: &Self) -> bool; } diff --git a/src/ion_hash/type_qualifier.rs b/src/ion_hash/type_qualifier.rs index f00a06d4..bada616d 100644 --- a/src/ion_hash/type_qualifier.rs +++ b/src/ion_hash/type_qualifier.rs @@ -18,7 +18,7 @@ pub(crate) struct TypeQualifier(u8); /// From the spec: /// /// > TQ: is a type qualifier octet consisting of a four-bit type code T -/// followed by a four-bit qualifier Q +/// > followed by a four-bit qualifier Q /// /// To compute a TQ from a `T` and a `Q`, all we need to is a bitwise shift! const fn combine(ion_type_code: IonTypeCode, q: u8) -> TypeQualifier { diff --git a/src/lazy/binary/encoded_value.rs b/src/lazy/binary/encoded_value.rs index 1e7410ba..f2f5850b 100644 --- a/src/lazy/binary/encoded_value.rs +++ b/src/lazy/binary/encoded_value.rs @@ -1,5 +1,6 @@ use crate::lazy::binary::raw::type_descriptor::Header; use crate::lazy::binary::raw::v1_1::immutable_buffer::AnnotationsEncoding; +use crate::lazy::expanded::template::ParameterEncoding; use crate::IonType; use std::ops::Range; @@ -40,6 +41,7 @@ impl EncodedHeader for Header { /// without re-parsing its header information each time. #[derive(Clone, Copy, Debug, PartialEq)] pub(crate) struct EncodedValue { + pub(crate) encoding: ParameterEncoding, // If the compiler decides that a value is too large to be moved/copied with inline code, // it will relocate the value using memcpy instead. This can be quite slow by comparison. // @@ -88,6 +90,8 @@ pub(crate) struct EncodedValue { pub annotations_encoding: AnnotationsEncoding, // The offset of the type descriptor byte within the overall input stream. pub header_offset: usize, + // If this value was written with a tagless encoding, this will be 0. Otherwise, it's 1. + pub opcode_length: u8, // The number of bytes used to encode the optional length VarUInt following the header byte. pub length_length: u8, // The number of bytes used to encode the value itself, not including the header byte @@ -258,12 +262,14 @@ mod tests { use crate::lazy::binary::encoded_value::EncodedValue; use crate::lazy::binary::raw::type_descriptor::Header; use crate::lazy::binary::raw::v1_1::immutable_buffer::AnnotationsEncoding; + use crate::lazy::expanded::template::ParameterEncoding; use crate::{IonResult, IonType}; #[test] fn accessors() -> IonResult<()> { // 3-byte String with 1-byte annotation let value = EncodedValue { + encoding: ParameterEncoding::Tagged, header: Header { ion_type: IonType::String, ion_type_code: IonTypeCode::String, @@ -273,6 +279,7 @@ mod tests { annotations_sequence_length: 1, annotations_encoding: AnnotationsEncoding::SymbolAddress, header_offset: 200, + opcode_length: 1, length_length: 0, value_body_length: 3, total_length: 7, diff --git a/src/lazy/binary/immutable_buffer.rs b/src/lazy/binary/immutable_buffer.rs index 0af21a15..05621174 100644 --- a/src/lazy/binary/immutable_buffer.rs +++ b/src/lazy/binary/immutable_buffer.rs @@ -16,6 +16,7 @@ use crate::lazy::decoder::LazyRawFieldExpr; use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt; use crate::lazy::encoder::binary::v1_1::flex_uint::FlexUInt; use crate::lazy::encoding::BinaryEncoding_1_0; +use crate::lazy::expanded::template::ParameterEncoding; use crate::result::IonFailure; use crate::{Int, IonError, IonResult, IonType}; @@ -703,12 +704,14 @@ impl<'a> ImmutableBuffer<'a> { } let encoded_value = EncodedValue { + encoding: ParameterEncoding::Tagged, header, // If applicable, these are populated by the caller: `read_annotated_value()` annotations_header_length: 0, annotations_sequence_length: 0, annotations_encoding: AnnotationsEncoding::SymbolAddress, header_offset, + opcode_length: 1, length_length, value_body_length: value_length, total_length, diff --git a/src/lazy/binary/raw/v1_1/e_expression.rs b/src/lazy/binary/raw/v1_1/e_expression.rs index 9c63f201..183ca389 100644 --- a/src/lazy/binary/raw/v1_1/e_expression.rs +++ b/src/lazy/binary/raw/v1_1/e_expression.rs @@ -272,7 +272,7 @@ impl<'top> Iterator for BinaryEExpArgsInputIter<'top> { // argument encoding bitmap. ArgGrouping::ValueExprLiteral }; - // TODO: Tagless encodings + // TODO: More tagless encodings let (arg_expr, remaining_input) = match arg_grouping { // If the encoding is `empty`, there's nothing to do. Make an empty slice at the current // offset and build an empty BinaryEExpArgGroup with it. @@ -282,14 +282,30 @@ impl<'top> Iterator for BinaryEExpArgsInputIter<'top> { (EExpArg::new(parameter, expr), self.remaining_args_buffer) } // If it's a tagged value expression, parse it as usual. - ArgGrouping::ValueExprLiteral => { - let (expr, remaining) = try_or_some_err! { - self + ArgGrouping::ValueExprLiteral => match parameter.encoding() { + ParameterEncoding::Tagged => { + let (expr, remaining) = try_or_some_err! { + self .remaining_args_buffer .expect_eexp_arg_expr("reading tagged e-expr arg") - }; - (EExpArg::new(parameter, expr), remaining) - } + }; + (EExpArg::new(parameter, expr), remaining) + } + ParameterEncoding::FlexUInt => { + let (flex_uint_lazy_value, remaining) = try_or_some_err! { + self.remaining_args_buffer.read_flex_uint_as_lazy_value() + }; + let value_ref = &*self + .remaining_args_buffer + .context() + .allocator() + .alloc_with(|| flex_uint_lazy_value); + ( + EExpArg::new(parameter, EExpArgExpr::ValueLiteral(value_ref)), + remaining, + ) + } + }, // If it's an argument group... ArgGrouping::ArgGroup => { //...then it starts with a FlexUInt that indicates whether the group is length-prefixed diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs index 6a66e72b..47558914 100644 --- a/src/lazy/binary/raw/v1_1/immutable_buffer.rs +++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs @@ -20,6 +20,7 @@ use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt; use crate::lazy::encoder::binary::v1_1::flex_sym::FlexSym; use crate::lazy::encoder::binary::v1_1::flex_uint::FlexUInt; use crate::lazy::expanded::macro_table::MacroRef; +use crate::lazy::expanded::template::ParameterEncoding; use crate::lazy::expanded::EncodingContextRef; use crate::lazy::text::raw::v1_1::arg_group::EExpArgExpr; use crate::result::IonFailure; @@ -223,6 +224,38 @@ impl<'a> ImmutableBuffer<'a> { Ok((flex_uint, remaining)) } + pub fn read_flex_uint_as_lazy_value(self) -> ParseResult<'a, LazyRawBinaryValue_1_1<'a>> { + let Some(first_byte) = self.peek_next_byte() else { + return IonResult::incomplete("a flex_uint", self.offset()); + }; + let size_in_bytes = match first_byte { + // If the first byte is zero, this flex_uint is encoded using 9+ bytes. That's pretty + // uncommon, so we'll just use the existing logic in the `read` method and discard the + // value. If this shows up in profiles, it can be optimized further. + 0 => FlexUInt::read(self.bytes(), self.offset())?.size_in_bytes(), + _ => first_byte.trailing_zeros() as usize + 1, + }; + + if self.len() < size_in_bytes { + return IonResult::incomplete("reading a flex_uint value", self.offset()); + } + // XXX: This *doesn't* slice `self` because FlexUInt::read() is faster if the input + // is at least the size of a u64. + let matched_input = self; + let remaining_input = self.slice_to_end(size_in_bytes); + let value = LazyRawBinaryValue_1_1::for_flex_uint(matched_input); + Ok((value, remaining_input)) + } + + pub fn slice_to_end(&self, offset: usize) -> ImmutableBuffer<'a> { + ImmutableBuffer { + data: &self.data[offset..], + // stream offset + local offset + offset: self.offset + offset, + context: self.context, + } + } + #[inline] pub fn read_flex_sym(self) -> ParseResult<'a, FlexSym<'a>> { let flex_sym = FlexSym::read(self.bytes(), self.offset())?; @@ -448,12 +481,15 @@ impl<'a> ImmutableBuffer<'a> { + value_length; let encoded_value = EncodedValue { + encoding: ParameterEncoding::Tagged, header, // If applicable, these are populated by the caller: `read_annotated_value()` annotations_header_length: 0, annotations_sequence_length: 0, annotations_encoding: AnnotationsEncoding::SymbolAddress, header_offset, + // This is a tagged value, so its opcode length is always 1 + opcode_length: 1, length_length, value_body_length: value_length, total_length, diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index 383ebc35..70af7926 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -3,13 +3,16 @@ use std::fmt::Debug; use std::ops::Range; +use crate::lazy::binary::raw::v1_1::immutable_buffer::AnnotationsEncoding; use crate::lazy::binary::raw::v1_1::r#struct::LazyRawBinaryStruct_1_1; use crate::lazy::binary::raw::v1_1::sequence::{LazyRawBinaryList_1_1, LazyRawBinarySExp_1_1}; use crate::lazy::bytes_ref::BytesRef; use crate::lazy::decoder::{HasRange, HasSpan, RawVersionMarker}; +use crate::lazy::expanded::template::ParameterEncoding; use crate::lazy::expanded::EncodingContextRef; use crate::lazy::span::Span; use crate::lazy::str_ref::StrRef; +use crate::v1_1::FlexUInt; use crate::{ lazy::{ binary::{ @@ -132,6 +135,12 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1 } fn read(&self) -> IonResult> { + if self.encoded_value.encoding == ParameterEncoding::FlexUInt { + let flex_uint = FlexUInt::read(self.input.bytes(), self.input.offset())?; + let int: Int = flex_uint.value().into(); + return Ok(RawValueRef::Int(int)); + } + if self.is_null() { let ion_type = if self.encoded_value.header.ion_type_code == OpcodeType::TypedNull { let body = self.value_body(); @@ -176,6 +185,11 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1 &self, context: EncodingContextRef<'top>, ) -> IonResult> { + if self.encoded_value.encoding == ParameterEncoding::FlexUInt { + let flex_uint = FlexUInt::read(self.input.bytes(), self.input.offset())?; + let int: Int = flex_uint.value().into(); + return Ok(ValueRef::Int(int)); + } if self.is_null() { return Ok(ValueRef::Null(self.ion_type())); } @@ -194,6 +208,12 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1 value: &'a LazyRawBinaryValue_1_1<'a>, context: EncodingContextRef<'a>, ) -> IonResult> { + if value.encoded_value.encoding == ParameterEncoding::FlexUInt { + let flex_uint = FlexUInt::read(value.input.bytes(), value.input.offset())?; + let int: Int = flex_uint.value().into(); + return Ok(ValueRef::Int(int)); + } + if value.is_null() { return Ok(ValueRef::Null(value.ion_type())); } @@ -246,6 +266,37 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1 } impl<'top> LazyRawBinaryValue_1_1<'top> { + /// Constructs a lazy raw binary value from an input buffer slice that has been found to contain + /// a complete `FlexUInt`. + pub(crate) fn for_flex_uint(input: ImmutableBuffer<'top>) -> Self { + let encoded_value = EncodedValue { + encoding: ParameterEncoding::FlexUInt, + header: Header { + // It is an int, that's true. + ion_type: IonType::Int, + // Nonsense values for now + ion_type_code: OpcodeType::Nop, + low_nibble: 0, + }, + + // FlexUInts cannot have any annotations + annotations_header_length: 0, + annotations_sequence_length: 0, + annotations_encoding: AnnotationsEncoding::SymbolAddress, + + header_offset: input.offset(), + opcode_length: 0, + length_length: 0, + value_body_length: input.len(), + total_length: input.len(), + }; + + LazyRawBinaryValue_1_1 { + encoded_value, + input, + } + } + /// Indicates the Ion data type of this value. Calling this method does not require additional /// parsing of the input stream. pub fn ion_type(&'top self) -> IonType { diff --git a/src/lazy/encoder/binary/v1_1/container_writers.rs b/src/lazy/encoder/binary/v1_1/container_writers.rs index 178efe65..708e1dd6 100644 --- a/src/lazy/encoder/binary/v1_1/container_writers.rs +++ b/src/lazy/encoder/binary/v1_1/container_writers.rs @@ -7,7 +7,7 @@ use crate::lazy::encoder::value_writer::internal::{FieldEncoder, MakeValueWriter use crate::lazy::encoder::value_writer::{EExpWriter, SequenceWriter, StructWriter}; use crate::lazy::encoder::write_as_ion::WriteAsIon; use crate::raw_symbol_ref::AsRawSymbolRef; -use crate::IonResult; +use crate::{IonResult, UInt}; /// A helper type that holds fields and logic that is common to [`BinaryListWriter_1_1`], /// [`BinarySExpWriter_1_1`], and [`BinaryStructWriter_1_1`]. @@ -393,4 +393,9 @@ impl<'value, 'top> SequenceWriter for BinaryEExpWriter_1_1<'value, 'top> { } } -impl<'value, 'top> EExpWriter for BinaryEExpWriter_1_1<'value, 'top> {} +impl<'value, 'top> EExpWriter for BinaryEExpWriter_1_1<'value, 'top> { + fn write_flex_uint(&mut self, value: impl Into) -> IonResult<()> { + FlexUInt::write(self.buffer, value)?; + Ok(()) + } +} diff --git a/src/lazy/encoder/binary/v1_1/flex_uint.rs b/src/lazy/encoder/binary/v1_1/flex_uint.rs index fe0756e5..62b26f47 100644 --- a/src/lazy/encoder/binary/v1_1/flex_uint.rs +++ b/src/lazy/encoder/binary/v1_1/flex_uint.rs @@ -65,7 +65,6 @@ impl FlexUInt { false, ); } - let flex_uint = Self::read_small_flex_uint(input); Ok(flex_uint) } @@ -115,7 +114,7 @@ impl FlexUInt { /// (`support_sign_extension=true`), then the six bits beyond the supported 64 must all be the /// same as the 64th (highest supported) bit. This will allow encodings of up to 70 bits /// to be correctly interpreted as positive, negative, or beyond the bounds of the 64 bit - /// limitation. + /// limitation. pub(crate) fn read_flex_primitive_as_uint( input: &[u8], offset: usize, diff --git a/src/lazy/encoder/binary/v1_1/value_writer.rs b/src/lazy/encoder/binary/v1_1/value_writer.rs index 38a34d5a..527c65cb 100644 --- a/src/lazy/encoder/binary/v1_1/value_writer.rs +++ b/src/lazy/encoder/binary/v1_1/value_writer.rs @@ -627,7 +627,7 @@ impl<'value, 'top> BinaryValueWriter_1_1<'value, 'top> { MacroIdRef::LocalAddress(_address) => { todo!("macros with addresses higher than 64"); } - } + }; Ok(BinaryEExpWriter_1_1::new( self.allocator, self.encoding_buffer, @@ -832,6 +832,9 @@ impl<'value, 'top> BinaryAnnotatedValueWriter_1_1<'value, 'top> { #[cfg(test)] mod tests { + use num_traits::FloatConst; + use rstest::rstest; + use crate::ion_data::IonEq; use crate::lazy::encoder::annotate::{Annotatable, Annotated}; use crate::lazy::encoder::annotation_seq::AnnotationSeq; @@ -845,8 +848,6 @@ mod tests { v1_1, Decimal, Element, Int, IonResult, IonType, Null, RawSymbolRef, SymbolId, Timestamp, Writer, }; - use num_traits::FloatConst; - use rstest::rstest; fn encoding_test( test: impl FnOnce(&mut LazyRawBinaryWriter_1_1<&mut Vec>) -> IonResult<()>, diff --git a/src/lazy/encoder/value_writer.rs b/src/lazy/encoder/value_writer.rs index 3c360b31..3b8e3456 100644 --- a/src/lazy/encoder/value_writer.rs +++ b/src/lazy/encoder/value_writer.rs @@ -3,7 +3,7 @@ use crate::lazy::encoder::value_writer::internal::{FieldEncoder, MakeValueWriter use crate::lazy::encoder::write_as_ion::WriteAsIon; use crate::lazy::text::raw::v1_1::reader::MacroIdRef; use crate::raw_symbol_ref::AsRawSymbolRef; -use crate::{Decimal, Int, IonResult, IonType, RawSymbolRef, Timestamp}; +use crate::{Decimal, Int, IonResult, IonType, RawSymbolRef, Timestamp, UInt}; pub mod internal { use crate::lazy::encoder::value_writer::ValueWriter; @@ -32,7 +32,10 @@ pub mod internal { } pub trait EExpWriter: SequenceWriter { - // TODO: methods for writing tagless encodings + // TODO: more methods for writing tagless encodings + fn write_flex_uint(&mut self, _value: impl Into) -> IonResult<()> { + todo!("current only implemented for binary 1.1 to enable unit testing for the reader") + } } pub trait AnnotatableWriter { diff --git a/src/lazy/encoder/writer.rs b/src/lazy/encoder/writer.rs index 5b4bb3bc..6088b5c4 100644 --- a/src/lazy/encoder/writer.rs +++ b/src/lazy/encoder/writer.rs @@ -20,7 +20,7 @@ use crate::result::IonFailure; use crate::write_config::WriteConfig; use crate::{ Decimal, Element, ElementWriter, Int, IonResult, IonType, RawSymbolRef, Symbol, SymbolId, - SymbolTable, Timestamp, Value, + SymbolTable, Timestamp, UInt, Value, }; pub(crate) struct WriteContext { @@ -505,6 +505,9 @@ impl<'value, V: ValueWriter> MakeValueWriter for ApplicationEExpWriter<'value, V impl<'value, V: ValueWriter> EExpWriter for ApplicationEExpWriter<'value, V> { // Default methods + fn write_flex_uint(&mut self, value: impl Into) -> IonResult<()> { + self.raw_eexp_writer.write_flex_uint(value) + } } impl ElementWriter for S { diff --git a/src/lazy/expanded/compiler.rs b/src/lazy/expanded/compiler.rs index 192b3c20..4b53907a 100644 --- a/src/lazy/expanded/compiler.rs +++ b/src/lazy/expanded/compiler.rs @@ -182,6 +182,25 @@ impl TemplateCompiler { } } + /// Confirms that the provided `value` is a symbol with known text. If so, returns `Ok(text)`. + /// If not, returns a decoding error containing the specified label. + fn expect_symbol<'a, Encoding: Decoder>( + label: &str, + source: &mut impl Iterator>>, + ) -> IonResult> { + match source.next() { + None => IonResult::decoding_error(format!("expected {label} but found nothing")), + Some(Ok(value)) if value.ion_type() == IonType::Symbol => Ok(value), + Some(Ok(value)) => IonResult::decoding_error(format!( + "expected {label} but found {}", + value.ion_type() + )), + Some(Err(e)) => IonResult::decoding_error(format!( + "expected {label} but encountered an error: {e:?}" + )), + } + } + /// Tries to pull the next `LazyValue` from the provided iterator. If the iterator is empty, /// returns a `IonError::Decoding` that includes the specified label. fn expect_next<'a, Encoding: Decoder>( @@ -236,6 +255,21 @@ impl TemplateCompiler { } } + fn encoding_for(value: LazyValue) -> IonResult { + match value.annotations().next() { + None => Ok(ParameterEncoding::Tagged), + Some(Ok(text)) => match text.expect_text()? { + "flex_uint" => Ok(ParameterEncoding::FlexUInt), + other => IonResult::decoding_error(format!( + "unsupported encoding '{other}' specified for parameter" + )), + }, + Some(Err(e)) => IonResult::decoding_error(format!( + "error occurred while parsing annotations for parameter: {e:?}" + )), + } + } + pub fn compile_from_sexp<'a, Encoding: Decoder>( context: EncodingContextRef<'a>, macro_def_sexp: LazySExp<'a, Encoding>, @@ -266,6 +300,7 @@ impl TemplateCompiler { while let Some(item) = param_items.next().transpose()? { is_final_parameter |= param_items.peek().is_none(); let name = Self::expect_symbol_text("a parameter name", item)?.to_owned(); + let parameter_encoding = Self::encoding_for(item)?; use ParameterCardinality::*; let mut cardinality = ExactlyOne; @@ -288,7 +323,7 @@ impl TemplateCompiler { // Therefore, rest syntax is not allowed. let compiled_param = Parameter::new( name, - ParameterEncoding::Tagged, + parameter_encoding, cardinality, RestSyntaxPolicy::NotAllowed, ); @@ -308,12 +343,8 @@ impl TemplateCompiler { RestSyntaxPolicy::NotAllowed }; - let compiled_param = Parameter::new( - name, - ParameterEncoding::Tagged, - cardinality, - rest_syntax_policy, - ); + let compiled_param = + Parameter::new(name, parameter_encoding, cardinality, rest_syntax_policy); compiled_params.push(compiled_param); } let signature = MacroSignature::new(compiled_params)?; @@ -780,7 +811,7 @@ mod tests { use crate::lazy::expanded::compiler::TemplateCompiler; use crate::lazy::expanded::template::{ - ExprRange, TemplateBodyExpr, TemplateMacro, TemplateValue, + ExprRange, ParameterEncoding, TemplateBodyExpr, TemplateMacro, TemplateValue, }; use crate::lazy::expanded::{EncodingContext, EncodingContextRef}; use crate::{Int, IntoAnnotations, IonResult, Symbol}; @@ -980,6 +1011,29 @@ mod tests { Ok(()) } + #[test] + fn identity_with_flex_uint() -> IonResult<()> { + let resources = TestResources::new(); + let context = resources.context(); + + let expression = "(macro identity (flex_uint::x) x)"; + + let template = TemplateCompiler::compile_from_text(context.get_ref(), expression)?; + assert_eq!(template.name(), "identity"); + assert_eq!(template.signature().len(), 1); + assert_eq!( + template + .signature() + .parameters() + .first() + .unwrap() + .encoding(), + ParameterEncoding::FlexUInt + ); + expect_variable(&template, 0, 0)?; + Ok(()) + } + #[test] fn literal() -> IonResult<()> { let resources = TestResources::new(); diff --git a/src/lazy/expanded/e_expression.rs b/src/lazy/expanded/e_expression.rs index ca7aa546..d930bf9d 100644 --- a/src/lazy/expanded/e_expression.rs +++ b/src/lazy/expanded/e_expression.rs @@ -8,9 +8,9 @@ use crate::lazy::decoder::{Decoder, RawValueExpr}; use crate::lazy::encoding::TextEncoding_1_1; use crate::lazy::expanded::compiler::{ExpansionAnalysis, ExpansionSingleton}; use crate::lazy::expanded::macro_evaluator::{ - EExpArgGroupIterator, EExpressionArgGroup, MacroExpansion, MacroExpansionKind, MacroExpr, - MacroExprArgsIterator, MakeStringExpansion, RawEExpression, TemplateExpansion, ValueExpr, - ValuesExpansion, + AnnotateExpansion, EExpArgGroupIterator, EExpressionArgGroup, MacroExpansion, + MacroExpansionKind, MacroExpr, MacroExprArgsIterator, MakeStringExpansion, RawEExpression, + TemplateExpansion, ValueExpr, ValuesExpansion, }; use crate::lazy::expanded::macro_table::{MacroKind, MacroRef}; use crate::lazy::expanded::template::TemplateMacroRef; @@ -124,6 +124,7 @@ impl<'top, D: Decoder> EExpression<'top, D> { MacroKind::MakeString => { MacroExpansionKind::MakeString(MakeStringExpansion::new(arguments)) } + MacroKind::Annotate => MacroExpansionKind::Annotate(AnnotateExpansion::new(arguments)), MacroKind::Template(template_body) => { let template_ref = TemplateMacroRef::new(invoked_macro, template_body); environment = self.new_evaluation_environment()?; diff --git a/src/lazy/expanded/macro_evaluator.rs b/src/lazy/expanded/macro_evaluator.rs index 1e095cfc..ea8fdfa8 100644 --- a/src/lazy/expanded/macro_evaluator.rs +++ b/src/lazy/expanded/macro_evaluator.rs @@ -16,6 +16,7 @@ use std::fmt::{Debug, Formatter}; use std::ops::Range; use bumpalo::collections::{String as BumpString, Vec as BumpVec}; +use ice_code::ice; use crate::lazy::decoder::{Decoder, HasSpan, LazyRawValueExpr}; use crate::lazy::expanded::e_expression::{ @@ -33,7 +34,7 @@ use crate::lazy::str_ref::StrRef; use crate::lazy::text::raw::v1_1::arg_group::EExpArg; use crate::lazy::text::raw::v1_1::reader::MacroIdRef; use crate::result::IonFailure; -use crate::{ExpandedValueSource, HasRange, IonError, IonResult, ValueRef}; +use crate::{ExpandedValueSource, HasRange, IonError, IonResult, LazyValue, SymbolRef, ValueRef}; pub trait EExpArgGroupIterator<'top, D: Decoder>: Copy + Clone + Debug + Iterator>> @@ -316,6 +317,24 @@ pub enum ValueExpr<'top, D: Decoder> { } impl<'top, D: Decoder> ValueExpr<'top, D> { + pub fn expect_value_literal(&self) -> IonResult> { + match self { + ValueExpr::ValueLiteral(value) => Ok(*value), + _ => { + IonResult::decoding_error("expected a value literal, but found a macro invocation") + } + } + } + + pub fn expect_macro_invocation(&self) -> IonResult> { + match self { + ValueExpr::MacroInvocation(invocation) => Ok(*invocation), + _ => { + IonResult::decoding_error("expected a macro invocation, but found a value literal") + } + } + } + /// If this `ValueExpr` represents an entity encoded in te data stream, returns `Some(range)`. /// If it represents a template value or a constructed value, returns `None`. pub fn range(&self) -> Option> { @@ -348,6 +367,7 @@ pub enum MacroExpansionKind<'top, D: Decoder> { Void, Values(ValuesExpansion<'top, D>), MakeString(MakeStringExpansion<'top, D>), + Annotate(AnnotateExpansion<'top, D>), Template(TemplateExpansion<'top>), } @@ -374,7 +394,9 @@ impl<'top, D: Decoder> MacroExpansion<'top, D> { match self.next_step()? { // If the expansion produces anything other than a final value, there's a bug. MacroExpansionStep::FinalStep(Some(ValueExpr::ValueLiteral(value))) => Ok(value), - _ => unreachable!("e-expression-backed lazy values must yield a single value literal"), + _ => ice!(IonResult::decoding_error(format!( + "expansion of {self:?} was required to produce exactly one value", + ))), } } @@ -417,6 +439,7 @@ impl<'top, D: Decoder> MacroExpansion<'top, D> { Template(template_expansion) => template_expansion.next(context, environment), Values(values_expansion) => values_expansion.next(context, environment), MakeString(make_string_expansion) => make_string_expansion.next(context, environment), + Annotate(annotate_expansion) => annotate_expansion.next(context, environment), // `void` is trivial and requires no delegation Void => Ok(MacroExpansionStep::FinalStep(None)), } @@ -429,6 +452,7 @@ impl<'top, D: Decoder> Debug for MacroExpansion<'top, D> { MacroExpansionKind::Void => "void", MacroExpansionKind::Values(_) => "values", MacroExpansionKind::MakeString(_) => "make_string", + MacroExpansionKind::Annotate(_) => "annotate", MacroExpansionKind::Template(t) => { return if let Some(name) = t.template.name() { write!(f, "", name) @@ -959,10 +983,10 @@ impl<'top, D: Decoder> MakeStringExpansion<'top, D> { // Convert our BumpString<'bump> into a &'bump str that we can wrap in an `ExpandedValueRef` let constructed_text = buffer.into_bump_str(); - let value_ref: &'top ValueRef<'top, D> = context + let value_ref: &'top ValueRef<'top, _> = context .allocator() .alloc_with(|| ValueRef::String(StrRef::from(constructed_text))); - static EMPTY_ANNOTATIONS: &[&str] = &[]; + static EMPTY_ANNOTATIONS: &[SymbolRef] = &[]; Ok(MacroExpansionStep::FinalStep(Some( ValueExpr::ValueLiteral(LazyExpandedValue::from_constructed( @@ -974,6 +998,93 @@ impl<'top, D: Decoder> MakeStringExpansion<'top, D> { } } +#[derive(Copy, Clone, Debug)] +pub struct AnnotateExpansion<'top, D: Decoder> { + arguments: MacroExprArgsIterator<'top, D>, +} + +impl<'top, D: Decoder> AnnotateExpansion<'top, D> { + pub fn new(arguments: MacroExprArgsIterator<'top, D>) -> Self { + Self { arguments } + } + + pub fn next( + &mut self, + context: EncodingContextRef<'top>, + environment: Environment<'top, D>, + ) -> IonResult> { + let annotations_arg = match self.arguments.next() { + None => { + return IonResult::decoding_error("`annotate` takes two parameters, received none") + } + Some(Err(e)) => return Err(e), + Some(Ok(expr)) => expr, + }; + + // Collect all of the annotations (Strings/Symbols) from the output of the first arg expr + let mut annotations = BumpVec::new_in(context.allocator()); + match annotations_arg { + ValueExpr::ValueLiteral(value_literal) => { + annotations.push(value_literal.read_resolved()?.expect_text()?.into()) + } + ValueExpr::MacroInvocation(invocation) => { + let mut evaluator = MacroEvaluator::new_with_environment(environment); + evaluator.push(invocation.expand(environment)?); + while !evaluator.is_empty() { + match evaluator.next()? { + None => {} + Some(value) => { + let symbol_text = value.read_resolved()?.expect_text()?.into(); + annotations.push(symbol_text); + } + } + } + } + } + + // Get the second argument, which represents the value to annotate + let value_arg = match self.arguments.next() { + None => { + return IonResult::decoding_error("`annotate` takes two parameters, received one") + } + Some(Err(e)) => return Err(e), + Some(Ok(expr)) => expr, + }; + + // If there are more arguments in the iterator, there's an arity mismatch. + if !self.arguments.is_exhausted() { + return IonResult::decoding_error( + "`annotate` takes two parameters, received three or more", + ); + } + + // Evaluate the value argument if needed to get the value we'll be annotating further. + let expanded_value_to_annotate = match value_arg { + ValueExpr::ValueLiteral(value_literal) => value_literal, + ValueExpr::MacroInvocation(invocation) => { + invocation.expand(environment)?.expand_singleton()? + } + }; + + // If the value to annotate already has annotations, append them to the end of our vec. + let value_to_annotate = LazyValue::new(expanded_value_to_annotate); + for annotation in value_to_annotate.annotations() { + annotations.push(annotation?); + } + + // Read the value and store the resulting ValueRef in the bump. + let data = value_to_annotate.read()?; + let value_ref = context.allocator().alloc_with(|| data); + + // Combine our annotations vec and our value_ref to make a 'Constructed' value. + let annotated_value = + LazyExpandedValue::from_constructed(context, annotations.into_bump_slice(), value_ref); + Ok(MacroExpansionStep::FinalStep(Some( + ValueExpr::ValueLiteral(annotated_value), + ))) + } +} + // ===== Implementation of template macro expansion ===== /// The evaluation state of a template expansion. @@ -1036,7 +1147,7 @@ impl<'top> TemplateExpansion<'top> { #[cfg(test)] mod tests { - use crate::{v1_1, ElementReader, IonResult, Reader}; + use crate::{v1_1, ElementReader, Int, IonResult, Reader}; /// Reads `input` and `expected` using an expanding reader and asserts that their output /// is the same. @@ -1091,6 +1202,25 @@ mod tests { ) } + #[test] + fn annotate() -> IonResult<()> { + eval_template_invocation( + r#"(macro foo (x) (annotate (values "bar" "baz" "quux") x))"#, + r#" + (:foo 5) + (:foo quuz) + (:foo {a: 1, b: 2}) + (:foo already::annotated::5) + "#, + r#" + bar::baz::quux::5 + bar::baz::quux::quuz + bar::baz::quux::{a: 1, b: 2} + bar::baz::quux::already::annotated::5 + "#, + ) + } + mod cardinality { mod bang { @@ -1307,6 +1437,37 @@ mod tests { .unwrap() } + #[test] + fn flex_uint_parameters() -> IonResult<()> { + let template_definition = "(macro int_pair (flex_uint::$x flex_uint::$y) (values $x $y)))"; + let tests: &[(&[u8], (u64, u64))] = &[ + // invocation+args, expected arg values + (&[0x04, 0x01, 0x01], (0, 0)), + (&[0x04, 0x09, 0x03], (4, 1)), + (&[0x04, 0x0B, 0x0D], (5, 6)), // TODO: non-required cardinalities + ]; + + for test in tests { + let mut stream = vec![0xE0, 0x01, 0x00, 0xEA]; + stream.extend_from_slice(test.0); + println!( + "stream {:02X?} -> pair ({}, {})", + test.0, test.1 .0, test.1 .1 + ); + let mut reader = Reader::new(v1_1::Binary, stream.as_slice())?; + reader.register_template_src(template_definition)?; + assert_eq!( + reader.next()?.unwrap().read()?.expect_int()?, + Int::from(test.1 .0) + ); + assert_eq!( + reader.next()?.unwrap().read()?.expect_int()?, + Int::from(test.1 .1) + ); + } + Ok(()) + } + #[test] fn it_takes_all_kinds() -> IonResult<()> { eval_template_invocation( diff --git a/src/lazy/expanded/macro_table.rs b/src/lazy/expanded/macro_table.rs index 390997e5..ff4f1838 100644 --- a/src/lazy/expanded/macro_table.rs +++ b/src/lazy/expanded/macro_table.rs @@ -1,3 +1,9 @@ +use std::borrow::Cow; +use std::collections::HashMap; + +use delegate::delegate; +use rustc_hash::FxHashMap; + use crate::lazy::expanded::compiler::{ExpansionAnalysis, ExpansionSingleton}; use crate::lazy::expanded::template::{ MacroSignature, Parameter, ParameterCardinality, ParameterEncoding, RestSyntaxPolicy, @@ -6,9 +12,6 @@ use crate::lazy::expanded::template::{ use crate::lazy::text::raw::v1_1::reader::{MacroAddress, MacroIdRef}; use crate::result::IonFailure; use crate::{IonResult, IonType}; -use delegate::delegate; -use std::borrow::Cow; -use std::collections::HashMap; #[derive(Debug, Clone, PartialEq)] pub struct Macro { @@ -100,6 +103,7 @@ pub enum MacroKind { Void, Values, MakeString, + Annotate, Template(TemplateBody), } @@ -156,7 +160,7 @@ impl<'top> MacroRef<'top> { pub struct MacroTable { macros_by_address: Vec, // Maps names to an address that can be used to query the Vec above. - macros_by_name: HashMap, + macros_by_name: FxHashMap, } impl Default for MacroTable { @@ -166,12 +170,16 @@ impl Default for MacroTable { } impl MacroTable { - pub const SYSTEM_MACRO_KINDS: &'static [MacroKind] = - &[MacroKind::Void, MacroKind::Values, MacroKind::MakeString]; + pub const SYSTEM_MACRO_KINDS: &'static [MacroKind] = &[ + MacroKind::Void, + MacroKind::Values, + MacroKind::MakeString, + MacroKind::Annotate, + ]; pub const NUM_SYSTEM_MACROS: usize = Self::SYSTEM_MACRO_KINDS.len(); // When a user defines new macros, this is the first ID that will be assigned. This value // is expected to change as development continues. It is currently used in several unit tests. - pub const FIRST_USER_MACRO_ID: usize = 3; + pub const FIRST_USER_MACRO_ID: usize = 4; pub fn new() -> Self { let macros_by_id = vec![ @@ -227,6 +235,31 @@ impl MacroTable { }), }, ), + Macro::named( + "annotate", + MacroSignature::new(vec![ + Parameter::new( + "annotations", + ParameterEncoding::Tagged, + ParameterCardinality::ZeroOrMore, + RestSyntaxPolicy::NotAllowed, + ), + Parameter::new( + "value_to_annotate", + ParameterEncoding::Tagged, + ParameterCardinality::ExactlyOne, + RestSyntaxPolicy::NotAllowed, + ), + ]) + .unwrap(), + MacroKind::Annotate, + ExpansionAnalysis { + could_produce_system_value: true, + must_produce_exactly_one_value: true, + can_be_lazily_evaluated_at_top_level: false, + expansion_singleton: None, + }, + ), ]; let mut macros_by_name = HashMap::default(); for (id, mac) in macros_by_id.iter().enumerate() { diff --git a/src/lazy/expanded/mod.rs b/src/lazy/expanded/mod.rs index 543f08e4..289aef6f 100644 --- a/src/lazy/expanded/mod.rs +++ b/src/lazy/expanded/mod.rs @@ -35,7 +35,6 @@ use std::cell::{Cell, UnsafeCell}; use std::collections::HashMap; use std::fmt::{Debug, Formatter}; -use std::iter::empty; use std::ops::{Deref, Range}; use bumpalo::Bump as BumpAllocator; @@ -70,7 +69,7 @@ use crate::raw_symbol_ref::AsRawSymbolRef; use crate::result::IonFailure; use crate::{ Catalog, Decimal, HasRange, HasSpan, Int, IonResult, IonType, RawSymbolRef, RawVersionMarker, - Span, SymbolTable, Timestamp, ValueRef, + Span, SymbolRef, SymbolTable, Timestamp, ValueRef, }; // All of these modules (and most of their types) are currently `pub` as the lazy reader is gated @@ -149,6 +148,17 @@ impl EncodingContext { pub fn allocator(&self) -> &BumpAllocator { &self.allocator } + + // TODO: These methods are temporary; they will be removed once shared modules are supported. + pub fn register_template_src(&mut self, template_definition: &str) -> IonResult { + let template_macro: TemplateMacro = + TemplateCompiler::compile_from_text(self.get_ref(), template_definition)?; + self.register_template(template_macro) + } + + pub fn register_template(&mut self, template_macro: TemplateMacro) -> IonResult { + self.macro_table.add_macro(template_macro) + } } #[derive(Debug, Copy, Clone)] @@ -634,7 +644,7 @@ pub enum ExpandedValueSource<'top, D: Decoder> { // it to `Never` and the compiler can eliminate this code path where applicable. // Constructed data stored in the bump allocator. Holding references instead of the data // itself allows this type (and those that contain it) to impl `Copy`. - &'top [&'top str], // Annotations (if any) + &'top [SymbolRef<'top>], // Annotations (if any) &'top ValueRef<'top, D>, // Value ), } @@ -749,7 +759,7 @@ impl<'top, Encoding: Decoder> LazyExpandedValue<'top, Encoding> { pub(crate) fn from_constructed( context: EncodingContextRef<'top>, - annotations: &'top [&'top str], + annotations: &'top [SymbolRef<'top>], value: &'top ValueRef<'top, Encoding>, ) -> Self { Self { @@ -805,12 +815,11 @@ impl<'top, Encoding: Decoder> LazyExpandedValue<'top, Encoding> { Template(_, element) => ExpandedAnnotationsIterator::new( ExpandedAnnotationsSource::Template(SymbolsIterator::new(element.annotations())), ), - Constructed(_annotations, _value) => { - // TODO: iterate over constructed annotations + Constructed(annotations, _value) => { // For now we return an empty iterator - ExpandedAnnotationsIterator::new(ExpandedAnnotationsSource::Constructed(Box::new( - empty(), - ))) + ExpandedAnnotationsIterator::new(ExpandedAnnotationsSource::Constructed( + annotations.iter(), + )) } EExp(eexp) => { let annotations_range = 0..eexp.require_expansion_singleton().num_annotations(); @@ -928,8 +937,7 @@ impl<'top, Encoding: Decoder> From> for LazyLis pub enum ExpandedAnnotationsSource<'top, Encoding: Decoder> { ValueLiteral(Encoding::AnnotationsIterator<'top>), Template(SymbolsIterator<'top>), - // TODO: This is a placeholder impl and always returns an empty iterator - Constructed(Box>> + 'top>), + Constructed(std::slice::Iter<'top, SymbolRef<'top>>), } pub struct ExpandedAnnotationsIterator<'top, Encoding: Decoder> { @@ -952,7 +960,7 @@ impl<'top, Encoding: Decoder> Iterator for ExpandedAnnotationsIterator<'top, Enc Template(element_annotations_iter) => element_annotations_iter .next() .map(|symbol| Ok(symbol.as_raw_symbol_token_ref())), - Constructed(iter) => iter.next(), + Constructed(iter) => Some(Ok(iter.next()?.as_raw_symbol_token_ref())), } } } diff --git a/src/lazy/expanded/template.rs b/src/lazy/expanded/template.rs index 50cbd598..04272525 100644 --- a/src/lazy/expanded/template.rs +++ b/src/lazy/expanded/template.rs @@ -12,7 +12,7 @@ use crate::lazy::expanded::{ EncodingContextRef, ExpandedValueSource, LazyExpandedValue, TemplateVariableReference, }; use crate::lazy::expanded::compiler::ExpansionAnalysis; -use crate::lazy::expanded::macro_evaluator::{MacroEvaluator, MacroExpansion, MacroExpansionKind, MacroExpr, MacroExprArgsIterator, MakeStringExpansion, TemplateExpansion, ValueExpr, ValuesExpansion}; +use crate::lazy::expanded::macro_evaluator::{AnnotateExpansion, MacroEvaluator, MacroExpansion, MacroExpansionKind, MacroExpr, MacroExprArgsIterator, MakeStringExpansion, TemplateExpansion, ValueExpr, ValuesExpansion}; use crate::lazy::expanded::macro_table::{Macro, MacroKind, MacroRef}; use crate::lazy::expanded::r#struct::UnexpandedField; use crate::lazy::expanded::sequence::Environment; @@ -56,6 +56,7 @@ impl Parameter { pub enum ParameterEncoding { /// A 'tagged' type is one whose binary encoding begins with an opcode (sometimes called a 'tag'.) Tagged, + FlexUInt, // TODO: tagless types, including fixed-width types and macros } @@ -899,6 +900,7 @@ impl<'top> TemplateMacroInvocation<'top> { MacroKind::MakeString => { MacroExpansionKind::MakeString(MakeStringExpansion::new(arguments)) } + MacroKind::Annotate => MacroExpansionKind::Annotate(AnnotateExpansion::new(arguments)), MacroKind::Template(template_body) => { let template_ref = TemplateMacroRef::new(macro_ref, template_body); environment = self.new_evaluation_environment(environment)?; diff --git a/src/lazy/streaming_raw_reader.rs b/src/lazy/streaming_raw_reader.rs index e59a4bf3..4bd632be 100644 --- a/src/lazy/streaming_raw_reader.rs +++ b/src/lazy/streaming_raw_reader.rs @@ -342,7 +342,6 @@ impl IonDataSource for IonStream { /// /// In general, this trait is implemented by mapping `Self` to either: /// * [`IonSlice`], if `Self` is an implementation of `AsRef<[u8]>` -/// OR /// * [`IonStream`], if `Self` is an implementation of `io::Read` pub trait IonInput { type DataSource: IonDataSource; diff --git a/src/lazy/system_reader.rs b/src/lazy/system_reader.rs index 39286654..69840372 100644 --- a/src/lazy/system_reader.rs +++ b/src/lazy/system_reader.rs @@ -1087,11 +1087,11 @@ mod tests { // This directive defines two more. assert_eq!(new_macro_table.len(), 2 + MacroTable::NUM_SYSTEM_MACROS); assert_eq!( - new_macro_table.macro_with_id(3), + new_macro_table.macro_with_id(4), new_macro_table.macro_with_name("seventeen") ); assert_eq!( - new_macro_table.macro_with_id(4), + new_macro_table.macro_with_id(5), new_macro_table.macro_with_name("twelve") ); diff --git a/src/lazy/value_ref.rs b/src/lazy/value_ref.rs index ecde5ee8..974da5ba 100644 --- a/src/lazy/value_ref.rs +++ b/src/lazy/value_ref.rs @@ -135,7 +135,7 @@ impl<'top, D: Decoder> ValueRef<'top, D> { if let ValueRef::Int(i) = self { Ok(i) } else { - IonResult::decoding_error("expected an int") + IonResult::decoding_error(format!("expected an int but found a(n) {self:?}")) } } @@ -143,7 +143,7 @@ impl<'top, D: Decoder> ValueRef<'top, D> { if let ValueRef::Int(i) = self { i.expect_i64() } else { - IonResult::decoding_error("expected an int (i64)") + IonResult::decoding_error(format!("expected an int (i64) but found a(n) {self:?}")) } } @@ -242,7 +242,7 @@ impl<'top, D: Decoder> ValueRef<'top, D> { if let ValueRef::Struct(s) = self { Ok(s) } else { - IonResult::decoding_error("expected a struct") + IonResult::decoding_error(format!("expected a struct but found a(n) {self:?}")) } } diff --git a/src/lib.rs b/src/lib.rs index fb7ab133..bf56eef4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -194,7 +194,7 @@ pub use crate::lazy::decoder::{HasRange, HasSpan}; pub use crate::lazy::span::Span; macro_rules! v1_x_reader_writer { ($visibility:vis) => { - #[allow(unused_imports)] + #[allow(unused_imports)] $visibility use crate::{ lazy::streaming_raw_reader::{IonInput, IonSlice, IonStream}, lazy::decoder::Decoder, @@ -286,6 +286,7 @@ macro_rules! v1_x_tooling_apis { LazyExpandedField, LazyExpandedFieldName }, + lazy::expanded::e_expression::{EExpression, EExpressionArgsIterator}, lazy::expanded::sequence::{Environment, ExpandedListSource, ExpandedSExpSource, LazyExpandedList, LazyExpandedSExp}, lazy::expanded::{LazyExpandedValue, ExpandingReader, ExpandedValueSource, ExpandedAnnotationsSource, ExpandedValueRef}, lazy::system_stream_item::SystemStreamItem, diff --git a/src/symbol_table.rs b/src/symbol_table.rs index 3671c4c4..cf82d2ac 100644 --- a/src/symbol_table.rs +++ b/src/symbol_table.rs @@ -1,6 +1,7 @@ -use std::collections::HashMap; use std::sync::Arc; +use rustc_hash::FxHashMap; + use crate::constants::v1_0; use crate::lazy::any_encoding::IonVersion; use crate::{Symbol, SymbolId}; @@ -12,7 +13,7 @@ use crate::{Symbol, SymbolId}; pub struct SymbolTable { ion_version: IonVersion, symbols_by_id: Vec, - ids_by_text: HashMap, + ids_by_text: FxHashMap, } impl Default for SymbolTable { @@ -29,7 +30,7 @@ impl SymbolTable { let mut symbol_table = SymbolTable { ion_version, symbols_by_id: Vec::with_capacity(INITIAL_SYMBOLS_CAPACITY), - ids_by_text: HashMap::new(), + ids_by_text: FxHashMap::default(), }; symbol_table.initialize(); symbol_table diff --git a/src/text/text_formatter.rs b/src/text/text_formatter.rs index 43e684c2..461e0c47 100644 --- a/src/text/text_formatter.rs +++ b/src/text/text_formatter.rs @@ -246,6 +246,7 @@ impl<'a, W: std::fmt::Write> FmtValueFormatter<'a, W> { /// * `first_name` /// * `name_1` /// * `$name` + /// /// Unlike other symbols, identifiers don't have to be wrapped in quotes. fn token_is_identifier(token: &str) -> bool { if token.is_empty() { diff --git a/src/types/decimal/mod.rs b/src/types/decimal/mod.rs index e58cbe3d..cd9f9344 100644 --- a/src/types/decimal/mod.rs +++ b/src/types/decimal/mod.rs @@ -292,6 +292,7 @@ impl TryFrom for Decimal { /// * Infinity /// * Negative infinity /// * NaN (not-a-number) + /// /// Otherwise, returns Ok. /// /// Because Decimal can represent negative zero, f64::neg_zero() IS supported. diff --git a/src/types/integer.rs b/src/types/integer.rs index 738b9453..52435a08 100644 --- a/src/types/integer.rs +++ b/src/types/integer.rs @@ -9,7 +9,7 @@ use std::mem; use std::ops::{Add, Neg}; /// Represents an unsigned integer of any size. -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct UInt { pub(crate) data: u128, } @@ -67,26 +67,6 @@ impl UInt { } } -impl PartialEq for UInt { - fn eq(&self, other: &Self) -> bool { - self.data == other.data - } -} - -impl Eq for UInt {} - -impl PartialOrd for UInt { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for UInt { - fn cmp(&self, other: &Self) -> Ordering { - self.data.cmp(&other.data) - } -} - // This macro makes it possible to turn unsigned int primitives into a UInteger using `.into()`. // Note that it works for both signed and unsigned ints. The resulting UInteger will be the // absolute value of the integer being converted. @@ -213,7 +193,7 @@ macro_rules! impl_small_unsigned_int_try_from_uint { impl_small_unsigned_int_try_from_uint!(u8, u16, u32, u64, u128, usize); -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] /// A signed integer of arbitrary size. /// ``` /// # use ion_rs::IonResult; @@ -246,7 +226,7 @@ impl Int { } /// Returns a [`UInt`] representing the unsigned magnitude of this `Int`. - pub(crate) fn unsigned_abs(&self) -> UInt { + pub fn unsigned_abs(&self) -> UInt { self.data.unsigned_abs().into() } @@ -258,10 +238,25 @@ impl Int { /// If this value is small enough to fit in an `i64`, returns `Ok(i64)`. Otherwise, /// returns a [`DecodingError`](IonError::Decoding). + #[inline] pub fn expect_i64(&self) -> IonResult { - self.as_i64().ok_or_else(|| { - IonError::decoding_error(format!("Int {self} is too large to fit in an i64.")) - }) + self.as_i64().ok_or_else( + #[inline(never)] + || IonError::decoding_error(format!("Int {self} is too large to fit in an i64.")), + ) + } + + #[inline(always)] + pub fn as_u32(&self) -> Option { + u32::try_from(self.data).ok() + } + + #[inline] + pub fn expect_u32(&self) -> IonResult { + self.as_u32().ok_or_else( + #[inline(never)] + || IonError::decoding_error(format!("Int {self} is too large to fit in a u32.")), + ) } /// If this value is small enough to fit in an `i128`, returns `Ok(i128)`. Otherwise, @@ -284,15 +279,6 @@ impl Int { Some(self.data) } } - -impl PartialEq for Int { - fn eq(&self, other: &Self) -> bool { - self.data.eq(&other.data) - } -} - -impl Eq for Int {} - impl IonEq for Int { fn ion_eq(&self, other: &Self) -> bool { self == other @@ -313,18 +299,6 @@ impl Neg for Int { } } -impl PartialOrd for Int { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for Int { - fn cmp(&self, other: &Self) -> Ordering { - self.data.cmp(&other.data) - } -} - impl Add for Int { type Output = Int;