Skip to content

Commit

Permalink
Adds the (annotate ...) form and bin read support for flex_uint p…
Browse files Browse the repository at this point in the history
…arameters (#801)
  • Loading branch information
zslayton committed Aug 11, 2024
1 parent b245172 commit e436f96
Show file tree
Hide file tree
Showing 26 changed files with 472 additions and 112 deletions.
2 changes: 1 addition & 1 deletion src/ion_data/ion_eq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use std::ops::Deref;
/// * Decimal `0.0` and `-0.0` are mathematically equivalent but not Ion equivalent.
/// * Decimal `0.0` and `0.00` are mathematically equivalent but not Ion equivalent.
/// * Timestamps representing the same point in time at different precisions or at different
/// timezone offsets are not Ion equivalent.
/// timezone offsets are not Ion equivalent.
pub trait IonEq {
fn ion_eq(&self, other: &Self) -> bool;
}
Expand Down
2 changes: 1 addition & 1 deletion src/ion_hash/type_qualifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ pub(crate) struct TypeQualifier(u8);
/// From the spec:
///
/// > TQ: is a type qualifier octet consisting of a four-bit type code T
/// followed by a four-bit qualifier Q
/// > followed by a four-bit qualifier Q
///
/// To compute a TQ from a `T` and a `Q`, all we need to is a bitwise shift!
const fn combine(ion_type_code: IonTypeCode, q: u8) -> TypeQualifier {
Expand Down
7 changes: 7 additions & 0 deletions src/lazy/binary/encoded_value.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::lazy::binary::raw::type_descriptor::Header;
use crate::lazy::binary::raw::v1_1::immutable_buffer::AnnotationsEncoding;
use crate::lazy::expanded::template::ParameterEncoding;
use crate::IonType;
use std::ops::Range;

Expand Down Expand Up @@ -40,6 +41,7 @@ impl EncodedHeader for Header {
/// without re-parsing its header information each time.
#[derive(Clone, Copy, Debug, PartialEq)]
pub(crate) struct EncodedValue<HeaderType: EncodedHeader> {
pub(crate) encoding: ParameterEncoding,
// If the compiler decides that a value is too large to be moved/copied with inline code,
// it will relocate the value using memcpy instead. This can be quite slow by comparison.
//
Expand Down Expand Up @@ -88,6 +90,8 @@ pub(crate) struct EncodedValue<HeaderType: EncodedHeader> {
pub annotations_encoding: AnnotationsEncoding,
// The offset of the type descriptor byte within the overall input stream.
pub header_offset: usize,
// If this value was written with a tagless encoding, this will be 0. Otherwise, it's 1.
pub opcode_length: u8,
// The number of bytes used to encode the optional length VarUInt following the header byte.
pub length_length: u8,
// The number of bytes used to encode the value itself, not including the header byte
Expand Down Expand Up @@ -258,12 +262,14 @@ mod tests {
use crate::lazy::binary::encoded_value::EncodedValue;
use crate::lazy::binary::raw::type_descriptor::Header;
use crate::lazy::binary::raw::v1_1::immutable_buffer::AnnotationsEncoding;
use crate::lazy::expanded::template::ParameterEncoding;
use crate::{IonResult, IonType};

#[test]
fn accessors() -> IonResult<()> {
// 3-byte String with 1-byte annotation
let value = EncodedValue {
encoding: ParameterEncoding::Tagged,
header: Header {
ion_type: IonType::String,
ion_type_code: IonTypeCode::String,
Expand All @@ -273,6 +279,7 @@ mod tests {
annotations_sequence_length: 1,
annotations_encoding: AnnotationsEncoding::SymbolAddress,
header_offset: 200,
opcode_length: 1,
length_length: 0,
value_body_length: 3,
total_length: 7,
Expand Down
3 changes: 3 additions & 0 deletions src/lazy/binary/immutable_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use crate::lazy::decoder::LazyRawFieldExpr;
use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt;
use crate::lazy::encoder::binary::v1_1::flex_uint::FlexUInt;
use crate::lazy::encoding::BinaryEncoding_1_0;
use crate::lazy::expanded::template::ParameterEncoding;
use crate::result::IonFailure;
use crate::{Int, IonError, IonResult, IonType};

Expand Down Expand Up @@ -703,12 +704,14 @@ impl<'a> ImmutableBuffer<'a> {
}

let encoded_value = EncodedValue {
encoding: ParameterEncoding::Tagged,
header,
// If applicable, these are populated by the caller: `read_annotated_value()`
annotations_header_length: 0,
annotations_sequence_length: 0,
annotations_encoding: AnnotationsEncoding::SymbolAddress,
header_offset,
opcode_length: 1,
length_length,
value_body_length: value_length,
total_length,
Expand Down
30 changes: 23 additions & 7 deletions src/lazy/binary/raw/v1_1/e_expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ impl<'top> Iterator for BinaryEExpArgsInputIter<'top> {
// argument encoding bitmap.
ArgGrouping::ValueExprLiteral
};
// TODO: Tagless encodings
// TODO: More tagless encodings
let (arg_expr, remaining_input) = match arg_grouping {
// If the encoding is `empty`, there's nothing to do. Make an empty slice at the current
// offset and build an empty BinaryEExpArgGroup with it.
Expand All @@ -282,14 +282,30 @@ impl<'top> Iterator for BinaryEExpArgsInputIter<'top> {
(EExpArg::new(parameter, expr), self.remaining_args_buffer)
}
// If it's a tagged value expression, parse it as usual.
ArgGrouping::ValueExprLiteral => {
let (expr, remaining) = try_or_some_err! {
self
ArgGrouping::ValueExprLiteral => match parameter.encoding() {
ParameterEncoding::Tagged => {
let (expr, remaining) = try_or_some_err! {
self
.remaining_args_buffer
.expect_eexp_arg_expr("reading tagged e-expr arg")
};
(EExpArg::new(parameter, expr), remaining)
}
};
(EExpArg::new(parameter, expr), remaining)
}
ParameterEncoding::FlexUInt => {
let (flex_uint_lazy_value, remaining) = try_or_some_err! {
self.remaining_args_buffer.read_flex_uint_as_lazy_value()
};
let value_ref = &*self
.remaining_args_buffer
.context()
.allocator()
.alloc_with(|| flex_uint_lazy_value);
(
EExpArg::new(parameter, EExpArgExpr::ValueLiteral(value_ref)),
remaining,
)
}
},
// If it's an argument group...
ArgGrouping::ArgGroup => {
//...then it starts with a FlexUInt that indicates whether the group is length-prefixed
Expand Down
36 changes: 36 additions & 0 deletions src/lazy/binary/raw/v1_1/immutable_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt;
use crate::lazy::encoder::binary::v1_1::flex_sym::FlexSym;
use crate::lazy::encoder::binary::v1_1::flex_uint::FlexUInt;
use crate::lazy::expanded::macro_table::MacroRef;
use crate::lazy::expanded::template::ParameterEncoding;
use crate::lazy::expanded::EncodingContextRef;
use crate::lazy::text::raw::v1_1::arg_group::EExpArgExpr;
use crate::result::IonFailure;
Expand Down Expand Up @@ -223,6 +224,38 @@ impl<'a> ImmutableBuffer<'a> {
Ok((flex_uint, remaining))
}

pub fn read_flex_uint_as_lazy_value(self) -> ParseResult<'a, LazyRawBinaryValue_1_1<'a>> {
let Some(first_byte) = self.peek_next_byte() else {
return IonResult::incomplete("a flex_uint", self.offset());
};
let size_in_bytes = match first_byte {
// If the first byte is zero, this flex_uint is encoded using 9+ bytes. That's pretty
// uncommon, so we'll just use the existing logic in the `read` method and discard the
// value. If this shows up in profiles, it can be optimized further.
0 => FlexUInt::read(self.bytes(), self.offset())?.size_in_bytes(),
_ => first_byte.trailing_zeros() as usize + 1,
};

if self.len() < size_in_bytes {
return IonResult::incomplete("reading a flex_uint value", self.offset());
}
// XXX: This *doesn't* slice `self` because FlexUInt::read() is faster if the input
// is at least the size of a u64.
let matched_input = self;
let remaining_input = self.slice_to_end(size_in_bytes);
let value = LazyRawBinaryValue_1_1::for_flex_uint(matched_input);
Ok((value, remaining_input))
}

pub fn slice_to_end(&self, offset: usize) -> ImmutableBuffer<'a> {
ImmutableBuffer {
data: &self.data[offset..],
// stream offset + local offset
offset: self.offset + offset,
context: self.context,
}
}

#[inline]
pub fn read_flex_sym(self) -> ParseResult<'a, FlexSym<'a>> {
let flex_sym = FlexSym::read(self.bytes(), self.offset())?;
Expand Down Expand Up @@ -448,12 +481,15 @@ impl<'a> ImmutableBuffer<'a> {
+ value_length;

let encoded_value = EncodedValue {
encoding: ParameterEncoding::Tagged,
header,
// If applicable, these are populated by the caller: `read_annotated_value()`
annotations_header_length: 0,
annotations_sequence_length: 0,
annotations_encoding: AnnotationsEncoding::SymbolAddress,
header_offset,
// This is a tagged value, so its opcode length is always 1
opcode_length: 1,
length_length,
value_body_length: value_length,
total_length,
Expand Down
51 changes: 51 additions & 0 deletions src/lazy/binary/raw/v1_1/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,16 @@
use std::fmt::Debug;
use std::ops::Range;

use crate::lazy::binary::raw::v1_1::immutable_buffer::AnnotationsEncoding;
use crate::lazy::binary::raw::v1_1::r#struct::LazyRawBinaryStruct_1_1;
use crate::lazy::binary::raw::v1_1::sequence::{LazyRawBinaryList_1_1, LazyRawBinarySExp_1_1};
use crate::lazy::bytes_ref::BytesRef;
use crate::lazy::decoder::{HasRange, HasSpan, RawVersionMarker};
use crate::lazy::expanded::template::ParameterEncoding;
use crate::lazy::expanded::EncodingContextRef;
use crate::lazy::span::Span;
use crate::lazy::str_ref::StrRef;
use crate::v1_1::FlexUInt;
use crate::{
lazy::{
binary::{
Expand Down Expand Up @@ -132,6 +135,12 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1
}

fn read(&self) -> IonResult<RawValueRef<'top, BinaryEncoding_1_1>> {
if self.encoded_value.encoding == ParameterEncoding::FlexUInt {
let flex_uint = FlexUInt::read(self.input.bytes(), self.input.offset())?;
let int: Int = flex_uint.value().into();
return Ok(RawValueRef::Int(int));
}

if self.is_null() {
let ion_type = if self.encoded_value.header.ion_type_code == OpcodeType::TypedNull {
let body = self.value_body();
Expand Down Expand Up @@ -176,6 +185,11 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1
&self,
context: EncodingContextRef<'top>,
) -> IonResult<ValueRef<'top, BinaryEncoding_1_1>> {
if self.encoded_value.encoding == ParameterEncoding::FlexUInt {
let flex_uint = FlexUInt::read(self.input.bytes(), self.input.offset())?;
let int: Int = flex_uint.value().into();
return Ok(ValueRef::Int(int));
}
if self.is_null() {
return Ok(ValueRef::Null(self.ion_type()));
}
Expand All @@ -194,6 +208,12 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1
value: &'a LazyRawBinaryValue_1_1<'a>,
context: EncodingContextRef<'a>,
) -> IonResult<ValueRef<'a, BinaryEncoding_1_1>> {
if value.encoded_value.encoding == ParameterEncoding::FlexUInt {
let flex_uint = FlexUInt::read(value.input.bytes(), value.input.offset())?;
let int: Int = flex_uint.value().into();
return Ok(ValueRef::Int(int));
}

if value.is_null() {
return Ok(ValueRef::Null(value.ion_type()));
}
Expand Down Expand Up @@ -246,6 +266,37 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1
}

impl<'top> LazyRawBinaryValue_1_1<'top> {
/// Constructs a lazy raw binary value from an input buffer slice that has been found to contain
/// a complete `FlexUInt`.
pub(crate) fn for_flex_uint(input: ImmutableBuffer<'top>) -> Self {
let encoded_value = EncodedValue {
encoding: ParameterEncoding::FlexUInt,
header: Header {
// It is an int, that's true.
ion_type: IonType::Int,
// Nonsense values for now
ion_type_code: OpcodeType::Nop,
low_nibble: 0,
},

// FlexUInts cannot have any annotations
annotations_header_length: 0,
annotations_sequence_length: 0,
annotations_encoding: AnnotationsEncoding::SymbolAddress,

header_offset: input.offset(),
opcode_length: 0,
length_length: 0,
value_body_length: input.len(),
total_length: input.len(),
};

LazyRawBinaryValue_1_1 {
encoded_value,
input,
}
}

/// Indicates the Ion data type of this value. Calling this method does not require additional
/// parsing of the input stream.
pub fn ion_type(&'top self) -> IonType {
Expand Down
9 changes: 7 additions & 2 deletions src/lazy/encoder/binary/v1_1/container_writers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::lazy::encoder::value_writer::internal::{FieldEncoder, MakeValueWriter
use crate::lazy::encoder::value_writer::{EExpWriter, SequenceWriter, StructWriter};
use crate::lazy::encoder::write_as_ion::WriteAsIon;
use crate::raw_symbol_ref::AsRawSymbolRef;
use crate::IonResult;
use crate::{IonResult, UInt};

/// A helper type that holds fields and logic that is common to [`BinaryListWriter_1_1`],
/// [`BinarySExpWriter_1_1`], and [`BinaryStructWriter_1_1`].
Expand Down Expand Up @@ -393,4 +393,9 @@ impl<'value, 'top> SequenceWriter for BinaryEExpWriter_1_1<'value, 'top> {
}
}

impl<'value, 'top> EExpWriter for BinaryEExpWriter_1_1<'value, 'top> {}
impl<'value, 'top> EExpWriter for BinaryEExpWriter_1_1<'value, 'top> {
fn write_flex_uint(&mut self, value: impl Into<UInt>) -> IonResult<()> {
FlexUInt::write(self.buffer, value)?;
Ok(())
}
}
3 changes: 1 addition & 2 deletions src/lazy/encoder/binary/v1_1/flex_uint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ impl FlexUInt {
false,
);
}

let flex_uint = Self::read_small_flex_uint(input);
Ok(flex_uint)
}
Expand Down Expand Up @@ -115,7 +114,7 @@ impl FlexUInt {
/// (`support_sign_extension=true`), then the six bits beyond the supported 64 must all be the
/// same as the 64th (highest supported) bit. This will allow encodings of up to 70 bits
/// to be correctly interpreted as positive, negative, or beyond the bounds of the 64 bit
/// limitation.
/// limitation.
pub(crate) fn read_flex_primitive_as_uint(
input: &[u8],
offset: usize,
Expand Down
7 changes: 4 additions & 3 deletions src/lazy/encoder/binary/v1_1/value_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,7 @@ impl<'value, 'top> BinaryValueWriter_1_1<'value, 'top> {
MacroIdRef::LocalAddress(_address) => {
todo!("macros with addresses higher than 64");
}
}
};
Ok(BinaryEExpWriter_1_1::new(
self.allocator,
self.encoding_buffer,
Expand Down Expand Up @@ -832,6 +832,9 @@ impl<'value, 'top> BinaryAnnotatedValueWriter_1_1<'value, 'top> {

#[cfg(test)]
mod tests {
use num_traits::FloatConst;
use rstest::rstest;

use crate::ion_data::IonEq;
use crate::lazy::encoder::annotate::{Annotatable, Annotated};
use crate::lazy::encoder::annotation_seq::AnnotationSeq;
Expand All @@ -845,8 +848,6 @@ mod tests {
v1_1, Decimal, Element, Int, IonResult, IonType, Null, RawSymbolRef, SymbolId, Timestamp,
Writer,
};
use num_traits::FloatConst;
use rstest::rstest;

fn encoding_test(
test: impl FnOnce(&mut LazyRawBinaryWriter_1_1<&mut Vec<u8>>) -> IonResult<()>,
Expand Down
7 changes: 5 additions & 2 deletions src/lazy/encoder/value_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::lazy::encoder::value_writer::internal::{FieldEncoder, MakeValueWriter
use crate::lazy::encoder::write_as_ion::WriteAsIon;
use crate::lazy::text::raw::v1_1::reader::MacroIdRef;
use crate::raw_symbol_ref::AsRawSymbolRef;
use crate::{Decimal, Int, IonResult, IonType, RawSymbolRef, Timestamp};
use crate::{Decimal, Int, IonResult, IonType, RawSymbolRef, Timestamp, UInt};

pub mod internal {
use crate::lazy::encoder::value_writer::ValueWriter;
Expand Down Expand Up @@ -32,7 +32,10 @@ pub mod internal {
}

pub trait EExpWriter: SequenceWriter {
// TODO: methods for writing tagless encodings
// TODO: more methods for writing tagless encodings
fn write_flex_uint(&mut self, _value: impl Into<UInt>) -> IonResult<()> {
todo!("current only implemented for binary 1.1 to enable unit testing for the reader")
}
}

pub trait AnnotatableWriter {
Expand Down
5 changes: 4 additions & 1 deletion src/lazy/encoder/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use crate::result::IonFailure;
use crate::write_config::WriteConfig;
use crate::{
Decimal, Element, ElementWriter, Int, IonResult, IonType, RawSymbolRef, Symbol, SymbolId,
SymbolTable, Timestamp, Value,
SymbolTable, Timestamp, UInt, Value,
};

pub(crate) struct WriteContext {
Expand Down Expand Up @@ -505,6 +505,9 @@ impl<'value, V: ValueWriter> MakeValueWriter for ApplicationEExpWriter<'value, V

impl<'value, V: ValueWriter> EExpWriter for ApplicationEExpWriter<'value, V> {
// Default methods
fn write_flex_uint(&mut self, value: impl Into<UInt>) -> IonResult<()> {
self.raw_eexp_writer.write_flex_uint(value)
}
}

impl<S: SequenceWriter> ElementWriter for S {
Expand Down
Loading

0 comments on commit e436f96

Please sign in to comment.