From 978fbd3b72a9ffa08e9e63802eba32d952bfe061 Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Mon, 9 Sep 2024 02:43:51 -0700 Subject: [PATCH 1/2] Update conformance testing DSL to support unknown symbols --- src/lazy/binary/raw/v1_1/value.rs | 25 ++-- tests/conformance_dsl/context.rs | 1 + tests/conformance_dsl/continuation.rs | 97 ++++++++++----- tests/conformance_dsl/fragment.rs | 162 +++++++++++++++++++++++++- tests/conformance_dsl/mod.rs | 23 ++++ tests/conformance_dsl/model.rs | 151 +++++++++++++++++++----- tests/conformance_tests.rs | 16 +++ 7 files changed, 404 insertions(+), 71 deletions(-) diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index 7f0de9d0..49619143 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -119,13 +119,7 @@ impl<'top> HasRange for &'top LazyRawBinaryValue_1_1<'top> { impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1_1<'top> { fn ion_type(&self) -> IonType { - // Handle retrieving the type for a typed null. - if self.encoded_value.header.type_code() == OpcodeType::TypedNull { - let body = self.value_body(); - ION_1_1_TYPED_NULL_TYPES[body[0] as usize] - } else { - self.encoded_value.ion_type() - } + self.encoded_value.ion_type() } fn is_null(&self) -> bool { @@ -151,7 +145,7 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1 } if self.is_null() { - return Ok(RawValueRef::Null(self.ion_type())); + return self.read_null(); } match self.ion_type() { @@ -171,6 +165,7 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1 } } + /// This is a fast path for reading values that we know need to be resolved. /// /// When a `LazyValue` wrapping a raw binary value calls `read()`, it's clear that the `RawValueRef` will @@ -194,7 +189,7 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1 return Ok(ValueRef::Int(int)); } if self.is_null() { - return Ok(ValueRef::Null(self.ion_type())); + return self.read_null()?.resolve(context); } // Anecdotally, string and integer values are very common in Ion streams. This `match` creates // an inlineable fast path for them while other types go through the general case impl. @@ -218,7 +213,7 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1 } if value.is_null() { - return Ok(ValueRef::Null(value.ion_type())); + return value.read_null()?.resolve(context); } let value_ref = @@ -354,6 +349,16 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { <&'top Self as LazyRawValue<'top, BinaryEncoding_1_1>>::read(&self) } + fn read_null(&self) -> IonResult> { + let tpe = if self.encoded_value.header.type_code() == OpcodeType::TypedNull { + let body = self.value_body(); + ION_1_1_TYPED_NULL_TYPES[body[0] as usize] + } else { + self.encoded_value.ion_type() + }; + Ok(RawValueRef::Null(tpe)) + } + pub fn is_delimited(&self) -> bool { self.encoded_value.header.ion_type_code.is_delimited_start() } diff --git a/tests/conformance_dsl/context.rs b/tests/conformance_dsl/context.rs index 04f710e2..25209583 100644 --- a/tests/conformance_dsl/context.rs +++ b/tests/conformance_dsl/context.rs @@ -120,6 +120,7 @@ impl<'a> Context<'a> { let (data, data_encoding) = self.input(encoding)?; let data_slice = IonSlice::new(data); + if self.fragments.is_empty() { let empty: Vec = vec!(); return Ok(empty.into()); diff --git a/tests/conformance_dsl/continuation.rs b/tests/conformance_dsl/continuation.rs index 360c3216..221e61ec 100644 --- a/tests/conformance_dsl/continuation.rs +++ b/tests/conformance_dsl/continuation.rs @@ -13,10 +13,10 @@ pub(crate) enum Continuation { // expectations // Verify that reading the current document produces the expected data provided. - Produces(Vec), + Produces(Produces), // Verify that reading the current document produces the expected data, provided through data // model elements. - Denotes(Vec), + Denotes(Denotes), // Verify that reading the current document produces an error. Signals(String), // extensions @@ -39,18 +39,7 @@ impl Continuation { pub fn evaluate(&self, ctx: &Context) -> InnerResult<()> { match self { // Produces is terminal, so we can evaluate. - Continuation::Produces(expected_elems) => { - let elems = ctx.read_all(ctx.encoding())?; - if expected_elems.len() != elems.len() { - Err(ConformanceErrorKind::MismatchedProduce) - } else { - let zip = expected_elems.iter().zip(elems.iter()); - match zip.fold(true, |acc, (x, y)| acc && (*x == *y)) { - true => Ok(()), - false => Err(ConformanceErrorKind::MismatchedProduce), - } - } - } + Continuation::Produces(produces) => produces.evaluate(ctx), Continuation::Not(inner) => match inner.evaluate(ctx) { Err(_) => Ok(()), Ok(_) => Err(ConformanceErrorKind::UnknownError), @@ -62,18 +51,7 @@ impl Continuation { Ok(()) } Continuation::Then(then) => then.evaluate(ctx), - Continuation::Denotes(expected_vals) => { - let elems = ctx.read_all(ctx.encoding())?; - if expected_vals.len() != elems.len() { - Err(ConformanceErrorKind::MismatchedDenotes) - } else { - let zip = expected_vals.iter().zip(elems.iter()); - match zip.fold(true, |acc, (x, y)| acc && (*x == *y)) { - true => Ok(()), - false => Err(ConformanceErrorKind::MismatchedDenotes), - } - } - } + Continuation::Denotes(denotes) => denotes.evaluate(ctx), Continuation::Extensions(exts) => { for ext in exts { ext.evaluate(ctx)?; @@ -97,11 +75,12 @@ impl Continuation { } } } + } impl Default for Continuation { fn default() -> Self { - Continuation::Produces(vec!()) + Continuation::Produces(Produces { elems: vec!() }) } } @@ -109,7 +88,7 @@ impl Default for Continuation { pub fn parse_continuation(clause: Clause) -> InnerResult { let continuation = match clause.tpe { ClauseType::Produces => { - Continuation::Produces(clause.body.clone()) + Continuation::Produces(Produces { elems: clause.body.clone() }) } ClauseType::And => { if !clause.body.is_empty() { @@ -159,7 +138,7 @@ pub fn parse_continuation(clause: Clause) -> InnerResult { return Err(ConformanceErrorKind::ExpectedModelValue); } } - Continuation::Denotes(values) + Continuation::Denotes(Denotes { model: values }) } ClauseType::Each => { let mut parsing_branches = true; @@ -230,6 +209,66 @@ pub(crate) struct EachBranch { fragment: Fragment, } +#[derive(Clone, Debug, Default)] +pub(crate) struct Produces { + pub elems: Vec, +} + +impl Produces { + pub fn evaluate(&self, ctx: &Context) -> InnerResult<()> { + let (input, _encoding) = ctx.input(ctx.encoding())?; + let mut reader = ion_rs::Reader::new(ion_rs::AnyEncoding, input)?; + + let mut is_equal = true; + let mut elem_iter = self.elems.iter(); + + while is_equal { + let (actual_value, expected_elem) = (reader.next()?, elem_iter.next()); + match (actual_value, expected_elem) { + (None, None) => break, + (Some(actual_value), Some(expected_elem)) => + is_equal &= super::fragment::ProxyElement(expected_elem) == actual_value, + _ => is_equal = false, + } + } + + if is_equal { + Ok(()) + } else { + Err(ConformanceErrorKind::MismatchedProduce) + } + } +} + +#[derive(Clone, Debug, Default)] +pub(crate) struct Denotes { + pub model: Vec, +} + +impl Denotes { + pub fn evaluate(&self, ctx: &Context) -> InnerResult<()> { + let (input, _encoding) = ctx.input(ctx.encoding())?; + let mut reader = ion_rs::Reader::new(ion_rs::AnyEncoding, input)?; + let mut elem_iter = self.model.iter(); + + let mut is_equal = true; + while is_equal { + let (read_value, expected_element) = (reader.next()?, elem_iter.next()); + match (read_value, expected_element) { + (Some(actual), Some(expected)) => is_equal = is_equal && (expected == actual), + (None, None) => break, + _ => is_equal = false, + } + } + + if is_equal { + Ok(()) + } else { + Err(ConformanceErrorKind::MismatchedDenotes) + } + } +} + /// Represents a Then clause, it's optional name, the list of fragments, and continuation. A 'Then' /// acts as almost a sub-document. #[derive(Clone, Debug, Default)] diff --git a/tests/conformance_dsl/fragment.rs b/tests/conformance_dsl/fragment.rs index 10b69810..66fd7f6c 100644 --- a/tests/conformance_dsl/fragment.rs +++ b/tests/conformance_dsl/fragment.rs @@ -1,5 +1,6 @@ -use ion_rs::{Element, Sequence, SExp, Symbol}; +use ion_rs::{Element, Sequence, SExp, Struct, Symbol}; use ion_rs::{v1_0, v1_1, WriteConfig, Encoding, ion_seq}; +use ion_rs::{RawSymbolRef, StructWriter, SequenceWriter, Writer, WriteAsIon, ValueWriter}; use super::*; use super::context::Context; @@ -140,6 +141,161 @@ impl TryFrom for Fragment { } } +// newtype to handle writing an Element, after we check to make sure it's not a symbol that has our +// special absent symbol sauce. +#[derive(Debug)] +pub(crate) struct ProxyElement<'a>(pub &'a Element); + +impl<'a> ProxyElement<'a> { + + fn write_struct(&self, val: &Struct, writer: V) -> ion_rs::IonResult<()> { + let annotations: Vec<&Symbol> = self.0.annotations().iter().collect(); + let annot_writer = writer.with_annotations(annotations)?; + let mut strukt = annot_writer.struct_writer()?; + + for (name, value) in val.fields() { + match parse_absent_symbol(name.text().unwrap_or("")) { + (None, None) => { strukt.write(name, ProxyElement(value))?; } + (_, Some(id)) => { strukt.write(RawSymbolRef::from(id), ProxyElement(value))?; }, + _ => unreachable!(), + } + } + strukt.close() + } + + fn write_symbol(&self, writer: V) -> ion_rs::IonResult<()> { + if !self.0.is_null() { + let annotations: Vec<&Symbol> = self.0.annotations().iter().collect(); + let annot_writer = writer.with_annotations(annotations)?; + let symbol = self.0.as_symbol().unwrap(); + match parse_absent_symbol(symbol.text().unwrap_or("")) { + (None, None) => annot_writer.write(symbol), + (_, Some(id)) => annot_writer.write(RawSymbolRef::from(id)), + _ => unreachable!(), + } + } else { + writer.write(self.0) + } + } + +} + +impl PartialEq> for ProxyElement<'_> { + fn eq(&self, other: &ion_rs::LazyValue<'_, T>) -> bool { + use ion_rs::{LazyRawFieldName, LazyRawValue, v1_0::RawValueRef, ValueRef}; + match self.0.ion_type() { + IonType::Symbol => match parse_absent_symbol(self.0.as_symbol().unwrap().text().unwrap_or("")) { + (None, None) => *self.0 == Element::try_from(*other).expect("unable to convert lazyvalue into element"), + (_, Some(id)) => { + let Some(raw_symbol) = other.raw().map(|r| r.read()) else { + unreachable!() + }; + let raw_symbol = raw_symbol.expect("error reading symbol"); + + let RawValueRef::Symbol(raw_symbol) = raw_symbol else { + return false; + }; + raw_symbol.matches_sid_or_text(id, "") + } + _ => unreachable!(), + } + IonType::Struct => { + let ValueRef::Struct(actual_struct) = other.read().expect("error reading input value") else { + return false; + }; + + let mut is_equal = true; + let mut actual_iter = actual_struct.iter(); + let mut expected_field_iter = self.0.as_struct().expect("error reading struct").fields(); + + while is_equal { + let actual = actual_iter.next(); + let expected = expected_field_iter.next(); + + match (actual, expected) { + (Some(actual), Some((expected_field, expected_field_elem))) => { + let actual = actual.expect("unable to read struct field"); + let actual_field = actual.raw_name().map(|n| n.read()).expect("unable to get SymbolRef for field name"); + let actual_field = actual_field.expect("unable to read SymbolRef for field name"); + + is_equal &= match parse_absent_symbol(expected_field.text().unwrap_or("")) { + (None, None) => *self.0 == Element::try_from(*other).expect("unable to convert LazyValue into Element"), + (_, Some(id)) => actual_field.matches_sid_or_text(id, ""), + _ => unreachable!(), + }; + + let actual_value = actual.value(); + is_equal &= ProxyElement(expected_field_elem) == actual_value; + } + (None, None) => break, + _ => is_equal = false, + } + } + is_equal + } + IonType::List | IonType::SExp => { + let ValueRef::List(actual_list) = other.read().expect("error reading list") else { + return false; + }; + + let actual_list: ion_rs::IonResult>> = actual_list.iter().collect(); + let actual_list = actual_list.expect("error parsing list"); + + let expected_list = self.0.as_sequence().expect("unable to get sequence for list"); + let expected_list: Vec<&Element> = expected_list.iter().collect(); + + if actual_list.len() != expected_list.len() { + false + } else { + for (actual, expected) in actual_list.iter().zip(expected_list.iter()) { + if ProxyElement(expected) != *actual { + return false + } + } + true + } + } + _ => *self.0 == Element::try_from(*other).expect("unable to convert lazyvalue into element"), + } + } +} + +impl<'a> WriteAsIon for ProxyElement<'a> { + fn write_as_ion(&self, writer: V) -> ion_rs::IonResult<()> { + match self.0.ion_type() { + IonType::Symbol => self.write_symbol(writer), + IonType::Struct => { + if !self.0.is_null() { + self.write_struct(self.0.as_struct().unwrap(), writer) + } else { + writer.write(self.0) + } + } + IonType::List => { + let annotations: Vec<&Symbol> = self.0.annotations().iter().collect(); + let annot_writer = writer.with_annotations(annotations)?; + let mut list_writer = annot_writer.list_writer()?; + for elem in self.0.as_sequence().unwrap() { + list_writer.write(ProxyElement(elem))?; + } + list_writer.close()?; + Ok(()) + } + IonType::SExp => { + let annotations: Vec<&Symbol> = self.0.annotations().iter().collect(); + let annot_writer = writer.with_annotations(annotations)?; + let mut sexp_writer = annot_writer.sexp_writer()?; + for elem in self.0.as_sequence().unwrap() { + sexp_writer.write(ProxyElement(elem))?; + } + sexp_writer.close()?; + Ok(()) + } + _ => writer.write(self.0), + } + } +} + /// Implments the TopLevel fragment. #[derive(Clone, Debug, Default)] pub(crate) struct TopLevel { @@ -149,11 +305,11 @@ pub(crate) struct TopLevel { impl FragmentImpl for TopLevel { /// Encodes the provided ion literals into an ion stream, using the provided WriteConfig. fn encode(&self, config: impl Into>) -> InnerResult> { - use ion_rs::Writer; let mut buffer = Vec::with_capacity(1024); let mut writer = Writer::new(config, buffer)?; + for elem in self.elems.as_slice() { - writer.write(elem)?; + writer.write(ProxyElement(elem))?; } buffer = writer.close()?; Ok(buffer) diff --git a/tests/conformance_dsl/mod.rs b/tests/conformance_dsl/mod.rs index d1a468d7..c0bb77ef 100644 --- a/tests/conformance_dsl/mod.rs +++ b/tests/conformance_dsl/mod.rs @@ -306,3 +306,26 @@ pub(crate) fn parse_text_exp<'a, I: IntoIterator>(elems: I) -> let val_string = bytes.iter().map(|v| unsafe { String::from_utf8_unchecked(v.to_vec()) }).collect(); Ok(val_string) } + +/// Parses absent symbol notation from a symbol within a Toplevel fragment, or produces +/// Continuation. The notation is '#$' for an absent symbol id, or '#$#' for a symbol +/// ID from a specific symbol table named 'name'. +pub(crate) fn parse_absent_symbol>(txt: T) -> (Option, Option) { + let txt = txt.as_ref(); + if let Some(id_txt) = txt.strip_prefix("#$") { + let split_txt: Vec<&str> = id_txt.split('#').collect(); // format: '#$#' or '#$' + match split_txt.len() { + 1 => ( + None, + split_txt[0].parse::().map(Some).unwrap_or(None) + ), + 2 => ( + Some(split_txt[0].to_string()), + split_txt[1].parse::().map(Some).unwrap_or(None) + ), + _ => panic!("invalid absent symbol notation"), + } + } else { + (None, None) + } +} diff --git a/tests/conformance_dsl/model.rs b/tests/conformance_dsl/model.rs index f9d3aad7..c0294b29 100644 --- a/tests/conformance_dsl/model.rs +++ b/tests/conformance_dsl/model.rs @@ -1,8 +1,8 @@ -use ion_rs::{Decimal, Element, IonType, Sequence, Timestamp}; +use ion_rs::{Decimal, Element, IonType, Sequence, Timestamp, ValueRef}; +use ion_rs::{LazyRawValue, LazyRawFieldName, v1_0::RawValueRef}; use ion_rs::decimal::coefficient::Coefficient; use super::{Clause, ClauseType, ConformanceErrorKind, InnerResult, parse_text_exp, parse_bytes_exp}; -use std::collections::HashMap; /// Represents a symbol in the Data Model representation of ion data. #[derive(Debug, Clone, Eq, Hash, PartialEq)] @@ -12,6 +12,17 @@ pub(crate) enum SymbolToken { Absent(String, i64), } +impl std::fmt::Display for SymbolToken { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + SymbolToken::Text(txt) => write!(f, "{}", txt), + SymbolToken::Offset(id) => write!(f, "#${}", id), + SymbolToken::Absent(txt, id) => write!(f, "#${}#{}", txt, id), + } + } +} + + impl TryFrom<&Element> for SymbolToken { type Error = ConformanceErrorKind; @@ -28,9 +39,9 @@ impl TryFrom<&Element> for SymbolToken { Ok(SymbolToken::Text(text)) }, ClauseType::Absent => { - let text = clause.body.get(1).and_then(|v| v.as_string()).ok_or(ConformanceErrorKind::ExpectedSymbolType)?; - let offset = clause.body.get(2).and_then(|v| v.as_i64()).ok_or(ConformanceErrorKind::ExpectedSymbolType)?; - Ok(SymbolToken::Absent(text.to_string(), offset)) + let symtab = clause.body.first().and_then(|v| v.as_string()).ok_or(ConformanceErrorKind::ExpectedSymbolType)?; + let offset = clause.body.get(1).and_then(|v| v.as_i64()).ok_or(ConformanceErrorKind::ExpectedSymbolType)?; + Ok(SymbolToken::Absent(symtab.to_string(), offset)) } _ => unreachable!(), } @@ -57,7 +68,7 @@ pub(crate) enum ModelValue { Symbol(SymbolToken), List(Vec), Sexp(Vec), - Struct(HashMap), + Struct(Vec<(SymbolToken, ModelValue)>), Blob(Vec), Clob(Vec), } @@ -121,9 +132,9 @@ impl TryFrom<&Sequence> for ModelValue { Ok(ModelValue::Symbol(SymbolToken::Text(text))) }, ClauseType::Absent => { - let text = clause.body.get(1).and_then(|v| v.as_string()).ok_or(ConformanceErrorKind::ExpectedSymbolType)?; + let symtab= clause.body.get(1).and_then(|v| v.as_string()).ok_or(ConformanceErrorKind::ExpectedSymbolType)?; let offset = clause.body.get(2).and_then(|v| v.as_i64()).ok_or(ConformanceErrorKind::ExpectedSymbolType)?; - Ok(ModelValue::Symbol(SymbolToken::Absent(text.to_string(), offset))) + Ok(ModelValue::Symbol(SymbolToken::Absent(symtab.to_string(), offset))) } _ => unreachable!(), } @@ -151,7 +162,7 @@ impl TryFrom<&Sequence> for ModelValue { Ok(ModelValue::Sexp(sexp)) } "Struct" => { - let mut fields = HashMap::new(); + let mut fields = vec!(); for elem in elems.iter().skip(1) { if let Some(seq) = elem.as_sequence() { // Each elem should be a model symtok followed by a model value. @@ -172,7 +183,8 @@ impl TryFrom<&Sequence> for ModelValue { } _ => return Err(ConformanceErrorKind::ExpectedModelValue), }; - fields.insert(field_sym, value); + + fields.push((field_sym, value)); } } Ok(ModelValue::Struct(fields)) @@ -225,31 +237,14 @@ impl PartialEq for ModelValue { } true } - ModelValue::Struct(fields) => { - if other.ion_type() != IonType::Struct { - false - } else { - let struct_val = other.as_struct().unwrap(); - for (field, val) in struct_val.fields() { - let denoted_field = field.text().unwrap(); // TODO: Symbol IDs - let denoted_symtok = SymbolToken::Text(denoted_field.to_string()); - if let Some(expected_val) = fields.get(&denoted_symtok) { - if expected_val != val { - return false; - } - } - } - true - } - } + ModelValue::Struct(_) => panic!("should not be using element eq for struct"), ModelValue::Blob(data) => other.as_blob() == Some(data.as_slice()), ModelValue::Clob(data) => other.as_clob() == Some(data.as_slice()), ModelValue::Symbol(sym) => { if let Some(other_sym) = other.as_symbol() { match sym { SymbolToken::Text(text) => Some(text.as_ref()) == other_sym.text(), - SymbolToken::Offset(_offset) => todo!(), - SymbolToken::Absent(_text, _offset) => todo!(), + _ => unreachable!(), } } else { false @@ -260,6 +255,104 @@ impl PartialEq for ModelValue { } } +impl PartialEq for &ModelValue { + fn eq(&self, other: &Element) -> bool { + *self == other + } +} + +impl PartialEq> for &ModelValue { + fn eq(&self, other: &ion_rs::LazyValue<'_, T>) -> bool { + match self { + ModelValue::Symbol(symbol_token) if other.ion_type() == IonType::Symbol => { + let Some(raw_symbol) = other.raw().map(|r| r.read()) else { + unreachable!() + }; + let raw_symbol = raw_symbol.expect("error reading symbol"); + + let RawValueRef::Symbol(raw_symbol) = raw_symbol else { + return false + }; + + let ValueRef::Symbol(symbol_text) = other.read().expect("error resolving symbol") else { + return false; + }; + + let (expected_txt, expected_id) = match symbol_token { + SymbolToken::Text(txt) => return symbol_text == txt, + SymbolToken::Offset(id) => (&String::from(""), *id as usize), + SymbolToken::Absent(txt, id) => (txt, *id as usize), + }; + + raw_symbol.matches_sid_or_text(expected_id, expected_txt) + } + ModelValue::Struct(expected_fields) if other.ion_type() == IonType::Struct => { + let ValueRef::Struct(strukt) = other.read().expect("error reading struct") else { + return false; + }; + + let mut is_equal = true; + let mut strukt_iter = strukt.iter(); + let mut expected_iter = expected_fields.iter(); + + while is_equal { + let actual = strukt_iter.next(); + let expected = expected_iter.next(); + + match (actual, expected) { + (Some(actual), Some((expected_field, expected_val))) => { + let actual = actual.expect("unable to read struct field"); + let actual_field = actual.raw_name().map(|n| n.read()).expect("unable to get symbolref for field name"); + let actual_field = actual_field.expect("unable to read symbolref for field name"); + + let (expected_txt, expected_id) = match expected_field { + SymbolToken::Text(txt) => (txt, usize::MAX), + SymbolToken::Offset(id) => (&String::from(""), *id as usize), + SymbolToken::Absent(txt, id) => (txt, *id as usize), + }; + is_equal = is_equal && actual_field.matches_sid_or_text(expected_id, expected_txt); + + let actual_value = actual.value(); + + is_equal = is_equal && (expected_val == actual_value); + } + (None, None) => break, + _ => is_equal = false, + } + } + is_equal + } + ModelValue::List(expected) if other.ion_type() == IonType::List => { + let ValueRef::List(list) = other.read().expect("error reading list") else { + unreachable!(); + }; + + let actual: ion_rs::IonResult>> = list.iter().collect(); + let actual = actual.expect("Error parsing list"); + + actual.iter().zip(expected.iter()) + .fold(true, |acc, (actual_val, expected_val)| acc && (&expected_val == actual_val)) + } + ModelValue::Sexp(expected) if other.ion_type() == IonType::SExp => { + let ValueRef::SExp(sexp) = other.read().expect("error reading sexp") else { + unreachable!(); + }; + + let actual: ion_rs::IonResult>> = sexp.iter().collect(); + let actual = actual.expect("Error parsing sexp"); + actual.iter().zip(expected.iter()) + .fold(true, |acc, (actual_val, expected_val)| acc && (&expected_val == actual_val)) + } + _ => { + // Anything that reaches down here isn't a symbol, or can't contain a symbol. So + // we just have to worry about equality. + let other_elem: Element = Element::try_from(*other).expect("Unable to convert value into element"); + *self == other_elem + } + } + } +} + /// Parses a Timestamp clause into an ion-rs Timestamp. fn parse_timestamp<'a, I: IntoIterator>(elems: I) -> InnerResult { let mut iter = elems.into_iter(); diff --git a/tests/conformance_tests.rs b/tests/conformance_tests.rs index e091d60d..a57b66f1 100644 --- a/tests/conformance_tests.rs +++ b/tests/conformance_tests.rs @@ -1,4 +1,5 @@ #![cfg(feature = "experimental-reader-writer")] +#![cfg(feature = "experimental-tooling-apis")] mod conformance_dsl; use conformance_dsl::prelude::*; @@ -11,6 +12,21 @@ use std::str::FromStr; mod implementation { use super::*; + #[test] + fn test_absent_symbol() { + let test = r#" + (ion_1_1 + (toplevel '#$2' {'#$9': '#$8'}) + (text "") + (denotes (Symbol 2) (Struct (9 (Symbol 8)))) + ) + "#; + Document::from_str(test) + .unwrap_or_else(|e| panic!("Failed to load document:\n{:?}", e)) + .run() + .unwrap_or_else(|e| panic!("Test failed: {:?}", e)); + } + #[test] fn test_timestamps() { let tests: &[&str] = &[ From 9eab933b0e6b21a88298e032526f714e2bd5a45e Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Thu, 12 Sep 2024 04:33:19 -0700 Subject: [PATCH 2/2] Address PR feedback; Add in missing catalog build & symtab resolution; Refactor comparisons --- src/lazy/binary/raw/v1_1/value.rs | 12 +- tests/conformance_dsl/context.rs | 32 +++- tests/conformance_dsl/continuation.rs | 24 ++- tests/conformance_dsl/document.rs | 2 +- tests/conformance_dsl/fragment.rs | 96 +++++++---- tests/conformance_dsl/mod.rs | 74 +++++++- tests/conformance_dsl/model.rs | 240 ++++++++++++-------------- tests/conformance_tests.rs | 33 ++-- 8 files changed, 321 insertions(+), 192 deletions(-) diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index 49619143..e1f1e6dd 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -145,7 +145,7 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1 } if self.is_null() { - return self.read_null(); + return Ok(RawValueRef::Null(self.read_null()?)); } match self.ion_type() { @@ -189,7 +189,7 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1 return Ok(ValueRef::Int(int)); } if self.is_null() { - return self.read_null()?.resolve(context); + return Ok(ValueRef::Null(self.read_null()?)); } // Anecdotally, string and integer values are very common in Ion streams. This `match` creates // an inlineable fast path for them while other types go through the general case impl. @@ -213,7 +213,7 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1 } if value.is_null() { - return value.read_null()?.resolve(context); + return Ok(ValueRef::Null(value.read_null()?)); } let value_ref = @@ -349,14 +349,14 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { <&'top Self as LazyRawValue<'top, BinaryEncoding_1_1>>::read(&self) } - fn read_null(&self) -> IonResult> { - let tpe = if self.encoded_value.header.type_code() == OpcodeType::TypedNull { + fn read_null(&self) -> IonResult { + let ion_type = if self.encoded_value.header.type_code() == OpcodeType::TypedNull { let body = self.value_body(); ION_1_1_TYPED_NULL_TYPES[body[0] as usize] } else { self.encoded_value.ion_type() }; - Ok(RawValueRef::Null(tpe)) + Ok(ion_type) } pub fn is_delimited(&self) -> bool { diff --git a/tests/conformance_dsl/context.rs b/tests/conformance_dsl/context.rs index 25209583..b1be2356 100644 --- a/tests/conformance_dsl/context.rs +++ b/tests/conformance_dsl/context.rs @@ -1,24 +1,27 @@ use crate::conformance_dsl::*; -use ion_rs::{Element, ElementReader, Sequence, Reader, IonSlice}; +use ion_rs::{Catalog, Element, ElementReader, Sequence, Reader, IonSlice, SharedSymbolTable, Symbol, SymbolId}; use ion_rs::{v1_0, v1_1}; /// A Context forms a scope for tracking all of the document fragments and any parent Contexts that /// also need to be considered. Through this context the ability to generate the full test /// document input, and evaluate any forced encodings or Ion versions, is provided. -#[derive(Clone, Copy, Debug)] +#[derive(Clone)] pub(crate) struct Context<'a> { version: IonVersion, encoding: IonEncoding, fragments: &'a Vec, parent_ctx: Option<&'a Context<'a>>, + symbol_tables: Vec, // To build Catalogs when needed, Catalog doesn't + // require Clone. } impl<'a> Context<'a> { /// Creates a new Context with the provided version, encoding and fragments. A parent context /// is not set. - pub fn new(version: IonVersion, encoding: IonEncoding, fragments: &'a Vec) -> Self { - Self { version, encoding, fragments, parent_ctx: None} + pub fn new(version: IonVersion, encoding: IonEncoding, fragments: &'a Vec) -> Result { + let symbol_tables = build_ion_tests_symtables()?; + Ok(Self { version, encoding, fragments, parent_ctx: None, symbol_tables }) } /// Creates a new Context with the provided fragments, based on the supplied `parent`. The @@ -29,6 +32,7 @@ impl<'a> Context<'a> { encoding: parent.encoding, parent_ctx: Some(parent), fragments, + symbol_tables: parent.symbol_tables.clone(), } } @@ -101,6 +105,26 @@ impl<'a> Context<'a> { } } + /// Build all of the symbol tables loaded from ion-tests/catalog into a usuable Catalog. + pub fn build_catalog(&self) -> impl Catalog { + let mut catalog = MapCatalog::new(); + + for sym_table in self.symbol_tables.iter() { + catalog.insert_table(sym_table.clone()); + } + + catalog + } + + /// Returns the symbol at the provided offset `offset` within the symbol table named `symtab`, + /// if either the symbol table, or the offset, is not valid then None is returned. + pub fn get_symbol_from_table>(&self, symtab: S, offset: SymbolId) -> Option { + self.symbol_tables.iter() + .filter(|st| st.name() == symtab.as_ref()) + .max_by_key(|sst| sst.version()) + .and_then(|st| st.symbols().get(offset - 1).cloned()) + } + /// Returns a Vec containing the serialized data consisting of all fragments in the path /// for this context. pub fn input(&self, child_encoding: IonEncoding) -> InnerResult<(Vec, IonEncoding)> { diff --git a/tests/conformance_dsl/continuation.rs b/tests/conformance_dsl/continuation.rs index 221e61ec..ad9bfd76 100644 --- a/tests/conformance_dsl/continuation.rs +++ b/tests/conformance_dsl/continuation.rs @@ -4,7 +4,7 @@ use super::*; use super::context::Context; -use super::model::ModelValue; +use super::model::{compare_values, ModelValue}; use ion_rs::{Element, Sequence}; @@ -209,15 +209,19 @@ pub(crate) struct EachBranch { fragment: Fragment, } +/// Represents a Produces clause, used for defining the expected ion values, using ion literals. #[derive(Clone, Debug, Default)] pub(crate) struct Produces { pub elems: Vec, } impl Produces { + /// Creates a reader using the provided context, and compares the read values from the input + /// document with the elements specified in the associated Produces clause for equality. pub fn evaluate(&self, ctx: &Context) -> InnerResult<()> { + use ion_rs::{Decoder, AnyEncoding}; let (input, _encoding) = ctx.input(ctx.encoding())?; - let mut reader = ion_rs::Reader::new(ion_rs::AnyEncoding, input)?; + let mut reader = ion_rs::Reader::new(AnyEncoding.with_catalog(ctx.build_catalog()), input)?; let mut is_equal = true; let mut elem_iter = self.elems.iter(); @@ -226,8 +230,9 @@ impl Produces { let (actual_value, expected_elem) = (reader.next()?, elem_iter.next()); match (actual_value, expected_elem) { (None, None) => break, - (Some(actual_value), Some(expected_elem)) => - is_equal &= super::fragment::ProxyElement(expected_elem) == actual_value, + (Some(actual_value), Some(expected_elem)) => { + is_equal &= super::fragment::ProxyElement(expected_elem, ctx) == actual_value + } _ => is_equal = false, } } @@ -240,6 +245,7 @@ impl Produces { } } +/// Represents a Denotes clause, used for defining the expected ion values using the Denotes Value Model. #[derive(Clone, Debug, Default)] pub(crate) struct Denotes { pub model: Vec, @@ -247,17 +253,19 @@ pub(crate) struct Denotes { impl Denotes { pub fn evaluate(&self, ctx: &Context) -> InnerResult<()> { + use ion_rs::{Decoder, AnyEncoding}; let (input, _encoding) = ctx.input(ctx.encoding())?; - let mut reader = ion_rs::Reader::new(ion_rs::AnyEncoding, input)?; + let mut reader = ion_rs::Reader::new(AnyEncoding.with_catalog(ctx.build_catalog()), input)?; let mut elem_iter = self.model.iter(); let mut is_equal = true; while is_equal { let (read_value, expected_element) = (reader.next()?, elem_iter.next()); - match (read_value, expected_element) { - (Some(actual), Some(expected)) => is_equal = is_equal && (expected == actual), + is_equal = match (read_value, expected_element) { + (Some(actual), Some(expected)) => + is_equal && compare_values(ctx, expected, &actual)?, (None, None) => break, - _ => is_equal = false, + _ => false, } } diff --git a/tests/conformance_dsl/document.rs b/tests/conformance_dsl/document.rs index 258ae57c..cdeaa794 100644 --- a/tests/conformance_dsl/document.rs +++ b/tests/conformance_dsl/document.rs @@ -39,7 +39,7 @@ pub(crate) struct Document { impl Document { /// Execute the test by evaluating the document's continuation. pub fn run(&self) -> Result<()> { - let ctx = Context::new(IonVersion::Unspecified, self.encoding(), &self.fragments); + let ctx = Context::new(IonVersion::Unspecified, self.encoding(), &self.fragments)?; self.continuation.evaluate(&ctx)?; Ok(()) } diff --git a/tests/conformance_dsl/fragment.rs b/tests/conformance_dsl/fragment.rs index 66fd7f6c..f788da45 100644 --- a/tests/conformance_dsl/fragment.rs +++ b/tests/conformance_dsl/fragment.rs @@ -1,3 +1,26 @@ +//! Fragments within the DSL are responsible for piecing together the ion document used during the +//! test. Each fragment contributes in some way to the full ion stream that is tested, usually +//! providing different mechanisms for representing ion data, but also by providing shortcuts to +//! adding common data to be tested such as encoding directives and macros. +//! +//! In order to produce the input document each fragment is encoded in the order that they appear +//! in the test document. In the case of `Text`, the data is concatenated to an ion text stream +//! verbatim. The `Toplevel` fragment requires a little more work in order to support the +//! missing/absent symbol notation. Since all of the Toplevel fragments are provided as ion +//! literals within the test document, any symbols that is unknown in the context of the test +//! document, or may otherwise differ in value, needs to be encoded using a notation that +//! communicates the Symbol ID and an optional Symbol Table, to the DSL parser. When the values are +//! read by the ion reader they're treated as normal quoted symbols. When the Toplevel fragment is +//! encoded however, each Element read from the Toplevel clause is wrapped within a `ProxyElement`, +//! which by implementing `WriteAsIon` is able to recursively look at all values being written to +//! the test's input and capture any symbols that match the unknown symbol notation. +//! +//! The `ProxyElement` type is also used for testing equality between LazyValues and Elements. The +//! type is used in order to hold onto a Context for accessing the input document's symbol tables, +//! so that symbols from the Produces continuation can be evaluated for unknown symbol notation as +//! well. + + use ion_rs::{Element, Sequence, SExp, Struct, Symbol}; use ion_rs::{v1_0, v1_1, WriteConfig, Encoding, ion_seq}; use ion_rs::{RawSymbolRef, StructWriter, SequenceWriter, Writer, WriteAsIon, ValueWriter}; @@ -8,7 +31,7 @@ use super::context::Context; /// Shared functionality for Fragments. trait FragmentImpl { /// Encode the current fragment into ion given the provided `WriteConfig` - fn encode(&self, config: impl Into>) -> InnerResult>; + fn encode(&self, ctx: &Context, config: impl Into>) -> InnerResult>; } @@ -45,9 +68,9 @@ impl Fragment { } /// Internal. Writes the fragment as binary ion using the provided WriteConfig. - fn write_as_binary(&self, _ctx: &Context, config: impl Into>) -> InnerResult> { + fn write_as_binary(&self, ctx: &Context, config: impl Into>) -> InnerResult> { match self { - Fragment::TopLevel(toplevel) => toplevel.encode(config), + Fragment::TopLevel(toplevel) => toplevel.encode(ctx, config), Fragment::Binary(bin) => Ok(bin.clone()), Fragment::Text(_) => unreachable!(), Fragment::Ivm(maj, min) => Ok([0xE0, *maj as u8, *min as u8, 0xEA].to_vec()), @@ -55,9 +78,9 @@ impl Fragment { } /// Internal. Writes the fragment as text ion using the provided WriteConfig. - fn write_as_text(&self, _ctx: &Context, config: impl Into>) -> InnerResult> { + fn write_as_text(&self, ctx: &Context, config: impl Into>) -> InnerResult> { match self { - Fragment::TopLevel(toplevel) => toplevel.encode(config), + Fragment::TopLevel(toplevel) => toplevel.encode(ctx, config), Fragment::Text(txt) => { let bytes = txt.as_bytes(); Ok(bytes.to_owned()) @@ -143,8 +166,7 @@ impl TryFrom for Fragment { // newtype to handle writing an Element, after we check to make sure it's not a symbol that has our // special absent symbol sauce. -#[derive(Debug)] -pub(crate) struct ProxyElement<'a>(pub &'a Element); +pub(crate) struct ProxyElement<'a>(pub &'a Element, pub &'a Context<'a>); impl<'a> ProxyElement<'a> { @@ -155,8 +177,8 @@ impl<'a> ProxyElement<'a> { for (name, value) in val.fields() { match parse_absent_symbol(name.text().unwrap_or("")) { - (None, None) => { strukt.write(name, ProxyElement(value))?; } - (_, Some(id)) => { strukt.write(RawSymbolRef::from(id), ProxyElement(value))?; }, + (None, None) => { strukt.write(name, ProxyElement(value, self.1))?; } + (_, Some(id)) => { strukt.write(RawSymbolRef::from(id), ProxyElement(value, self.1))?; }, _ => unreachable!(), } } @@ -170,7 +192,13 @@ impl<'a> ProxyElement<'a> { let symbol = self.0.as_symbol().unwrap(); match parse_absent_symbol(symbol.text().unwrap_or("")) { (None, None) => annot_writer.write(symbol), - (_, Some(id)) => annot_writer.write(RawSymbolRef::from(id)), + (None, Some(id)) => annot_writer.write(RawSymbolRef::from(id)), + (Some(symtab), Some(id)) => { + match self.1.get_symbol_from_table(symtab, id) { + Some(symbol) => annot_writer.write(RawSymbolRef::from(symbol.text().unwrap_or(""))), + None => annot_writer.write(RawSymbolRef::from(0)), // TODO: error. + } + } _ => unreachable!(), } } else { @@ -181,24 +209,13 @@ impl<'a> ProxyElement<'a> { } impl PartialEq> for ProxyElement<'_> { + // TODO: Move this out of PartialEq - there are a lot of potential errors comparing these two + // types that might be better bubbling up. fn eq(&self, other: &ion_rs::LazyValue<'_, T>) -> bool { - use ion_rs::{LazyRawFieldName, LazyRawValue, v1_0::RawValueRef, ValueRef}; + use ion_rs::{LazyRawFieldName, ValueRef}; + use super::compare_symbols_eq; match self.0.ion_type() { - IonType::Symbol => match parse_absent_symbol(self.0.as_symbol().unwrap().text().unwrap_or("")) { - (None, None) => *self.0 == Element::try_from(*other).expect("unable to convert lazyvalue into element"), - (_, Some(id)) => { - let Some(raw_symbol) = other.raw().map(|r| r.read()) else { - unreachable!() - }; - let raw_symbol = raw_symbol.expect("error reading symbol"); - - let RawValueRef::Symbol(raw_symbol) = raw_symbol else { - return false; - }; - raw_symbol.matches_sid_or_text(id, "") - } - _ => unreachable!(), - } + IonType::Symbol => compare_symbols_eq(self.1, other, self.0).unwrap_or(false), IonType::Struct => { let ValueRef::Struct(actual_struct) = other.read().expect("error reading input value") else { return false; @@ -220,12 +237,23 @@ impl PartialEq> for ProxyElement<'_ is_equal &= match parse_absent_symbol(expected_field.text().unwrap_or("")) { (None, None) => *self.0 == Element::try_from(*other).expect("unable to convert LazyValue into Element"), - (_, Some(id)) => actual_field.matches_sid_or_text(id, ""), - _ => unreachable!(), + (None, Some(id)) => actual_field.matches_sid_or_text(id, ""), + (Some(symtab), Some(id)) => { + let symbol_table = other.symbol_table(); + match self.1.get_symbol_from_table(symtab, id) { + None => actual_field.matches_sid_or_text(0, ""), + Some(shared_symbol) => { + let shared_symbol_txt = shared_symbol.text().unwrap_or(""); + let shared_id = symbol_table.sid_for(&shared_symbol_txt).unwrap_or(0); + actual_field.matches_sid_or_text(shared_id, shared_symbol_txt) + } + } + } + _ => unreachable!(), // Cannot have symtab without id. }; let actual_value = actual.value(); - is_equal &= ProxyElement(expected_field_elem) == actual_value; + is_equal &= ProxyElement(expected_field_elem, self.1) == actual_value; } (None, None) => break, _ => is_equal = false, @@ -248,7 +276,7 @@ impl PartialEq> for ProxyElement<'_ false } else { for (actual, expected) in actual_list.iter().zip(expected_list.iter()) { - if ProxyElement(expected) != *actual { + if ProxyElement(expected, self.1) != *actual { return false } } @@ -276,7 +304,7 @@ impl<'a> WriteAsIon for ProxyElement<'a> { let annot_writer = writer.with_annotations(annotations)?; let mut list_writer = annot_writer.list_writer()?; for elem in self.0.as_sequence().unwrap() { - list_writer.write(ProxyElement(elem))?; + list_writer.write(ProxyElement(elem, self.1))?; } list_writer.close()?; Ok(()) @@ -286,7 +314,7 @@ impl<'a> WriteAsIon for ProxyElement<'a> { let annot_writer = writer.with_annotations(annotations)?; let mut sexp_writer = annot_writer.sexp_writer()?; for elem in self.0.as_sequence().unwrap() { - sexp_writer.write(ProxyElement(elem))?; + sexp_writer.write(ProxyElement(elem, self.1))?; } sexp_writer.close()?; Ok(()) @@ -304,12 +332,12 @@ pub(crate) struct TopLevel { impl FragmentImpl for TopLevel { /// Encodes the provided ion literals into an ion stream, using the provided WriteConfig. - fn encode(&self, config: impl Into>) -> InnerResult> { + fn encode(&self, ctx: &Context, config: impl Into>) -> InnerResult> { let mut buffer = Vec::with_capacity(1024); let mut writer = Writer::new(config, buffer)?; for elem in self.elems.as_slice() { - writer.write(ProxyElement(elem))?; + writer.write(ProxyElement(elem, ctx))?; } buffer = writer.close()?; Ok(buffer) diff --git a/tests/conformance_dsl/mod.rs b/tests/conformance_dsl/mod.rs index c0bb77ef..ad50f07d 100644 --- a/tests/conformance_dsl/mod.rs +++ b/tests/conformance_dsl/mod.rs @@ -10,11 +10,12 @@ mod clause; use std::io::Read; use std::path::{Path, PathBuf}; -use ion_rs::{Element, IonError, IonStream, IonType}; +use ion_rs::{Element, IonError, IonStream, IonType, MapCatalog, SharedSymbolTable}; use clause::*; use document::*; use fragment::*; +use context::*; #[allow(unused)] pub(crate) mod prelude { @@ -198,7 +199,6 @@ pub(crate) fn parse_document_like(clause: &Clause) -> InnerResu /// A collection of Tests, usually stored together in a file. -#[derive(Debug)] pub(crate) struct TestCollection { documents: Vec, } @@ -263,6 +263,32 @@ impl TestCollection { } +/// Walks over all of the ion files contained in the ion-tests/catalog directory and extracts the +/// symbol tables from each. A Vec of the resulting SharedSymbolTables is returned. +pub(crate) fn build_ion_tests_symtables() -> Result> { + use std::{env, fs, ffi::OsStr}; + + let mut catalog_dir = env::current_dir()?; + catalog_dir.push("ion-tests/catalog"); + + let mut symbol_tables = vec!(); + + for entry in fs::read_dir(catalog_dir)? { + let entry = entry?; + let path = entry.path(); + + if Some(OsStr::new("ion")) == path.extension() { + let contents = fs::File::open(path)?; + for element in Element::iter(contents)? { + let element = element?; + symbol_tables.push(element.try_into()?); + } + } + } + + Ok(symbol_tables) +} + /// Parses a 'bytes*' expression. A bytes expression can be either an integer (0..255) or a string /// containing hexadecimal digits (whitespace allowed). The `elems` provided should be all of the /// arguments to be included in the bytes* expression. @@ -329,3 +355,47 @@ pub(crate) fn parse_absent_symbol>(txt: T) -> (Option, Opt (None, None) } } + +/// Given a LazyValue and an Element, this function will compare the two as symbols handling +/// parsing of the DSL's symbol notation and resolution. +pub(crate) fn compare_symbols_eq(ctx: &Context, actual: &ion_rs::LazyValue<'_, D>, expected: &Element) -> InnerResult { + use ion_rs::ValueRef; + + if expected.ion_type() != IonType::Symbol || actual.ion_type() != IonType::Symbol { + return Ok(false); + } + + let expected_symbol = expected.as_symbol().unwrap(); // SAFETY: Tested above. + let ValueRef::Symbol(actual_symbol_ref) = actual.read()? else { // SAFETY: Tested above. + return Ok(false); + }; + + let (expected_symtab, expected_offset) = parse_absent_symbol(expected_symbol.text().unwrap_or("")); + let (actual_symtab, actual_offset) = parse_absent_symbol(actual_symbol_ref.text().unwrap_or("")); + + let symbol_table = actual.symbol_table(); + + // Extract the symbol text for the expected value. + let expected_symbol_text= match (expected_symtab, expected_offset) { + (None, None) => expected_symbol.text().map(|t| t.to_owned()), + (None, Some(id)) => symbol_table.text_for(id).map(|t| t.to_owned()), + (Some(symtab), Some(id)) => match ctx.get_symbol_from_table(symtab, id) { + None => None, + Some(shared_symbol) => shared_symbol.text().map(|t| t.to_owned()), + } + _ => unreachable!(), // Cannot have a symtab without an id. + }; + + // Extract the symbol text for the actual value. + let actual_symbol_text = match (actual_symtab, actual_offset) { + (None, None) => actual_symbol_ref.text().map(|t| t.to_owned()), + (None, Some(id)) => symbol_table.text_for(id).map(|t| t.to_owned()), + (Some(symtab), Some(id)) => match ctx.get_symbol_from_table(symtab, id) { + None => None, + Some(shared_symbol) => shared_symbol.text().map(|t| t.to_owned()), + } + _ => unreachable!(), // CAnnot have a symtab without an id. + }; + + Ok(expected_symbol_text == actual_symbol_text) +} diff --git a/tests/conformance_dsl/model.rs b/tests/conformance_dsl/model.rs index c0294b29..5aa31a01 100644 --- a/tests/conformance_dsl/model.rs +++ b/tests/conformance_dsl/model.rs @@ -1,7 +1,7 @@ use ion_rs::{Decimal, Element, IonType, Sequence, Timestamp, ValueRef}; use ion_rs::{LazyRawValue, LazyRawFieldName, v1_0::RawValueRef}; use ion_rs::decimal::coefficient::Coefficient; -use super::{Clause, ClauseType, ConformanceErrorKind, InnerResult, parse_text_exp, parse_bytes_exp}; +use super::{Clause, ClauseType, Context, ConformanceErrorKind, InnerResult, parse_text_exp, parse_bytes_exp}; /// Represents a symbol in the Data Model representation of ion data. @@ -132,8 +132,8 @@ impl TryFrom<&Sequence> for ModelValue { Ok(ModelValue::Symbol(SymbolToken::Text(text))) }, ClauseType::Absent => { - let symtab= clause.body.get(1).and_then(|v| v.as_string()).ok_or(ConformanceErrorKind::ExpectedSymbolType)?; - let offset = clause.body.get(2).and_then(|v| v.as_i64()).ok_or(ConformanceErrorKind::ExpectedSymbolType)?; + let symtab= clause.body.first().and_then(|v| v.as_string()).ok_or(ConformanceErrorKind::ExpectedSymbolType)?; + let offset = clause.body.get(1).and_then(|v| v.as_i64()).ok_or(ConformanceErrorKind::ExpectedSymbolType)?; Ok(ModelValue::Symbol(SymbolToken::Absent(symtab.to_string(), offset))) } _ => unreachable!(), @@ -205,52 +205,12 @@ impl PartialEq for ModelValue { ModelValue::Float(val) => other.as_float() == Some(*val), ModelValue::Decimal(dec) => other.as_decimal() == Some(*dec), ModelValue::String(val) => other.as_string() == Some(val), - ModelValue::List(vals) => { - if other.ion_type() != IonType::List { - return false; - } - let other_seq = other.as_sequence().unwrap(); // SAFETY: Confirmed it's a list. - if other_seq.len() != vals.len() { - return false; - } - - for (ours, others) in vals.iter().zip(other_seq) { - if ours != others { - return false; - } - } - true - } - ModelValue::Sexp(vals) => { - if other.ion_type() != IonType::SExp { - return false; - } - let other_seq = other.as_sequence().unwrap(); // SAFETY: Confirmed it's a list. - if other_seq.len() != vals.len() { - return false; - } - - for (ours, others) in vals.iter().zip(other_seq) { - if ours != others { - return false; - } - } - true - } - ModelValue::Struct(_) => panic!("should not be using element eq for struct"), ModelValue::Blob(data) => other.as_blob() == Some(data.as_slice()), ModelValue::Clob(data) => other.as_clob() == Some(data.as_slice()), - ModelValue::Symbol(sym) => { - if let Some(other_sym) = other.as_symbol() { - match sym { - SymbolToken::Text(text) => Some(text.as_ref()) == other_sym.text(), - _ => unreachable!(), - } - } else { - false - } - } ModelValue::Timestamp(ts) => other.as_timestamp() == Some(*ts), + _ => unreachable!(), // SAFETY: EQ of Symbols, Lists, Structs, and SExps are handled + // via comparison to LazyValues after moving to using a Reader instead of Element + // API. These should join them but haven't yet. } } } @@ -261,94 +221,120 @@ impl PartialEq for &ModelValue { } } -impl PartialEq> for &ModelValue { - fn eq(&self, other: &ion_rs::LazyValue<'_, T>) -> bool { - match self { - ModelValue::Symbol(symbol_token) if other.ion_type() == IonType::Symbol => { - let Some(raw_symbol) = other.raw().map(|r| r.read()) else { - unreachable!() - }; - let raw_symbol = raw_symbol.expect("error reading symbol"); - - let RawValueRef::Symbol(raw_symbol) = raw_symbol else { - return false - }; - - let ValueRef::Symbol(symbol_text) = other.read().expect("error resolving symbol") else { - return false; - }; - - let (expected_txt, expected_id) = match symbol_token { - SymbolToken::Text(txt) => return symbol_text == txt, - SymbolToken::Offset(id) => (&String::from(""), *id as usize), - SymbolToken::Absent(txt, id) => (txt, *id as usize), - }; - - raw_symbol.matches_sid_or_text(expected_id, expected_txt) - } - ModelValue::Struct(expected_fields) if other.ion_type() == IonType::Struct => { - let ValueRef::Struct(strukt) = other.read().expect("error reading struct") else { - return false; - }; - - let mut is_equal = true; - let mut strukt_iter = strukt.iter(); - let mut expected_iter = expected_fields.iter(); - - while is_equal { - let actual = strukt_iter.next(); - let expected = expected_iter.next(); - - match (actual, expected) { - (Some(actual), Some((expected_field, expected_val))) => { - let actual = actual.expect("unable to read struct field"); - let actual_field = actual.raw_name().map(|n| n.read()).expect("unable to get symbolref for field name"); - let actual_field = actual_field.expect("unable to read symbolref for field name"); - - let (expected_txt, expected_id) = match expected_field { - SymbolToken::Text(txt) => (txt, usize::MAX), - SymbolToken::Offset(id) => (&String::from(""), *id as usize), - SymbolToken::Absent(txt, id) => (txt, *id as usize), - }; - is_equal = is_equal && actual_field.matches_sid_or_text(expected_id, expected_txt); - - let actual_value = actual.value(); - - is_equal = is_equal && (expected_val == actual_value); - } - (None, None) => break, - _ => is_equal = false, +/// Compares a ModelValue to a LazyValue for evaluating Denotes clauses. This is used in place of +/// PartialEq in order to communicate errors. +pub(crate) fn compare_values(ctx: &Context, model: &ModelValue, other: &ion_rs::LazyValue<'_, T>) -> InnerResult { + match model { + ModelValue::Symbol(symbol_token) if other.ion_type() == IonType::Symbol => { + // SAFETY: Tested other in the guard above, should not hit the else clause. + let Some(raw_symbol) = other.raw().map(|r| r.read()) else { + return Ok(false) + }; + + let raw_symbol = raw_symbol?; + + let RawValueRef::Symbol(raw_symbol) = raw_symbol else { + return Ok(false) + }; + + let ValueRef::Symbol(symbol_text) = other.read().expect("error resolving symbol") else { + return Ok(false); + }; + + let (expected_txt, expected_id) = match symbol_token { + SymbolToken::Text(txt) => return Ok(symbol_text == txt), + SymbolToken::Offset(id) => (String::from(""), *id as usize), + SymbolToken::Absent(symtab, id) => match ctx.get_symbol_from_table(symtab, *id as usize) { + None => (String::from(""), 0_usize), + Some(shared_symbol) => { + let shared_text = shared_symbol.text().unwrap_or(""); + (shared_text.to_string(), other.symbol_table().sid_for(&shared_text).unwrap_or(0)) } } - is_equal - } - ModelValue::List(expected) if other.ion_type() == IonType::List => { - let ValueRef::List(list) = other.read().expect("error reading list") else { - unreachable!(); - }; + }; + + Ok(raw_symbol.matches_sid_or_text(expected_id, &expected_txt)) + } + ModelValue::Struct(expected_fields) if other.ion_type() == IonType::Struct => { + // SAFETY: Tested other in the guard above, should not hit the else clause. + let ValueRef::Struct(strukt) = other.read().expect("error reading struct") else { + return Ok(false); + }; + + let mut is_equal = true; + let mut strukt_iter = strukt.iter(); + let mut expected_iter = expected_fields.iter(); + + while is_equal { + let actual = strukt_iter.next(); + let expected = expected_iter.next(); + + match (actual, expected) { + (Some(actual), Some((expected_field, expected_val))) => { + let actual = actual.expect("unable to read struct field"); + let actual_field = actual.raw_name().map(|n| n.read()).expect("unable to get symbolref for field name"); + let actual_field = actual_field.expect("unable to read symbolref for field name"); + + let (expected_txt, expected_id) = match expected_field { + SymbolToken::Text(txt) => (txt.clone(), usize::MAX), + SymbolToken::Offset(id) => (String::from(""), *id as usize), + SymbolToken::Absent(symtab, id) => match ctx.get_symbol_from_table(symtab, *id as usize) { + None => (String::from(""), 0_usize), + Some(shared_symbol) => { + let shared_text = shared_symbol.text().unwrap_or(""); + (shared_text.to_string(), other.symbol_table().sid_for(&shared_text).unwrap_or(0)) + } + } + }; + is_equal = is_equal && actual_field.matches_sid_or_text(expected_id, &expected_txt); - let actual: ion_rs::IonResult>> = list.iter().collect(); - let actual = actual.expect("Error parsing list"); + let actual_value = actual.value(); - actual.iter().zip(expected.iter()) - .fold(true, |acc, (actual_val, expected_val)| acc && (&expected_val == actual_val)) + is_equal = is_equal && compare_values(ctx, expected_val, &actual_value)?; + } + (None, None) => break, + _ => is_equal = false, + } } - ModelValue::Sexp(expected) if other.ion_type() == IonType::SExp => { - let ValueRef::SExp(sexp) = other.read().expect("error reading sexp") else { - unreachable!(); - }; - - let actual: ion_rs::IonResult>> = sexp.iter().collect(); - let actual = actual.expect("Error parsing sexp"); - actual.iter().zip(expected.iter()) - .fold(true, |acc, (actual_val, expected_val)| acc && (&expected_val == actual_val)) + Ok(is_equal) + } + ModelValue::List(expected) if other.ion_type() == IonType::List => { + // SAFETY: Tested other in the guard above, should not hit the else clause. + let ValueRef::List(list) = other.read().expect("error reading list") else { + return Ok(false); + }; + + let actual: ion_rs::IonResult>> = list.iter().collect(); + let actual = actual.expect("Error parsing list"); + + + for (actual_val, expected_val) in actual.iter().zip(expected.iter()) { + if !compare_values(ctx, expected_val, actual_val)? { + return Ok(false) + } } - _ => { - // Anything that reaches down here isn't a symbol, or can't contain a symbol. So - // we just have to worry about equality. - let other_elem: Element = Element::try_from(*other).expect("Unable to convert value into element"); - *self == other_elem + Ok(true) + } + ModelValue::Sexp(expected) if other.ion_type() == IonType::SExp => { + // SAFETY: Tested other in the guard above, should not hit the else clause. + let ValueRef::SExp(sexp) = other.read().expect("error reading sexp") else { + return Ok(false); + }; + + let actual: ion_rs::IonResult>> = sexp.iter().collect(); + let actual = actual?; + for (actual_val, expected_val) in actual.iter().zip(expected.iter()) { + if !compare_values(ctx, expected_val, actual_val)? { + return Ok(false) + } } + Ok(true) + } + _ => { + // Anything that reaches down here isn't a symbol, or can't contain a symbol. So + // we just have to worry about equality. + let other_elem: Element = Element::try_from(*other)?; + Ok(model == other_elem) } } } diff --git a/tests/conformance_tests.rs b/tests/conformance_tests.rs index a57b66f1..fc45173b 100644 --- a/tests/conformance_tests.rs +++ b/tests/conformance_tests.rs @@ -14,17 +14,30 @@ mod implementation { #[test] fn test_absent_symbol() { - let test = r#" - (ion_1_1 + let tests: &[&str] = &[ + r#"(ion_1_1 (toplevel '#$2' {'#$9': '#$8'}) (text "") (denotes (Symbol 2) (Struct (9 (Symbol 8)))) - ) - "#; - Document::from_str(test) - .unwrap_or_else(|e| panic!("Failed to load document:\n{:?}", e)) - .run() - .unwrap_or_else(|e| panic!("Test failed: {:?}", e)); + )"#, + r#"(ion_1_0 + (text '''$ion_symbol_table::{imports:[{name:"abcs", version: 2}]}''') + (text "$10 $11") + (produces '#$abcs#1' '#$abcs#2') + )"#, + r#"(ion_1_0 + (text '''$ion_symbol_table::{imports:[{name:"abcs", version: 2}]}''') + (text "$10 $11") + (denotes (Symbol (absent "abcs" 1)) (Symbol (absent "abcs" 2))) + )"#, + ]; + + for test in tests { + Document::from_str(test) + .unwrap_or_else(|e| panic!("Failed to load document:\n{:?}", e)) + .run() + .unwrap_or_else(|e| panic!("Test failed: {:?}", e)); + } } #[test] @@ -37,12 +50,13 @@ mod implementation { r#"(ion_1_1 "Timestamp Second" (text "2023-03-23T10:12:21Z") (denotes (Timestamp second 2023 3 23 (offset 0) 10 12 21))) "#, r#"(ion_1_1 "Timestamp Fractional" (text "2023-03-23T10:12:21.23Z") (denotes (Timestamp fraction 2023 3 23 (offset 0) 10 12 21 23 -2))) "#, ]; + for test in tests { Document::from_str(test) .unwrap_or_else(|e| panic!("Failed to load document: <<{}>>\n{:?}", test, e)) .run() .unwrap_or_else(|e| panic!("Test failed for simple doc: <<{}>>\n{:?}", test, e)); - } + } } #[test] @@ -103,7 +117,6 @@ mod ion_tests { println!("Testing: {}", file_name); let collection = TestCollection::load(file_name).expect("unable to load test file"); - println!("Collection: {:?}", collection); collection.run().expect("failed to run collection"); } }