From 9009481264e213759d99fe546a39e0416f22d8ee Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Tue, 3 Jun 2025 15:37:05 -0400 Subject: [PATCH 1/8] initial slash --- src/datetime.rs | 7 --- src/de.rs | 30 +----------- src/de/raw.rs | 4 +- src/document.rs | 17 ++----- src/lib.rs | 3 -- src/raw/document.rs | 11 +++++ src/raw/document_buf.rs | 17 ++++--- src/serde_helpers.rs | 48 +++++++++++++++++++- src/tests/modules/serializer_deserializer.rs | 7 ++- src/tests/spec/corpus.rs | 22 +++++---- 10 files changed, 89 insertions(+), 77 deletions(-) diff --git a/src/datetime.rs b/src/datetime.rs index eca1522b..b1394c32 100644 --- a/src/datetime.rs +++ b/src/datetime.rs @@ -386,13 +386,6 @@ impl crate::DateTime { } } - #[deprecated(since = "2.3.0", note = "Use try_to_rfc3339_string instead.")] - /// Convert this [`DateTime`] to an RFC 3339 formatted string. Panics if it could not be - /// represented in that format. - pub fn to_rfc3339_string(self) -> String { - self.try_to_rfc3339_string().unwrap() - } - /// Convert this [`DateTime`] to an RFC 3339 formatted string. pub fn try_to_rfc3339_string(self) -> Result { self.to_time_0_3() diff --git a/src/de.rs b/src/de.rs index 09ea1f0d..c1e1312a 100644 --- a/src/de.rs +++ b/src/de.rs @@ -137,40 +137,12 @@ where from_slice(bytes.as_slice()) } -/// Deserialize an instance of type `T` from an I/O stream of BSON, replacing any invalid UTF-8 -/// sequences with the Unicode replacement character. -/// -/// This is mainly useful when reading raw BSON returned from a MongoDB server, which -/// in rare cases can contain invalidly truncated strings (). -/// For most use cases, [`crate::from_reader`] can be used instead. -pub fn from_reader_utf8_lossy(reader: R) -> Result -where - T: DeserializeOwned, - R: Read, -{ - let bytes = reader_to_vec(reader)?; - from_slice_utf8_lossy(bytes.as_slice()) -} - /// Deserialize an instance of type `T` from a slice of BSON bytes. 
pub fn from_slice<'de, T>(bytes: &'de [u8]) -> Result where T: Deserialize<'de>, { - from_raw(raw::Deserializer::new(bytes, false)?) -} - -/// Deserialize an instance of type `T` from a slice of BSON bytes, replacing any invalid UTF-8 -/// sequences with the Unicode replacement character. -/// -/// This is mainly useful when reading raw BSON returned from a MongoDB server, which -/// in rare cases can contain invalidly truncated strings (). -/// For most use cases, [`crate::from_slice`] can be used instead. -pub fn from_slice_utf8_lossy<'de, T>(bytes: &'de [u8]) -> Result -where - T: Deserialize<'de>, -{ - from_raw(raw::Deserializer::new(bytes, true)?) + from_raw(raw::Deserializer::new(bytes)?) } pub(crate) fn from_raw<'de, T: Deserialize<'de>>( diff --git a/src/de/raw.rs b/src/de/raw.rs index 6ec3e437..d0a45a49 100644 --- a/src/de/raw.rs +++ b/src/de/raw.rs @@ -50,11 +50,11 @@ struct DeserializerOptions { } impl<'de> Deserializer<'de> { - pub(crate) fn new(buf: &'de [u8], utf8_lossy: bool) -> Result { + pub(crate) fn new(buf: &'de [u8]) -> Result { Ok(Self { element: RawElement::toplevel(buf)?, options: DeserializerOptions { - utf8_lossy, + utf8_lossy: false, human_readable: false, }, }) diff --git a/src/document.rs b/src/document.rs index 30e49283..eed8213c 100644 --- a/src/document.rs +++ b/src/document.rs @@ -695,9 +695,9 @@ impl Document { Ok(()) } - fn decode(reader: &mut R, utf_lossy: bool) -> crate::de::Result { + fn decode(reader: &mut R) -> crate::de::Result { let buf = crate::de::reader_to_vec(reader)?; - crate::de::from_raw(crate::de::RawDeserializer::new(&buf, utf_lossy)?) + crate::de::from_raw(crate::de::RawDeserializer::new(&buf)?) } /// Attempts to deserialize a [`Document`] from a byte stream. @@ -729,18 +729,7 @@ impl Document { /// # } /// ``` pub fn from_reader(mut reader: R) -> crate::de::Result { - Self::decode(&mut reader, false) - } - - /// Attempt to deserialize a [`Document`] that may contain invalid UTF-8 strings from a byte - /// stream. 
- /// - /// This is mainly useful when reading raw BSON returned from a MongoDB server, which - /// in rare cases can contain invalidly truncated strings (). - /// For most use cases, `Document::from_reader` can be used instead. - #[deprecated = "use bson::serde_helpers::Utf8LossyDeserialization"] - pub fn from_reader_utf8_lossy(mut reader: R) -> crate::de::Result { - Self::decode(&mut reader, true) + Self::decode(&mut reader) } } diff --git a/src/lib.rs b/src/lib.rs index d06f4601..3780be14 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -308,9 +308,6 @@ pub use self::{ uuid::{Uuid, UuidRepresentation}, }; -#[allow(deprecated)] -pub use self::de::{from_reader_utf8_lossy, from_slice_utf8_lossy}; - #[macro_use] mod macros; mod base64; diff --git a/src/raw/document.rs b/src/raw/document.rs index 468a3da0..772cd315 100644 --- a/src/raw/document.rs +++ b/src/raw/document.rs @@ -508,6 +508,17 @@ impl RawDocument { let bytes = self.cstring_bytes_at(start_at)?; try_to_str(bytes) } + + /// Copy this into a [`Document`], returning an error if invalid BSON is encountered. + pub fn to_document(&self) -> Result { + self.as_ref().try_into() + } + + /// Copy this into a [`Document`], returning an error if invalid BSON is encountered. Any + /// invalid UTF-8 sequences will be replaced with the Unicode replacement character. + pub fn to_document_utf8_lossy(&self) -> Result { + todo!() + } } impl<'de: 'a, 'a> Deserialize<'de> for &'a RawDocument { diff --git a/src/raw/document_buf.rs b/src/raw/document_buf.rs index 07b0fdae..e89ef704 100644 --- a/src/raw/document_buf.rs +++ b/src/raw/document_buf.rs @@ -1,6 +1,6 @@ use std::{ borrow::{Borrow, Cow}, - convert::{TryFrom, TryInto}, + convert::TryFrom, iter::FromIterator, ops::Deref, }; @@ -65,7 +65,7 @@ pub struct RawDocumentBuf { impl RawDocumentBuf { /// Creates a new, empty [`RawDocumentBuf`]. 
- pub fn new() -> RawDocumentBuf { + pub fn new() -> Self { let mut data = Vec::new(); data.extend(MIN_BSON_DOCUMENT_SIZE.to_le_bytes()); data.push(0); @@ -89,11 +89,16 @@ impl RawDocumentBuf { /// let doc = RawDocumentBuf::from_bytes(b"\x05\0\0\0\0".to_vec())?; /// # Ok::<(), bson::error::Error>(()) /// ``` - pub fn from_bytes(data: Vec) -> Result { + pub fn from_bytes(data: Vec) -> Result { let _ = RawDocument::from_bytes(data.as_slice())?; Ok(Self { data }) } + pub fn from_reader(reader: R) -> Result { + let buf = crate::de::reader_to_vec(reader)?; + Self::from_bytes(buf) + } + /// Create a [`RawDocumentBuf`] from a [`Document`]. /// /// ``` @@ -213,12 +218,6 @@ impl RawDocumentBuf { .expect("key should not contain interior null byte") }) } - - /// Convert this [`RawDocumentBuf`] to a [`Document`], returning an error - /// if invalid BSON is encountered. - pub fn to_document(&self) -> Result { - self.as_ref().try_into() - } } impl Default for RawDocumentBuf { diff --git a/src/serde_helpers.rs b/src/serde_helpers.rs index a8877bdf..6cd1bb25 100644 --- a/src/serde_helpers.rs +++ b/src/serde_helpers.rs @@ -817,7 +817,8 @@ where /// /// This wrapper type has no impact on serialization. Serializing a `Utf8LossyDeserialization` /// will call the `serialize` method for the wrapped `T`. 
-#[derive(PartialEq, Eq, PartialOrd, Ord, Debug)] +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash, Default)] +#[repr(transparent)] pub struct Utf8LossyDeserialization(pub T); pub(crate) const UTF8_LOSSY_NEWTYPE: &str = "$__bson_private_utf8_lossy"; @@ -852,3 +853,48 @@ impl<'de, T: Deserialize<'de>> Deserialize<'de> for Utf8LossyDeserialization deserializer.deserialize_newtype_struct(UTF8_LOSSY_NEWTYPE, V(PhantomData)) } } + +impl std::fmt::Display for Utf8LossyDeserialization { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +impl From for Utf8LossyDeserialization { + fn from(value: T) -> Self { + Self(value) + } +} + +impl Deref for Utf8LossyDeserialization { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for Utf8LossyDeserialization { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl AsRef for Utf8LossyDeserialization +where + R: ?Sized, + as Deref>::Target: AsRef, +{ + fn as_ref(&self) -> &R { + self.deref().as_ref() + } +} + +impl AsMut for Utf8LossyDeserialization +where + as Deref>::Target: AsMut, +{ + fn as_mut(&mut self) -> &mut R { + self.deref_mut().as_mut() + } +} diff --git a/src/tests/modules/serializer_deserializer.rs b/src/tests/modules/serializer_deserializer.rs index 1083e985..8161a451 100644 --- a/src/tests/modules/serializer_deserializer.rs +++ b/src/tests/modules/serializer_deserializer.rs @@ -18,6 +18,7 @@ use crate::{ Decimal128, Document, JavaScriptCodeWithScope, + RawDocumentBuf, Regex, Timestamp, }; @@ -73,8 +74,10 @@ fn test_encode_decode_utf8_string_invalid() { doc.to_writer(&mut buf).unwrap(); let expected = doc! 
{ "key": "��" }; - #[allow(deprecated)] - let decoded = Document::from_reader_utf8_lossy(&mut Cursor::new(buf)).unwrap(); + let decoded = RawDocumentBuf::from_reader(&mut Cursor::new(buf)) + .unwrap() + .to_document_utf8_lossy() + .unwrap(); assert_eq!(decoded, expected); } diff --git a/src/tests/spec/corpus.rs b/src/tests/spec/corpus.rs index 93bc3220..1ef1893e 100644 --- a/src/tests/spec/corpus.rs +++ b/src/tests/spec/corpus.rs @@ -182,7 +182,7 @@ fn run_test(test: TestFile) { if !description.contains("$regex query operator") { // deserialize the field from raw Bytes into a RawBson let deserializer_raw = - crate::de::RawDeserializer::new(canonical_bson.as_slice(), false).unwrap(); + crate::de::RawDeserializer::new(canonical_bson.as_slice()).unwrap(); let raw_bson_field = deserializer_raw .deserialize_any(FieldVisitor(test_key.as_str(), PhantomData::)) .expect(&description); @@ -194,7 +194,7 @@ fn run_test(test: TestFile) { // deserialize the field from raw Bytes into an OwnedRawBson let deserializer_raw = - crate::de::RawDeserializer::new(canonical_bson.as_slice(), false).unwrap(); + crate::de::RawDeserializer::new(canonical_bson.as_slice()).unwrap(); let owned_raw_bson_field = deserializer_raw .deserialize_any(FieldVisitor(test_key.as_str(), PhantomData::)) .expect(&description); @@ -203,7 +203,7 @@ fn run_test(test: TestFile) { // deserialize the field from raw Bytes into a Bson let deserializer_value = - crate::de::RawDeserializer::new(canonical_bson.as_slice(), false).unwrap(); + crate::de::RawDeserializer::new(canonical_bson.as_slice()).unwrap(); let bson_field = deserializer_value .deserialize_any(FieldVisitor(test_key.as_str(), PhantomData::)) .expect(&description); @@ -555,13 +555,15 @@ fn run_test(test: TestFile) { crate::from_reader::<_, Document>(bson.as_slice()).expect_err(description.as_str()); if decode_error.description.contains("invalid UTF-8") { - #[allow(deprecated)] - crate::from_reader_utf8_lossy::<_, 
Document>(bson.as_slice()).unwrap_or_else(|err| { - panic!( - "{}: utf8_lossy should not fail (failed with {:?})", - description, err - ) - }); + RawDocumentBuf::from_reader(bson.as_slice()) + .expect(&description) + .to_document_utf8_lossy() + .unwrap_or_else(|err| { + panic!( + "{}: utf8_lossy should not fail (failed with {:?})", + description, err + ) + }); crate::from_slice::>(bson.as_slice()) .expect(&description); } From 90913c2066dfb172f58ed16f9fe400fd160de73d Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Wed, 4 Jun 2025 10:38:40 -0400 Subject: [PATCH 2/8] to_document_utf8_lossy --- src/raw/document.rs | 16 ++++++++++++++-- src/raw/iter.rs | 19 +++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/raw/document.rs b/src/raw/document.rs index 772cd315..8bc4603f 100644 --- a/src/raw/document.rs +++ b/src/raw/document.rs @@ -10,6 +10,7 @@ use crate::{ error::{Error, Result}, raw::{serde::OwnedOrBorrowedRawDocument, RAW_DOCUMENT_NEWTYPE}, DateTime, + RawBson, Timestamp, }; @@ -511,13 +512,24 @@ impl RawDocument { /// Copy this into a [`Document`], returning an error if invalid BSON is encountered. pub fn to_document(&self) -> Result { - self.as_ref().try_into() + self.try_into() } /// Copy this into a [`Document`], returning an error if invalid BSON is encountered. Any /// invalid UTF-8 sequences will be replaced with the Unicode replacement character. pub fn to_document_utf8_lossy(&self) -> Result { - todo!() + self.iter_elements() + .map(|res| { + res.and_then(|e| { + let key = e.key().to_owned(); + let raw_value: RawBson = match e.value_utf8_lossy()? 
{ + Some(l) => l.into(), + None => e.value()?.to_raw_bson(), + }; + Ok((key, raw_value.try_into()?)) + }) + }) + .collect() } } diff --git a/src/raw/iter.rs b/src/raw/iter.rs index 9a4150dc..d794de50 100644 --- a/src/raw/iter.rs +++ b/src/raw/iter.rs @@ -452,3 +452,22 @@ pub(crate) struct Utf8LossyJavaScriptCodeWithScope<'a> { pub(crate) code: String, pub(crate) scope: &'a RawDocument, } + +impl<'a> From> for RawBson { + fn from(value: Utf8LossyBson<'a>) -> Self { + match value { + Utf8LossyBson::String(s) => RawBson::String(s), + Utf8LossyBson::JavaScriptCode(s) => RawBson::JavaScriptCode(s), + Utf8LossyBson::JavaScriptCodeWithScope(Utf8LossyJavaScriptCodeWithScope { + code, + scope, + }) => RawBson::JavaScriptCodeWithScope(super::RawJavaScriptCodeWithScope { + code, + scope: scope.to_raw_document_buf(), + }), + Utf8LossyBson::Symbol(s) => RawBson::Symbol(s), + Utf8LossyBson::DbPointer(p) => RawBson::DbPointer(p), + Utf8LossyBson::RegularExpression(r) => RawBson::RegularExpression(r), + } + } +} From 36a564f18b72d255ab19c2d6da746dd511831ed1 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Wed, 4 Jun 2025 11:12:47 -0400 Subject: [PATCH 3/8] public iterator method --- src/de/raw.rs | 4 ++-- src/raw/document.rs | 10 +--------- src/raw/iter.rs | 9 ++++++++- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/de/raw.rs b/src/de/raw.rs index d0a45a49..4f809df1 100644 --- a/src/de/raw.rs +++ b/src/de/raw.rs @@ -71,7 +71,7 @@ impl<'de> Deserializer<'de> { V: serde::de::Visitor<'de>, { if self.options.utf8_lossy { - if let Some(lossy) = self.element.value_utf8_lossy()? { + if let Some(lossy) = self.element.value_utf8_lossy_inner()? 
{ return match lossy { Utf8LossyBson::String(s) => visitor.visit_string(s), Utf8LossyBson::RegularExpression(re) => { @@ -178,7 +178,7 @@ impl<'de> Deserializer<'de> { fn get_string(&self) -> Result> { if self.options.utf8_lossy { - let value = self.element.value_utf8_lossy()?; + let value = self.element.value_utf8_lossy_inner()?; let s = match value { Some(Utf8LossyBson::String(s)) => s, _ => { diff --git a/src/raw/document.rs b/src/raw/document.rs index 8bc4603f..982a271a 100644 --- a/src/raw/document.rs +++ b/src/raw/document.rs @@ -10,7 +10,6 @@ use crate::{ error::{Error, Result}, raw::{serde::OwnedOrBorrowedRawDocument, RAW_DOCUMENT_NEWTYPE}, DateTime, - RawBson, Timestamp, }; @@ -520,14 +519,7 @@ impl RawDocument { pub fn to_document_utf8_lossy(&self) -> Result { self.iter_elements() .map(|res| { - res.and_then(|e| { - let key = e.key().to_owned(); - let raw_value: RawBson = match e.value_utf8_lossy()? { - Some(l) => l.into(), - None => e.value()?.to_raw_bson(), - }; - Ok((key, raw_value.try_into()?)) - }) + res.and_then(|e| Ok((e.key().to_owned(), e.value_utf8_lossy()?.try_into()?))) }) .collect() } diff --git a/src/raw/iter.rs b/src/raw/iter.rs index d794de50..62fdc8e6 100644 --- a/src/raw/iter.rs +++ b/src/raw/iter.rs @@ -265,7 +265,14 @@ impl<'a> RawElement<'a> { }) } - pub(crate) fn value_utf8_lossy(&self) -> Result>> { + pub fn value_utf8_lossy(&self) -> Result { + match self.value_utf8_lossy_inner()? 
{ + Some(v) => Ok(v.into()), + None => Ok(self.value()?.to_raw_bson()), + } + } + + pub(crate) fn value_utf8_lossy_inner(&self) -> Result>> { Ok(Some(match self.kind { ElementType::String => Utf8LossyBson::String(self.read_utf8_lossy()), ElementType::JavaScriptCode => Utf8LossyBson::JavaScriptCode(self.read_utf8_lossy()), From c2083d4c2b5dce4b4209f8898086ecf95c3666ae Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Wed, 4 Jun 2025 12:03:36 -0400 Subject: [PATCH 4/8] recursion --- src/raw/array.rs | 5 +++ src/raw/iter.rs | 39 +++++++++++++++++++- src/tests/modules/serializer_deserializer.rs | 4 +- 3 files changed, 44 insertions(+), 4 deletions(-) diff --git a/src/raw/array.rs b/src/raw/array.rs index 4f2f47ba..196528a9 100644 --- a/src/raw/array.rs +++ b/src/raw/array.rs @@ -212,6 +212,11 @@ impl RawArray { pub fn is_empty(&self) -> bool { self.doc.is_empty() } + + /// Returns an iterator over the `RawElement`s in the array. + pub fn iter_elements(&self) -> RawIter { + RawIter::new(&self.doc) + } } impl std::fmt::Debug for RawArray { diff --git a/src/raw/iter.rs b/src/raw/iter.rs index 62fdc8e6..f938ea90 100644 --- a/src/raw/iter.rs +++ b/src/raw/iter.rs @@ -9,9 +9,11 @@ use crate::{ DateTime, Decimal128, RawArray, + RawArrayBuf, RawBinaryRef, RawBson, RawDbPointerRef, + RawDocumentBuf, RawJavaScriptCodeWithScopeRef, RawRegexRef, Timestamp, @@ -267,8 +269,41 @@ impl<'a> RawElement<'a> { pub fn value_utf8_lossy(&self) -> Result { match self.value_utf8_lossy_inner()? 
{ - Some(v) => Ok(v.into()), - None => Ok(self.value()?.to_raw_bson()), + Some(v) => Ok(match v { + Utf8LossyBson::JavaScriptCodeWithScope(Utf8LossyJavaScriptCodeWithScope { + code, + scope, + }) => { + let mut tmp = RawDocumentBuf::new(); + for elem in scope.iter_elements() { + let elem = elem?; + tmp.append(elem.key(), elem.value_utf8_lossy()?); + } + RawBson::JavaScriptCodeWithScope(super::RawJavaScriptCodeWithScope { + code, + scope: tmp, + }) + } + v => v.into(), + }), + None => Ok(match self.value()? { + RawBsonRef::Array(arr) => { + let mut tmp = RawArrayBuf::new(); + for elem in arr.iter_elements() { + tmp.push(elem?.value_utf8_lossy()?); + } + RawBson::Array(tmp) + } + RawBsonRef::Document(doc) => { + let mut tmp = RawDocumentBuf::new(); + for elem in doc.iter_elements() { + let elem = elem?; + tmp.append(elem.key(), elem.value_utf8_lossy()?); + } + RawBson::Document(tmp) + } + v => v.to_raw_bson(), + }), } } diff --git a/src/tests/modules/serializer_deserializer.rs b/src/tests/modules/serializer_deserializer.rs index 8161a451..63182cde 100644 --- a/src/tests/modules/serializer_deserializer.rs +++ b/src/tests/modules/serializer_deserializer.rs @@ -68,12 +68,12 @@ fn test_encode_decode_utf8_string_invalid() { let bytes = b"\x80\xae".to_vec(); let src = unsafe { String::from_utf8_unchecked(bytes) }; - let doc = doc! { "key": src }; + let doc = doc! { "key": &src, "subdoc": { "subkey": &src } }; let mut buf = Vec::new(); doc.to_writer(&mut buf).unwrap(); - let expected = doc! { "key": "��" }; + let expected = doc! 
{ "key": "��", "subdoc": { "subkey": "��" } }; let decoded = RawDocumentBuf::from_reader(&mut Cursor::new(buf)) .unwrap() .to_document_utf8_lossy() From a70c452e82dc164ff22183ca45aec0376ddbe593 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Wed, 4 Jun 2025 13:07:18 -0400 Subject: [PATCH 5/8] shift recursion to doc conversion --- src/raw/document.rs | 48 ++++++++++++++++++++++++++++++++++++++++----- src/raw/iter.rs | 39 ++---------------------------------- 2 files changed, 45 insertions(+), 42 deletions(-) diff --git a/src/raw/document.rs b/src/raw/document.rs index 982a271a..b9005700 100644 --- a/src/raw/document.rs +++ b/src/raw/document.rs @@ -9,7 +9,11 @@ use crate::{ de::MIN_BSON_DOCUMENT_SIZE, error::{Error, Result}, raw::{serde::OwnedOrBorrowedRawDocument, RAW_DOCUMENT_NEWTYPE}, + Bson, DateTime, + JavaScriptCodeWithScope, + RawBson, + RawJavaScriptCodeWithScope, Timestamp, }; @@ -517,11 +521,45 @@ impl RawDocument { /// Copy this into a [`Document`], returning an error if invalid BSON is encountered. Any /// invalid UTF-8 sequences will be replaced with the Unicode replacement character. pub fn to_document_utf8_lossy(&self) -> Result { - self.iter_elements() - .map(|res| { - res.and_then(|e| Ok((e.key().to_owned(), e.value_utf8_lossy()?.try_into()?))) - }) - .collect() + let mut out = Document::new(); + for elem in self.iter_elements() { + let elem = elem?; + let value = deep_utf8_lossy(elem.value_utf8_lossy()?)?; + out.insert(elem.key(), value); + } + Ok(out) + } +} + +fn deep_utf8_lossy(src: RawBson) -> Result { + match src { + RawBson::Array(arr) => { + let mut tmp = vec![]; + for elem in arr.iter_elements() { + tmp.push(deep_utf8_lossy(elem?.value_utf8_lossy()?)?); + } + Ok(Bson::Array(tmp)) + } + RawBson::Document(doc) => { + let mut tmp = doc! 
{}; + for elem in doc.iter_elements() { + let elem = elem?; + tmp.insert(elem.key(), deep_utf8_lossy(elem.value_utf8_lossy()?)?); + } + Ok(Bson::Document(tmp)) + } + RawBson::JavaScriptCodeWithScope(RawJavaScriptCodeWithScope { code, scope }) => { + let mut tmp = doc! {}; + for elem in scope.iter_elements() { + let elem = elem?; + tmp.insert(elem.key(), deep_utf8_lossy(elem.value_utf8_lossy()?)?); + } + Ok(Bson::JavaScriptCodeWithScope(JavaScriptCodeWithScope { + code, + scope: tmp, + })) + } + v => v.try_into(), } } diff --git a/src/raw/iter.rs b/src/raw/iter.rs index f938ea90..62fdc8e6 100644 --- a/src/raw/iter.rs +++ b/src/raw/iter.rs @@ -9,11 +9,9 @@ use crate::{ DateTime, Decimal128, RawArray, - RawArrayBuf, RawBinaryRef, RawBson, RawDbPointerRef, - RawDocumentBuf, RawJavaScriptCodeWithScopeRef, RawRegexRef, Timestamp, @@ -269,41 +267,8 @@ impl<'a> RawElement<'a> { pub fn value_utf8_lossy(&self) -> Result { match self.value_utf8_lossy_inner()? { - Some(v) => Ok(match v { - Utf8LossyBson::JavaScriptCodeWithScope(Utf8LossyJavaScriptCodeWithScope { - code, - scope, - }) => { - let mut tmp = RawDocumentBuf::new(); - for elem in scope.iter_elements() { - let elem = elem?; - tmp.append(elem.key(), elem.value_utf8_lossy()?); - } - RawBson::JavaScriptCodeWithScope(super::RawJavaScriptCodeWithScope { - code, - scope: tmp, - }) - } - v => v.into(), - }), - None => Ok(match self.value()? 
{ - RawBsonRef::Array(arr) => { - let mut tmp = RawArrayBuf::new(); - for elem in arr.iter_elements() { - tmp.push(elem?.value_utf8_lossy()?); - } - RawBson::Array(tmp) - } - RawBsonRef::Document(doc) => { - let mut tmp = RawDocumentBuf::new(); - for elem in doc.iter_elements() { - let elem = elem?; - tmp.append(elem.key(), elem.value_utf8_lossy()?); - } - RawBson::Document(tmp) - } - v => v.to_raw_bson(), - }), + Some(v) => Ok(v.into()), + None => Ok(self.value()?.to_raw_bson()), } } From affe749b1b8db8f23a00dc32dd32916d1cfd467a Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Fri, 6 Jun 2025 10:12:15 -0400 Subject: [PATCH 6/8] update for error changes --- src/raw/document.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/raw/document.rs b/src/raw/document.rs index b9005700..e62eb62e 100644 --- a/src/raw/document.rs +++ b/src/raw/document.rs @@ -514,13 +514,13 @@ impl RawDocument { } /// Copy this into a [`Document`], returning an error if invalid BSON is encountered. - pub fn to_document(&self) -> Result { + pub fn to_document(&self) -> RawResult { self.try_into() } /// Copy this into a [`Document`], returning an error if invalid BSON is encountered. Any /// invalid UTF-8 sequences will be replaced with the Unicode replacement character. 
- pub fn to_document_utf8_lossy(&self) -> Result { + pub fn to_document_utf8_lossy(&self) -> RawResult { let mut out = Document::new(); for elem in self.iter_elements() { let elem = elem?; @@ -531,7 +531,7 @@ impl RawDocument { } } -fn deep_utf8_lossy(src: RawBson) -> Result { +fn deep_utf8_lossy(src: RawBson) -> RawResult { match src { RawBson::Array(arr) => { let mut tmp = vec![]; From 64afd63e311d63147c8a36fe9a69e1b4c964d714 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Fri, 6 Jun 2025 10:26:21 -0400 Subject: [PATCH 7/8] re-update comment --- src/raw/array.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/raw/array.rs b/src/raw/array.rs index 196528a9..a55daa03 100644 --- a/src/raw/array.rs +++ b/src/raw/array.rs @@ -213,7 +213,16 @@ impl RawArray { self.doc.is_empty() } - /// Returns an iterator over the `RawElement`s in the array. + /// Gets an iterator over the elements in the [`RawArray`], + /// which yields `Result>` values. These hold a + /// reference to the underlying array but do not explicitly + /// resolve the values. + /// + /// This iterator, which underpins the implementation of the + /// default iterator, produces `RawElement` objects that hold a + /// view onto the array but do not parse out or construct + /// values until the `.value()` or `.try_into()` methods are + /// called. pub fn iter_elements(&self) -> RawIter { RawIter::new(&self.doc) } From 8679191f8159ce98cbc2505049840b896bd7ce06 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Mon, 9 Jun 2025 12:14:54 -0400 Subject: [PATCH 8/8] error updates --- src/error.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/error.rs b/src/error.rs index 58f3e000..d1497760 100644 --- a/src/error.rs +++ b/src/error.rs @@ -50,6 +50,11 @@ pub enum ErrorKind { /// The kind of error that occurred. kind: ValueAccessErrorKind, }, + + /// A wrapped deserialization error. 
+ /// TODO RUST-1406: collapse this + #[error("Deserialization error")] + DeError(crate::de::Error), } impl From for Error { @@ -62,6 +67,16 @@ impl From for Error { } } +impl From<crate::de::Error> for Error { + fn from(value: crate::de::Error) -> Self { + Self { + kind: ErrorKind::DeError(value), + key: None, + index: None, + } + } +} + /// The types of errors that can occur when attempting to access a value in a document. #[derive(Debug, Error)] #[non_exhaustive]