From 06bcd4ffadfed98330837f5ca6f9a92e4c1198ec Mon Sep 17 00:00:00 2001 From: tycho garen Date: Sun, 31 Dec 2023 15:57:16 -0500 Subject: [PATCH 01/17] raw: lazy iterator base implementation --- src/raw/error.rs | 8 ++ src/raw/iter.rs | 21 +--- src/raw/lazy.rs | 321 +++++++++++++++++++++++++++++++++++++++++++++++ src/raw/mod.rs | 18 ++- 4 files changed, 340 insertions(+), 28 deletions(-) create mode 100644 src/raw/lazy.rs diff --git a/src/raw/error.rs b/src/raw/error.rs index 556b7fa0..b29fe280 100644 --- a/src/raw/error.rs +++ b/src/raw/error.rs @@ -48,6 +48,14 @@ pub enum ErrorKind { Utf8EncodingError(Utf8Error), } +impl ErrorKind { + pub(crate) fn new_malformed(e: impl ToString) -> Self { + ErrorKind::MalformedValue { + message: e.to_string(), + } + } +} + impl std::fmt::Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { let p = self diff --git a/src/raw/iter.rs b/src/raw/iter.rs index 7588090a..97ecf328 100644 --- a/src/raw/iter.rs +++ b/src/raw/iter.rs @@ -5,26 +5,13 @@ use crate::{ oid::ObjectId, raw::{Error, ErrorKind, Result}, spec::{BinarySubtype, ElementType}, - DateTime, - Decimal128, - Timestamp, + DateTime, Decimal128, Timestamp, }; use super::{ - bson_ref::RawDbPointerRef, - checked_add, - error::try_with_key, - f64_from_slice, - i32_from_slice, - i64_from_slice, - read_lenencoded, - read_nullterminated, - RawArray, - RawBinaryRef, - RawBsonRef, - RawDocument, - RawJavaScriptCodeWithScopeRef, - RawRegexRef, + bson_ref::RawDbPointerRef, checked_add, error::try_with_key, f64_from_slice, i32_from_slice, + i64_from_slice, read_lenencoded, read_nullterminated, RawArray, RawBinaryRef, RawBsonRef, + RawDocument, RawJavaScriptCodeWithScopeRef, RawRegexRef, }; /// An iterator over the document's entries. diff --git a/src/raw/lazy.rs b/src/raw/lazy.rs new file mode 100644 index 00000000..f1529be4 --- /dev/null +++ b/src/raw/lazy.rs @@ -0,0 +1,321 @@ +use std::convert::TryInto; + +use crate::{ + de::{read_bool, MIN_BSON_DOCUMENT_SIZE, MIN_CODE_WITH_SCOPE_SIZE}, + oid::ObjectId, + raw::{Error, ErrorKind, Result}, + spec::{BinarySubtype, ElementType}, + DateTime, Decimal128, RawArray, RawBinaryRef, RawDbPointerRef, RawJavaScriptCodeWithScopeRef, + RawRegexRef, Timestamp, +}; + +use super::{ + checked_add, error::try_with_key, f64_from_slice, i32_from_slice, i64_from_slice, + read_len_and_end, read_lenencoded, read_nullterminated, RawBsonRef, RawDocument, +}; + +/// An iterator over the document's entries. +pub struct Iter<'a> { + doc: &'a RawDocument, + offset: usize, + + /// Whether the underlying doc is assumed to be valid or if an error has been encountered. + /// After an error, all subsequent iterations will return None. + valid: bool, +} + +impl<'a> Iter<'a> { + pub(crate) fn new(doc: &'a RawDocument) -> Self { + Self { + doc, + offset: 4, + valid: true, + } + } + + fn verify_enough_bytes(&self, start: usize, num_bytes: usize) -> Result<()> { + let end = checked_add(start, num_bytes)?; + if self.doc.as_bytes().get(start..end).is_none() { + return Err(Error::new_without_key(ErrorKind::new_malformed(format!( + "length exceeds remaining length of buffer: {} vs {}", + num_bytes, + self.doc.as_bytes().len() - start + )))); + } + Ok(()) + } + + fn next_document_len(&self, starting_at: usize) -> Result { + self.verify_enough_bytes(starting_at, MIN_BSON_DOCUMENT_SIZE as usize)?; + let size = i32_from_slice(&self.doc.as_bytes()[starting_at..])? as usize; + + if size < MIN_BSON_DOCUMENT_SIZE as usize { + return Err(Error::new_without_key(ErrorKind::new_malformed(format!( + "document too small: {} bytes", + size + )))); + } + + self.verify_enough_bytes(starting_at, size)?; + + if self.doc.as_bytes()[starting_at + size - 1] != 0 { + return Err(Error::new_without_key(ErrorKind::new_malformed( + "not null terminated", + ))); + } + Ok(size) + } +} + +#[derive(Clone)] +pub struct RawLazyElement<'a> { + pub key: &'a str, + pub kind: ElementType, + doc: &'a RawDocument, + start_at: usize, + end_at: usize, +} + +impl<'a> RawLazyElement<'a> { + pub fn resolve(self) -> Result> { + Ok(match self.kind { + ElementType::Null => RawBsonRef::Null, + ElementType::Undefined => RawBsonRef::Undefined, + ElementType::MinKey => RawBsonRef::MinKey, + ElementType::MaxKey => RawBsonRef::MaxKey, + ElementType::ObjectId => RawBsonRef::ObjectId(ObjectId::from_bytes( + self.doc.as_bytes()[self.start_at..self.end_at] + .try_into() + .map_err(|e| Error::new_with_key(self.key, ErrorKind::new_malformed(e)))?, + )), + ElementType::EmbeddedDocument => RawBsonRef::Document(RawDocument::from_bytes( + &self.doc.as_bytes()[self.start_at..self.end_at], + )?), + ElementType::Array => RawBsonRef::Array(RawArray::from_doc(RawDocument::from_bytes( + &self.doc.as_bytes()[self.start_at..self.end_at], + )?)), + ElementType::Int32 => { + RawBsonRef::Int32(i32_from_slice(&self.doc.as_bytes()[self.start_at..])?) + } + ElementType::Int64 => { + RawBsonRef::Int64(i64_from_slice(&self.doc.as_bytes()[self.start_at..])?) + } + ElementType::Double => { + RawBsonRef::Double(f64_from_slice(&self.doc.as_bytes()[self.start_at..])?) + } + ElementType::String => { + RawBsonRef::String(read_lenencoded(&self.doc.as_bytes()[self.start_at..])?) + } + ElementType::Boolean => RawBsonRef::Boolean( + read_bool(&self.doc.as_bytes()[self.start_at..]) + .map_err(|e| Error::new_with_key(self.key, ErrorKind::new_malformed(e)))?, + ), + ElementType::DateTime => RawBsonRef::DateTime(DateTime::from_millis(i64_from_slice( + &self.doc.as_bytes()[self.start_at..], + )?)), + ElementType::Decimal128 => RawBsonRef::Decimal128(Decimal128::from_bytes( + self.doc.as_bytes()[self.start_at..self.end_at] + .try_into() + .map_err(|e| Error::new_with_key(self.key, ErrorKind::new_malformed(e)))?, + )), + ElementType::JavaScriptCode => { + RawBsonRef::JavaScriptCode(read_lenencoded(&self.doc.as_bytes()[self.start_at..])?) + } + ElementType::Symbol => { + RawBsonRef::Symbol(read_lenencoded(&self.doc.as_bytes()[self.start_at..])?) + } + ElementType::DbPointer => RawBsonRef::DbPointer(RawDbPointerRef { + namespace: read_lenencoded(&self.doc.as_bytes()[self.start_at..])?, + id: ObjectId::from_bytes( + self.doc.as_bytes()[self.start_at..self.end_at] + .try_into() + .map_err(|e| Error::new_with_key(self.key, ErrorKind::new_malformed(e)))?, + ), + }), + ElementType::RegularExpression => { + let pattern = read_nullterminated(&self.doc.as_bytes()[self.start_at..])?; + let options = read_nullterminated( + &self.doc.as_bytes()[(self.start_at + pattern.len() + 1)..], + )?; + RawBsonRef::RegularExpression(RawRegexRef { pattern, options }) + } + ElementType::Timestamp => RawBsonRef::Timestamp( + Timestamp::from_reader(&self.doc.as_bytes()[self.start_at..]) + .map_err(|e| Error::new_with_key(self.key, ErrorKind::new_malformed(e)))?, + ), + ElementType::Binary => { + let len = i32_from_slice(&self.doc.as_bytes()[self.start_at..])? as usize; + let data_start = self.start_at + 4 + 1; + + if len >= i32::MAX as usize { + return Err(Error::new_with_key( + self.key, + ErrorKind::new_malformed(format!("binary length exceeds maximum: {}", len)), + )); + } + + let subtype = BinarySubtype::from(self.doc.as_bytes()[self.start_at + 4]); + let data = match subtype { + BinarySubtype::BinaryOld => { + if len < 4 { + return Err(Error::new_with_key( + self.key, + ErrorKind::new_malformed( + "old binary subtype has no inner declared length", + ), + )); + } + let oldlength = + i32_from_slice(&self.doc.as_bytes()[data_start..])? as usize; + if checked_add(oldlength, 4)? != len { + return Err(Error::new_with_key( + self.key, + ErrorKind::new_malformed( + "old binary subtype has wrong inner declared length", + ), + )); + } + &self.doc.as_bytes()[(data_start + 4)..(data_start + len)] + } + _ => &self.doc.as_bytes()[data_start..(data_start + len)], + }; + RawBsonRef::Binary(RawBinaryRef { + subtype, + bytes: data, + }) + } + ElementType::JavaScriptCodeWithScope => { + let slice = &&self.doc.as_bytes()[self.start_at..self.end_at]; + let code = read_lenencoded(&slice[4..])?; + let scope_start = 4 + 4 + code.len() + 1; + let scope = RawDocument::from_bytes(&slice[scope_start..])?; + + RawBsonRef::JavaScriptCodeWithScope(RawJavaScriptCodeWithScopeRef { code, scope }) + } + }) + } +} + +impl<'a> Iter<'a> { + pub(crate) fn into_eager(self) -> impl Iterator)>> { + self.map(|outer| { + outer.and_then(|val| -> Result<(&'a str, RawBsonRef<'a>)> { + Ok((val.key, val.resolve()?)) + }) + }) + } +} + +impl<'a> Iterator for Iter<'a> { + type Item = Result>; + + fn next(&mut self) -> Option>> { + if !self.valid { + return None; + } else if self.offset == self.doc.as_bytes().len() - 1 { + if self.doc.as_bytes()[self.offset] == 0 { + // end of document marker + return None; + } else { + self.valid = false; + return Some(Err(Error::new_without_key(ErrorKind::new_malformed( + "document not null terminated", + )))); + } + } else if self.offset >= self.doc.as_bytes().len() { + self.valid = false; + return Some(Err(Error::new_without_key(ErrorKind::new_malformed( + "iteration overflowed document", + )))); + } + + let key = match read_nullterminated(&self.doc.as_bytes()[self.offset + 1..]) { + Ok(k) => k, + Err(e) => { + self.valid = false; + return Some(Err(e)); + } + }; + + let valueoffset = self.offset + 1 + key.len() + 1; // type specifier + key + \0 + let kvp_result = try_with_key(key, || { + let element_type = match ElementType::from(self.doc.as_bytes()[self.offset]) { + Some(et) => et, + None => { + return Err(Error::new_with_key( + key, + ErrorKind::new_malformed(format!( + "invalid tag: {}", + self.doc.as_bytes()[self.offset] + )), + )) + } + }; + + let element_size = match element_type { + ElementType::Int32 => 4, + ElementType::Int64 => 8, + ElementType::Double => 8, + ElementType::String => read_len_and_end(&self.doc.as_bytes()[valueoffset..])?.0, + ElementType::EmbeddedDocument => self.next_document_len(valueoffset)?, + ElementType::Array => self.next_document_len(valueoffset)?, + ElementType::Binary => read_len_and_end(&self.doc.as_bytes()[valueoffset..])?.0, + ElementType::ObjectId => 12, + ElementType::Boolean => 1, + ElementType::DateTime => 8, + ElementType::RegularExpression => { + let pattern = read_nullterminated(&self.doc.as_bytes()[valueoffset..])?; + let options = read_nullterminated( + &self.doc.as_bytes()[(valueoffset + pattern.len() + 1)..], + )?; + pattern.len() + 1 + options.len() + 1 + } + ElementType::Null => 0, + ElementType::Undefined => 0, + ElementType::Timestamp => 8, + ElementType::JavaScriptCode => { + read_len_and_end(&self.doc.as_bytes()[valueoffset..])?.0 + } + ElementType::JavaScriptCodeWithScope => { + let length = read_len_and_end(&self.doc.as_bytes()[valueoffset..])?.0; + + if length < MIN_CODE_WITH_SCOPE_SIZE as usize { + return Err(Error::new_without_key(ErrorKind::new_malformed( + "code with scope length too small", + ))); + } + + length + } + ElementType::DbPointer => { + let length = read_len_and_end(&self.doc.as_bytes()[valueoffset..])?.0; + length + 1 + 12 + } + ElementType::Symbol => read_len_and_end(&self.doc.as_bytes()[valueoffset..])?.0, + ElementType::Decimal128 => 16, + ElementType::MinKey => 0, + ElementType::MaxKey => 0, + }; + + self.offset = valueoffset + element_size; + self.verify_enough_bytes(valueoffset, element_size)?; + + Ok((element_type, element_size)) + }); + + if kvp_result.is_err() { + self.valid = false; + } + + Some(match kvp_result { + Ok((kind, size)) => Ok(RawLazyElement { + key, + kind, + doc: self.doc, + start_at: valueoffset, + end_at: valueoffset + size, + }), + Err(e) => Err(e), + }) + } +} diff --git a/src/raw/mod.rs b/src/raw/mod.rs index a96f6d13..a3dbb2c7 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -120,6 +120,7 @@ mod document; mod document_buf; mod error; mod iter; +pub mod lazy; pub(crate) mod serde; #[cfg(test)] mod test; @@ -133,11 +134,7 @@ pub use self::{ array_buf::RawArrayBuf, bson::{RawBson, RawJavaScriptCodeWithScope}, bson_ref::{ - RawBinaryRef, - RawBsonRef, - RawDbPointerRef, - RawJavaScriptCodeWithScopeRef, - RawRegexRef, + RawBinaryRef, RawBsonRef, RawDbPointerRef, RawJavaScriptCodeWithScopeRef, RawRegexRef, }, document::RawDocument, document_buf::RawDocumentBuf, @@ -212,7 +209,7 @@ fn read_nullterminated(buf: &[u8]) -> Result<&str> { } } -fn read_lenencoded(buf: &[u8]) -> Result<&str> { +fn read_len_and_end(buf: &[u8]) -> Result<(usize, usize)> { if buf.len() < 4 { return Err(Error::new_without_key(ErrorKind::MalformedValue { message: format!( @@ -243,12 +240,11 @@ fn read_lenencoded(buf: &[u8]) -> Result<&str> { ), })); } + Ok((length as usize + 4, end)) +} - if buf[end - 1] != 0 { - return Err(Error::new_without_key(ErrorKind::MalformedValue { - message: "expected string to be null-terminated".to_string(), - })); - } +fn read_lenencoded(buf: &[u8]) -> Result<&str> { + let end = read_len_and_end(buf)?.1; // exclude null byte try_to_str(&buf[4..(end - 1)]) From 7f5ca0c32a8e21bf519af446bf47970872e37afc Mon Sep 17 00:00:00 2001 From: tycho garen Date: Sun, 31 Dec 2023 17:55:12 -0500 Subject: [PATCH 02/17] all but one test --- src/raw/array.rs | 17 +- src/raw/document.rs | 18 +- src/raw/document_buf.rs | 25 +-- src/raw/iter.rs | 405 +++++++++++++++++++++------------------- src/raw/lazy.rs | 321 ------------------------------- src/raw/mod.rs | 1 - src/raw/test/mod.rs | 35 ++-- 7 files changed, 240 insertions(+), 582 deletions(-) delete mode 100644 src/raw/lazy.rs diff --git a/src/raw/array.rs b/src/raw/array.rs index a77002ec..a31c18b1 100644 --- a/src/raw/array.rs +++ b/src/raw/array.rs @@ -5,21 +5,10 @@ use serde::{ser::SerializeSeq, Deserialize, Serialize}; use super::{ error::{ValueAccessError, ValueAccessErrorKind, ValueAccessResult}, serde::OwnedOrBorrowedRawArray, - Error, - Iter, - RawBinaryRef, - RawBsonRef, - RawDocument, - RawRegexRef, - Result, + Error, RawBinaryRef, RawBsonRef, RawDocument, RawRegexRef, Result, }; use crate::{ - oid::ObjectId, - raw::RAW_ARRAY_NEWTYPE, - spec::ElementType, - Bson, - DateTime, - RawArrayBuf, + oid::ObjectId, raw::RAW_ARRAY_NEWTYPE, spec::ElementType, Bson, DateTime, RawArrayBuf, Timestamp, }; @@ -267,7 +256,7 @@ impl<'a> IntoIterator for &'a RawArray { /// An iterator over borrowed raw BSON array values. pub struct RawArrayIter<'a> { - inner: Iter<'a>, + inner: Box)>> + 'a>, } impl<'a> Iterator for RawArrayIter<'a> { diff --git a/src/raw/document.rs b/src/raw/document.rs index 57a42f10..527cdcd5 100644 --- a/src/raw/document.rs +++ b/src/raw/document.rs @@ -8,20 +8,12 @@ use serde::{ser::SerializeMap, Deserialize, Serialize}; use crate::{ de::MIN_BSON_DOCUMENT_SIZE, raw::{error::ErrorKind, serde::OwnedOrBorrowedRawDocument, RAW_DOCUMENT_NEWTYPE}, - DateTime, - Timestamp, + DateTime, Timestamp, }; use super::{ error::{ValueAccessError, ValueAccessErrorKind, ValueAccessResult}, - i32_from_slice, - Error, - Iter, - RawArray, - RawBinaryRef, - RawBsonRef, - RawDocumentBuf, - RawRegexRef, + i32_from_slice, Error, Iter, RawArray, RawBinaryRef, RawBsonRef, RawDocumentBuf, RawRegexRef, Result, }; use crate::{oid::ObjectId, spec::ElementType, Document}; @@ -577,10 +569,10 @@ impl TryFrom<&RawDocument> for crate::Document { } impl<'a> IntoIterator for &'a RawDocument { - type IntoIter = Iter<'a>; + type IntoIter = Box + 'a>; type Item = Result<(&'a str, RawBsonRef<'a>)>; - fn into_iter(self) -> Iter<'a> { - Iter::new(self) + fn into_iter(self) -> Box)>> + 'a> { + Box::new(Iter::new(self).into_eager()) } } diff --git a/src/raw/document_buf.rs b/src/raw/document_buf.rs index 978de443..684caf9a 100644 --- a/src/raw/document_buf.rs +++ b/src/raw/document_buf.rs @@ -8,22 +8,13 @@ use std::{ use serde::{Deserialize, Serialize}; use crate::{ - de::MIN_BSON_DOCUMENT_SIZE, - spec::BinarySubtype, - Document, - RawBinaryRef, + de::MIN_BSON_DOCUMENT_SIZE, spec::BinarySubtype, Document, RawBinaryRef, RawJavaScriptCodeWithScopeRef, }; use super::{ - bson::RawBson, - serde::OwnedOrBorrowedRawDocument, - Error, - ErrorKind, - Iter, - RawBsonRef, - RawDocument, - Result, + bson::RawBson, serde::OwnedOrBorrowedRawDocument, Error, ErrorKind, Iter, RawBsonRef, + RawDocument, Result, }; /// An owned BSON document (akin to [`std::path::PathBuf`]), backed by a buffer of raw BSON bytes. @@ -146,8 +137,8 @@ impl RawDocumentBuf { /// There is no owning iterator for [`RawDocumentBuf`]. If you need ownership over /// elements that might need to allocate, you must explicitly convert /// them to owned types yourself. - pub fn iter(&self) -> Iter<'_> { - self.into_iter() + pub fn iter(&self) -> Box)>> + '_> { + Iter::new(self).into_eager() } /// Return the contained data as a `Vec` @@ -369,11 +360,11 @@ impl TryFrom<&Document> for RawDocumentBuf { } impl<'a> IntoIterator for &'a RawDocumentBuf { - type IntoIter = Iter<'a>; + type IntoIter = Box + 'a>; type Item = Result<(&'a str, RawBsonRef<'a>)>; - fn into_iter(self) -> Iter<'a> { - Iter::new(self) + fn into_iter(self) -> Box + 'a> { + Box::new(Iter::new(self).into_eager()) } } diff --git a/src/raw/iter.rs b/src/raw/iter.rs index 97ecf328..8b917634 100644 --- a/src/raw/iter.rs +++ b/src/raw/iter.rs @@ -5,13 +5,13 @@ use crate::{ oid::ObjectId, raw::{Error, ErrorKind, Result}, spec::{BinarySubtype, ElementType}, - DateTime, Decimal128, Timestamp, + DateTime, Decimal128, RawArray, RawBinaryRef, RawDbPointerRef, RawJavaScriptCodeWithScopeRef, + RawRegexRef, Timestamp, }; use super::{ - bson_ref::RawDbPointerRef, checked_add, error::try_with_key, f64_from_slice, i32_from_slice, - i64_from_slice, read_lenencoded, read_nullterminated, RawArray, RawBinaryRef, RawBsonRef, - RawDocument, RawJavaScriptCodeWithScopeRef, RawRegexRef, + checked_add, error::try_with_key, f64_from_slice, i32_from_slice, i64_from_slice, + read_len_and_end, read_lenencoded, read_nullterminated, RawBsonRef, RawDocument, }; /// An iterator over the document's entries. @@ -36,56 +36,180 @@ impl<'a> Iter<'a> { fn verify_enough_bytes(&self, start: usize, num_bytes: usize) -> Result<()> { let end = checked_add(start, num_bytes)?; if self.doc.as_bytes().get(start..end).is_none() { - return Err(Error::new_without_key(ErrorKind::MalformedValue { - message: format!( - "length exceeds remaining length of buffer: {} vs {}", - num_bytes, - self.doc.as_bytes().len() - start - ), - })); + return Err(Error::new_without_key(ErrorKind::new_malformed(format!( + "length exceeds remaining length of buffer: {} vs {}", + num_bytes, + self.doc.as_bytes().len() - start + )))); } Ok(()) } - fn next_oid(&self, starting_at: usize) -> Result { - self.verify_enough_bytes(starting_at, 12)?; - let oid = ObjectId::from_bytes( - self.doc.as_bytes()[starting_at..(starting_at + 12)] - .try_into() - .unwrap(), // ok because we know slice is 12 bytes long - ); - Ok(oid) - } - - fn next_document(&self, starting_at: usize) -> Result<&'a RawDocument> { + fn next_document_len(&self, starting_at: usize) -> Result { self.verify_enough_bytes(starting_at, MIN_BSON_DOCUMENT_SIZE as usize)?; let size = i32_from_slice(&self.doc.as_bytes()[starting_at..])? as usize; if size < MIN_BSON_DOCUMENT_SIZE as usize { - return Err(Error::new_without_key(ErrorKind::MalformedValue { - message: format!("document too small: {} bytes", size), - })); + return Err(Error::new_without_key(ErrorKind::new_malformed(format!( + "document too small: {} bytes", + size + )))); } self.verify_enough_bytes(starting_at, size)?; - let end = starting_at + size; - if self.doc.as_bytes()[end - 1] != 0 { - return Err(Error { - key: None, - kind: ErrorKind::MalformedValue { - message: "not null terminated".into(), - }, - }); + if self.doc.as_bytes()[starting_at + size - 1] != 0 { + return Err(Error::new_without_key(ErrorKind::new_malformed( + "not null terminated", + ))); } - RawDocument::from_bytes(&self.doc.as_bytes()[starting_at..end]) + Ok(size) + } +} + +#[derive(Clone)] +pub struct RawLazyElement<'a> { + pub key: &'a str, + pub kind: ElementType, + doc: &'a RawDocument, + start_at: usize, + size: usize, +} + +impl<'a> RawLazyElement<'a> { + pub fn resolve(self) -> Result> { + Ok(match self.kind { + ElementType::Null => RawBsonRef::Null, + ElementType::Undefined => RawBsonRef::Undefined, + ElementType::MinKey => RawBsonRef::MinKey, + ElementType::MaxKey => RawBsonRef::MaxKey, + ElementType::ObjectId => RawBsonRef::ObjectId(ObjectId::from_bytes( + self.doc.as_bytes()[self.start_at..(self.start_at + self.size)] + .try_into() + .map_err(|e| Error::new_with_key(self.key, ErrorKind::new_malformed(e)))?, + )), + ElementType::EmbeddedDocument => RawBsonRef::Document(RawDocument::from_bytes( + &self.doc.as_bytes()[self.start_at..(self.start_at + self.size)], + )?), + ElementType::Array => RawBsonRef::Array(RawArray::from_doc(RawDocument::from_bytes( + &self.doc.as_bytes()[self.start_at..(self.start_at + self.size)], + )?)), + ElementType::Int32 => { + RawBsonRef::Int32(i32_from_slice(&self.doc.as_bytes()[self.start_at..])?) + } + ElementType::Int64 => { + RawBsonRef::Int64(i64_from_slice(&self.doc.as_bytes()[self.start_at..])?) + } + ElementType::Double => { + RawBsonRef::Double(f64_from_slice(&self.doc.as_bytes()[self.start_at..])?) + } + ElementType::String => { + RawBsonRef::String(read_lenencoded(&self.doc.as_bytes()[self.start_at..])?) + } + ElementType::Boolean => RawBsonRef::Boolean( + read_bool(&self.doc.as_bytes()[self.start_at..]) + .map_err(|e| Error::new_with_key(self.key, ErrorKind::new_malformed(e)))?, + ), + ElementType::DateTime => RawBsonRef::DateTime(DateTime::from_millis(i64_from_slice( + &self.doc.as_bytes()[self.start_at..], + )?)), + ElementType::Decimal128 => RawBsonRef::Decimal128(Decimal128::from_bytes( + self.doc.as_bytes()[self.start_at..(self.start_at + self.size)] + .try_into() + .map_err(|e| Error::new_with_key(self.key, ErrorKind::new_malformed(e)))?, + )), + ElementType::JavaScriptCode => { + RawBsonRef::JavaScriptCode(read_lenencoded(&self.doc.as_bytes()[self.start_at..])?) + } + ElementType::Symbol => { + RawBsonRef::Symbol(read_lenencoded(&self.doc.as_bytes()[self.start_at..])?) + } + ElementType::DbPointer => RawBsonRef::DbPointer(RawDbPointerRef { + namespace: read_lenencoded(&self.doc.as_bytes()[self.start_at..])?, + id: ObjectId::from_bytes( + self.doc.as_bytes()[self.start_at..(self.start_at + self.size)] + .try_into() + .map_err(|e| Error::new_with_key(self.key, ErrorKind::new_malformed(e)))?, + ), + }), + ElementType::RegularExpression => { + let pattern = read_nullterminated(&self.doc.as_bytes()[self.start_at..])?; + let options = read_nullterminated( + &self.doc.as_bytes()[(self.start_at + pattern.len() + 1)..], + )?; + RawBsonRef::RegularExpression(RawRegexRef { pattern, options }) + } + ElementType::Timestamp => RawBsonRef::Timestamp( + Timestamp::from_reader(&self.doc.as_bytes()[self.start_at..]) + .map_err(|e| Error::new_with_key(self.key, ErrorKind::new_malformed(e)))?, + ), + ElementType::Binary => { + let len = i32_from_slice(&self.doc.as_bytes()[self.start_at..])? as usize; + let data_start = self.start_at + 4 + 1; + + if len >= i32::MAX as usize { + return Err(Error::new_with_key( + self.key, + ErrorKind::new_malformed(format!("binary length exceeds maximum: {}", len)), + )); + } + + let subtype = BinarySubtype::from(self.doc.as_bytes()[self.start_at + 4]); + let data = match subtype { + BinarySubtype::BinaryOld => { + if len < 4 { + return Err(Error::new_with_key( + self.key, + ErrorKind::new_malformed( + "old binary subtype has no inner declared length", + ), + )); + } + let oldlength = + i32_from_slice(&self.doc.as_bytes()[data_start..])? as usize; + if checked_add(oldlength, 4)? != len { + return Err(Error::new_with_key( + self.key, + ErrorKind::new_malformed( + "old binary subtype has wrong inner declared length", + ), + )); + } + &self.doc.as_bytes()[(data_start + 4)..(data_start + len)] + } + _ => &self.doc.as_bytes()[data_start..(data_start + len)], + }; + RawBsonRef::Binary(RawBinaryRef { + subtype, + bytes: data, + }) + } + ElementType::JavaScriptCodeWithScope => { + let slice = &self.doc.as_bytes()[self.start_at..(self.start_at + self.size)]; + let code = read_lenencoded(&slice[4..])?; + let scope_start = 4 + 4 + code.len() + 1; + let scope = RawDocument::from_bytes(&slice[scope_start..])?; + + RawBsonRef::JavaScriptCodeWithScope(RawJavaScriptCodeWithScopeRef { code, scope }) + } + }) + } +} + +impl<'a> Iter<'a> { + pub(crate) fn into_eager(self) -> Box)>>> { + Box::new(self.map(|outer| { + outer.and_then(|val| -> Result<(&'a str, RawBsonRef<'a>)> { + Ok((val.key, val.resolve()?)) + }) + })) } } impl<'a> Iterator for Iter<'a> { - type Item = Result<(&'a str, RawBsonRef<'a>)>; + type Item = Result>; - fn next(&mut self) -> Option)>> { + fn next(&mut self) -> Option>> { if !self.valid { return None; } else if self.offset == self.doc.as_bytes().len() - 1 { @@ -94,18 +218,15 @@ impl<'a> Iterator for Iter<'a> { return None; } else { self.valid = false; - return Some(Err(Error { - key: None, - kind: ErrorKind::MalformedValue { - message: "document not null terminated".into(), - }, - })); + return Some(Err(Error::new_without_key(ErrorKind::new_malformed( + "document not null terminated", + )))); } } else if self.offset >= self.doc.as_bytes().len() { self.valid = false; - return Some(Err(Error::new_without_key(ErrorKind::MalformedValue { - message: "iteration overflowed document".to_string(), - }))); + return Some(Err(Error::new_without_key(ErrorKind::new_malformed( + "iteration overflowed document", + )))); } let key = match read_nullterminated(&self.doc.as_bytes()[self.offset + 1..]) { @@ -116,193 +237,87 @@ impl<'a> Iterator for Iter<'a> { } }; + let valueoffset = self.offset + 1 + key.len() + 1; // type specifier + key + \0 let kvp_result = try_with_key(key, || { - let valueoffset = self.offset + 1 + key.len() + 1; // type specifier + key + \0 - let element_type = match ElementType::from(self.doc.as_bytes()[self.offset]) { Some(et) => et, None => { return Err(Error::new_with_key( key, - ErrorKind::MalformedValue { - message: format!("invalid tag: {}", self.doc.as_bytes()[self.offset]), - }, + ErrorKind::new_malformed(format!( + "invalid tag: {}", + self.doc.as_bytes()[self.offset] + )), )) } }; - let (element, element_size) = match element_type { - ElementType::Int32 => { - let i = i32_from_slice(&self.doc.as_bytes()[valueoffset..])?; - (RawBsonRef::Int32(i), 4) - } - ElementType::Int64 => { - let i = i64_from_slice(&self.doc.as_bytes()[valueoffset..])?; - (RawBsonRef::Int64(i), 8) - } - ElementType::Double => { - let f = f64_from_slice(&self.doc.as_bytes()[valueoffset..])?; - (RawBsonRef::Double(f), 8) - } - ElementType::String => { - let s = read_lenencoded(&self.doc.as_bytes()[valueoffset..])?; - (RawBsonRef::String(s), 4 + s.len() + 1) - } - ElementType::EmbeddedDocument => { - let doc = self.next_document(valueoffset)?; - (RawBsonRef::Document(doc), doc.as_bytes().len()) - } - ElementType::Array => { - let doc = self.next_document(valueoffset)?; - ( - RawBsonRef::Array(RawArray::from_doc(doc)), - doc.as_bytes().len(), - ) - } + let element_size = match element_type { + ElementType::Int32 => 4, + ElementType::Int64 => 8, + ElementType::Double => 8, + ElementType::String => read_len_and_end(&self.doc.as_bytes()[valueoffset..])?.0, + ElementType::EmbeddedDocument => self.next_document_len(valueoffset)?, + ElementType::Array => self.next_document_len(valueoffset)?, ElementType::Binary => { - let len = i32_from_slice(&self.doc.as_bytes()[valueoffset..])? as usize; - let data_start = valueoffset + 4 + 1; - - if len >= i32::MAX as usize { - return Err(Error::new_without_key(ErrorKind::MalformedValue { - message: format!("binary length exceeds maximum: {}", len), - })); - } - - self.verify_enough_bytes(valueoffset + 4, len + 1)?; - let subtype = BinarySubtype::from(self.doc.as_bytes()[valueoffset + 4]); - let data = match subtype { - BinarySubtype::BinaryOld => { - if len < 4 { - return Err(Error::new_without_key(ErrorKind::MalformedValue { - message: "old binary subtype has no inner declared length" - .into(), - })); - } - let oldlength = - i32_from_slice(&self.doc.as_bytes()[data_start..])? as usize; - if checked_add(oldlength, 4)? != len { - return Err(Error::new_without_key(ErrorKind::MalformedValue { - message: "old binary subtype has wrong inner declared length" - .into(), - })); - } - &self.doc.as_bytes()[(data_start + 4)..(data_start + len)] - } - _ => &self.doc.as_bytes()[data_start..(data_start + len)], - }; - ( - RawBsonRef::Binary(RawBinaryRef { - subtype, - bytes: data, - }), - 4 + 1 + len, - ) - } - ElementType::ObjectId => { - let oid = self.next_oid(valueoffset)?; - (RawBsonRef::ObjectId(oid), 12) - } - ElementType::Boolean => { - let b = read_bool(&self.doc.as_bytes()[valueoffset..]).map_err(|e| { - Error::new_with_key( - key, - ErrorKind::MalformedValue { - message: e.to_string(), - }, - ) - })?; - (RawBsonRef::Boolean(b), 1) - } - ElementType::DateTime => { - let ms = i64_from_slice(&self.doc.as_bytes()[valueoffset..])?; - (RawBsonRef::DateTime(DateTime::from_millis(ms)), 8) + (i32_from_slice(&self.doc.as_bytes()[valueoffset..])? + 4 + 1) as usize } + ElementType::ObjectId => 12, + ElementType::Boolean => 1, + ElementType::DateTime => 8, ElementType::RegularExpression => { let pattern = read_nullterminated(&self.doc.as_bytes()[valueoffset..])?; let options = read_nullterminated( &self.doc.as_bytes()[(valueoffset + pattern.len() + 1)..], )?; - ( - RawBsonRef::RegularExpression(RawRegexRef { pattern, options }), - pattern.len() + 1 + options.len() + 1, - ) - } - ElementType::Null => (RawBsonRef::Null, 0), - ElementType::Undefined => (RawBsonRef::Undefined, 0), - ElementType::Timestamp => { - let ts = Timestamp::from_reader(&self.doc.as_bytes()[valueoffset..]).map_err( - |e| { - Error::new_without_key(ErrorKind::MalformedValue { - message: e.to_string(), - }) - }, - )?; - (RawBsonRef::Timestamp(ts), 8) + pattern.len() + 1 + options.len() + 1 } + ElementType::Null => 0, + ElementType::Undefined => 0, + ElementType::Timestamp => 8, + ElementType::Symbol => read_len_and_end(&self.doc.as_bytes()[valueoffset..])?.0, ElementType::JavaScriptCode => { - let code = read_lenencoded(&self.doc.as_bytes()[valueoffset..])?; - (RawBsonRef::JavaScriptCode(code), 4 + code.len() + 1) + read_len_and_end(&self.doc.as_bytes()[valueoffset..])?.0 } ElementType::JavaScriptCodeWithScope => { - let length = i32_from_slice(&self.doc.as_bytes()[valueoffset..])? as usize; + let length = (i32_from_slice(&self.doc.as_bytes()[valueoffset..])?) as usize; if length < MIN_CODE_WITH_SCOPE_SIZE as usize { - return Err(Error::new_without_key(ErrorKind::MalformedValue { - message: "code with scope length too small".to_string(), - })); + return Err(Error::new_without_key(ErrorKind::new_malformed( + "code with scope length too small", + ))); } - self.verify_enough_bytes(valueoffset, length)?; - let slice = &&self.doc.as_bytes()[valueoffset..(valueoffset + length)]; - let code = read_lenencoded(&slice[4..])?; - let scope_start = 4 + 4 + code.len() + 1; - let scope = RawDocument::from_bytes(&slice[scope_start..])?; - ( - RawBsonRef::JavaScriptCodeWithScope(RawJavaScriptCodeWithScopeRef { - code, - scope, - }), - length, - ) + length } ElementType::DbPointer => { - let namespace = read_lenencoded(&self.doc.as_bytes()[valueoffset..])?; - let id = self.next_oid(valueoffset + 4 + namespace.len() + 1)?; - ( - RawBsonRef::DbPointer(RawDbPointerRef { namespace, id }), - 4 + namespace.len() + 1 + 12, - ) - } - ElementType::Symbol => { - let s = read_lenencoded(&self.doc.as_bytes()[valueoffset..])?; - (RawBsonRef::Symbol(s), 4 + s.len() + 1) - } - ElementType::Decimal128 => { - self.verify_enough_bytes(valueoffset, 16)?; - ( - RawBsonRef::Decimal128(Decimal128::from_bytes( - self.doc.as_bytes()[valueoffset..(valueoffset + 16)] - .try_into() - .unwrap(), - )), - 16, - ) + let length = read_len_and_end(&self.doc.as_bytes()[valueoffset..])?.0; + length + 1 + 12 } - ElementType::MinKey => (RawBsonRef::MinKey, 0), - ElementType::MaxKey => (RawBsonRef::MaxKey, 0), + ElementType::Decimal128 => 16, + ElementType::MinKey => 0, + ElementType::MaxKey => 0, }; - self.offset = valueoffset + element_size; self.verify_enough_bytes(valueoffset, element_size)?; + self.offset = valueoffset + element_size; - Ok((key, element)) + Ok((element_type, element_size)) }); if kvp_result.is_err() { self.valid = false; } - Some(kvp_result) + Some(match kvp_result { + Ok((kind, size)) => Ok(RawLazyElement { + key, + kind, + doc: self.doc, + start_at: valueoffset, + size: size, + }), + Err(e) => Err(e), + }) } } diff --git a/src/raw/lazy.rs b/src/raw/lazy.rs deleted file mode 100644 index f1529be4..00000000 --- a/src/raw/lazy.rs +++ /dev/null @@ -1,321 +0,0 @@ -use std::convert::TryInto; - -use crate::{ - de::{read_bool, MIN_BSON_DOCUMENT_SIZE, MIN_CODE_WITH_SCOPE_SIZE}, - oid::ObjectId, - raw::{Error, ErrorKind, Result}, - spec::{BinarySubtype, ElementType}, - DateTime, Decimal128, RawArray, RawBinaryRef, RawDbPointerRef, RawJavaScriptCodeWithScopeRef, - RawRegexRef, Timestamp, -}; - -use super::{ - checked_add, error::try_with_key, f64_from_slice, i32_from_slice, i64_from_slice, - read_len_and_end, read_lenencoded, read_nullterminated, RawBsonRef, RawDocument, -}; - -/// An iterator over the document's entries. -pub struct Iter<'a> { - doc: &'a RawDocument, - offset: usize, - - /// Whether the underlying doc is assumed to be valid or if an error has been encountered. - /// After an error, all subsequent iterations will return None. - valid: bool, -} - -impl<'a> Iter<'a> { - pub(crate) fn new(doc: &'a RawDocument) -> Self { - Self { - doc, - offset: 4, - valid: true, - } - } - - fn verify_enough_bytes(&self, start: usize, num_bytes: usize) -> Result<()> { - let end = checked_add(start, num_bytes)?; - if self.doc.as_bytes().get(start..end).is_none() { - return Err(Error::new_without_key(ErrorKind::new_malformed(format!( - "length exceeds remaining length of buffer: {} vs {}", - num_bytes, - self.doc.as_bytes().len() - start - )))); - } - Ok(()) - } - - fn next_document_len(&self, starting_at: usize) -> Result { - self.verify_enough_bytes(starting_at, MIN_BSON_DOCUMENT_SIZE as usize)?; - let size = i32_from_slice(&self.doc.as_bytes()[starting_at..])? as usize; - - if size < MIN_BSON_DOCUMENT_SIZE as usize { - return Err(Error::new_without_key(ErrorKind::new_malformed(format!( - "document too small: {} bytes", - size - )))); - } - - self.verify_enough_bytes(starting_at, size)?; - - if self.doc.as_bytes()[starting_at + size - 1] != 0 { - return Err(Error::new_without_key(ErrorKind::new_malformed( - "not null terminated", - ))); - } - Ok(size) - } -} - -#[derive(Clone)] -pub struct RawLazyElement<'a> { - pub key: &'a str, - pub kind: ElementType, - doc: &'a RawDocument, - start_at: usize, - end_at: usize, -} - -impl<'a> RawLazyElement<'a> { - pub fn resolve(self) -> Result> { - Ok(match self.kind { - ElementType::Null => RawBsonRef::Null, - ElementType::Undefined => RawBsonRef::Undefined, - ElementType::MinKey => RawBsonRef::MinKey, - ElementType::MaxKey => RawBsonRef::MaxKey, - ElementType::ObjectId => RawBsonRef::ObjectId(ObjectId::from_bytes( - self.doc.as_bytes()[self.start_at..self.end_at] - .try_into() - .map_err(|e| Error::new_with_key(self.key, ErrorKind::new_malformed(e)))?, - )), - ElementType::EmbeddedDocument => RawBsonRef::Document(RawDocument::from_bytes( - &self.doc.as_bytes()[self.start_at..self.end_at], - )?), - ElementType::Array => RawBsonRef::Array(RawArray::from_doc(RawDocument::from_bytes( - &self.doc.as_bytes()[self.start_at..self.end_at], - )?)), - ElementType::Int32 => { - RawBsonRef::Int32(i32_from_slice(&self.doc.as_bytes()[self.start_at..])?) - } - ElementType::Int64 => { - RawBsonRef::Int64(i64_from_slice(&self.doc.as_bytes()[self.start_at..])?) - } - ElementType::Double => { - RawBsonRef::Double(f64_from_slice(&self.doc.as_bytes()[self.start_at..])?) - } - ElementType::String => { - RawBsonRef::String(read_lenencoded(&self.doc.as_bytes()[self.start_at..])?) - } - ElementType::Boolean => RawBsonRef::Boolean( - read_bool(&self.doc.as_bytes()[self.start_at..]) - .map_err(|e| Error::new_with_key(self.key, ErrorKind::new_malformed(e)))?, - ), - ElementType::DateTime => RawBsonRef::DateTime(DateTime::from_millis(i64_from_slice( - &self.doc.as_bytes()[self.start_at..], - )?)), - ElementType::Decimal128 => RawBsonRef::Decimal128(Decimal128::from_bytes( - self.doc.as_bytes()[self.start_at..self.end_at] - .try_into() - .map_err(|e| Error::new_with_key(self.key, ErrorKind::new_malformed(e)))?, - )), - ElementType::JavaScriptCode => { - RawBsonRef::JavaScriptCode(read_lenencoded(&self.doc.as_bytes()[self.start_at..])?) - } - ElementType::Symbol => { - RawBsonRef::Symbol(read_lenencoded(&self.doc.as_bytes()[self.start_at..])?) - } - ElementType::DbPointer => RawBsonRef::DbPointer(RawDbPointerRef { - namespace: read_lenencoded(&self.doc.as_bytes()[self.start_at..])?, - id: ObjectId::from_bytes( - self.doc.as_bytes()[self.start_at..self.end_at] - .try_into() - .map_err(|e| Error::new_with_key(self.key, ErrorKind::new_malformed(e)))?, - ), - }), - ElementType::RegularExpression => { - let pattern = read_nullterminated(&self.doc.as_bytes()[self.start_at..])?; - let options = read_nullterminated( - &self.doc.as_bytes()[(self.start_at + pattern.len() + 1)..], - )?; - RawBsonRef::RegularExpression(RawRegexRef { pattern, options }) - } - ElementType::Timestamp => RawBsonRef::Timestamp( - Timestamp::from_reader(&self.doc.as_bytes()[self.start_at..]) - .map_err(|e| Error::new_with_key(self.key, ErrorKind::new_malformed(e)))?, - ), - ElementType::Binary => { - let len = i32_from_slice(&self.doc.as_bytes()[self.start_at..])? as usize; - let data_start = self.start_at + 4 + 1; - - if len >= i32::MAX as usize { - return Err(Error::new_with_key( - self.key, - ErrorKind::new_malformed(format!("binary length exceeds maximum: {}", len)), - )); - } - - let subtype = BinarySubtype::from(self.doc.as_bytes()[self.start_at + 4]); - let data = match subtype { - BinarySubtype::BinaryOld => { - if len < 4 { - return Err(Error::new_with_key( - self.key, - ErrorKind::new_malformed( - "old binary subtype has no inner declared length", - ), - )); - } - let oldlength = - i32_from_slice(&self.doc.as_bytes()[data_start..])? as usize; - if checked_add(oldlength, 4)? != len { - return Err(Error::new_with_key( - self.key, - ErrorKind::new_malformed( - "old binary subtype has wrong inner declared length", - ), - )); - } - &self.doc.as_bytes()[(data_start + 4)..(data_start + len)] - } - _ => &self.doc.as_bytes()[data_start..(data_start + len)], - }; - RawBsonRef::Binary(RawBinaryRef { - subtype, - bytes: data, - }) - } - ElementType::JavaScriptCodeWithScope => { - let slice = &&self.doc.as_bytes()[self.start_at..self.end_at]; - let code = read_lenencoded(&slice[4..])?; - let scope_start = 4 + 4 + code.len() + 1; - let scope = RawDocument::from_bytes(&slice[scope_start..])?; - - RawBsonRef::JavaScriptCodeWithScope(RawJavaScriptCodeWithScopeRef { code, scope }) - } - }) - } -} - -impl<'a> Iter<'a> { - pub(crate) fn into_eager(self) -> impl Iterator)>> { - self.map(|outer| { - outer.and_then(|val| -> Result<(&'a str, RawBsonRef<'a>)> { - Ok((val.key, val.resolve()?)) - }) - }) - } -} - -impl<'a> Iterator for Iter<'a> { - type Item = Result>; - - fn next(&mut self) -> Option>> { - if !self.valid { - return None; - } else if self.offset == self.doc.as_bytes().len() - 1 { - if self.doc.as_bytes()[self.offset] == 0 { - // end of document marker - return None; - } else { - self.valid = false; - return Some(Err(Error::new_without_key(ErrorKind::new_malformed( - "document not null terminated", - )))); - } - } else if self.offset >= self.doc.as_bytes().len() { - self.valid = false; - return Some(Err(Error::new_without_key(ErrorKind::new_malformed( - "iteration overflowed document", - )))); - } - - let key = match read_nullterminated(&self.doc.as_bytes()[self.offset + 1..]) { - Ok(k) => k, - Err(e) => { - self.valid = false; - return Some(Err(e)); - } - }; - - let valueoffset = self.offset + 1 + key.len() + 1; // type specifier + key + \0 - let kvp_result = try_with_key(key, || { - let element_type = match ElementType::from(self.doc.as_bytes()[self.offset]) { - Some(et) => et, - None => { - return Err(Error::new_with_key( - key, - ErrorKind::new_malformed(format!( - "invalid tag: {}", - self.doc.as_bytes()[self.offset] - )), - )) - } - }; - - let element_size = match element_type { - ElementType::Int32 => 4, - ElementType::Int64 => 8, - ElementType::Double => 8, - ElementType::String => read_len_and_end(&self.doc.as_bytes()[valueoffset..])?.0, - ElementType::EmbeddedDocument => self.next_document_len(valueoffset)?, - ElementType::Array => self.next_document_len(valueoffset)?, - ElementType::Binary => read_len_and_end(&self.doc.as_bytes()[valueoffset..])?.0, - ElementType::ObjectId => 12, - ElementType::Boolean => 1, - ElementType::DateTime => 8, - ElementType::RegularExpression => { - let pattern = read_nullterminated(&self.doc.as_bytes()[valueoffset..])?; - let options = read_nullterminated( - &self.doc.as_bytes()[(valueoffset + pattern.len() + 1)..], - )?; - pattern.len() + 1 + options.len() + 1 - } - ElementType::Null => 0, - ElementType::Undefined => 0, - ElementType::Timestamp => 8, - ElementType::JavaScriptCode => { - read_len_and_end(&self.doc.as_bytes()[valueoffset..])?.0 - } - ElementType::JavaScriptCodeWithScope => { - let length = read_len_and_end(&self.doc.as_bytes()[valueoffset..])?.0; - - if length < MIN_CODE_WITH_SCOPE_SIZE as usize { - return Err(Error::new_without_key(ErrorKind::new_malformed( - "code with scope length too small", - ))); - } - - length - } - ElementType::DbPointer => { - let length = read_len_and_end(&self.doc.as_bytes()[valueoffset..])?.0; - length + 1 + 12 - } - ElementType::Symbol => read_len_and_end(&self.doc.as_bytes()[valueoffset..])?.0, - ElementType::Decimal128 => 16, - ElementType::MinKey => 0, - ElementType::MaxKey => 0, - }; - - self.offset = valueoffset + element_size; - self.verify_enough_bytes(valueoffset, element_size)?; - - Ok((element_type, element_size)) - }); - - if kvp_result.is_err() { - self.valid = false; - } - - Some(match kvp_result { - Ok((kind, size)) => Ok(RawLazyElement { - key, - kind, - doc: self.doc, - start_at: valueoffset, - end_at: valueoffset + size, - }), - Err(e) => Err(e), - }) - } -} diff --git a/src/raw/mod.rs b/src/raw/mod.rs index a3dbb2c7..9fb69b67 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -120,7 +120,6 @@ mod document; mod document_buf; mod error; mod iter; -pub mod lazy; pub(crate) mod serde; #[cfg(test)] mod test; diff --git a/src/raw/test/mod.rs b/src/raw/test/mod.rs index 250141d6..8ed8caea 100644 --- a/src/raw/test/mod.rs +++ b/src/raw/test/mod.rs @@ -3,15 +3,8 @@ mod props; use super::*; use crate::{ - doc, - oid::ObjectId, - raw::error::ValueAccessErrorKind, - spec::BinarySubtype, - Binary, - Bson, - DateTime, - Regex, - Timestamp, + doc, oid::ObjectId, raw::error::ValueAccessErrorKind, spec::BinarySubtype, Binary, Bson, + DateTime, Regex, Timestamp, }; #[test] @@ -87,10 +80,10 @@ fn iterate() { fn rawdoc_to_doc() { let rawdoc = rawdoc! { "f64": 2.5, - "string": "hello", "document": {}, "array": ["binary", "serialized", "object", "notation"], "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1, 2, 3] }, + "string": "hello", "object_id": ObjectId::from_bytes([1, 2, 3, 4, 5,6,7,8,9,10, 11,12]), "boolean": true, "datetime": DateTime::now(), @@ -372,18 +365,18 @@ fn int64() { #[test] fn document_iteration() { let rawdoc = rawdoc! { - "f64": 2.5, - "string": "hello", - "document": {}, - "array": ["binary", "serialized", "object", "notation"], - "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] }, + "f64": 2.5, + "string": "hello", + "document": {}, + "array": ["binary", "serialized", "object", "notation"], + "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] }, "object_id": ObjectId::from_bytes([1, 2, 3, 4, 5,6,7,8,9,10, 11,12]), - "boolean": true, - "datetime": DateTime::now(), - "null": RawBson::Null, - "regex": Regex { pattern: String::from(r"end\s*$"), options: String::from("i") }, - "javascript": RawBson::JavaScriptCode(String::from("console.log(console);")), - "symbol": RawBson::Symbol(String::from("artist-formerly-known-as")), + "boolean": true, + "datetime": DateTime::now(), + "null": RawBson::Null, + "regex": Regex { pattern: String::from(r"end\s*$"), options: String::from("i") }, + "javascript": RawBson::JavaScriptCode(String::from("console.log(console);")), + "symbol": RawBson::Symbol(String::from("artist-formerly-known-as")), "javascript_with_scope": RawJavaScriptCodeWithScope { code: String::from("console.log(msg);"), scope: rawdoc! { "ok": true } From 89c41c8190b6d535bd0f360bfc02d2f30e0a4085 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Sun, 31 Dec 2023 18:09:08 -0500 Subject: [PATCH 03/17] cleanup len --- src/raw/iter.rs | 24 +++++++++++------------- src/raw/mod.rs | 11 ++++++----- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/raw/iter.rs b/src/raw/iter.rs index 8b917634..6eca8dae 100644 --- a/src/raw/iter.rs +++ b/src/raw/iter.rs @@ -10,8 +10,8 @@ use crate::{ }; use super::{ - checked_add, error::try_with_key, f64_from_slice, i32_from_slice, i64_from_slice, - read_len_and_end, read_lenencoded, read_nullterminated, RawBsonRef, RawDocument, + checked_add, error::try_with_key, f64_from_slice, i32_from_slice, i64_from_slice, read_len, + read_lenencode, read_nullterminated, RawBsonRef, RawDocument, }; /// An iterator over the document's entries. @@ -104,7 +104,7 @@ impl<'a> RawLazyElement<'a> { RawBsonRef::Double(f64_from_slice(&self.doc.as_bytes()[self.start_at..])?) } ElementType::String => { - RawBsonRef::String(read_lenencoded(&self.doc.as_bytes()[self.start_at..])?) + RawBsonRef::String(read_lenencode(&self.doc.as_bytes()[self.start_at..])?) } ElementType::Boolean => RawBsonRef::Boolean( read_bool(&self.doc.as_bytes()[self.start_at..]) @@ -119,13 +119,13 @@ impl<'a> RawLazyElement<'a> { .map_err(|e| Error::new_with_key(self.key, ErrorKind::new_malformed(e)))?, )), ElementType::JavaScriptCode => { - RawBsonRef::JavaScriptCode(read_lenencoded(&self.doc.as_bytes()[self.start_at..])?) + RawBsonRef::JavaScriptCode(read_lenencode(&self.doc.as_bytes()[self.start_at..])?) } ElementType::Symbol => { - RawBsonRef::Symbol(read_lenencoded(&self.doc.as_bytes()[self.start_at..])?) + RawBsonRef::Symbol(read_lenencode(&self.doc.as_bytes()[self.start_at..])?) } ElementType::DbPointer => RawBsonRef::DbPointer(RawDbPointerRef { - namespace: read_lenencoded(&self.doc.as_bytes()[self.start_at..])?, + namespace: read_lenencode(&self.doc.as_bytes()[self.start_at..])?, id: ObjectId::from_bytes( self.doc.as_bytes()[self.start_at..(self.start_at + self.size)] .try_into() @@ -186,7 +186,7 @@ impl<'a> RawLazyElement<'a> { } ElementType::JavaScriptCodeWithScope => { let slice = &self.doc.as_bytes()[self.start_at..(self.start_at + self.size)]; - let code = read_lenencoded(&slice[4..])?; + let code = read_lenencode(&slice[4..])?; let scope_start = 4 + 4 + code.len() + 1; let scope = RawDocument::from_bytes(&slice[scope_start..])?; @@ -256,7 +256,7 @@ impl<'a> Iterator for Iter<'a> { ElementType::Int32 => 4, ElementType::Int64 => 8, ElementType::Double => 8, - ElementType::String => read_len_and_end(&self.doc.as_bytes()[valueoffset..])?.0, + ElementType::String => read_len(&self.doc.as_bytes()[valueoffset..])?, ElementType::EmbeddedDocument => self.next_document_len(valueoffset)?, ElementType::Array => self.next_document_len(valueoffset)?, ElementType::Binary => { @@ -275,10 +275,8 @@ impl<'a> Iterator for Iter<'a> { ElementType::Null => 0, ElementType::Undefined => 0, ElementType::Timestamp => 8, - ElementType::Symbol => read_len_and_end(&self.doc.as_bytes()[valueoffset..])?.0, - ElementType::JavaScriptCode => { - read_len_and_end(&self.doc.as_bytes()[valueoffset..])?.0 - } + ElementType::Symbol => read_len(&self.doc.as_bytes()[valueoffset..])?, + ElementType::JavaScriptCode => read_len(&self.doc.as_bytes()[valueoffset..])?, ElementType::JavaScriptCodeWithScope => { let length = (i32_from_slice(&self.doc.as_bytes()[valueoffset..])?) as usize; @@ -291,7 +289,7 @@ impl<'a> Iterator for Iter<'a> { length } ElementType::DbPointer => { - let length = read_len_and_end(&self.doc.as_bytes()[valueoffset..])?.0; + let length = read_len(&self.doc.as_bytes()[valueoffset..])?; length + 1 + 12 } ElementType::Decimal128 => 16, diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 9fb69b67..14edf9a0 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -208,7 +208,7 @@ fn read_nullterminated(buf: &[u8]) -> Result<&str> { } } -fn read_len_and_end(buf: &[u8]) -> Result<(usize, usize)> { +fn read_len(buf: &[u8]) -> Result { if buf.len() < 4 { return Err(Error::new_without_key(ErrorKind::MalformedValue { message: format!( @@ -239,13 +239,14 @@ fn read_len_and_end(buf: &[u8]) -> Result<(usize, usize)> { ), })); } - Ok((length as usize + 4, end)) + + Ok(length as usize + 4) } -fn read_lenencoded(buf: &[u8]) -> Result<&str> { - let end = read_len_and_end(buf)?.1; +fn read_lenencode(buf: &[u8]) -> Result<&str> { + let end = read_len(buf)?; - // exclude null byte + // exclude length-prefix and null byte suffix try_to_str(&buf[4..(end - 1)]) } From 9d5c7177f6315f45ffe0810a999a5327d8d1383c Mon Sep 17 00:00:00 2001 From: tycho garen Date: Sun, 31 Dec 2023 19:08:58 -0500 Subject: [PATCH 04/17] fix dbpointer --- src/raw/iter.rs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/raw/iter.rs b/src/raw/iter.rs index 6eca8dae..6ceaafdf 100644 --- a/src/raw/iter.rs +++ b/src/raw/iter.rs @@ -124,14 +124,16 @@ impl<'a> RawLazyElement<'a> { ElementType::Symbol => { RawBsonRef::Symbol(read_lenencode(&self.doc.as_bytes()[self.start_at..])?) } - ElementType::DbPointer => RawBsonRef::DbPointer(RawDbPointerRef { - namespace: read_lenencode(&self.doc.as_bytes()[self.start_at..])?, - id: ObjectId::from_bytes( - self.doc.as_bytes()[self.start_at..(self.start_at + self.size)] + ElementType::DbPointer => { + let namespace = read_lenencode(&self.doc.as_bytes()[self.start_at..])?; + let oid_offset = self.start_at + 4 + namespace.len() + 1; + let id = ObjectId::from_bytes( + self.doc.as_bytes()[oid_offset..(oid_offset + 12)] .try_into() - .map_err(|e| Error::new_with_key(self.key, ErrorKind::new_malformed(e)))?, - ), - }), + .unwrap(), + ); + RawBsonRef::DbPointer(RawDbPointerRef { namespace, id }) + } ElementType::RegularExpression => { let pattern = read_nullterminated(&self.doc.as_bytes()[self.start_at..])?; let options = read_nullterminated( @@ -290,7 +292,7 @@ impl<'a> Iterator for Iter<'a> { } ElementType::DbPointer => { let length = read_len(&self.doc.as_bytes()[valueoffset..])?; - length + 1 + 12 + length + 12 } ElementType::Decimal128 => 16, ElementType::MinKey => 0, From 0abd2d032fa7c9d0b115beb9cdf35942461dda0f Mon Sep 17 00:00:00 2001 From: tycho garen Date: Sun, 31 Dec 2023 19:16:27 -0500 Subject: [PATCH 05/17] null terminated check --- src/raw/iter.rs | 2 +- src/raw/mod.rs | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/raw/iter.rs b/src/raw/iter.rs index 6ceaafdf..352bde3d 100644 --- a/src/raw/iter.rs +++ b/src/raw/iter.rs @@ -315,7 +315,7 @@ impl<'a> Iterator for Iter<'a> { kind, doc: self.doc, start_at: valueoffset, - size: size, + size, }), Err(e) => Err(e), }) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 14edf9a0..e096bf2a 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -240,6 +240,12 @@ fn read_len(buf: &[u8]) -> Result { })); } + if buf[end - 1] != 0 { + return Err(Error::new_without_key(ErrorKind::MalformedValue { + message: "expected string to be null-terminated".to_string(), + })); + } + Ok(length as usize + 4) } From f79c4c0ff802483bdbea02accbfbb61cecc1c70c Mon Sep 17 00:00:00 2001 From: tycho garen Date: Sun, 31 Dec 2023 19:37:17 -0500 Subject: [PATCH 06/17] expose lazy iter --- src/raw/bson.rs | 21 ++++-------------- src/raw/bson_ref.rs | 14 ++---------- src/raw/document_buf.rs | 18 ++++++++++++++++ src/raw/iter.rs | 48 +++++++++++++++++++++++++++++++++++------ 4 files changed, 66 insertions(+), 35 deletions(-) diff --git a/src/raw/bson.rs b/src/raw/bson.rs index 7bb475a4..9d748367 100644 --- a/src/raw/bson.rs +++ b/src/raw/bson.rs @@ -6,27 +6,14 @@ use crate::{ oid::{self, ObjectId}, raw::RAW_BSON_NEWTYPE, spec::ElementType, - Binary, - Bson, - DbPointer, - Decimal128, - RawArray, - RawArrayBuf, - RawBinaryRef, - RawBsonRef, - RawDbPointerRef, - RawDocument, - RawDocumentBuf, - RawJavaScriptCodeWithScopeRef, - RawRegexRef, - Regex, - Timestamp, + Binary, Bson, DbPointer, Decimal128, RawArray, RawArrayBuf, RawBinaryRef, RawBsonRef, + RawDbPointerRef, RawDocument, RawDocumentBuf, RawJavaScriptCodeWithScopeRef, RawRegexRef, + Regex, Timestamp, }; use super::{ serde::{bson_visitor::OwnedOrBorrowedRawBsonVisitor, OwnedOrBorrowedRawBson}, - Error, - Result, + Error, Result, }; /// A BSON value backed by owned raw BSON bytes. diff --git a/src/raw/bson_ref.rs b/src/raw/bson_ref.rs index 8241d879..877e89cf 100644 --- a/src/raw/bson_ref.rs +++ b/src/raw/bson_ref.rs @@ -6,24 +6,14 @@ use serde_bytes::Bytes; use super::{ bson::RawBson, serde::{bson_visitor::OwnedOrBorrowedRawBsonVisitor, OwnedOrBorrowedRawBson}, - Error, - RawArray, - RawDocument, - Result, + Error, RawArray, RawDocument, Result, }; use crate::{ extjson, oid::{self, ObjectId}, raw::{RawJavaScriptCodeWithScope, RAW_BSON_NEWTYPE}, spec::{BinarySubtype, ElementType}, - Binary, - Bson, - DbPointer, - Decimal128, - RawArrayBuf, - RawDocumentBuf, - Regex, - Timestamp, + Binary, Bson, DbPointer, Decimal128, RawArrayBuf, RawDocumentBuf, Regex, Timestamp, }; /// A BSON value referencing raw bytes stored elsewhere. diff --git a/src/raw/document_buf.rs b/src/raw/document_buf.rs index 684caf9a..d091c1a6 100644 --- a/src/raw/document_buf.rs +++ b/src/raw/document_buf.rs @@ -141,6 +141,24 @@ impl RawDocumentBuf { Iter::new(self).into_eager() } + /// Gets an iterator over the elements in the [`RawDocumentBuf`], which yields + /// `Result<(&str, RawLazyElement<'_>)>`. + /// + /// This iterator, which underpins the implementation of the + /// default iterator, produces `RawLazyElement` objects, which + /// hold a view onto the document but do not parse out or + /// construct values until the `.value()` or `.try_into()` methods + /// are called. + /// + /// # Note: + /// + /// There is no owning iterator for [`RawDocumentBuf`]. If you + /// need ownership over elements that might need to allocate, you + /// must explicitly convert them to owned types yourself. + pub fn lazy_iter(&self) -> Iter<'_> { + Iter::new(self) + } + /// Return the contained data as a `Vec` /// /// ``` diff --git a/src/raw/iter.rs b/src/raw/iter.rs index 352bde3d..816b072a 100644 --- a/src/raw/iter.rs +++ b/src/raw/iter.rs @@ -5,8 +5,8 @@ use crate::{ oid::ObjectId, raw::{Error, ErrorKind, Result}, spec::{BinarySubtype, ElementType}, - DateTime, Decimal128, RawArray, RawBinaryRef, RawDbPointerRef, RawJavaScriptCodeWithScopeRef, - RawRegexRef, Timestamp, + Bson, DateTime, Decimal128, RawArray, RawBinaryRef, RawBson, RawDbPointerRef, + RawJavaScriptCodeWithScopeRef, RawRegexRef, Timestamp, }; use super::{ @@ -69,15 +69,51 @@ impl<'a> Iter<'a> { #[derive(Clone)] pub struct RawLazyElement<'a> { - pub key: &'a str, - pub kind: ElementType, + key: &'a str, + kind: ElementType, doc: &'a RawDocument, start_at: usize, size: usize, } +impl<'a> TryInto> for RawLazyElement<'a> { + type Error = Error; + + fn try_into(self) -> Result> { + self.value() + } +} + +impl<'a> TryInto for RawLazyElement<'a> { + type Error = Error; + + fn try_into(self) -> Result { + Ok(self.value()?.to_raw_bson()) + } +} + +impl<'a> TryInto for RawLazyElement<'a> { + type Error = Error; + + fn try_into(self) -> Result { + self.value()?.to_raw_bson().try_into() + } +} + impl<'a> RawLazyElement<'a> { - pub fn resolve(self) -> Result> { + pub fn len(&self) -> usize { + self.size + } + + pub fn key(&self) -> String { + self.key.to_string() + } + + pub fn element_type(&self) -> ElementType { + self.kind + } + + pub fn value(&self) -> Result> { Ok(match self.kind { ElementType::Null => RawBsonRef::Null, ElementType::Undefined => RawBsonRef::Undefined, @@ -202,7 +238,7 @@ impl<'a> Iter<'a> { pub(crate) fn into_eager(self) -> Box)>>> { Box::new(self.map(|outer| { outer.and_then(|val| -> Result<(&'a str, RawBsonRef<'a>)> { - Ok((val.key, val.resolve()?)) + Ok((val.key, val.value()?)) }) })) } From e5ff849f9a6e4f4125653796f48ee564508a9522 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Mon, 1 Jan 2024 09:09:50 -0500 Subject: [PATCH 07/17] naming and cleanup --- src/raw/document.rs | 8 ++++---- src/raw/document_buf.rs | 10 ++++++---- src/raw/iter.rs | 16 ++++++++-------- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/raw/document.rs b/src/raw/document.rs index 527cdcd5..41821a37 100644 --- a/src/raw/document.rs +++ b/src/raw/document.rs @@ -162,10 +162,10 @@ impl RawDocument { /// # Ok::<(), Error>(()) /// ``` pub fn get(&self, key: impl AsRef) -> Result>> { - for result in self.into_iter() { - let (k, v) = result?; - if key.as_ref() == k { - return Ok(Some(v)); + for elem in Iter::new(self) { + let elem = elem?; + if key.as_ref() == elem.key() { + return Ok(Some(elem.try_into()?)); } } Ok(None) diff --git a/src/raw/document_buf.rs b/src/raw/document_buf.rs index d091c1a6..ce6fd639 100644 --- a/src/raw/document_buf.rs +++ b/src/raw/document_buf.rs @@ -141,11 +141,13 @@ impl RawDocumentBuf { Iter::new(self).into_eager() } - /// Gets an iterator over the elements in the [`RawDocumentBuf`], which yields - /// `Result<(&str, RawLazyElement<'_>)>`. + /// Gets an iterator over the elements in the [`RawDocumentBuf`], + /// which yields `Result>` values. These hold a + /// reference to the underlying document but do not explicitly + /// resolve the values. /// /// This iterator, which underpins the implementation of the - /// default iterator, produces `RawLazyElement` objects, which + /// default iterator, produces `RawElement` objects, which /// hold a view onto the document but do not parse out or /// construct values until the `.value()` or `.try_into()` methods /// are called. @@ -155,7 +157,7 @@ impl RawDocumentBuf { /// There is no owning iterator for [`RawDocumentBuf`]. If you /// need ownership over elements that might need to allocate, you /// must explicitly convert them to owned types yourself. - pub fn lazy_iter(&self) -> Iter<'_> { + pub fn iter_elements(&self) -> Iter<'_> { Iter::new(self) } diff --git a/src/raw/iter.rs b/src/raw/iter.rs index 816b072a..c3c059c9 100644 --- a/src/raw/iter.rs +++ b/src/raw/iter.rs @@ -68,7 +68,7 @@ impl<'a> Iter<'a> { } #[derive(Clone)] -pub struct RawLazyElement<'a> { +pub struct RawElement<'a> { key: &'a str, kind: ElementType, doc: &'a RawDocument, @@ -76,7 +76,7 @@ pub struct RawLazyElement<'a> { size: usize, } -impl<'a> TryInto> for RawLazyElement<'a> { +impl<'a> TryInto> for RawElement<'a> { type Error = Error; fn try_into(self) -> Result> { @@ -84,7 +84,7 @@ impl<'a> TryInto> for RawLazyElement<'a> { } } -impl<'a> TryInto for RawLazyElement<'a> { +impl<'a> TryInto for RawElement<'a> { type Error = Error; fn try_into(self) -> Result { @@ -92,7 +92,7 @@ impl<'a> TryInto for RawLazyElement<'a> { } } -impl<'a> TryInto for RawLazyElement<'a> { +impl<'a> TryInto for RawElement<'a> { type Error = Error; fn try_into(self) -> Result { @@ -100,7 +100,7 @@ impl<'a> TryInto for RawLazyElement<'a> { } } -impl<'a> RawLazyElement<'a> { +impl<'a> RawElement<'a> { pub fn len(&self) -> usize { self.size } @@ -245,9 +245,9 @@ impl<'a> Iter<'a> { } impl<'a> Iterator for Iter<'a> { - type Item = Result>; + type Item = Result>; - fn next(&mut self) -> Option>> { + fn next(&mut self) -> Option>> { if !self.valid { return None; } else if self.offset == self.doc.as_bytes().len() - 1 { @@ -346,7 +346,7 @@ impl<'a> Iterator for Iter<'a> { } Some(match kvp_result { - Ok((kind, size)) => Ok(RawLazyElement { + Ok((kind, size)) => Ok(RawElement { key, kind, doc: self.doc, From 66555e5376ab16da05de708edea493f749d94323 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Mon, 1 Jan 2024 09:24:48 -0500 Subject: [PATCH 08/17] reduce churn --- src/raw/bson.rs | 21 +++++++++++++++++---- src/raw/bson_ref.rs | 14 ++++++++++++-- src/raw/test/mod.rs | 35 +++++++++++++++++++++-------------- 3 files changed, 50 insertions(+), 20 deletions(-) diff --git a/src/raw/bson.rs b/src/raw/bson.rs index 9d748367..7bb475a4 100644 --- a/src/raw/bson.rs +++ b/src/raw/bson.rs @@ -6,14 +6,27 @@ use crate::{ oid::{self, ObjectId}, raw::RAW_BSON_NEWTYPE, spec::ElementType, - Binary, Bson, DbPointer, Decimal128, RawArray, RawArrayBuf, RawBinaryRef, RawBsonRef, - RawDbPointerRef, RawDocument, RawDocumentBuf, RawJavaScriptCodeWithScopeRef, RawRegexRef, - Regex, Timestamp, + Binary, + Bson, + DbPointer, + Decimal128, + RawArray, + RawArrayBuf, + RawBinaryRef, + RawBsonRef, + RawDbPointerRef, + RawDocument, + RawDocumentBuf, + RawJavaScriptCodeWithScopeRef, + RawRegexRef, + Regex, + Timestamp, }; use super::{ serde::{bson_visitor::OwnedOrBorrowedRawBsonVisitor, OwnedOrBorrowedRawBson}, - Error, Result, + Error, + Result, }; /// A BSON value backed by owned raw BSON bytes. diff --git a/src/raw/bson_ref.rs b/src/raw/bson_ref.rs index 877e89cf..8241d879 100644 --- a/src/raw/bson_ref.rs +++ b/src/raw/bson_ref.rs @@ -6,14 +6,24 @@ use serde_bytes::Bytes; use super::{ bson::RawBson, serde::{bson_visitor::OwnedOrBorrowedRawBsonVisitor, OwnedOrBorrowedRawBson}, - Error, RawArray, RawDocument, Result, + Error, + RawArray, + RawDocument, + Result, }; use crate::{ extjson, oid::{self, ObjectId}, raw::{RawJavaScriptCodeWithScope, RAW_BSON_NEWTYPE}, spec::{BinarySubtype, ElementType}, - Binary, Bson, DbPointer, Decimal128, RawArrayBuf, RawDocumentBuf, Regex, Timestamp, + Binary, + Bson, + DbPointer, + Decimal128, + RawArrayBuf, + RawDocumentBuf, + Regex, + Timestamp, }; /// A BSON value referencing raw bytes stored elsewhere. diff --git a/src/raw/test/mod.rs b/src/raw/test/mod.rs index 8ed8caea..250141d6 100644 --- a/src/raw/test/mod.rs +++ b/src/raw/test/mod.rs @@ -3,8 +3,15 @@ mod props; use super::*; use crate::{ - doc, oid::ObjectId, raw::error::ValueAccessErrorKind, spec::BinarySubtype, Binary, Bson, - DateTime, Regex, Timestamp, + doc, + oid::ObjectId, + raw::error::ValueAccessErrorKind, + spec::BinarySubtype, + Binary, + Bson, + DateTime, + Regex, + Timestamp, }; #[test] @@ -80,10 +87,10 @@ fn iterate() { fn rawdoc_to_doc() { let rawdoc = rawdoc! { "f64": 2.5, + "string": "hello", "document": {}, "array": ["binary", "serialized", "object", "notation"], "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1, 2, 3] }, - "string": "hello", "object_id": ObjectId::from_bytes([1, 2, 3, 4, 5,6,7,8,9,10, 11,12]), "boolean": true, "datetime": DateTime::now(), @@ -365,18 +372,18 @@ fn int64() { #[test] fn document_iteration() { let rawdoc = rawdoc! { - "f64": 2.5, - "string": "hello", - "document": {}, - "array": ["binary", "serialized", "object", "notation"], - "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] }, + "f64": 2.5, + "string": "hello", + "document": {}, + "array": ["binary", "serialized", "object", "notation"], + "binary": Binary { subtype: BinarySubtype::Generic, bytes: vec![1u8, 2, 3] }, "object_id": ObjectId::from_bytes([1, 2, 3, 4, 5,6,7,8,9,10, 11,12]), - "boolean": true, - "datetime": DateTime::now(), - "null": RawBson::Null, - "regex": Regex { pattern: String::from(r"end\s*$"), options: String::from("i") }, - "javascript": RawBson::JavaScriptCode(String::from("console.log(console);")), - "symbol": RawBson::Symbol(String::from("artist-formerly-known-as")), + "boolean": true, + "datetime": DateTime::now(), + "null": RawBson::Null, + "regex": Regex { pattern: String::from(r"end\s*$"), options: String::from("i") }, + "javascript": RawBson::JavaScriptCode(String::from("console.log(console);")), + "symbol": RawBson::Symbol(String::from("artist-formerly-known-as")), "javascript_with_scope": RawJavaScriptCodeWithScope { code: String::from("console.log(msg);"), scope: rawdoc! { "ok": true } From 4d5bf476d2603744bd4a645d02ae24a60c5d1a87 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Mon, 1 Jan 2024 09:30:26 -0500 Subject: [PATCH 09/17] fmt fixup --- src/raw/array.rs | 14 ++++++++++++-- src/raw/document.rs | 12 ++++++++++-- src/raw/document_buf.rs | 15 ++++++++++++--- src/raw/iter.rs | 24 ++++++++++++++++++++---- src/raw/mod.rs | 6 +++++- 5 files changed, 59 insertions(+), 12 deletions(-) diff --git a/src/raw/array.rs b/src/raw/array.rs index a31c18b1..bc13bd4f 100644 --- a/src/raw/array.rs +++ b/src/raw/array.rs @@ -5,10 +5,20 @@ use serde::{ser::SerializeSeq, Deserialize, Serialize}; use super::{ error::{ValueAccessError, ValueAccessErrorKind, ValueAccessResult}, serde::OwnedOrBorrowedRawArray, - Error, RawBinaryRef, RawBsonRef, RawDocument, RawRegexRef, Result, + Error, + RawBinaryRef, + RawBsonRef, + RawDocument, + RawRegexRef, + Result, }; use crate::{ - oid::ObjectId, raw::RAW_ARRAY_NEWTYPE, spec::ElementType, Bson, DateTime, RawArrayBuf, + oid::ObjectId, + raw::RAW_ARRAY_NEWTYPE, + spec::ElementType, + Bson, + DateTime, + RawArrayBuf, Timestamp, }; diff --git a/src/raw/document.rs b/src/raw/document.rs index 41821a37..d450cf3a 100644 --- a/src/raw/document.rs +++ b/src/raw/document.rs @@ -8,12 +8,20 @@ use serde::{ser::SerializeMap, Deserialize, Serialize}; use crate::{ de::MIN_BSON_DOCUMENT_SIZE, raw::{error::ErrorKind, serde::OwnedOrBorrowedRawDocument, RAW_DOCUMENT_NEWTYPE}, - DateTime, Timestamp, + DateTime, + Timestamp, }; use super::{ error::{ValueAccessError, ValueAccessErrorKind, ValueAccessResult}, - i32_from_slice, Error, Iter, RawArray, RawBinaryRef, RawBsonRef, RawDocumentBuf, RawRegexRef, + i32_from_slice, + Error, + Iter, + RawArray, + RawBinaryRef, + RawBsonRef, + RawDocumentBuf, + RawRegexRef, Result, }; use crate::{oid::ObjectId, spec::ElementType, Document}; diff --git a/src/raw/document_buf.rs b/src/raw/document_buf.rs index ce6fd639..72be6d93 100644 --- a/src/raw/document_buf.rs +++ b/src/raw/document_buf.rs @@ -8,13 +8,22 @@ use std::{ use serde::{Deserialize, Serialize}; use crate::{ - de::MIN_BSON_DOCUMENT_SIZE, spec::BinarySubtype, Document, RawBinaryRef, + de::MIN_BSON_DOCUMENT_SIZE, + spec::BinarySubtype, + Document, + RawBinaryRef, RawJavaScriptCodeWithScopeRef, }; use super::{ - bson::RawBson, serde::OwnedOrBorrowedRawDocument, Error, ErrorKind, Iter, RawBsonRef, - RawDocument, Result, + bson::RawBson, + serde::OwnedOrBorrowedRawDocument, + Error, + ErrorKind, + Iter, + RawBsonRef, + RawDocument, + Result, }; /// An owned BSON document (akin to [`std::path::PathBuf`]), backed by a buffer of raw BSON bytes. diff --git a/src/raw/iter.rs b/src/raw/iter.rs index c3c059c9..5d5c175e 100644 --- a/src/raw/iter.rs +++ b/src/raw/iter.rs @@ -5,13 +5,29 @@ use crate::{ oid::ObjectId, raw::{Error, ErrorKind, Result}, spec::{BinarySubtype, ElementType}, - Bson, DateTime, Decimal128, RawArray, RawBinaryRef, RawBson, RawDbPointerRef, - RawJavaScriptCodeWithScopeRef, RawRegexRef, Timestamp, + Bson, + DateTime, + Decimal128, + RawArray, + RawBinaryRef, + RawBson, + RawDbPointerRef, + RawJavaScriptCodeWithScopeRef, + RawRegexRef, + Timestamp, }; use super::{ - checked_add, error::try_with_key, f64_from_slice, i32_from_slice, i64_from_slice, read_len, - read_lenencode, read_nullterminated, RawBsonRef, RawDocument, + checked_add, + error::try_with_key, + f64_from_slice, + i32_from_slice, + i64_from_slice, + read_len, + read_lenencode, + read_nullterminated, + RawBsonRef, + RawDocument, }; /// An iterator over the document's entries. diff --git a/src/raw/mod.rs b/src/raw/mod.rs index e096bf2a..9a5b8320 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -133,7 +133,11 @@ pub use self::{ array_buf::RawArrayBuf, bson::{RawBson, RawJavaScriptCodeWithScope}, bson_ref::{ - RawBinaryRef, RawBsonRef, RawDbPointerRef, RawJavaScriptCodeWithScopeRef, RawRegexRef, + RawBinaryRef, + RawBsonRef, + RawDbPointerRef, + RawJavaScriptCodeWithScopeRef, + RawRegexRef, }, document::RawDocument, document_buf::RawDocumentBuf, From ba581511d475124450548560a08781839b6c6497 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Mon, 1 Jan 2024 09:51:28 -0500 Subject: [PATCH 10/17] less dyn --- src/raw/array.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/raw/array.rs b/src/raw/array.rs index bc13bd4f..a5512047 100644 --- a/src/raw/array.rs +++ b/src/raw/array.rs @@ -6,6 +6,7 @@ use super::{ error::{ValueAccessError, ValueAccessErrorKind, ValueAccessResult}, serde::OwnedOrBorrowedRawArray, Error, + Iter, RawBinaryRef, RawBsonRef, RawDocument, @@ -259,14 +260,14 @@ impl<'a> IntoIterator for &'a RawArray { fn into_iter(self) -> RawArrayIter<'a> { RawArrayIter { - inner: self.doc.into_iter(), + inner: Iter::new(&self.doc), } } } /// An iterator over borrowed raw BSON array values. pub struct RawArrayIter<'a> { - inner: Box)>> + 'a>, + inner: Iter<'a>, } impl<'a> Iterator for RawArrayIter<'a> { @@ -274,7 +275,10 @@ impl<'a> Iterator for RawArrayIter<'a> { fn next(&mut self) -> Option>> { match self.inner.next() { - Some(Ok((_, v))) => Some(Ok(v)), + Some(Ok(elem)) => match elem.value() { + Ok(value) => Some(Ok(value)), + Err(e) => Some(Err(e)), + }, Some(Err(e)) => Some(Err(e)), None => None, } From 364131fa5736158738e0b031a9d40f3ec22d7605 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Mon, 1 Jan 2024 12:06:42 -0500 Subject: [PATCH 11/17] more cleanup --- src/raw/document.rs | 17 ++++ src/raw/iter.rs | 209 +++++++++++++++++++++----------------------- src/raw/mod.rs | 16 ---- 3 files changed, 116 insertions(+), 126 deletions(-) diff --git a/src/raw/document.rs b/src/raw/document.rs index d450cf3a..c0d52b95 100644 --- a/src/raw/document.rs +++ b/src/raw/document.rs @@ -15,6 +15,7 @@ use crate::{ use super::{ error::{ValueAccessError, ValueAccessErrorKind, ValueAccessResult}, i32_from_slice, + try_to_str, Error, Iter, RawArray, @@ -492,6 +493,22 @@ impl RawDocument { pub fn is_empty(&self) -> bool { self.as_bytes().len() == MIN_BSON_DOCUMENT_SIZE as usize } + + pub(crate) fn read_cstring_at(&self, start_at: usize) -> Result<&str> { + let buf = &self.as_bytes()[start_at..]; + + let mut splits = buf.splitn(2, |x| *x == 0); + let value = splits + .next() + .ok_or_else(|| Error::new_without_key(ErrorKind::new_malformed("no value")))?; + if splits.next().is_some() { + Ok(try_to_str(value)?) + } else { + Err(Error::new_without_key(ErrorKind::new_malformed( + "expected null terminator", + ))) + } + } } impl<'de: 'a, 'a> Deserialize<'de> for &'a RawDocument { diff --git a/src/raw/iter.rs b/src/raw/iter.rs index 5d5c175e..9242f495 100644 --- a/src/raw/iter.rs +++ b/src/raw/iter.rs @@ -25,7 +25,7 @@ use super::{ i64_from_slice, read_len, read_lenencode, - read_nullterminated, + try_to_str, RawBsonRef, RawDocument, }; @@ -135,103 +135,77 @@ impl<'a> RawElement<'a> { ElementType::Undefined => RawBsonRef::Undefined, ElementType::MinKey => RawBsonRef::MinKey, ElementType::MaxKey => RawBsonRef::MaxKey, - ElementType::ObjectId => RawBsonRef::ObjectId(ObjectId::from_bytes( - self.doc.as_bytes()[self.start_at..(self.start_at + self.size)] - .try_into() - .map_err(|e| Error::new_with_key(self.key, ErrorKind::new_malformed(e)))?, - )), - ElementType::EmbeddedDocument => RawBsonRef::Document(RawDocument::from_bytes( - &self.doc.as_bytes()[self.start_at..(self.start_at + self.size)], - )?), - ElementType::Array => RawBsonRef::Array(RawArray::from_doc(RawDocument::from_bytes( - &self.doc.as_bytes()[self.start_at..(self.start_at + self.size)], - )?)), - ElementType::Int32 => { - RawBsonRef::Int32(i32_from_slice(&self.doc.as_bytes()[self.start_at..])?) + ElementType::ObjectId => RawBsonRef::ObjectId(self.get_oid_at(self.start_at)?), + ElementType::Int32 => RawBsonRef::Int32(i32_from_slice(self.slice())?), + ElementType::Int64 => RawBsonRef::Int64(i64_from_slice(self.slice())?), + ElementType::Double => RawBsonRef::Double(f64_from_slice(self.slice())?), + ElementType::String => RawBsonRef::String(self.read_str()?), + ElementType::EmbeddedDocument => { + RawBsonRef::Document(RawDocument::from_bytes(self.slice())?) } - ElementType::Int64 => { - RawBsonRef::Int64(i64_from_slice(&self.doc.as_bytes()[self.start_at..])?) + ElementType::Array => { + RawBsonRef::Array(RawArray::from_doc(RawDocument::from_bytes(self.slice())?)) } - ElementType::Double => { - RawBsonRef::Double(f64_from_slice(&self.doc.as_bytes()[self.start_at..])?) + ElementType::Boolean => { + RawBsonRef::Boolean(read_bool(self.slice()).map_err(|e| self.malformed_error(e))?) } - ElementType::String => { - RawBsonRef::String(read_lenencode(&self.doc.as_bytes()[self.start_at..])?) + ElementType::DateTime => { + RawBsonRef::DateTime(DateTime::from_millis(i64_from_slice(self.slice())?)) } - ElementType::Boolean => RawBsonRef::Boolean( - read_bool(&self.doc.as_bytes()[self.start_at..]) - .map_err(|e| Error::new_with_key(self.key, ErrorKind::new_malformed(e)))?, - ), - ElementType::DateTime => RawBsonRef::DateTime(DateTime::from_millis(i64_from_slice( - &self.doc.as_bytes()[self.start_at..], - )?)), ElementType::Decimal128 => RawBsonRef::Decimal128(Decimal128::from_bytes( - self.doc.as_bytes()[self.start_at..(self.start_at + self.size)] + self.slice() .try_into() - .map_err(|e| Error::new_with_key(self.key, ErrorKind::new_malformed(e)))?, + .map_err(|e| self.malformed_error(e))?, )), - ElementType::JavaScriptCode => { - RawBsonRef::JavaScriptCode(read_lenencode(&self.doc.as_bytes()[self.start_at..])?) - } - ElementType::Symbol => { - RawBsonRef::Symbol(read_lenencode(&self.doc.as_bytes()[self.start_at..])?) - } - ElementType::DbPointer => { - let namespace = read_lenencode(&self.doc.as_bytes()[self.start_at..])?; - let oid_offset = self.start_at + 4 + namespace.len() + 1; - let id = ObjectId::from_bytes( - self.doc.as_bytes()[oid_offset..(oid_offset + 12)] - .try_into() - .unwrap(), - ); - RawBsonRef::DbPointer(RawDbPointerRef { namespace, id }) - } + ElementType::JavaScriptCode => RawBsonRef::JavaScriptCode(self.read_str()?), + ElementType::Symbol => RawBsonRef::Symbol(self.read_str()?), + ElementType::DbPointer => RawBsonRef::DbPointer(RawDbPointerRef { + namespace: read_lenencode(self.slice())?, + id: self.get_oid_at(self.start_at + (self.size - 12))?, + }), ElementType::RegularExpression => { - let pattern = read_nullterminated(&self.doc.as_bytes()[self.start_at..])?; - let options = read_nullterminated( - &self.doc.as_bytes()[(self.start_at + pattern.len() + 1)..], - )?; - RawBsonRef::RegularExpression(RawRegexRef { pattern, options }) + let pattern = self.doc.read_cstring_at(self.start_at)?; + RawBsonRef::RegularExpression(RawRegexRef { + pattern, + options: self + .doc + .read_cstring_at(self.start_at + pattern.len() + 1)?, + }) } ElementType::Timestamp => RawBsonRef::Timestamp( - Timestamp::from_reader(&self.doc.as_bytes()[self.start_at..]) - .map_err(|e| Error::new_with_key(self.key, ErrorKind::new_malformed(e)))?, + Timestamp::from_reader(self.slice()).map_err(|e| self.malformed_error(e))?, ), ElementType::Binary => { - let len = i32_from_slice(&self.doc.as_bytes()[self.start_at..])? as usize; + let len = self.size.checked_sub(4 + 1).ok_or_else(|| { + self.malformed_error(format!("length exceeds maximum: {}", self.size)) + })?; + let data_start = self.start_at + 4 + 1; - if len >= i32::MAX as usize { - return Err(Error::new_with_key( - self.key, - ErrorKind::new_malformed(format!("binary length exceeds maximum: {}", len)), - )); + if self.size >= i32::MAX as usize { + return Err( + self.malformed_error(format!("binary length exceeds maximum: {}", len)) + ); } let subtype = BinarySubtype::from(self.doc.as_bytes()[self.start_at + 4]); let data = match subtype { BinarySubtype::BinaryOld => { if len < 4 { - return Err(Error::new_with_key( - self.key, - ErrorKind::new_malformed( - "old binary subtype has no inner declared length", - ), + return Err(self.malformed_error( + "old binary subtype has no inner declared length", )); } let oldlength = i32_from_slice(&self.doc.as_bytes()[data_start..])? as usize; if checked_add(oldlength, 4)? != len { - return Err(Error::new_with_key( - self.key, - ErrorKind::new_malformed( - "old binary subtype has wrong inner declared length", - ), + return Err(self.malformed_error( + "old binary subtype has wrong inner declared length", )); } - &self.doc.as_bytes()[(data_start + 4)..(data_start + len)] + self.slice_bounds(data_start + 4, len - 4) } - _ => &self.doc.as_bytes()[data_start..(data_start + len)], + _ => self.slice_bounds(data_start, len), }; RawBsonRef::Binary(RawBinaryRef { subtype, @@ -239,7 +213,11 @@ impl<'a> RawElement<'a> { }) } ElementType::JavaScriptCodeWithScope => { - let slice = &self.doc.as_bytes()[self.start_at..(self.start_at + self.size)]; + if self.size < MIN_CODE_WITH_SCOPE_SIZE as usize { + return Err(self.malformed_error("code with scope length too small")); + } + + let slice = self.slice(); let code = read_lenencode(&slice[4..])?; let scope_start = 4 + 4 + code.len() + 1; let scope = RawDocument::from_bytes(&slice[scope_start..])?; @@ -248,6 +226,30 @@ impl<'a> RawElement<'a> { } }) } + + fn malformed_error(&self, e: impl ToString) -> Error { + Error::new_with_key(self.key, ErrorKind::new_malformed(e)) + } + + fn slice(&self) -> &'a [u8] { + self.slice_bounds(self.start_at, self.size) + } + + fn slice_bounds(&self, start_at: usize, size: usize) -> &'a [u8] { + &self.doc.as_bytes()[start_at..(start_at + size)] + } + + fn read_str(&self) -> Result<&'a str> { + try_to_str(self.slice_bounds(self.start_at + 4, self.size - 4 - 1)) + } + + fn get_oid_at(&self, start_at: usize) -> Result { + Ok(ObjectId::from_bytes( + self.doc.as_bytes()[start_at..(start_at + 12)] + .try_into() + .map_err(|e| Error::new_with_key(self.key, ErrorKind::new_malformed(e)))?, + )) + } } impl<'a> Iter<'a> { @@ -258,6 +260,10 @@ impl<'a> Iter<'a> { }) })) } + + fn get_next_length_at(&self, start_at: usize) -> Result { + i32_from_slice(&self.doc.as_bytes()[start_at..]) + } } impl<'a> Iterator for Iter<'a> { @@ -283,7 +289,7 @@ impl<'a> Iterator for Iter<'a> { )))); } - let key = match read_nullterminated(&self.doc.as_bytes()[self.offset + 1..]) { + let key = match self.doc.read_cstring_at(self.offset + 1) { Ok(k) => k, Err(e) => { self.valid = false; @@ -291,7 +297,7 @@ impl<'a> Iterator for Iter<'a> { } }; - let valueoffset = self.offset + 1 + key.len() + 1; // type specifier + key + \0 + let offset = self.offset + 1 + key.len() + 1; // type specifier + key + \0 let kvp_result = try_with_key(key, || { let element_type = match ElementType::from(self.doc.as_bytes()[self.offset]) { Some(et) => et, @@ -307,52 +313,35 @@ impl<'a> Iterator for Iter<'a> { }; let element_size = match element_type { + ElementType::Boolean => 1, ElementType::Int32 => 4, ElementType::Int64 => 8, ElementType::Double => 8, - ElementType::String => read_len(&self.doc.as_bytes()[valueoffset..])?, - ElementType::EmbeddedDocument => self.next_document_len(valueoffset)?, - ElementType::Array => self.next_document_len(valueoffset)?, - ElementType::Binary => { - (i32_from_slice(&self.doc.as_bytes()[valueoffset..])? + 4 + 1) as usize - } - ElementType::ObjectId => 12, - ElementType::Boolean => 1, ElementType::DateTime => 8, - ElementType::RegularExpression => { - let pattern = read_nullterminated(&self.doc.as_bytes()[valueoffset..])?; - let options = read_nullterminated( - &self.doc.as_bytes()[(valueoffset + pattern.len() + 1)..], - )?; - pattern.len() + 1 + options.len() + 1 - } - ElementType::Null => 0, - ElementType::Undefined => 0, ElementType::Timestamp => 8, - ElementType::Symbol => read_len(&self.doc.as_bytes()[valueoffset..])?, - ElementType::JavaScriptCode => read_len(&self.doc.as_bytes()[valueoffset..])?, - ElementType::JavaScriptCodeWithScope => { - let length = (i32_from_slice(&self.doc.as_bytes()[valueoffset..])?) as usize; - - if length < MIN_CODE_WITH_SCOPE_SIZE as usize { - return Err(Error::new_without_key(ErrorKind::new_malformed( - "code with scope length too small", - ))); - } - - length - } - ElementType::DbPointer => { - let length = read_len(&self.doc.as_bytes()[valueoffset..])?; - length + 12 - } + ElementType::ObjectId => 12, ElementType::Decimal128 => 16, + ElementType::Null => 0, + ElementType::Undefined => 0, ElementType::MinKey => 0, ElementType::MaxKey => 0, + ElementType::String => read_len(&self.doc.as_bytes()[offset..])?, + ElementType::EmbeddedDocument => self.next_document_len(offset)?, + ElementType::Array => self.next_document_len(offset)?, + ElementType::Binary => (self.get_next_length_at(offset)? + 4 + 1) as usize, + ElementType::RegularExpression => { + let pattern = self.doc.read_cstring_at(offset)?; + let options = self.doc.read_cstring_at(offset + pattern.len() + 1)?; + pattern.len() + 1 + options.len() + 1 + } + ElementType::DbPointer => read_len(&self.doc.as_bytes()[offset..])? + 12, + ElementType::Symbol => read_len(&self.doc.as_bytes()[offset..])?, + ElementType::JavaScriptCode => read_len(&self.doc.as_bytes()[offset..])?, + ElementType::JavaScriptCodeWithScope => self.get_next_length_at(offset)? as usize, }; - self.verify_enough_bytes(valueoffset, element_size)?; - self.offset = valueoffset + element_size; + self.verify_enough_bytes(offset, element_size)?; + self.offset = offset + element_size; Ok((element_type, element_size)) }); @@ -366,7 +355,7 @@ impl<'a> Iterator for Iter<'a> { key, kind, doc: self.doc, - start_at: valueoffset, + start_at: offset, size, }), Err(e) => Err(e), diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 9a5b8320..13e2669c 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -196,22 +196,6 @@ fn i64_from_slice(val: &[u8]) -> Result { Ok(i64::from_le_bytes(arr)) } -fn read_nullterminated(buf: &[u8]) -> Result<&str> { - let mut splits = buf.splitn(2, |x| *x == 0); - let value = splits.next().ok_or_else(|| { - Error::new_without_key(ErrorKind::MalformedValue { - message: "no value".into(), - }) - })?; - if splits.next().is_some() { - Ok(try_to_str(value)?) - } else { - Err(Error::new_without_key(ErrorKind::MalformedValue { - message: "expected null terminator".into(), - })) - } -} - fn read_len(buf: &[u8]) -> Result { if buf.len() < 4 { return Err(Error::new_without_key(ErrorKind::MalformedValue { From 1056729baeface69e5f23b1b9b326aff49fe554b Mon Sep 17 00:00:00 2001 From: tycho garen Date: Sat, 6 Jan 2024 02:08:42 -0500 Subject: [PATCH 12/17] cr feedback --- src/raw/array.rs | 6 +++--- src/raw/document.rs | 11 +++++----- src/raw/document_buf.rs | 17 ++++++++------- src/raw/iter.rs | 46 +++++++++++++++++++++++++++++------------ src/raw/mod.rs | 2 +- 5 files changed, 52 insertions(+), 30 deletions(-) diff --git a/src/raw/array.rs b/src/raw/array.rs index a5512047..497e820c 100644 --- a/src/raw/array.rs +++ b/src/raw/array.rs @@ -6,10 +6,10 @@ use super::{ error::{ValueAccessError, ValueAccessErrorKind, ValueAccessResult}, serde::OwnedOrBorrowedRawArray, Error, - Iter, RawBinaryRef, RawBsonRef, RawDocument, + RawIter, RawRegexRef, Result, }; @@ -260,14 +260,14 @@ impl<'a> IntoIterator for &'a RawArray { fn into_iter(self) -> RawArrayIter<'a> { RawArrayIter { - inner: Iter::new(&self.doc), + inner: RawIter::new(&self.doc), } } } /// An iterator over borrowed raw BSON array values. pub struct RawArrayIter<'a> { - inner: Iter<'a>, + inner: RawIter<'a>, } impl<'a> Iterator for RawArrayIter<'a> { diff --git a/src/raw/document.rs b/src/raw/document.rs index c0d52b95..51ca6530 100644 --- a/src/raw/document.rs +++ b/src/raw/document.rs @@ -15,13 +15,14 @@ use crate::{ use super::{ error::{ValueAccessError, ValueAccessErrorKind, ValueAccessResult}, i32_from_slice, + iter::Iter, try_to_str, Error, - Iter, RawArray, RawBinaryRef, RawBsonRef, RawDocumentBuf, + RawIter, RawRegexRef, Result, }; @@ -171,7 +172,7 @@ impl RawDocument { /// # Ok::<(), Error>(()) /// ``` pub fn get(&self, key: impl AsRef) -> Result>> { - for elem in Iter::new(self) { + for elem in RawIter::new(self) { let elem = elem?; if key.as_ref() == elem.key() { return Ok(Some(elem.try_into()?)); @@ -594,10 +595,10 @@ impl TryFrom<&RawDocument> for crate::Document { } impl<'a> IntoIterator for &'a RawDocument { - type IntoIter = Box + 'a>; + type IntoIter = Iter<'a>; type Item = Result<(&'a str, RawBsonRef<'a>)>; - fn into_iter(self) -> Box)>> + 'a> { - Box::new(Iter::new(self).into_eager()) + fn into_iter(self) -> Iter<'a> { + Iter::new(self) } } diff --git a/src/raw/document_buf.rs b/src/raw/document_buf.rs index 72be6d93..525ebc44 100644 --- a/src/raw/document_buf.rs +++ b/src/raw/document_buf.rs @@ -17,12 +17,13 @@ use crate::{ use super::{ bson::RawBson, + iter::Iter, serde::OwnedOrBorrowedRawDocument, Error, ErrorKind, - Iter, RawBsonRef, RawDocument, + RawIter, Result, }; @@ -146,8 +147,8 @@ impl RawDocumentBuf { /// There is no owning iterator for [`RawDocumentBuf`]. If you need ownership over /// elements that might need to allocate, you must explicitly convert /// them to owned types yourself. - pub fn iter(&self) -> Box)>> + '_> { - Iter::new(self).into_eager() + pub fn iter(&self) -> Iter<'_> { + Iter::new(self) } /// Gets an iterator over the elements in the [`RawDocumentBuf`], @@ -166,8 +167,8 @@ impl RawDocumentBuf { /// There is no owning iterator for [`RawDocumentBuf`]. If you /// need ownership over elements that might need to allocate, you /// must explicitly convert them to owned types yourself. - pub fn iter_elements(&self) -> Iter<'_> { - Iter::new(self) + pub fn iter_elements(&self) -> RawIter<'_> { + RawIter::new(self) } /// Return the contained data as a `Vec` @@ -389,11 +390,11 @@ impl TryFrom<&Document> for RawDocumentBuf { } impl<'a> IntoIterator for &'a RawDocumentBuf { - type IntoIter = Box + 'a>; + type IntoIter = Iter<'a>; type Item = Result<(&'a str, RawBsonRef<'a>)>; - fn into_iter(self) -> Box + 'a> { - Box::new(Iter::new(self).into_eager()) + fn into_iter(self) -> Iter<'a> { + Iter::new(self) } } diff --git a/src/raw/iter.rs b/src/raw/iter.rs index 9242f495..267c529e 100644 --- a/src/raw/iter.rs +++ b/src/raw/iter.rs @@ -32,6 +32,34 @@ use super::{ /// An iterator over the document's entries. pub struct Iter<'a> { + inner: RawIter<'a>, +} + +impl<'a> Iter<'a> { + pub(crate) fn new(doc: &'a RawDocument) -> Self { + Iter { + inner: RawIter::new(doc), + } + } +} + +impl<'a> Iterator for Iter<'a> { + type Item = Result<(&'a str, RawBsonRef<'a>)>; + + fn next(&mut self) -> Option)>> { + match self.inner.next() { + Some(Ok(elem)) => match elem.value() { + Err(e) => Some(Err(e)), + Ok(value) => Some(Ok((elem.key, value))), + }, + Some(Err(e)) => Some(Err(e)), + None => None, + } + } +} + +/// An iterator over the document's elements. +pub struct RawIter<'a> { doc: &'a RawDocument, offset: usize, @@ -40,7 +68,7 @@ pub struct Iter<'a> { valid: bool, } -impl<'a> Iter<'a> { +impl<'a> RawIter<'a> { pub(crate) fn new(doc: &'a RawDocument) -> Self { Self { doc, @@ -121,8 +149,8 @@ impl<'a> RawElement<'a> { self.size } - pub fn key(&self) -> String { - self.key.to_string() + pub fn key(&self) -> &str { + self.key } pub fn element_type(&self) -> ElementType { @@ -252,21 +280,13 @@ impl<'a> RawElement<'a> { } } -impl<'a> Iter<'a> { - pub(crate) fn into_eager(self) -> Box)>>> { - Box::new(self.map(|outer| { - outer.and_then(|val| -> Result<(&'a str, RawBsonRef<'a>)> { - Ok((val.key, val.value()?)) - }) - })) - } - +impl<'a> RawIter<'a> { fn get_next_length_at(&self, start_at: usize) -> Result { i32_from_slice(&self.doc.as_bytes()[start_at..]) } } -impl<'a> Iterator for Iter<'a> { +impl<'a> Iterator for RawIter<'a> { type Item = Result>; fn next(&mut self) -> Option>> { diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 13e2669c..56c6e130 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -142,7 +142,7 @@ pub use self::{ document::RawDocument, document_buf::RawDocumentBuf, error::{Error, ErrorKind, Result, ValueAccessError, ValueAccessErrorKind, ValueAccessResult}, - iter::Iter, + iter::RawIter, }; /// Special newtype name indicating that the type being (de)serialized is a raw BSON document. From e39ee2e2a173c4de37075fe5266ab2d127e5c2f3 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Sat, 6 Jan 2024 02:22:08 -0500 Subject: [PATCH 13/17] fix fuzz --- src/raw/iter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/raw/iter.rs b/src/raw/iter.rs index 267c529e..528f5b23 100644 --- a/src/raw/iter.rs +++ b/src/raw/iter.rs @@ -348,7 +348,7 @@ impl<'a> Iterator for RawIter<'a> { ElementType::String => read_len(&self.doc.as_bytes()[offset..])?, ElementType::EmbeddedDocument => self.next_document_len(offset)?, ElementType::Array => self.next_document_len(offset)?, - ElementType::Binary => (self.get_next_length_at(offset)? + 4 + 1) as usize, + ElementType::Binary => (self.get_next_length_at(offset)? as usize) + 4 + 1, ElementType::RegularExpression => { let pattern = self.doc.read_cstring_at(offset)?; let options = self.doc.read_cstring_at(offset + pattern.len() + 1)?; From ee56e066430c5bae16f5ed8e37cc1d6713a0332e Mon Sep 17 00:00:00 2001 From: tycho garen Date: Mon, 8 Jan 2024 12:49:31 -0500 Subject: [PATCH 14/17] cast better --- src/raw/iter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/raw/iter.rs b/src/raw/iter.rs index 528f5b23..f1062dca 100644 --- a/src/raw/iter.rs +++ b/src/raw/iter.rs @@ -348,7 +348,7 @@ impl<'a> Iterator for RawIter<'a> { ElementType::String => read_len(&self.doc.as_bytes()[offset..])?, ElementType::EmbeddedDocument => self.next_document_len(offset)?, ElementType::Array => self.next_document_len(offset)?, - ElementType::Binary => (self.get_next_length_at(offset)? as usize) + 4 + 1, + ElementType::Binary => ((4 + 1 as i32) + self.get_next_length_at(offset)?) as usize, ElementType::RegularExpression => { let pattern = self.doc.read_cstring_at(offset)?; let options = self.doc.read_cstring_at(offset + pattern.len() + 1)?; From 5f3985308f6ddd30b1a9b22fa883a1a9d041522f Mon Sep 17 00:00:00 2001 From: tycho garen Date: Tue, 9 Jan 2024 13:18:18 -0500 Subject: [PATCH 15/17] fix lint --- src/raw/iter.rs | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/src/raw/iter.rs b/src/raw/iter.rs index f1062dca..8aa62a1d 100644 --- a/src/raw/iter.rs +++ b/src/raw/iter.rs @@ -5,29 +5,13 @@ use crate::{ oid::ObjectId, raw::{Error, ErrorKind, Result}, spec::{BinarySubtype, ElementType}, - Bson, - DateTime, - Decimal128, - RawArray, - RawBinaryRef, - RawBson, - RawDbPointerRef, - RawJavaScriptCodeWithScopeRef, - RawRegexRef, - Timestamp, + Bson, DateTime, Decimal128, RawArray, RawBinaryRef, RawBson, RawDbPointerRef, + RawJavaScriptCodeWithScopeRef, RawRegexRef, Timestamp, }; use super::{ - checked_add, - error::try_with_key, - f64_from_slice, - i32_from_slice, - i64_from_slice, - read_len, - read_lenencode, - try_to_str, - RawBsonRef, - RawDocument, + checked_add, error::try_with_key, f64_from_slice, i32_from_slice, i64_from_slice, read_len, + read_lenencode, try_to_str, RawBsonRef, RawDocument, }; /// An iterator over the document's entries. @@ -348,7 +332,7 @@ impl<'a> Iterator for RawIter<'a> { ElementType::String => read_len(&self.doc.as_bytes()[offset..])?, ElementType::EmbeddedDocument => self.next_document_len(offset)?, ElementType::Array => self.next_document_len(offset)?, - ElementType::Binary => ((4 + 1 as i32) + self.get_next_length_at(offset)?) as usize, + ElementType::Binary => ((4 + 1_i32) + self.get_next_length_at(offset)?) as usize, ElementType::RegularExpression => { let pattern = self.doc.read_cstring_at(offset)?; let options = self.doc.read_cstring_at(offset + pattern.len() + 1)?; From b822da59faaf5d230204626c6765c5986b4d1654 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Wed, 10 Jan 2024 12:17:16 -0500 Subject: [PATCH 16/17] fix overflow --- src/raw/iter.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/raw/iter.rs b/src/raw/iter.rs index 8aa62a1d..bd838603 100644 --- a/src/raw/iter.rs +++ b/src/raw/iter.rs @@ -265,8 +265,15 @@ impl<'a> RawElement<'a> { } impl<'a> RawIter<'a> { - fn get_next_length_at(&self, start_at: usize) -> Result { - i32_from_slice(&self.doc.as_bytes()[start_at..]) + fn get_next_length_at(&self, start_at: usize) -> Result { + let len = i32_from_slice(&self.doc.as_bytes()[start_at..])?; + if len < 0 { + Err(Error::new_without_key(ErrorKind::new_malformed( + "lengths can't be negative", + ))) + } else { + Ok(len as usize) + } } } @@ -332,7 +339,7 @@ impl<'a> Iterator for RawIter<'a> { ElementType::String => read_len(&self.doc.as_bytes()[offset..])?, ElementType::EmbeddedDocument => self.next_document_len(offset)?, ElementType::Array => self.next_document_len(offset)?, - ElementType::Binary => ((4 + 1_i32) + self.get_next_length_at(offset)?) as usize, + ElementType::Binary => self.get_next_length_at(offset)? + 4 + 1, ElementType::RegularExpression => { let pattern = self.doc.read_cstring_at(offset)?; let options = self.doc.read_cstring_at(offset + pattern.len() + 1)?; From 093de42f9ab0bd2f1d1bcbc2a00603f435c4a0dc Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Wed, 10 Jan 2024 12:47:04 -0500 Subject: [PATCH 17/17] fix clippy/fmt --- src/raw/iter.rs | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/src/raw/iter.rs b/src/raw/iter.rs index bd838603..1208b7ee 100644 --- a/src/raw/iter.rs +++ b/src/raw/iter.rs @@ -5,13 +5,29 @@ use crate::{ oid::ObjectId, raw::{Error, ErrorKind, Result}, spec::{BinarySubtype, ElementType}, - Bson, DateTime, Decimal128, RawArray, RawBinaryRef, RawBson, RawDbPointerRef, - RawJavaScriptCodeWithScopeRef, RawRegexRef, Timestamp, + Bson, + DateTime, + Decimal128, + RawArray, + RawBinaryRef, + RawBson, + RawDbPointerRef, + RawJavaScriptCodeWithScopeRef, + RawRegexRef, + Timestamp, }; use super::{ - checked_add, error::try_with_key, f64_from_slice, i32_from_slice, i64_from_slice, read_len, - read_lenencode, try_to_str, RawBsonRef, RawDocument, + checked_add, + error::try_with_key, + f64_from_slice, + i32_from_slice, + i64_from_slice, + read_len, + read_lenencode, + try_to_str, + RawBsonRef, + RawDocument, }; /// An iterator over the document's entries. @@ -348,7 +364,7 @@ impl<'a> Iterator for RawIter<'a> { ElementType::DbPointer => read_len(&self.doc.as_bytes()[offset..])? + 12, ElementType::Symbol => read_len(&self.doc.as_bytes()[offset..])?, ElementType::JavaScriptCode => read_len(&self.doc.as_bytes()[offset..])?, - ElementType::JavaScriptCodeWithScope => self.get_next_length_at(offset)? as usize, + ElementType::JavaScriptCodeWithScope => self.get_next_length_at(offset)?, }; self.verify_enough_bytes(offset, element_size)?;