Skip to content

Commit

Permalink
add get_batson
Browse files Browse the repository at this point in the history
  • Loading branch information
samuelcolvin committed Sep 21, 2024
1 parent 9da84a2 commit f03e33a
Show file tree
Hide file tree
Showing 7 changed files with 175 additions and 1 deletion.
10 changes: 10 additions & 0 deletions crates/batson/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,16 @@ impl<'b> HetArray<'b> {
writer.end_array();
Ok(())
}

/// Advance the decoder past the end of this array.
///
/// Adds the last recorded offset to `d.index`, then reads the header found at that
/// position and skips over that value's data via [`Decoder::move_to_end`].
///
/// If the offsets table is empty there is no last item to skip, so the decoder is left
/// where it is rather than panicking on malformed or empty input.
///
/// # Errors
///
/// Returns an error if reading the last item's header or skipping its data fails.
pub fn move_to_end(&self, d: &mut Decoder<'b>) -> DecodeResult<()> {
    // widen whichever offset width is in use to `usize`; `None` when there are no items
    let last_offset = match &self.offsets {
        HetArrayOffsets::U8(v) => v.last().map(|&offset| offset as usize),
        HetArrayOffsets::U16(v) => v.last().map(|&offset| offset as usize),
        HetArrayOffsets::U32(v) => v.last().map(|&offset| offset as usize),
    };
    match last_offset {
        Some(offset) => {
            d.index += offset;
            let header = d.take_header()?;
            d.move_to_end(header)
        }
        // no items recorded: nothing further to skip
        None => Ok(()),
    }
}
}

fn take_slice_as<'b, T: bytemuck::Pod>(d: &mut Decoder<'b>, length: Length) -> DecodeResult<&'b [T]> {
Expand Down
32 changes: 32 additions & 0 deletions crates/batson/src/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,38 @@ impl<'b> Decoder<'b> {
Self { bytes, index: 0 }
}

pub fn get_range(&self, start: usize, end: usize) -> DecodeResult<&'b [u8]> {
self.bytes
.get(start..end)
.ok_or_else(|| self.error(DecodeErrorType::EOF))
}

/// Advance `self.index` past the data that belongs to `header`.
///
/// Nothing is returned besides success or failure; the effect is the change to
/// `self.index`.
///
/// # Errors
///
/// Propagates any error from decoding a length or a nested object/array header.
pub fn move_to_end(&mut self, header: Header) -> DecodeResult<()> {
    match header {
        // nothing to skip for these headers
        Header::Null | Header::Bool(_) => {}
        Header::Int(hint) | Header::Float(hint) => self.index += hint.data_length(),
        // the decoded length is used directly as the number of bytes to skip
        Header::IntBig(_, len) | Header::Str(len) | Header::HeaderArray(len) | Header::U8Array(len) => {
            let byte_count = len.decode(self)?;
            self.index += byte_count;
        }
        Header::I64Array(len) => {
            let item_count = len.decode(self)?;
            // NOTE(review): no alignment step happens here — confirm the encoder does not
            // pad before i64 array data
            self.index += item_count * size_of::<i64>();
        }
        // containers know where their last value starts and delegate back to this method
        Header::HetArray(len) => HetArray::decode_header(self, len)?.move_to_end(self)?,
        Header::Object(len) => Object::decode_header(self, len)?.move_to_end(self)?,
    }
    Ok(())
}

pub fn take_header(&mut self) -> DecodeResult<Header> {
let byte = self.next().ok_or_else(|| self.eof())?;
Header::decode(byte, self)
Expand Down
6 changes: 6 additions & 0 deletions crates/batson/src/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ impl Encoder {
Self { data: Vec::new() }
}

/// Create an encoder whose output buffer is pre-allocated to hold `capacity` bytes.
pub fn with_capacity(capacity: usize) -> Self {
    let data = Vec::with_capacity(capacity);
    Self { data }
}

pub fn align<T>(&mut self) {
let align = align_of::<T>();
// same calculation as in `Decoder::align`
Expand Down
32 changes: 32 additions & 0 deletions crates/batson/src/get.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
#![allow(clippy::module_name_repetitions)]

use crate::array::{header_array_get, i64_array_get, u8_array_get, HetArray};
use crate::decoder::Decoder;
use crate::encoder::Encoder;
use crate::errors::{DecodeError, DecodeResult};
use crate::header::Header;
use crate::object::Object;
use std::borrow::Cow;

#[derive(Debug)]
pub enum BatsonPath<'s> {
Expand Down Expand Up @@ -35,6 +38,14 @@ pub fn get_int(bytes: &[u8], path: &[BatsonPath]) -> DecodeResult<Option<i64>> {
get_try_into(bytes, path)
}

/// Look up `path` in `bytes` and return the value found there as batson bytes.
///
/// Returns `Ok(None)` when the lookup finds no value. The `Cow` is borrowed or owned
/// depending on what `GetValue::into_batson` produces for the value.
///
/// # Errors
///
/// Returns an error if the lookup or the conversion to batson fails.
pub fn get_batson<'b>(bytes: &'b [u8], path: &[BatsonPath]) -> DecodeResult<Option<Cow<'b, [u8]>>> {
    GetValue::get(bytes, path)?
        .map(|value| value.into_batson())
        .transpose()
}

/// Report whether looking up `path` in `bytes` finds a value.
///
/// # Errors
///
/// Returns an error if the lookup itself fails.
pub fn contains(bytes: &[u8], path: &[BatsonPath]) -> DecodeResult<bool> {
    let found = GetValue::get(bytes, path)?;
    Ok(found.is_some())
}
Expand Down Expand Up @@ -137,6 +148,27 @@ impl<'b> GetValue<'b> {
_ => Ok(None),
}
}

/// Convert the located value into batson bytes.
///
/// A `Header` value is returned as a borrowed slice running from one byte before the
/// decoder's current index to the end of the value's data. Bare integers are re-encoded
/// into a freshly allocated buffer.
fn into_batson(self) -> DecodeResult<Cow<'b, [u8]>> {
    let (value, capacity) = match self {
        Self::Header(mut decoder, header) => {
            // step back one byte so the slice starts before the current index
            let start = decoder.index - 1;
            decoder.move_to_end(header)?;
            return decoder.get_range(start, decoder.index).map(Cow::Borrowed);
        }
        Self::U8(small) => (i64::from(small), 2),
        Self::I64(wide) => (wide, 9),
    };
    let mut encoder = Encoder::with_capacity(capacity);
    encoder.encode_i64(value);
    Ok(Cow::Owned(encoder.into()))
}
}

impl From<GetValue<'_>> for Option<bool> {
Expand Down
10 changes: 10 additions & 0 deletions crates/batson/src/header.rs
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,16 @@ impl NumberHint {
_ => None,
}
}

/// Number of bytes of data that follow a header carrying this hint.
///
/// Only the sized hints have trailing data; every other hint reports `0`.
pub fn data_length(self) -> usize {
    let trailing_bytes = match self {
        Self::Size64 => 8,
        Self::Size32 => 4,
        Self::Size8 => 1,
        _ => 0,
    };
    trailing_bytes
}
}

/// String, object, and array lengths
Expand Down
17 changes: 17 additions & 0 deletions crates/batson/src/object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,15 @@ impl<'b> Object<'b> {
ObjectChoice::U32(o) => o.write_json(d, writer),
}
}

/// Advance the decoder past the end of this object.
///
/// Dispatches to the `move_to_end` of whichever sized variant this object holds.
pub fn move_to_end(self, d: &mut Decoder<'b>) -> DecodeResult<()> {
    match self.0 {
        ObjectChoice::U8(small) => small.move_to_end(d),
        ObjectChoice::U16(medium) => medium.move_to_end(d),
        ObjectChoice::U32(large) => large.move_to_end(d),
    }
}
}

#[derive(Debug)]
Expand Down Expand Up @@ -136,6 +145,14 @@ impl<'b, S: SuperHeaderItem> ObjectSized<'b, S> {
None => Err(d.error(DecodeErrorType::ObjectBodyIndexInvalid)),
}
}

/// Advance the decoder past the end of the last value in this object.
///
/// Uses the last super-header entry to jump over its key (`offset + key_length`), then
/// reads the value's header and skips that value's data via [`Decoder::move_to_end`].
///
/// An object with no entries has no last value to skip, so the decoder is left where it
/// is instead of panicking.
///
/// # Errors
///
/// Returns an error if reading the last value's header or skipping its data fails.
pub fn move_to_end(self, d: &mut Decoder<'b>) -> DecodeResult<()> {
    match self.super_header.last() {
        Some(h) => {
            d.index += h.offset() + h.key_length();
            let header = d.take_header()?;
            d.move_to_end(header)
        }
        // empty object: nothing further to skip
        None => Ok(()),
    }
}
}

trait SuperHeaderItem: fmt::Debug + Copy + Clone + Pod + Zeroable + Eq + PartialEq {
Expand Down
69 changes: 68 additions & 1 deletion crates/batson/tests/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::sync::Arc;
use jiter::{JsonValue, LazyIndexMap};
use smallvec::smallvec;

use batson::get::{contains, get_bool, get_int, get_length, get_str};
use batson::get::{contains, get_batson, get_bool, get_int, get_length, get_str};
use batson::{batson_to_json_string, compare_json_values, decode_to_json_value, encode_from_json};

#[test]
Expand Down Expand Up @@ -187,6 +187,73 @@ fn test_get_length() {
assert_eq!(get_length(&bytes, &["foo".into(), 1.into()]).unwrap().unwrap(), 2);
}

/// `get_batson` returns the encoded sub-document for each path in a nested structure.
#[test]
fn test_get_batson() {
    let bytes = json_to_batson(br#"{"foo": [null, {"a": 1, "b": 22}, 4294967299]}"#);

    // an empty path yields the whole document
    assert_eq!(get_batson(&bytes, &[]).unwrap().unwrap(), bytes);

    // paths that do not resolve yield `None`
    assert!(get_batson(&bytes, &["bar".into()]).unwrap().is_none());
    assert!(get_batson(&bytes, &["foo".into(), "bar".into()]).unwrap().is_none());

    // the array under "foo"
    let array = get_batson(&bytes, &["foo".into()]).unwrap().unwrap();
    assert_eq!(
        batson_to_json_string(&array).unwrap(),
        r#"[null,{"a":1,"b":22},4294967299]"#
    );

    // array item 0: null
    let null_item = get_batson(&bytes, &["foo".into(), 0.into()]).unwrap().unwrap();
    assert_eq!(null_item, [0u8].as_ref());
    assert_eq!(batson_to_json_string(&null_item).unwrap(), "null");

    // array item 1: the nested object and its two members
    let nested = get_batson(&bytes, &["foo".into(), 1.into()]).unwrap().unwrap();
    assert_eq!(batson_to_json_string(&nested).unwrap(), r#"{"a":1,"b":22}"#);

    let member_a = get_batson(&bytes, &["foo".into(), 1.into(), "a".into()])
        .unwrap()
        .unwrap();
    assert_eq!(batson_to_json_string(&member_a).unwrap(), "1");

    let member_b = get_batson(&bytes, &["foo".into(), 1.into(), "b".into()])
        .unwrap()
        .unwrap();
    assert_eq!(batson_to_json_string(&member_b).unwrap(), "22");

    // array item 2: an integer larger than u32::MAX
    let big_int = get_batson(&bytes, &["foo".into(), 2.into()]).unwrap().unwrap();
    assert_eq!(batson_to_json_string(&big_int).unwrap(), "4294967299");
}

/// `get_batson` on an array of small integers returns each item as its own batson value.
#[test]
fn test_get_batson_u8array() {
    let bytes = json_to_batson(br#"[1, 2, 0, 255, 128]"#);

    // an empty path yields the whole document
    assert_eq!(get_batson(&bytes, &[]).unwrap().unwrap(), bytes);

    // out-of-range index, and indexing into a scalar, both yield `None`
    assert!(get_batson(&bytes, &[5.into()]).unwrap().is_none());
    assert!(get_batson(&bytes, &[4.into(), 0.into()]).unwrap().is_none());

    // each item round-trips to its JSON representation
    let item0 = get_batson(&bytes, &[0.into()]).unwrap().unwrap();
    assert_eq!(batson_to_json_string(&item0).unwrap(), "1");

    let item1 = get_batson(&bytes, &[1.into()]).unwrap().unwrap();
    assert_eq!(batson_to_json_string(&item1).unwrap(), "2");

    let item2 = get_batson(&bytes, &[2.into()]).unwrap().unwrap();
    assert_eq!(batson_to_json_string(&item2).unwrap(), "0");

    let item3 = get_batson(&bytes, &[3.into()]).unwrap().unwrap();
    assert_eq!(batson_to_json_string(&item3).unwrap(), "255");

    let item4 = get_batson(&bytes, &[4.into()]).unwrap().unwrap();
    assert_eq!(batson_to_json_string(&item4).unwrap(), "128");
}

#[test]
fn test_to_json() {
let bytes = json_to_batson(br" [true, 123] ");
Expand Down

0 comments on commit f03e33a

Please sign in to comment.