From f6e756dcd0c9717cc715092d4d0eff39a9b7386f Mon Sep 17 00:00:00 2001 From: liu Date: Fri, 13 Dec 2024 17:39:37 +0800 Subject: [PATCH] feat: support lazy parsing in owned lazyvalue (#133) --- examples/lazyvalue.rs | 1 - fuzz/fuzz_targets/from_slice.rs | 275 +--------- fuzz/src/lib.rs | 346 ++++++++++++ scripts/sanitize.sh | 2 +- src/input.rs | 13 +- src/lazyvalue/de.rs | 42 +- src/lazyvalue/get.rs | 6 +- src/lazyvalue/iterator.rs | 71 ++- src/lazyvalue/mod.rs | 8 +- src/lazyvalue/owned.rs | 907 ++++++++++++++++++++++++++------ src/lazyvalue/ser.rs | 25 +- src/lazyvalue/value.rs | 159 ++++-- src/lib.rs | 11 +- src/parser.rs | 138 ++++- src/reader.rs | 58 +- src/serde/de.rs | 21 +- src/serde/mod.rs | 3 +- src/serde/ser.rs | 30 +- 18 files changed, 1567 insertions(+), 549 deletions(-) create mode 100644 fuzz/src/lib.rs diff --git a/examples/lazyvalue.rs b/examples/lazyvalue.rs index edfa6b3..0d56fe9 100644 --- a/examples/lazyvalue.rs +++ b/examples/lazyvalue.rs @@ -14,7 +14,6 @@ fn main() { } let data: TestLazyValue = sonic_rs::from_str(input).unwrap(); assert_eq!(data.borrowed.as_raw_str(), "\"hello\""); - assert_eq!(data.owned.as_raw_str(), "\"world\""); // use serde_json #[derive(Debug, Deserialize, Serialize)] diff --git a/fuzz/fuzz_targets/from_slice.rs b/fuzz/fuzz_targets/from_slice.rs index 4befd3d..4aac22a 100644 --- a/fuzz/fuzz_targets/from_slice.rs +++ b/fuzz/fuzz_targets/from_slice.rs @@ -1,277 +1,6 @@ #![no_main] -#![allow(clippy::mutable_key_type)] -use std::{borrow::Cow, collections::HashMap, hash::Hash, marker::PhantomData}; -use faststr::FastStr; use libfuzzer_sys::fuzz_target; -use serde::{Deserialize, Serialize}; -use serde_json::Value as JValue; -use sonic_rs::{ - from_slice, from_str, to_array_iter, to_array_iter_unchecked, to_object_iter, - to_object_iter_unchecked, value::JsonContainerTrait, Deserializer, JsonNumberTrait, - JsonValueTrait, Value, -}; +use sonic_rs_fuzz::sonic_rs_fuzz_data; -macro_rules! test_type { - ($data:expr, $($ty:ty),+) => { - $( - { - match serde_json::from_slice::<$ty>($data) { - Ok(jv) => { - let sv: $ty = sonic_rs::from_slice::<$ty>($data).expect(&format!( - "parse valid json {:?} failed for type {}", - $data, - stringify!($ty) - )); - assert_eq!(sv, jv); - - // Fuzz the struct to_string - let sout = sonic_rs::to_string(&sv).unwrap(); - let jout = serde_json::to_string(&jv).unwrap(); - let sv: $ty = sonic_rs::from_str::<$ty>(&sout).unwrap(); - let jv: $ty = serde_json::from_str::<$ty>(&jout).unwrap(); - assert_eq!(sv, jv); - } - Err(_) => { - let _ = sonic_rs::from_slice::<$ty>($data).expect_err(&format!( - "parse invalid json {:?} wrong for type {}", - $data, - stringify!($ty) - )); - } - } - } - )* - }; -} - -fuzz_target!(|data: &[u8]| { - match serde_json::from_slice::(data) { - Ok(jv) => { - // compare from_slice result - let sv: Value = from_slice(data).unwrap(); - compare_value(&jv, &sv); - - // compare to_string result - let sout = sonic_rs::to_string(&sv).unwrap(); - let jv2 = serde_json::from_str::(&sout).unwrap(); - let sv2: Value = from_str(&sout).unwrap(); - let eq = compare_value(&jv2, &sv2); - - fuzz_use_raw(data, &sv); - fuzz_utf8_lossy(data, &sv); - - if jv.is_object() && eq { - for ret in to_object_iter(data) { - let (k, lv) = ret.unwrap(); - let jv = jv.get(k.as_str()).unwrap(); - compare_lazyvalue(jv, &lv); - - let gv = sonic_rs::get(data, &[k.as_str()]).unwrap(); - compare_lazyvalue(jv, &gv); - } - - // fuzzing unchecked apis - unsafe { - for ret in to_object_iter_unchecked(data) { - let (k, lv) = ret.unwrap(); - let jv = jv.get(k.as_str()).unwrap(); - compare_lazyvalue(jv, &lv); - - let gv = sonic_rs::get_unchecked(data, &[k.as_str()]).unwrap(); - compare_lazyvalue(jv, &gv); - } - } - } else if jv.is_array() { - for (i, ret) in to_array_iter(data).enumerate() { - let lv = ret.unwrap(); - let jv = jv.get(i).unwrap(); - compare_lazyvalue(jv, &lv); - - let gv = sonic_rs::get(data, [i]).unwrap(); - compare_lazyvalue(jv, &gv); - } - - // fuzzing unchecked apis - unsafe { - for (i, ret) in to_array_iter_unchecked(data).enumerate() { - let lv = ret.unwrap(); - let jv = jv.get(i).unwrap(); - compare_lazyvalue(jv, &lv); - - let gv = sonic_rs::get_unchecked(data, [i]).unwrap(); - compare_lazyvalue(jv, &gv); - } - } - } - } - Err(_) => { - let _ = from_slice::(data) - .expect_err(&format!("parse invalid json {:?} failed", data)); - } - } - - test_type!( - data, TestStruct, Enum, Foo, String, f64, u8, u16, u32, u64, u128, i8, i16, i32, i64, i128 - ); -}); - -fn compare_lazyvalue(jv: &JValue, sv: &sonic_rs::LazyValue) { - let out = sv.as_raw_str().as_bytes(); - let sv2: sonic_rs::Value = sonic_rs::from_slice(out).unwrap(); - compare_value(jv, &sv2); -} - -fn fuzz_use_raw(json: &[u8], sv: &sonic_rs::Value) { - let json = unsafe { std::str::from_utf8_unchecked(json) }; - let mut de = Deserializer::from_str(json).use_raw(); - let value: Value = Deserialize::deserialize(&mut de).unwrap(); - let out = sonic_rs::to_string(&value).unwrap(); - let got: Value = sonic_rs::from_str(&out).unwrap(); - assert_eq!(&got, sv); -} - -fn fuzz_utf8_lossy(json: &[u8], sv: &sonic_rs::Value) { - let json = unsafe { std::str::from_utf8_unchecked(json) }; - let mut de = Deserializer::from_str(json).utf8_lossy(); - let value: Value = Deserialize::deserialize(&mut de).unwrap(); - let out = sonic_rs::to_string(&value).unwrap(); - let got: Value = sonic_rs::from_str(&out).unwrap(); - assert_eq!(&got, sv); -} - -fn compare_value(jv: &JValue, sv: &sonic_rs::Value) -> bool { - match *jv { - JValue::Object(ref obj) => { - assert!(sv.is_object()); - let sobj = sv.as_object().unwrap(); - // because serde_json use a map to store object, and sonic_rs allows the repeated keys - if sobj.len() == obj.len() { - for (k, v) in obj { - let got = sobj.get(k).unwrap(); - compare_value(v, got); - } - return true; - } else { - return false; - } - } - JValue::Array(ref arr) => { - assert!(sv.is_array()); - let sarr = sv.as_array().unwrap(); - assert!(arr.len() == sarr.len()); - - for (i, v) in arr.iter().enumerate() { - let got = sarr.get(i).unwrap(); - compare_value(v, got); - } - } - JValue::Bool(b) => assert!(sv.is_boolean() && sv.as_bool().unwrap() == b), - JValue::Null => assert!(sv.is_null()), - JValue::Number(ref num) => { - let got = sv.as_number().unwrap(); - if num.is_f64() { - let jf = num.as_f64().unwrap(); - let sf = got.as_f64().unwrap(); - assert_eq!(jf, sf, "jf {} sf {}", jf, sf); - } - if num.is_u64() { - assert!(num.as_u64().unwrap() == got.as_u64().unwrap()); - } - if num.is_i64() { - assert!(num.as_i64().unwrap() == got.as_i64().unwrap()); - } - } - JValue::String(ref s) => assert!(sv.is_str() && sv.as_str().unwrap() == s), - } - true -} - -#[derive(Debug, Deserialize, Serialize, PartialEq)] -struct Foo { - name: FastStr, - id: u64, -} - -#[derive(Debug, Deserialize, Serialize, Hash, Eq, PartialEq)] -enum Enum { - Zero = 0, - One = 1, - Two = 2, -} - -#[derive(Debug, Deserialize, Serialize, PartialEq)] -enum FieldEnum { - Integer(i8), - Tuple((FastStr, i32)), - Struct(Foo), - Unit, -} - -#[derive(Debug, Deserialize, Serialize, PartialEq)] -enum FieldlessEnum { - Tuple(), - Struct {}, - Unit, -} - -#[derive(Debug, Deserialize, Serialize, PartialEq)] -struct Wrapper<'a>(&'a str); - -// A unit struct -#[derive(Debug, Deserialize, Serialize, PartialEq)] -struct Unit; - -// A uint struct -#[derive(Debug, Deserialize, Serialize, PartialEq)] -struct Phan { - phan: String, - _data: PhantomData, -} - -// A tuple struct -#[derive(Debug, Deserialize, Serialize, PartialEq)] -struct Pair(i32, f32); - -#[derive(Debug, Deserialize, Serialize, PartialEq)] -struct TestStruct<'a> { - fieldless: FieldlessEnum, - enummap: HashMap, - enum_: Enum, - - // basic types - boolean: bool, - integer: i32, - float: f64, - int128: i128, - uint128: u128, - char_: char, - - // string or bytes - str_: &'a str, - // bytes_: &'a [u8], - string: String, - faststr: FastStr, - #[serde(borrow)] - cow: Cow<'a, str>, - - // containers - vector: Vec, - array: [u32; 1], - empty_array: [u8; 0], - map: HashMap, - map_opkey: HashMap, f64>, - - // enum types - option: Option, - fieldenum: FieldEnum, - - // tuple or struct - tuple: (u64, String), - tuple_struct: Pair, - unit_struct: Unit, - - #[serde(borrow)] - wrapper: Wrapper<'a>, - phan_struct: Phan<()>, -} +fuzz_target!(|data: &[u8]| sonic_rs_fuzz_data(data)); diff --git a/fuzz/src/lib.rs b/fuzz/src/lib.rs new file mode 100644 index 0000000..44a9a5a --- /dev/null +++ b/fuzz/src/lib.rs @@ -0,0 +1,346 @@ +#![allow(clippy::mutable_key_type)] +use std::{borrow::Cow, collections::HashMap, hash::Hash, marker::PhantomData}; + +use faststr::FastStr; +use serde::{Deserialize, Serialize}; +use serde_json::Value as JValue; +use sonic_rs::{ + from_slice, from_str, to_array_iter, to_array_iter_unchecked, to_object_iter, + to_object_iter_unchecked, value::JsonContainerTrait, Deserializer, JsonNumberTrait, + JsonValueTrait, LazyValue, OwnedLazyValue, Value, +}; + +macro_rules! test_type { + ($data:expr, $($ty:ty),+) => { + $( + { + match serde_json::from_slice::<$ty>($data) { + Ok(jv) => { + let sv: $ty = sonic_rs::from_slice::<$ty>($data).expect(&format!( + "parse valid json {:?} failed for type {}", + $data, + stringify!($ty) + )); + assert_eq!(sv, jv); + + // Fuzz the struct to_string + let sout = sonic_rs::to_string(&sv).unwrap(); + let jout = serde_json::to_string(&jv).unwrap(); + let sv: $ty = sonic_rs::from_str::<$ty>(&sout).unwrap(); + let jv: $ty = serde_json::from_str::<$ty>(&jout).unwrap(); + assert_eq!(sv, jv); + } + Err(_) => { + let _ = sonic_rs::from_slice::<$ty>($data).expect_err(&format!( + "parse invalid json {:?} wrong for type {}", + $data, + stringify!($ty) + )); + } + } + } + )* + }; +} + +pub fn sonic_rs_fuzz_data(data: &[u8]) { + match serde_json::from_slice::(data) { + Ok(jv) => { + // compare from_slice result + let sv: Value = from_slice(data).unwrap(); + let eq = compare_value(&jv, &sv); + + // compare to_string result + let sout = sonic_rs::to_string(&sv).unwrap(); + let jv2 = serde_json::from_str::(&sout).unwrap(); + let sv2: Value = from_str(&sout).unwrap(); + compare_value(&jv2, &sv2); + + fuzz_use_raw(data, &sv); + fuzz_utf8_lossy(data, &sv); + + if jv.is_object() && eq { + let owned: OwnedLazyValue = sonic_rs::from_slice(data).unwrap(); + for ret in to_object_iter(data) { + let (k, lv) = ret.unwrap(); + let jv = jv.get(k.as_str()).unwrap(); + let ov = owned.get(k.as_str()).unwrap(); + compare_owned_lazyvalue(jv, ov); + compare_lazyvalue(jv, &lv); + + let gv = sonic_rs::get(data, &[k.as_str()]).unwrap(); + compare_lazyvalue(jv, &gv); + } + compare_owned_lazyvalue(&jv, &owned); + + // fuzzing unchecked apis + unsafe { + for ret in to_object_iter_unchecked(data) { + let (k, lv) = ret.unwrap(); + let jv = jv.get(k.as_str()).unwrap(); + compare_lazyvalue(jv, &lv); + + let gv = sonic_rs::get_unchecked(data, &[k.as_str()]).unwrap(); + compare_lazyvalue(jv, &gv); + } + } + } else if jv.is_array() && eq { + let owned: OwnedLazyValue = sonic_rs::from_slice(data).unwrap(); + for (i, ret) in to_array_iter(data).enumerate() { + let lv = ret.unwrap(); + let jv = jv.get(i).unwrap(); + compare_lazyvalue(jv, &lv); + let ov = owned.get(i).unwrap(); + compare_owned_lazyvalue(jv, ov); + + let gv = sonic_rs::get(data, [i]).unwrap(); + compare_lazyvalue(jv, &gv); + } + compare_owned_lazyvalue(&jv, &owned); + + // fuzzing unchecked apis + unsafe { + for (i, ret) in to_array_iter_unchecked(data).enumerate() { + let lv = ret.unwrap(); + let jv = jv.get(i).unwrap(); + compare_lazyvalue(jv, &lv); + + let gv = sonic_rs::get_unchecked(data, [i]).unwrap(); + compare_lazyvalue(jv, &gv); + } + } + } + } + Err(_) => { + let _ = from_slice::(data) + .expect_err(&format!("parse invalid json {:?} failed", data)); + } + } + + test_type!( + data, TestStruct, Enum, Foo, String, f64, u8, u16, u32, u64, u128, i8, i16, i32, i64, i128 + ); +} + +fn compare_lazyvalue(jv: &JValue, sv: &LazyValue) { + let out = sv.as_raw_str().as_bytes(); + let sv2: sonic_rs::Value = sonic_rs::from_slice(out).unwrap(); + compare_value(jv, &sv2); +} + +fn compare_owned_lazyvalue(jv: &JValue, sv: &OwnedLazyValue) { + match *jv { + JValue::Object(ref obj) => { + assert!(sv.is_object()); + for (k, v) in obj { + let got = sv.get(k).unwrap(); + compare_owned_lazyvalue(v, got); + } + } + JValue::Array(ref arr) => { + assert!(sv.is_array()); + for (i, v) in arr.iter().enumerate() { + let got = sv.get(i).unwrap(); + compare_owned_lazyvalue(v, got); + } + } + JValue::Bool(b) => assert!(sv.is_boolean() && sv.as_bool().unwrap() == b), + JValue::Null => assert!(sv.is_null()), + JValue::Number(ref num) => { + let got = sv.as_number().unwrap(); + if num.is_f64() { + assert_eq!(num.as_f64(), got.as_f64()); + } + if num.is_u64() { + assert_eq!(num.as_u64(), got.as_u64()); + } + if num.is_i64() { + assert_eq!(num.as_i64(), got.as_i64()); + } + } + JValue::String(ref s) => { + assert!(sv.is_str()); + assert_eq!(sv.as_str().unwrap(), s); + } + } +} + +fn fuzz_use_raw(json: &[u8], sv: &sonic_rs::Value) { + let json = unsafe { std::str::from_utf8_unchecked(json) }; + let mut de = Deserializer::from_str(json).use_raw(); + let value: Value = Deserialize::deserialize(&mut de).unwrap(); + let out = sonic_rs::to_string(&value).unwrap(); + let got: Value = sonic_rs::from_str(&out).unwrap(); + assert_eq!(&got, sv); +} + +fn fuzz_utf8_lossy(json: &[u8], sv: &sonic_rs::Value) { + let json = unsafe { std::str::from_utf8_unchecked(json) }; + let mut de = Deserializer::from_str(json).utf8_lossy(); + let value: Value = Deserialize::deserialize(&mut de).unwrap(); + let out = sonic_rs::to_string(&value).unwrap(); + let got: Value = sonic_rs::from_str(&out).unwrap(); + assert_eq!(&got, sv); +} + +pub fn compare_value(jv: &JValue, sv: &sonic_rs::Value) -> bool { + match *jv { + JValue::Object(ref obj) => { + assert!(sv.is_object()); + let sobj = sv.as_object().unwrap(); + // because serde_json use a map to store object, and sonic_rs allows the repeated keys + if sobj.len() == obj.len() { + for (k, v) in obj { + let got = sobj.get(k).unwrap(); + if !compare_value(v, got) { + return false; + } + } + return true; + } else { + return false; + } + } + JValue::Array(ref arr) => { + assert!(sv.is_array()); + let sarr = sv.as_array().unwrap(); + assert!(arr.len() == sarr.len()); + + for (i, v) in arr.iter().enumerate() { + let got = sarr.get(i).unwrap(); + if !compare_value(v, got) { + return false; + } + } + } + JValue::Bool(b) => assert!(sv.is_boolean() && sv.as_bool().unwrap() == b), + JValue::Null => assert!(sv.is_null()), + JValue::Number(ref num) => { + let got = sv.as_number().unwrap(); + if num.is_f64() { + let jf = num.as_f64().unwrap(); + let sf = got.as_f64().unwrap(); + assert_eq!(jf, sf, "jf {} sf {}", jf, sf); + } + if num.is_u64() { + assert!(num.as_u64().unwrap() == got.as_u64().unwrap()); + } + if num.is_i64() { + assert!(num.as_i64().unwrap() == got.as_i64().unwrap()); + } + } + JValue::String(ref s) => assert!(sv.is_str() && sv.as_str().unwrap() == s), + }; + true +} + +#[derive(Debug, Deserialize, Serialize, PartialEq)] +struct Foo { + name: FastStr, + id: u64, +} + +#[derive(Debug, Deserialize, Serialize, Hash, Eq, PartialEq)] +enum Enum { + Zero = 0, + One = 1, + Two = 2, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq)] +enum FieldEnum { + Integer(i8), + Tuple((FastStr, i32)), + Struct(Foo), + Unit, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq)] +enum FieldlessEnum { + Tuple(), + Struct {}, + Unit, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq)] +struct Wrapper<'a>(&'a str); + +// A unit struct +#[derive(Debug, Deserialize, Serialize, PartialEq)] +struct Unit; + +// A uint struct +#[derive(Debug, Deserialize, Serialize, PartialEq)] +struct Phan { + phan: String, + _data: PhantomData, +} + +// A tuple struct +#[derive(Debug, Deserialize, Serialize, PartialEq)] +struct Pair(i32, f32); + +#[derive(Debug, Deserialize, Serialize, PartialEq)] +struct TestStruct<'a> { + fieldless: FieldlessEnum, + enummap: HashMap, + enum_: Enum, + + // basic types + boolean: bool, + integer: i32, + float: f64, + int128: i128, + uint128: u128, + char_: char, + + // string or bytes + str_: &'a str, + // bytes_: &'a [u8], + string: String, + faststr: FastStr, + #[serde(borrow)] + cow: Cow<'a, str>, + + // containers + vector: Vec, + array: [u32; 1], + empty_array: [u8; 0], + map: HashMap, + map_opkey: HashMap, f64>, + + // enum types + option: Option, + fieldenum: FieldEnum, + + // tuple or struct + tuple: (u64, String), + tuple_struct: Pair, + unit_struct: Unit, + + #[serde(borrow)] + wrapper: Wrapper<'a>, + phan_struct: Phan<()>, +} + +#[cfg(test)] +mod test { + use crate::*; + + fn test_compare_value(data: &[u8]) -> bool { + let sv = sonic_rs::from_slice(data).unwrap(); + let jv = serde_json::from_slice(data).unwrap(); + compare_value(&jv, &sv) + } + + #[test] + fn test_case() { + sonic_rs_fuzz_data(br#"[[{"1":4, "":80} ]]"#); + assert!(test_compare_value( + br#"[[{"1":4, "":80} ]]"# + )); + assert!(!test_compare_value( + br#"[[{"":4, "":80} ]]"# + )); + } +} diff --git a/scripts/sanitize.sh b/scripts/sanitize.sh index c68e51b..b36f795 100755 --- a/scripts/sanitize.sh +++ b/scripts/sanitize.sh @@ -7,7 +7,7 @@ export ASAN_OPTIONS="disable_coredump=0:unmap_shadow_on_exit=1:abort_on_error=1" run_tests() { local san="$1" local features="$2" - cargo +nightly test --target x86_64-unknown-linux-gnu --features "$features" -- --test-threads=1 + cargo +nightly test --target x86_64-unknown-linux-gnu --features "$features" -- --test-threads=1 --nocapture cargo +nightly test --doc --package sonic-rs --target x86_64-unknown-linux-gnu --features "$features" -- --show-output --test-threads=1 } diff --git a/src/input.rs b/src/input.rs index 60d4c51..f8d5069 100644 --- a/src/input.rs +++ b/src/input.rs @@ -13,14 +13,21 @@ pub enum JsonSlice<'de> { } impl<'de> JsonSlice<'de> { - pub fn slice_ref(&self, subset: &'de [u8]) -> Self { + #[inline(always)] + pub(crate) unsafe fn as_faststr(&self) -> FastStr { match self { - JsonSlice::Raw(_) => JsonSlice::Raw(subset), - JsonSlice::FastStr(f) => JsonSlice::FastStr(f.slice_ref(as_str(subset))), + JsonSlice::Raw(sub) => FastStr::new(as_str(sub)), + JsonSlice::FastStr(f) => f.clone(), } } } +impl Default for JsonSlice<'_> { + fn default() -> Self { + JsonSlice::Raw(&b"null"[..]) + } +} + impl<'de> From for JsonSlice<'de> { fn from(value: FastStr) -> Self { JsonSlice::FastStr(value) diff --git a/src/lazyvalue/de.rs b/src/lazyvalue/de.rs index fa225fe..87e0f1d 100644 --- a/src/lazyvalue/de.rs +++ b/src/lazyvalue/de.rs @@ -1,9 +1,10 @@ -use std::marker::PhantomData; +use std::{marker::PhantomData, mem::MaybeUninit}; use ::serde::{de, de::Visitor, Deserialize, Deserializer}; use faststr::FastStr; use super::{owned::OwnedLazyValue, value::LazyValue}; +use crate::lazyvalue::value::HasEsc; impl<'de: 'a, 'a> Deserialize<'de> for LazyValue<'a> { fn deserialize(deserializer: D) -> Result @@ -26,14 +27,14 @@ impl<'de: 'a, 'a> Deserialize<'de> for LazyValue<'a> { where E: de::Error, { - LazyValue::new(FastStr::new(v).into(), true).map_err(de::Error::custom) + Ok(LazyValue::new(FastStr::new(v).into(), HasEsc::Yes)) } fn visit_borrowed_str(self, v: &'de str) -> Result where E: de::Error, { - LazyValue::new(FastStr::new(v).into(), false).map_err(de::Error::custom) + Ok(LazyValue::new(FastStr::new(v).into(), HasEsc::None)) } } @@ -51,30 +52,37 @@ impl<'de> Deserialize<'de> for OwnedLazyValue { { struct OwnedVisitor; + let visit = OwnedVisitor; + impl<'de> Visitor<'de> for OwnedVisitor { type Value = OwnedLazyValue; - fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(formatter, "any valid JSON value") - } - - // NOTE: only used for visit the str that has escaped chars - fn visit_str(self, v: &str) -> Result - where - E: de::Error, - { - OwnedLazyValue::new(FastStr::new(v).into(), true).map_err(de::Error::custom) + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("expect a valid json") } - fn visit_borrowed_str(self, v: &'de str) -> Result + fn visit_bytes(self, value_binary: &[u8]) -> std::result::Result where E: de::Error, { - OwnedLazyValue::new(FastStr::new(v).into(), false).map_err(de::Error::custom) + // we pass the value from value_binary + unsafe { + assert!( + value_binary.len() == std::mem::size_of::(), + "invalid value size {}", + value_binary.len() + ); + let mut dom: MaybeUninit = MaybeUninit::zeroed(); + std::ptr::copy_nonoverlapping( + value_binary.as_ptr() as *const Self::Value, + dom.as_mut_ptr(), + 1, + ); + Ok(dom.assume_init()) + } } } - let visit = OwnedVisitor; - deserializer.deserialize_newtype_struct(super::TOKEN, visit) + deserializer.deserialize_newtype_struct(super::OWNED_LAZY_VALUE_TOKEN, visit) } } diff --git a/src/lazyvalue/get.rs b/src/lazyvalue/get.rs index 882618e..be6e200 100644 --- a/src/lazyvalue/get.rs +++ b/src/lazyvalue/get.rs @@ -6,7 +6,7 @@ use crate::{ error::Result, index::Index, input::JsonInput, - parser::{ParseStatus, Parser}, + parser::Parser, pointer::PointerTree, reader::{Read, Reader}, util::utf8::from_utf8, @@ -180,7 +180,7 @@ where let reader = Read::new(slice, false); let mut parser = Parser::new(reader); let (sub, status) = parser.get_from_with_iter_unchecked(path)?; - LazyValue::new(json.from_subset(sub), status == ParseStatus::HasEscaped) + Ok(LazyValue::new(json.from_subset(sub), status.into())) } /// get_many returns multiple fields from the `PointerTree`. @@ -389,7 +389,7 @@ where let reader = Read::new(slice, false); let mut parser = Parser::new(reader); let (sub, status) = parser.get_from_with_iter(path)?; - let lv = LazyValue::new(json.from_subset(sub), status == ParseStatus::HasEscaped)?; + let lv = LazyValue::new(json.from_subset(sub), status.into()); // validate the utf-8 if slice let index = parser.read.index(); diff --git a/src/lazyvalue/iterator.rs b/src/lazyvalue/iterator.rs index 67e7028..e829365 100644 --- a/src/lazyvalue/iterator.rs +++ b/src/lazyvalue/iterator.rs @@ -2,12 +2,11 @@ use faststr::FastStr; use crate::{ error::Result, - input::JsonInput, + input::{JsonInput, JsonSlice}, lazyvalue::LazyValue, parser::{Parser, DEFAULT_KEY_BUF_CAPACITY}, reader::{Read, Reader}, }; - /// A lazied iterator for JSON object text. It will parse the JSON when iterating. /// /// The item of the iterator is [`Result`][`crate::LazyValue`]. @@ -39,7 +38,7 @@ pub struct ObjectJsonIter<'de> { strbuf: Vec, first: bool, ending: bool, - check: bool, + skip_strict: bool, } /// A lazied iterator for JSON array text. It will parse the JSON when iterating. @@ -73,21 +72,36 @@ pub struct ArrayJsonIter<'de> { parser: Parser>, first: bool, ending: bool, - check: bool, + skip_strict: bool, } impl<'de> ObjectJsonIter<'de> { - fn new>(json: I, check: bool) -> Self { + // input is inner json, expected always be validated and well-formed + pub(crate) fn new_inner(input: JsonSlice<'de>) -> Self { + Self { + parser: Parser::new(Read::new_in(input, false)), + strbuf: Vec::with_capacity(DEFAULT_KEY_BUF_CAPACITY), + first: true, + ending: false, + skip_strict: false, + } + } + + pub(crate) fn new>(input: I, skip_strict: bool) -> Self { + let validate_utf8 = skip_strict + .then_some(input.need_utf8_valid()) + .unwrap_or_default(); + Self { - parser: Parser::new(Read::new_in(json, check)), + parser: Parser::new(Read::new_in(input.to_json_slice(), validate_utf8)), strbuf: Vec::with_capacity(DEFAULT_KEY_BUF_CAPACITY), first: true, ending: false, - check, + skip_strict, } } - fn next_entry_impl(&mut self, check: bool) -> Option)>> { + fn next_entry_impl(&mut self) -> Option)>> { if self.ending { return None; } @@ -102,12 +116,12 @@ impl<'de> ObjectJsonIter<'de> { match self .parser - .parse_entry_lazy(&mut self.strbuf, &mut self.first, check) + .parse_entry_lazy(&mut self.strbuf, &mut self.first, self.skip_strict) { Ok(ret) => { - if let Some((key, val, has_escaped)) = ret { + if let Some((key, val, status)) = ret { let val = self.parser.read.slice_ref(val); - Some(LazyValue::new(val, has_escaped).map(|v| (key, v))) + Some(Ok(LazyValue::new(val, status.into())).map(|v| (key, v))) } else { self.ending = true; None @@ -122,16 +136,30 @@ impl<'de> ObjectJsonIter<'de> { } impl<'de> ArrayJsonIter<'de> { - fn new>(input: I, check: bool) -> Self { + // input is inner json, expected always be validated and well-formed + pub(crate) fn new_inner(input: JsonSlice<'de>) -> Self { Self { - parser: Parser::new(Read::new_in(input, check)), + parser: Parser::new(Read::new_in(input, false)), first: true, ending: false, - check, + skip_strict: false, } } - fn next_elem_impl(&mut self, check: bool) -> Option>> { + pub(crate) fn new>(input: I, skip_strict: bool) -> Self { + let validate_utf8 = skip_strict + .then_some(input.need_utf8_valid()) + .unwrap_or_default(); + + Self { + parser: Parser::new(Read::new_in(input.to_json_slice(), validate_utf8)), + first: true, + ending: false, + skip_strict, + } + } + + fn next_elem_impl(&mut self) -> Option>> { if self.ending { return None; } @@ -144,11 +172,14 @@ impl<'de> ArrayJsonIter<'de> { } } - match self.parser.parse_array_elem_lazy(&mut self.first, check) { + match self + .parser + .parse_array_elem_lazy(&mut self.first, self.skip_strict) + { Ok(ret) => { - if let Some((val, has_escaped)) = ret { + if let Some((val, status)) = ret { let val = self.parser.read.slice_ref(val); - Some(LazyValue::new(val, has_escaped)) + Some(Ok(LazyValue::new(val, status.into()))) } else { self.ending = true; None @@ -317,7 +348,7 @@ impl<'de> Iterator for ObjectJsonIter<'de> { type Item = Result<(FastStr, LazyValue<'de>)>; fn next(&mut self) -> Option { - self.next_entry_impl(self.check) + self.next_entry_impl() } } @@ -325,7 +356,7 @@ impl<'de> Iterator for ArrayJsonIter<'de> { type Item = Result>; fn next(&mut self) -> Option { - self.next_elem_impl(self.check) + self.next_elem_impl() } } diff --git a/src/lazyvalue/mod.rs b/src/lazyvalue/mod.rs index dc86f0a..74f64f6 100644 --- a/src/lazyvalue/mod.rs +++ b/src/lazyvalue/mod.rs @@ -2,8 +2,8 @@ mod get; mod iterator; -mod owned; -mod value; +pub(crate) mod owned; +pub(crate) mod value; #[doc(inline)] pub use self::{ @@ -16,10 +16,12 @@ pub use self::{ to_array_iter, to_array_iter_unchecked, to_object_iter, to_object_iter_unchecked, ArrayJsonIter, ObjectJsonIter, }, - owned::OwnedLazyValue, + owned::{LazyArray, LazyObject, OwnedLazyValue}, value::LazyValue, }; pub(crate) mod de; pub(crate) mod ser; pub(crate) const TOKEN: &str = "$sonic_rs::LazyValue"; + +pub(crate) const OWNED_LAZY_VALUE_TOKEN: &str = "$sonic::OwnedLv"; diff --git a/src/lazyvalue/owned.rs b/src/lazyvalue/owned.rs index 606b4d8..6f316da 100644 --- a/src/lazyvalue/owned.rs +++ b/src/lazyvalue/owned.rs @@ -1,19 +1,20 @@ use std::{ - fmt, - fmt::{Debug, Display}, - hash::Hash, + fmt::{self, Debug, Display}, str::from_utf8_unchecked, - sync::Arc, + sync::atomic::{AtomicPtr, Ordering}, }; use faststr::FastStr; +use ref_cast::RefCast; +use serde::ser::{SerializeMap, SerializeStruct}; +use super::value::HasEsc; use crate::{ - from_str, get_unchecked, index::Index, input::JsonSlice, serde::Number, JsonType, - JsonValueTrait, LazyValue, Result, + index::Index, input::JsonSlice, prelude::*, serde::Number, JsonType, JsonValueTrait, LazyValue, + RawNumber, Result, }; -/// OwnedLazyValue wrappers a unparsed raw JSON text. It is owned. +/// OwnedLazyValue wrappers a unparsed raw JSON text. It is owned and support `Get, Set` /// /// It can be converted from [`LazyValue`](crate::lazyvalue::LazyValue). It can be used for serde. /// @@ -37,10 +38,6 @@ use crate::{ /// let own_a = OwnedLazyValue::from(get(input, &["a"]).unwrap()); /// let own_c = OwnedLazyValue::from(get(input, &["c"]).unwrap()); /// -/// // use as_raw_xx to get the unparsed JSON text -/// assert_eq!(own_a.as_raw_str(), "\"hello world\""); -/// assert_eq!(own_c.as_raw_str(), "[0, 1, 2]"); -/// /// // use as_xx to get the parsed value /// assert_eq!(own_a.as_str().unwrap(), "hello world"); /// assert_eq!(own_c.as_str(), None); @@ -53,7 +50,7 @@ use crate::{ /// # use sonic_rs::{LazyValue, OwnedLazyValue}; /// use serde::{Deserialize, Serialize}; /// -/// #[derive(Debug, Deserialize, Serialize, PartialEq)] +/// #[derive(Debug, Deserialize, Serialize)] /// struct TestLazyValue<'a> { /// #[serde(borrow)] /// borrowed_lv: LazyValue<'a>, @@ -64,225 +61,833 @@ use crate::{ /// /// let data: TestLazyValue = sonic_rs::from_str(input).unwrap(); /// assert_eq!(data.borrowed_lv.as_raw_str(), "\"hello\""); -/// assert_eq!(data.owned_lv.as_raw_str(), "\"world\""); /// ``` -pub struct OwnedLazyValue { +#[derive(Debug, Clone)] +pub struct OwnedLazyValue(pub(crate) LazyPacked); + +impl Default for OwnedLazyValue { + fn default() -> Self { + Self(LazyPacked::Parsed(Parsed::Null)) + } +} + +impl OwnedLazyValue { + pub(crate) fn from_non_esc_str(raw: FastStr) -> Self { + Self(LazyPacked::NonEscStrRaw(raw)) + } + + pub(crate) fn from_faststr(str: FastStr) -> Self { + Self(LazyPacked::Parsed(Parsed::String(str))) + } +} + +impl From for OwnedLazyValue { + fn from(number: Number) -> Self { + Self(LazyPacked::Parsed(Parsed::Number(number))) + } +} + +impl From> for OwnedLazyValue { + fn from(v: Vec<(FastStr, OwnedLazyValue)>) -> Self { + Self(LazyPacked::Parsed(Parsed::LazyObject(v))) + } +} + +impl From> for OwnedLazyValue { + fn from(v: Vec) -> Self { + Self(LazyPacked::Parsed(Parsed::LazyArray(v))) + } +} + +impl From for OwnedLazyValue { + fn from(v: bool) -> Self { + Self(LazyPacked::Parsed(Parsed::Bool(v))) + } +} + +impl From<()> for OwnedLazyValue { + fn from(_: ()) -> Self { + Self(LazyPacked::Parsed(Parsed::Null)) + } +} + +pub(crate) struct LazyRaw { // the raw slice from origin json pub(crate) raw: FastStr, - unescape: Option>, + pub(crate) parsed: AtomicPtr, } -impl JsonValueTrait for OwnedLazyValue { - type ValueType<'v> = OwnedLazyValue; +impl Drop for LazyRaw { + fn drop(&mut self) { + let ptr = self.parsed.get_mut(); + if !(*ptr).is_null() { + unsafe { + drop(Box::from_raw(*ptr)); + } + } + } +} - fn as_bool(&self) -> Option { - match self.raw.as_bytes() { - b"true" => Some(true), - b"false" => Some(false), +impl Debug for LazyRaw { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let ptr = self.parsed.load(Ordering::Relaxed); + let s = if ptr.is_null() { + "".to_string() + } else { + format!("{:?}", unsafe { &*ptr }) + }; + f.debug_struct("LazyRaw") + .field("raw", &self.raw) + .field("parsed", &s) + .finish() + } +} + +impl LazyRaw { + fn load(&self) -> Result<&Parsed> { + let ptr = self.parsed.load(Ordering::Acquire); + if !ptr.is_null() { + return Ok(unsafe { &*ptr }); + } + + let mut parser = crate::parser::Parser::new(crate::Read::from(&self.raw)); + let mut strbuf: Vec = Vec::new(); + let olv: OwnedLazyValue = parser.load_owned_lazyvalue(&mut strbuf)?; + let OwnedLazyValue(LazyPacked::Parsed(v)) = olv else { + unreachable!("must be lazy parsed"); + }; + let parsed = Box::into_raw(Box::new(v)); + match self + .parsed + .compare_exchange_weak(ptr, parsed, Ordering::AcqRel, Ordering::Acquire) + { + // will free by drop + Ok(_) => Ok(unsafe { &*parsed }), + Err(ptr) => { + // # Safety + // the pointer is immutable here, and we can drop it + drop(unsafe { Box::from_raw(parsed) }); + Ok(unsafe { &*ptr }) + } + } + } + + fn parse(&mut self) -> Result { + let ptr = self.parsed.get_mut(); + if !(*ptr).is_null() { + let v = unsafe { std::ptr::read(*ptr) }; + *ptr = std::ptr::null_mut(); + return Ok(v); + } + + let mut parser = crate::parser::Parser::new(crate::Read::from(&self.raw)); + let mut strbuf: Vec = Vec::new(); + let olv: OwnedLazyValue = parser.load_owned_lazyvalue(&mut strbuf)?; + let OwnedLazyValue(LazyPacked::Parsed(v)) = olv else { + unreachable!("must be lazy parsed"); + }; + Ok(v) + } + + fn get(&self, idx: I) -> Option<&OwnedLazyValue> { + match self.get_type() { + JsonType::Array if idx.as_index().is_some() => { + let parsed = self.load().ok()?; + parsed.get(idx) + } + JsonType::Object if idx.as_key().is_some() => { + let parsed = self.load().ok()?; + parsed.get(idx) + } _ => None, } } fn as_number(&self) -> Option { - if let Ok(num) = from_str(self.as_raw_str()) { - Some(num) + match self.get_type() { + JsonType::Number => match self.load().ok()? { + Parsed::Number(n) => Some(n.clone()), + _ => None, + }, + _ => None, + } + } + + fn as_str(&self) -> Option<&str> { + match self.get_type() { + JsonType::String => match self.load().ok()? { + Parsed::String(s) => Some(s.as_str()), + _ => None, + }, + _ => None, + } + } + + fn as_raw_number(&self) -> Option { + if self.raw.as_bytes()[0] == b'-' || self.raw.as_bytes()[0].is_ascii_digit() { + Some(RawNumber::from_faststr(self.raw.clone())) } else { None } } - fn as_raw_number(&self) -> Option { - if let Ok(num) = from_str(self.as_raw_str()) { - Some(num) + fn get_type(&self) -> JsonType { + match self.raw.as_bytes()[0] { + b'-' | b'0'..=b'9' => JsonType::Number, + b'"' => JsonType::String, + b'[' => JsonType::Array, + b'{' => JsonType::Object, + _ => unreachable!("invalid raw json value"), + } + } + + fn clone_lazyraw(&self) -> std::result::Result { + let parsed = self.parsed.load(Ordering::Relaxed); + if parsed.is_null() { + Ok(LazyRaw { + raw: self.raw.clone(), + parsed: AtomicPtr::new(std::ptr::null_mut()), + }) + } else { + // # Safety + // the pointer is immutable here, and we can clone it + Err(unsafe { (*parsed).clone() }) + } + } +} + +#[derive(Debug)] +pub(crate) enum LazyPacked { + // raw value: number, maybe esc strings, raw object, raw array + Raw(LazyRaw), + // most JSON string without escaped chars, will also optimize serialize + NonEscStrRaw(FastStr), + Parsed(Parsed), +} + +impl LazyPacked {} + +impl Clone for LazyPacked { + fn clone(&self) -> Self { + match self { + Self::Raw(raw) => match raw.clone_lazyraw() { + Ok(raw) => Self::Raw(raw), + Err(v) => Self::Parsed(v), + }, + Self::NonEscStrRaw(s) => Self::NonEscStrRaw(s.clone()), + Self::Parsed(v) => Self::Parsed(v.clone()), + } + } +} + +#[derive(Debug, Clone)] +pub(crate) enum Parsed { + LazyObject(Vec<(FastStr, OwnedLazyValue)>), + LazyArray(Vec), + String(FastStr), + Number(Number), + Null, + Bool(bool), +} + +impl Parsed { + fn get_type(&self) -> JsonType { + match self { + Parsed::LazyObject(_) => JsonType::Object, + Parsed::LazyArray(_) => JsonType::Array, + Parsed::String(_) => JsonType::String, + Parsed::Number(_) => JsonType::Number, + Parsed::Null => JsonType::Null, + Parsed::Bool(_) => JsonType::Boolean, + } + } + + fn get(&self, index: I) -> Option<&OwnedLazyValue> { + match self { + Parsed::LazyObject(obj) => { + if let Some(key) = index.as_key() { + for (k, v) in obj { + if k == key { + return Some(v); + } + } + } + None + } + Parsed::LazyArray(arr) => { + if let Some(index) = index.as_index() { + arr.get(index) + } else { + None + } + } + _ => None, + } + } + + fn get_mut(&mut self, index: I) -> Option<&mut OwnedLazyValue> { + match self { + Parsed::LazyObject(obj) => { + if let Some(key) = index.as_key() { + for (k, v) in obj { + if k == key { + return Some(v); + } + } + } + None + } + Parsed::LazyArray(arr) => { + if let Some(index) = index.as_index() { + arr.get_mut(index) + } else { + None + } + } + _ => None, + } + } +} + +impl JsonValueTrait for OwnedLazyValue { + type ValueType<'v> = &'v OwnedLazyValue; + + fn as_bool(&self) -> Option { + if let LazyPacked::Parsed(Parsed::Bool(b)) = &self.0 { + Some(*b) } else { None } } + fn as_number(&self) -> Option { + match &self.0 { + LazyPacked::Parsed(Parsed::Number(n)) => Some(n.clone()), + LazyPacked::Raw(raw) => raw.as_number(), + _ => None, + } + } + + fn as_raw_number(&self) -> Option { + match &self.0 { + LazyPacked::Raw(raw) => raw.as_raw_number(), + _ => None, + } + } + fn as_str(&self) -> Option<&str> { - if !self.is_str() { - None - } else if let Some(escaped) = self.unescape.as_ref() { - Some(escaped.as_ref()) + match &self.0 { + LazyPacked::Parsed(Parsed::String(s)) => Some(s.as_str()), + LazyPacked::Raw(raw) => raw.as_str(), + LazyPacked::NonEscStrRaw(raw) => { + Some(unsafe { from_utf8_unchecked(&raw.as_bytes()[1..raw.len() - 1]) }) + } + _ => None, + } + } + + fn get_type(&self) -> JsonType { + match &self.0 { + LazyPacked::Parsed(v) => v.get_type(), + LazyPacked::Raw(raw) => raw.get_type(), + LazyPacked::NonEscStrRaw(_) => JsonType::String, + } + } + + fn get(&self, index: I) -> Option<&OwnedLazyValue> { + match &self.0 { + LazyPacked::Parsed(v) => v.get(index), + LazyPacked::Raw(raw) => raw.get(index), + _ => None, + } + } + + fn pointer(&self, path: P) -> Option<&OwnedLazyValue> + where + P::Item: Index, + { + let mut next = self; + for index in path { + next = match &next.0 { + LazyPacked::Parsed(v) => v.get(index), + LazyPacked::Raw(raw) => raw.get(index), + _ => None, + }?; + } + Some(next) + } +} + +impl JsonValueMutTrait for OwnedLazyValue { + type ValueType = OwnedLazyValue; + type ArrayType = LazyArray; + type ObjectType = LazyObject; + + fn as_object_mut(&mut self) -> Option<&mut LazyObject> { + if let LazyPacked::Raw(raw) = &mut self.0 { + if raw.get_type() == JsonType::Object { + let parsed = raw.parse().ok()?; + self.0 = LazyPacked::Parsed(parsed); + } else { + return None; + } + } + + if let LazyPacked::Parsed(Parsed::LazyObject(_)) = &mut self.0 { + Some(LazyObject::ref_cast_mut(self)) } else { - // remove the quotes - let origin = { - let raw = self.as_raw_str().as_bytes(); - &raw[1..raw.len() - 1] - }; - Some(unsafe { from_utf8_unchecked(origin) }) + None } } - fn get_type(&self) -> crate::JsonType { - match self.raw.as_bytes()[0] { - b'-' | b'0'..=b'9' => JsonType::Number, - b'"' => JsonType::String, - b'{' => JsonType::Object, - b'[' => JsonType::Array, - b't' | b'f' => JsonType::Boolean, - b'n' => JsonType::Null, - _ => unreachable!(), + fn as_array_mut(&mut self) -> Option<&mut LazyArray> { + if let LazyPacked::Raw(raw) = &mut self.0 { + if raw.get_type() == JsonType::Array { + let parsed = raw.parse().ok()?; + self.0 = LazyPacked::Parsed(parsed); + } else { + return None; + } + } + + if let LazyPacked::Parsed(Parsed::LazyArray(_)) = &mut self.0 { + Some(LazyArray::ref_cast_mut(self)) + } else { + None } } - fn get(&self, index: I) -> Option { - if let Some(key) = index.as_key() { - self.get_key(key) - } else if let Some(index) = index.as_index() { - self.get_index(index) + fn get_mut(&mut self, index: I) -> Option<&mut OwnedLazyValue> { + if matches!(self.0, LazyPacked::Raw(_)) { + self.get_mut_from_raw(index) + } else if let LazyPacked::Parsed(parsed) = &mut self.0 { + parsed.get_mut(index) } else { - unreachable!("index must be key or index") + None } } - fn pointer(&self, path: P) -> Option + fn pointer_mut(&mut self, path: P) -> Option<&mut OwnedLazyValue> where P::Item: Index, { - let lv = unsafe { get_unchecked(&self.raw, path).ok() }; - lv.map(|v| v.into()) + let mut next = self; + for index in path { + if matches!(next.0, LazyPacked::Raw(_)) { + next = next.get_mut_from_raw(index)?; + } else { + next = match &mut next.0 { + LazyPacked::Parsed(v) => v.get_mut(index), + _ => None, + }?; + } + } + Some(next) + } +} + +impl JsonContainerTrait for OwnedLazyValue { + type ArrayType = LazyArray; + type ObjectType = LazyObject; + + #[inline] + fn as_array(&self) -> Option<&Self::ArrayType> { + let parsed = match &self.0 { + LazyPacked::Raw(raw) => { + if raw.get_type() == JsonType::Array { + raw.load().ok()? + } else { + return None; + } + } + LazyPacked::Parsed(parsed) => parsed, + _ => return None, + }; + + if let Parsed::LazyArray(_) = parsed { + Some(LazyArray::ref_cast(self)) + } else { + None + } + } + + #[inline] + fn as_object(&self) -> Option<&Self::ObjectType> { + let parsed = match &self.0 { + LazyPacked::Raw(raw) => { + if raw.get_type() == JsonType::Object { + raw.load().ok()? + } else { + return None; + } + } + LazyPacked::Parsed(parsed) => parsed, + _ => return None, + }; + + if let Parsed::LazyObject(_) = parsed { + Some(LazyObject::ref_cast(self)) + } else { + None + } } } impl OwnedLazyValue { - /// Export the raw JSON text as `str`. - /// - /// # Examples - /// - /// ``` - /// use sonic_rs::{get, LazyValue}; - /// - /// let lv: LazyValue = sonic_rs::get(r#"{"a": "hello world"}"#, &["a"]).unwrap(); - /// assert_eq!(lv.as_raw_str(), "\"hello world\""); - /// ``` - pub fn as_raw_str(&self) -> &str { - // # Safety - // it is validate when using to_object_iter/get ... - // if use `get_unchecked` unsafe apis, it must ensured by the user at first - unsafe { from_utf8_unchecked(self.raw.as_ref()) } - } - - /// Export the raw json text as faststr. - /// - /// # Note - /// If the input json is not bytes or faststr, there will be a string copy. - /// - /// # Examples - /// - /// ``` - /// use faststr::FastStr; - /// use sonic_rs::LazyValue; - /// - /// let lv: LazyValue = sonic_rs::get(r#"{"a": "hello world"}"#, &["a"]).unwrap(); - /// // will copy the raw_str into a new faststr - /// assert_eq!(lv.as_raw_faststr(), "\"hello world\""); - /// - /// let fs = FastStr::new(r#"{"a": "hello world"}"#); - /// let lv: LazyValue = sonic_rs::get(&fs, &["a"]).unwrap(); - /// assert_eq!(lv.as_raw_faststr(), "\"hello world\""); // zero-copy - /// ``` - pub fn as_raw_faststr(&self) -> FastStr { - self.raw.clone() - } - - /// get with index from lazyvalue - pub(crate) fn get_index(&self, index: usize) -> Option { - let path = [index]; - let lv = unsafe { get_unchecked(&self.raw, path.iter()).ok() }; - lv.map(|v| v.into()) - } - - /// get with key from lazyvalue - pub(crate) fn get_key(&self, key: &str) -> Option { - let path = [key]; - let lv = unsafe { get_unchecked(&self.raw, path.iter()).ok() }; - lv.map(|v| v.into()) - } - - pub(crate) fn new(raw: JsonSlice, has_escaped: bool) -> Result { + pub fn take(&mut self) -> Self { + std::mem::take(self) + } + + pub(crate) fn new(raw: JsonSlice, status: HasEsc) -> Self { let raw = match raw { JsonSlice::Raw(r) => FastStr::new(unsafe { from_utf8_unchecked(r) }), JsonSlice::FastStr(f) => f.clone(), }; - let unescape = if has_escaped { - let unescape: Arc = unsafe { crate::from_slice_unchecked(raw.as_ref()) }?; - Some(unescape) + + if status == HasEsc::None { + Self(LazyPacked::NonEscStrRaw(raw)) } else { - None + Self(LazyPacked::Raw(LazyRaw { + raw, + parsed: AtomicPtr::new(std::ptr::null_mut()), + })) + } + } + + fn get_mut_from_raw(&mut self, index: I) -> Option<&mut Self> { + let raw = if let LazyPacked::Raw(raw) = &mut self.0 { + raw + } else { + return None; }; - Ok(Self { raw, unescape }) + + match raw.get_type() { + JsonType::Array if index.as_index().is_some() => { + let parsed = raw.parse().ok()?; + *self = Self(LazyPacked::Parsed(parsed)); + } + JsonType::Object if index.as_key().is_some() => { + let parsed = raw.parse().ok()?; + *self = Self(LazyPacked::Parsed(parsed)); + } + _ => return None, + } + + if let LazyPacked::Parsed(parsed) = &mut self.0 { + parsed.get_mut(index) + } else { + None + } } } impl<'de> From> for OwnedLazyValue { fn from(lv: LazyValue<'de>) -> Self { - let raw = match lv.raw { - JsonSlice::Raw(r) => FastStr::new(unsafe { from_utf8_unchecked(r) }), - JsonSlice::FastStr(f) => f.clone(), - }; - Self { + let raw = unsafe { lv.raw.as_faststr() }; + if lv.inner.no_escaped() && raw.as_bytes()[0] == b'"' { + return Self(LazyPacked::NonEscStrRaw(raw)); + } + + Self(LazyPacked::Raw(LazyRaw { raw, - unescape: lv.unescape, + parsed: AtomicPtr::new(std::ptr::null_mut()), + })) + } +} + +impl Display for OwnedLazyValue { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}", crate::to_string(self)) + } +} + +impl serde::ser::Serialize for OwnedLazyValue { + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + match &self.0 { + LazyPacked::Raw(raw) => { + let raw = raw.raw.as_str(); + let mut s = serializer.serialize_struct(super::TOKEN, 1)?; + // will directly write raw in `LazyValueStrEmitter::seriazlie_str` + s.serialize_field(super::TOKEN, raw)?; + s.end() + } + LazyPacked::NonEscStrRaw(raw) => { + let raw = raw.as_str(); + let mut s = serializer.serialize_struct(super::TOKEN, 1)?; + // will directly write raw in `LazyValueStrEmitter::seriazlie_str` + s.serialize_field(super::TOKEN, raw)?; + s.end() + } + LazyPacked::Parsed(Parsed::LazyObject(vec)) => { + // if expected to be sort-keys, should use `sonic_rs::Value` + let mut map = serializer.serialize_map(Some(vec.len()))?; + for (k, v) in vec { + map.serialize_entry(k, v)?; + } + map.end() + } + LazyPacked::Parsed(Parsed::LazyArray(vec)) => vec.serialize(serializer), + LazyPacked::Parsed(Parsed::String(s)) => s.serialize(serializer), + LazyPacked::Parsed(Parsed::Number(n)) => n.serialize(serializer), + LazyPacked::Parsed(Parsed::Bool(b)) => b.serialize(serializer), + LazyPacked::Parsed(Parsed::Null) => serializer.serialize_none(), } } } -impl Debug for OwnedLazyValue { - fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter - .debug_tuple("OwnedLazyValue") - .field(&format_args!("{}", &self.as_raw_str())) - .finish() +#[derive(Debug, Clone, RefCast)] +#[repr(transparent)] +pub struct LazyObject(OwnedLazyValue); + +impl std::ops::Deref for LazyObject { + type Target = Vec<(FastStr, OwnedLazyValue)>; + fn deref(&self) -> &Self::Target { + if let LazyPacked::Parsed(Parsed::LazyObject(obj)) = &self.0 .0 { + obj + } else { + unreachable!("must be a lazy object"); + } } } -impl Display for OwnedLazyValue { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.write_str(self.as_raw_str()) +impl std::ops::DerefMut for LazyObject { + fn deref_mut(&mut self) -> &mut Self::Target { + if let LazyPacked::Parsed(Parsed::LazyObject(obj)) = &mut self.0 .0 { + obj + } else { + unreachable!("must be a lazy object"); + } } } -impl Default for OwnedLazyValue { +impl Default for LazyObject { fn default() -> Self { - Self { - raw: FastStr::new("null"), - unescape: None, + Self::new() + } +} + +impl LazyObject { + pub fn new() -> Self { + Self(OwnedLazyValue(LazyPacked::Parsed(Parsed::LazyObject( + Vec::new(), + )))) + } + + pub fn with_capacity(cap: usize) -> Self { + Self(OwnedLazyValue(LazyPacked::Parsed(Parsed::LazyObject( + Vec::with_capacity(cap), + )))) + } + + pub fn append_pair(&mut self, key: FastStr, value: OwnedLazyValue) { + if let LazyPacked::Parsed(Parsed::LazyObject(obj)) = &mut self.0 .0 { + obj.push((key, value)); + } else { + unreachable!("must be a lazy object"); } } } -impl PartialEq for OwnedLazyValue { - fn eq(&self, other: &Self) -> bool { - self.raw == other.raw +impl From> for LazyObject { + fn from(v: Vec<(FastStr, OwnedLazyValue)>) -> Self { + Self(OwnedLazyValue(LazyPacked::Parsed(Parsed::LazyObject(v)))) } } -impl Clone for OwnedLazyValue { - fn clone(&self) -> Self { - Self { - raw: self.raw.clone(), - unescape: self.unescape.clone(), +impl From for OwnedLazyValue { + fn from(v: LazyObject) -> Self { + v.0 + } +} + +#[derive(Debug, Clone, RefCast)] +#[repr(transparent)] +pub struct LazyArray(OwnedLazyValue); + +impl From> for LazyArray { + fn from(v: Vec) -> Self { + Self(OwnedLazyValue(LazyPacked::Parsed(Parsed::LazyArray(v)))) + } +} + +impl From for OwnedLazyValue { + fn from(v: LazyArray) -> Self { + v.0 + } +} + +impl std::ops::DerefMut for LazyArray { + fn deref_mut(&mut self) -> &mut Self::Target { + if let LazyPacked::Parsed(Parsed::LazyArray(obj)) = &mut self.0 .0 { + obj + } else { + unreachable!("must be a lazy array"); } } } -impl PartialOrd for OwnedLazyValue { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) +impl std::ops::Deref for LazyArray { + type Target = Vec; + fn deref(&self) -> &Self::Target { + if let LazyPacked::Parsed(Parsed::LazyArray(obj)) = &self.0 .0 { + obj + } else { + unreachable!("must be a lazy array"); + } } } -impl Ord for OwnedLazyValue { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.raw.cmp(&other.raw) +impl Default for LazyArray { + fn default() -> Self { + Self::new() } } -impl Eq for OwnedLazyValue {} +impl LazyArray { + pub fn new() -> Self { + Self(OwnedLazyValue(LazyPacked::Parsed(Parsed::LazyArray( + Vec::new(), + )))) + } + + pub fn with_capacity(cap: usize) -> Self { + Self(OwnedLazyValue(LazyPacked::Parsed(Parsed::LazyArray( + Vec::with_capacity(cap), + )))) + } +} + +#[cfg(test)] +mod test { + use crate::{get, pointer, prelude::*, to_lazyvalue, to_string, FastStr, OwnedLazyValue}; + #[test] + fn test_owned_lazy_value() { + let mut lv: OwnedLazyValue = + crate::get_from_faststr(&FastStr::new(r#"{"a": "hello world"}"#), pointer![]) + .unwrap() + .into(); + dbg!(&lv); + + if let Some(obj) = lv.as_object_mut() { + for (k, v) in obj.iter_mut() { + dbg!(k, v); + } + + obj.append_pair(FastStr::new("foo"), to_lazyvalue("bar").unwrap()); + } + + dbg!(crate::to_string(&lv).unwrap()); + + let input = r#"{ + "a": "hello world", + "a\\": "\\hello \" world", + "b": true, + "c": [0, 1, 2], + "d": { + "sonic": "rs" + } + }"#; + let own_a = OwnedLazyValue::from(get(input, &["a"]).unwrap()); + let own_c = OwnedLazyValue::from(get(input, &["c"]).unwrap()); + let own = OwnedLazyValue::from(get(input, pointer![]).unwrap()); + // use as_xx to get the parsed value + assert_eq!(own_a.as_str().unwrap(), "hello world"); + assert_eq!(own.get("a\\").as_str().unwrap(), "\\hello \" world"); + assert_eq!(own_c.as_str(), None); + assert!(own_c.is_array()); + } + + #[test] + fn test_owned_array() { + let mut lv: OwnedLazyValue = + crate::get_from_faststr(&FastStr::new(r#"["a", "hello world"]"#), pointer![]) + .unwrap() + .into(); + dbg!(&lv); + + if let Some(arr) = lv.as_array_mut() { + for v in arr.iter_mut() { + dbg!(v); + } + + arr.push(to_lazyvalue("bar").unwrap()); + } + + dbg!(crate::to_string(&lv).unwrap()); + } + + #[test] + fn test_owned_value_pointer() { + let input = FastStr::from(String::from( + r#"{ + "a": "hello world", + "b": true, + "c": [0, 1, 2], + "d": { + "sonic": "rs" + } + }"#, + )); + let root: OwnedLazyValue = + unsafe { crate::get_unchecked(&input, pointer![]).unwrap() }.into(); + test_pointer(&root); + test_pointer(&root.clone()); + test_pointer(&to_lazyvalue(&root).unwrap()); + + fn test_pointer(lv: &OwnedLazyValue) { + assert!(lv.pointer(pointer!["aa"]).is_none()); + assert!(lv.get("aa").is_none()); + assert_eq!(lv.pointer(pointer!["a"]).as_str(), Some("hello world")); + assert_eq!(lv.get("a").as_str(), Some("hello world")); + assert_eq!(lv.pointer(pointer!["b"]).as_bool(), Some(true)); + assert_eq!(lv.get("b").as_bool(), Some(true)); + assert_eq!(lv.pointer(pointer!["c", 1]).as_i64(), Some(1)); + assert_eq!(lv.pointer(pointer!["c", 3]).as_i64(), None); + } + } + + #[test] + fn test_owned_value_mut() { + let input = FastStr::from(String::from( + r#"{ + "a": "hello world", + "b": true, + "c": [0, 1, 2], + "d": { + "sonic": "rs" + } + }"#, + )); + let mut root: OwnedLazyValue = + unsafe { crate::get_unchecked(&input, pointer![]).unwrap() }.into(); + let mut root2 = root.clone(); + let mut root3 = to_lazyvalue(&root2).unwrap(); + test_pointer(&mut root); + test_pointer(&mut root2); + test_pointer(&mut root3); + + fn test_pointer(lv: &mut OwnedLazyValue) { + assert!(lv.pointer_mut(pointer!["aa"]).is_none()); + assert!(lv.get_mut("aa").is_none()); + assert_eq!( + lv.pointer_mut(pointer!["a"]).unwrap().as_str(), + Some("hello world") + ); + assert_eq!(lv.get_mut("a").unwrap().as_str(), Some("hello world")); + assert_eq!(lv.pointer_mut(pointer!["b"]).unwrap().as_bool(), Some(true)); + assert_eq!(lv.get_mut("b").unwrap().as_bool(), Some(true)); + let sub = lv.pointer_mut(pointer!["c", 1]).unwrap(); + assert_eq!(sub.as_i64(), Some(1)); + *sub = to_lazyvalue(&3).unwrap(); + assert_eq!(sub.as_i64(), Some(3)); + assert!(lv.pointer_mut(pointer!["c", 3]).is_none()); + assert_eq!(lv.pointer_mut(pointer!["c", 1]).unwrap().as_i64(), Some(3)); + } -impl Hash for OwnedLazyValue { - fn hash(&self, state: &mut H) { - self.raw.hash(state) + assert_eq!(to_string(&root).unwrap(), to_string(&root2).unwrap()); + assert_eq!(to_string(&root).unwrap(), to_string(&root3).unwrap()); } } diff --git a/src/lazyvalue/ser.rs b/src/lazyvalue/ser.rs index 88a10f9..ae6c7ca 100644 --- a/src/lazyvalue/ser.rs +++ b/src/lazyvalue/ser.rs @@ -1,6 +1,6 @@ -use ::serde::ser::SerializeStruct; +use serde::ser::SerializeStruct; -use super::{owned::OwnedLazyValue, value::LazyValue}; +use super::value::LazyValue; impl<'a> serde::ser::Serialize for LazyValue<'a> { fn serialize(&self, serializer: S) -> std::result::Result @@ -15,22 +15,9 @@ impl<'a> serde::ser::Serialize for LazyValue<'a> { } } -impl serde::ser::Serialize for OwnedLazyValue { - fn serialize(&self, serializer: S) -> std::result::Result - where - S: serde::Serializer, - { - let raw = self.as_raw_str(); - let mut s = serializer.serialize_struct(super::TOKEN, 1)?; - // will directly write raw in `LazyValueStrEmitter::seriazlie_str` - s.serialize_field(super::TOKEN, raw)?; - s.end() - } -} - #[cfg(test)] mod test { - use ::serde::{Deserialize, Serialize}; + use serde::{Deserialize, Serialize}; use crate::{from_str, to_string, LazyValue, OwnedLazyValue, Result}; @@ -51,7 +38,7 @@ mod test { assert_eq!(json, json2); } - #[derive(Debug, Deserialize, Serialize, PartialEq)] + #[derive(Debug, Deserialize, Serialize)] struct TestLazyValue<'a> { #[serde(borrow)] borrowed_lv: LazyValue<'a>, @@ -72,12 +59,12 @@ mod test { borrowed_lv: from_str(json).expect(&json2), owned_lv: from_str(json).expect(&json2), }; - assert_eq!(data, data2); + assert_eq!(to_string(&data).unwrap(), to_string(&data2).unwrap()); let json = json.trim(); let expect: String = format!("{{\"borrowed_lv\":{},\"owned_lv\":{}}}", json, json); let serialized = to_string(&data).expect(json); assert_eq!(expect, serialized); - assert_eq!(from_str::(&serialized).expect(json), data); + assert_eq!(serialized, to_string(&data).unwrap()); } test_json_ok(r#""""#); test_json_ok(r#""raw value""#); diff --git a/src/lazyvalue/value.rs b/src/lazyvalue/value.rs index f4b0dd5..197b73d 100644 --- a/src/lazyvalue/value.rs +++ b/src/lazyvalue/value.rs @@ -1,17 +1,23 @@ use std::{ borrow::Cow, - fmt, - fmt::{Debug, Display}, + fmt::{self, Debug, Display}, hash::Hash, str::from_utf8_unchecked, - sync::Arc, + sync::{ + atomic::{AtomicPtr, Ordering}, + Arc, + }, }; use faststr::FastStr; use crate::{ - from_str, get_unchecked, index::Index, input::JsonSlice, serde::Number, JsonType, - JsonValueTrait, RawNumber, Result, + from_str, get_unchecked, + index::Index, + input::JsonSlice, + lazyvalue::iterator::{ArrayJsonIter, ObjectJsonIter}, + serde::Number, + JsonType, JsonValueTrait, RawNumber, }; /// LazyValue wrappers a unparsed raw JSON text. It is borrowed from the origin JSON text. @@ -111,17 +117,100 @@ use crate::{ /// println!("checked {key} with {trans_value:?}"); /// } /// ``` +#[derive(Clone)] pub struct LazyValue<'a> { // the raw slice from origin json pub(crate) raw: JsonSlice<'a>, - pub(crate) unescape: Option>, + pub(crate) inner: Inner, +} + +pub(crate) struct Inner { + pub(crate) status: HasEsc, + pub(crate) unescaped: AtomicPtr<()>, +} + +impl Inner { + pub(crate) fn no_escaped(&self) -> bool { + self.status == HasEsc::None + } + + pub(crate) fn parse_from(&self, raw: &[u8]) -> Option<&str> { + let ptr = self.unescaped.load(Ordering::Acquire); + if !ptr.is_null() { + return Some(unsafe { &*(ptr as *const String) }); + } + + unsafe { + let parsed: String = crate::from_slice_unchecked(raw).ok()?; + let parsed = Arc::into_raw(Arc::new(parsed)) as *mut (); + match self.unescaped.compare_exchange_weak( + ptr, + parsed, + Ordering::AcqRel, + Ordering::Acquire, + ) { + Ok(_) => Some(&*(parsed as *const String)), + Err(e) => { + Arc::decrement_strong_count(parsed); + Some(&*(e as *const String)) + } + } + } + } +} +impl Default for Inner { + fn default() -> Self { + Self { + status: HasEsc::None, + unescaped: AtomicPtr::new(std::ptr::null_mut()), + } + } +} + +impl Clone for Inner { + fn clone(&self) -> Self { + let ptr = if !self.no_escaped() { + // possible is parsing + let ptr = self.unescaped.load(Ordering::Acquire); + if !ptr.is_null() { + unsafe { Arc::increment_strong_count(ptr as *const String) }; + } + ptr + } else { + std::ptr::null_mut() + }; + Self { + status: self.status, + unescaped: AtomicPtr::new(ptr), + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum HasEsc { + None, + Yes, + Possible, } impl Default for LazyValue<'_> { fn default() -> Self { Self { raw: JsonSlice::Raw(&b"null"[..]), - unescape: None, + inner: Inner::default(), + } + } +} + +impl Drop for Inner { + fn drop(&mut self) { + if self.no_escaped() { + return; + } + + let ptr = self.unescaped.load(Ordering::Acquire); + if !ptr.is_null() { + unsafe { Arc::decrement_strong_count(ptr as *const String) }; } } } @@ -129,8 +218,9 @@ impl Default for LazyValue<'_> { impl<'a> Debug for LazyValue<'a> { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter - .debug_tuple("LazyValue") - .field(&format_args!("{}", &self.as_raw_str())) + .debug_struct("LazyValue") + .field("raw json", &format_args!("{}", &self.as_raw_str())) + .field("has_escaped", &self.inner.status) .finish() } } @@ -147,15 +237,6 @@ impl PartialEq for LazyValue<'_> { } } -impl<'a> Clone for LazyValue<'a> { - fn clone(&self) -> Self { - Self { - raw: self.raw.clone(), - unescape: self.unescape.clone(), - } - } -} - impl PartialOrd for LazyValue<'_> { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) @@ -208,16 +289,18 @@ impl<'a> JsonValueTrait for LazyValue<'a> { fn as_str(&self) -> Option<&str> { if !self.is_str() { - None - } else if let Some(escaped) = self.unescape.as_ref() { - Some(escaped.as_ref()) - } else { + return None; + } + + if self.inner.no_escaped() { // remove the quotes let origin = { let raw = self.as_raw_str().as_bytes(); &raw[1..raw.len() - 1] }; Some(unsafe { from_utf8_unchecked(origin) }) + } else { + self.inner.parse_from(self.raw.as_ref()) } } @@ -318,6 +401,22 @@ impl<'a> LazyValue<'a> { } } + pub fn into_object_iter(mut self) -> Option> { + if self.is_object() { + Some(ObjectJsonIter::new_inner(std::mem::take(&mut self.raw))) + } else { + None + } + } + + pub fn into_array_iter(mut self) -> Option> { + if self.is_array() { + Some(ArrayJsonIter::new_inner(std::mem::take(&mut self.raw))) + } else { + None + } + } + /// get with index from lazyvalue pub(crate) fn get_index(&'a self, index: usize) -> Option { let path = [index]; @@ -340,14 +439,14 @@ impl<'a> LazyValue<'a> { } } - pub(crate) fn new(raw: JsonSlice<'a>, has_escaped: bool) -> Result { - let unescape = if has_escaped { - let unescape: Arc = unsafe { crate::from_slice_unchecked(raw.as_ref()) }?; - Some(unescape) - } else { - None - }; - Ok(Self { raw, unescape }) + pub(crate) fn new(raw: JsonSlice<'a>, status: HasEsc) -> Self { + Self { + raw, + inner: Inner { + status, + unescaped: AtomicPtr::new(std::ptr::null_mut()), + }, + } } } diff --git a/src/lib.rs b/src/lib.rs index 0f02986..a25c7f7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,9 +18,9 @@ pub mod value; pub mod writer; // re-export FastStr +pub use ::faststr::FastStr; // re-export the serde trait pub use ::serde::{Deserialize, Serialize}; -pub use faststr::FastStr; #[doc(inline)] pub use reader::Read; @@ -35,15 +35,16 @@ pub use crate::lazyvalue::{ get, get_from_bytes, get_from_bytes_unchecked, get_from_faststr, get_from_faststr_unchecked, get_from_slice, get_from_slice_unchecked, get_from_str, get_from_str_unchecked, get_many, get_many_unchecked, get_unchecked, to_array_iter, to_array_iter_unchecked, to_object_iter, - to_object_iter_unchecked, ArrayJsonIter, LazyValue, ObjectJsonIter, OwnedLazyValue, + to_object_iter_unchecked, ArrayJsonIter, LazyArray, LazyObject, LazyValue, ObjectJsonIter, + OwnedLazyValue, }; #[doc(inline)] pub use crate::pointer::{JsonPointer, PointerNode, PointerTree}; #[doc(inline)] pub use crate::serde::{ - from_reader, from_slice, from_slice_unchecked, from_str, to_string, to_string_pretty, to_vec, - to_vec_pretty, to_writer, to_writer_pretty, Deserializer, JsonNumberTrait, Number, RawNumber, - Serializer, StreamDeserializer, + from_reader, from_slice, from_slice_unchecked, from_str, to_lazyvalue, to_string, + to_string_pretty, to_vec, to_vec_pretty, to_writer, to_writer_pretty, Deserializer, + JsonNumberTrait, Number, RawNumber, Serializer, StreamDeserializer, }; #[doc(inline)] pub use crate::value::{ diff --git a/src/parser.rs b/src/parser.rs index 28d186d..937293e 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -21,6 +21,7 @@ use crate::{ Result, }, index::Index, + lazyvalue::value::HasEsc, pointer::{ tree::{MultiIndex, MultiKey, PointerTreeInner, PointerTreeNode}, PointerTree, @@ -33,7 +34,7 @@ use crate::{ unicode::{codepoint_to_utf8, hex_to_u32_nocheck}, }, value::{node::RawStr, visitor::JsonVisitor}, - JsonValueMutTrait, JsonValueTrait, LazyValue, + JsonValueMutTrait, JsonValueTrait, LazyValue, Number, OwnedLazyValue, }; // support borrow for owned deserizlie or skip @@ -200,6 +201,15 @@ pub(crate) enum ParseStatus { HasEscaped, } +impl From for HasEsc { + fn from(value: ParseStatus) -> Self { + match value { + ParseStatus::None => HasEsc::None, + ParseStatus::HasEscaped => HasEsc::Yes, + } + } +} + impl<'de, R> Parser where R: Reader<'de>, @@ -481,7 +491,7 @@ where &mut self, first: &mut bool, check: bool, - ) -> Result> { + ) -> Result> { if *first && self.skip_space() != Some(b'[') { return perr!(self, ExpectedArrayStart); } @@ -503,7 +513,7 @@ where } else { self.skip_one_unchecked() }?; - Ok(Some((raw, status == ParseStatus::HasEscaped))) + Ok(Some((raw, status))) } #[inline] @@ -512,7 +522,7 @@ where strbuf: &mut Vec, first: &mut bool, check: bool, - ) -> Result> { + ) -> Result> { if *first && self.skip_space() != Some(b'{') { return perr!(self, ExpectedObjectStart); } @@ -535,7 +545,7 @@ where } else { self.skip_one_unchecked() }?; - Ok(Some((key, raw, status == ParseStatus::HasEscaped))) + Ok(Some((key, raw, status))) } // Not use non-recurse version here, because it maybe 5% slower than recurse version. @@ -555,6 +565,122 @@ where Ok(()) } + #[inline(always)] + pub(crate) fn match_literal(&mut self, literal: &'static str) -> Result { + if let Some(chunk) = self.read.next_n(literal.len()) { + if chunk != literal.as_bytes() { + perr!(self, InvalidLiteral) + } else { + Ok(true) + } + } else { + perr!(self, EofWhileParsing) + } + } + + #[inline(always)] + pub(crate) fn get_owned_lazyvalue(&mut self, strict: bool) -> Result { + let c = self.skip_space(); + let start = self.read.index() - 1; + match c { + Some(b'"') => match self.skip_string()? { + ParseStatus::None => { + let slice = self.read.slice_unchecked(start, self.read.index()); + let raw = unsafe { self.read.slice_ref(slice).as_faststr() }; + return Ok(OwnedLazyValue::from_non_esc_str(raw)); + } + ParseStatus::HasEscaped => {} + }, + Some(b't') if self.match_literal("rue")? => return Ok(true.into()), + Some(b'f') if self.match_literal("alse")? => return Ok(false.into()), + Some(b'n') if self.match_literal("ull")? => return Ok(().into()), + _ => { + self.read.backward(1); + if strict { + self.skip_one()?; + } else { + self.skip_one_unchecked()?; + } + } + } + let end = self.read.index(); + let sub = self.read.slice_unchecked(start, end); + let raw = unsafe { self.read.slice_ref(sub).as_faststr() }; + Ok(OwnedLazyValue::new(raw.into(), HasEsc::Possible)) + } + + #[inline(always)] + fn parse_faststr(&mut self, strbuf: &mut Vec) -> Result { + match self.parse_str_impl(strbuf)? { + Reference::Borrowed(s) => { + return Ok(unsafe { self.read.slice_ref(s.as_bytes()).as_faststr() }); + } + Reference::Copied(s) => Ok(FastStr::new(s)), + } + } + + #[inline(always)] + pub(crate) fn load_owned_lazyvalue(&mut self, strbuf: &mut Vec) -> Result { + match self.skip_space() { + Some(c @ b'-' | c @ b'0'..=b'9') => { + let num: Number = self.parse_number(c)?.into(); + Ok(OwnedLazyValue::from(num)) + } + Some(b'"') => match self.parse_str_impl(strbuf)? { + Reference::Borrowed(s) => { + let raw = unsafe { self.read.slice_ref(s.as_bytes()).as_faststr() }; + Ok(OwnedLazyValue::from_faststr(raw)) + } + Reference::Copied(s) => { + let raw = FastStr::new(s); + Ok(OwnedLazyValue::from_faststr(raw)) + } + }, + Some(b'{') => { + // parsing empty object + match self.skip_space() { + Some(b'}') => return Ok(Vec::<(FastStr, OwnedLazyValue)>::new().into()), + Some(b'"') => {} + _ => return perr!(self, ExpectObjectKeyOrEnd), + } + + // loop for each object key and value + let mut vec = Vec::with_capacity(32); + loop { + let key = self.parse_faststr(strbuf)?; + self.parse_object_clo()?; + let olv = self.get_owned_lazyvalue(false)?; + vec.push((key, olv)); + match self.skip_space() { + Some(b'}') => return Ok(vec.into()), + Some(b',') => match self.skip_space() { + Some(b'"') => continue, + _ => return perr!(self, ExpectObjectKeyOrEnd), + }, + _ => return perr!(self, ExpectedArrayCommaOrEnd), + } + } + } + Some(b'[') => { + if let Some(b']') = self.skip_space() { + return Ok(Vec::::new().into()); + } + + let mut vec = Vec::with_capacity(32); + self.read.backward(1); + loop { + vec.push(self.get_owned_lazyvalue(false)?); + match self.skip_space() { + Some(b']') => return Ok(vec.into()), + Some(b',') => {} + _ => return perr!(self, ExpectedArrayCommaOrEnd), + }; + } + } + _ => perr!(self, InvalidJsonValue), + } + } + #[inline(always)] pub(crate) fn parse_dom(&mut self, vis: &mut V) -> Result<()> where @@ -1739,7 +1865,7 @@ where if !node.order.is_empty() { slice = self.read.slice_unchecked(start, self.read.index()); - let lv = LazyValue::new(slice.into(), status == ParseStatus::HasEscaped)?; + let lv = LazyValue::new(slice.into(), status.into()); for p in &node.order { out[*p] = lv.clone(); } diff --git a/src/reader.rs b/src/reader.rs index d8476f0..b94761e 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -1,8 +1,11 @@ use std::{marker::PhantomData, pin::Pin, ptr::NonNull}; +use faststr::FastStr; + use crate::{ error::invalid_utf8, input::JsonSlice, + parser::as_str, util::{private::Sealed, utf8::from_utf8}, JsonInput, Result, }; @@ -70,6 +73,36 @@ pub trait Reader<'de>: Sealed { fn slice_ref(&self, subset: &'de [u8]) -> JsonSlice<'de>; } +enum PinnedInput<'a> { + FastStr(Pin>), + Slice(&'a [u8]), +} + +impl<'a> PinnedInput<'a> { + unsafe fn as_ptr(&self) -> NonNull<[u8]> { + match self { + Self::FastStr(f) => f.as_bytes().into(), + Self::Slice(slice) => (*slice).into(), + } + } + + fn slice_ref(&self, subset: &'a [u8]) -> JsonSlice<'a> { + match self { + Self::FastStr(f) => JsonSlice::FastStr(f.slice_ref(as_str(subset))), + Self::Slice(_) => JsonSlice::Raw(subset), + } + } +} + +impl<'a> From> for PinnedInput<'a> { + fn from(input: JsonSlice<'a>) -> Self { + match input { + JsonSlice::Raw(slice) => Self::Slice(slice), + JsonSlice::FastStr(f) => Self::FastStr(Pin::new(Box::new(f))), + } + } +} + /// JSON input source that reads from a string/bytes-like JSON input. /// /// Support most common types: &str, &[u8], &FastStr, &Bytes and &String @@ -96,7 +129,7 @@ pub trait Reader<'de>: Sealed { /// ``` pub struct Read<'a> { // pin the input JSON, because `slice` will reference it - input: Pin>>, + input: PinnedInput<'a>, slice: NonNull<[u8]>, pub(crate) index: usize, // next invalid utf8 position, if not found, will be usize::MAX @@ -107,27 +140,28 @@ impl<'a> Read<'a> { /// Make a `Read` from string/bytes-like JSON input. pub fn from>(input: I) -> Self { let need = input.need_utf8_valid(); - Self::new_in(input, need) + Self::new_in(input.to_json_slice(), need) } - pub(crate) fn new(slice: &'a [u8], need_validate: bool) -> Self { - Self::new_in(slice, need_validate) + pub(crate) fn new(slice: &'a [u8], validate_utf8: bool) -> Self { + Self::new_in(slice.to_json_slice(), validate_utf8) } - pub(crate) fn new_in>(input: I, need_validate: bool) -> Self { - let input = Pin::new(Box::new(input.to_json_slice())); + pub(crate) fn new_in(input: JsonSlice<'a>, validate_utf8: bool) -> Self { + let input: PinnedInput<'a> = input.into(); // #safety: we pinned the input json - - let slice = input.as_ref().get_ref().as_ref(); + let slice = unsafe { input.as_ptr() }; // validate the utf-8 at first for slice - let next_invalid_utf8 = need_validate - .then(|| from_utf8(slice).err().map(|e| e.offset())) + let next_invalid_utf8 = validate_utf8 + .then(|| { + from_utf8(unsafe { slice.as_ref() }) + .err() + .map(|e| e.offset()) + }) .flatten() .unwrap_or(usize::MAX); - let slice = NonNull::from(slice); - Self { input, slice, diff --git a/src/serde/de.rs b/src/serde/de.rs index e0a4ce1..407107d 100644 --- a/src/serde/de.rs +++ b/src/serde/de.rs @@ -18,7 +18,7 @@ use crate::{ parser::{as_str, ParseStatus, ParsedSlice, Parser, Reference}, reader::{Read, Reader}, value::{node::Value, shared::Shared}, - JsonInput, + JsonInput, OwnedLazyValue, }; const MAX_ALLOWED_DEPTH: u8 = u8::MAX; @@ -369,6 +369,23 @@ impl<'de, R: Reader<'de>> Deserializer { } } + fn deserialize_owned_lazyvalue(&mut self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + let val = ManuallyDrop::new(self.parser.get_owned_lazyvalue(false)?); + // #Safety + // the json is validate before parsing json, and we pass the document using visit_bytes + // here. + unsafe { + let binary = &*slice_from_raw_parts( + &val as *const _ as *const u8, + std::mem::size_of::(), + ); + visitor.visit_bytes(binary) + } + } + fn deserialize_value(&mut self, visitor: V) -> Result where V: de::Visitor<'de>, @@ -742,6 +759,8 @@ impl<'de, 'a, R: Reader<'de>> de::Deserializer<'de> for &'a mut Deserializer return self.deserialize_rawnumber(visitor); } else if name == crate::lazyvalue::TOKEN { return self.deserialize_lazyvalue(visitor); + } else if name == crate::lazyvalue::OWNED_LAZY_VALUE_TOKEN { + return self.deserialize_owned_lazyvalue(visitor); } else if name == crate::value::de::TOKEN { return self.deserialize_value(visitor); } diff --git a/src/serde/mod.rs b/src/serde/mod.rs index 7154452..f5c155a 100644 --- a/src/serde/mod.rs +++ b/src/serde/mod.rs @@ -13,7 +13,8 @@ pub use self::{ number::{JsonNumberTrait, Number}, rawnumber::RawNumber, ser::{ - to_string, to_string_pretty, to_vec, to_vec_pretty, to_writer, to_writer_pretty, Serializer, + to_lazyvalue, to_string, to_string_pretty, to_vec, to_vec_pretty, to_writer, + to_writer_pretty, Serializer, }, }; diff --git a/src/serde/ser.rs b/src/serde/ser.rs index 620c869..15db586 100644 --- a/src/serde/ser.rs +++ b/src/serde/ser.rs @@ -8,20 +8,26 @@ use core::{ }; use std::io; -use ::serde::ser::{self, Impossible, Serialize}; -use serde::de::Unexpected; +use faststr::FastStr; +use serde::{ + de::Unexpected, + ser::{self, Impossible, Serialize}, +}; use super::de::tri; use crate::{ error::{Error, ErrorCode, Result}, format::{CompactFormatter, Formatter, PrettyFormatter}, + lazyvalue::value::HasEsc, writer::WriteExt, + OwnedLazyValue, }; - /// A structure for serializing Rust values into JSON. pub struct Serializer { writer: W, formatter: F, + // TODO: record has_escape to optimize lazyvalue + // has_escape: bool, } impl Serializer @@ -1299,6 +1305,24 @@ where Ok(string) } +/// Serialize the given data structure as a OwnedLazyValue of JSON. +#[inline] +pub fn to_lazyvalue(value: &T) -> Result +where + T: ?Sized + Serialize, +{ + let vec = tri!(to_vec(value)); + let string = unsafe { + // We do not emit Invalid UTF-8. + String::from_utf8_unchecked(vec) + }; + + Ok(OwnedLazyValue::new( + FastStr::new(string).into(), + HasEsc::Possible, + )) +} + /// Serialize the given data structure as a pretty-printed String of JSON. /// /// # Errors