From e87e2dc85e94c9272219a52d114ce66df322d417 Mon Sep 17 00:00:00 2001 From: ImJeremyHe Date: Mon, 18 Nov 2024 15:51:38 +0800 Subject: [PATCH] Support deserializing untagged structs --- derives/Cargo.toml | 2 +- derives/src/container.rs | 30 ++++- derives/src/de.rs | 230 ++++++++++++++++++++++++++++++++++++--- derives/src/ser.rs | 3 +- src/lib.rs | 60 +++++++++- tests/lib.rs | 51 +++++++++ 6 files changed, 351 insertions(+), 25 deletions(-) diff --git a/derives/Cargo.toml b/derives/Cargo.toml index 3330663..f53692f 100644 --- a/derives/Cargo.toml +++ b/derives/Cargo.toml @@ -12,5 +12,5 @@ proc-macro = true [dependencies] syn = {version = "2.0.48", features = ["full"]} -quote = "1.0.35" +quote = "1" proc-macro2 = "1.0.75" diff --git a/derives/src/container.rs b/derives/src/container.rs index d5c020e..afe2e37 100644 --- a/derives/src/container.rs +++ b/derives/src/container.rs @@ -120,7 +120,8 @@ pub struct FieldsSummary<'a> { pub text: Option>, pub attrs: Vec>, pub self_closed_children: Vec>, - pub untags: Vec>, + pub untagged_enums: Vec>, + pub untagged_structs: Vec>, } impl<'a> FieldsSummary<'a> { @@ -130,14 +131,17 @@ impl<'a> FieldsSummary<'a> { text: None, attrs: vec![], self_closed_children: vec![], - untags: vec![], + untagged_enums: vec![], + untagged_structs: vec![], }; fields.into_iter().for_each(|f| match f.ty { EleType::Attr => result.attrs.push(f), EleType::Child => result.children.push(f), EleType::Text => result.text = Some(f), EleType::SelfClosedChild => result.self_closed_children.push(f), - EleType::Untag => result.untags.push(f), + EleType::Untag => result.untagged_enums.push(f), + EleType::UntaggedEnum => result.untagged_enums.push(f), + EleType::UntaggedStruct => result.untagged_structs.push(f), }); result } @@ -154,7 +158,17 @@ pub struct StructField<'a> { } impl<'a> StructField<'a> { - pub fn validate(&self) {} + pub fn validate(&self) { + let untagged = match self.ty { + EleType::Untag => true, + EleType::UntaggedEnum => true, + 
EleType::UntaggedStruct => true, + _ => false, + }; + if untagged && self.name.is_some() { + panic!("untagged types doesn't need a name") + } + } pub fn from_ast(f: &'a syn::Field) -> Option { let mut name = Option::::None; @@ -182,7 +196,9 @@ impl<'a> StructField<'a> { "child" => EleType::Child, "text" => EleType::Text, "sfc" => EleType::SelfClosedChild, - "untag" => EleType::Untag, + "untag" => EleType::Untag, // todo: generate a deprecate function to let users know + "untagged_enum" => EleType::UntaggedEnum, + "untagged_struct" => EleType::UntaggedStruct, _ => panic!("invalid type"), }; ty = Some(t); @@ -318,7 +334,11 @@ pub enum EleType { /// /// In this case, indicates the field *bold* is true and indicates *italic* is true. SelfClosedChild, + /// Deprecated, use `UntaggedEnum` Untag, + + UntaggedEnum, + UntaggedStruct, } pub enum Derive { diff --git a/derives/src/de.rs b/derives/src/de.rs index aa1133a..3a5361a 100644 --- a/derives/src/de.rs +++ b/derives/src/de.rs @@ -119,6 +119,10 @@ pub fn get_de_enum_impl_block(container: Container) -> proc_macro2::TokenStream } #text_function + + fn __is_enum() -> bool { + true + } } } } @@ -127,12 +131,14 @@ pub fn get_de_struct_impl_block(container: Container) -> proc_macro2::TokenStrea let result = get_result(&container.struct_fields); let summary = FieldsSummary::from_fields(container.struct_fields); let fields_init = get_fields_init(&summary); + let result_untagged_structs = get_untagged_struct_fields_result(&summary.untagged_structs); let FieldsSummary { children, text, attrs, self_closed_children, - untags, + untagged_enums, + untagged_structs, } = summary; let get_children_tags = if children.len() > 0 { let names = children.iter().map(|f| { @@ -147,9 +153,11 @@ pub fn get_de_struct_impl_block(container: Container) -> proc_macro2::TokenStrea } else { quote! 
{} }; + let attr_len = attrs.len(); + let sfc_len = self_closed_children.len(); let vec_init = get_vec_init(&children); let attr_branches = attrs.into_iter().map(|a| attr_match_branch(a)); - let child_branches = children_match_branch(children, untags); + let child_branches = children_match_branch(&children, &untagged_enums, &untagged_structs); let sfc_branch = sfc_match_branch(self_closed_children); let ident = &container.original.ident; let (impl_generics, type_generics, where_clause) = container.original.generics.split_for_impl(); @@ -169,6 +177,13 @@ pub fn get_de_struct_impl_block(container: Container) -> proc_macro2::TokenStrea } else { quote! {} }; + + // Only those structs with only children can be untagged + let deserialize_from_unparsed = if children.len() > 0 && attr_len == 0 && sfc_len == 0 { + get_deserialize_from_unparsed(&children) + } else { + quote! {} + }; let encounter_unknown = if container.deny_unknown { quote! { let _field = std::str::from_utf8(_field).unwrap(); @@ -227,17 +242,43 @@ pub fn get_de_struct_impl_block(container: Container) -> proc_macro2::TokenStrea } } } + #result_untagged_structs Self { #result } } #get_root #get_children_tags + #deserialize_from_unparsed } } } +fn get_untagged_struct_fields_result(fileds: &[StructField]) -> proc_macro2::TokenStream { + let branch = fileds.iter().map(|f| { + let ident = f.original.ident.as_ref().unwrap(); + let ty = &f.original.ty; + let ident_unparsed_array = format_ident!("{}_unparseds", ident); + let ident_opt_unparsed_array = format_ident!("{}_opt_unparseds", ident); + match f.generic { + Generic::Vec(_) => unreachable!(), + Generic::Opt(_t) => quote! { + if #ident_opt_unparsed_array .len() > 0 { + #ident = Some(#_t::__deserialize_from_unparsed_array(#ident_opt_unparsed_array)); + } + }, + Generic::None => quote! { + if #ident_unparsed_array.len() > 0 { + #ident = Some(#ty::__deserialize_from_unparsed_array(#ident_unparsed_array)); + } + }, + } + }); + + quote! 
{#(#branch)*} +} + fn get_result(fields: &[StructField]) -> proc_macro2::TokenStream { let branch = fields.iter().map(|f| { let ident = f.original.ident.as_ref().unwrap(); @@ -278,7 +319,9 @@ fn get_fields_init(fields: &FieldsSummary) -> proc_macro2::TokenStream { let ty = &f.original.ty; match &f.default { Some(p) => { - quote! {let mut #ident = #p();} + quote! { + let mut #ident = #p(); + } } None => match f.generic { Generic::Vec(v) => quote! { @@ -319,8 +362,13 @@ fn get_fields_init(fields: &FieldsSummary) -> proc_macro2::TokenStream { let mut #ident = false; } }); - let untag_init = fields.untags.iter().map(|f| { + let untagged_enums_init = fields.untagged_enums.iter().map(|f| { let ident = f.original.ident.as_ref().unwrap(); + + if let Some(path) = &f.default { + return quote! {let mut #ident = #path();}; + } + let ty = &f.original.ty; match f.generic { Generic::Vec(t) => quote! { @@ -334,12 +382,123 @@ fn get_fields_init(fields: &FieldsSummary) -> proc_macro2::TokenStream { }, } }); + + let untagged_structs_init = fields.untagged_structs.iter().map(|f| { + let ident = f.original.ident.as_ref().unwrap(); + if let Some(path) = &f.default { + return quote! {let mut #ident = #path();}; + } + let ident_unparsed_array = format_ident!("{}_unparseds", ident); + let ident_opt_unparsed_array = format_ident!("{}_opt_unparseds", ident); + + let ty = &f.original.ty; + match f.generic { + Generic::Vec(_t) => quote! { + unreachable!() + }, + Generic::Opt(t) => quote! { + let mut #ident = Option::<#t>::None; + let mut #ident_opt_unparsed_array = Vec::new(); + }, + Generic::None => quote! { + let mut #ident = Option::<#ty>::None; + let mut #ident_unparsed_array = Vec::new(); + }, + } + }); quote! 
{ #(#attrs_inits)* #(#sfc_init)* #(#children_inits)* #text_init - #(#untag_init)* + #(#untagged_enums_init)* + #(#untagged_structs_init)* + } +} + +fn get_deserialize_from_unparsed(children: &[StructField]) -> proc_macro2::TokenStream { + let init = children.iter().map(|c| { + let ident = c.original.ident.as_ref().unwrap(); + if let Some(path) = &c.default { + return quote! { + let mut #ident = #path(); + }; + } + match &c.generic { + Generic::Vec(_) => quote! {let mut #ident = vec![];}, + Generic::Opt(_) => quote! {let mut #ident = None;}, + Generic::None => quote! {let mut #ident = None;}, + } + }); + let body = children.iter().map(|c| { + let name = c + .name + .as_ref() + .expect("types can not have recursive untagged fields"); + let original_type = &c.original.ty; + let ident = c.original.ident.as_ref().unwrap(); + match &c.generic { + Generic::Vec(t) => { + quote! { + #name => { + #ident.push(content.deserialize_to::<#t>().unwrap()); + }, + } + } + Generic::Opt(t) => { + quote! { + #name => { + #ident = Some(content.deserialize_to::<#t>().unwrap()); + }, + } + } + Generic::None => { + if c.default.is_some() { + quote! { + #name => { + #ident = content.deserialize_to::<#original_type>().unwrap(); + } + } + } else { + quote! { + #name => { + #ident = Some(content.deserialize_to::<#original_type>().unwrap()); + } + } + } + } + } + }); + let result = { + let idents = children.iter().map(|c| { + let ident = c.original.ident.as_ref().unwrap(); + if c.is_required() { + quote! { + #ident: #ident.expect("missing field") + } + } else { + quote! { + #ident + } + } + }); + quote! { + Self { + #(#idents),* + } + } + }; + quote! 
{ + fn __deserialize_from_unparsed_array(array: Vec<(&'static [u8], ::xmlserde::Unparsed)>) -> Self { + #(#init)* + array.into_iter().for_each(|(tag, content)| { + match tag { + #(#body)* + _ => {}, + } + }); + #result + } } } @@ -480,7 +639,7 @@ fn text_match_branch(field: StructField) -> proc_macro2::TokenStream { } } -fn untag_text_enum_branches(untags: Vec) -> proc_macro2::TokenStream { +fn untag_text_enum_branches(untags: &[StructField]) -> proc_macro2::TokenStream { if untags.len() == 0 { return quote! {}; } @@ -512,7 +671,7 @@ fn untag_text_enum_branches(untags: Vec) -> proc_macro2::TokenStrea return quote! {#(#branches)*}; } -fn untags_match_branch(fields: &[StructField]) -> proc_macro2::TokenStream { +fn untag_enums_match_branch(fields: &[StructField]) -> proc_macro2::TokenStream { if fields.len() == 0 { return quote! {}; } @@ -544,11 +703,50 @@ fn untags_match_branch(fields: &[StructField]) -> proc_macro2::TokenStream { } } +fn untag_structs_match_branch(fields: &[StructField]) -> proc_macro2::TokenStream { + if fields.len() == 0 { + return quote! {}; + } + let mut branches: Vec = vec![]; + + fields.iter().for_each(|f| { + let ident = f.original.ident.as_ref().unwrap(); + let ty = &f.original.ty; + let ident_unparsed_array = format_ident!("{}_unparseds", ident); + let ident_opt_unparsed_array = format_ident!("{}_opt_unparseds", ident); + // `_t` borrows from the reader, so fetch the matching `'static` tag by linear scan (the tag list is not guaranteed sorted, so `binary_search` could miss it). + let branch = match f.generic { + Generic::Vec(_) => unreachable!(), + Generic::Opt(t) => quote! { + _t if #t::__get_children_tags().contains(&_t) => { + let _r = ::xmlserde::Unparsed::deserialize(_t, reader, s.attributes(), is_empty); + let _tags = #t::__get_children_tags(); + let idx = _tags.iter().position(|tag| *tag == _t).unwrap(); + #ident_opt_unparsed_array.push((_tags[idx], _r)); + } + }, + Generic::None => quote! 
{ + _t if #ty::__get_children_tags().contains(&_t) => { + let _r = ::xmlserde::Unparsed::deserialize(_t, reader, s.attributes(), is_empty); + let _tags = #ty::__get_children_tags(); + let idx = _tags.iter().position(|tag| *tag == _t).unwrap(); + #ident_unparsed_array.push((_tags[idx], _r)); + } + }, + }; + branches.push(branch); + }); + quote! { + #(#branches)* + } +} + fn children_match_branch( - fields: Vec, - untags: Vec, + fields: &[StructField], + untagged_enums: &[StructField], + untagged_structs: &[StructField], ) -> proc_macro2::TokenStream { - if fields.len() == 0 && untags.len() == 0 { + if fields.is_empty() && untagged_enums.is_empty() && untagged_structs.is_empty() { return quote! {}; } let mut branches = vec![]; @@ -596,14 +794,17 @@ fn children_match_branch( }; branches.push(branch); }); - let untags_branches = untags_match_branch(&untags); - let untag_text_enum = untag_text_enum_branches(untags); + let untagged_enums_branches = untag_enums_match_branch(&untagged_enums); + let untagged_structs_branches = untag_structs_match_branch(&untagged_structs); + let untag_text_enum = untag_text_enum_branches(untagged_enums); + quote! 
{ Ok(Event::Empty(s)) => { let is_empty = true; match s.name().into_inner() { #(#branches)* - #untags_branches + #untagged_enums_branches + #untagged_structs_branches _ => {}, } } @@ -611,7 +812,8 @@ fn children_match_branch( let is_empty = false; match s.name().into_inner() { #(#branches)* - #untags_branches + #untagged_enums_branches + #untagged_structs_branches _ => {}, } } diff --git a/derives/src/ser.rs b/derives/src/ser.rs index f5e4442..38aee0b 100644 --- a/derives/src/ser.rs +++ b/derives/src/ser.rs @@ -97,7 +97,8 @@ fn get_ser_struct_impl_block(container: Container) -> proc_macro2::TokenStream { text, attrs, self_closed_children, - untags, + untagged_enums: untags, + untagged_structs: _, } = FieldsSummary::from_fields(container.struct_fields); if text.is_some() && (children.len() > 0 || self_closed_children.len() > 0 || untags.len() > 0) { diff --git a/src/lib.rs b/src/lib.rs index 442fc6f..1161d59 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -156,6 +156,7 @@ use std::{ // it easily. In this way users don't need to import the `quick-xml` on // their own. pub use quick_xml; + use quick_xml::events::Event; pub trait XmlSerialize { @@ -182,7 +183,7 @@ impl XmlSerialize for Vec { } } -pub trait XmlDeserialize { +pub trait XmlDeserialize: Sized { fn deserialize( tag: &[u8], reader: &mut quick_xml::Reader, @@ -194,12 +195,31 @@ pub trait XmlDeserialize { None } - // A helper function used when ty = `untag`. It could help - // us to find out the children tags when deserializing + /// A helper function used when ty = `untag`. It could help + /// us to find out the children tags when deserializing fn __get_children_tags() -> Vec<&'static [u8]> { vec![] } + /// A helper function used when handling the untag types. + /// + /// For an outside struct, it doesn't + /// know how to deal with an untag type. The current solution is to treat them as `Unparsed` + /// types first, and then pass them into this function to deserialize. 
Since the type is untagged, + /// it doesn't require the attributes. + fn __deserialize_from_unparsed_array(_array: Vec<(&'static [u8], Unparsed)>) -> Self { + unreachable!("untagged types require having `child` types only") + } + + /// A helper function for handling the untagged types. + /// + /// For efficiency, deserializing enums has no need to handle the untagged types by the `__deserialize_from_unparsed_array` method. + /// But we cannot know in advance whether a field is an enum or not, so this helper function lets us discern it + /// at runtime. + fn __is_enum() -> bool { + false + } + fn __deserialize_from_text(_: &str) -> Option where Self: Sized, @@ -290,6 +310,27 @@ impl XmlDeserialize for Unparsed { attrs: attrs_vec, } } + + fn __deserialize_from_unparsed_array(_array: Vec<(&'static [u8], Unparsed)>) -> Self { + unreachable!( + r#"seems you are using a struct having `attrs` or `text` as an UntaggedStruct"# + ) + } +} + +impl Unparsed { + pub fn deserialize_to(self) -> Result + where + T: XmlDeserialize + Sized, + { + // TODO: Find a more efficient way + let mut writer = quick_xml::Writer::new(Vec::new()); + let t = b"tmptag"; + self.serialize(t, &mut writer); + let result = writer.into_inner(); + + xml_deserialize_from_reader_with_root::(result.as_slice(), t) + } } /// The entry for serializing. 
`T` should have declared the `root` by `#[xmlserde(root=b"")]` @@ -335,13 +376,24 @@ where /// } /// ``` pub fn xml_deserialize_from_reader(reader: R) -> Result +where + T: XmlDeserialize, + R: BufRead, +{ + let root = T::de_root().expect(r#"#[xmlserde(root = b"tag")]"#); + xml_deserialize_from_reader_with_root(reader, root) +} + +pub(crate) fn xml_deserialize_from_reader_with_root( + reader: R, + root: &[u8], +) -> Result where T: XmlDeserialize, R: BufRead, { let mut reader = quick_xml::Reader::from_reader(reader); let mut buf = Vec::::new(); - let root = T::de_root().expect(r#"#[xmlserde(root = b"tag")]"#); loop { match reader.read_event_into(&mut buf) { Ok(Event::Start(start)) => { diff --git a/tests/lib.rs b/tests/lib.rs index 9797895..693ca3c 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -771,4 +771,55 @@ mod tests { let r = xml_serialize(wrapper); assert_eq!(r, r#""#); } + + #[test] + fn test_de_untagged_struct() { + #[derive(XmlDeserialize)] + #[xmlserde(root = b"foo")] + struct Foo { + #[xmlserde(ty = "untagged_struct")] + bar: Bar, + } + + #[derive(XmlDeserialize)] + struct Bar { + #[xmlserde(name = b"a", ty = "child")] + a: A, + #[xmlserde(name = b"c", ty = "child")] + c: C, + } + + #[derive(XmlDeserialize)] + struct A { + #[xmlserde(name = b"attr1", ty = "attr")] + attr1: u16, + } + + #[derive(XmlDeserialize)] + struct C { + #[xmlserde(name = b"attr2", ty = "attr")] + attr2: u16, + } + + let xml = r#""#; + let foo = xml_deserialize_from_str::(&xml).unwrap(); + assert_eq!(foo.bar.a.attr1, 12); + assert_eq!(foo.bar.c.attr2, 200); + + #[derive(XmlDeserialize)] + #[xmlserde(root = b"foo")] + struct FooOption { + #[xmlserde(ty = "untagged_struct")] + bar: Option, + } + let xml = r#""#; + let foo = xml_deserialize_from_str::(&xml).unwrap(); + let bar = foo.bar.unwrap(); + assert_eq!(bar.a.attr1, 12); + assert_eq!(bar.c.attr2, 200); + + let xml = r#">"#; + let foo = xml_deserialize_from_str::(&xml).unwrap(); + assert!(foo.bar.is_none()); + } }