From 91973de72159957997fd660139e5f58611174552 Mon Sep 17 00:00:00 2001 From: Christopher Prohm Date: Thu, 2 Nov 2023 18:25:12 +0100 Subject: [PATCH] Add a trait `SchemaLike` for schema construction --- Changes.md | 10 +- example/src/main.rs | 2 +- serde_arrow/benches/groups/impls.rs | 2 +- serde_arrow/src/arrow2_impl/api.rs | 45 +++--- serde_arrow/src/arrow2_impl/schema.rs | 29 +++- serde_arrow/src/arrow_impl/api.rs | 43 +++--- serde_arrow/src/arrow_impl/schema.rs | 29 +++- serde_arrow/src/internal/schema.rs | 143 ++++++++++++-------- serde_arrow/src/lib.rs | 3 +- serde_arrow/src/schema.rs | 4 +- serde_arrow/src/test_end_to_end/issue_90.rs | 15 +- serde_arrow/src/test_impls/json_values.rs | 10 +- serde_arrow/src/test_impls/macros.rs | 8 +- serde_arrow/src/test_impls/union.rs | 6 +- serde_arrow/src/test_impls/wrappers.rs | 23 +--- 15 files changed, 215 insertions(+), 157 deletions(-) diff --git a/Changes.md b/Changes.md index ee076a02..209ee6be 100644 --- a/Changes.md +++ b/Changes.md @@ -11,11 +11,13 @@ Breaking changes: Improvements: -- Simpler and streamlined API +- Simpler and streamlined API (`to_arrow` / `from_arrow` and `to_arrow2` / + `from_arrow2`) +- Add `SchemaLike` trait to support direct construction of arrow / arrow2 fields - Add type based tracing to allow schema tracing without samples - (`SerdeArrowSchema::form_type()`) + (`SchemaLike::from_type()`) - Allow to build schema objects from serializable objects, e.g., - `serde_json::Value` (`SerdeArrow::from_value()`) + `serde_json::Value` (`SchemaLike::from_value()`) - Add support for `arrow=47` and `arrow=48` Deprecations (see the documentation of deprecated items for how to migratie): @@ -26,7 +28,7 @@ Deprecations (see the documentation of deprecated items for how to migratie): - Deprecate `serialize_into_arrays`, `deserialize_from_arrays` methods in favor of `to_arrow` / `to_arrow2` and `from_arrow` / `from_arrow2` - Deprecate `serialize_into_fields` methods in favor of - 
`SerdeArrowSchema::from_samples` + `SchemaLike::from_samples` - Deprecated single item methods in favor of using the `Items` and `Item` wrappers diff --git a/example/src/main.rs b/example/src/main.rs index 33fc6510..7b629a8c 100644 --- a/example/src/main.rs +++ b/example/src/main.rs @@ -78,7 +78,7 @@ fn main() -> Result<(), PanicOnError> { }, ]; - use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; + use serde_arrow::schema::{SchemaLike, SerdeArrowSchema, TracingOptions}; let fields: Vec = SerdeArrowSchema::from_samples(&examples, TracingOptions::default().guess_dates(true))? diff --git a/serde_arrow/benches/groups/impls.rs b/serde_arrow/benches/groups/impls.rs index 8ed7d37c..6cdb7a83 100644 --- a/serde_arrow/benches/groups/impls.rs +++ b/serde_arrow/benches/groups/impls.rs @@ -12,7 +12,7 @@ macro_rules! define_benchmark { )? ) => { pub fn benchmark_serialize(c: &mut criterion::Criterion) { - use serde_arrow::schema::SerdeArrowSchema; + use serde_arrow::schema::{SerdeArrowSchema, SchemaLike}; for n in [$($n),*] { let mut group = c.benchmark_group(format!("{}_serialize({})", stringify!($name), n)); diff --git a/serde_arrow/src/arrow2_impl/api.rs b/serde_arrow/src/arrow2_impl/api.rs index ed5c4a69..f80c8963 100644 --- a/serde_arrow/src/arrow2_impl/api.rs +++ b/serde_arrow/src/arrow2_impl/api.rs @@ -17,7 +17,7 @@ use crate::{ }, }; -/// Build arrow2 arrays record by record (*requires one of the `arrow2-*` +/// Build arrow2 arrays record by record (*requires one of the `arrow2-*` /// features*) /// /// The given items should be records (e.g., structs). 
To serialize items @@ -115,8 +115,10 @@ impl Arrow2Builder { /// /// ```rust /// # fn main() -> serde_arrow::Result<()> { +/// # use serde_arrow::_impl::arrow2; +/// use arrow2::datatypes::Field; /// use serde::{Serialize, Deserialize}; -/// use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; +/// use serde_arrow::schema::{SchemaLike, TracingOptions}; /// /// ##[derive(Serialize, Deserialize)] /// struct Record { @@ -129,8 +131,7 @@ impl Arrow2Builder { /// // ... /// ]; /// -/// let fields = SerdeArrowSchema::from_type::(TracingOptions::default())?. -/// to_arrow2_fields()?; +/// let fields = Vec::::from_type::(TracingOptions::default())?; /// /// let arrays = serde_arrow::to_arrow2(&fields, &items)?; /// # @@ -164,8 +165,10 @@ where /// /// ```rust /// # fn main() -> serde_arrow::Result<()> { +/// # use serde_arrow::_impl::arrow2; +/// use arrow2::datatypes::Field; /// use serde::{Deserialize, Serialize}; -/// use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; +/// use serde_arrow::schema::{SchemaLike, TracingOptions}; /// /// ##[derive(Deserialize, Serialize)] /// struct Record { @@ -173,8 +176,7 @@ where /// b: u64, /// } /// -/// let fields = SerdeArrowSchema::from_type::(TracingOptions::default())? 
-/// .to_arrow2_fields()?; +/// let fields = Vec::::from_type::(TracingOptions::default())?; /// # let items = &[Record { a: Some(1.0), b: 2}]; /// # let arrays = serde_arrow::to_arrow2(&fields, &items).unwrap(); /// # @@ -222,13 +224,15 @@ where } /// Replaced by -/// [`SerdeArrowSchema::from_samples`][crate::schema::SerdeArrowSchema::from_samples] +/// [`SchemaLike::from_samples`][crate::schema::SchemaLike::from_samples] /// (*[example][serialize_into_fields]*) /// /// ```rust /// # fn main() -> serde_arrow::Result<()> { +/// # use serde_arrow::_impl::arrow2; +/// use arrow2::datatypes::Field; /// use serde::Serialize; -/// use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; +/// use serde_arrow::schema::{SchemaLike, TracingOptions}; /// /// ##[derive(Serialize)] /// struct Record { @@ -237,8 +241,7 @@ where /// } /// /// let samples = [Record { a: 1, b: 2.0 }, /* ... */ ]; -/// let fields = SerdeArrowSchema::from_samples(&samples, TracingOptions::default())? -/// .to_arrow2_fields()?; +/// let fields = Vec::::from_samples(&samples, TracingOptions::default())?; /// # /// # drop(fields); /// # Ok(()) @@ -276,19 +279,20 @@ where } /// Replaced by -/// [`SerdeArrowSchema::from_samples`][crate::schema::SerdeArrowSchema::from_samples] -/// and [`Items`][crate::utils::Items] (*[example][serialize_into_field]*) +/// [`SchemaLike::from_samples`][crate::schema::SchemaLike::from_samples] and +/// [`Items`][crate::utils::Items] (*[example][serialize_into_field]*) /// /// ```rust /// # fn main() -> serde_arrow::Result<()> { +/// # use serde_arrow::_impl::arrow2; +/// use arrow2::datatypes::Field; /// use serde_arrow::{ -/// schema::{SerdeArrowSchema, TracingOptions}, +/// schema::{SchemaLike, TracingOptions}, /// utils::Items, /// }; /// /// let samples: Vec = vec![1, 2, 3, /* ... */ ]; -/// let fields = SerdeArrowSchema::from_samples(&Items(&samples), TracingOptions::default())? 
-/// .to_arrow2_fields()?; +/// let fields = Vec::::from_samples(&Items(&samples), TracingOptions::default())?; /// # /// # drop(fields); /// # Ok(()) @@ -310,14 +314,15 @@ where /// /// ```rust /// # fn main() -> serde_arrow::Result<()> { +/// # use serde_arrow::_impl::arrow2; +/// use arrow2::datatypes::Field; /// use serde_arrow::{ -/// schema::{SerdeArrowSchema, TracingOptions}, +/// schema::{SchemaLike, TracingOptions}, /// utils::Items, /// }; /// /// let samples: Vec = vec![1, 2, 3, /* ... */ ]; -/// let fields = SerdeArrowSchema::from_samples(&Items(&samples), TracingOptions::default())? -/// .to_arrow2_fields()?; +/// let fields = Vec::::from_samples(&Items(&samples), TracingOptions::default())?; /// /// let arrays = serde_arrow::to_arrow2(&fields, &Items(&samples))?; /// # @@ -346,7 +351,7 @@ where /// /// ```rust /// # fn main() -> serde_arrow::Result<()> { -/// # use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; +/// # use serde_arrow::schema::{SerdeArrowSchema, SchemaLike, TracingOptions}; /// # let samples: Vec = vec![1, 2, 3, /* ... */ ]; /// # let fields = SerdeArrowSchema::from_samples(&Items(&samples), TracingOptions::default())? 
/// # .to_arrow2_fields()?; diff --git a/serde_arrow/src/arrow2_impl/schema.rs b/serde_arrow/src/arrow2_impl/schema.rs index 8b53124f..da360c05 100644 --- a/serde_arrow/src/arrow2_impl/schema.rs +++ b/serde_arrow/src/arrow2_impl/schema.rs @@ -3,8 +3,8 @@ use crate::{ internal::{ error::{error, fail, Error, Result}, schema::{ - GenericDataType, GenericField, GenericTimeUnit, SerdeArrowSchema, Strategy, - STRATEGY_KEY, + GenericDataType, GenericField, GenericTimeUnit, SchemaLike, Sealed, SerdeArrowSchema, + Strategy, STRATEGY_KEY, }, }, }; @@ -26,7 +26,7 @@ impl SerdeArrowSchema { /// /// ```rust /// # fn main() -> serde_arrow::_impl::PanicOnError<()> { - /// # use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; + /// # use serde_arrow::schema::{SerdeArrowSchema, SchemaLike, TracingOptions}; /// # #[derive(serde::Deserialize)] /// # struct Item { a: u32 } /// # let schema = SerdeArrowSchema::from_type::(TracingOptions::default()).unwrap(); @@ -55,6 +55,29 @@ impl TryFrom for Vec { } } +impl Sealed for Vec {} + +/// Schema support for `Vec` (*requires one of the +/// `arrow2-*` features*) +impl SchemaLike for Vec { + fn from_value(value: &T) -> Result { + SerdeArrowSchema::from_value(value)?.to_arrow2_fields() + } + + fn from_type<'de, T: serde::Deserialize<'de>>( + options: crate::schema::TracingOptions, + ) -> Result { + SerdeArrowSchema::from_type::(options)?.to_arrow2_fields() + } + + fn from_samples( + samples: &T, + options: crate::schema::TracingOptions, + ) -> Result { + SerdeArrowSchema::from_samples(samples, options)?.to_arrow2_fields() + } +} + impl TryFrom<&Field> for GenericField { type Error = Error; diff --git a/serde_arrow/src/arrow_impl/api.rs b/serde_arrow/src/arrow_impl/api.rs index d9f2036b..50a06e1f 100644 --- a/serde_arrow/src/arrow_impl/api.rs +++ b/serde_arrow/src/arrow_impl/api.rs @@ -117,8 +117,10 @@ impl ArrowBuilder { /// /// ```rust /// # fn main() -> serde_arrow::Result<()> { +/// # use serde_arrow::_impl::arrow; +/// use 
arrow::datatypes::Field; /// use serde::{Serialize, Deserialize}; -/// use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; +/// use serde_arrow::schema::{SchemaLike, TracingOptions}; /// /// ##[derive(Serialize, Deserialize)] /// struct Record { @@ -131,8 +133,7 @@ impl ArrowBuilder { /// // ... /// ]; /// -/// let fields = SerdeArrowSchema::from_type::(TracingOptions::default())? -/// .to_arrow_fields()?; +/// let fields = Vec::::from_type::(TracingOptions::default())?; /// let arrays = serde_arrow::to_arrow(&fields, &items)?; /// # /// # assert_eq!(arrays.len(), 2); @@ -161,8 +162,10 @@ pub fn to_arrow(fields: &[Field], items: &T) -> Result serde_arrow::Result<()> { +/// # use serde_arrow::_impl::arrow; +/// use arrow::datatypes::Field; /// use serde::{Deserialize, Serialize}; -/// use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; +/// use serde_arrow::schema::{SchemaLike, TracingOptions}; /// /// ##[derive(Deserialize, Serialize)] /// struct Record { @@ -171,8 +174,7 @@ pub fn to_arrow(fields: &[Field], items: &T) -> Result(TracingOptions::default())? -/// .to_arrow_fields()?; +/// let fields = Vec::::from_type::(TracingOptions::default())?; /// # let items = &[Record { a: Some(1.0), b: 2}]; /// # let arrays = serde_arrow::to_arrow(&fields, &items).unwrap(); /// # @@ -220,13 +222,15 @@ where } /// Replaced by -/// [`SerdeArrowSchema::from_samples`][crate::schema::SerdeArrowSchema::from_samples] +/// [`SchemaLike::from_samples`][crate::schema::SchemaLike::from_samples] /// (*[example][serialize_into_fields]*) /// /// ```rust /// # fn main() -> serde_arrow::Result<()> { +/// # use serde_arrow::_impl::arrow; +/// use arrow::datatypes::Field; /// use serde::Serialize; -/// use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; +/// use serde_arrow::schema::{SchemaLike, TracingOptions}; /// /// ##[derive(Serialize)] /// struct Record { @@ -235,8 +239,7 @@ where /// } /// /// let samples = [Record { a: 1, b: 2.0 }, /* ... 
*/ ]; -/// let fields = SerdeArrowSchema::from_samples(&samples, TracingOptions::default())? -/// .to_arrow_fields()?; +/// let fields = Vec::::from_samples(&samples, TracingOptions::default())?; /// # /// # drop(fields); /// # Ok(()) @@ -255,19 +258,20 @@ where } /// Replaced by -/// [`SerdeArrowSchema::from_samples`][crate::schema::SerdeArrowSchema::from_samples] -/// and [`Items`][crate::utils::Items] (*[example][serialize_into_field]*) +/// [`SchemaLike::from_samples`][crate::schema::SchemaLike::from_samples] and +/// [`Items`][crate::utils::Items] (*[example][serialize_into_field]*) /// /// ```rust /// # fn main() -> serde_arrow::Result<()> { +/// # use serde_arrow::_impl::arrow; +/// use arrow::datatypes::Field; /// use serde_arrow::{ -/// schema::{SerdeArrowSchema, TracingOptions}, +/// schema::{SchemaLike, TracingOptions}, /// utils::Items, /// }; /// /// let samples: Vec = vec![1, 2, 3, /* ... */ ]; -/// let fields = SerdeArrowSchema::from_samples(&Items(&samples), TracingOptions::default())? -/// .to_arrow_fields()?; +/// let fields = Vec::::from_samples(&Items(&samples), TracingOptions::default())?; /// # /// # drop(fields); /// # Ok(()) @@ -309,14 +313,15 @@ where /// /// ```rust /// # fn main() -> serde_arrow::Result<()> { +/// # use serde_arrow::_impl::arrow; +/// use arrow::datatypes::Field; /// use serde_arrow::{ -/// schema::{SerdeArrowSchema, TracingOptions}, +/// schema::{SchemaLike, TracingOptions}, /// utils::Items, /// }; /// /// let samples: Vec = vec![1, 2, 3, /* ... */ ]; -/// let fields = SerdeArrowSchema::from_samples(&Items(&samples), TracingOptions::default())? 
-/// .to_arrow_fields()?; +/// let fields = Vec::::from_samples(&Items(&samples), TracingOptions::default())?; /// /// let arrays = serde_arrow::to_arrow(&fields, &Items(&samples))?; /// # @@ -345,7 +350,7 @@ where /// /// ```rust /// # fn main() -> serde_arrow::Result<()> { -/// # use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; +/// # use serde_arrow::schema::{SerdeArrowSchema, SchemaLike, TracingOptions}; /// # let samples: Vec = vec![1, 2, 3, /* ... */ ]; /// # let fields = SerdeArrowSchema::from_samples(&Items(&samples), TracingOptions::default())? /// # .to_arrow_fields()?; diff --git a/serde_arrow/src/arrow_impl/schema.rs b/serde_arrow/src/arrow_impl/schema.rs index 63fd5612..03c0f6e5 100644 --- a/serde_arrow/src/arrow_impl/schema.rs +++ b/serde_arrow/src/arrow_impl/schema.rs @@ -4,8 +4,8 @@ use crate::{ internal::{ error::{error, fail, Error, Result}, schema::{ - GenericDataType, GenericField, GenericTimeUnit, SerdeArrowSchema, Strategy, - STRATEGY_KEY, + GenericDataType, GenericField, GenericTimeUnit, SchemaLike, Sealed, SerdeArrowSchema, + Strategy, STRATEGY_KEY, }, }, }; @@ -27,7 +27,7 @@ impl SerdeArrowSchema { /// /// ```rust /// # fn main() -> serde_arrow::_impl::PanicOnError<()> { - /// # use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; + /// # use serde_arrow::schema::{SerdeArrowSchema, SchemaLike, TracingOptions}; /// # #[derive(serde::Deserialize)] /// # struct Item { a: u32 } /// # let schema = SerdeArrowSchema::from_type::(TracingOptions::default()).unwrap(); @@ -56,6 +56,29 @@ impl TryFrom for Vec { } } +impl Sealed for Vec {} + +/// Schema support for `Vec` (*requires one of the +/// `arrow-*` features*) +impl SchemaLike for Vec { + fn from_value(value: &T) -> Result { + SerdeArrowSchema::from_value(value)?.to_arrow_fields() + } + + fn from_type<'de, T: serde::Deserialize<'de>>( + options: crate::schema::TracingOptions, + ) -> Result { + SerdeArrowSchema::from_type::(options)?.to_arrow_fields() + } + + fn from_samples( 
+ samples: &T, + options: crate::schema::TracingOptions, + ) -> Result { + SerdeArrowSchema::from_samples(samples, options)?.to_arrow_fields() + } +} + impl TryFrom<&DataType> for GenericDataType { type Error = Error; diff --git a/serde_arrow/src/internal/schema.rs b/serde_arrow/src/internal/schema.rs index 2370bf0f..dee52301 100644 --- a/serde_arrow/src/internal/schema.rs +++ b/serde_arrow/src/internal/schema.rs @@ -16,58 +16,47 @@ use serde::{Deserialize, Serialize}; /// pub const STRATEGY_KEY: &str = "SERDE_ARROW:strategy"; -/// A collection of fields as understood by `serde_arrow` +pub trait Sealed {} + +/// A sealed trait to add support for constructing schema-like objects /// /// There are three main ways to specify the schema: /// -/// 1. [`SerdeArrowSchema::from_value`]: specify the schema manually, e.g., as a -/// JSON value -/// 2. [`SerdeArrowSchema::from_type`]: determine the schema from the record -/// type -/// 3. [`SerdeArrowSchema::from_samples`]: Determine the schema from samples of -/// the data +/// 1. [`SchemaLike::from_value`]: specify the schema manually, e.g., as a JSON +/// value +/// 2. [`SchemaLike::from_type`]: determine the schema from the record type +/// 3. 
[`SchemaLike::from_samples`]: Determine the schema from samples of the +/// data /// -#[derive(Default, Debug, PartialEq, Clone, Serialize, Deserialize)] -#[serde(from = "SchemaSerializationOptions")] -pub struct SerdeArrowSchema { - pub(crate) fields: Vec, -} - -#[derive(Deserialize)] -#[serde(untagged)] -enum SchemaSerializationOptions { - FieldsOnly(Vec), - FullSchema { fields: Vec }, -} - -impl From for SerdeArrowSchema { - fn from(value: SchemaSerializationOptions) -> Self { - use SchemaSerializationOptions::*; - match value { - FieldsOnly(fields) | FullSchema { fields } => Self { fields }, - } - } -} - -impl SerdeArrowSchema { - /// Return a new schema (empty) instance - pub fn new() -> Self { - Self::default() - } - +/// The following types implement [`SchemaLike`] and can be constructed in this +/// way: +/// +/// - [`SerdeArrowSchema`] +#[cfg_attr( + has_arrow, + doc = "- `Vec<`[`arrow::datatypes::Field`][crate::_impl::arrow::datatypes::Field]`>`" +)] +#[cfg_attr( + has_arrow2, + doc = "- `Vec<`[`arrow2::datatypes::Field`][crate::_impl::arrow2::datatypes::Field]`>`" +)] +/// +pub trait SchemaLike: Sized + Sealed { /// Build the schema from an object that implements serialize (e.g., /// `serde_json::Value`) /// /// ```rust /// # fn main() -> serde_arrow::_impl::PanicOnError<()> { - /// use serde_arrow::schema::SerdeArrowSchema; + /// # use serde_arrow::_impl::arrow; + /// use arrow::datatypes::Field; + /// use serde_arrow::schema::SchemaLike; /// /// let schema = serde_json::json!([ /// {"name": "foo", "data_type": "U8"}, /// {"name": "bar", "data_type": "Utf8"}, /// ]); /// - /// let schema = SerdeArrowSchema::from_value(&schema)?; + /// let fields = Vec::::from_value(&schema)?; /// # Ok(()) /// # } /// ``` @@ -119,13 +108,7 @@ impl SerdeArrowSchema { /// fields, named `"key"` of integer type and named `"value"` of string /// type /// - pub fn from_value(value: &T) -> Result { - // simple version of serde-transcode - let mut events = Vec::::new(); - 
crate::internal::sink::serialize_into_sink(&mut events, value)?; - let this: Self = crate::internal::source::deserialize_from_source(&events)?; - Ok(this) - } + fn from_value(value: &T) -> Result; /// Determine the schema from the given record type /// @@ -143,9 +126,9 @@ impl SerdeArrowSchema { /// ```rust /// # fn main() -> serde_arrow::_impl::PanicOnError<()> { /// # use serde_arrow::_impl::arrow; - /// use arrow::datatypes::DataType; + /// use arrow::datatypes::{DataType, Field}; /// use serde::Deserialize; - /// use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; + /// use serde_arrow::schema::{SchemaLike, TracingOptions}; /// /// ##[derive(Deserialize)] /// struct Record { @@ -154,8 +137,7 @@ impl SerdeArrowSchema { /// string: String, /// } /// - /// let schema = SerdeArrowSchema::from_type::(TracingOptions::default())?; - /// let fields = schema.to_arrow_fields()?; + /// let fields = Vec::::from_type::(TracingOptions::default())?; /// /// assert_eq!(*fields[0].data_type(), DataType::Int32); /// assert_eq!(*fields[1].data_type(), DataType::Float64); @@ -164,11 +146,7 @@ impl SerdeArrowSchema { /// # } /// ``` /// - pub fn from_type<'de, T: Deserialize<'de>>(options: TracingOptions) -> Result { - let mut tracer = Tracer::new(String::from("$"), options); - tracer.trace_type::()?; - tracer.to_schema() - } + fn from_type<'de, T: Deserialize<'de>>(options: TracingOptions) -> Result; /// Determine the schema from the given samples /// @@ -190,9 +168,9 @@ impl SerdeArrowSchema { /// ```rust /// # fn main() -> serde_arrow::_impl::PanicOnError<()> { /// # use serde_arrow::_impl::arrow; - /// use arrow::datatypes::DataType; + /// use arrow::datatypes::{DataType, Field}; /// use serde::Serialize; - /// use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; + /// use serde_arrow::schema::{SchemaLike, TracingOptions}; /// /// ##[derive(Serialize)] /// struct Record { @@ -215,8 +193,7 @@ impl SerdeArrowSchema { /// // ... 
/// ]; /// - /// let schema = SerdeArrowSchema::from_samples(&samples, TracingOptions::default())?; - /// let fields = schema.to_arrow_fields()?; + /// let fields = Vec::::from_samples(&samples, TracingOptions::default())?; /// /// assert_eq!(*fields[0].data_type(), DataType::Int32); /// assert_eq!(*fields[1].data_type(), DataType::Float64); @@ -225,7 +202,57 @@ impl SerdeArrowSchema { /// # } /// ``` /// - pub fn from_samples(samples: &T, options: TracingOptions) -> Result { + fn from_samples(samples: &T, options: TracingOptions) -> Result; +} + +/// A collection of fields as understood by `serde_arrow` +#[derive(Default, Debug, PartialEq, Clone, Serialize, Deserialize)] +#[serde(from = "SchemaSerializationOptions")] +pub struct SerdeArrowSchema { + pub(crate) fields: Vec, +} + +#[derive(Deserialize)] +#[serde(untagged)] +enum SchemaSerializationOptions { + FieldsOnly(Vec), + FullSchema { fields: Vec }, +} + +impl From for SerdeArrowSchema { + fn from(value: SchemaSerializationOptions) -> Self { + use SchemaSerializationOptions::*; + match value { + FieldsOnly(fields) | FullSchema { fields } => Self { fields }, + } + } +} + +impl SerdeArrowSchema { + /// Return a new schema without any fields + pub fn new() -> Self { + Self::default() + } +} + +impl Sealed for SerdeArrowSchema {} + +impl SchemaLike for SerdeArrowSchema { + fn from_value(value: &T) -> Result { + // simple version of serde-transcode + let mut events = Vec::::new(); + crate::internal::sink::serialize_into_sink(&mut events, value)?; + let this: Self = crate::internal::source::deserialize_from_source(&events)?; + Ok(this) + } + + fn from_type<'de, T: Deserialize<'de>>(options: TracingOptions) -> Result { + let mut tracer = Tracer::new(String::from("$"), options); + tracer.trace_type::()?; + tracer.to_schema() + } + + fn from_samples(samples: &T, options: TracingOptions) -> Result { let mut tracer = Tracer::new(String::from("$"), options); tracer.trace_samples(samples)?; tracer.to_schema() diff --git 
a/serde_arrow/src/lib.rs b/serde_arrow/src/lib.rs index 2786ace3..e79d6107 100644 --- a/serde_arrow/src/lib.rs +++ b/serde_arrow/src/lib.rs @@ -20,8 +20,7 @@ //! schema should contain a `Date64`. `serde_arrow` supports to derive the //! schema from the data itself via schema tracing, but does not require it. It //! is always possible to specify the schema manually. See the [`schema` -//! module][schema] and [`SerdeArrowSchema`][schema::SerdeArrowSchema] for -//! further details. +//! module][schema] and [`SchemaLike`][schema::SchemaLike] for further details. //! //! ## Overview //! diff --git a/serde_arrow/src/schema.rs b/serde_arrow/src/schema.rs index 1aaaadd9..d39d602d 100644 --- a/serde_arrow/src/schema.rs +++ b/serde_arrow/src/schema.rs @@ -2,7 +2,7 @@ //! //! To convert between Rust objects and Arrow types, `serde_arrows` requires //! schema information as a list of Arrow fields with additional meta data. See -//! [`SerdeArrowSchema`] for details on how to specify the schema. +//! [`SchemaLike`] for details on how to specify the schema. //! //! The default mapping of Rust types to [Arrow types][arrow-types] is as follows: //! @@ -38,7 +38,7 @@ //! # fn main() {} //! 
``` pub use crate::internal::{ - schema::{SerdeArrowSchema, Strategy, STRATEGY_KEY}, + schema::{SchemaLike, SerdeArrowSchema, Strategy, STRATEGY_KEY}, tracing::TracingOptions, }; diff --git a/serde_arrow/src/test_end_to_end/issue_90.rs b/serde_arrow/src/test_end_to_end/issue_90.rs index 97dc7908..58ce1623 100644 --- a/serde_arrow/src/test_end_to_end/issue_90.rs +++ b/serde_arrow/src/test_end_to_end/issue_90.rs @@ -3,7 +3,11 @@ use std::sync::Arc; use serde::{Deserialize, Serialize}; -use crate::{self as serde_arrow, internal::error::PanicOnError, schema::TracingOptions}; +use crate::{ + self as serde_arrow, + internal::error::PanicOnError, + schema::{SchemaLike, TracingOptions}, +}; use crate::_impl::arrow::{ _raw::{array::RecordBatch, schema::Schema}, @@ -39,10 +43,7 @@ fn example() -> PanicOnError<()> { VectorMetric { distribution: None }, ]; - use serde_arrow::schema::SerdeArrowSchema; - - let fields: Vec = - SerdeArrowSchema::from_type::(TracingOptions::default())?.try_into()?; + let fields = Vec::::from_type::(TracingOptions::default())?; let arrays = serde_arrow::to_arrow(&fields, &metrics)?; let batch = RecordBatch::try_new(Arc::new(Schema::new(fields.clone())), arrays.clone())?; @@ -56,10 +57,8 @@ fn example() -> PanicOnError<()> { #[test] fn example_top_level_none() -> PanicOnError<()> { - use serde_arrow::schema::SerdeArrowSchema; - // top-level options are not supported if fields are are extracted - let res = SerdeArrowSchema::from_type::>(TracingOptions::default()); + let res = Vec::::from_type::>(TracingOptions::default()); assert!(res.is_err()); Ok(()) } diff --git a/serde_arrow/src/test_impls/json_values.rs b/serde_arrow/src/test_impls/json_values.rs index bbad1b2f..41ff599e 100644 --- a/serde_arrow/src/test_impls/json_values.rs +++ b/serde_arrow/src/test_impls/json_values.rs @@ -7,10 +7,7 @@ test_generic!( let tracing_options = TracingOptions::default(); let items = vec![json!({ "a": 1, "b": 2 }), json!({ "a": 3, "b": 4 })]; - let fields: Vec = 
SerdeArrowSchema::from_samples(&items, tracing_options) - .unwrap() - .try_into() - .unwrap(); + let fields = Vec::::from_samples(&items, tracing_options).unwrap(); let arrays = to_arrow(&fields, &items).unwrap(); drop(arrays); @@ -24,10 +21,7 @@ test_generic!( let tracing_options = TracingOptions::default().coerce_numbers(true); let items = vec![json!({ "a": 1, "b": -2 }), json!({ "a": 3.0, "b": 4 })]; - let fields: Vec = SerdeArrowSchema::from_samples(&items, tracing_options) - .unwrap() - .try_into() - .unwrap(); + let fields = Vec::::from_samples(&items, tracing_options).unwrap(); let arrays = to_arrow(&fields, &items).unwrap(); drop(arrays); diff --git a/serde_arrow/src/test_impls/macros.rs b/serde_arrow/src/test_impls/macros.rs index 5621d751..e1c8438c 100644 --- a/serde_arrow/src/test_impls/macros.rs +++ b/serde_arrow/src/test_impls/macros.rs @@ -49,10 +49,10 @@ macro_rules! test_example_impl { use super::*; use crate::{ - schema::{SerdeArrowSchema, TracingOptions, Strategy}, + schema::{TracingOptions, Strategy}, utils::{Items, Item}, }; - use crate::internal::schema::{GenericDataType, GenericField, GenericTimeUnit}; + use crate::internal::schema::{GenericDataType, GenericField, GenericTimeUnit, SchemaLike}; use crate::test_impls::macros::{btree_map, hash_map}; $(#[ignore = $ignore])? @@ -69,7 +69,7 @@ macro_rules! test_example_impl { println!("{options:?}"); - let actual: Vec = SerdeArrowSchema::from_samples(&Items(items), options).unwrap().try_into().unwrap(); + let actual = Vec::::from_samples(&Items(items), options).unwrap(); let expected: Vec = vec![(&field).try_into().unwrap()]; assert_eq!( actual, @@ -413,7 +413,7 @@ macro_rules! 
test_generic { #[allow(unused)] mod $name { use crate::{ - schema::{SerdeArrowSchema, TracingOptions}, + schema::{SchemaLike, TracingOptions}, utils::{Items, Item} }; use crate::internal::schema::{GenericField, GenericDataType}; diff --git a/serde_arrow/src/test_impls/union.rs b/serde_arrow/src/test_impls/union.rs index 7dd51850..641d264d 100644 --- a/serde_arrow/src/test_impls/union.rs +++ b/serde_arrow/src/test_impls/union.rs @@ -242,11 +242,7 @@ test_generic!( } let tracing_options = TracingOptions::default().allow_null_fields(true); - let fields: Vec = - SerdeArrowSchema::from_samples(&Items(&[U::A, U::C]), tracing_options) - .unwrap() - .try_into() - .unwrap(); + let fields = Vec::::from_samples(&Items(&[U::A, U::C]), tracing_options).unwrap(); // NOTE: variant B was never encountered during tracing let res = to_arrow(&fields, &Items(&[U::A, U::B, U::C])); diff --git a/serde_arrow/src/test_impls/wrappers.rs b/serde_arrow/src/test_impls/wrappers.rs index d06ed2a1..acd1fdca 100644 --- a/serde_arrow/src/test_impls/wrappers.rs +++ b/serde_arrow/src/test_impls/wrappers.rs @@ -16,11 +16,7 @@ use super::macros::test_generic; test_generic!( fn outer_vec() { let items: Vec = vec![0_u32, 1_u32, 2_u32]; - let fields: Vec = - SerdeArrowSchema::from_samples(&Items(&items), TracingOptions::default()) - .unwrap() - .try_into() - .unwrap(); + let fields = Vec::::from_samples(&Items(&items), TracingOptions::default()).unwrap(); let arrays = to_arrow(&fields, &Items(&items)).unwrap(); drop(arrays); @@ -30,11 +26,7 @@ test_generic!( test_generic!( fn outer_slice() { let items: &[u32] = &[0_u32, 1_u32, 2_u32]; - let fields: Vec = - SerdeArrowSchema::from_samples(&Items(items), TracingOptions::default()) - .unwrap() - .try_into() - .unwrap(); + let fields = Vec::::from_samples(&Items(items), TracingOptions::default()).unwrap(); let arrays = to_arrow(&fields, &Items(items)).unwrap(); drop(arrays); @@ -44,11 +36,7 @@ test_generic!( test_generic!( fn outer_array() { let items: &[u32; 
3] = &[0_u32, 1_u32, 2_u32]; - let fields: Vec = - SerdeArrowSchema::from_samples(&Items(items), TracingOptions::default()) - .unwrap() - .try_into() - .unwrap(); + let fields = Vec::::from_samples(&Items(items), TracingOptions::default()).unwrap(); let arrays = to_arrow(&fields, &Items(items)).unwrap(); drop(arrays); @@ -68,10 +56,7 @@ test_generic!( Item { item: 1_u32 }, Item { item: 2_u32 }, ); - let fields: Vec = SerdeArrowSchema::from_samples(items, TracingOptions::default()) - .unwrap() - .try_into() - .unwrap(); + let fields = Vec::::from_samples(items, TracingOptions::default()).unwrap(); let arrays = to_arrow(&fields, &items).unwrap(); drop(arrays);