From 91973de72159957997fd660139e5f58611174552 Mon Sep 17 00:00:00 2001 From: Christopher Prohm Date: Thu, 2 Nov 2023 18:25:12 +0100 Subject: [PATCH 1/7] Add a trait `SchemaLike` for schema construction --- Changes.md | 10 +- example/src/main.rs | 2 +- serde_arrow/benches/groups/impls.rs | 2 +- serde_arrow/src/arrow2_impl/api.rs | 45 +++--- serde_arrow/src/arrow2_impl/schema.rs | 29 +++- serde_arrow/src/arrow_impl/api.rs | 43 +++--- serde_arrow/src/arrow_impl/schema.rs | 29 +++- serde_arrow/src/internal/schema.rs | 143 ++++++++++++-------- serde_arrow/src/lib.rs | 3 +- serde_arrow/src/schema.rs | 4 +- serde_arrow/src/test_end_to_end/issue_90.rs | 15 +- serde_arrow/src/test_impls/json_values.rs | 10 +- serde_arrow/src/test_impls/macros.rs | 8 +- serde_arrow/src/test_impls/union.rs | 6 +- serde_arrow/src/test_impls/wrappers.rs | 23 +--- 15 files changed, 215 insertions(+), 157 deletions(-) diff --git a/Changes.md b/Changes.md index ee076a02..209ee6be 100644 --- a/Changes.md +++ b/Changes.md @@ -11,11 +11,13 @@ Breaking changes: Improvements: -- Simpler and streamlined API +- Simpler and streamlined API (`to_arrow` / `from_arrow` and `to_arrow2` / + `from_arrow2`) +- Add `SchemaLike` trait to support direct construction of arrow / arrow2 fields - Add type based tracing to allow schema tracing without samples - (`SerdeArrowSchema::form_type()`) + (`SchemaLike::from_type()`) - Allow to build schema objects from serializable objects, e.g., - `serde_json::Value` (`SerdeArrow::from_value()`) + `serde_json::Value` (`SchemaLike::from_value()`) - Add support for `arrow=47` and `arrow=48` Deprecations (see the documentation of deprecated items for how to migratie): @@ -26,7 +28,7 @@ Deprecations (see the documentation of deprecated items for how to migratie): - Deprecate `serialize_into_arrays`, `deserialize_from_arrays` methods in favor of `to_arrow` / `to_arrow2` and `from_arrow` / `from_arrow2` - Deprecate `serialize_into_fields` methods in favor of - 
`SerdeArrowSchema::from_samples` + `SchemaLike::from_samples` - Deprecated single item methods in favor of using the `Items` and `Item` wrappers diff --git a/example/src/main.rs b/example/src/main.rs index 33fc6510..7b629a8c 100644 --- a/example/src/main.rs +++ b/example/src/main.rs @@ -78,7 +78,7 @@ fn main() -> Result<(), PanicOnError> { }, ]; - use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; + use serde_arrow::schema::{SchemaLike, SerdeArrowSchema, TracingOptions}; let fields: Vec = SerdeArrowSchema::from_samples(&examples, TracingOptions::default().guess_dates(true))? diff --git a/serde_arrow/benches/groups/impls.rs b/serde_arrow/benches/groups/impls.rs index 8ed7d37c..6cdb7a83 100644 --- a/serde_arrow/benches/groups/impls.rs +++ b/serde_arrow/benches/groups/impls.rs @@ -12,7 +12,7 @@ macro_rules! define_benchmark { )? ) => { pub fn benchmark_serialize(c: &mut criterion::Criterion) { - use serde_arrow::schema::SerdeArrowSchema; + use serde_arrow::schema::{SerdeArrowSchema, SchemaLike}; for n in [$($n),*] { let mut group = c.benchmark_group(format!("{}_serialize({})", stringify!($name), n)); diff --git a/serde_arrow/src/arrow2_impl/api.rs b/serde_arrow/src/arrow2_impl/api.rs index ed5c4a69..f80c8963 100644 --- a/serde_arrow/src/arrow2_impl/api.rs +++ b/serde_arrow/src/arrow2_impl/api.rs @@ -17,7 +17,7 @@ use crate::{ }, }; -/// Build arrow2 arrays record by record (*requires one of the `arrow2-*` +/// Build arrow2 arrays record by record (*requires one of the `arrow2-*` /// features*) /// /// The given items should be records (e.g., structs). 
To serialize items @@ -115,8 +115,10 @@ impl Arrow2Builder { /// /// ```rust /// # fn main() -> serde_arrow::Result<()> { +/// # use serde_arrow::_impl::arrow2; +/// use arrow2::datatypes::Field; /// use serde::{Serialize, Deserialize}; -/// use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; +/// use serde_arrow::schema::{SchemaLike, TracingOptions}; /// /// ##[derive(Serialize, Deserialize)] /// struct Record { @@ -129,8 +131,7 @@ impl Arrow2Builder { /// // ... /// ]; /// -/// let fields = SerdeArrowSchema::from_type::(TracingOptions::default())?. -/// to_arrow2_fields()?; +/// let fields = Vec::::from_type::(TracingOptions::default())?; /// /// let arrays = serde_arrow::to_arrow2(&fields, &items)?; /// # @@ -164,8 +165,10 @@ where /// /// ```rust /// # fn main() -> serde_arrow::Result<()> { +/// # use serde_arrow::_impl::arrow2; +/// use arrow2::datatypes::Field; /// use serde::{Deserialize, Serialize}; -/// use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; +/// use serde_arrow::schema::{SchemaLike, TracingOptions}; /// /// ##[derive(Deserialize, Serialize)] /// struct Record { @@ -173,8 +176,7 @@ where /// b: u64, /// } /// -/// let fields = SerdeArrowSchema::from_type::(TracingOptions::default())? 
-/// .to_arrow2_fields()?; +/// let fields = Vec::::from_type::(TracingOptions::default())?; /// # let items = &[Record { a: Some(1.0), b: 2}]; /// # let arrays = serde_arrow::to_arrow2(&fields, &items).unwrap(); /// # @@ -222,13 +224,15 @@ where } /// Replaced by -/// [`SerdeArrowSchema::from_samples`][crate::schema::SerdeArrowSchema::from_samples] +/// [`SchemaLike::from_samples`][crate::schema::SchemaLike::from_samples] /// (*[example][serialize_into_fields]*) /// /// ```rust /// # fn main() -> serde_arrow::Result<()> { +/// # use serde_arrow::_impl::arrow2; +/// use arrow2::datatypes::Field; /// use serde::Serialize; -/// use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; +/// use serde_arrow::schema::{SchemaLike, TracingOptions}; /// /// ##[derive(Serialize)] /// struct Record { @@ -237,8 +241,7 @@ where /// } /// /// let samples = [Record { a: 1, b: 2.0 }, /* ... */ ]; -/// let fields = SerdeArrowSchema::from_samples(&samples, TracingOptions::default())? -/// .to_arrow2_fields()?; +/// let fields = Vec::::from_samples(&samples, TracingOptions::default())?; /// # /// # drop(fields); /// # Ok(()) @@ -276,19 +279,20 @@ where } /// Replaced by -/// [`SerdeArrowSchema::from_samples`][crate::schema::SerdeArrowSchema::from_samples] -/// and [`Items`][crate::utils::Items] (*[example][serialize_into_field]*) +/// [`SchemaLike::from_samples`][crate::schema::SchemaLike::from_samples] and +/// [`Items`][crate::utils::Items] (*[example][serialize_into_field]*) /// /// ```rust /// # fn main() -> serde_arrow::Result<()> { +/// # use serde_arrow::_impl::arrow2; +/// use arrow2::datatypes::Field; /// use serde_arrow::{ -/// schema::{SerdeArrowSchema, TracingOptions}, +/// schema::{SchemaLike, TracingOptions}, /// utils::Items, /// }; /// /// let samples: Vec = vec![1, 2, 3, /* ... */ ]; -/// let fields = SerdeArrowSchema::from_samples(&Items(&samples), TracingOptions::default())? 
-/// .to_arrow2_fields()?; +/// let fields = Vec::::from_samples(&Items(&samples), TracingOptions::default())?; /// # /// # drop(fields); /// # Ok(()) @@ -310,14 +314,15 @@ where /// /// ```rust /// # fn main() -> serde_arrow::Result<()> { +/// # use serde_arrow::_impl::arrow2; +/// use arrow2::datatypes::Field; /// use serde_arrow::{ -/// schema::{SerdeArrowSchema, TracingOptions}, +/// schema::{SchemaLike, TracingOptions}, /// utils::Items, /// }; /// /// let samples: Vec = vec![1, 2, 3, /* ... */ ]; -/// let fields = SerdeArrowSchema::from_samples(&Items(&samples), TracingOptions::default())? -/// .to_arrow2_fields()?; +/// let fields = Vec::::from_samples(&Items(&samples), TracingOptions::default())?; /// /// let arrays = serde_arrow::to_arrow2(&fields, &Items(&samples))?; /// # @@ -346,7 +351,7 @@ where /// /// ```rust /// # fn main() -> serde_arrow::Result<()> { -/// # use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; +/// # use serde_arrow::schema::{SerdeArrowSchema, SchemaLike, TracingOptions}; /// # let samples: Vec = vec![1, 2, 3, /* ... */ ]; /// # let fields = SerdeArrowSchema::from_samples(&Items(&samples), TracingOptions::default())? 
/// # .to_arrow2_fields()?; diff --git a/serde_arrow/src/arrow2_impl/schema.rs b/serde_arrow/src/arrow2_impl/schema.rs index 8b53124f..da360c05 100644 --- a/serde_arrow/src/arrow2_impl/schema.rs +++ b/serde_arrow/src/arrow2_impl/schema.rs @@ -3,8 +3,8 @@ use crate::{ internal::{ error::{error, fail, Error, Result}, schema::{ - GenericDataType, GenericField, GenericTimeUnit, SerdeArrowSchema, Strategy, - STRATEGY_KEY, + GenericDataType, GenericField, GenericTimeUnit, SchemaLike, Sealed, SerdeArrowSchema, + Strategy, STRATEGY_KEY, }, }, }; @@ -26,7 +26,7 @@ impl SerdeArrowSchema { /// /// ```rust /// # fn main() -> serde_arrow::_impl::PanicOnError<()> { - /// # use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; + /// # use serde_arrow::schema::{SerdeArrowSchema, SchemaLike, TracingOptions}; /// # #[derive(serde::Deserialize)] /// # struct Item { a: u32 } /// # let schema = SerdeArrowSchema::from_type::(TracingOptions::default()).unwrap(); @@ -55,6 +55,29 @@ impl TryFrom for Vec { } } +impl Sealed for Vec {} + +/// Schema support for `Vec` (*requires one of the +/// `arrow2-*` features*) +impl SchemaLike for Vec { + fn from_value(value: &T) -> Result { + SerdeArrowSchema::from_value(value)?.to_arrow2_fields() + } + + fn from_type<'de, T: serde::Deserialize<'de>>( + options: crate::schema::TracingOptions, + ) -> Result { + SerdeArrowSchema::from_type::(options)?.to_arrow2_fields() + } + + fn from_samples( + samples: &T, + options: crate::schema::TracingOptions, + ) -> Result { + SerdeArrowSchema::from_samples(samples, options)?.to_arrow2_fields() + } +} + impl TryFrom<&Field> for GenericField { type Error = Error; diff --git a/serde_arrow/src/arrow_impl/api.rs b/serde_arrow/src/arrow_impl/api.rs index d9f2036b..50a06e1f 100644 --- a/serde_arrow/src/arrow_impl/api.rs +++ b/serde_arrow/src/arrow_impl/api.rs @@ -117,8 +117,10 @@ impl ArrowBuilder { /// /// ```rust /// # fn main() -> serde_arrow::Result<()> { +/// # use serde_arrow::_impl::arrow; +/// use 
arrow::datatypes::Field; /// use serde::{Serialize, Deserialize}; -/// use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; +/// use serde_arrow::schema::{SchemaLike, TracingOptions}; /// /// ##[derive(Serialize, Deserialize)] /// struct Record { @@ -131,8 +133,7 @@ impl ArrowBuilder { /// // ... /// ]; /// -/// let fields = SerdeArrowSchema::from_type::(TracingOptions::default())? -/// .to_arrow_fields()?; +/// let fields = Vec::::from_type::(TracingOptions::default())?; /// let arrays = serde_arrow::to_arrow(&fields, &items)?; /// # /// # assert_eq!(arrays.len(), 2); @@ -161,8 +162,10 @@ pub fn to_arrow(fields: &[Field], items: &T) -> Result serde_arrow::Result<()> { +/// # use serde_arrow::_impl::arrow; +/// use arrow::datatypes::Field; /// use serde::{Deserialize, Serialize}; -/// use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; +/// use serde_arrow::schema::{SchemaLike, TracingOptions}; /// /// ##[derive(Deserialize, Serialize)] /// struct Record { @@ -171,8 +174,7 @@ pub fn to_arrow(fields: &[Field], items: &T) -> Result(TracingOptions::default())? -/// .to_arrow_fields()?; +/// let fields = Vec::::from_type::(TracingOptions::default())?; /// # let items = &[Record { a: Some(1.0), b: 2}]; /// # let arrays = serde_arrow::to_arrow(&fields, &items).unwrap(); /// # @@ -220,13 +222,15 @@ where } /// Replaced by -/// [`SerdeArrowSchema::from_samples`][crate::schema::SerdeArrowSchema::from_samples] +/// [`SchemaLike::from_samples`][crate::schema::SchemaLike::from_samples] /// (*[example][serialize_into_fields]*) /// /// ```rust /// # fn main() -> serde_arrow::Result<()> { +/// # use serde_arrow::_impl::arrow; +/// use arrow::datatypes::Field; /// use serde::Serialize; -/// use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; +/// use serde_arrow::schema::{SchemaLike, TracingOptions}; /// /// ##[derive(Serialize)] /// struct Record { @@ -235,8 +239,7 @@ where /// } /// /// let samples = [Record { a: 1, b: 2.0 }, /* ... 
*/ ]; -/// let fields = SerdeArrowSchema::from_samples(&samples, TracingOptions::default())? -/// .to_arrow_fields()?; +/// let fields = Vec::::from_samples(&samples, TracingOptions::default())?; /// # /// # drop(fields); /// # Ok(()) @@ -255,19 +258,20 @@ where } /// Replaced by -/// [`SerdeArrowSchema::from_samples`][crate::schema::SerdeArrowSchema::from_samples] -/// and [`Items`][crate::utils::Items] (*[example][serialize_into_field]*) +/// [`SchemaLike::from_samples`][crate::schema::SchemaLike::from_samples] and +/// [`Items`][crate::utils::Items] (*[example][serialize_into_field]*) /// /// ```rust /// # fn main() -> serde_arrow::Result<()> { +/// # use serde_arrow::_impl::arrow; +/// use arrow::datatypes::Field; /// use serde_arrow::{ -/// schema::{SerdeArrowSchema, TracingOptions}, +/// schema::{SchemaLike, TracingOptions}, /// utils::Items, /// }; /// /// let samples: Vec = vec![1, 2, 3, /* ... */ ]; -/// let fields = SerdeArrowSchema::from_samples(&Items(&samples), TracingOptions::default())? -/// .to_arrow_fields()?; +/// let fields = Vec::::from_samples(&Items(&samples), TracingOptions::default())?; /// # /// # drop(fields); /// # Ok(()) @@ -309,14 +313,15 @@ where /// /// ```rust /// # fn main() -> serde_arrow::Result<()> { +/// # use serde_arrow::_impl::arrow; +/// use arrow::datatypes::Field; /// use serde_arrow::{ -/// schema::{SerdeArrowSchema, TracingOptions}, +/// schema::{SchemaLike, TracingOptions}, /// utils::Items, /// }; /// /// let samples: Vec = vec![1, 2, 3, /* ... */ ]; -/// let fields = SerdeArrowSchema::from_samples(&Items(&samples), TracingOptions::default())? 
-/// .to_arrow_fields()?; +/// let fields = Vec::::from_samples(&Items(&samples), TracingOptions::default())?; /// /// let arrays = serde_arrow::to_arrow(&fields, &Items(&samples))?; /// # @@ -345,7 +350,7 @@ where /// /// ```rust /// # fn main() -> serde_arrow::Result<()> { -/// # use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; +/// # use serde_arrow::schema::{SerdeArrowSchema, SchemaLike, TracingOptions}; /// # let samples: Vec = vec![1, 2, 3, /* ... */ ]; /// # let fields = SerdeArrowSchema::from_samples(&Items(&samples), TracingOptions::default())? /// # .to_arrow_fields()?; diff --git a/serde_arrow/src/arrow_impl/schema.rs b/serde_arrow/src/arrow_impl/schema.rs index 63fd5612..03c0f6e5 100644 --- a/serde_arrow/src/arrow_impl/schema.rs +++ b/serde_arrow/src/arrow_impl/schema.rs @@ -4,8 +4,8 @@ use crate::{ internal::{ error::{error, fail, Error, Result}, schema::{ - GenericDataType, GenericField, GenericTimeUnit, SerdeArrowSchema, Strategy, - STRATEGY_KEY, + GenericDataType, GenericField, GenericTimeUnit, SchemaLike, Sealed, SerdeArrowSchema, + Strategy, STRATEGY_KEY, }, }, }; @@ -27,7 +27,7 @@ impl SerdeArrowSchema { /// /// ```rust /// # fn main() -> serde_arrow::_impl::PanicOnError<()> { - /// # use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; + /// # use serde_arrow::schema::{SerdeArrowSchema, SchemaLike, TracingOptions}; /// # #[derive(serde::Deserialize)] /// # struct Item { a: u32 } /// # let schema = SerdeArrowSchema::from_type::(TracingOptions::default()).unwrap(); @@ -56,6 +56,29 @@ impl TryFrom for Vec { } } +impl Sealed for Vec {} + +/// Schema support for `Vec` (*requires one of the +/// `arrow-*` features*) +impl SchemaLike for Vec { + fn from_value(value: &T) -> Result { + SerdeArrowSchema::from_value(value)?.to_arrow_fields() + } + + fn from_type<'de, T: serde::Deserialize<'de>>( + options: crate::schema::TracingOptions, + ) -> Result { + SerdeArrowSchema::from_type::(options)?.to_arrow_fields() + } + + fn from_samples( 
+ samples: &T, + options: crate::schema::TracingOptions, + ) -> Result { + SerdeArrowSchema::from_samples(samples, options)?.to_arrow_fields() + } +} + impl TryFrom<&DataType> for GenericDataType { type Error = Error; diff --git a/serde_arrow/src/internal/schema.rs b/serde_arrow/src/internal/schema.rs index 2370bf0f..dee52301 100644 --- a/serde_arrow/src/internal/schema.rs +++ b/serde_arrow/src/internal/schema.rs @@ -16,58 +16,47 @@ use serde::{Deserialize, Serialize}; /// pub const STRATEGY_KEY: &str = "SERDE_ARROW:strategy"; -/// A collection of fields as understood by `serde_arrow` +pub trait Sealed {} + +/// A sealed trait to add support for constructing schema-like objects /// /// There are three main ways to specify the schema: /// -/// 1. [`SerdeArrowSchema::from_value`]: specify the schema manually, e.g., as a -/// JSON value -/// 2. [`SerdeArrowSchema::from_type`]: determine the schema from the record -/// type -/// 3. [`SerdeArrowSchema::from_samples`]: Determine the schema from samples of -/// the data +/// 1. [`SchemaLike::from_value`]: specify the schema manually, e.g., as a JSON +/// value +/// 2. [`SchemaLike::from_type`]: determine the schema from the record type +/// 3. 
[`SchemaLike::from_samples`]: Determine the schema from samples of the +/// data /// -#[derive(Default, Debug, PartialEq, Clone, Serialize, Deserialize)] -#[serde(from = "SchemaSerializationOptions")] -pub struct SerdeArrowSchema { - pub(crate) fields: Vec, -} - -#[derive(Deserialize)] -#[serde(untagged)] -enum SchemaSerializationOptions { - FieldsOnly(Vec), - FullSchema { fields: Vec }, -} - -impl From for SerdeArrowSchema { - fn from(value: SchemaSerializationOptions) -> Self { - use SchemaSerializationOptions::*; - match value { - FieldsOnly(fields) | FullSchema { fields } => Self { fields }, - } - } -} - -impl SerdeArrowSchema { - /// Return a new schema (empty) instance - pub fn new() -> Self { - Self::default() - } - +/// The following types implement [`SchemaLike`] and can be constructed in this +/// way: +/// +/// - [`SerdeArrowSchema`] +#[cfg_attr( + has_arrow, + doc = "- `Vec<`[`arrow::datatypes::Field`][crate::_impl::arrow::datatypes::Field]`>`" +)] +#[cfg_attr( + has_arrow2, + doc = "- `Vec<`[`arrow2::datatypes::Field`][crate::_impl::arrow2::datatypes::Field]`>`" +)] +/// +pub trait SchemaLike: Sized + Sealed { /// Build the schema from an object that implements serialize (e.g., /// `serde_json::Value`) /// /// ```rust /// # fn main() -> serde_arrow::_impl::PanicOnError<()> { - /// use serde_arrow::schema::SerdeArrowSchema; + /// # use serde_arrow::_impl::arrow; + /// use arrow::datatypes::Field; + /// use serde_arrow::schema::SchemaLike; /// /// let schema = serde_json::json!([ /// {"name": "foo", "data_type": "U8"}, /// {"name": "bar", "data_type": "Utf8"}, /// ]); /// - /// let schema = SerdeArrowSchema::from_value(&schema)?; + /// let fields = Vec::::from_value(&schema)?; /// # Ok(()) /// # } /// ``` @@ -119,13 +108,7 @@ impl SerdeArrowSchema { /// fields, named `"key"` of integer type and named `"value"` of string /// type /// - pub fn from_value(value: &T) -> Result { - // simple version of serde-transcode - let mut events = Vec::::new(); - 
crate::internal::sink::serialize_into_sink(&mut events, value)?; - let this: Self = crate::internal::source::deserialize_from_source(&events)?; - Ok(this) - } + fn from_value(value: &T) -> Result; /// Determine the schema from the given record type /// @@ -143,9 +126,9 @@ impl SerdeArrowSchema { /// ```rust /// # fn main() -> serde_arrow::_impl::PanicOnError<()> { /// # use serde_arrow::_impl::arrow; - /// use arrow::datatypes::DataType; + /// use arrow::datatypes::{DataType, Field}; /// use serde::Deserialize; - /// use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; + /// use serde_arrow::schema::{SchemaLike, TracingOptions}; /// /// ##[derive(Deserialize)] /// struct Record { @@ -154,8 +137,7 @@ impl SerdeArrowSchema { /// string: String, /// } /// - /// let schema = SerdeArrowSchema::from_type::(TracingOptions::default())?; - /// let fields = schema.to_arrow_fields()?; + /// let fields = Vec::::from_type::(TracingOptions::default())?; /// /// assert_eq!(*fields[0].data_type(), DataType::Int32); /// assert_eq!(*fields[1].data_type(), DataType::Float64); @@ -164,11 +146,7 @@ impl SerdeArrowSchema { /// # } /// ``` /// - pub fn from_type<'de, T: Deserialize<'de>>(options: TracingOptions) -> Result { - let mut tracer = Tracer::new(String::from("$"), options); - tracer.trace_type::()?; - tracer.to_schema() - } + fn from_type<'de, T: Deserialize<'de>>(options: TracingOptions) -> Result; /// Determine the schema from the given samples /// @@ -190,9 +168,9 @@ impl SerdeArrowSchema { /// ```rust /// # fn main() -> serde_arrow::_impl::PanicOnError<()> { /// # use serde_arrow::_impl::arrow; - /// use arrow::datatypes::DataType; + /// use arrow::datatypes::{DataType, Field}; /// use serde::Serialize; - /// use serde_arrow::schema::{SerdeArrowSchema, TracingOptions}; + /// use serde_arrow::schema::{SchemaLike, TracingOptions}; /// /// ##[derive(Serialize)] /// struct Record { @@ -215,8 +193,7 @@ impl SerdeArrowSchema { /// // ... 
/// ]; /// - /// let schema = SerdeArrowSchema::from_samples(&samples, TracingOptions::default())?; - /// let fields = schema.to_arrow_fields()?; + /// let fields = Vec::::from_samples(&samples, TracingOptions::default())?; /// /// assert_eq!(*fields[0].data_type(), DataType::Int32); /// assert_eq!(*fields[1].data_type(), DataType::Float64); @@ -225,7 +202,57 @@ impl SerdeArrowSchema { /// # } /// ``` /// - pub fn from_samples(samples: &T, options: TracingOptions) -> Result { + fn from_samples(samples: &T, options: TracingOptions) -> Result; +} + +/// A collection of fields as understood by `serde_arrow` +#[derive(Default, Debug, PartialEq, Clone, Serialize, Deserialize)] +#[serde(from = "SchemaSerializationOptions")] +pub struct SerdeArrowSchema { + pub(crate) fields: Vec, +} + +#[derive(Deserialize)] +#[serde(untagged)] +enum SchemaSerializationOptions { + FieldsOnly(Vec), + FullSchema { fields: Vec }, +} + +impl From for SerdeArrowSchema { + fn from(value: SchemaSerializationOptions) -> Self { + use SchemaSerializationOptions::*; + match value { + FieldsOnly(fields) | FullSchema { fields } => Self { fields }, + } + } +} + +impl SerdeArrowSchema { + /// Return a new schema without any fields + pub fn new() -> Self { + Self::default() + } +} + +impl Sealed for SerdeArrowSchema {} + +impl SchemaLike for SerdeArrowSchema { + fn from_value(value: &T) -> Result { + // simple version of serde-transcode + let mut events = Vec::::new(); + crate::internal::sink::serialize_into_sink(&mut events, value)?; + let this: Self = crate::internal::source::deserialize_from_source(&events)?; + Ok(this) + } + + fn from_type<'de, T: Deserialize<'de>>(options: TracingOptions) -> Result { + let mut tracer = Tracer::new(String::from("$"), options); + tracer.trace_type::()?; + tracer.to_schema() + } + + fn from_samples(samples: &T, options: TracingOptions) -> Result { let mut tracer = Tracer::new(String::from("$"), options); tracer.trace_samples(samples)?; tracer.to_schema() diff --git 
a/serde_arrow/src/lib.rs b/serde_arrow/src/lib.rs index 2786ace3..e79d6107 100644 --- a/serde_arrow/src/lib.rs +++ b/serde_arrow/src/lib.rs @@ -20,8 +20,7 @@ //! schema should contain a `Date64`. `serde_arrow` supports to derive the //! schema from the data itself via schema tracing, but does not require it. It //! is always possible to specify the schema manually. See the [`schema` -//! module][schema] and [`SerdeArrowSchema`][schema::SerdeArrowSchema] for -//! further details. +//! module][schema] and [`SchemaLike`][schema::SchemaLike] for further details. //! //! ## Overview //! diff --git a/serde_arrow/src/schema.rs b/serde_arrow/src/schema.rs index 1aaaadd9..d39d602d 100644 --- a/serde_arrow/src/schema.rs +++ b/serde_arrow/src/schema.rs @@ -2,7 +2,7 @@ //! //! To convert between Rust objects and Arrow types, `serde_arrows` requires //! schema information as a list of Arrow fields with additional meta data. See -//! [`SerdeArrowSchema`] for details on how to specify the schema. +//! [`SchemaLike`] for details on how to specify the schema. //! //! The default mapping of Rust types to [Arrow types][arrow-types] is as follows: //! @@ -38,7 +38,7 @@ //! # fn main() {} //! 
``` pub use crate::internal::{ - schema::{SerdeArrowSchema, Strategy, STRATEGY_KEY}, + schema::{SchemaLike, SerdeArrowSchema, Strategy, STRATEGY_KEY}, tracing::TracingOptions, }; diff --git a/serde_arrow/src/test_end_to_end/issue_90.rs b/serde_arrow/src/test_end_to_end/issue_90.rs index 97dc7908..58ce1623 100644 --- a/serde_arrow/src/test_end_to_end/issue_90.rs +++ b/serde_arrow/src/test_end_to_end/issue_90.rs @@ -3,7 +3,11 @@ use std::sync::Arc; use serde::{Deserialize, Serialize}; -use crate::{self as serde_arrow, internal::error::PanicOnError, schema::TracingOptions}; +use crate::{ + self as serde_arrow, + internal::error::PanicOnError, + schema::{SchemaLike, TracingOptions}, +}; use crate::_impl::arrow::{ _raw::{array::RecordBatch, schema::Schema}, @@ -39,10 +43,7 @@ fn example() -> PanicOnError<()> { VectorMetric { distribution: None }, ]; - use serde_arrow::schema::SerdeArrowSchema; - - let fields: Vec = - SerdeArrowSchema::from_type::(TracingOptions::default())?.try_into()?; + let fields = Vec::::from_type::(TracingOptions::default())?; let arrays = serde_arrow::to_arrow(&fields, &metrics)?; let batch = RecordBatch::try_new(Arc::new(Schema::new(fields.clone())), arrays.clone())?; @@ -56,10 +57,8 @@ fn example() -> PanicOnError<()> { #[test] fn example_top_level_none() -> PanicOnError<()> { - use serde_arrow::schema::SerdeArrowSchema; - // top-level options are not supported if fields are are extracted - let res = SerdeArrowSchema::from_type::>(TracingOptions::default()); + let res = Vec::::from_type::>(TracingOptions::default()); assert!(res.is_err()); Ok(()) } diff --git a/serde_arrow/src/test_impls/json_values.rs b/serde_arrow/src/test_impls/json_values.rs index bbad1b2f..41ff599e 100644 --- a/serde_arrow/src/test_impls/json_values.rs +++ b/serde_arrow/src/test_impls/json_values.rs @@ -7,10 +7,7 @@ test_generic!( let tracing_options = TracingOptions::default(); let items = vec![json!({ "a": 1, "b": 2 }), json!({ "a": 3, "b": 4 })]; - let fields: Vec = 
SerdeArrowSchema::from_samples(&items, tracing_options) - .unwrap() - .try_into() - .unwrap(); + let fields = Vec::::from_samples(&items, tracing_options).unwrap(); let arrays = to_arrow(&fields, &items).unwrap(); drop(arrays); @@ -24,10 +21,7 @@ test_generic!( let tracing_options = TracingOptions::default().coerce_numbers(true); let items = vec![json!({ "a": 1, "b": -2 }), json!({ "a": 3.0, "b": 4 })]; - let fields: Vec = SerdeArrowSchema::from_samples(&items, tracing_options) - .unwrap() - .try_into() - .unwrap(); + let fields = Vec::::from_samples(&items, tracing_options).unwrap(); let arrays = to_arrow(&fields, &items).unwrap(); drop(arrays); diff --git a/serde_arrow/src/test_impls/macros.rs b/serde_arrow/src/test_impls/macros.rs index 5621d751..e1c8438c 100644 --- a/serde_arrow/src/test_impls/macros.rs +++ b/serde_arrow/src/test_impls/macros.rs @@ -49,10 +49,10 @@ macro_rules! test_example_impl { use super::*; use crate::{ - schema::{SerdeArrowSchema, TracingOptions, Strategy}, + schema::{TracingOptions, Strategy}, utils::{Items, Item}, }; - use crate::internal::schema::{GenericDataType, GenericField, GenericTimeUnit}; + use crate::internal::schema::{GenericDataType, GenericField, GenericTimeUnit, SchemaLike}; use crate::test_impls::macros::{btree_map, hash_map}; $(#[ignore = $ignore])? @@ -69,7 +69,7 @@ macro_rules! test_example_impl { println!("{options:?}"); - let actual: Vec = SerdeArrowSchema::from_samples(&Items(items), options).unwrap().try_into().unwrap(); + let actual = Vec::::from_samples(&Items(items), options).unwrap(); let expected: Vec = vec![(&field).try_into().unwrap()]; assert_eq!( actual, @@ -413,7 +413,7 @@ macro_rules! 
test_generic { #[allow(unused)] mod $name { use crate::{ - schema::{SerdeArrowSchema, TracingOptions}, + schema::{SchemaLike, TracingOptions}, utils::{Items, Item} }; use crate::internal::schema::{GenericField, GenericDataType}; diff --git a/serde_arrow/src/test_impls/union.rs b/serde_arrow/src/test_impls/union.rs index 7dd51850..641d264d 100644 --- a/serde_arrow/src/test_impls/union.rs +++ b/serde_arrow/src/test_impls/union.rs @@ -242,11 +242,7 @@ test_generic!( } let tracing_options = TracingOptions::default().allow_null_fields(true); - let fields: Vec = - SerdeArrowSchema::from_samples(&Items(&[U::A, U::C]), tracing_options) - .unwrap() - .try_into() - .unwrap(); + let fields = Vec::::from_samples(&Items(&[U::A, U::C]), tracing_options).unwrap(); // NOTE: variant B was never encountered during tracing let res = to_arrow(&fields, &Items(&[U::A, U::B, U::C])); diff --git a/serde_arrow/src/test_impls/wrappers.rs b/serde_arrow/src/test_impls/wrappers.rs index d06ed2a1..acd1fdca 100644 --- a/serde_arrow/src/test_impls/wrappers.rs +++ b/serde_arrow/src/test_impls/wrappers.rs @@ -16,11 +16,7 @@ use super::macros::test_generic; test_generic!( fn outer_vec() { let items: Vec = vec![0_u32, 1_u32, 2_u32]; - let fields: Vec = - SerdeArrowSchema::from_samples(&Items(&items), TracingOptions::default()) - .unwrap() - .try_into() - .unwrap(); + let fields = Vec::::from_samples(&Items(&items), TracingOptions::default()).unwrap(); let arrays = to_arrow(&fields, &Items(&items)).unwrap(); drop(arrays); @@ -30,11 +26,7 @@ test_generic!( test_generic!( fn outer_slice() { let items: &[u32] = &[0_u32, 1_u32, 2_u32]; - let fields: Vec = - SerdeArrowSchema::from_samples(&Items(items), TracingOptions::default()) - .unwrap() - .try_into() - .unwrap(); + let fields = Vec::::from_samples(&Items(items), TracingOptions::default()).unwrap(); let arrays = to_arrow(&fields, &Items(items)).unwrap(); drop(arrays); @@ -44,11 +36,7 @@ test_generic!( test_generic!( fn outer_array() { let items: &[u32; 
3] = &[0_u32, 1_u32, 2_u32]; - let fields: Vec = - SerdeArrowSchema::from_samples(&Items(items), TracingOptions::default()) - .unwrap() - .try_into() - .unwrap(); + let fields = Vec::::from_samples(&Items(items), TracingOptions::default()).unwrap(); let arrays = to_arrow(&fields, &Items(items)).unwrap(); drop(arrays); @@ -68,10 +56,7 @@ test_generic!( Item { item: 1_u32 }, Item { item: 2_u32 }, ); - let fields: Vec = SerdeArrowSchema::from_samples(items, TracingOptions::default()) - .unwrap() - .try_into() - .unwrap(); + let fields = Vec::::from_samples(items, TracingOptions::default()).unwrap(); let arrays = to_arrow(&fields, &items).unwrap(); drop(arrays); From 67baa5b663990cbee8d1ec3f43d1350e49633161 Mon Sep 17 00:00:00 2001 From: Christopher Prohm Date: Thu, 2 Nov 2023 18:32:46 +0100 Subject: [PATCH 2/7] Fix doc tests without features --- serde_arrow/src/internal/schema.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/serde_arrow/src/internal/schema.rs b/serde_arrow/src/internal/schema.rs index dee52301..ed6c7c7b 100644 --- a/serde_arrow/src/internal/schema.rs +++ b/serde_arrow/src/internal/schema.rs @@ -46,6 +46,7 @@ pub trait SchemaLike: Sized + Sealed { /// `serde_json::Value`) /// /// ```rust + /// # #[cfg(feature = "has_arrow")] /// # fn main() -> serde_arrow::_impl::PanicOnError<()> { /// # use serde_arrow::_impl::arrow; /// use arrow::datatypes::Field; @@ -59,6 +60,8 @@ pub trait SchemaLike: Sized + Sealed { /// let fields = Vec::::from_value(&schema)?; /// # Ok(()) /// # } + /// # #[cfg(not(feature = "has_arrow"))] + /// # fn main() { } /// ``` /// /// `SerdeArrowSchema` can also be directly serialized and deserialized. @@ -124,6 +127,7 @@ pub trait SchemaLike: Sized + Sealed { /// See [`TracingOptions`] for customization options. 
/// /// ```rust + /// # #[cfg(feature = "has_arrow")] /// # fn main() -> serde_arrow::_impl::PanicOnError<()> { /// # use serde_arrow::_impl::arrow; /// use arrow::datatypes::{DataType, Field}; @@ -144,6 +148,8 @@ pub trait SchemaLike: Sized + Sealed { /// assert_eq!(*fields[2].data_type(), DataType::LargeUtf8); /// # Ok(()) /// # } + /// # #[cfg(not(feature = "has_arrow"))] + /// # fn main() { } /// ``` /// fn from_type<'de, T: Deserialize<'de>>(options: TracingOptions) -> Result; @@ -166,6 +172,7 @@ pub trait SchemaLike: Sized + Sealed { /// See [`TracingOptions`] for customization options. /// /// ```rust + /// # #[cfg(feature = "has_arrow")] /// # fn main() -> serde_arrow::_impl::PanicOnError<()> { /// # use serde_arrow::_impl::arrow; /// use arrow::datatypes::{DataType, Field}; @@ -200,6 +207,8 @@ pub trait SchemaLike: Sized + Sealed { /// assert_eq!(*fields[2].data_type(), DataType::LargeUtf8); /// # Ok(()) /// # } + /// # #[cfg(not(feature = "has_arrow"))] + /// # fn main() { } /// ``` /// fn from_samples(samples: &T, options: TracingOptions) -> Result; From 278cd8fae0db51cd7d8e1b3689abf3bf8a66e13e Mon Sep 17 00:00:00 2001 From: Christopher Prohm Date: Thu, 2 Nov 2023 19:05:06 +0100 Subject: [PATCH 3/7] Prevent the `arrow2-0-17` from always being active --- Cargo.toml | 6 ++---- serde_arrow/src/arrow2_impl/deserialization.rs | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 015d6e66..1b4b09d8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,8 +1,6 @@ [workspace] -members = [ - "serde_arrow", - "example", -] +members = ["serde_arrow", "example"] +default-members = ["serde_arrow"] resolver = "2" diff --git a/serde_arrow/src/arrow2_impl/deserialization.rs b/serde_arrow/src/arrow2_impl/deserialization.rs index 0da4dbb2..1199bdfb 100644 --- a/serde_arrow/src/arrow2_impl/deserialization.rs +++ b/serde_arrow/src/arrow2_impl/deserialization.rs @@ -89,6 +89,7 @@ impl BufferExtract for dyn Array { let validity = 
get_validity(typed); let offsets = typed.offsets(); + let offsets = offsets.as_slice(); check_supported_list_layout(validity, offsets)?; @@ -216,7 +217,6 @@ impl BufferExtract for dyn Array { let validity = get_validity(typed); check_supported_list_layout(validity, offsets)?; - let offsets = buffers.push_u32_cast(offsets)?; let validity = validity.map(|b| buffers.push_u1(b)); From 1fce2fb78e5240835a87a10c9240bf3b22e11b1a Mon Sep 17 00:00:00 2001 From: Christopher Prohm Date: Thu, 2 Nov 2023 19:20:38 +0100 Subject: [PATCH 4/7] Fix warnings without selected arrow version --- serde_arrow/src/lib.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/serde_arrow/src/lib.rs b/serde_arrow/src/lib.rs index e79d6107..787e2d49 100644 --- a/serde_arrow/src/lib.rs +++ b/serde_arrow/src/lib.rs @@ -118,6 +118,9 @@ //! | `arrow-38` | `arrow=38` | | | | //! | `arrow-37` | `arrow=37` | | | | //! + +// be more forgiving without any active implementation +#[cfg_attr(all(not(has_arrow), not(has_arrow2)), allow(unused))] mod internal; /// *Internal. 
Do not use* From 3d86087ac72e3e1f3f9f92f69a25f514df7d8f17 Mon Sep 17 00:00:00 2001 From: Christopher Prohm Date: Thu, 2 Nov 2023 19:20:43 +0100 Subject: [PATCH 5/7] Update changelog --- Changes.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Changes.md b/Changes.md index 209ee6be..d171c97d 100644 --- a/Changes.md +++ b/Changes.md @@ -19,6 +19,8 @@ Improvements: - Allow to build schema objects from serializable objects, e.g., `serde_json::Value` (`SchemaLike::from_value()`) - Add support for `arrow=47` and `arrow=48` +- Fix bug in `arrow2=0.16` support +- Fix unused warnings without selected arrow versions Deprecations (see the documentation of deprecated items for how to migratie): From 4d1ce08afd33751af6a2b29666397c01739b547a Mon Sep 17 00:00:00 2001 From: Christopher Prohm Date: Thu, 2 Nov 2023 19:36:33 +0100 Subject: [PATCH 6/7] Prevent broken links in docs without arrow impl --- serde_arrow/src/lib.rs | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/serde_arrow/src/lib.rs b/serde_arrow/src/lib.rs index 787e2d49..620d652d 100644 --- a/serde_arrow/src/lib.rs +++ b/serde_arrow/src/lib.rs @@ -9,8 +9,9 @@ //! as easy as using Serde's derive macros. //! //! In the Rust ecosystem there are two competing implementations of the arrow -//! in-memory format, [`arrow`][] and [`arrow2`][]. `serde_arrow` supports both. -//! The supported arrow implementations can be selected via +//! in-memory format, [`arrow`](https://github.com/apache/arrow-rs) and +//! [`arrow2`](https://github.com/jorgecarleitao/arrow2). `serde_arrow` supports +//! both. The supported arrow implementations can be selected via //! [features](#features). //! //! `serde_arrow` relies on a schema to translate between Rust and Arrow as @@ -22,19 +23,20 @@ //! is always possible to specify the schema manually. See the [`schema` //! module][schema] and [`SchemaLike`][schema::SchemaLike] for further details. //! -//! ## Overview -//! -//! 
| Operation | `arrow` | `arrow2` | -//! |------------------|------------------|-------------------| -//! | Required features | [`arrow-*`](#features) | [`arrow2-*`](#features) | -//! | | | | -//! | Rust to Arrow | [`to_arrow`] | [`to_arrow2`] | -//! | Arrow to Rust | [`from_arrow`] | [`from_arrow2`] | -//! | Arrow Builder | [`ArrowBuilder`] | [`Arrow2Builder`] | -//! | | | | -//! | Fields to Schema | [`SerdeArrowSchema::from_arrow_fields`][schema::SerdeArrowSchema::from_arrow_fields] | [`SerdeArrowSchema::form_arrow2_fields`][schema::SerdeArrowSchema::from_arrow2_fields] | -//! | Schema to fields | [`schema.to_arrow_fields()`][schema::SerdeArrowSchema::to_arrow_fields] | [`schema.to_arrow2_fields()`][schema::SerdeArrowSchema::to_arrow2_fields] | -//! +#![cfg_attr(all(has_arrow, has_arrow2), doc = r#" +## Overview + +| Operation | `arrow` | `arrow2` | +|------------------|------------------|-------------------| +| Required features | [`arrow-*`](#features) | [`arrow2-*`](#features) | +| | | | +| Rust to Arrow | [`to_arrow`] | [`to_arrow2`] | +| Arrow to Rust | [`from_arrow`] | [`from_arrow2`] | +| Array Builder | [`ArrowBuilder`] | [`Arrow2Builder`] | +| | | | +| Fields to Schema | [`SerdeArrowSchema::from_arrow_fields`][schema::SerdeArrowSchema::from_arrow_fields] | [`SerdeArrowSchema::form_arrow2_fields`][schema::SerdeArrowSchema::from_arrow2_fields] | +| Schema to fields | [`schema.to_arrow_fields()`][schema::SerdeArrowSchema::to_arrow_fields] | [`schema.to_arrow2_fields()`][schema::SerdeArrowSchema::to_arrow2_fields] | +"#)] //! ## Example //! //! Requires one of `arrow2` feature (see below). 
From a99de2484b4f226492c0cb139b6d5941be236239 Mon Sep 17 00:00:00 2001 From: Christopher Prohm Date: Thu, 2 Nov 2023 20:03:33 +0100 Subject: [PATCH 7/7] Tune doc examples and deprecation notes --- serde_arrow/src/arrow2_impl/api.rs | 32 +++++++++++------------------ serde_arrow/src/arrow_impl/api.rs | 33 ++++++++++++------------------ serde_arrow/src/lib.rs | 14 +++++++------ 3 files changed, 33 insertions(+), 46 deletions(-) diff --git a/serde_arrow/src/arrow2_impl/api.rs b/serde_arrow/src/arrow2_impl/api.rs index f80c8963..0f247689 100644 --- a/serde_arrow/src/arrow2_impl/api.rs +++ b/serde_arrow/src/arrow2_impl/api.rs @@ -132,7 +132,6 @@ impl Arrow2Builder { /// ]; /// /// let fields = Vec::::from_type::(TracingOptions::default())?; -/// /// let arrays = serde_arrow::to_arrow2(&fields, &items)?; /// # /// # assert_eq!(arrays.len(), 2); @@ -178,10 +177,8 @@ where /// /// let fields = Vec::::from_type::(TracingOptions::default())?; /// # let items = &[Record { a: Some(1.0), b: 2}]; -/// # let arrays = serde_arrow::to_arrow2(&fields, &items).unwrap(); +/// # let arrays = serde_arrow::to_arrow2(&fields, &items)?; /// # -/// -/// // deserialize the records from arrays /// let items: Vec = serde_arrow::from_arrow2(&fields, &arrays)?; /// # Ok(()) /// # } @@ -247,7 +244,7 @@ where /// # Ok(()) /// # } /// ``` -#[deprecated = "serde_arrow::arrow2::serialize_into_fields is deprecated. Use serde_arrow::schema::SerdeArrowSchema::from_samples instead"] +#[deprecated = "serde_arrow::arrow2::serialize_into_fields is deprecated. Use serde_arrow::schema::SchemaLike::from_samples instead"] pub fn serialize_into_fields(items: &T, options: TracingOptions) -> Result> where T: Serialize + ?Sized, @@ -298,7 +295,7 @@ where /// # Ok(()) /// # } /// ``` -#[deprecated = "serde_arrow::arrow2::serialize_into_field is deprecated. Use serde_arrow::schema::SerdeArrowSchema::from_samples instead"] +#[deprecated = "serde_arrow::arrow2::serialize_into_field is deprecated. 
Use serde_arrow::schema::SchemaLike::from_samples with serde_arrow::utils::Items instead"] pub fn serialize_into_field(items: &T, name: &str, options: TracingOptions) -> Result where T: Serialize + ?Sized, @@ -314,23 +311,17 @@ where /// /// ```rust /// # fn main() -> serde_arrow::Result<()> { -/// # use serde_arrow::_impl::arrow2; -/// use arrow2::datatypes::Field; -/// use serde_arrow::{ -/// schema::{SchemaLike, TracingOptions}, -/// utils::Items, -/// }; +/// # use serde_arrow::_impl::arrow2::datatypes::Field; +/// # use serde_arrow::schema::{SchemaLike, TracingOptions}; +/// use serde_arrow::utils::Items; /// /// let samples: Vec = vec![1, 2, 3, /* ... */ ]; -/// let fields = Vec::::from_samples(&Items(&samples), TracingOptions::default())?; -/// +/// # let fields = Vec::::from_samples(&Items(&samples), TracingOptions::default())?; /// let arrays = serde_arrow::to_arrow2(&fields, &Items(&samples))?; -/// # -/// # drop(fields); /// # Ok(()) /// # } /// ``` -#[deprecated = "serde_arrow::arrow2::serialize_into_array is deprecated. Use serde_arrow::to_arrow2 instead"] +#[deprecated = "serde_arrow::arrow2::serialize_into_array is deprecated. Use serde_arrow::to_arrow2 with serde_arrow::utils::Items instead"] pub fn serialize_into_array(field: &Field, items: &T) -> Result> where T: Serialize + ?Sized, @@ -383,8 +374,9 @@ where /// use arrow2::datatypes::{DataType, Field}; /// use serde_arrow::{Arrow2Builder, utils::{Items, Item}}; /// -/// let fields = vec![Field::new("item", DataType::UInt8, false)]; -/// let mut builder = Arrow2Builder::new(&fields)?; +/// let mut builder = Arrow2Builder::new(&[ +/// Field::new("item", DataType::UInt8, false), +/// ])?; /// /// builder.push(&Item(0))?; /// builder.push(&Item(1))?; @@ -397,7 +389,7 @@ where /// # Ok(()) /// # } /// ``` -#[deprecated = "serde_arrow::arrow2::ArrayBuilder is deprecated. Use serde_arrow::Arrow2Builder instead"] +#[deprecated = "serde_arrow::arrow2::ArrayBuilder is deprecated. 
Use serde_arrow::Arrow2Builder with serde_arrow::utils::Items instead"] pub struct ArrayBuilder(generic::GenericBuilder); #[allow(deprecated)] diff --git a/serde_arrow/src/arrow_impl/api.rs b/serde_arrow/src/arrow_impl/api.rs index 50a06e1f..f68390e0 100644 --- a/serde_arrow/src/arrow_impl/api.rs +++ b/serde_arrow/src/arrow_impl/api.rs @@ -173,14 +173,11 @@ pub fn to_arrow(fields: &[Field], items: &T) -> Result::from_type::(TracingOptions::default())?; /// # let items = &[Record { a: Some(1.0), b: 2}]; -/// # let arrays = serde_arrow::to_arrow(&fields, &items).unwrap(); +/// # let arrays = serde_arrow::to_arrow(&fields, &items)?; /// # -/// -/// // deserialize the records from arrays -/// let items: Vec = serde_arrow::from_arrow(&fields, &arrays).unwrap(); +/// let items: Vec = serde_arrow::from_arrow(&fields, &arrays)?; /// # Ok(()) /// # } /// ``` @@ -245,7 +242,7 @@ where /// # Ok(()) /// # } /// ``` -#[deprecated = "serialize_into_fields is deprecated. Use serde_arrow::schema::SerdeArrowSchema::from_samples instead"] +#[deprecated = "serialize_into_fields is deprecated. Use serde_arrow::schema::SchemaLike::from_samples instead"] pub fn serialize_into_fields(items: &T, options: TracingOptions) -> Result> where T: Serialize + ?Sized, @@ -277,7 +274,7 @@ where /// # Ok(()) /// # } /// ``` -#[deprecated = "serialize_into_field is deprecated. Use serde_arrow::to_arrow with serde_arrow::utils::Items instead"] +#[deprecated = "serialize_into_field is deprecated. 
Use serde_arrow::schema::SchemaLike::from_samples with serde_arrow::utils::Items instead"] pub fn serialize_into_field(items: &T, name: &str, options: TracingOptions) -> Result where T: Serialize + ?Sized, @@ -313,23 +310,18 @@ where /// /// ```rust /// # fn main() -> serde_arrow::Result<()> { -/// # use serde_arrow::_impl::arrow; -/// use arrow::datatypes::Field; -/// use serde_arrow::{ -/// schema::{SchemaLike, TracingOptions}, -/// utils::Items, -/// }; +/// # use serde_arrow::_impl::arrow::datatypes::Field; +/// # use serde_arrow::schema::{SchemaLike, TracingOptions}; +/// use serde_arrow::utils::Items; /// /// let samples: Vec = vec![1, 2, 3, /* ... */ ]; -/// let fields = Vec::::from_samples(&Items(&samples), TracingOptions::default())?; -/// +/// # let fields = Vec::::from_samples(&Items(&samples), TracingOptions::default())?; /// let arrays = serde_arrow::to_arrow(&fields, &Items(&samples))?; /// # -/// # drop(fields); /// # Ok(()) /// # } /// ``` -#[deprecated = "serialize_into_array is deprecated. Use serde_arrow::arrow::ArrayBuilder instead"] +#[deprecated = "serialize_into_array is deprecated. Use serde_arrow::to_arrow with serde_arrow::utils::Items instead"] pub fn serialize_into_array(field: &Field, items: &T) -> Result where T: Serialize + ?Sized, @@ -382,8 +374,9 @@ where /// use arrow::datatypes::{DataType, Field}; /// use serde_arrow::{ArrowBuilder, utils::{Items, Item}}; /// -/// let fields = vec![Field::new("item", DataType::UInt8, false)]; -/// let mut builder = ArrowBuilder::new(&fields)?; +/// let mut builder = ArrowBuilder::new(&[ +/// Field::new("item", DataType::UInt8, false), +/// ])?; /// /// builder.push(&Item(0))?; /// builder.push(&Item(1))?; @@ -396,7 +389,7 @@ where /// # Ok(()) /// # } /// ``` -#[deprecated = "serde_arrow::arrow::ArrayBuilder is deprecated. 
Use serde_arrow::ArrowBuilder with serde_arrow::utils::Items instead"] pub struct ArrayBuilder(generic::GenericBuilder); #[allow(deprecated)] diff --git a/serde_arrow/src/lib.rs b/serde_arrow/src/lib.rs index 620d652d..e1a9e197 100644 --- a/serde_arrow/src/lib.rs +++ b/serde_arrow/src/lib.rs @@ -23,7 +23,9 @@ //! is always possible to specify the schema manually. See the [`schema` //! module][schema] and [`SchemaLike`][schema::SchemaLike] for further details. //! -#![cfg_attr(all(has_arrow, has_arrow2), doc = r#" +#![cfg_attr( + all(has_arrow, has_arrow2), + doc = r#" ## Overview | Operation | `arrow` | `arrow2` | @@ -36,7 +38,8 @@ | | | | | Fields to Schema | [`SerdeArrowSchema::from_arrow_fields`][schema::SerdeArrowSchema::from_arrow_fields] | [`SerdeArrowSchema::form_arrow2_fields`][schema::SerdeArrowSchema::from_arrow2_fields] | | Schema to fields | [`schema.to_arrow_fields()`][schema::SerdeArrowSchema::to_arrow_fields] | [`schema.to_arrow2_fields()`][schema::SerdeArrowSchema::to_arrow2_fields] | -"#)] +"# +)] //! ## Example //! //! Requires one of `arrow2` feature (see below). @@ -45,6 +48,8 @@ //! # use serde::{Deserialize, Serialize}; //! # #[cfg(feature = "has_arrow2")] //! # fn main() -> serde_arrow::Result<()> { +//! # use serde_arrow::_impl::arrow2; +//! use arrow2::datatypes::Field; //! use serde_arrow::schema::{TracingOptions, SerdeArrowSchema}; //! //! ##[derive(Serialize, Deserialize)] @@ -59,10 +64,7 @@ //! Record { a: 3.0, b: 3 }, //! ]; //! -//! let fields = -//! SerdeArrowSchema::from_type::(TracingOptions::default())? -//! .to_arrow2_fields()?; -//! +//! let fields = Vec::::from_type::(TracingOptions::default())?; //! let arrays = serde_arrow::to_arrow2(&fields, &records)?; //! # //! # drop(arrays);