From 7c4dfa883ccfaa060d3007f9aead5d812af1a76f Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 16 Oct 2024 16:51:35 -0400 Subject: [PATCH 1/2] Proposal: improve SchemaBuilder for "update fields" usecase --- arrow-schema/src/schema.rs | 54 +++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs index cc3a8a308a83..e3b13e215346 100644 --- a/arrow-schema/src/schema.rs +++ b/arrow-schema/src/schema.rs @@ -25,6 +25,32 @@ use crate::field::Field; use crate::{FieldRef, Fields}; /// A builder to facilitate building a [`Schema`] from iteratively from [`FieldRef`] +/// +/// # Example +/// Create an entirely new Schema +/// ``` +/// # use arrow_schema::*; +/// let schema = Schema::builder() +/// .with_field(Field::new("c1", DataType::Int64, false)) +/// .with_field(Field::new("c2", DataType::Utf8, false)) +/// .build(); +/// ``` +/// Create a new schema with a subset of fields from an existing schema +/// ``` +/// # use arrow_schema::*; +/// let schema = Schema::new(vec![ +/// Field::new("c1", DataType::Int64, false), +/// Field::new("c2", DataType::Utf8, false), +/// ]); +/// +/// // Create a new schema with the same metadata, but only the second field +/// let projected_schema = SchemaBuilder::from(&schema) +/// .clear_fields() +/// .with_field(schema.field(1).clone()) +/// .build(); +/// +/// assert_eq!(projected_schema, Schema::new(vec![Field::new("c2", DataType::Utf8, false)])); +/// ``` #[derive(Debug, Default)] pub struct SchemaBuilder { fields: Vec<FieldRef>, @@ -45,6 +71,18 @@ impl SchemaBuilder { } } + /// Clears any fields currently in this builder.
+ pub fn clear_fields(mut self) -> Self { + self.fields.clear(); + self + } + + /// Appends a new field to this [`SchemaBuilder`] and returns self + pub fn with_field(mut self, field: impl Into<FieldRef>) -> Self { + self.push(field); + self + } + /// Appends a [`FieldRef`] to this [`SchemaBuilder`] without checking for collision pub fn push(&mut self, field: impl Into<FieldRef>) { self.fields.push(field.into()) @@ -87,7 +125,7 @@ impl SchemaBuilder { &mut self.metadata } - /// Reverse the fileds + /// Reverse the fields in this builder pub fn reverse(&mut self) { self.fields.reverse(); } @@ -120,6 +158,13 @@ impl SchemaBuilder { metadata: self.metadata, } } + + /// Consume the builder and return the final [`Schema`] + /// + /// (synonym for [`Self::finish`]) + pub fn build(self) -> Schema { + self.finish() + } } impl From<&Fields> for SchemaBuilder { @@ -182,6 +227,8 @@ pub type SchemaRef = Arc<Schema>; /// /// Note that this information is only part of the meta-data and not part of the physical /// memory layout. +/// +/// See also [`SchemaBuilder`] for creating a schema.
#[derive(Debug, Clone, PartialEq, Eq)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Schema { @@ -240,6 +287,11 @@ impl Schema { } } + /// Return a [`SchemaBuilder`] + pub fn builder() -> SchemaBuilder { + SchemaBuilder::new() + } + /// Sets the metadata of this `Schema` to be `metadata` and returns self pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self { self.metadata = metadata; From c83db7a45e6b084b39af349076af8b035c5b5a67 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 16 Oct 2024 17:21:58 -0400 Subject: [PATCH 2/2] add doc link --- arrow-schema/src/schema.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs index e3b13e215346..db223a7689a5 100644 --- a/arrow-schema/src/schema.rs +++ b/arrow-schema/src/schema.rs @@ -265,6 +265,8 @@ impl Schema { /// Creates a new [`Schema`] from a sequence of [`Field`] values /// and adds additional metadata in form of key value pairs. /// + /// See also [`SchemaBuilder`] + /// /// # Example /// /// ```