-
Notifications
You must be signed in to change notification settings - Fork 833
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Proposal: improve SchemaBuilder for "update fields" usecase #6576
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -25,6 +25,32 @@ use crate::field::Field; | |||||||||||||
use crate::{FieldRef, Fields}; | ||||||||||||||
|
||||||||||||||
/// A builder to facilitate building a [`Schema`] from iteratively from [`FieldRef`] | ||||||||||||||
/// | ||||||||||||||
/// # Example | ||||||||||||||
/// Create an entirely new Schema | ||||||||||||||
/// ``` | ||||||||||||||
/// # use arrow_schema::*; | ||||||||||||||
/// let schema = Schema::builder() | ||||||||||||||
/// .with_field(Field::new("c1", DataType::Int64, false)) | ||||||||||||||
/// .with_field(Field::new("c2", DataType::Utf8, false)) | ||||||||||||||
/// .build(); | ||||||||||||||
/// ``` | ||||||||||||||
/// Create a new schema with a subset of fields from an existing schema | ||||||||||||||
/// ``` | ||||||||||||||
/// # use arrow_schema::*; | ||||||||||||||
/// let schema = Schema::new(vec![ | ||||||||||||||
/// Field::new("c1", DataType::Int64, false), | ||||||||||||||
/// Field::new("c2", DataType::Utf8, false), | ||||||||||||||
/// ]); | ||||||||||||||
/// | ||||||||||||||
/// // Create a new schema with the same metdata, but only the second field | ||||||||||||||
/// let projected_schema = SchemaBuilder::from(&schema) | ||||||||||||||
/// .clear_fields() | ||||||||||||||
/// .with_field(schema.field(1).clone()) | ||||||||||||||
/// .build(); | ||||||||||||||
/// | ||||||||||||||
/// assert_eq!(projected_schema, Schema::new(vec![Field::new("c2", DataType::Utf8, false)])); | ||||||||||||||
/// ``` | ||||||||||||||
#[derive(Debug, Default)] | ||||||||||||||
pub struct SchemaBuilder { | ||||||||||||||
fields: Vec<FieldRef>, | ||||||||||||||
|
@@ -45,6 +71,18 @@ impl SchemaBuilder { | |||||||||||||
} | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
/// Clears any fields currently in this builder. | ||||||||||||||
pub fn clear_fields(mut self) -> Self { | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
I can see the desire to distinguish this from metadata, but given the other fields aren't |
||||||||||||||
self.fields.clear(); | ||||||||||||||
self | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
/// Appends a new field to this [`SchemaBuilder`] and returns self | ||||||||||||||
pub fn with_field(mut self, field: impl Into<FieldRef>) -> Self { | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should there be a with_metadata as well? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, will add |
||||||||||||||
self.push(field); | ||||||||||||||
self | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
/// Appends a [`FieldRef`] to this [`SchemaBuilder`] without checking for collision | ||||||||||||||
pub fn push(&mut self, field: impl Into<FieldRef>) { | ||||||||||||||
self.fields.push(field.into()) | ||||||||||||||
|
@@ -87,7 +125,7 @@ impl SchemaBuilder { | |||||||||||||
&mut self.metadata | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
/// Reverse the fileds | ||||||||||||||
/// Reverse the fields in this builder | ||||||||||||||
pub fn reverse(&mut self) { | ||||||||||||||
self.fields.reverse(); | ||||||||||||||
} | ||||||||||||||
|
@@ -120,6 +158,13 @@ impl SchemaBuilder { | |||||||||||||
metadata: self.metadata, | ||||||||||||||
} | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
/// consume the builder and return the final [`Schema`] | ||||||||||||||
/// | ||||||||||||||
/// (synonym for [`Self::finish`] | ||||||||||||||
Comment on lines
+162
to
+164
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||
pub fn build(self) -> Schema { | ||||||||||||||
self.finish() | ||||||||||||||
} | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
impl From<&Fields> for SchemaBuilder { | ||||||||||||||
|
@@ -182,6 +227,8 @@ pub type SchemaRef = Arc<Schema>; | |||||||||||||
/// | ||||||||||||||
/// Note that this information is only part of the meta-data and not part of the physical | ||||||||||||||
/// memory layout. | ||||||||||||||
/// | ||||||||||||||
/// See also [`SchemaBuilder`] for creating a schema. | ||||||||||||||
#[derive(Debug, Clone, PartialEq, Eq)] | ||||||||||||||
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] | ||||||||||||||
pub struct Schema { | ||||||||||||||
|
@@ -218,6 +265,8 @@ impl Schema { | |||||||||||||
/// Creates a new [`Schema`] from a sequence of [`Field`] values | ||||||||||||||
/// and adds additional metadata in form of key value pairs. | ||||||||||||||
/// | ||||||||||||||
/// See also [`SchemaBuilder`] | ||||||||||||||
/// | ||||||||||||||
/// # Example | ||||||||||||||
/// | ||||||||||||||
/// ``` | ||||||||||||||
|
@@ -240,6 +289,11 @@ impl Schema { | |||||||||||||
} | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
/// Return a [`SchemaBuilder`] | ||||||||||||||
pub fn builder() -> SchemaBuilder { | ||||||||||||||
SchemaBuilder::new() | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
/// Sets the metadata of this `Schema` to be `metadata` and returns self | ||||||||||||||
pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self { | ||||||||||||||
self.metadata = metadata; | ||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder if we could make this work, it might be a bit cleaner?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree -- will give it a try