diff --git a/arrow-array/src/record_batch.rs b/arrow-array/src/record_batch.rs
index 1f3e1df847a8..4e859fdfe7ea 100644
--- a/arrow-array/src/record_batch.rs
+++ b/arrow-array/src/record_batch.rs
@@ -327,6 +327,43 @@ impl RecordBatch {
         &self.columns[..]
     }
 
+    /// Removes the column at `index` from this batch and returns it.
+    ///
+    /// The schema is rebuilt without the corresponding field; schema-level
+    /// metadata is carried over unchanged.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `index` is out of bounds.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use std::sync::Arc;
+    /// use arrow_array::{BooleanArray, Int32Array, RecordBatch};
+    /// use arrow_schema::{DataType, Field, Schema};
+    /// let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
+    /// let bool_array = BooleanArray::from(vec![true, false, false, true, true]);
+    /// let schema = Schema::new(vec![
+    ///     Field::new("id", DataType::Int32, false),
+    ///     Field::new("bool", DataType::Boolean, false),
+    /// ]);
+    ///
+    /// let mut batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(id_array), Arc::new(bool_array)]).unwrap();
+    ///
+    /// let removed_column = batch.remove_column(0);
+    /// assert_eq!(removed_column.as_any().downcast_ref::<Int32Array>().unwrap(), &Int32Array::from(vec![1, 2, 3, 4, 5]));
+    /// assert_eq!(batch.num_columns(), 1);
+    /// ```
+    pub fn remove_column(&mut self, index: usize) -> ArrayRef {
+        // Seed the builder from the whole schema (not just its fields) so
+        // that schema-level metadata survives the rebuild.
+        let mut builder = SchemaBuilder::from(self.schema.as_ref());
+        builder.remove(index);
+        self.schema = Arc::new(builder.finish());
+        self.columns.remove(index)
+    }
+
     /// Return a new RecordBatch where each column is sliced
     /// according to `offset` and `length`
     ///
diff --git a/arrow-schema/src/fields.rs b/arrow-schema/src/fields.rs
index 368ecabbf3ef..70cb1968e9a4 100644
--- a/arrow-schema/src/fields.rs
+++ b/arrow-schema/src/fields.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::{ArrowError, Field, FieldRef};
+use crate::{ArrowError, Field, FieldRef, SchemaBuilder};
 use std::ops::Deref;
 use std::sync::Arc;
 
@@ -98,6 +98,38 @@ impl Fields {
                 .zip(other.iter())
                 .all(|(a, b)| Arc::ptr_eq(a, b) || a.contains(b))
     }
+
+    /// Removes the field at `index` and returns it.
+    ///
+    /// [`Fields`] is rebuilt from the remaining fields via [`SchemaBuilder`],
+    /// so prefer using a [`SchemaBuilder`] directly when removing several
+    /// fields in a row.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `index` is out of bounds.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use arrow_schema::{DataType, Field, Fields};
+    /// let mut fields = Fields::from(vec![
+    ///     Field::new("a", DataType::Boolean, false),
+    ///     Field::new("b", DataType::Int8, false),
+    ///     Field::new("c", DataType::Utf8, false),
+    /// ]);
+    /// assert_eq!(fields.len(), 3);
+    /// assert_eq!(fields.remove(1), Field::new("b", DataType::Int8, false).into());
+    /// assert_eq!(fields.len(), 2);
+    /// ```
+    pub fn remove(&mut self, index: usize) -> FieldRef {
+        // Seed the builder straight from `&Fields`; there is no need to
+        // build a temporary `Fields` first.
+        let mut builder = SchemaBuilder::from(&*self);
+        let field = builder.remove(index);
+        *self = builder.finish().fields;
+        field
+    }
 }
 
 impl Default for Fields {
diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs
index c0f58e077a6f..711e4cb3314d 100644
--- a/arrow-schema/src/schema.rs
+++ b/arrow-schema/src/schema.rs
@@ -381,6 +381,32 @@ impl Schema {
                 .iter()
                 .all(|(k, v1)| self.metadata.get(k).map(|v2| v1 == v2).unwrap_or_default())
     }
+
+    /// Removes the field at `index` and returns it.
+    ///
+    /// Prefer [`SchemaBuilder`] when removing multiple fields, as it saves
+    /// allocations.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `index` is out of bounds.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use arrow_schema::{DataType, Field, Schema};
+    /// let mut schema = Schema::new(vec![
+    ///     Field::new("a", DataType::Boolean, false),
+    ///     Field::new("b", DataType::Int8, false),
+    ///     Field::new("c", DataType::Utf8, false),
+    /// ]);
+    /// assert_eq!(schema.fields.len(), 3);
+    /// assert_eq!(schema.remove(1), Field::new("b", DataType::Int8, false).into());
+    /// assert_eq!(schema.fields.len(), 2);
+    /// ```
+    pub fn remove(&mut self, index: usize) -> FieldRef {
+        self.fields.remove(index)
+    }
 }
 
 impl fmt::Display for Schema {