Skip to content

Commit

Permalink
Add `Fields::remove()`, `Schema::remove()`, and `RecordBatch::remove_column()` APIs (apache#4959)
Browse files Browse the repository at this point in the history

* Add `Field::remove()`, `Schema::remove_field()`, and `RecordBatch::remove_column()` APIs

* Update arrow-schema/src/fields.rs

Co-authored-by: Raphael Taylor-Davies <[email protected]>

* Update arrow-schema/src/schema.rs

Co-authored-by: Raphael Taylor-Davies <[email protected]>

* Fix docs testing

* Use `SchemaBuilder` to build the new `Schema`

* Recommend `SchemaBuilder`

* Apply review suggestions

* Update arrow-schema/src/schema.rs

Co-authored-by: Raphael Taylor-Davies <[email protected]>

---------

Co-authored-by: Raphael Taylor-Davies <[email protected]>
  • Loading branch information
Folyd and tustvold authored Oct 26, 2023
1 parent cd069ea commit 5601b7a
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 1 deletion.
34 changes: 34 additions & 0 deletions arrow-array/src/record_batch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,40 @@ impl RecordBatch {
&self.columns[..]
}

/// Removes the column at `index` from this batch and returns it.
///
/// The matching field is removed from the schema as well. The remaining
/// fields and the schema-level metadata are kept.
///
/// # Panics
///
/// Panics if `index` is out of bounds.
///
/// # Example
///
/// ```
/// use std::sync::Arc;
/// use arrow_array::{BooleanArray, Int32Array, RecordBatch};
/// use arrow_schema::{DataType, Field, Schema};
/// let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
/// let bool_array = BooleanArray::from(vec![true, false, false, true, true]);
/// let schema = Schema::new(vec![
///     Field::new("id", DataType::Int32, false),
///     Field::new("bool", DataType::Boolean, false),
/// ]);
///
/// let mut batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(id_array), Arc::new(bool_array)]).unwrap();
///
/// let removed_column = batch.remove_column(0);
/// assert_eq!(removed_column.as_any().downcast_ref::<Int32Array>().unwrap(), &Int32Array::from(vec![1, 2, 3, 4, 5]));
/// assert_eq!(batch.num_columns(), 1);
/// ```
pub fn remove_column(&mut self, index: usize) -> ArrayRef {
    let mut builder = SchemaBuilder::from(self.schema.fields());
    builder.remove(index);
    // The builder was seeded with the fields only, so the schema-level
    // metadata has to be carried over explicitly or it would be lost.
    let schema = builder
        .finish()
        .with_metadata(self.schema.metadata().clone());
    self.schema = Arc::new(schema);
    self.columns.remove(index)
}

/// Return a new RecordBatch where each column is sliced
/// according to `offset` and `length`
///
Expand Down
27 changes: 26 additions & 1 deletion arrow-schema/src/fields.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::{ArrowError, Field, FieldRef};
use crate::{ArrowError, Field, FieldRef, SchemaBuilder};
use std::ops::Deref;
use std::sync::Arc;

Expand Down Expand Up @@ -98,6 +98,31 @@ impl Fields {
.zip(other.iter())
.all(|(a, b)| Arc::ptr_eq(a, b) || a.contains(b))
}

/// Removes the field at `index` and returns it.
///
/// # Panics
///
/// Panics if `index` is out of bounds.
///
/// # Example
/// ```
/// use arrow_schema::{DataType, Field, Fields};
/// let mut fields = Fields::from(vec![
///     Field::new("a", DataType::Boolean, false),
///     Field::new("b", DataType::Int8, false),
///     Field::new("c", DataType::Utf8, false),
/// ]);
/// assert_eq!(fields.len(), 3);
/// assert_eq!(fields.remove(1), Field::new("b", DataType::Int8, false).into());
/// assert_eq!(fields.len(), 2);
/// ```
pub fn remove(&mut self, index: usize) -> FieldRef {
    // Seed the builder straight from `&Fields`; wrapping the slice in a
    // temporary `Fields` first would only add a throwaway allocation.
    let mut builder = SchemaBuilder::from(&*self);
    let field = builder.remove(index);
    // Replace the current collection with the builder's remaining fields.
    *self = builder.finish().fields;
    field
}
}

impl Default for Fields {
Expand Down
24 changes: 24 additions & 0 deletions arrow-schema/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,30 @@ impl Schema {
.iter()
.all(|(k, v1)| self.metadata.get(k).map(|v2| v1 == v2).unwrap_or_default())
}

/// Removes the field at `index` from this schema and returns it.
///
/// When removing several fields, prefer [`SchemaBuilder`], as that saves
/// allocations compared with calling this method repeatedly.
///
/// # Panics
///
/// Panics if `index` is out of bounds.
///
/// # Example
///
/// ```
/// use arrow_schema::{DataType, Field, Schema};
/// let mut schema = Schema::new(vec![
/// Field::new("a", DataType::Boolean, false),
/// Field::new("b", DataType::Int8, false),
/// Field::new("c", DataType::Utf8, false),
/// ]);
/// assert_eq!(schema.fields.len(), 3);
/// assert_eq!(schema.remove(1), Field::new("b", DataType::Int8, false).into());
/// assert_eq!(schema.fields.len(), 2);
/// ```
pub fn remove(&mut self, index: usize) -> FieldRef {
// Delegates to the `remove` method of the schema's `fields` collection.
self.fields.remove(index)
}
}

impl fmt::Display for Schema {
Expand Down

0 comments on commit 5601b7a

Please sign in to comment.