Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Array::shrink_to_fit(&mut self) #6790

Merged
merged 13 commits into from
Nov 29, 2024
7 changes: 7 additions & 0 deletions arrow-array/src/array/boolean_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,13 @@ impl Array for BooleanArray {
self.values.is_empty()
}

fn shrink_to_fit(&mut self) {
self.values.shrink_to_fit();
if let Some(nulls) = &mut self.nulls {
nulls.shrink_to_fit();
}
}

fn offset(&self) -> usize {
self.values.offset()
}
Expand Down
8 changes: 8 additions & 0 deletions arrow-array/src/array/byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,14 @@ impl<T: ByteArrayType> Array for GenericByteArray<T> {
self.value_offsets.len() <= 1
}

fn shrink_to_fit(&mut self) {
self.value_offsets.shrink_to_fit();
self.value_data.shrink_to_fit();
if let Some(nulls) = &mut self.nulls {
nulls.shrink_to_fit();
}
}

fn offset(&self) -> usize {
0
}
Expand Down
43 changes: 26 additions & 17 deletions arrow-array/src/array/byte_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -430,31 +430,31 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
///
/// Before GC:
/// ```text
/// ┌──────┐
/// │......│
/// │......│
/// ┌────────────────────┐ ┌ ─ ─ ─ ▶ │Data1 │ Large buffer
/// ┌──────┐
/// │......│
/// │......│
/// ┌────────────────────┐ ┌ ─ ─ ─ ▶ │Data1 │ Large buffer
/// │ View 1 │─ ─ ─ ─ │......│ with data that
/// ├────────────────────┤ │......│ is not referred
/// │ View 2 │─ ─ ─ ─ ─ ─ ─ ─▶ │Data2 │ to by View 1 or
/// └────────────────────┘ │......│ View 2
/// │......│
/// 2 views, refer to │......│
/// small portions of a └──────┘
/// large buffer
/// └────────────────────┘ │......│ View 2
/// │......│
/// 2 views, refer to │......│
/// small portions of a └──────┘
/// large buffer
/// ```
///
///
/// After GC:
///
/// ```text
/// ┌────────────────────┐ ┌─────┐ After gc, only
/// │ View 1 │─ ─ ─ ─ ─ ─ ─ ─▶ │Data1│ data that is
/// ├────────────────────┤ ┌ ─ ─ ─ ▶ │Data2│ pointed to by
/// │ View 2 │─ ─ ─ ─ └─────┘ the views is
/// └────────────────────┘ left
///
///
/// 2 views
/// │ View 1 │─ ─ ─ ─ ─ ─ ─ ─▶ │Data1│ data that is
/// ├────────────────────┤ ┌ ─ ─ ─ ▶ │Data2│ pointed to by
/// │ View 2 │─ ─ ─ ─ └─────┘ the views is
/// └────────────────────┘ left
///
///
/// 2 views
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ooops… my editor is set to trim trailing whitspace on save. Let me know if you want me to reverse.

/// ```
/// This method will compact the data buffers by recreating the view array and only include the data
/// that is pointed to by the views.
Expand Down Expand Up @@ -575,6 +575,15 @@ impl<T: ByteViewType + ?Sized> Array for GenericByteViewArray<T> {
self.views.is_empty()
}

fn shrink_to_fit(&mut self) {
self.views.shrink_to_fit();
self.buffers.iter_mut().for_each(|b| b.shrink_to_fit());
self.buffers.shrink_to_fit();
if let Some(nulls) = &mut self.nulls {
nulls.shrink_to_fit();
}
}

fn offset(&self) -> usize {
0
}
Expand Down
5 changes: 5 additions & 0 deletions arrow-array/src/array/dictionary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -720,6 +720,11 @@ impl<T: ArrowDictionaryKeyType> Array for DictionaryArray<T> {
self.keys.is_empty()
}

fn shrink_to_fit(&mut self) {
self.keys.shrink_to_fit();
self.values.shrink_to_fit();
}

fn offset(&self) -> usize {
self.keys.offset()
}
Expand Down
7 changes: 7 additions & 0 deletions arrow-array/src/array/fixed_size_binary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,13 @@ impl Array for FixedSizeBinaryArray {
self.len == 0
}

fn shrink_to_fit(&mut self) {
self.value_data.shrink_to_fit();
if let Some(nulls) = &mut self.nulls {
nulls.shrink_to_fit();
}
}

fn offset(&self) -> usize {
0
}
Expand Down
7 changes: 7 additions & 0 deletions arrow-array/src/array/fixed_size_list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,13 @@ impl Array for FixedSizeListArray {
self.len == 0
}

fn shrink_to_fit(&mut self) {
self.values.shrink_to_fit();
if let Some(nulls) = &mut self.nulls {
nulls.shrink_to_fit();
}
}

fn offset(&self) -> usize {
0
}
Expand Down
8 changes: 8 additions & 0 deletions arrow-array/src/array/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,14 @@ impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
self.value_offsets.len() <= 1
}

fn shrink_to_fit(&mut self) {
if let Some(nulls) = &mut self.nulls {
nulls.shrink_to_fit();
}
self.values.shrink_to_fit();
self.value_offsets.shrink_to_fit();
}

fn offset(&self) -> usize {
0
}
Expand Down
9 changes: 9 additions & 0 deletions arrow-array/src/array/list_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,15 @@ impl<OffsetSize: OffsetSizeTrait> Array for GenericListViewArray<OffsetSize> {
self.value_sizes.is_empty()
}

fn shrink_to_fit(&mut self) {
if let Some(nulls) = &mut self.nulls {
nulls.shrink_to_fit();
}
self.values.shrink_to_fit();
self.value_offsets.shrink_to_fit();
self.value_sizes.shrink_to_fit();
}

fn offset(&self) -> usize {
0
}
Expand Down
8 changes: 8 additions & 0 deletions arrow-array/src/array/map_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,14 @@ impl Array for MapArray {
self.value_offsets.len() <= 1
}

fn shrink_to_fit(&mut self) {
if let Some(nulls) = &mut self.nulls {
nulls.shrink_to_fit();
}
self.entries.shrink_to_fit();
self.value_offsets.shrink_to_fit();
}

fn offset(&self) -> usize {
0
}
Expand Down
13 changes: 13 additions & 0 deletions arrow-array/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,9 @@ pub trait Array: std::fmt::Debug + Send + Sync {
/// ```
fn is_empty(&self) -> bool;

/// Frees up unused memory.
emilk marked this conversation as resolved.
Show resolved Hide resolved
fn shrink_to_fit(&mut self) {}

/// Returns the offset into the underlying data used by this array(-slice).
/// Note that the underlying data can be shared by many arrays.
/// This defaults to `0`.
Expand Down Expand Up @@ -365,6 +368,16 @@ impl Array for ArrayRef {
self.as_ref().is_empty()
}

fn shrink_to_fit(&mut self) {
if let Some(slf) = Arc::get_mut(self) {
slf.shrink_to_fit();
} else {
// TODO(emilk): clone the contents and shrink that.
// This can be accomplished if we add `trait Array { fn clone(&self) -> Box<Array>>; }`.
// Or we clone using `let clone = self.slice(0, self.len());` and hope that the returned `ArrayRef` is not shared.
emilk marked this conversation as resolved.
Show resolved Hide resolved
}
}

fn offset(&self) -> usize {
self.as_ref().offset()
}
Expand Down
7 changes: 7 additions & 0 deletions arrow-array/src/array/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1152,6 +1152,13 @@ impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
self.values.is_empty()
}

fn shrink_to_fit(&mut self) {
self.values.shrink_to_fit();
if let Some(nulls) = &mut self.nulls {
nulls.shrink_to_fit();
}
}

fn offset(&self) -> usize {
0
}
Expand Down
5 changes: 5 additions & 0 deletions arrow-array/src/array/run_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,11 @@ impl<T: RunEndIndexType> Array for RunArray<T> {
self.run_ends.is_empty()
}

fn shrink_to_fit(&mut self) {
self.run_ends.shrink_to_fit();
self.values.shrink_to_fit();
}

fn offset(&self) -> usize {
self.run_ends.offset()
}
Expand Down
7 changes: 7 additions & 0 deletions arrow-array/src/array/struct_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,13 @@ impl Array for StructArray {
self.len == 0
}

fn shrink_to_fit(&mut self) {
if let Some(nulls) = &mut self.nulls {
nulls.shrink_to_fit();
}
self.fields.iter_mut().for_each(|n| n.shrink_to_fit());
}

fn offset(&self) -> usize {
0
}
Expand Down
11 changes: 11 additions & 0 deletions arrow-array/src/array/union_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,17 @@ impl Array for UnionArray {
self.type_ids.is_empty()
}

fn shrink_to_fit(&mut self) {
self.type_ids.shrink_to_fit();
if let Some(offsets) = &mut self.offsets {
offsets.shrink_to_fit();
}
for array in self.fields.iter_mut().flatten() {
array.shrink_to_fit();
}
self.fields.shrink_to_fit();
}

fn offset(&self) -> usize {
0
}
Expand Down
7 changes: 7 additions & 0 deletions arrow-buffer/src/buffer/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,13 @@ impl BooleanBuffer {
self.len == 0
}

/// Free up unused memory.
#[inline]
pub fn shrink_to_fit(&mut self) {
// TODO(emilk): we could shrink even more in the case where we are a small sub-slice of the full buffer
self.buffer.shrink_to_fit();
}

/// Returns the boolean value at index `i`.
///
/// # Panics
Expand Down
27 changes: 27 additions & 0 deletions arrow-buffer/src/buffer/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,17 @@ impl Buffer {
self.data.capacity()
}

/// Shrinks the capacity of the buffer as much as possible, freeing unused memory.
///
/// The capacity of the returned buffer will be the same as [`Self::len`].
///
/// If the capacity is already less than or equal to the desired capacity, this is a no-op.
pub fn shrink_to_fit(&mut self) {
if self.len() < self.capacity() {
*self = Self::from_vec(self.as_slice().to_vec())
}
emilk marked this conversation as resolved.
Show resolved Hide resolved
}

/// Returns whether the buffer is empty.
#[inline]
pub fn is_empty(&self) -> bool {
Expand Down Expand Up @@ -554,6 +565,22 @@ mod tests {
assert_eq!(buf2.slice_with_length(2, 1).as_slice(), &[10]);
}

#[test]
fn test_shrink_to_fit() {
let original = Buffer::from(&[1, 2, 3, 4, 5, 6, 7]);
assert_eq!(original.len(), 7);
assert_eq!(original.capacity(), 64);

let slice = original.slice(3);
assert_eq!(slice.len(), 4);
assert_eq!(slice.capacity(), 64);

let mut shrunk = slice.clone();
shrunk.shrink_to_fit();
assert_eq!(shrunk.len(), 4);
assert_eq!(shrunk.capacity(), 4);
}

#[test]
#[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
fn test_slice_offset_out_of_bound() {
Expand Down
6 changes: 6 additions & 0 deletions arrow-buffer/src/buffer/null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,12 @@ impl NullBuffer {
self.buffer.is_empty()
}

/// Free up unused memory.
#[inline]
pub fn shrink_to_fit(&mut self) {
self.buffer.shrink_to_fit();
}

/// Returns the null count for this [`NullBuffer`]
#[inline]
pub fn null_count(&self) -> usize {
Expand Down
6 changes: 6 additions & 0 deletions arrow-buffer/src/buffer/offset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,12 @@ impl<O: ArrowNativeType> OffsetBuffer<O> {
Self(out.into())
}

/// Free up unused memory.
#[inline]
emilk marked this conversation as resolved.
Show resolved Hide resolved
pub fn shrink_to_fit(&mut self) {
self.0.shrink_to_fit();
}

/// Returns the inner [`ScalarBuffer`]
pub fn inner(&self) -> &ScalarBuffer<O> {
&self.0
Expand Down
7 changes: 7 additions & 0 deletions arrow-buffer/src/buffer/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,13 @@ where
self.len == 0
}

/// Free up unused memory.
#[inline]
pub fn shrink_to_fit(&mut self) {
// TODO(emilk): we could shrink even more in the case where we are a small sub-slice of the full buffer
self.run_ends.shrink_to_fit();
}

/// Returns the values of this [`RunEndBuffer`] not including any offset
#[inline]
pub fn values(&self) -> &[E] {
Expand Down
6 changes: 6 additions & 0 deletions arrow-buffer/src/buffer/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@ impl<T: ArrowNativeType> ScalarBuffer<T> {
buffer.slice_with_length(byte_offset, byte_len).into()
}

/// Free up unused memory.
#[inline]
pub fn shrink_to_fit(&mut self) {
self.buffer.shrink_to_fit();
}

/// Returns a zero-copy slice of this buffer with length `len` and starting at `offset`
pub fn slice(&self, offset: usize, len: usize) -> Self {
Self::new(self.buffer.clone(), offset, len)
Expand Down