Skip to content

Commit 7f3dc5c

Browse files
committed
Improve performance of array_reverse 5x by using take, not MutableData
1 parent 32d2618 commit 7f3dc5c

File tree

1 file changed

+32
-21
lines changed

1 file changed

+32
-21
lines changed

datafusion/functions-nested/src/reverse.rs

Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
2020
use crate::utils::make_scalar_function;
2121
use arrow::array::{
22-
Array, ArrayRef, Capacities, FixedSizeListArray, GenericListArray,
23-
GenericListViewArray, MutableArrayData, OffsetSizeTrait, UInt32Array,
22+
Array, ArrayRef, FixedSizeListArray, GenericListArray, GenericListViewArray,
23+
OffsetSizeTrait, UInt32Array, UInt64Array,
2424
};
2525
use arrow::buffer::{OffsetBuffer, ScalarBuffer};
2626
use arrow::compute::take;
@@ -155,11 +155,8 @@ fn general_array_reverse<O: OffsetSizeTrait>(
155155
field: &FieldRef,
156156
) -> Result<ArrayRef> {
157157
let values = array.values();
158-
let original_data = values.to_data();
159-
let capacity = Capacities::Array(original_data.len());
160158
let mut offsets = vec![O::usize_as(0)];
161-
let mut mutable =
162-
MutableArrayData::with_capacities(vec![&original_data], false, capacity);
159+
let mut indices: Vec<O> = Vec::with_capacity(values.len());
163160

164161
for (row_index, (&start, &end)) in array.offsets().iter().tuple_windows().enumerate()
165162
{
@@ -171,18 +168,34 @@ fn general_array_reverse<O: OffsetSizeTrait>(
171168

172169
let mut index = end - O::one();
173170
while index >= start {
174-
mutable.extend(0, index.to_usize().unwrap(), index.to_usize().unwrap() + 1);
171+
indices.push(index);
175172
index = index - O::one();
176173
}
177174
let size = end - start;
178175
offsets.push(offsets[row_index] + size);
179176
}
180177

181-
let data = mutable.freeze();
178+
// Materialize values from underlying array with take
179+
let indices_array: ArrayRef = if O::IS_LARGE {
180+
Arc::new(UInt64Array::from(
181+
indices
182+
.iter()
183+
.map(|i| i.as_usize() as u64)
184+
.collect::<Vec<_>>(),
185+
))
186+
} else {
187+
Arc::new(UInt32Array::from(
188+
indices
189+
.iter()
190+
.map(|i| i.as_usize() as u32)
191+
.collect::<Vec<_>>(),
192+
))
193+
};
194+
let values = take(&values, &indices_array, None)?;
182195
Ok(Arc::new(GenericListArray::<O>::try_new(
183196
Arc::clone(field),
184197
OffsetBuffer::<O>::new(offsets.into()),
185-
arrow::array::make_array(data),
198+
values,
186199
array.nulls().cloned(),
187200
)?))
188201
}
@@ -231,7 +244,7 @@ fn list_view_reverse<O: OffsetSizeTrait>(
231244

232245
// Materialize values from underlying array with take
233246
let indices_array: ArrayRef = if O::IS_LARGE {
234-
Arc::new(arrow::array::UInt64Array::from(
247+
Arc::new(UInt64Array::from(
235248
indices
236249
.iter()
237250
.map(|i| i.as_usize() as u64)
@@ -245,13 +258,12 @@ fn list_view_reverse<O: OffsetSizeTrait>(
245258
.collect::<Vec<_>>(),
246259
))
247260
};
248-
let values_reversed = take(&values, &indices_array, None)?;
249-
261+
let values = take(&values, &indices_array, None)?;
250262
Ok(Arc::new(GenericListViewArray::<O>::try_new(
251263
Arc::clone(field),
252264
ScalarBuffer::from(new_offsets),
253265
ScalarBuffer::from(new_sizes),
254-
values_reversed,
266+
values,
255267
array.nulls().cloned(),
256268
)?))
257269
}
@@ -261,30 +273,29 @@ fn fixed_size_array_reverse(
261273
field: &FieldRef,
262274
) -> Result<ArrayRef> {
263275
let values = array.values();
264-
let original_data = values.to_data();
265-
let capacity = Capacities::Array(original_data.len());
266-
let mut mutable =
267-
MutableArrayData::with_capacities(vec![&original_data], false, capacity);
268276
let value_length = array.value_length() as usize;
277+
let mut indices: Vec<u64> = Vec::with_capacity(values.len());
269278

270279
for row_index in 0..array.len() {
271280
// skip the null value
272281
if array.is_null(row_index) {
273-
mutable.extend(0, 0, value_length);
282+
indices.push(0);
274283
continue;
275284
}
276285
let start = row_index * value_length;
277286
let end = start + value_length;
278287
for idx in (start..end).rev() {
279-
mutable.extend(0, idx, idx + 1);
288+
indices.push(idx as u64);
280289
}
281290
}
282291

283-
let data = mutable.freeze();
292+
// Materialize values from underlying array with take
293+
let indices_array: ArrayRef = Arc::new(UInt64Array::from(indices));
294+
let values = take(&values, &indices_array, None)?;
284295
Ok(Arc::new(FixedSizeListArray::try_new(
285296
Arc::clone(field),
286297
array.value_length(),
287-
arrow::array::make_array(data),
298+
values,
288299
array.nulls().cloned(),
289300
)?))
290301
}

0 commit comments

Comments
 (0)