Skip to content

Commit 1da3e04

Browse files
committed
fix: Incorrect memory accounting in array_agg function
See this issue: #16517
1 parent b6c8cc5 commit 1da3e04

File tree

1 file changed

+39
-3
lines changed

1 file changed

+39
-3
lines changed

datafusion/functions-aggregate/src/array_agg.rs

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -341,12 +341,20 @@ impl Accumulator for ArrayAggAccumulator {
341341
Some(values) => {
342342
// Make sure we don't insert empty lists
343343
if !values.is_empty() {
344-
self.values.push(values);
344+
// The ArrayRef might be holding a reference to its original input buffer, so
345+
// storing it here directly copied/compacted avoids over accounting memory
346+
// not used here.
347+
self.values
348+
.push(make_array(copy_array_data(&values.to_data())));
345349
}
346350
}
347351
None => {
348352
for arr in list_arr.iter().flatten() {
349-
self.values.push(arr);
353+
// The ArrayRef might be holding a reference to its original input buffer, so
354+
// storing it here directly copied/compacted avoids over accounting memory
355+
// not used here.
356+
self.values
357+
.push(make_array(copy_array_data(&arr.to_data())));
350358
}
351359
}
352360
}
@@ -728,7 +736,7 @@ impl Accumulator for OrderSensitiveArrayAggAccumulator {
728736
mod tests {
729737
use super::*;
730738
use arrow::array::{ListBuilder, StringBuilder};
731-
use arrow::datatypes::{FieldRef, Schema};
739+
use arrow::datatypes::{FieldRef, Schema, UInt64Type};
732740
use datafusion_common::cast::as_generic_string_array;
733741
use datafusion_common::internal_err;
734742
use datafusion_physical_expr::expressions::Column;
@@ -994,6 +1002,34 @@ mod tests {
9941002
Ok(())
9951003
}
9961004

1005+
#[test]
1006+
fn does_not_over_account_memory_for_merge() -> Result<()> {
1007+
let (mut acc1, mut acc2) = ArrayAggAccumulatorBuilder::string().build_two()?;
1008+
1009+
let a1 = ListArray::from_iter_primitive::<UInt64Type, _, _>(vec![
1010+
Some(vec![Some(0), Some(1), Some(2)]),
1011+
Some(vec![Some(3)]),
1012+
None,
1013+
Some(vec![Some(4)]),
1014+
]);
1015+
let a2 = ListArray::from_iter_primitive::<UInt64Type, _, _>(vec![
1016+
Some(vec![Some(0), Some(1), Some(2)]),
1017+
Some(vec![Some(3)]),
1018+
None,
1019+
Some(vec![Some(4)]),
1020+
]);
1021+
1022+
acc1.merge_batch(&[Arc::new(a1.slice(0, 1))])?;
1023+
acc2.merge_batch(&[Arc::new(a2.slice(0, 1))])?;
1024+
1025+
acc1 = merge(acc1, acc2)?;
1026+
1027+
// without compaction, the size is 16812.
1028+
assert_eq!(acc1.size(), 556);
1029+
1030+
Ok(())
1031+
}
1032+
9971033
#[test]
9981034
fn does_not_over_account_memory() -> Result<()> {
9991035
let (mut acc1, mut acc2) = ArrayAggAccumulatorBuilder::string().build_two()?;

0 commit comments

Comments
 (0)