From 8c76510d4f23e05472a8e945f810781fcbfb39c9 Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Fri, 29 Nov 2024 01:13:53 +0200 Subject: [PATCH] add extend scalar for mutable array --- arrow-data/src/transform/mod.rs | 26 ++++++++++++++++++++++ arrow-select/src/zip.rs | 38 ++++++++------------------------- 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/arrow-data/src/transform/mod.rs b/arrow-data/src/transform/mod.rs index edc68f1a107f..fb1b75da789c 100644 --- a/arrow-data/src/transform/mod.rs +++ b/arrow-data/src/transform/mod.rs @@ -701,6 +701,8 @@ impl<'a> MutableArrayData<'a> { } /// Extends the in progress array with a region of the input arrays + /// + /// To extend with a repeated scalar value, use [`MutableArrayData::extend_scalar`]. /// /// # Arguments /// * `index` - the index of array that you what to copy values from @@ -718,6 +720,30 @@ impl<'a> MutableArrayData<'a> { self.data.len += len; } + /// Extends the in progress array with `count` copies of a single value from one of the input arrays + /// + /// # Arguments + /// * `index` - the index of array that you want to copy values from + /// * `scalar_index` - the index of the scalar value to copy + /// * `count` - the number of times to repeat the value + /// + /// # Panics + /// This function panics if there is an invalid index, + /// i.e. `index` >= the number of source arrays + /// or `scalar_index` >= the length of the `index`th array + /// + pub fn extend_scalar(&mut self, index: usize, scalar_index: usize, count: usize) { + let f = &self.extend_null_bits[index]; + for _ in 0..count { + f(&mut self.data, scalar_index, 1); + } + let f = &self.extend_values[index]; + for _ in 0..count { + f(&mut self.data, index, scalar_index, 1); + } + self.data.len += count; + } + /// Extends the in progress array with null elements, ignoring the input arrays. 
/// /// # Panics diff --git a/arrow-select/src/zip.rs b/arrow-select/src/zip.rs index 38684f908699..8dc8a575ffe0 100644 --- a/arrow-select/src/zip.rs +++ b/arrow-select/src/zip.rs @@ -75,7 +75,7 @@ pub fn zip( (true, true) => zip_both_scalar(mask, &mut mutable), (true, false) => zip_truthy_scalar_falsy_array(mask, &mut mutable), (false, true) => zip_truthy_array_falsy_scalar(mask, &mut mutable), - (false, false) => zip_both_array(mask, &mut mutable) + (false, false) => zip_both_array(mask, &mut mutable), }; let data = mutable.freeze(); @@ -89,28 +89,18 @@ fn zip_both_scalar(mask: &BooleanArray, mutable: &mut MutableArrayData) { SlicesIterator::new(mask).for_each(|(start, end)| { // the gap needs to be filled with falsy values if start > filled { - for _ in filled..start { - // Copy the first item from the 'falsy' array into the output buffer. - mutable.extend(1, 0, 1); - } + mutable.extend_scalar(1, 0, start - filled); } // fill with truthy values - for _ in start..end { - // Copy the first item from the 'truthy' array into the output buffer. - mutable.extend(0, 0, 1); - } + mutable.extend_scalar(0, 0, end - start); filled = end; }); - - // the remaining part is falsy if filled < mask.len() { - for _ in filled..mask.len() { - // Copy the first item from the 'falsy' array into the output buffer. - mutable.extend(1, 0, 1); - } + // Copy the first item from the 'falsy' array into the output buffer. + mutable.extend_scalar(1, 0, mask.len() - filled); } } @@ -118,7 +108,6 @@ fn zip_truthy_scalar_falsy_array(mask: &BooleanArray, mutable: &mut MutableArray // keep track of how much is filled let mut filled = 0; - SlicesIterator::new(mask).for_each(|(start, end)| { // the gap needs to be filled with falsy values if start > filled { @@ -126,10 +115,7 @@ fn zip_truthy_scalar_falsy_array(mask: &BooleanArray, mutable: &mut MutableArray } // fill with truthy values - for _ in start..end { - // Copy the first item from the 'truthy' array into the output buffer. 
- mutable.extend(0, 0, 1); - } + mutable.extend_scalar(0, 0, end - start); filled = end; }); @@ -147,10 +133,7 @@ fn zip_truthy_array_falsy_scalar(mask: &BooleanArray, mutable: &mut MutableArray SlicesIterator::new(mask).for_each(|(start, end)| { // the gap needs to be filled with falsy values if start > filled { - for _ in filled..start { - // Copy the first item from the 'falsy' array into the output buffer. - mutable.extend(1, 0, 1); - } + mutable.extend_scalar(1, 0, start - filled); } // fill with truthy values @@ -159,13 +142,10 @@ fn zip_truthy_array_falsy_scalar(mask: &BooleanArray, mutable: &mut MutableArray filled = end; }); - // the remaining part is falsy if filled < mask.len() { - for _ in filled..mask.len() { - // Copy the first item from the 'falsy' array into the output buffer. - mutable.extend(1, 0, 1); - } + // Copy the first item from the 'falsy' array into the output buffer. + mutable.extend_scalar(1, 0, mask.len() - filled); } }