diff --git a/arrow-data/src/transform/mod.rs b/arrow-data/src/transform/mod.rs index edc68f1a107f..5d5c0fdffd48 100644 --- a/arrow-data/src/transform/mod.rs +++ b/arrow-data/src/transform/mod.rs @@ -702,6 +702,8 @@ impl<'a> MutableArrayData<'a> { /// Extends the in progress array with a region of the input arrays /// + /// To repeat a single value, use [`MutableArrayData::extend_n`]. + /// /// # Arguments /// * `index` - the index of array that you what to copy values from /// * `start` - the start index of the chunk (inclusive) @@ -718,6 +720,28 @@ impl<'a> MutableArrayData<'a> { self.data.len += len; } + /// Extends the in progress array by repeating a single value from one of the input arrays + /// + /// # Arguments + /// * `index` - the index of the array that you want to copy the value from + /// * `scalar_index` - the index, within that array, of the value to copy + /// * `count` - the number of times to repeat the value + /// + /// # Panics + /// This function panics if there is an invalid index, + /// i.e. `index` >= the number of source arrays + /// or `scalar_index` >= the length of the `index`th array + /// + pub fn extend_n(&mut self, index: usize, scalar_index: usize, count: usize) { + let extend_null_fn = &self.extend_null_bits[index]; + let extend_value_fn = &self.extend_values[index]; + for _ in 0..count { + extend_null_fn(&mut self.data, scalar_index, 1); + extend_value_fn(&mut self.data, index, scalar_index, 1); + self.data.len += 1; + } + } + /// Extends the in progress array with null elements, ignoring the input arrays. /// /// # Panics diff --git a/arrow-select/src/zip.rs b/arrow-select/src/zip.rs index ff2380ef2420..e4fe03264b9d 100644 --- a/arrow-select/src/zip.rs +++ b/arrow-select/src/zip.rs @@ -70,46 +70,104 @@ pub fn zip( // the SlicesIterator slices only the true values. 
So the gaps left by this iterator we need to // fill with falsy values + // dispatch to the routine specialized for which of the inputs are scalars + match (truthy_is_scalar, falsy_is_scalar) { + (true, true) => zip_both_scalar(mask, &mut mutable), + (true, false) => zip_truthy_scalar_falsy_array(mask, &mut mutable), + (false, true) => zip_truthy_array_falsy_scalar(mask, &mut mutable), + (false, false) => zip_both_array(mask, &mut mutable), + }; + + let data = mutable.freeze(); + Ok(make_array(data)) +} + +fn zip_both_scalar(mask: &BooleanArray, mutable: &mut MutableArrayData) { // keep track of how much is filled let mut filled = 0; SlicesIterator::new(mask).for_each(|(start, end)| { // the gap needs to be filled with falsy values if start > filled { - if falsy_is_scalar { - for _ in filled..start { - // Copy the first item from the 'falsy' array into the output buffer. - mutable.extend(1, 0, 1); - } - } else { - mutable.extend(1, filled, start); - } + mutable.extend_n(1, 0, start - filled); } // fill with truthy values - if truthy_is_scalar { - for _ in start..end { - // Copy the first item from the 'truthy' array into the output buffer. - mutable.extend(0, 0, 1); - } - } else { - mutable.extend(0, start, end); + mutable.extend_n(0, 0, end - start); + + filled = end; + }); + + // the remaining part is falsy + if filled < mask.len() { + // Repeat the first item of the 'falsy' array for the rest of the output buffer. 
+ mutable.extend_n(1, 0, mask.len() - filled); + } +} + +fn zip_truthy_scalar_falsy_array(mask: &BooleanArray, mutable: &mut MutableArrayData) { + // keep track of how much is filled + let mut filled = 0; + + SlicesIterator::new(mask).for_each(|(start, end)| { + // the gap needs to be filled with falsy values + if start > filled { + mutable.extend(1, filled, start); } + + // repeat the first item of the 'truthy' array across this slice + mutable.extend_n(0, 0, end - start); + filled = end; }); + // the remaining part is falsy if filled < mask.len() { - if falsy_is_scalar { - for _ in filled..mask.len() { - // Copy the first item from the 'falsy' array into the output buffer. - mutable.extend(1, 0, 1); - } - } else { - mutable.extend(1, filled, mask.len()); + mutable.extend(1, filled, mask.len()); + } +} + +fn zip_truthy_array_falsy_scalar(mask: &BooleanArray, mutable: &mut MutableArrayData) { + // keep track of how much is filled + let mut filled = 0; + + SlicesIterator::new(mask).for_each(|(start, end)| { + // the gap needs to be filled with falsy values + if start > filled { + mutable.extend_n(1, 0, start - filled); } + + // fill with truthy values + mutable.extend(0, start, end); + + filled = end; + }); + + // the remaining part is falsy + if filled < mask.len() { + // Repeat the first item of the 'falsy' array for the rest of the output buffer. + mutable.extend_n(1, 0, mask.len() - filled); } +} - let data = mutable.freeze(); - Ok(make_array(data)) +fn zip_both_array(mask: &BooleanArray, mutable: &mut MutableArrayData) { + let mut filled = 0; + + SlicesIterator::new(mask).for_each(|(start, end)| { + // the gap needs to be filled with falsy values + if start > filled { + mutable.extend(1, filled, start); + } + + // fill with truthy values + mutable.extend(0, start, end); + + filled = end; + }); + + // the remaining part is falsy + if filled < mask.len() { + mutable.extend(1, filled, mask.len()); + } } #[cfg(test)]