diff --git a/arrow/src/array/array_boolean.rs b/arrow/src/array/array_boolean.rs
index 37080fe5a8c7..824f7c0832cd 100644
--- a/arrow/src/array/array_boolean.rs
+++ b/arrow/src/array/array_boolean.rs
@@ -22,6 +22,7 @@ use std::{any::Any, fmt};
 
 use super::*;
 use super::{array::print_long_array, raw_pointer::RawPtrBox};
+use crate::bitmap::Bitmap;
 use crate::buffer::{Buffer, MutableBuffer};
 use crate::util::bit_util;
 
@@ -54,6 +55,13 @@ pub struct BooleanArray {
     raw_values: RawPtrBox<u8>,
 }
 
+impl Clone for BooleanArray {
+    /// Returns a new array built from a clone of this array's `ArrayData`.
+    fn clone(&self) -> Self {
+        Self::from(self.data.clone())
+    }
+}
+
 impl fmt::Debug for BooleanArray {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         write!(f, "BooleanArray\n[\n")?;
@@ -103,6 +111,14 @@ impl BooleanArray {
         debug_assert!(i < self.len());
         unsafe { self.value_unchecked(i) }
     }
+
+    /// Consumes the array and returns `(values, null_bitmap, offset, length)`.
+    ///
+    /// This is a thin wrapper around `ArrayData::into_1_dimensional_parts`;
+    /// `values` is the first buffer of the underlying data.
+    pub fn into_parts(self) -> (Buffer, Option<Bitmap>, usize, usize) {
+        self.data.into_1_dimensional_parts()
+    }
 }
 
 impl Array for BooleanArray {
diff --git a/arrow/src/array/array_primitive.rs b/arrow/src/array/array_primitive.rs
index 76c1fcfe8c53..79919e41c210 100644
--- a/arrow/src/array/array_primitive.rs
+++ b/arrow/src/array/array_primitive.rs
@@ -57,6 +57,13 @@ pub struct PrimitiveArray<T: ArrowPrimitiveType> {
     raw_values: RawPtrBox<T::Native>,
 }
 
+impl<T: ArrowPrimitiveType> Clone for PrimitiveArray<T> {
+    /// Returns a new array built from a clone of this array's `ArrayData`.
+    fn clone(&self) -> Self {
+        Self::from(self.data.clone())
+    }
+}
+
 impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
     /// Returns the length of this array.
     #[inline]
@@ -140,6 +147,11 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
         );
         PrimitiveArray::from(data)
     }
+
+    /// Consumes the array and returns its underlying [`ArrayData`].
+    pub fn into_data(self) -> ArrayData {
+        self.data
+    }
 }
 
 impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
diff --git a/arrow/src/array/builder.rs b/arrow/src/array/builder.rs
index 049fd7389f2d..4afd10c64a81 100644
--- a/arrow/src/array/builder.rs
+++ b/arrow/src/array/builder.rs
@@ -86,6 +86,13 @@ pub struct BufferBuilder<T: ArrowNativeType> {
     _marker: PhantomData<T>,
 }
 
+impl<T: ArrowNativeType> Default for BufferBuilder<T> {
+    /// Equivalent to `BufferBuilder::new(0)`.
+    fn default() -> Self {
+        Self::new(0)
+    }
+}
+
 impl<T: ArrowNativeType> BufferBuilder<T> {
     /// Creates a new builder with initial capacity for _at least_ `capacity`
     /// elements of type `T`.
@@ -134,6 +141,21 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
         self.len
     }
 
+    /// Returns the builder's contents as a mutable slice of `T`.
+    ///
+    /// Only the portion of the internal buffer that `align_to_mut` reports as
+    /// correctly aligned whole `T` values is returned; any unaligned prefix or
+    /// trailing partial element is silently left out.
+    pub fn typed_data_mut(&mut self) -> &mut [T] {
+        // SAFETY: relies on every `ArrowNativeType` being a plain value type
+        // for which any bit pattern is valid, so the aligned bytes can be
+        // viewed as `[T]`.
+        // TODO: Make faster.
+        let (_prefix, values, _suffix) =
+            unsafe { self.buffer.as_slice_mut().align_to_mut::<T>() };
+        values
+    }
+
     /// Returns whether the internal buffer is empty.
     ///
     /// # Example:
@@ -297,11 +319,35 @@ impl BooleanBufferBuilder {
         Self { buffer, len: 0 }
     }
 
+    /// Creates a builder on top of an existing `buffer` that already holds
+    /// `len` bits.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `buffer.len()` is not exactly the number of bytes needed to
+    /// store `len` bits.
+    #[inline]
+    pub fn new_from_buffer(buffer: MutableBuffer, len: usize) -> Self {
+        assert_eq!(bit_util::ceil(len, 8), buffer.len());
+        Self { buffer, len }
+    }
+
     #[inline]
     pub fn len(&self) -> usize {
         self.len
     }
 
+    /// Returns the bit at position `index`.
+    ///
+    /// `index` must be less than [`len`](Self::len); this is only checked in
+    /// debug builds.
+    #[inline]
+    pub fn get_bit(&self, index: usize) -> bool {
+        debug_assert!(index < self.len);
+        bit_util::get_bit(self.buffer.as_ref(), index)
+    }
+
+    // TODO: Probably, make set_bit be branchless
     #[inline]
     pub fn set_bit(&mut self, index: usize, v: bool) {
         if v {
@@ -382,6 +428,13 @@ impl BooleanBufferBuilder {
         self.len = 0;
         buf.into()
     }
+
+    /// Builds a [`Buffer`] from the current contents without resetting the
+    /// builder.
+    #[inline]
+    pub fn finish_cloned(&self) -> Buffer {
+        Buffer::from_slice_ref(&self.buffer.as_slice())
+    }
 }
 
 impl From<BooleanBufferBuilder> for Buffer {
diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs
index 2c957d29077e..41ae6fc29f40 100644
--- a/arrow/src/array/data.rs
+++ b/arrow/src/array/data.rs
@@ -478,6 +478,23 @@ impl ArrayData {
 
         Self::new(data_type.clone(), 0, Some(0), None, 0, buffers, child_data)
     }
+
+    /// Consumes the data and returns `(first buffer, null bitmap, offset, length)`.
+    ///
+    /// Meant for layouts whose values live in a single buffer: any further
+    /// buffers are dropped along with the remaining fields.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the data has no buffers.
+    pub fn into_1_dimensional_parts(self) -> (Buffer, Option<Bitmap>, usize, usize) {
+        let first_buffer = self
+            .buffers
+            .into_iter()
+            .next()
+            .expect("into_1_dimensional_parts requires at least one buffer");
+        (first_buffer, self.null_bitmap, self.offset, self.len)
+    }
 }
 
 impl PartialEq for ArrayData {
diff --git a/arrow/src/bitmap.rs b/arrow/src/bitmap.rs
index 599dabefc9fb..cbe821668a85 100644
--- a/arrow/src/bitmap.rs
+++ b/arrow/src/bitmap.rs
@@ -74,6 +74,15 @@ impl Bitmap {
     pub fn get_array_memory_size(&self) -> usize {
         self.bits.capacity() + mem::size_of_val(self)
     }
+
+    /// Returns an iterator over `len` bits of this bitmap, starting at bit `offset`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `offset + len` overflows or reaches past the end of the bitmap.
+    pub fn make_iter(&self, offset: usize, len: usize) -> bit_util::BitsIter<'_> {
+        bit_util::BitsIter::new(self.bits.as_slice(), offset, len)
+    }
 }
 
 impl<'a, 'b> BitAnd<&'b Bitmap> for &'a Bitmap {
diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs
index e93ea51a4d8e..34b24dbfd588 100644
--- a/arrow/src/compute/kernels/arithmetic.rs
+++ b/arrow/src/compute/kernels/arithmetic.rs
@@ -32,8 +32,8 @@ use crate::buffer::MutableBuffer;
 #[cfg(not(feature = "simd"))]
 use crate::compute::kernels::arity::unary;
 use crate::compute::util::combine_option_bitmap;
-use crate::datatypes;
 use crate::datatypes::ArrowNumericType;
+use crate::datatypes::{self, ArrowPrimitiveType, DataType};
 use crate::error::{ArrowError, Result};
 use crate::{array::*, util::bit_util};
 use num::traits::Pow;
@@ -155,6 +155,30 @@ pub fn math_op<T, F>(
 where
     T: ArrowNumericType,
     F: Fn(T::Native, T::Native) -> T::Native,
+{
+    math_op_with_data_type(T::DATA_TYPE, left, right, op)
+}
+
+/// Like [`math_op`], but `right` may have a different primitive type `U` and
+/// the resulting array is built with the supplied `data_type` instead of
+/// `T::DATA_TYPE`.
+///
+/// `data_type` is handed to `ArrayData::new` as given, so callers are expected
+/// to pass a type whose values are stored as `T::Native`.
+///
+/// # Errors
+///
+/// Returns an error if `left` and `right` have different lengths.
+pub fn math_op_with_data_type<T, U, F>(
+    data_type: DataType,
+    left: &PrimitiveArray<T>,
+    right: &PrimitiveArray<U>,
+    op: F,
+) -> Result<PrimitiveArray<T>>
+where
+    T: ArrowPrimitiveType,
+    U: ArrowPrimitiveType,
+    F: Fn(T::Native, U::Native) -> T::Native,
 {
     if left.len() != right.len() {
         return Err(ArrowError::ComputeError(
@@ -178,7 +202,7 @@ where
     let buffer = unsafe { Buffer::from_trusted_len_iter(values) };
 
     let data = ArrayData::new(
-        T::DATA_TYPE,
+        data_type,
         left.len(),
         None,
         null_bit_buffer,
diff --git a/arrow/src/util/bit_util.rs b/arrow/src/util/bit_util.rs
index f643d593fabd..54677e5deec8 100644
--- a/arrow/src/util/bit_util.rs
+++ b/arrow/src/util/bit_util.rs
@@ -107,6 +107,66 @@ pub fn ceil(value: usize, divisor: usize) -> usize {
     }
 }
 
+/// Iterator over a contiguous range of bits in a byte slice.
+///
+/// Each item is the `bool` that [`get_bit`] returns for the next position.
+#[derive(Debug, Clone)]
+pub struct BitsIter<'a> {
+    /// Bytes that hold the bits.
+    bytes: &'a [u8],
+    /// Position of the next bit to yield.
+    offset: usize,
+    /// Position one past the last bit to yield.
+    end_offset: usize,
+}
+
+impl<'a> BitsIter<'a> {
+    /// Creates an iterator over `len` bits of `bytes`, starting at bit `offset`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `offset + len` overflows or reaches past the end of `bytes`.
+    pub fn new(bytes: &'a [u8], offset: usize, len: usize) -> BitsIter<'a> {
+        // `checked_add` makes overflow take the same panic path in every build
+        // profile; a plain `+` would trip the overflow check first in debug builds.
+        let end_offset = match offset.checked_add(len) {
+            Some(end) if ceil(end, 8) <= bytes.len() => end,
+            _ => panic!(
+                "BitsIter::new called with invalid offset or len. offset: {}, len: {}, bytes.len(): {}",
+                offset,
+                len,
+                bytes.len()
+            ),
+        };
+        BitsIter {
+            bytes,
+            offset,
+            end_offset,
+        }
+    }
+}
+
+impl<'a> Iterator for BitsIter<'a> {
+    type Item = bool;
+
+    fn next(&mut self) -> Option<bool> {
+        if self.offset == self.end_offset {
+            return None;
+        }
+        let bit = get_bit(self.bytes, self.offset);
+        self.offset += 1;
+        Some(bit)
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        // `offset` never passes `end_offset`, so this cannot underflow.
+        let remaining = self.end_offset - self.offset;
+        (remaining, Some(remaining))
+    }
+}
+
+impl<'a> ExactSizeIterator for BitsIter<'a> {}
+
 /// Performs SIMD bitwise binary operations.
 ///
 /// # Safety