Skip to content

Commit

Permalink
Memory management and access features for primitive arrays and bit arrays
Browse files Browse the repository at this point in the history

* Add BitsIter and Bitmap::make_iter

* Add math_op_with_data_type

* Add Clone implementations for BooleanArray and PrimitiveArray

* Add functionality to BooleanBufferBuilder and BufferBuilder

* Add into_parts-type functions for consuming PrimitiveArray and BooleanArray
  • Loading branch information
srh authored Nov 26, 2024
1 parent ea7d119 commit b6c25a9
Show file tree
Hide file tree
Showing 7 changed files with 122 additions and 2 deletions.
13 changes: 13 additions & 0 deletions arrow/src/array/array_boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use std::{any::Any, fmt};

use super::*;
use super::{array::print_long_array, raw_pointer::RawPtrBox};
use crate::bitmap::Bitmap;
use crate::buffer::{Buffer, MutableBuffer};
use crate::util::bit_util;

Expand Down Expand Up @@ -54,6 +55,12 @@ pub struct BooleanArray {
raw_values: RawPtrBox<u8>,
}

impl Clone for BooleanArray {
fn clone(&self) -> Self {
Self::from(self.data.clone())
}
}

impl fmt::Debug for BooleanArray {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "BooleanArray\n[\n")?;
Expand Down Expand Up @@ -103,6 +110,12 @@ impl BooleanArray {
debug_assert!(i < self.len());
unsafe { self.value_unchecked(i) }
}

/// Consumes the array and returns `(buffer, bitmap, offset, length)`.
///
/// The tuple is whatever `into_1_dimensional_parts` yields for the array's
/// underlying data.
pub fn into_parts(self) -> (Buffer, Option<Bitmap>, usize, usize) {
    self.data.into_1_dimensional_parts()
}
}

impl Array for BooleanArray {
Expand Down
10 changes: 10 additions & 0 deletions arrow/src/array/array_primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ pub struct PrimitiveArray<T: ArrowPrimitiveType> {
raw_values: RawPtrBox<T::Native>,
}

impl<T: ArrowPrimitiveType> Clone for PrimitiveArray<T> {
fn clone(&self) -> Self {
Self::from(self.data.clone())
}
}

impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
/// Returns the length of this array.
#[inline]
Expand Down Expand Up @@ -140,6 +146,10 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
);
PrimitiveArray::from(data)
}

pub fn into_data(self) -> ArrayData {
self.data
}
}

impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
Expand Down
35 changes: 35 additions & 0 deletions arrow/src/array/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
//! as an internal buffer in an [`ArrayData`](crate::array::ArrayData)
//! object.
// use core::slice::SlicePattern;
use std::any::Any;
use std::collections::HashMap;
use std::fmt;
Expand Down Expand Up @@ -86,6 +87,12 @@ pub struct BufferBuilder<T: ArrowNativeType> {
_marker: PhantomData<T>,
}

impl<T: ArrowNativeType> Default for BufferBuilder<T> {
fn default() -> Self {
Self::new(0)
}
}

impl<T: ArrowNativeType> BufferBuilder<T> {
/// Creates a new builder with initial capacity for _at least_ `capacity`
/// elements of type `T`.
Expand Down Expand Up @@ -134,6 +141,16 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
self.len
}

/// Returns the builder's bytes reinterpreted as a mutable slice of `T`.
///
/// Only the aligned middle section produced by `align_to_mut` is returned;
/// any leading or trailing bytes that do not form a whole, aligned `T` are
/// left out without any error being reported.
pub fn typed_data_mut(&mut self) -> &mut [T] {
    // TODO: Make faster.
    let bytes = self.buffer.as_slice_mut();
    // SAFETY: NOTE(review) — presumably sound because every bit pattern is a
    // valid value for an `ArrowNativeType`; confirm against the trait's contract.
    let (_head, typed, _tail) = unsafe { bytes.align_to_mut::<T>() };
    typed
}

/// Returns whether the internal buffer is empty.
///
/// # Example:
Expand Down Expand Up @@ -297,11 +314,23 @@ impl BooleanBufferBuilder {
Self { buffer, len: 0 }
}

#[inline]
pub fn new_from_buffer(buffer: MutableBuffer, len: usize) -> BooleanBufferBuilder {
assert_eq!(len.div_ceil(8), buffer.len());
Self { buffer, len }
}

/// Returns the value of the builder's `len` field.
///
/// NOTE(review): `new_from_buffer` compares `len.div_ceil(8)` with the
/// buffer's byte length, so this is presumably a count of bits — confirm.
#[inline]
pub fn len(&self) -> usize {
self.len
}

/// Returns the bit at position `index` of the builder's buffer.
///
/// The lookup is delegated to `bit_util::get_bit`; `index` is not compared
/// against `len()` in this method.
#[inline]
pub fn get_bit(&self, index: usize) -> bool {
    let bytes: &[u8] = self.buffer.as_ref();
    bit_util::get_bit(bytes, index)
}

// TODO: Probably, make set_bit be branchless
#[inline]
pub fn set_bit(&mut self, index: usize, v: bool) {
if v {
Expand Down Expand Up @@ -382,6 +411,12 @@ impl BooleanBufferBuilder {
self.len = 0;
buf.into()
}

/// Builds a [`Buffer`] from the builder's current bytes without resetting
/// the builder.
#[inline]
pub fn finish_cloned(&self) -> Buffer {
    let bytes = self.buffer.as_slice();
    Buffer::from_slice_ref(&bytes)
}
}

impl From<BooleanBufferBuilder> for Buffer {
Expand Down
9 changes: 9 additions & 0 deletions arrow/src/array/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,15 @@ impl ArrayData {

Self::new(data_type.clone(), 0, Some(0), None, 0, buffers, child_data)
}

pub fn into_1_dimensional_parts(self) -> (Buffer, Option<Bitmap>, usize, usize) {
let offset: usize = self.offset;
let length: usize = self.len;
let buffers: Vec<Buffer> = self.buffers;
let bitmap: Option<Bitmap> = self.null_bitmap;
let buffer0: Buffer = buffers.into_iter().next().unwrap();
(buffer0, bitmap, offset, length)
}
}

impl PartialEq for ArrayData {
Expand Down
4 changes: 4 additions & 0 deletions arrow/src/bitmap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ impl Bitmap {
pub fn get_array_memory_size(&self) -> usize {
self.bits.capacity() + mem::size_of_val(self)
}

/// Creates a [`bit_util::BitsIter`] over `len` bits of this bitmap, starting
/// at bit index `offset`.
///
/// The arguments are passed unchanged to `BitsIter::new` together with the
/// bitmap's bytes.
pub fn make_iter<'a>(&'a self, offset: usize, len: usize) -> bit_util::BitsIter<'a> {
    let bytes = self.bits.as_slice();
    bit_util::BitsIter::new(bytes, offset, len)
}
}

impl<'a, 'b> BitAnd<&'b Bitmap> for &'a Bitmap {
Expand Down
19 changes: 17 additions & 2 deletions arrow/src/compute/kernels/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ use crate::buffer::MutableBuffer;
#[cfg(not(feature = "simd"))]
use crate::compute::kernels::arity::unary;
use crate::compute::util::combine_option_bitmap;
use crate::datatypes;
use crate::datatypes::ArrowNumericType;
use crate::datatypes::{self, ArrowPrimitiveType, DataType};
use crate::error::{ArrowError, Result};
use crate::{array::*, util::bit_util};
use num::traits::Pow;
Expand Down Expand Up @@ -155,6 +155,21 @@ pub fn math_op<T, F>(
where
T: ArrowNumericType,
F: Fn(T::Native, T::Native) -> T::Native,
{
math_op_with_data_type(T::DATA_TYPE, left, right, op)
}

/// Like `math_op` but builds a PrimitiveArray with the supplied data type.
pub fn math_op_with_data_type<T, U, F>(
data_type: DataType,
left: &PrimitiveArray<T>,
right: &PrimitiveArray<U>,
op: F,
) -> Result<PrimitiveArray<T>>
where
T: ArrowPrimitiveType,
U: ArrowPrimitiveType,
F: Fn(T::Native, U::Native) -> T::Native,
{
if left.len() != right.len() {
return Err(ArrowError::ComputeError(
Expand All @@ -178,7 +193,7 @@ where
let buffer = unsafe { Buffer::from_trusted_len_iter(values) };

let data = ArrayData::new(
T::DATA_TYPE,
data_type,
left.len(),
None,
null_bit_buffer,
Expand Down
34 changes: 34 additions & 0 deletions arrow/src/util/bit_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,40 @@ pub fn ceil(value: usize, divisor: usize) -> usize {
}
}

/// Iterator state for walking the bits of a byte slice between two bit
/// positions.
#[derive(Debug)]
pub struct BitsIter<'a> {
    /// Bytes that hold the bits being iterated.
    bytes: &'a [u8],
    /// Bit index of the next bit to yield.
    offset: usize,
    /// Bit index one past the last bit to yield.
    end_offset: usize,
}

impl<'a> BitsIter<'a> {
    /// Creates an iterator over `len` bits of `bytes`, starting at bit index
    /// `offset`.
    ///
    /// # Panics
    ///
    /// Panics if `offset + len` overflows `usize`, or if the requested bit
    /// range extends past the end of `bytes`.
    pub fn new(bytes: &'a [u8], offset: usize, len: usize) -> BitsIter<'a> {
        // `checked_add` routes overflow to the same descriptive panic as an
        // out-of-range request, regardless of whether overflow checks are on.
        let end_offset = match offset.checked_add(len) {
            Some(end) if end.div_ceil(8) <= bytes.len() => end,
            _ => panic!("BitsIter::new called with invalid offset or len. offset: {}, len: {}, bytes.len(): {}", offset, len, bytes.len()),
        };
        BitsIter {
            bytes,
            offset,
            end_offset,
        }
    }
}

/// Yields one `bool` per bit position, from `offset` up to (but not
/// including) `end_offset`.
impl<'a> Iterator for BitsIter<'a> {
    type Item = bool;

    /// Returns the bit at the current position and advances by one, or `None`
    /// once the end position has been reached.
    fn next(&mut self) -> Option<bool> {
        if self.offset == self.end_offset {
            return None;
        }
        let bit = get_bit(self.bytes, self.offset);
        self.offset += 1;
        Some(bit)
    }

    /// Reports the exact number of bits still to be yielded, so consumers
    /// such as `collect` can size their storage up front.
    fn size_hint(&self) -> (usize, Option<usize>) {
        // `new` only builds iterators with `offset <= end_offset`, and `next`
        // stops advancing once they are equal, so this cannot underflow.
        let remaining = self.end_offset - self.offset;
        (remaining, Some(remaining))
    }
}

/// Performs SIMD bitwise binary operations.
///
/// # Safety
Expand Down

0 comments on commit b6c25a9

Please sign in to comment.