Skip to content

Commit

Permalink
Implement take kernel for byte view array. (#5602)
Browse files Browse the repository at this point in the history
* impl take kernel for byte view array.

* Add unit tests.

* Use ArrayData equality

* Rename to byte_view

---------

Co-authored-by: Raphael Taylor-Davies <[email protected]>
  • Loading branch information
RinChanNOWWW and tustvold authored Apr 9, 2024
1 parent 16f4a7f commit 144c9c7
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 0 deletions.
36 changes: 36 additions & 0 deletions arrow-array/src/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,34 @@ pub trait AsArray: private::Sealed {
self.as_bytes_opt().expect("binary array")
}

/// Downcast this to a [`StringViewArray`] panicking if not possible
///
/// # Panics
///
/// Panics with the message `"string view array"` when [`Self::as_byte_view_opt`]
/// returns `None`. Use [`Self::as_string_view_opt`] for a non-panicking variant.
fn as_string_view(&self) -> &StringViewArray {
self.as_byte_view_opt().expect("string view array")
}

/// Downcast this to a [`StringViewArray`] returning `None` if not possible
///
/// Forwards to [`Self::as_byte_view_opt`]; the view type parameter is fixed by
/// this method's return type.
fn as_string_view_opt(&self) -> Option<&StringViewArray> {
self.as_byte_view_opt()
}

/// Downcast this to a [`BinaryViewArray`] panicking if not possible
///
/// # Panics
///
/// Panics with the message `"binary view array"` when [`Self::as_byte_view_opt`]
/// returns `None`. Use [`Self::as_binary_view_opt`] for a non-panicking variant.
fn as_binary_view(&self) -> &BinaryViewArray {
self.as_byte_view_opt().expect("binary view array")
}

/// Downcast this to a [`BinaryViewArray`] returning `None` if not possible
///
/// Forwards to [`Self::as_byte_view_opt`]; the view type parameter is fixed by
/// this method's return type.
fn as_binary_view_opt(&self) -> Option<&BinaryViewArray> {
self.as_byte_view_opt()
}

/// Downcast this to a [`GenericByteViewArray`] panicking if not possible
///
/// # Panics
///
/// Panics with the message `"byte view array"` when [`Self::as_byte_view_opt`]
/// returns `None`. Use [`Self::as_byte_view_opt`] directly to handle that case.
fn as_byte_view<T: ByteViewType>(&self) -> &GenericByteViewArray<T> {
self.as_byte_view_opt().expect("byte view array")
}

/// Downcast this to a [`GenericByteViewArray`] returning `None` if not possible
///
/// Declared without a default body, so every implementor of this trait supplies
/// the downcast itself.
fn as_byte_view_opt<T: ByteViewType>(&self) -> Option<&GenericByteViewArray<T>>;

/// Downcast this to a [`StructArray`] returning `None` if not possible
///
/// Declared without a default body, so every implementor of this trait supplies
/// the downcast itself.
fn as_struct_opt(&self) -> Option<&StructArray>;

Expand Down Expand Up @@ -852,6 +880,10 @@ impl AsArray for dyn Array + '_ {
self.as_any().downcast_ref()
}

fn as_byte_view_opt<T: ByteViewType>(&self) -> Option<&GenericByteViewArray<T>> {
    // Spell out the downcast target instead of leaving it to return-type inference.
    self.as_any().downcast_ref::<GenericByteViewArray<T>>()
}

fn as_struct_opt(&self) -> Option<&StructArray> {
    // Spell out the downcast target instead of leaving it to return-type inference.
    self.as_any().downcast_ref::<StructArray>()
}
Expand Down Expand Up @@ -899,6 +931,10 @@ impl AsArray for ArrayRef {
self.as_ref().as_bytes_opt()
}

fn as_byte_view_opt<T: ByteViewType>(&self) -> Option<&GenericByteViewArray<T>> {
    // Hand the request to the referenced array, passing the view type through explicitly.
    self.as_ref().as_byte_view_opt::<T>()
}

fn as_struct_opt(&self) -> Option<&StructArray> {
// Forward the request to the value obtained through `as_ref()`.
self.as_ref().as_struct_opt()
}
Expand Down
67 changes: 67 additions & 0 deletions arrow-select/src/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@ fn take_impl<IndexType: ArrowPrimitiveType>(
DataType::LargeUtf8 => {
Ok(Arc::new(take_bytes(values.as_string::<i64>(), indices)?))
}
DataType::Utf8View => {
Ok(Arc::new(take_byte_view(values.as_string_view(), indices)?))
}
DataType::List(_) => {
Ok(Arc::new(take_list::<_, Int32Type>(values.as_list(), indices)?))
}
Expand Down Expand Up @@ -204,6 +207,9 @@ fn take_impl<IndexType: ArrowPrimitiveType>(
DataType::LargeBinary => {
Ok(Arc::new(take_bytes(values.as_binary::<i64>(), indices)?))
}
DataType::BinaryView => {
Ok(Arc::new(take_byte_view(values.as_binary_view(), indices)?))
}
DataType::FixedSizeBinary(size) => {
let values = values
.as_any()
Expand Down Expand Up @@ -437,6 +443,20 @@ fn take_bytes<T: ByteArrayType, IndexType: ArrowPrimitiveType>(
Ok(GenericByteArray::from(array_data))
}

/// `take` implementation for byte view arrays
///
/// Gathers the views and the null mask of `array` at `indices` and pairs them with
/// a copy of the source array's data buffer list (`data_buffers().to_vec()`), so
/// the selected views are carried over as-is.
///
/// The body has no error path of its own: every call returns `Ok`. The `Result`
/// return type is presumably kept for parity with the sibling `take_*` kernels.
fn take_byte_view<T: ByteViewType, IndexType: ArrowPrimitiveType>(
    array: &GenericByteViewArray<T>,
    indices: &PrimitiveArray<IndexType>,
) -> Result<GenericByteViewArray<T>, ArrowError> {
    // Select the views first, then the validity, mirroring the layout of the result.
    let views = take_native(array.views(), indices);
    let nulls = take_nulls(array.nulls(), indices);

    // The buffer list is taken over wholesale; only the views decide what is visible.
    let buffers = array.data_buffers().to_vec();

    let taken = GenericByteViewArray::new(views, buffers, nulls);
    Ok(taken)
}

/// `take` implementation for list arrays
///
/// Calculates the index and indexed offset for the inner array,
Expand Down Expand Up @@ -1424,6 +1444,53 @@ mod tests {
assert_eq!(result.as_ref(), &expected);
}

/// Shared body for the byte view `take` tests, generic over the view type `T`.
///
/// Takes from a five element array (one null, one value longer than 12 bytes) with
/// indices that repeat an element, include a null index, and hit the null element,
/// then compares the result against an array built directly with a builder.
fn _test_byte_view<T>()
where
    T: ByteViewType,
    str: AsRef<T::Native>,
    T::Native: PartialEq,
{
    let index = UInt32Array::from(vec![Some(3), None, Some(1), Some(3), Some(4), Some(2)]);

    // Source values; `None` marks the null slot.
    let input: [Option<&str>; 5] = [
        Some("hello"),
        Some("world"),
        None,
        Some("large payload over 12 bytes"),
        Some("lulu"),
    ];
    let mut source = GenericByteViewBuilder::<T>::new();
    for slot in input.iter().copied() {
        match slot {
            Some(text) => {
                source.append_value(text);
            }
            None => {
                source.append_null();
            }
        }
    }
    let array = source.finish();

    let actual = take(&array, &index, None).unwrap();

    assert_eq!(actual.len(), index.len());

    // Index 1 is a null index and index 5 selects the null element, so both are null.
    let wanted: [Option<&str>; 6] = [
        Some("large payload over 12 bytes"),
        None,
        Some("world"),
        Some("large payload over 12 bytes"),
        Some("lulu"),
        None,
    ];
    let mut reference = GenericByteViewBuilder::<T>::new();
    for slot in wanted.iter().copied() {
        match slot {
            Some(text) => {
                reference.append_value(text);
            }
            None => {
                reference.append_null();
            }
        }
    }
    let expected = reference.finish();

    assert_eq!(actual.as_ref(), &expected);
}

// Runs the shared byte view `take` test with `StringViewType` as the view type.
#[test]
fn test_take_string_view() {
_test_byte_view::<StringViewType>()
}

// Runs the shared byte view `take` test with `BinaryViewType` as the view type.
#[test]
fn test_take_binary_view() {
_test_byte_view::<BinaryViewType>()
}

macro_rules! test_take_list {
($offset_type:ty, $list_data_type:ident, $list_array_type:ident) => {{
// Construct a value array, [[0,0,0], [-1,-2,-1], [], [2,3]]
Expand Down

0 comments on commit 144c9c7

Please sign in to comment.