Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into array/union/const…
Browse files Browse the repository at this point in the history
…ructor
  • Loading branch information
tustvold committed May 8, 2024
2 parents 5b77085 + 68d1eef commit 865a560
Show file tree
Hide file tree
Showing 111 changed files with 5,910 additions and 3,064 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ jobs:
rm website/build/artifact.tar
cp .asf.yaml ./website/build/.asf.yaml
- name: Deploy to gh-pages
uses: peaceiris/actions-gh-pages@v3.9.3
uses: peaceiris/actions-gh-pages@v4.0.0
if: github.event_name == 'push' && github.ref_name == 'master'
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
Expand Down
5 changes: 5 additions & 0 deletions .github/workflows/integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,11 @@ jobs:
with:
path: rust
fetch-depth: 0
# Workaround https://github.com/rust-lang/jobserver-rs/issues/87
# Can be removed once https://github.com/rust-lang/jobserver-rs/pull/88 is released
- name: Downgrade jobserver
working-directory: rust
run: cargo update -p cc --precise 1.0.94 && cargo update -p jobserver --precise 0.1.28
- name: Build
run: conda run --no-capture-output ci/scripts/integration_arrow_build.sh $PWD /build
- name: Run
Expand Down
3 changes: 2 additions & 1 deletion arrow-arith/src/arity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use arrow_array::builder::BufferBuilder;
use arrow_array::types::ArrowDictionaryKeyType;
use arrow_array::*;
use arrow_buffer::buffer::NullBuffer;
use arrow_buffer::ArrowNativeType;
use arrow_buffer::{Buffer, MutableBuffer};
use arrow_data::ArrayData;
use arrow_schema::ArrowError;
Expand Down Expand Up @@ -386,7 +387,7 @@ where
O: ArrowPrimitiveType,
F: Fn(A::Item, B::Item) -> Result<O::Native, ArrowError>,
{
let mut buffer = MutableBuffer::new(len * O::get_byte_width());
let mut buffer = MutableBuffer::new(len * O::Native::get_byte_width());
for idx in 0..len {
unsafe {
buffer.push_unchecked(op(a.value_unchecked(idx), b.value_unchecked(idx))?);
Expand Down
3 changes: 3 additions & 0 deletions arrow-array/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ num = { version = "0.4.1", default-features = false, features = ["std"] }
half = { version = "2.1", default-features = false, features = ["num-traits"] }
hashbrown = { version = "0.14", default-features = false }

[features]
ffi = ["arrow-schema/ffi", "arrow-data/ffi"]

[dev-dependencies]
rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }
criterion = { version = "0.5", default-features = false }
Expand Down
109 changes: 105 additions & 4 deletions arrow-array/src/array/byte_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,18 @@ impl<T: ByteViewType + ?Sized> From<GenericByteViewArray<T>> for ArrayData {
}
}

/// Collects borrowed optional values (`&Option<Ptr>`) into a [`GenericByteViewArray`].
///
/// Every item is turned into an `Option` holding a borrowed `T::Native` and the
/// resulting iterator is collected, so the input items are only borrowed.
impl<'a, Ptr, T> FromIterator<&'a Option<Ptr>> for GenericByteViewArray<T>
where
Ptr: AsRef<T::Native> + 'a,
T: ByteViewType + ?Sized,
{
fn from_iter<I: IntoIterator<Item = &'a Option<Ptr>>>(iter: I) -> Self {
iter.into_iter()
// `&Option<Ptr>` -> `Option<&Ptr>` -> `Option<&T::Native>`
.map(|o| o.as_ref().map(|p| p.as_ref()))
.collect()
}
}

impl<Ptr, T: ByteViewType + ?Sized> FromIterator<Option<Ptr>> for GenericByteViewArray<T>
where
Ptr: AsRef<T::Native>,
Expand All @@ -400,7 +412,35 @@ where
/// ```
pub type BinaryViewArray = GenericByteViewArray<BinaryViewType>;

/// A [`GenericByteViewArray`] that stores uf8 data
impl BinaryViewArray {
/// Convert the [`BinaryViewArray`] to [`StringViewArray`]
///
/// # Errors
/// Returns an error if [`StringViewType::validate`] rejects the views and data
/// buffers of this array, i.e. if the items are not valid utf8 data.
pub fn to_string_view(self) -> Result<StringViewArray, ArrowError> {
StringViewType::validate(self.views(), self.data_buffers())?;
// SAFETY: the validation call above succeeded, which is the utf8 requirement
// documented on `to_string_view_unchecked`.
unsafe { Ok(self.to_string_view_unchecked()) }
}

/// Convert the [`BinaryViewArray`] to [`StringViewArray`] without validating the data
///
/// # Safety
/// Caller is responsible for ensuring that items in array are utf8 data.
pub unsafe fn to_string_view_unchecked(self) -> StringViewArray {
StringViewArray::new_unchecked(self.views, self.buffers, self.nulls)
}
}

/// Creates a [`BinaryViewArray`] from a vector of byte slices by delegating to
/// `from_iter_values`.
impl From<Vec<&[u8]>> for BinaryViewArray {
fn from(v: Vec<&[u8]>) -> Self {
Self::from_iter_values(v)
}
}

/// Creates a [`BinaryViewArray`] from a vector of optional byte slices by collecting
/// the items through the `FromIterator` implementation.
// NOTE(review): `None` items presumably become null slots -- confirm against the
// `FromIterator<Option<Ptr>>` implementation.
impl From<Vec<Option<&[u8]>>> for BinaryViewArray {
fn from(v: Vec<Option<&[u8]>>) -> Self {
v.into_iter().collect()
}
}

/// A [`GenericByteViewArray`] that stores utf8 data
///
/// # Example
/// ```
Expand All @@ -411,21 +451,46 @@ pub type BinaryViewArray = GenericByteViewArray<BinaryViewType>;
/// ```
pub type StringViewArray = GenericByteViewArray<StringViewType>;

impl StringViewArray {
/// Convert the [`StringViewArray`] to [`BinaryViewArray`]
///
/// The views, buffers and nulls of `self` are moved into the new array as-is.
pub fn to_binary_view(self) -> BinaryViewArray {
// SAFETY: the components come from an existing array and are passed through
// untouched.
// NOTE(review): this presumes a binary view array has no content requirement
// beyond what a string view array already upholds -- confirm against
// `new_unchecked`'s contract.
unsafe { BinaryViewArray::new_unchecked(self.views, self.buffers, self.nulls) }
}
}

/// Creates a [`StringViewArray`] from a vector of string slices by delegating to
/// `from_iter_values`.
impl From<Vec<&str>> for StringViewArray {
fn from(v: Vec<&str>) -> Self {
Self::from_iter_values(v)
}
}

/// Creates a [`StringViewArray`] from a vector of optional string slices by collecting
/// the items through the `FromIterator` implementation.
// NOTE(review): `None` items presumably become null slots -- confirm against the
// `FromIterator<Option<Ptr>>` implementation.
impl From<Vec<Option<&str>>> for StringViewArray {
fn from(v: Vec<Option<&str>>) -> Self {
v.into_iter().collect()
}
}

/// Creates a [`StringViewArray`] from a vector of owned strings by delegating to
/// `from_iter_values`; the vector is consumed.
impl From<Vec<String>> for StringViewArray {
fn from(v: Vec<String>) -> Self {
Self::from_iter_values(v)
}
}

/// Creates a [`StringViewArray`] from a vector of optional owned strings by collecting
/// the items through the `FromIterator` implementation; the vector is consumed.
// NOTE(review): `None` items presumably become null slots -- confirm against the
// `FromIterator<Option<Ptr>>` implementation.
impl From<Vec<Option<String>>> for StringViewArray {
fn from(v: Vec<Option<String>>) -> Self {
v.into_iter().collect()
}
}

#[cfg(test)]
mod tests {
use crate::builder::StringViewBuilder;
use crate::builder::{BinaryViewBuilder, StringViewBuilder};
use crate::{Array, BinaryViewArray, StringViewArray};
use arrow_buffer::{Buffer, ScalarBuffer};
use arrow_data::ByteView;

#[test]
fn try_new() {
fn try_new_string() {
let array = StringViewArray::from_iter_values(vec![
"hello",
"world",
Expand All @@ -434,7 +499,10 @@ mod tests {
]);
assert_eq!(array.value(0), "hello");
assert_eq!(array.value(3), "large payload over 12 bytes");
}

#[test]
fn try_new_binary() {
let array = BinaryViewArray::from_iter_values(vec![
b"hello".as_slice(),
b"world".as_slice(),
Expand All @@ -443,14 +511,30 @@ mod tests {
]);
assert_eq!(array.value(0), b"hello");
assert_eq!(array.value(3), b"large payload over 12 bytes");
}

#[test]
fn try_new_empty_string() {
    // Finishing a builder that never received a value must give an empty array.
    let array = StringViewBuilder::new().finish();
    assert!(array.is_empty());
}

#[test]
fn try_new_empty_binary() {
    // Finishing a builder that never received a value must give an empty array.
    let array = BinaryViewBuilder::new().finish();
    assert!(array.is_empty());
}

#[test]
fn test_append_string() {
// test builder append
let array = {
let mut builder = StringViewBuilder::new();
Expand All @@ -462,8 +546,25 @@ mod tests {
assert_eq!(array.value(0), "hello");
assert!(array.is_null(1));
assert_eq!(array.value(2), "large payload over 12 bytes");
}

#[test]
fn test_append_binary() {
    // Exercise the three append entry points of the builder: a plain value,
    // an explicit null, and an optional value.
    let mut builder = BinaryViewBuilder::new();
    builder.append_value(b"hello");
    builder.append_null();
    builder.append_option(Some(b"large payload over 12 bytes"));
    let array = builder.finish();

    assert_eq!(array.value(0), b"hello");
    assert!(array.is_null(1));
    assert_eq!(array.value(2), b"large payload over 12 bytes");
}

// test builder's in_progress re-created
#[test]
fn test_in_progress_recreation() {
let array = {
// make a builder with small block size.
let mut builder = StringViewBuilder::new().with_block_size(14);
Expand Down
37 changes: 26 additions & 11 deletions arrow-array/src/array/fixed_size_list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,16 +152,22 @@ impl FixedSizeListArray {
ArrowError::InvalidArgumentError(format!("Size cannot be negative, got {}", size))
})?;

let len = values.len() / s.max(1);
if let Some(n) = nulls.as_ref() {
if n.len() != len {
return Err(ArrowError::InvalidArgumentError(format!(
"Incorrect length of null buffer for FixedSizeListArray, expected {} got {}",
len,
n.len(),
)));
let len = match s {
0 => nulls.as_ref().map(|x| x.len()).unwrap_or_default(),
_ => {
let len = values.len() / s.max(1);
if let Some(n) = nulls.as_ref() {
if n.len() != len {
return Err(ArrowError::InvalidArgumentError(format!(
"Incorrect length of null buffer for FixedSizeListArray, expected {} got {}",
len,
n.len(),
)));
}
}
len
}
}
};

if field.data_type() != values.data_type() {
return Err(ArrowError::InvalidArgumentError(format!(
Expand Down Expand Up @@ -460,7 +466,7 @@ mod tests {

use crate::cast::AsArray;
use crate::types::Int32Type;
use crate::Int32Array;
use crate::{new_empty_array, Int32Array};

use super::*;

Expand Down Expand Up @@ -656,7 +662,7 @@ mod tests {
);

let list = FixedSizeListArray::new(field.clone(), 0, values.clone(), None);
assert_eq!(list.len(), 6);
assert_eq!(list.len(), 0);

let nulls = NullBuffer::new_null(2);
let err = FixedSizeListArray::try_new(field, 2, values.clone(), Some(nulls)).unwrap_err();
Expand All @@ -674,4 +680,13 @@ mod tests {
let err = FixedSizeListArray::try_new(field, 2, values, None).unwrap_err();
assert_eq!(err.to_string(), "Invalid argument error: FixedSizeListArray expected data type Int64 got Int32 for \"item\"");
}

#[test]
fn empty_fixed_size_list() {
    // With a list size of 0 and no child values, the array length is expected
    // to come from the null buffer (2 entries here).
    let item_field = Arc::new(Field::new("item", DataType::Int32, true));
    let validity = NullBuffer::new_null(2);
    let list = FixedSizeListArray::new(
        item_field,
        0,
        new_empty_array(&DataType::Int32),
        Some(validity),
    );
    assert_eq!(list.len(), 2);
}
}
7 changes: 5 additions & 2 deletions arrow-array/src/array/null_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,11 @@ impl NullArray {
}

/// Returns a new null array builder
pub fn builder(capacity: usize) -> NullBuilder {
NullBuilder::with_capacity(capacity)
///
/// Note that the `capacity` parameter to this function is _deprecated_. It
/// now does nothing, and will be removed in a future version.
pub fn builder(_capacity: usize) -> NullBuilder {
NullBuilder::new()
}
}

Expand Down
Loading

0 comments on commit 865a560

Please sign in to comment.