Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions arrow-array/src/array/boolean_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,7 @@ impl BooleanArray {
///
/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
/// I.e. that `size_hint().1` correctly reports its length. Note that this is a stronger
/// guarantee that `ExactSizeIterator` provides which could still report a wrong length.
/// guarantee than `ExactSizeIterator` provides, which could still report a wrong length.
///
/// # Panics
///
Expand All @@ -511,8 +511,9 @@ impl BooleanArray {
pub unsafe fn from_trusted_len_iter<I, P>(iter: I) -> Self
where
P: Into<BooleanAdapter>,
I: ExactSizeIterator<Item = P>,
I: IntoIterator<Item = P, IntoIter: ExactSizeIterator>,
{
let iter = iter.into_iter();
let data_len = iter.len();

let num_bytes = bit_util::ceil(data_len, 8);
Expand Down Expand Up @@ -715,7 +716,7 @@ mod tests {
let expected = v.clone().into_iter().collect::<BooleanArray>();
let actual = unsafe {
// SAFETY: `v` has trusted length
BooleanArray::from_trusted_len_iter(v.into_iter())
BooleanArray::from_trusted_len_iter(v)
};
assert_eq!(expected, actual);
}
Expand Down
14 changes: 10 additions & 4 deletions arrow-array/src/array/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1458,18 +1458,24 @@ impl<T: ArrowPrimitiveType, Ptr: Into<NativeAdapter<T>>> FromIterator<Ptr> for P

impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
/// Creates a [`PrimitiveArray`] from an iterator of trusted length.
///
/// # Safety
///
/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
/// I.e. that `size_hint().1` correctly reports its length.
/// I.e. that `size_hint().1` correctly reports its length. Note that this is a stronger
/// guarantee than `ExactSizeIterator` provides, which could still report a wrong length.
///
/// # Panics
///
/// Panics if the iterator does not report an upper bound on `size_hint()`.
#[inline]
pub unsafe fn from_trusted_len_iter<I, P>(iter: I) -> Self
where
P: std::borrow::Borrow<Option<<T as ArrowPrimitiveType>::Native>>,
I: IntoIterator<Item = P>,
I: IntoIterator<Item = P, IntoIter: ExactSizeIterator>,
{
let iterator = iter.into_iter();
let (_, upper) = iterator.size_hint();
let len = upper.expect("trusted_len_unzip requires an upper limit");
let len = iterator.len();

let (null, buffer) = unsafe { trusted_len_unzip(iterator) };

Expand Down
2 changes: 1 addition & 1 deletion arrow-array/src/builder/boolean_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ impl Extend<Option<bool>> for BooleanBuilder {
let buffered = iter.into_iter().collect::<Vec<_>>();
let array = unsafe {
// SAFETY: std::vec::IntoIter implements TrustedLen
BooleanArray::from_trusted_len_iter(buffered.into_iter())
BooleanArray::from_trusted_len_iter(buffered)
};
self.append_array(&array)
}
Expand Down
6 changes: 3 additions & 3 deletions arrow-cast/src/cast/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ pub(crate) fn parse_string_view<P: Parser>(
fn parse_string_iter<
'a,
P: Parser,
I: Iterator<Item = Option<&'a str>>,
I: ExactSizeIterator<Item = Option<&'a str>>,
F: FnOnce() -> Option<NullBuffer>,
>(
iter: I,
Expand Down Expand Up @@ -156,7 +156,7 @@ pub(crate) fn cast_view_to_timestamp<T: ArrowTimestampType>(

fn cast_string_to_timestamp_impl<
'a,
I: Iterator<Item = Option<&'a str>>,
I: ExactSizeIterator<Item = Option<&'a str>>,
T: ArrowTimestampType,
Tz: TimeZone,
>(
Expand Down Expand Up @@ -310,7 +310,7 @@ fn cast_string_to_interval_impl<'a, I, ArrowType, F>(
parse_function: F,
) -> Result<ArrayRef, ArrowError>
where
I: Iterator<Item = Option<&'a str>>,
I: ExactSizeIterator<Item = Option<&'a str>>,
ArrowType: ArrowPrimitiveType,
F: Fn(&str) -> Result<ArrowType::Native, ArrowError> + Copy,
{
Expand Down
36 changes: 19 additions & 17 deletions arrow/benches/array_from.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,40 +206,42 @@ fn array_from_vec_benchmark(c: &mut Criterion) {
});
}

fn gen_option_vector<TItem: Copy>(item: TItem, len: usize) -> Vec<Option<TItem>> {
hint::black_box(
repeat_n(item, len)
.enumerate()
.map(|(idx, item)| if idx % 3 == 0 { None } else { Some(item) })
.collect(),
)
fn gen_option_iter<TItem: Clone + 'static>(
item: TItem,
len: usize,
) -> Box<dyn ExactSizeIterator<Item = Option<TItem>>> {
hint::black_box(Box::new(repeat_n(item, len).enumerate().map(
|(idx, item)| {
if idx % 3 == 0 { None } else { Some(item) }
},
)))
}

fn from_iter_benchmark(c: &mut Criterion) {
const ITER_LEN: usize = 16_384;

// All ArrowPrimitiveType use the same implementation
c.bench_function("Int64Array::from_iter", |b| {
let values = gen_option_vector(1, ITER_LEN);
b.iter(|| hint::black_box(Int64Array::from_iter(values.iter())));
b.iter(|| hint::black_box(Int64Array::from_iter(gen_option_iter(1, ITER_LEN))));
});
c.bench_function("Int64Array::from_trusted_len_iter", |b| {
let values = gen_option_vector(1, ITER_LEN);
b.iter(|| unsafe {
// SAFETY: values.iter() is a TrustedLenIterator
hint::black_box(Int64Array::from_trusted_len_iter(values.iter()))
// SAFETY: gen_option_iter returns a TrustedLen iterator
hint::black_box(Int64Array::from_trusted_len_iter(gen_option_iter(
1, ITER_LEN,
)))
});
});

c.bench_function("BooleanArray::from_iter", |b| {
let values = gen_option_vector(true, ITER_LEN);
b.iter(|| hint::black_box(BooleanArray::from_iter(values.iter())));
b.iter(|| hint::black_box(BooleanArray::from_iter(gen_option_iter(true, ITER_LEN))));
});
c.bench_function("BooleanArray::from_trusted_len_iter", |b| {
let values = gen_option_vector(true, ITER_LEN);
b.iter(|| unsafe {
// SAFETY: values.iter() is a TrustedLenIterator
hint::black_box(BooleanArray::from_trusted_len_iter(values.iter()))
// SAFETY: gen_option_iter returns a TrustedLen iterator
hint::black_box(BooleanArray::from_trusted_len_iter(gen_option_iter(
true, ITER_LEN,
)))
});
});
}
Expand Down
Loading