Skip to content

Commit

Permalink
fix: Recursively align when converting to ArrowArray (#19097)
Browse files Browse the repository at this point in the history
  • Loading branch information
coastalwhite authored Oct 7, 2024
1 parent c4a28ae commit 1a9b224
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 4 deletions.
2 changes: 2 additions & 0 deletions crates/polars-arrow/src/array/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ pub(crate) trait FromFfi<T: ffi::ArrowArrayRef>: Sized {
macro_rules! ffi_dyn {
($array:expr, $ty:ty) => {{
let array = $array.as_any().downcast_ref::<$ty>().unwrap();

(
array.offset().unwrap(),
array.buffers(),
Expand All @@ -54,6 +55,7 @@ type BuffersChildren = (

pub fn offset_buffers_children_dictionary(array: &dyn Array) -> BuffersChildren {
use PhysicalType::*;

match array.dtype().to_physical_type() {
Null => ffi_dyn!(array, NullArray),
Boolean => ffi_dyn!(array, BooleanArray),
Expand Down
15 changes: 11 additions & 4 deletions crates/polars-arrow/src/ffi/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use crate::datatypes::{ArrowDataType, PhysicalType};
use crate::ffi::schema::get_child;
use crate::storage::SharedStorage;
use crate::types::NativeType;
use crate::{match_integer_type, with_match_primitive_type_full};
use crate::{ffi, match_integer_type, with_match_primitive_type_full};

/// Reads a valid `ffi` interface into a `Box<dyn Array>`
/// # Errors
Expand Down Expand Up @@ -140,12 +140,19 @@ impl ArrowArray {

let children_ptr = children
.into_iter()
.map(|child| Box::into_raw(Box::new(ArrowArray::new(child))))
.map(|child| {
Box::into_raw(Box::new(ArrowArray::new(ffi::align_to_c_data_interface(
child,
))))
})
.collect::<Box<_>>();
let n_children = children_ptr.len() as i64;

let dictionary_ptr =
dictionary.map(|array| Box::into_raw(Box::new(ArrowArray::new(array))));
let dictionary_ptr = dictionary.map(|array| {
Box::into_raw(Box::new(ArrowArray::new(ffi::align_to_c_data_interface(
array,
))))
});

let length = array.len() as i64;
let null_count = array.null_count() as i64;
Expand Down
1 change: 1 addition & 0 deletions crates/polars-arrow/src/ffi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ mod schema;
mod stream;

pub(crate) use array::{try_from, ArrowArrayRef, InternalArrowArray};
pub(crate) use bridge::align_to_c_data_interface;
pub use generated::{ArrowArray, ArrowArrayStream, ArrowSchema};
use polars_error::PolarsResult;
pub use stream::{export_iterator, ArrowArrayStreamReader};
Expand Down
8 changes: 8 additions & 0 deletions py-polars/tests/unit/interop/test_interop.py
Original file line number Diff line number Diff line change
Expand Up @@ -766,3 +766,11 @@ def test_df_pycapsule_interface() -> None:
df2 = pl.from_arrow(out)
assert isinstance(df2, pl.DataFrame)
assert df.equals(df2)


def test_misaligned_nested_arrow_19097() -> None:
    """Round-trip a nested Series with mismatched value/validity offsets through Arrow."""
    nested = (
        pl.Series("a", [1, 2, 3])
        .slice(1, 2)  # slicing leaves the values at offset=1
        .replace(2, None)  # this adds a validity mask that sits at offset=0
        .reshape((2, 1))  # finally wrap the values so the Series is nested
    )
    roundtripped = pl.Series("a", nested.to_arrow())
    assert_series_equal(roundtripped, nested)

0 comments on commit 1a9b224

Please sign in to comment.