diff --git a/crates/polars-arrow/src/array/ffi.rs b/crates/polars-arrow/src/array/ffi.rs index bf9844529b1f..977568406900 100644 --- a/crates/polars-arrow/src/array/ffi.rs +++ b/crates/polars-arrow/src/array/ffi.rs @@ -36,6 +36,7 @@ pub(crate) trait FromFfi: Sized { macro_rules! ffi_dyn { ($array:expr, $ty:ty) => {{ let array = $array.as_any().downcast_ref::<$ty>().unwrap(); + ( array.offset().unwrap(), array.buffers(), @@ -54,6 +55,7 @@ type BuffersChildren = ( pub fn offset_buffers_children_dictionary(array: &dyn Array) -> BuffersChildren { use PhysicalType::*; + match array.dtype().to_physical_type() { Null => ffi_dyn!(array, NullArray), Boolean => ffi_dyn!(array, BooleanArray), diff --git a/crates/polars-arrow/src/ffi/array.rs b/crates/polars-arrow/src/ffi/array.rs index 60a102f56e94..7f7e9c409782 100644 --- a/crates/polars-arrow/src/ffi/array.rs +++ b/crates/polars-arrow/src/ffi/array.rs @@ -12,7 +12,7 @@ use crate::datatypes::{ArrowDataType, PhysicalType}; use crate::ffi::schema::get_child; use crate::storage::SharedStorage; use crate::types::NativeType; -use crate::{match_integer_type, with_match_primitive_type_full}; +use crate::{ffi, match_integer_type, with_match_primitive_type_full}; /// Reads a valid `ffi` interface into a `Box` /// # Errors @@ -140,12 +140,19 @@ impl ArrowArray { let children_ptr = children .into_iter() - .map(|child| Box::into_raw(Box::new(ArrowArray::new(child)))) + .map(|child| { + Box::into_raw(Box::new(ArrowArray::new(ffi::align_to_c_data_interface( + child, + )))) + }) .collect::>(); let n_children = children_ptr.len() as i64; - let dictionary_ptr = - dictionary.map(|array| Box::into_raw(Box::new(ArrowArray::new(array)))); + let dictionary_ptr = dictionary.map(|array| { + Box::into_raw(Box::new(ArrowArray::new(ffi::align_to_c_data_interface( + array, + )))) + }); let length = array.len() as i64; let null_count = array.null_count() as i64; diff --git a/crates/polars-arrow/src/ffi/mod.rs b/crates/polars-arrow/src/ffi/mod.rs index 
b7cf2b957b0a..b41c68575073 100644 --- a/crates/polars-arrow/src/ffi/mod.rs +++ b/crates/polars-arrow/src/ffi/mod.rs @@ -8,6 +8,7 @@ mod schema; mod stream; pub(crate) use array::{try_from, ArrowArrayRef, InternalArrowArray}; +pub(crate) use bridge::align_to_c_data_interface; pub use generated::{ArrowArray, ArrowArrayStream, ArrowSchema}; use polars_error::PolarsResult; pub use stream::{export_iterator, ArrowArrayStreamReader}; diff --git a/py-polars/tests/unit/interop/test_interop.py b/py-polars/tests/unit/interop/test_interop.py index 7f664e2fb197..5768787c22c4 100644 --- a/py-polars/tests/unit/interop/test_interop.py +++ b/py-polars/tests/unit/interop/test_interop.py @@ -766,3 +766,11 @@ def test_df_pycapsule_interface() -> None: df2 = pl.from_arrow(out) assert isinstance(df2, pl.DataFrame) assert df.equals(df2) + + +def test_misaligned_nested_arrow_19097() -> None: + a = pl.Series("a", [1, 2, 3]) + a = a.slice(1, 2) # slicing leaves the values with offset=1 + a = a.replace(2, None) # replacing adds a validity mask with offset=0 + a = a.reshape((2, 1)) # reshaping makes the array nested + assert_series_equal(pl.Series("a", a.to_arrow()), a)