From 9927959f4309369fde2817d63e0699cbf77a0f67 Mon Sep 17 00:00:00 2001 From: alexandreyc Date: Fri, 19 Apr 2024 09:49:35 +0200 Subject: [PATCH 1/3] Move ffi stream and utils from arrow to arrow-array --- arrow-array/Cargo.toml | 3 + {arrow => arrow-array}/src/ffi.rs | 363 +++++++++++++++++------ {arrow => arrow-array}/src/ffi_stream.rs | 14 +- arrow-array/src/lib.rs | 4 + arrow/Cargo.toml | 2 +- arrow/src/array/ffi.rs | 254 ---------------- arrow/src/array/mod.rs | 5 +- arrow/src/lib.rs | 4 +- 8 files changed, 288 insertions(+), 361 deletions(-) rename {arrow => arrow-array}/src/ffi.rs (81%) rename {arrow => arrow-array}/src/ffi_stream.rs (98%) delete mode 100644 arrow/src/array/ffi.rs diff --git a/arrow-array/Cargo.toml b/arrow-array/Cargo.toml index 0bc2facb5373..b00d2c88e1a7 100644 --- a/arrow-array/Cargo.toml +++ b/arrow-array/Cargo.toml @@ -50,6 +50,9 @@ num = { version = "0.4.1", default-features = false, features = ["std"] } half = { version = "2.1", default-features = false, features = ["num-traits"] } hashbrown = { version = "0.14", default-features = false } +[features] +ffi = ["arrow-schema/ffi", "arrow-data/ffi"] + [dev-dependencies] rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] } criterion = { version = "0.5", default-features = false } diff --git a/arrow/src/ffi.rs b/arrow-array/src/ffi.rs similarity index 81% rename from arrow/src/ffi.rs rename to arrow-array/src/ffi.rs index d33de9d655f8..4dbe9411fe1d 100644 --- a/arrow/src/ffi.rs +++ b/arrow-array/src/ffi.rs @@ -29,11 +29,11 @@ //! //! ```rust //! # use std::sync::Arc; -//! # use arrow::array::{Int32Array, Array, ArrayData, make_array}; -//! # use arrow::error::Result; -//! # use arrow_arith::numeric::add; -//! # use arrow::ffi::{to_ffi, from_ffi}; -//! # fn main() -> Result<()> { +//! # use arrow_array::{Int32Array, Array, make_array}; +//! # use arrow_data::ArrayData; +//! # use arrow_array::ffi::{to_ffi, from_ffi}; +//! # use arrow_schema::ArrowError; +//! # fn main() -> Result<(), ArrowError> { //! // create an array natively //! //! let array = Int32Array::from(vec![Some(1), None, Some(3)]); @@ -46,11 +46,8 @@ //! let data = unsafe { from_ffi(out_array, &out_schema) }?; //! let array = Int32Array::from(data); //! -//! // perform some operation -//! let array = add(&array, &array)?; -//! //! // verify -//! assert_eq!(array.as_ref(), &Int32Array::from(vec![Some(2), None, Some(6)])); +//! assert_eq!(array, Int32Array::from(vec![Some(1), None, Some(3)])); //! # //! # Ok(()) //! # } @@ -60,9 +57,9 @@ //! //! ``` //! # use std::ptr::addr_of_mut; -//! # use arrow::ffi::{from_ffi, FFI_ArrowArray, FFI_ArrowSchema}; +//! # use arrow_array::ffi::{from_ffi, FFI_ArrowArray}; //! # use arrow_array::{ArrayRef, make_array}; -//! # use arrow_schema::ArrowError; +//! # use arrow_schema::{ArrowError, ffi::FFI_ArrowSchema}; //! # //! /// A foreign data container that can export to C Data interface //! struct ForeignArray {}; @@ -106,16 +103,39 @@ To export an array, create an `ArrowArray` using [ArrowArray::try_new]. use std::{mem::size_of, ptr::NonNull, sync::Arc}; +use arrow_buffer::{bit_util, Buffer, MutableBuffer}; pub use arrow_data::ffi::FFI_ArrowArray; -pub use arrow_schema::ffi::{FFI_ArrowSchema, Flags}; +use arrow_data::{layout, ArrayData}; +pub use arrow_schema::ffi::FFI_ArrowSchema; +use arrow_schema::{ArrowError, DataType, UnionMode}; + +use crate::array::ArrayRef; + +type Result = std::result::Result; // TODO(alexandreyc): we should probably move Result to arrow_schema -use arrow_schema::UnionMode; +/// Exports an array to raw pointers of the C Data Interface provided by the consumer. +/// # Safety +/// Assumes that these pointers represent valid C Data Interfaces, both in memory +/// representation and lifetime via the `release` mechanism. +/// +/// This function copies the content of two FFI structs [ffi::FFI_ArrowArray] and +/// [ffi::FFI_ArrowSchema] in the array to the location pointed by the raw pointers. +/// Usually the raw pointers are provided by the array data consumer. +#[deprecated(note = "Use FFI_ArrowArray::new and FFI_ArrowSchema::try_from")] +pub unsafe fn export_array_into_raw( + src: ArrayRef, + out_array: *mut FFI_ArrowArray, + out_schema: *mut FFI_ArrowSchema, +) -> Result<()> { + let data = src.to_data(); + let array = FFI_ArrowArray::new(&data); + let schema = FFI_ArrowSchema::try_from(data.data_type())?; -use crate::array::{layout, ArrayData}; -use crate::buffer::{Buffer, MutableBuffer}; -use crate::datatypes::DataType; -use crate::error::{ArrowError, Result}; -use crate::util::bit_util; + std::ptr::write_unaligned(out_array, array); + std::ptr::write_unaligned(out_schema, schema); + + Ok(()) +} // returns the number of bits that buffer `i` (in the C data interface) is expected to have. // This is set by the Arrow specification @@ -464,19 +484,17 @@ impl<'a> ImportedArrowArray<'a> { } #[cfg(test)] -mod tests { +mod tests_to_then_from_ffi { use std::collections::HashMap; use std::mem::ManuallyDrop; - use std::ptr::addr_of_mut; - use arrow_array::builder::UnionBuilder; - use arrow_array::cast::AsArray; - use arrow_array::types::{Float64Type, Int32Type}; - use arrow_array::*; use arrow_buffer::NullBuffer; + use arrow_schema::Field; - use crate::compute::kernels; - use crate::datatypes::{Field, Int8Type}; + use crate::builder::UnionBuilder; + use crate::cast::AsArray; + use crate::types::{Float64Type, Int32Type, Int8Type}; + use crate::*; use super::*; @@ -490,10 +508,9 @@ mod tests { // (simulate consumer) import it let array = Int32Array::from(unsafe { from_ffi(array, &schema) }.unwrap()); - let array = kernels::numeric::add(&array, &array).unwrap(); // verify - assert_eq!(array.as_ref(), &Int32Array::from(vec![2, 4, 6])); + assert_eq!(array, Int32Array::from(vec![1, 2, 3])); } #[test] @@ -535,15 +552,9 @@ mod tests { // (simulate consumer) import it let data = unsafe { from_ffi(array, &schema) }?; let array = make_array(data); - - // perform some operation let array = array.as_any().downcast_ref::().unwrap(); - assert_eq!(array, &Int32Array::from(vec![Some(2), None])); - - let array = kernels::numeric::add(array, array).unwrap(); - // verify - assert_eq!(array.as_ref(), &Int32Array::from(vec![Some(4), None])); + assert_eq!(array, &Int32Array::from(vec![Some(2), None])); // (drop/release) Ok(()) @@ -589,21 +600,13 @@ mod tests { let array = make_array(data); // perform some operation - let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()]).unwrap(); let array = array .as_any() .downcast_ref::>() .unwrap(); // verify - let expected = GenericStringArray::::from(vec![ - Some("a"), - None, - Some("aaa"), - Some("a"), - None, - Some("aaa"), - ]); + let expected = GenericStringArray::::from(vec![Some("a"), None, Some("aaa")]); assert_eq!(array, &expected); // (drop/release) @@ -694,23 +697,13 @@ mod tests { // (simulate consumer) import it let data = unsafe { from_ffi(array, &schema) }?; let array = make_array(data); - - // perform some operation - let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()]).unwrap(); let array = array .as_any() .downcast_ref::>() .unwrap(); // verify - let expected: Vec> = vec![ - Some(b"a"), - None, - Some(b"aaa"), - Some(b"a"), - None, - Some(b"aaa"), - ]; + let expected: Vec> = vec![Some(b"a"), None, Some(b"aaa")]; let expected = GenericBinaryArray::::from(expected); assert_eq!(array, &expected); @@ -739,15 +732,12 @@ mod tests { // (simulate consumer) import it let data = unsafe { from_ffi(array, &schema) }?; let array = make_array(data); - - // perform some operation let array = array.as_any().downcast_ref::().unwrap(); - let array = kernels::boolean::not(array)?; // verify assert_eq!( array, - BooleanArray::from(vec![None, Some(false), Some(true)]) + &BooleanArray::from(vec![None, Some(true), Some(false)]) ); // (drop/release) @@ -765,9 +755,6 @@ mod tests { // (simulate consumer) import it let data = unsafe { from_ffi(array, &schema) }?; let array = make_array(data); - - // perform some operation - let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()]).unwrap(); let array = array .as_any() .downcast_ref::() @@ -776,7 +763,7 @@ mod tests { // verify assert_eq!( array, - &Time32MillisecondArray::from(vec![None, Some(1), Some(2), None, Some(1), Some(2)]) + &Time32MillisecondArray::from(vec![None, Some(1), Some(2)]) ); // (drop/release) @@ -794,9 +781,6 @@ mod tests { // (simulate consumer) import it let data = unsafe { from_ffi(array, &schema) }?; let array = make_array(data); - - // perform some operation - let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()]).unwrap(); let array = array .as_any() .downcast_ref::() @@ -805,7 +789,7 @@ mod tests { // verify assert_eq!( array, - &TimestampMillisecondArray::from(vec![None, Some(1), Some(2), None, Some(1), Some(2)]) + &TimestampMillisecondArray::from(vec![None, Some(1), Some(2)]) ); // (drop/release) @@ -830,9 +814,6 @@ mod tests { // (simulate consumer) import it let data = unsafe { from_ffi(array, &schema) }?; let array = make_array(data); - - // perform some operation - let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()]).unwrap(); let array = array .as_any() .downcast_ref::() @@ -849,12 +830,6 @@ mod tests { Some(vec![20, 20, 20]), Some(vec![30, 30, 30]), None, - None, - Some(vec![10, 10, 10]), - None, - Some(vec![20, 20, 20]), - Some(vec![30, 30, 30]), - None, ] .into_iter(), 3 @@ -891,9 +866,6 @@ mod tests { // (simulate consumer) import it let data = unsafe { from_ffi(array, &schema) }?; let array = make_array(data); - - // perform some operation - let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()]).unwrap(); let array = array.as_any().downcast_ref::().unwrap(); // 0010 0100 @@ -903,15 +875,14 @@ mod tests { let mut w = vec![]; w.extend_from_slice(&v); - w.extend_from_slice(&v); let expected_value_data = ArrayData::builder(DataType::Int32) - .len(18) + .len(9) .add_buffer(Buffer::from_slice_ref(&w)) .build()?; let expected_list_data = ArrayData::builder(list_data_type) - .len(6) + .len(3) .null_bit_buffer(Some(Buffer::from(expected_validity_bits))) .add_child_data(expected_value_data) .build()?; @@ -936,16 +907,13 @@ mod tests { // (simulate consumer) import it let data = unsafe { from_ffi(array, &schema) }?; let array = make_array(data); - - // perform some operation - let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()]).unwrap(); let actual = array .as_any() .downcast_ref::>() .unwrap(); // verify - let new_values = vec!["a", "aaa", "aaa", "a", "aaa", "aaa"]; + let new_values = vec!["a", "aaa", "aaa"]; let expected: DictionaryArray = new_values.into_iter().collect(); assert_eq!(actual, &expected); @@ -956,7 +924,6 @@ mod tests { #[test] #[allow(deprecated)] fn test_export_array_into_raw() -> Result<()> { - use crate::array::export_array_into_raw; let array = make_array(Int32Array::from(vec![1, 2, 3]).into_data()); // Assume two raw pointers provided by the consumer @@ -964,8 +931,8 @@ mod tests { let mut out_schema = FFI_ArrowSchema::empty(); { - let out_array_ptr = addr_of_mut!(out_array); - let out_schema_ptr = addr_of_mut!(out_schema); + let out_array_ptr = std::ptr::addr_of_mut!(out_array); + let out_schema_ptr = std::ptr::addr_of_mut!(out_schema); unsafe { export_array_into_raw(array, out_array_ptr, out_schema_ptr)?; } @@ -977,10 +944,9 @@ mod tests { // perform some operation let array = array.as_any().downcast_ref::().unwrap(); - let array = kernels::numeric::add(array, array).unwrap(); // verify - assert_eq!(array.as_ref(), &Int32Array::from(vec![2, 4, 6])); + assert_eq!(array, &Int32Array::from(vec![1, 2, 3])); Ok(()) } @@ -995,9 +961,6 @@ mod tests { // (simulate consumer) import it let data = unsafe { from_ffi(array, &schema) }?; let array = make_array(data); - - // perform some operation - let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()]).unwrap(); let array = array .as_any() .downcast_ref::() @@ -1006,7 +969,7 @@ mod tests { // verify assert_eq!( array, - &DurationSecondArray::from(vec![None, Some(1), Some(2), None, Some(1), Some(2)]) + &DurationSecondArray::from(vec![None, Some(1), Some(2)]) ); // (drop/release) @@ -1248,3 +1211,213 @@ mod tests { Ok(()) } } + +#[cfg(test)] +mod tests_from_ffi { + use std::sync::Arc; + + use arrow_buffer::{bit_util, buffer::Buffer}; + use arrow_data::ArrayData; + use arrow_schema::{DataType, Field}; + + use crate::{ + array::{ + Array, BooleanArray, DictionaryArray, FixedSizeBinaryArray, FixedSizeListArray, + Int32Array, Int64Array, StringArray, StructArray, UInt32Array, UInt64Array, + }, + ffi::{from_ffi, FFI_ArrowArray, FFI_ArrowSchema}, + }; + + use super::Result; + + fn test_round_trip(expected: &ArrayData) -> Result<()> { + // here we export the array + let array = FFI_ArrowArray::new(expected); + let schema = FFI_ArrowSchema::try_from(expected.data_type())?; + + // simulate an external consumer by being the consumer + let result = &unsafe { from_ffi(array, &schema) }?; + + assert_eq!(result, expected); + Ok(()) + } + + #[test] + fn test_u32() -> Result<()> { + let array = UInt32Array::from(vec![Some(2), None, Some(1), None]); + let data = array.into_data(); + test_round_trip(&data) + } + + #[test] + fn test_u64() -> Result<()> { + let array = UInt64Array::from(vec![Some(2), None, Some(1), None]); + let data = array.into_data(); + test_round_trip(&data) + } + + #[test] + fn test_i64() -> Result<()> { + let array = Int64Array::from(vec![Some(2), None, Some(1), None]); + let data = array.into_data(); + test_round_trip(&data) + } + + #[test] + fn test_struct() -> Result<()> { + let inner = StructArray::from(vec![ + ( + Arc::new(Field::new("a1", DataType::Boolean, false)), + Arc::new(BooleanArray::from(vec![true, true, false, false])) as Arc, + ), + ( + Arc::new(Field::new("a2", DataType::UInt32, false)), + Arc::new(UInt32Array::from(vec![1, 2, 3, 4])), + ), + ]); + + let array = StructArray::from(vec![ + ( + Arc::new(Field::new("a", inner.data_type().clone(), false)), + Arc::new(inner) as Arc, + ), + ( + Arc::new(Field::new("b", DataType::Boolean, false)), + Arc::new(BooleanArray::from(vec![false, false, true, true])) as Arc, + ), + ( + Arc::new(Field::new("c", DataType::UInt32, false)), + Arc::new(UInt32Array::from(vec![42, 28, 19, 31])), + ), + ]); + let data = array.into_data(); + test_round_trip(&data) + } + + #[test] + fn test_dictionary() -> Result<()> { + let values = StringArray::from(vec![Some("foo"), Some("bar"), None]); + let keys = Int32Array::from(vec![ + Some(0), + Some(1), + None, + Some(1), + Some(1), + None, + Some(1), + Some(2), + Some(1), + None, + ]); + let array = DictionaryArray::new(keys, Arc::new(values)); + + let data = array.into_data(); + test_round_trip(&data) + } + + #[test] + fn test_fixed_size_binary() -> Result<()> { + let values = vec![vec![10, 10, 10], vec![20, 20, 20], vec![30, 30, 30]]; + let array = FixedSizeBinaryArray::try_from_iter(values.into_iter())?; + + let data = array.into_data(); + test_round_trip(&data) + } + + #[test] + fn test_fixed_size_binary_with_nulls() -> Result<()> { + let values = vec![ + None, + Some(vec![10, 10, 10]), + None, + Some(vec![20, 20, 20]), + Some(vec![30, 30, 30]), + None, + ]; + let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(values.into_iter(), 3)?; + + let data = array.into_data(); + test_round_trip(&data) + } + + #[test] + fn test_fixed_size_list() -> Result<()> { + let v: Vec = (0..9).collect(); + let value_data = ArrayData::builder(DataType::Int64) + .len(9) + .add_buffer(Buffer::from_slice_ref(v)) + .build()?; + let list_data_type = + DataType::FixedSizeList(Arc::new(Field::new("f", DataType::Int64, false)), 3); + let list_data = ArrayData::builder(list_data_type) + .len(3) + .add_child_data(value_data) + .build()?; + let array = FixedSizeListArray::from(list_data); + + let data = array.into_data(); + test_round_trip(&data) + } + + #[test] + fn test_fixed_size_list_with_nulls() -> Result<()> { + // 0100 0110 + let mut validity_bits: [u8; 1] = [0; 1]; + bit_util::set_bit(&mut validity_bits, 1); + bit_util::set_bit(&mut validity_bits, 2); + bit_util::set_bit(&mut validity_bits, 6); + + let v: Vec = (0..16).collect(); + let value_data = ArrayData::builder(DataType::Int16) + .len(16) + .add_buffer(Buffer::from_slice_ref(v)) + .build()?; + let list_data_type = + DataType::FixedSizeList(Arc::new(Field::new("f", DataType::Int16, false)), 2); + let list_data = ArrayData::builder(list_data_type) + .len(8) + .null_bit_buffer(Some(Buffer::from(validity_bits))) + .add_child_data(value_data) + .build()?; + let array = FixedSizeListArray::from(list_data); + + let data = array.into_data(); + test_round_trip(&data) + } + + #[test] + fn test_fixed_size_list_nested() -> Result<()> { + let v: Vec = (0..16).collect(); + let value_data = ArrayData::builder(DataType::Int32) + .len(16) + .add_buffer(Buffer::from_slice_ref(v)) + .build()?; + + let offsets: Vec = vec![0, 2, 4, 6, 8, 10, 12, 14, 16]; + let value_offsets = Buffer::from_slice_ref(offsets); + let inner_list_data_type = + DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let inner_list_data = ArrayData::builder(inner_list_data_type.clone()) + .len(8) + .add_buffer(value_offsets) + .add_child_data(value_data) + .build()?; + + // 0000 0100 + let mut validity_bits: [u8; 1] = [0; 1]; + bit_util::set_bit(&mut validity_bits, 2); + + let list_data_type = + DataType::FixedSizeList(Arc::new(Field::new("f", inner_list_data_type, false)), 2); + let list_data = ArrayData::builder(list_data_type) + .len(4) + .null_bit_buffer(Some(Buffer::from(validity_bits))) + .add_child_data(inner_list_data) + .build()?; + + let array = FixedSizeListArray::from(list_data); + + let data = array.into_data(); + test_round_trip(&data) + } +} diff --git a/arrow/src/ffi_stream.rs b/arrow-array/src/ffi_stream.rs similarity index 98% rename from arrow/src/ffi_stream.rs rename to arrow-array/src/ffi_stream.rs index 15b88ef32163..22b6597932e5 100644 --- a/arrow/src/ffi_stream.rs +++ b/arrow-array/src/ffi_stream.rs @@ -63,14 +63,16 @@ use std::{ sync::Arc, }; +use arrow_data::ffi::FFI_ArrowArray; +use arrow_schema::{ffi::FFI_ArrowSchema, ArrowError, Schema, SchemaRef}; + use crate::array::Array; use crate::array::StructArray; -use crate::datatypes::{Schema, SchemaRef}; -use crate::error::ArrowError; -use crate::error::Result; -use crate::ffi::*; +use crate::ffi::from_ffi_and_data_type; use crate::record_batch::{RecordBatch, RecordBatchReader}; +type Result = std::result::Result; // TODO(alexandreyc): we should probably move Result to arrow_schema + const ENOMEM: i32 = 12; const EIO: i32 = 5; const EINVAL: i32 = 22; @@ -393,8 +395,10 @@ pub unsafe fn export_reader_into_raw( mod tests { use super::*; + use arrow_schema::Field; + use crate::array::Int32Array; - use crate::datatypes::Field; + use crate::ffi::from_ffi; struct TestRecordBatchReader { schema: SchemaRef, diff --git a/arrow-array/src/lib.rs b/arrow-array/src/lib.rs index ef98c5efefb0..90bc5e31205a 100644 --- a/arrow-array/src/lib.rs +++ b/arrow-array/src/lib.rs @@ -197,6 +197,10 @@ pub use scalar::*; pub mod builder; pub mod cast; mod delta; +#[cfg(feature = "ffi")] +pub mod ffi; +#[cfg(feature = "ffi")] +pub mod ffi_stream; pub mod iterator; pub mod run_iterator; pub mod temporal_conversions; diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index a938d75b1a6f..9d3c431b3048 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -77,7 +77,7 @@ pyarrow = ["pyo3", "ffi"] # but is run as part of our CI checks force_validate = ["arrow-data/force_validate"] # Enable ffi support -ffi = ["arrow-schema/ffi", "arrow-data/ffi"] +ffi = ["arrow-schema/ffi", "arrow-data/ffi", "arrow-array/ffi"] chrono-tz = ["arrow-array/chrono-tz"] [dev-dependencies] diff --git a/arrow/src/array/ffi.rs b/arrow/src/array/ffi.rs deleted file mode 100644 index 43f54a038421..000000000000 --- a/arrow/src/array/ffi.rs +++ /dev/null @@ -1,254 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Contains functionality to load an ArrayData from the C Data Interface - -use crate::{error::Result, ffi}; - -use super::ArrayRef; - -/// Exports an array to raw pointers of the C Data Interface provided by the consumer. -/// # Safety -/// Assumes that these pointers represent valid C Data Interfaces, both in memory -/// representation and lifetime via the `release` mechanism. -/// -/// This function copies the content of two FFI structs [ffi::FFI_ArrowArray] and -/// [ffi::FFI_ArrowSchema] in the array to the location pointed by the raw pointers. -/// Usually the raw pointers are provided by the array data consumer. -#[deprecated(note = "Use FFI_ArrowArray::new and FFI_ArrowSchema::try_from")] -pub unsafe fn export_array_into_raw( - src: ArrayRef, - out_array: *mut ffi::FFI_ArrowArray, - out_schema: *mut ffi::FFI_ArrowSchema, -) -> Result<()> { - let data = src.to_data(); - let array = ffi::FFI_ArrowArray::new(&data); - let schema = ffi::FFI_ArrowSchema::try_from(data.data_type())?; - - std::ptr::write_unaligned(out_array, array); - std::ptr::write_unaligned(out_schema, schema); - - Ok(()) -} - -#[cfg(test)] -mod tests { - use crate::array::{DictionaryArray, FixedSizeListArray, Int32Array, StringArray}; - use crate::buffer::Buffer; - use crate::error::Result; - use crate::util::bit_util; - use crate::{ - array::{ - Array, ArrayData, BooleanArray, FixedSizeBinaryArray, Int64Array, StructArray, - UInt32Array, UInt64Array, - }, - datatypes::{DataType, Field}, - ffi::{from_ffi, FFI_ArrowArray, FFI_ArrowSchema}, - }; - use std::sync::Arc; - - fn test_round_trip(expected: &ArrayData) -> Result<()> { - // here we export the array - let array = FFI_ArrowArray::new(expected); - let schema = FFI_ArrowSchema::try_from(expected.data_type())?; - - // simulate an external consumer by being the consumer - let result = &unsafe { from_ffi(array, &schema) }?; - - assert_eq!(result, expected); - Ok(()) - } - - #[test] - fn test_u32() -> Result<()> { - let array = UInt32Array::from(vec![Some(2), None, Some(1), None]); - let data = array.into_data(); - test_round_trip(&data) - } - - #[test] - fn test_u64() -> Result<()> { - let array = UInt64Array::from(vec![Some(2), None, Some(1), None]); - let data = array.into_data(); - test_round_trip(&data) - } - - #[test] - fn test_i64() -> Result<()> { - let array = Int64Array::from(vec![Some(2), None, Some(1), None]); - let data = array.into_data(); - test_round_trip(&data) - } - - #[test] - fn test_struct() -> Result<()> { - let inner = StructArray::from(vec![ - ( - Arc::new(Field::new("a1", DataType::Boolean, false)), - Arc::new(BooleanArray::from(vec![true, true, false, false])) as Arc, - ), - ( - Arc::new(Field::new("a2", DataType::UInt32, false)), - Arc::new(UInt32Array::from(vec![1, 2, 3, 4])), - ), - ]); - - let array = StructArray::from(vec![ - ( - Arc::new(Field::new("a", inner.data_type().clone(), false)), - Arc::new(inner) as Arc, - ), - ( - Arc::new(Field::new("b", DataType::Boolean, false)), - Arc::new(BooleanArray::from(vec![false, false, true, true])) as Arc, - ), - ( - Arc::new(Field::new("c", DataType::UInt32, false)), - Arc::new(UInt32Array::from(vec![42, 28, 19, 31])), - ), - ]); - let data = array.into_data(); - test_round_trip(&data) - } - - #[test] - fn test_dictionary() -> Result<()> { - let values = StringArray::from(vec![Some("foo"), Some("bar"), None]); - let keys = Int32Array::from(vec![ - Some(0), - Some(1), - None, - Some(1), - Some(1), - None, - Some(1), - Some(2), - Some(1), - None, - ]); - let array = DictionaryArray::new(keys, Arc::new(values)); - - let data = array.into_data(); - test_round_trip(&data) - } - - #[test] - fn test_fixed_size_binary() -> Result<()> { - let values = vec![vec![10, 10, 10], vec![20, 20, 20], vec![30, 30, 30]]; - let array = FixedSizeBinaryArray::try_from_iter(values.into_iter())?; - - let data = array.into_data(); - test_round_trip(&data) - } - - #[test] - fn test_fixed_size_binary_with_nulls() -> Result<()> { - let values = vec![ - None, - Some(vec![10, 10, 10]), - None, - Some(vec![20, 20, 20]), - Some(vec![30, 30, 30]), - None, - ]; - let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(values.into_iter(), 3)?; - - let data = array.into_data(); - test_round_trip(&data) - } - - #[test] - fn test_fixed_size_list() -> Result<()> { - let v: Vec = (0..9).collect(); - let value_data = ArrayData::builder(DataType::Int64) - .len(9) - .add_buffer(Buffer::from_slice_ref(v)) - .build()?; - let list_data_type = - DataType::FixedSizeList(Arc::new(Field::new("f", DataType::Int64, false)), 3); - let list_data = ArrayData::builder(list_data_type) - .len(3) - .add_child_data(value_data) - .build()?; - let array = FixedSizeListArray::from(list_data); - - let data = array.into_data(); - test_round_trip(&data) - } - - #[test] - fn test_fixed_size_list_with_nulls() -> Result<()> { - // 0100 0110 - let mut validity_bits: [u8; 1] = [0; 1]; - bit_util::set_bit(&mut validity_bits, 1); - bit_util::set_bit(&mut validity_bits, 2); - bit_util::set_bit(&mut validity_bits, 6); - - let v: Vec = (0..16).collect(); - let value_data = ArrayData::builder(DataType::Int16) - .len(16) - .add_buffer(Buffer::from_slice_ref(v)) - .build()?; - let list_data_type = - DataType::FixedSizeList(Arc::new(Field::new("f", DataType::Int16, false)), 2); - let list_data = ArrayData::builder(list_data_type) - .len(8) - .null_bit_buffer(Some(Buffer::from(validity_bits))) - .add_child_data(value_data) - .build()?; - let array = FixedSizeListArray::from(list_data); - - let data = array.into_data(); - test_round_trip(&data) - } - - #[test] - fn test_fixed_size_list_nested() -> Result<()> { - let v: Vec = (0..16).collect(); - let value_data = ArrayData::builder(DataType::Int32) - .len(16) - .add_buffer(Buffer::from_slice_ref(v)) - .build()?; - - let offsets: Vec = vec![0, 2, 4, 6, 8, 10, 12, 14, 16]; - let value_offsets = Buffer::from_slice_ref(offsets); - let inner_list_data_type = - DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); - let inner_list_data = ArrayData::builder(inner_list_data_type.clone()) - .len(8) - .add_buffer(value_offsets) - .add_child_data(value_data) - .build()?; - - // 0000 0100 - let mut validity_bits: [u8; 1] = [0; 1]; - bit_util::set_bit(&mut validity_bits, 2); - - let list_data_type = - DataType::FixedSizeList(Arc::new(Field::new("f", inner_list_data_type, false)), 2); - let list_data = ArrayData::builder(list_data_type) - .len(4) - .null_bit_buffer(Some(Buffer::from(validity_bits))) - .add_child_data(inner_list_data) - .build()?; - - let array = FixedSizeListArray::from(list_data); - - let data = array.into_data(); - test_round_trip(&data) - } -} diff --git a/arrow/src/array/mod.rs b/arrow/src/array/mod.rs index fa01f4c4c15b..b563c320bb6d 100644 --- a/arrow/src/array/mod.rs +++ b/arrow/src/array/mod.rs @@ -19,9 +19,6 @@ //! //! **See [arrow_array] for examples and usage instructions** -#[cfg(feature = "ffi")] -mod ffi; - // --------------------- Array & ArrayData --------------------- pub use arrow_array::builder::*; pub use arrow_array::cast::*; @@ -35,7 +32,7 @@ pub use arrow_data::transform::{Capacities, MutableArrayData}; #[cfg(feature = "ffi")] #[allow(deprecated)] -pub use self::ffi::export_array_into_raw; +pub use arrow_array::ffi::export_array_into_raw; // --------------------- Array's values comparison --------------------- diff --git a/arrow/src/lib.rs b/arrow/src/lib.rs index 78e2363e4825..09d6fc48aef1 100644 --- a/arrow/src/lib.rs +++ b/arrow/src/lib.rs @@ -363,9 +363,9 @@ pub use arrow_csv as csv; pub mod datatypes; pub mod error; #[cfg(feature = "ffi")] -pub mod ffi; +pub use arrow_array::ffi; #[cfg(feature = "ffi")] -pub mod ffi_stream; +pub use arrow_array::ffi_stream; #[cfg(feature = "ipc")] pub use arrow_ipc as ipc; #[cfg(feature = "json")] From 4f88db41f2903f1577cb7a3e9ca169c8d8ac0ee7 Mon Sep 17 00:00:00 2001 From: alexandreyc Date: Fri, 19 Apr 2024 10:01:35 +0200 Subject: [PATCH 2/3] Fix CI --- arrow-array/src/ffi.rs | 4 ++-- arrow-array/src/ffi_stream.rs | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arrow-array/src/ffi.rs b/arrow-array/src/ffi.rs index 4dbe9411fe1d..f4b41310a419 100644 --- a/arrow-array/src/ffi.rs +++ b/arrow-array/src/ffi.rs @@ -118,8 +118,8 @@ type Result = std::result::Result; // TODO(alexandreyc): we sh /// Assumes that these pointers represent valid C Data Interfaces, both in memory /// representation and lifetime via the `release` mechanism. /// -/// This function copies the content of two FFI structs [ffi::FFI_ArrowArray] and -/// [ffi::FFI_ArrowSchema] in the array to the location pointed by the raw pointers. +/// This function copies the content of two FFI structs [arrow_data::ffi::FFI_ArrowArray] and +/// [arrow_schema::ffi::FFI_ArrowSchema] in the array to the location pointed by the raw pointers. /// Usually the raw pointers are provided by the array data consumer. #[deprecated(note = "Use FFI_ArrowArray::new and FFI_ArrowSchema::try_from")] pub unsafe fn export_array_into_raw( diff --git a/arrow-array/src/ffi_stream.rs b/arrow-array/src/ffi_stream.rs index 22b6597932e5..bd37ad5ffeb7 100644 --- a/arrow-array/src/ffi_stream.rs +++ b/arrow-array/src/ffi_stream.rs @@ -83,6 +83,7 @@ const ENOSYS: i32 = 78; /// This was created by bindgen #[repr(C)] #[derive(Debug)] +#[allow(missing_docs)] pub struct FFI_ArrowArrayStream { pub get_schema: Option< unsafe extern "C" fn(arg1: *mut FFI_ArrowArrayStream, out: *mut FFI_ArrowSchema) -> c_int, From 402831662686d40cfbe19fd770e1f8e67c8ddb7c Mon Sep 17 00:00:00 2001 From: alexandreyc Date: Mon, 22 Apr 2024 12:43:35 +0200 Subject: [PATCH 3/3] Remove TODOs --- arrow-array/src/ffi.rs | 2 +- arrow-array/src/ffi_stream.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arrow-array/src/ffi.rs b/arrow-array/src/ffi.rs index f4b41310a419..7b988bb07478 100644 --- a/arrow-array/src/ffi.rs +++ b/arrow-array/src/ffi.rs @@ -111,7 +111,7 @@ use arrow_schema::{ArrowError, DataType, UnionMode}; use crate::array::ArrayRef; -type Result = std::result::Result; // TODO(alexandreyc): we should probably move Result to arrow_schema +type Result = std::result::Result; /// Exports an array to raw pointers of the C Data Interface provided by the consumer. /// # Safety diff --git a/arrow-array/src/ffi_stream.rs b/arrow-array/src/ffi_stream.rs index bd37ad5ffeb7..6f3405ead7b0 100644 --- a/arrow-array/src/ffi_stream.rs +++ b/arrow-array/src/ffi_stream.rs @@ -71,7 +71,7 @@ use crate::array::StructArray; use crate::ffi::from_ffi_and_data_type; use crate::record_batch::{RecordBatch, RecordBatchReader}; -type Result = std::result::Result; // TODO(alexandreyc): we should probably move Result to arrow_schema +type Result = std::result::Result; const ENOMEM: i32 = 12; const EIO: i32 = 5;