From b1c43a4d9b8df8a535e12de824f80ddfa893027a Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Sun, 12 Nov 2023 19:53:01 -0500 Subject: [PATCH 01/27] arrow ffi array copy --- arrow-data/src/ffi.rs | 17 +++++++++++++++++ arrow/src/ffi.rs | 5 +++++ 2 files changed, 22 insertions(+) diff --git a/arrow-data/src/ffi.rs b/arrow-data/src/ffi.rs index 7623ced043cc..faf4c127f9de 100644 --- a/arrow-data/src/ffi.rs +++ b/arrow-data/src/ffi.rs @@ -282,6 +282,23 @@ impl FFI_ArrowArray { // If dictionary is not null should be valid for reads of `Self` unsafe { self.dictionary.as_ref() } } + + pub fn copy(&mut self) -> Self { + let new = Self { + length: self.length, + null_count: self.null_count, + offset: self.offset, + n_buffers: self.n_buffers, + n_children: self.n_children, + buffers: self.buffers, + children: self.children, + dictionary: self.dictionary, + release: self.release, + private_data: self.private_data, + }; + self.release = None; + new + } } #[cfg(test)] diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs index c13d4c6e5dff..2a9156ddcbd4 100644 --- a/arrow/src/ffi.rs +++ b/arrow/src/ffi.rs @@ -243,6 +243,11 @@ pub fn from_ffi(array: FFI_ArrowArray, schema: &FFI_ArrowSchema) -> Result FFI_ArrowArray { + let x = unsafe { array.as_mut().unwrap() }; + x.copy() +} + #[derive(Debug)] struct ArrowArray<'a> { array: &'a FFI_ArrowArray, From c61d270f4f62fd6d1b065ae2b5b9cbb8827d7f2a Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Sun, 12 Nov 2023 20:01:44 -0500 Subject: [PATCH 02/27] remove copy_ffi_array --- arrow/src/ffi.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs index 2a9156ddcbd4..c13d4c6e5dff 100644 --- a/arrow/src/ffi.rs +++ b/arrow/src/ffi.rs @@ -243,11 +243,6 @@ pub fn from_ffi(array: FFI_ArrowArray, schema: &FFI_ArrowSchema) -> Result FFI_ArrowArray { - let x = unsafe { array.as_mut().unwrap() }; - x.copy() -} - #[derive(Debug)] struct ArrowArray<'a> { array: &'a FFI_ArrowArray, From 6521cbaf113b818c14c443136486413d8ecd3d38 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Sun, 12 Nov 2023 20:03:44 -0500 Subject: [PATCH 03/27] docstring --- arrow-data/src/ffi.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arrow-data/src/ffi.rs b/arrow-data/src/ffi.rs index faf4c127f9de..13cc46292edd 100644 --- a/arrow-data/src/ffi.rs +++ b/arrow-data/src/ffi.rs @@ -283,6 +283,10 @@ impl FFI_ArrowArray { unsafe { self.dictionary.as_ref() } } + /// Create a copy of an existing `FFI_ArrowArray` + /// + /// As required by the C Data Interface specification, this sets the `release` member of `Self` + /// to `None`, but without calling the release callback. 
pub fn copy(&mut self) -> Self { let new = Self { length: self.length, From 92b070abf2b260ac6d2681bbc806667ed55dbbc2 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Sun, 12 Nov 2023 22:24:55 -0500 Subject: [PATCH 04/27] wip: pycapsule support --- arrow-data/src/ffi.rs | 8 +++-- arrow-schema/src/ffi.rs | 11 ++++-- arrow/src/pyarrow.rs | 80 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 94 insertions(+), 5 deletions(-) diff --git a/arrow-data/src/ffi.rs b/arrow-data/src/ffi.rs index 13cc46292edd..58168d806144 100644 --- a/arrow-data/src/ffi.rs +++ b/arrow-data/src/ffi.rs @@ -66,8 +66,12 @@ impl Drop for FFI_ArrowArray { unsafe impl Send for FFI_ArrowArray {} unsafe impl Sync for FFI_ArrowArray {} -// callback used to drop [FFI_ArrowArray] when it is exported -unsafe extern "C" fn release_array(array: *mut FFI_ArrowArray) { +/// callback used to drop [FFI_ArrowArray] when it is exported +/// +/// # Safety +/// +/// Must be passed a valid [FFI_ArrowArray]. +pub unsafe extern "C" fn release_array(array: *mut FFI_ArrowArray) { if array.is_null() { return; } diff --git a/arrow-schema/src/ffi.rs b/arrow-schema/src/ffi.rs index 7e33a78fec27..27b2e7789ac2 100644 --- a/arrow-schema/src/ffi.rs +++ b/arrow-schema/src/ffi.rs @@ -86,8 +86,12 @@ struct SchemaPrivateData { metadata: Option>, } -// callback used to drop [FFI_ArrowSchema] when it is exported. -unsafe extern "C" fn release_schema(schema: *mut FFI_ArrowSchema) { +/// callback used to drop [FFI_ArrowSchema] when it is exported. +/// +/// # Safety +/// +/// Must be passed a valid [FFI_ArrowSchema]. +pub unsafe extern "C" fn release_schema(schema: *mut FFI_ArrowSchema) { if schema.is_null() { return; } @@ -351,6 +355,9 @@ impl Drop for FFI_ArrowSchema { } } +unsafe impl Send for FFI_ArrowSchema {} +unsafe impl Sync for FFI_ArrowSchema {} + impl TryFrom<&FFI_ArrowSchema> for DataType { type Error = ArrowError; diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index 517c333addde..71e9aa527628 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -64,7 +64,7 @@ use pyo3::exceptions::{PyTypeError, PyValueError}; use pyo3::ffi::Py_uintptr_t; use pyo3::import_exception; use pyo3::prelude::*; -use pyo3::types::{PyList, PyTuple}; +use pyo3::types::{PyCapsule, PyList, PyTuple}; use crate::array::{make_array, ArrayData}; use crate::datatypes::{DataType, Field, Schema}; @@ -118,8 +118,39 @@ fn validate_class(expected: &str, value: &PyAny) -> PyResult<()> { Ok(()) } +fn validate_pycapsule(capsule: &PyCapsule, name: &str) -> PyResult<()> { + let capsule_name = capsule.name()?; + if capsule_name.is_none() { + return Err(PyValueError::new_err( + "Expected schema PyCapsule to have name set.", + )); + } + + let capsule_name = capsule_name.unwrap().to_str()?; + if capsule_name != name { + return Err(PyValueError::new_err(format!( + "Expected name '{}' in PyCapsule.", + name, + ))); + } + + Ok(()) +} + impl FromPyArrow for DataType { fn from_pyarrow(value: &PyAny) -> PyResult { + // Newer versions of PyArrow as well as other libraries with Arrow data implement this + // method, so prefer it over _export_to_c. + if value.hasattr("__arrow_c_schema__")? 
{ + let capsule: &PyCapsule = + PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?; + validate_pycapsule(capsule, "arrow_schema")?; + + let schema_ptr = unsafe { capsule.reference::() }; + let dtype = DataType::try_from(schema_ptr).map_err(to_py_err)?; + Ok(dtype) + } + validate_class("DataType", value)?; let c_schema = FFI_ArrowSchema::empty(); @@ -143,6 +174,18 @@ impl ToPyArrow for DataType { impl FromPyArrow for Field { fn from_pyarrow(value: &PyAny) -> PyResult { + // Newer versions of PyArrow as well as other libraries with Arrow data implement this + // method, so prefer it over _export_to_c. + if value.hasattr("__arrow_c_schema__")? { + let capsule: &PyCapsule = + PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?; + validate_pycapsule(capsule, "arrow_schema")?; + + let schema_ptr = unsafe { capsule.reference::() }; + let field = Field::try_from(schema_ptr).map_err(to_py_err)?; + Ok(field) + } + validate_class("Field", value)?; let c_schema = FFI_ArrowSchema::empty(); @@ -166,6 +209,18 @@ impl ToPyArrow for Field { impl FromPyArrow for Schema { fn from_pyarrow(value: &PyAny) -> PyResult { + // Newer versions of PyArrow as well as other libraries with Arrow data implement this + // method, so prefer it over _export_to_c. + if value.hasattr("__arrow_c_schema__")? { + let capsule: &PyCapsule = + PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?; + validate_pycapsule(capsule, "arrow_schema")?; + + let schema_ptr = unsafe { capsule.reference::() }; + let schema = Schema::try_from(&c_schema).map_err(to_py_err)?; + Ok(schema) + } + validate_class("Schema", value)?; let c_schema = FFI_ArrowSchema::empty(); @@ -189,6 +244,29 @@ impl ToPyArrow for Schema { impl FromPyArrow for ArrayData { fn from_pyarrow(value: &PyAny) -> PyResult { + // Newer versions of PyArrow as well as other libraries with Arrow data implement this + // method, so prefer it over _export_to_c. + if value.hasattr("__arrow_c_array__")? 
{ + let tuple = value.getattr("__arrow_c_array__")?.call0()?; + + if !tuple.is_instance_of::() { + return Err(PyTypeError::new_err( + "Expected __arrow_c_array__ to return a tuple.", + )); + } + + let schema_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(0)?)?; + let array_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(1)?)?; + + validate_pycapsule(schema_capsule, "arrow_schema")?; + validate_pycapsule(array_capsule, "arrow_array")?; + + let schema_ptr = unsafe { schema_capsule.reference::() }; + let array_ptr = unsafe { array_capsule.reference::() }; + + ffi::from_ffi(array_ptr.copy(), schema_ptr).map_err(to_py_err) + } + validate_class("Array", value)?; // prepare a pointer to receive the Array struct From 646070146e5652ba16c9554f6b6182644026cd06 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Sun, 12 Nov 2023 22:28:00 -0500 Subject: [PATCH 05/27] return --- arrow/src/pyarrow.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index 71e9aa527628..08621d8f2aad 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -264,7 +264,7 @@ impl FromPyArrow for ArrayData { let schema_ptr = unsafe { schema_capsule.reference::() }; let array_ptr = unsafe { array_capsule.reference::() }; - ffi::from_ffi(array_ptr.copy(), schema_ptr).map_err(to_py_err) + return ffi::from_ffi(array_ptr.copy(), schema_ptr).map_err(to_py_err); } validate_class("Array", value)?; From dfdcfaee0cf382a20d5c612a0c86e6f50d6fffb8 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 13 Nov 2023 10:39:15 -0500 Subject: [PATCH 06/27] Update arrow/src/pyarrow.rs Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --- arrow/src/pyarrow.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index 08621d8f2aad..8220db4b5c39 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -264,7 +264,8 @@ impl FromPyArrow for ArrayData { let schema_ptr = unsafe { schema_capsule.reference::() }; let array_ptr = unsafe { array_capsule.reference::() }; - return ffi::from_ffi(array_ptr.copy(), schema_ptr).map_err(to_py_err); + let array = std::mem::replace(array_ptr, FFI_ArrowArray::empty()); + return ffi::from_ffi(array, schema_ptr).map_err(to_py_err); } validate_class("Array", value)?; From 5a4f738db76075021910f36ad4fa29ca89fe2156 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 13 Nov 2023 12:00:52 -0500 Subject: [PATCH 07/27] remove sync impl --- arrow-schema/src/ffi.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/arrow-schema/src/ffi.rs b/arrow-schema/src/ffi.rs index 27b2e7789ac2..61c71015f29b 100644 --- a/arrow-schema/src/ffi.rs +++ b/arrow-schema/src/ffi.rs @@ -356,7 +356,6 @@ impl Drop for FFI_ArrowSchema { } unsafe impl Send for FFI_ArrowSchema {} -unsafe impl Sync for FFI_ArrowSchema {} impl TryFrom<&FFI_ArrowSchema> for DataType { type Error = ArrowError; From 8a1a05ecaa73196247b3a156953dd852f3c746a3 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 13 Nov 2023 12:01:54 -0500 Subject: [PATCH 08/27] Update arrow/src/pyarrow.rs Co-authored-by: Will Jones --- arrow/src/pyarrow.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index 8220db4b5c39..6061d7a75115 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -129,8 +129,8 @@ fn validate_pycapsule(capsule: &PyCapsule, name: &str) -> PyResult<()> { let capsule_name = capsule_name.unwrap().to_str()?; if capsule_name 
!= name { return Err(PyValueError::new_err(format!( - "Expected name '{}' in PyCapsule.", - name, + "Expected name '{}' in PyCapsule, instead got '{}'", + name, capsule_name ))); } From e109c1aaf47ac2d15d52dfde98836fa0c49b5469 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 13 Nov 2023 12:02:15 -0500 Subject: [PATCH 09/27] Remove copy() --- arrow-data/src/ffi.rs | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/arrow-data/src/ffi.rs b/arrow-data/src/ffi.rs index 58168d806144..67c711e6abad 100644 --- a/arrow-data/src/ffi.rs +++ b/arrow-data/src/ffi.rs @@ -286,27 +286,6 @@ impl FFI_ArrowArray { // If dictionary is not null should be valid for reads of `Self` unsafe { self.dictionary.as_ref() } } - - /// Create a copy of an existing `FFI_ArrowArray` - /// - /// As required by the C Data Interface specification, this sets the `release` member of `Self` - /// to `None`, but without calling the release callback. - pub fn copy(&mut self) -> Self { - let new = Self { - length: self.length, - null_count: self.null_count, - offset: self.offset, - n_buffers: self.n_buffers, - n_children: self.n_children, - buffers: self.buffers, - children: self.children, - dictionary: self.dictionary, - release: self.release, - private_data: self.private_data, - }; - self.release = None; - new - } } #[cfg(test)] From 05ea67d34b26eb6b03e72030d4837b5add2cec44 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 13 Nov 2023 12:05:07 -0500 Subject: [PATCH 10/27] Need &mut FFI_ArrowArray for std::mem::replace --- arrow/src/pyarrow.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index 6061d7a75115..61b09769ac97 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -55,6 +55,7 @@ //! [pyarrow.Table.to_reader()](https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.to_reader) //! and then importing the reader as a [ArrowArrayStreamReader]. 
+use std::borrow::BorrowMut; use std::convert::{From, TryFrom}; use std::ptr::{addr_of, addr_of_mut}; use std::sync::Arc; @@ -262,9 +263,10 @@ impl FromPyArrow for ArrayData { validate_pycapsule(array_capsule, "arrow_array")?; let schema_ptr = unsafe { schema_capsule.reference::() }; - let array_ptr = unsafe { array_capsule.reference::() }; + let array_ptr = array_capsule.pointer() as *mut FFI_ArrowArray; + let array_mut = unsafe { array_ptr.as_mut() }; - let array = std::mem::replace(array_ptr, FFI_ArrowArray::empty()); + let array = std::mem::replace(array_mut.unwrap(), FFI_ArrowArray::empty()); return ffi::from_ffi(array, schema_ptr).map_err(to_py_err); } From e7ed58d1a8283319bad20563ea8fd45ea47b29af Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 13 Nov 2023 12:14:53 -0500 Subject: [PATCH 11/27] Use std::ptr::replace --- arrow/src/pyarrow.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index 61b09769ac97..305bd0e470de 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -263,10 +263,12 @@ impl FromPyArrow for ArrayData { validate_pycapsule(array_capsule, "arrow_array")?; let schema_ptr = unsafe { schema_capsule.reference::() }; - let array_ptr = array_capsule.pointer() as *mut FFI_ArrowArray; - let array_mut = unsafe { array_ptr.as_mut() }; - - let array = std::mem::replace(array_mut.unwrap(), FFI_ArrowArray::empty()); + let array = unsafe { + std::ptr::replace( + array_capsule.pointer() as *mut FFI_ArrowArray, + FFI_ArrowArray::empty(), + ) + }; return ffi::from_ffi(array, schema_ptr).map_err(to_py_err); } From dc04b13017ab8847e139b4d39b6dee330ec73a8d Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 13 Nov 2023 12:15:35 -0500 Subject: [PATCH 12/27] update comments --- arrow/src/pyarrow.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index 305bd0e470de..08d713136f01 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -142,6 +142,7 @@ impl FromPyArrow for DataType { fn from_pyarrow(value: &PyAny) -> PyResult { // Newer versions of PyArrow as well as other libraries with Arrow data implement this // method, so prefer it over _export_to_c. + // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html if value.hasattr("__arrow_c_schema__")? { let capsule: &PyCapsule = PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?; @@ -177,6 +178,7 @@ impl FromPyArrow for Field { fn from_pyarrow(value: &PyAny) -> PyResult { // Newer versions of PyArrow as well as other libraries with Arrow data implement this // method, so prefer it over _export_to_c. + // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html if value.hasattr("__arrow_c_schema__")? { let capsule: &PyCapsule = PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?; @@ -212,6 +214,7 @@ impl FromPyArrow for Schema { fn from_pyarrow(value: &PyAny) -> PyResult { // Newer versions of PyArrow as well as other libraries with Arrow data implement this // method, so prefer it over _export_to_c. + // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html if value.hasattr("__arrow_c_schema__")? 
{ let capsule: &PyCapsule = PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?; @@ -247,6 +250,7 @@ impl FromPyArrow for ArrayData { fn from_pyarrow(value: &PyAny) -> PyResult { // Newer versions of PyArrow as well as other libraries with Arrow data implement this // method, so prefer it over _export_to_c. + // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html if value.hasattr("__arrow_c_array__")? { let tuple = value.getattr("__arrow_c_array__")?.call0()?; From 86918facb280bba042266d6dafae9081244f4d5e Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 13 Nov 2023 12:17:01 -0500 Subject: [PATCH 13/27] Minimize unsafe block --- arrow/src/pyarrow.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index 08d713136f01..7099704e2a7c 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -267,12 +267,8 @@ impl FromPyArrow for ArrayData { validate_pycapsule(array_capsule, "arrow_array")?; let schema_ptr = unsafe { schema_capsule.reference::() }; - let array = unsafe { - std::ptr::replace( - array_capsule.pointer() as *mut FFI_ArrowArray, - FFI_ArrowArray::empty(), - ) - }; + let array_ptr = array_capsule.pointer() as *mut FFI_ArrowArray; + let array = unsafe { std::ptr::replace(array_ptr, FFI_ArrowArray::empty()) }; return ffi::from_ffi(array, schema_ptr).map_err(to_py_err); } From 0e273a3f900f348eb6309f4751ef4f3bc32f7a95 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 13 Nov 2023 17:02:27 -0500 Subject: [PATCH 14/27] revert pub release functions --- arrow-data/src/ffi.rs | 8 ++------ arrow-schema/src/ffi.rs | 8 ++------ 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/arrow-data/src/ffi.rs b/arrow-data/src/ffi.rs index 67c711e6abad..7623ced043cc 100644 --- a/arrow-data/src/ffi.rs +++ b/arrow-data/src/ffi.rs @@ -66,12 +66,8 @@ impl Drop for FFI_ArrowArray { unsafe impl Send for FFI_ArrowArray {} unsafe impl Sync for FFI_ArrowArray {} -/// callback used to drop [FFI_ArrowArray] when it is exported -/// -/// # Safety -/// -/// Must be passed a valid [FFI_ArrowArray]. -pub unsafe extern "C" fn release_array(array: *mut FFI_ArrowArray) { +// callback used to drop [FFI_ArrowArray] when it is exported +unsafe extern "C" fn release_array(array: *mut FFI_ArrowArray) { if array.is_null() { return; } diff --git a/arrow-schema/src/ffi.rs b/arrow-schema/src/ffi.rs index 61c71015f29b..640a7de79878 100644 --- a/arrow-schema/src/ffi.rs +++ b/arrow-schema/src/ffi.rs @@ -86,12 +86,8 @@ struct SchemaPrivateData { metadata: Option>, } -/// callback used to drop [FFI_ArrowSchema] when it is exported. -/// -/// # Safety -/// -/// Must be passed a valid [FFI_ArrowSchema]. -pub unsafe extern "C" fn release_schema(schema: *mut FFI_ArrowSchema) { +// callback used to drop [FFI_ArrowSchema] when it is exported. 
+unsafe extern "C" fn release_schema(schema: *mut FFI_ArrowSchema) { if schema.is_null() { return; } From 252e746a8789e85da2254c37e5329d3b507be71e Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 13 Nov 2023 23:45:08 -0500 Subject: [PATCH 15/27] Add RecordBatch and Stream conversion --- arrow/src/pyarrow.rs | 53 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index 7099704e2a7c..29652ba69961 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -60,7 +60,7 @@ use std::convert::{From, TryFrom}; use std::ptr::{addr_of, addr_of_mut}; use std::sync::Arc; -use arrow_array::{RecordBatchIterator, RecordBatchReader}; +use arrow_array::{RecordBatchIterator, RecordBatchReader, StructArray}; use pyo3::exceptions::{PyTypeError, PyValueError}; use pyo3::ffi::Py_uintptr_t; use pyo3::import_exception; @@ -330,6 +330,40 @@ impl ToPyArrow for Vec { impl FromPyArrow for RecordBatch { fn from_pyarrow(value: &PyAny) -> PyResult { + // Newer versions of PyArrow as well as other libraries with Arrow data implement this + // method, so prefer it over _export_to_c. + // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html + if value.hasattr("__arrow_c_array__")? { + let tuple = value.getattr("__arrow_c_array__")?.call0()?; + + if !tuple.is_instance_of::() { + return Err(PyTypeError::new_err( + "Expected __arrow_c_array__ to return a tuple.", + )); + } + + let schema_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(0)?)?; + let array_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(1)?)?; + + validate_pycapsule(schema_capsule, "arrow_schema")?; + validate_pycapsule(array_capsule, "arrow_array")?; + + let schema_ptr = unsafe { schema_capsule.reference::() }; + let array_ptr = array_capsule.pointer() as *mut FFI_ArrowArray; + let ffi_array = unsafe { std::ptr::replace(array_ptr, FFI_ArrowArray::empty()) }; + let array_data = ffi::from_ffi(ffi_array, schema_ptr).map_err(to_py_err)?; + let array_ref = make_array(array_data); + + if !matches!(array_ref.data_type(), DataType::Struct) { + return Err(PyTypeError::new_err( + "Expected Struct type from __arrow_c_array.", + )); + } + + let array = array_ref.as_any().downcast_ref::().unwrap(); + return Ok(array.into()); + } + validate_class("RecordBatch", value)?; // TODO(kszucs): implement the FFI conversions in arrow-rs for RecordBatches let schema = value.getattr("schema")?; @@ -359,6 +393,23 @@ impl ToPyArrow for RecordBatch { /// Supports conversion from `pyarrow.RecordBatchReader` to [ArrowArrayStreamReader]. impl FromPyArrow for ArrowArrayStreamReader { fn from_pyarrow(value: &PyAny) -> PyResult { + // Newer versions of PyArrow as well as other libraries with Arrow data implement this + // method, so prefer it over _export_to_c. + // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html + if value.hasattr("__arrow_c_stream__")? 
{ + let capsule: &PyCapsule = + PyTryInto::try_into(value.getattr("__arrow_c_stream__")?.call0()?)?; + validate_pycapsule(capsule, "arrow_array_stream")?; + + let stream_ptr = array_capsule.pointer() as *mut FFI_ArrowArrayStream; + let stream = unsafe { std::ptr::replace(stream_ptr, FFI_ArrowArrayStream::empty()) }; + + let stream_reader = ArrowArrayStreamReader::try_new(stream) + .map_err(|err| PyValueError::new_err(err.to_string()))?; + + return Ok(stream); + } + validate_class("RecordBatchReader", value)?; // prepare a pointer to receive the stream struct From 60bee4a7c2e47b32dfc1c8803c34a4afa1423f5f Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 13 Nov 2023 23:50:33 -0500 Subject: [PATCH 16/27] fix returns --- arrow/src/pyarrow.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index 29652ba69961..a0c22458a097 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -55,7 +55,6 @@ //! [pyarrow.Table.to_reader()](https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.to_reader) //! and then importing the reader as a [ArrowArrayStreamReader]. -use std::borrow::BorrowMut; use std::convert::{From, TryFrom}; use std::ptr::{addr_of, addr_of_mut}; use std::sync::Arc; @@ -150,7 +149,7 @@ impl FromPyArrow for DataType { let schema_ptr = unsafe { capsule.reference::() }; let dtype = DataType::try_from(schema_ptr).map_err(to_py_err)?; - Ok(dtype) + return Ok(dtype); } validate_class("DataType", value)?; @@ -186,7 +185,7 @@ impl FromPyArrow for Field { let schema_ptr = unsafe { capsule.reference::() }; let field = Field::try_from(schema_ptr).map_err(to_py_err)?; - Ok(field) + return Ok(field); } validate_class("Field", value)?; @@ -222,7 +221,7 @@ impl FromPyArrow for Schema { let schema_ptr = unsafe { capsule.reference::() }; let schema = Schema::try_from(&c_schema).map_err(to_py_err)?; - Ok(schema) + return Ok(schema); } validate_class("Schema", value)?; From 46612ceeb687f9306305754efe731b4c17def03f Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 14 Nov 2023 11:25:55 -0500 Subject: [PATCH 17/27] Fix return type --- arrow/src/pyarrow.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index a0c22458a097..a40d287ea23c 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -406,7 +406,7 @@ impl FromPyArrow for ArrowArrayStreamReader { let stream_reader = ArrowArrayStreamReader::try_new(stream) .map_err(|err| PyValueError::new_err(err.to_string()))?; - return Ok(stream); + return Ok(stream_reader); } validate_class("RecordBatchReader", value)?; From becda12b16562b1bbd1b707340f6f7e5612b9f0f Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 14 Nov 2023 11:26:34 -0500 Subject: [PATCH 18/27] Fix name --- arrow/src/pyarrow.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index a40d287ea23c..0df6b15d9250 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -400,7 +400,7 @@ impl FromPyArrow for ArrowArrayStreamReader { PyTryInto::try_into(value.getattr("__arrow_c_stream__")?.call0()?)?; validate_pycapsule(capsule, "arrow_array_stream")?; - let stream_ptr = array_capsule.pointer() as *mut FFI_ArrowArrayStream; + let stream_ptr = capsule.pointer() as *mut FFI_ArrowArrayStream; let stream = unsafe { std::ptr::replace(stream_ptr, FFI_ArrowArrayStream::empty()) }; let stream_reader = ArrowArrayStreamReader::try_new(stream) From 
2f7767bf94353e3eb621d44fcd00f37fcd58466d Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 14 Nov 2023 11:28:00 -0500 Subject: [PATCH 19/27] fix ci --- arrow/src/pyarrow.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index 0df6b15d9250..6d634034a8e6 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -220,7 +220,7 @@ impl FromPyArrow for Schema { validate_pycapsule(capsule, "arrow_schema")?; let schema_ptr = unsafe { capsule.reference::() }; - let schema = Schema::try_from(&c_schema).map_err(to_py_err)?; + let schema = Schema::try_from(schema_ptr).map_err(to_py_err)?; return Ok(schema); } @@ -353,7 +353,7 @@ impl FromPyArrow for RecordBatch { let array_data = ffi::from_ffi(ffi_array, schema_ptr).map_err(to_py_err)?; let array_ref = make_array(array_data); - if !matches!(array_ref.data_type(), DataType::Struct) { + if !matches!(array_ref.data_type(), DataType::Struct(_)) { return Err(PyTypeError::new_err( "Expected Struct type from __arrow_c_array.", )); From 1e7bcd3d846f2c3f51a2f6c19face24c1b9aa83e Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 14 Nov 2023 23:08:16 -0500 Subject: [PATCH 20/27] Add tests --- arrow-pyarrow-integration-testing/README.md | 2 + .../tests/test_sql.py | 93 ++++++++++++++++++- 2 files changed, 91 insertions(+), 4 deletions(-) diff --git a/arrow-pyarrow-integration-testing/README.md b/arrow-pyarrow-integration-testing/README.md index e63953ad7900..5ca2ea76b88c 100644 --- a/arrow-pyarrow-integration-testing/README.md +++ b/arrow-pyarrow-integration-testing/README.md @@ -25,6 +25,7 @@ Note that this crate uses two languages and an external ABI: * `Rust` * `Python` * C ABI privately exposed by `Pyarrow`. +* PyCapsule ABI publicly exposed by `pyarrow` ## Basic idea @@ -36,6 +37,7 @@ we can use pyarrow's interface to move pointers from and to Rust. ## Relevant literature * [Arrow's CDataInterface](https://arrow.apache.org/docs/format/CDataInterface.html) +* [Arrow PyCapsule Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html) * [Rust's FFI](https://doc.rust-lang.org/nomicon/ffi.html) * [Pyarrow private binds](https://github.com/apache/arrow/blob/ae1d24efcc3f1ac2a876d8d9f544a34eb04ae874/python/pyarrow/array.pxi#L1226) * [PyO3](https://docs.rs/pyo3/0.12.1/pyo3/index.html) diff --git a/arrow-pyarrow-integration-testing/tests/test_sql.py b/arrow-pyarrow-integration-testing/tests/test_sql.py index 1748fd3ffb6b..c773cd173507 100644 --- a/arrow-pyarrow-integration-testing/tests/test_sql.py +++ b/arrow-pyarrow-integration-testing/tests/test_sql.py @@ -113,6 +113,34 @@ def assert_pyarrow_leak(): _unsupported_pyarrow_types = [ ] +# As of pyarrow 14, pyarrow implements the Arrow PyCapsule interface +# (https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html). +# This defines that Arrow consumers should allow any object that has specific "dunder" +# methods, `__arrow_c_*_`. These wrapper classes ensure that arrow-rs is able to handle +# _any_ class, without pyarrow-specific handling. 
+class SchemaWrapper: + def __init__(self, schema): + self.schema = schema + + def __arrow_c_schema__(self): + return self.schema.__arrow_c_schema__() + + +class ArrayWrapper: + def __init__(self, array): + self.array = array + + def __arrow_c_array__(self): + return self.array.__arrow_c_array__() + + +class StreamWrapper: + def __init__(self, stream): + self.stream = stream + + def __arrow_c_stream__(self): + return self.stream.__arrow_c_stream__() + @pytest.mark.parametrize("pyarrow_type", _supported_pyarrow_types, ids=str) def test_type_roundtrip(pyarrow_type): @@ -121,6 +149,14 @@ def test_type_roundtrip(pyarrow_type): assert restored is not pyarrow_type +@pytest.mark.parametrize("pyarrow_type", _supported_pyarrow_types, ids=str) +def test_type_roundtrip_pycapsule(pyarrow_type): + wrapped = SchemaWrapper(pyarrow_type) + restored = rust.round_trip_type(wrapped) + assert restored == pyarrow_type + assert restored is not pyarrow_type + + @pytest.mark.parametrize("pyarrow_type", _unsupported_pyarrow_types, ids=str) def test_type_roundtrip_raises(pyarrow_type): with pytest.raises(pa.ArrowException): @@ -138,6 +174,19 @@ def test_field_roundtrip(pyarrow_type): field = rust.round_trip_field(pyarrow_field) assert field == pyarrow_field +@pytest.mark.parametrize('pyarrow_type', _supported_pyarrow_types, ids=str) +def test_field_roundtrip_pycapsule(pyarrow_type): + pyarrow_field = pa.field("test", pyarrow_type, nullable=True) + wrapped = SchemaWrapper(pyarrow_field) + field = rust.round_trip_field(wrapped) + assert field == wrapped.schema + + if pyarrow_type != pa.null(): + # A null type field may not be non-nullable + pyarrow_field = pa.field("test", pyarrow_type, nullable=False) + field = rust.round_trip_field(wrapped) + assert field == wrapped.schema + def test_field_metadata_roundtrip(): metadata = {"hello": "World! 
😊", "x": "2"} pyarrow_field = pa.field("test", pa.int32(), metadata=metadata) @@ -163,6 +212,16 @@ def test_primitive_python(): del b +def test_primitive_python_pycapsule(): + """ + Python -> Rust -> Python + """ + a = pa.array([1, 2, 3]) + wrapped = ArrayWrapper(a) + b = rust.double(wrapped) + assert b == pa.array([2, 4, 6]) + + def test_primitive_rust(): """ Rust -> Python -> Rust @@ -433,6 +492,32 @@ def test_record_batch_reader(): got_batches = list(b) assert got_batches == batches +def test_record_batch_reader_pycapsule(): + """ + Python -> Rust -> Python + """ + schema = pa.schema([('ints', pa.list_(pa.int32()))], metadata={b'key1': b'value1'}) + batches = [ + pa.record_batch([[[1], [2, 42]]], schema), + pa.record_batch([[None, [], [5, 6]]], schema), + ] + a = pa.RecordBatchReader.from_batches(schema, batches) + wrapped = StreamWrapper(a) + b = rust.round_trip_record_batch_reader(wrapped) + + assert b.schema == schema + got_batches = list(b) + assert got_batches == batches + + # Also try the boxed reader variant + a = pa.RecordBatchReader.from_batches(schema, batches) + wrapped = StreamWrapper(a) + b = rust.boxed_reader_roundtrip(wrapped) + assert b.schema == schema + got_batches = list(b) + assert got_batches == batches + + def test_record_batch_reader_error(): schema = pa.schema([('ints', pa.list_(pa.int32()))]) @@ -459,18 +544,18 @@ def test_reject_other_classes(): with pytest.raises(TypeError, match="Expected instance of pyarrow.lib.Array, got builtins.list"): rust.round_trip_array(not_pyarrow) - + with pytest.raises(TypeError, match="Expected instance of pyarrow.lib.Schema, got builtins.list"): rust.round_trip_schema(not_pyarrow) - + with pytest.raises(TypeError, match="Expected instance of pyarrow.lib.Field, got builtins.list"): rust.round_trip_field(not_pyarrow) - + with pytest.raises(TypeError, match="Expected instance of pyarrow.lib.DataType, got builtins.list"): rust.round_trip_type(not_pyarrow) with pytest.raises(TypeError, match="Expected instance of pyarrow.lib.RecordBatch, got builtins.list"): rust.round_trip_record_batch(not_pyarrow) - + with pytest.raises(TypeError, match="Expected instance of pyarrow.lib.RecordBatchReader, got builtins.list"): rust.round_trip_record_batch_reader(not_pyarrow) From ae909fbf24a8b83881362f4148bc2662c16bf9ac Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 14 Nov 2023 23:11:40 -0500 Subject: [PATCH 21/27] Add table test --- .../tests/test_sql.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arrow-pyarrow-integration-testing/tests/test_sql.py b/arrow-pyarrow-integration-testing/tests/test_sql.py index c773cd173507..319e6498a5b0 100644 --- a/arrow-pyarrow-integration-testing/tests/test_sql.py +++ b/arrow-pyarrow-integration-testing/tests/test_sql.py @@ -538,6 +538,26 @@ def iter_batches(): with pytest.raises(ValueError, match="invalid utf-8"): rust.round_trip_record_batch_reader(reader) + +def test_table_pycapsule(): + """ + Python -> Rust -> Python + """ + schema = pa.schema([('ints', pa.list_(pa.int32()))], metadata={b'key1': b'value1'}) + batches = [ + pa.record_batch([[[1], [2, 42]]], schema), + pa.record_batch([[None, [], [5, 6]]], schema), + ] + table = pa.Table.from_batches(batches) + wrapped = StreamWrapper(table) + b = rust.round_trip_record_batch_reader(wrapped) + new_table = b.read_all() + + assert table.schema == new_table.schema + assert table == new_table + assert len(table.to_batches()) == len(new_table.to_batches()) + + def test_reject_other_classes(): # Arbitrary type that is not a PyArrow type 
not_pyarrow = ["hello"] From 6f01c9146eea2f5a7e59a291f746c2df6b5d27f5 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 14 Nov 2023 23:15:42 -0500 Subject: [PATCH 22/27] skip if pre pyarrow 14 --- arrow-pyarrow-integration-testing/tests/test_sql.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arrow-pyarrow-integration-testing/tests/test_sql.py b/arrow-pyarrow-integration-testing/tests/test_sql.py index 319e6498a5b0..f2db00fe20ec 100644 --- a/arrow-pyarrow-integration-testing/tests/test_sql.py +++ b/arrow-pyarrow-integration-testing/tests/test_sql.py @@ -27,6 +27,8 @@ import arrow_pyarrow_integration_testing as rust +PYARROW_PRE_14 = int(pa.__version__.split('.')[0]) < 14 + @contextlib.contextmanager def no_pyarrow_leak(): @@ -148,7 +150,7 @@ def test_type_roundtrip(pyarrow_type): assert restored == pyarrow_type assert restored is not pyarrow_type - +@pytest.mark.skipif(PYARROW_PRE_14, reason="requires pyarrow 14") @pytest.mark.parametrize("pyarrow_type", _supported_pyarrow_types, ids=str) def test_type_roundtrip_pycapsule(pyarrow_type): wrapped = SchemaWrapper(pyarrow_type) @@ -174,6 +176,7 @@ def test_field_roundtrip(pyarrow_type): field = rust.round_trip_field(pyarrow_field) assert field == pyarrow_field +@pytest.mark.skipif(PYARROW_PRE_14, reason="requires pyarrow 14") @pytest.mark.parametrize('pyarrow_type', _supported_pyarrow_types, ids=str) def test_field_roundtrip_pycapsule(pyarrow_type): pyarrow_field = pa.field("test", pyarrow_type, nullable=True) @@ -212,6 +215,7 @@ def test_primitive_python(): del b +@pytest.mark.skipif(PYARROW_PRE_14, reason="requires pyarrow 14") def test_primitive_python_pycapsule(): """ Python -> Rust -> Python @@ -492,6 +496,7 @@ def test_record_batch_reader(): got_batches = list(b) assert got_batches == batches +@pytest.mark.skipif(PYARROW_PRE_14, reason="requires pyarrow 14") def test_record_batch_reader_pycapsule(): """ Python -> Rust -> Python @@ -539,6 +544,7 @@ def iter_batches(): rust.round_trip_record_batch_reader(reader) +@pytest.mark.skipif(PYARROW_PRE_14, reason="requires pyarrow 14") def test_table_pycapsule(): """ Python -> Rust -> Python From f18305755bd0004cfed9658a9c617d64f926a406 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 14 Nov 2023 23:17:19 -0500 Subject: [PATCH 23/27] bump python version in CI to use pyarrow 14 --- .github/workflows/integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 6e2b4420408a..7500f6dd187a 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -128,7 +128,7 @@ jobs: key: ${{ runner.os }}-${{ matrix.arch }}-target-maturin-cache-${{ matrix.rust }}- - uses: actions/setup-python@v4 with: - python-version: '3.7' + python-version: '3.8' - name: Upgrade pip and setuptools run: pip install --upgrade pip setuptools wheel virtualenv - name: Create virtualenv and install dependencies From 107acef1f1e9a2a6724c02e100bedda9e41c87ae Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 14 Nov 2023 23:23:33 -0500 Subject: [PATCH 24/27] Add record batch test --- .../tests/test_sql.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arrow-pyarrow-integration-testing/tests/test_sql.py b/arrow-pyarrow-integration-testing/tests/test_sql.py index f2db00fe20ec..16d4e0f12f88 100644 --- a/arrow-pyarrow-integration-testing/tests/test_sql.py +++ b/arrow-pyarrow-integration-testing/tests/test_sql.py @@ -544,6 +544,25 @@ def iter_batches(): 
rust.round_trip_record_batch_reader(reader) +@pytest.mark.skipif(PYARROW_PRE_14, reason="requires pyarrow 14") +def test_record_batch_pycapsule(): + """ + Python -> Rust -> Python + """ + schema = pa.schema([('ints', pa.list_(pa.int32()))], metadata={b'key1': b'value1'}) + batch = pa.record_batch([[[1], [2, 42]]], schema) + wrapped = StreamWrapper(batch) + b = rust.round_trip_record_batch_reader(wrapped) + new_table = b.read_all() + new_batches = new_table.to_batches() + + assert len(new_batches) == 1 + new_batch = new_batches[0] + + assert batch == new_batch + assert batch.schema == new_batch.schema + + @pytest.mark.skipif(PYARROW_PRE_14, reason="requires pyarrow 14") def test_table_pycapsule(): """ From 6c44e010def8b56680792392a1fa566f35c54d42 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Wed, 15 Nov 2023 11:22:28 -0500 Subject: [PATCH 25/27] Update arrow/src/pyarrow.rs Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --- arrow/src/pyarrow.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index 6d634034a8e6..4d262b0d106f 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -351,15 +351,12 @@ impl FromPyArrow for RecordBatch { let array_ptr = array_capsule.pointer() as *mut FFI_ArrowArray; let ffi_array = unsafe { std::ptr::replace(array_ptr, FFI_ArrowArray::empty()) }; let array_data = ffi::from_ffi(ffi_array, schema_ptr).map_err(to_py_err)?; - let array_ref = make_array(array_data); - - if !matches!(array_ref.data_type(), DataType::Struct(_)) { + if !matches!(array_data.data_type(), DataType::Struct(_)) { return Err(PyTypeError::new_err( "Expected Struct type from __arrow_c_array.", )); } - - let array = array_ref.as_any().downcast_ref::().unwrap(); + let array = StructArray::from(array_data); return Ok(array.into()); } From 60202475adb75c711bc972e5ea99e1943c68a64a Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Wed, 15 Nov 2023 11:24:48 -0500 Subject: [PATCH 26/27] run on pyarrow 13 and 14 --- .github/workflows/integration.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 7500f6dd187a..a05eda28324a 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -106,6 +106,7 @@ jobs: strategy: matrix: rust: [ stable ] + pyarrow: [ "13", "14" ] steps: - uses: actions/checkout@v4 with: @@ -135,7 +136,7 @@ jobs: run: | virtualenv venv source venv/bin/activate - pip install maturin toml pytest pytz pyarrow>=5.0 + pip install maturin toml pytest pytz pyarrow==${{ matrix.pyarrow }} - name: Run Rust tests run: | source venv/bin/activate From 2e42926248bf0e1b558ca91936208c53f81a2a19 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Wed, 15 Nov 2023 12:40:55 -0500 Subject: [PATCH 27/27] Update .github/workflows/integration.yml Co-authored-by: Will Jones --- .github/workflows/integration.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index a05eda28324a..f939a6a13b58 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -106,6 +106,7 @@ jobs: strategy: matrix: rust: [ stable ] + # PyArrow 13 was the last version prior to introduction to Arrow PyCapsules pyarrow: [ "13", "14" ] steps: - uses: actions/checkout@v4
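
A brief usage sketch of what this series enables (illustrative, not part of the patches above): assuming pyarrow >= 14 and that the arrow_pyarrow_integration_testing module from this repository has been built, any object exposing the __arrow_c_array__ dunder, not just a pyarrow class, is accepted by the Rust bindings. The ThirdPartyArray name below is a hypothetical stand-in; it simply delegates capsule creation to a wrapped pyarrow.Array, mirroring the ArrayWrapper used in the tests.

    import pyarrow as pa
    import arrow_pyarrow_integration_testing as rust

    class ThirdPartyArray:
        """Stand-in for any non-pyarrow object implementing the Arrow PyCapsule protocol."""

        def __init__(self, array):
            self.array = array

        def __arrow_c_array__(self):
            # Returns a (schema_capsule, array_capsule) tuple; the new FromPyArrow
            # implementations take ownership of the array capsule's contents.
            return self.array.__arrow_c_array__()

    # The Rust side accepts the wrapper even though it is not a pyarrow.Array.
    doubled = rust.double(ThirdPartyArray(pa.array([1, 2, 3])))
    assert doubled == pa.array([2, 4, 6])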