Skip to content

Commit

Permalink
GH-43536: [Python] Do not use borrowed references APIs (#43540)
Browse files Browse the repository at this point in the history
### Rationale for this change

For better reference safety under Python free-threaded builds (i.e. with the GIL removed), we should be using `Py(List|Dict)_GetItemRef` that return strong references and are implemented in a thread-safe manner.

### What changes are included in this PR?

- Vendor a copy of https://github.com/python/pythoncapi-compat
- Port to strong reference APIs for lists and dicts

### Are these changes tested?

I ran the tests with the free-threaded build before and after, and there's the same expected failures.

* GitHub Issue: #43536

Lead-authored-by: Lysandros Nikolaou <[email protected]>
Co-authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
  • Loading branch information
lysnikolaou and pitrou authored Aug 15, 2024
1 parent f518d6b commit 894f72f
Show file tree
Hide file tree
Showing 7 changed files with 1,566 additions and 11 deletions.
1 change: 1 addition & 0 deletions dev/release/rat_exclude_files.txt
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ python/manylinux1/.dockerignore
python/pyarrow/includes/__init__.pxd
python/pyarrow/tests/__init__.py
python/pyarrow/vendored/*
python/pyarrow/src/arrow/python/vendored/*
python/requirements*.txt
pax_global_header
MANIFEST.in
Expand Down
1 change: 1 addition & 0 deletions python/pyarrow/src/arrow/python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@
# under the License.

arrow_install_all_headers("arrow/python")
add_subdirectory(vendored)
14 changes: 11 additions & 3 deletions python/pyarrow/src/arrow/python/deserialize.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#include "arrow/python/numpy_convert.h"
#include "arrow/python/pyarrow.h"
#include "arrow/python/serialize.h"
#include "arrow/python/vendored/pythoncapi_compat.h"

namespace arrow {

Expand Down Expand Up @@ -88,8 +89,13 @@ Status DeserializeDict(PyObject* context, const Array& array, int64_t start_idx,
// The latter two steal references whereas PyDict_SetItem does not. So we need
// to make sure the reference count is decremented by letting the OwnedRef
// go out of scope at the end.
int ret = PyDict_SetItem(result.obj(), PyList_GET_ITEM(keys.obj(), i - start_idx),
PyList_GET_ITEM(vals.obj(), i - start_idx));
PyObject* key = PyList_GetItemRef(keys.obj(), i - start_idx);
RETURN_IF_PYERROR();
OwnedRef keyref(key);
PyObject* val = PyList_GetItemRef(vals.obj(), i - start_idx);
RETURN_IF_PYERROR();
OwnedRef valref(val);
int ret = PyDict_SetItem(result.obj(), key, val);
if (ret != 0) {
return ConvertPyError();
}
Expand Down Expand Up @@ -398,7 +404,9 @@ Status GetSerializedFromComponents(int num_tensors,

auto GetBuffer = [&data](Py_ssize_t index, std::shared_ptr<Buffer>* out) {
ARROW_CHECK_LE(index, PyList_Size(data));
PyObject* py_buf = PyList_GET_ITEM(data, index);
PyObject* py_buf = PyList_GetItemRef(data, index);
RETURN_IF_PYERROR();
OwnedRef py_buf_ref(py_buf);
return unwrap_buffer(py_buf).Value(out);
};

Expand Down
7 changes: 5 additions & 2 deletions python/pyarrow/src/arrow/python/numpy_to_arrow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
#include "arrow/python/numpy_internal.h"
#include "arrow/python/python_to_arrow.h"
#include "arrow/python/type_traits.h"
#include "arrow/python/vendored/pythoncapi_compat.h"

namespace arrow {

Expand Down Expand Up @@ -757,8 +758,10 @@ Status NumPyConverter::Visit(const StructType& type) {
}

for (auto field : type.fields()) {
PyObject* tup =
PyDict_GetItemString(PyDataType_FIELDS(dtype_), field->name().c_str());
PyObject* tup;
PyDict_GetItemStringRef(PyDataType_FIELDS(dtype_), field->name().c_str(), &tup);
RETURN_IF_PYERROR();
OwnedRef tupref(tup);
if (tup == NULL) {
return Status::Invalid("Missing field '", field->name(), "' in struct array");
}
Expand Down
17 changes: 11 additions & 6 deletions python/pyarrow/src/arrow/python/python_to_arrow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
#include "arrow/python/iterators.h"
#include "arrow/python/numpy_convert.h"
#include "arrow/python/type_traits.h"
#include "arrow/python/vendored/pythoncapi_compat.h"
#include "arrow/visit_type_inline.h"

namespace arrow {
Expand Down Expand Up @@ -1107,11 +1108,13 @@ class PyStructConverter : public StructConverter<PyConverter, PyConverterTrait>
Status AppendDict(PyObject* dict, PyObject* field_names) {
// NOTE we're ignoring any extraneous dict items
for (int i = 0; i < num_fields_; i++) {
PyObject* name = PyList_GET_ITEM(field_names, i); // borrowed
PyObject* value = PyDict_GetItem(dict, name); // borrowed
if (value == NULL) {
RETURN_IF_PYERROR();
}
PyObject* name = PyList_GetItemRef(field_names, i);
RETURN_IF_PYERROR();
OwnedRef nameref(name);
PyObject* value;
PyDict_GetItemRef(dict, name, &value);
RETURN_IF_PYERROR();
OwnedRef valueref(value);
RETURN_NOT_OK(this->children_[i]->Append(value ? value : Py_None));
}
return Status::OK();
Expand Down Expand Up @@ -1141,7 +1144,9 @@ class PyStructConverter : public StructConverter<PyConverter, PyConverterTrait>
ARROW_ASSIGN_OR_RAISE(auto pair, GetKeyValuePair(items, i));

// validate that the key and the field name are equal
PyObject* name = PyList_GET_ITEM(field_names, i);
PyObject* name = PyList_GetItemRef(field_names, i);
RETURN_IF_PYERROR();
OwnedRef nameref(name);
bool are_equal = PyObject_RichCompareBool(pair.first, name, Py_EQ);
RETURN_IF_PYERROR();

Expand Down
18 changes: 18 additions & 0 deletions python/pyarrow/src/arrow/python/vendored/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

arrow_install_all_headers("arrow/python/vendored")
Loading

0 comments on commit 894f72f

Please sign in to comment.