Skip to content

Commit

Permalink
GH-39006: [Python] Extract libparquet requirements out of libarrow_py…
Browse files Browse the repository at this point in the history
…thon.so to new libarrow_python_parquet_encryption.so (#39316)

### Rationale for this change

If I build pyarrow with everything enabled and then remove some of the Arrow C++ shared libraries (.so files) in order to get a minimal build, I can't import pyarrow, because it requires libarrow and libparquet. This is relevant for producing a minimal build for Conda. Please see the related issue for more information.

### What changes are included in this PR?

Move libarrow parquet encryption for pyarrow to its own shared object.

### Are these changes tested?

I will run extensive CI with extra python archery tests.

### Are there any user-facing changes?

No, and yes :) pyarrow will ship a new .so, but in my opinion that shouldn't be relevant to users.
* Closes: #39006

Lead-authored-by: Raúl Cumplido <[email protected]>
Co-authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
  • Loading branch information
raulcd and pitrou authored Dec 22, 2023
1 parent 929c40b commit 51970e0
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 22 deletions.
2 changes: 2 additions & 0 deletions ci/scripts/python_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ export ARROW_DEBUG_MEMORY_POOL=trap
# Give each PYARROW_TEST_* flag a default when it is unset or empty: the value
# of the named Arrow C++ build option, or ON when that option is unset or empty.
: ${PYARROW_TEST_HDFS:=${ARROW_HDFS:-ON}}
: ${PYARROW_TEST_ORC:=${ARROW_ORC:-ON}}
: ${PYARROW_TEST_PARQUET:=${ARROW_PARQUET:-ON}}
: ${PYARROW_TEST_PARQUET_ENCRYPTION:=${PARQUET_REQUIRE_ENCRYPTION:-ON}}
: ${PYARROW_TEST_S3:=${ARROW_S3:-ON}}

export PYARROW_TEST_ACERO
Expand All @@ -56,6 +57,7 @@ export PYARROW_TEST_GCS
export PYARROW_TEST_HDFS
export PYARROW_TEST_ORC
export PYARROW_TEST_PARQUET
export PYARROW_TEST_PARQUET_ENCRYPTION
export PYARROW_TEST_S3

# Testing PyArrow
Expand Down
1 change: 1 addition & 0 deletions ci/scripts/python_wheel_unix_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ export PYARROW_TEST_HDFS=ON
export PYARROW_TEST_ORC=ON
export PYARROW_TEST_PANDAS=ON
export PYARROW_TEST_PARQUET=ON
export PYARROW_TEST_PARQUET_ENCRYPTION=ON
export PYARROW_TEST_SUBSTRAIT=${ARROW_SUBSTRAIT}
export PYARROW_TEST_S3=${ARROW_S3}
export PYARROW_TEST_TENSORFLOW=ON
Expand Down
1 change: 1 addition & 0 deletions ci/scripts/python_wheel_windows_test.bat
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ set PYARROW_TEST_GCS=ON
set PYARROW_TEST_HDFS=ON
set PYARROW_TEST_ORC=OFF
set PYARROW_TEST_PARQUET=ON
set PYARROW_TEST_PARQUET_ENCRYPTION=ON
set PYARROW_TEST_SUBSTRAIT=ON
set PYARROW_TEST_S3=OFF
set PYARROW_TEST_TENSORFLOW=ON
Expand Down
38 changes: 21 additions & 17 deletions python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -332,22 +332,6 @@ if(PYARROW_BUILD_PARQUET OR PYARROW_BUILD_PARQUET_ENCRYPTION)
find_package(Parquet REQUIRED)
endif()

# Parquet encryption bindings compiled directly into the PyArrow C++ sources.
#
# Three outcomes:
#   * encryption not requested        -> status message only
#   * requested, but Arrow C++ was
#     built without encryption        -> configuration aborts
#   * requested and available         -> add the source file and link Parquet
if(NOT PYARROW_BUILD_PARQUET_ENCRYPTION)
  message(STATUS "Parquet Encryption is NOT Enabled")
elseif(NOT PARQUET_REQUIRE_ENCRYPTION)
  message(FATAL_ERROR "You must build Arrow C++ with PARQUET_REQUIRE_ENCRYPTION=ON")
else()
  list(APPEND PYARROW_CPP_SRCS ${PYARROW_CPP_SOURCE_DIR}/parquet_encryption.cc)
  # Pick the Parquet target that matches how Arrow C++ itself was built.
  if(ARROW_BUILD_SHARED)
    list(APPEND PYARROW_CPP_LINK_LIBS Parquet::parquet_shared)
  else()
    list(APPEND PYARROW_CPP_LINK_LIBS Parquet::parquet_static)
  endif()
  message(STATUS "Parquet Encryption Enabled")
endif()

if(PYARROW_BUILD_HDFS)
if(NOT ARROW_HDFS)
message(FATAL_ERROR "You must build Arrow C++ with ARROW_HDFS=ON")
Expand Down Expand Up @@ -391,6 +375,26 @@ install(TARGETS arrow_python
LIBRARY DESTINATION .
RUNTIME DESTINATION .)

# Source file for the standalone arrow_python_parquet_encryption shared library.
set(PYARROW_CPP_ENCRYPTION_SRCS ${PYARROW_CPP_SOURCE_DIR}/parquet_encryption.cc)

if(PYARROW_BUILD_PARQUET_ENCRYPTION)
  # Encryption was requested: Arrow C++ must have been built with it, otherwise
  # abort the configuration here.
  if(NOT PARQUET_REQUIRE_ENCRYPTION)
    message(FATAL_ERROR "You must build Arrow C++ with PARQUET_REQUIRE_ENCRYPTION=ON")
  endif()

  # Build the encryption helpers as their own shared library, linked against
  # arrow_python and the Parquet link libraries.
  add_library(arrow_python_parquet_encryption SHARED ${PYARROW_CPP_ENCRYPTION_SRCS})
  target_link_libraries(arrow_python_parquet_encryption
                        PUBLIC arrow_python ${PARQUET_LINK_LIBS})
  # Defined only while compiling this library itself (PRIVATE), not for consumers.
  target_compile_definitions(arrow_python_parquet_encryption
                             PRIVATE ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORTING)
  install(TARGETS arrow_python_parquet_encryption
          ARCHIVE DESTINATION .
          LIBRARY DESTINATION .
          RUNTIME DESTINATION .)
  message(STATUS "Parquet Encryption Enabled")
else()
  message(STATUS "Parquet Encryption is NOT Enabled")
endif()

set(PYARROW_CPP_FLIGHT_SRCS ${PYARROW_CPP_SOURCE_DIR}/flight.cc)
if(PYARROW_BUILD_FLIGHT)
if(NOT ARROW_FLIGHT)
Expand Down Expand Up @@ -814,6 +818,6 @@ endif()
# Link the Parquet extension modules when Parquet support is being built.
if(PYARROW_BUILD_PARQUET)
target_link_libraries(_parquet PRIVATE ${PARQUET_LINK_LIBS})
if(PYARROW_BUILD_PARQUET_ENCRYPTION)
# NOTE(review): this is a flattened diff hunk whose header reports 6 lines both
# before and after the change, so the next two lines appear to be the removed
# and the added version of a single statement -- confirm against the real file.
target_link_libraries(_parquet_encryption PRIVATE ${PARQUET_LINK_LIBS})
target_link_libraries(_parquet_encryption PRIVATE arrow_python_parquet_encryption)
endif()
endif()
33 changes: 28 additions & 5 deletions python/pyarrow/src/arrow/python/parquet_encryption.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,35 @@
#include "parquet/encryption/kms_client.h"
#include "parquet/encryption/kms_client_factory.h"

// Defines ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT, the symbol-visibility
// annotation placed on the classes declared in this header.
#if defined(_WIN32) || defined(__CYGWIN__) // Windows
#if defined(_MSC_VER)
// MSVC: silence warning C4251 (presumably the "needs to have dll-interface"
// warning -- confirm against the MSVC documentation).
#pragma warning(disable : 4251)
#else
// Other compilers targeting Windows: ignore -Wattributes diagnostics.
#pragma GCC diagnostic ignored "-Wattributes"
#endif

// Static builds need no annotation; otherwise export the symbols when
// ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORTING is defined and import them when
// it is not.
#ifdef ARROW_PYTHON_STATIC
#define ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT
#elif defined(ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORTING)
#define ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT __declspec(dllexport)
#else
#define ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT __declspec(dllimport)
#endif

#else // Not Windows
// Use default symbol visibility unless the macro has already been defined.
#ifndef ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT
#define ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT __attribute__((visibility("default")))
#endif
#endif // Non-Windows

namespace arrow {
namespace py {
namespace parquet {
namespace encryption {

/// \brief A table of function pointers for calling from C++ into
/// Python.
class ARROW_PYTHON_EXPORT PyKmsClientVtable {
class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClientVtable {
public:
std::function<void(PyObject*, const std::string& key_bytes,
const std::string& master_key_identifier, std::string* out)>
Expand All @@ -44,7 +65,8 @@ class ARROW_PYTHON_EXPORT PyKmsClientVtable {
};

/// \brief A helper for KmsClient implementation in Python.
class ARROW_PYTHON_EXPORT PyKmsClient : public ::parquet::encryption::KmsClient {
class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClient
: public ::parquet::encryption::KmsClient {
public:
PyKmsClient(PyObject* handler, PyKmsClientVtable vtable);
~PyKmsClient() override;
Expand All @@ -62,7 +84,7 @@ class ARROW_PYTHON_EXPORT PyKmsClient : public ::parquet::encryption::KmsClient

/// \brief A table of function pointers for calling from C++ into
/// Python.
class ARROW_PYTHON_EXPORT PyKmsClientFactoryVtable {
class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClientFactoryVtable {
public:
std::function<void(
PyObject*, const ::parquet::encryption::KmsConnectionConfig& kms_connection_config,
Expand All @@ -71,7 +93,7 @@ class ARROW_PYTHON_EXPORT PyKmsClientFactoryVtable {
};

/// \brief A helper for KmsClientFactory implementation in Python.
class ARROW_PYTHON_EXPORT PyKmsClientFactory
class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClientFactory
: public ::parquet::encryption::KmsClientFactory {
public:
PyKmsClientFactory(PyObject* handler, PyKmsClientFactoryVtable vtable);
Expand All @@ -86,7 +108,8 @@ class ARROW_PYTHON_EXPORT PyKmsClientFactory
};

/// \brief A CryptoFactory that returns Results instead of throwing exceptions.
class ARROW_PYTHON_EXPORT PyCryptoFactory : public ::parquet::encryption::CryptoFactory {
class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyCryptoFactory
: public ::parquet::encryption::CryptoFactory {
public:
arrow::Result<std::shared_ptr<::parquet::FileEncryptionProperties>>
SafeGetFileEncryptionProperties(
Expand Down

0 comments on commit 51970e0

Please sign in to comment.