From 316859422682fd91b49aec68032768ff2eb13b5b Mon Sep 17 00:00:00 2001
From: Sutou Kouhei
Date: Sun, 23 Jun 2024 08:53:33 +0900
Subject: [PATCH] GH-42149: [C++] Use FetchContent for bundled ORC

This also has a workaround for https://issues.apache.org/jira/browse/ORC-1732 .
---
NOTE(review): this file was rebuilt from a dump in which line breaks had been
collapsed and every "$<...>" generator expression had been cut down to a bare
"$". Line breaks, indentation, blank context lines and the generator
expressions (TARGET_FILE / TARGET_PROPERTY applied to the target names that
already appear in the hunk) are therefore reconstructed, not copied -- confirm
them against the original commit before running "git am". The "From:" header
appears to have lost its e-mail address the same way; it is left as found.
The added/removed payload is otherwise unchanged, so the hunk line counts and
the blob ids on the "index" line are the original ones.

 cpp/cmake_modules/ThirdpartyToolchain.cmake | 184 ++++++++++----------
 cpp/thirdparty/versions.txt                 |   4 +-
 2 files changed, 96 insertions(+), 92 deletions(-)

diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 7dab0a362ff24..009ccacd6e172 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -4490,116 +4490,120 @@ target_include_directories(arrow::hadoop INTERFACE "${HADOOP_HOME}/include")
 # ----------------------------------------------------------------------
 # Apache ORC
 
-macro(build_orc)
+function(build_orc)
+  if(CMAKE_VERSION VERSION_LESS 3.22)
+    # We can't disable installing Apache ORC by
+    # "set_property(DIRECTORY ${orc_SOURCE_DIR} PROPERTY
+    # EXCLUDE_FROM_ALL TRUE)" with CMake 3.16.
+    #
+    # At least CMake 3.22 on Ubuntu 22.04 works. So we use 3.22
+    # here. We may be able to use more earlier version here.
+    message(FATAL_ERROR "Building Apache ORC requires at least CMake 3.22. "
+                        "(At least we can't use CMake 3.16)")
+  endif()
   message(STATUS "Building Apache ORC from source")
+  fetchcontent_declare(orc
+                       ${FC_DECLARE_COMMON_OPTIONS}
+                       URL ${ORC_SOURCE_URL}
+                       URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}")
+  prepare_fetchcontent()
 
-  set(ORC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/orc_ep-install")
-  set(ORC_HOME "${ORC_PREFIX}")
-  set(ORC_INCLUDE_DIR "${ORC_PREFIX}/include")
-  set(ORC_STATIC_LIB
-      "${ORC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}orc${CMAKE_STATIC_LIBRARY_SUFFIX}")
+  get_target_property(LZ4_INCLUDE_DIR LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES)
+  get_filename_component(LZ4_ROOT "${LZ4_INCLUDE_DIR}" DIRECTORY)
+  set(LZ4_HOME
+      ${LZ4_ROOT}
+      CACHE BOOL "" FORCE)
+  set(LZ4_LIBRARY $<TARGET_FILE:LZ4::lz4>)
+  set(LZ4_STATIC_LIB $<TARGET_FILE:LZ4::lz4>)
 
-  get_target_property(ORC_PROTOBUF_ROOT ${ARROW_PROTOBUF_LIBPROTOBUF}
+  get_target_property(PROTOBUF_INCLUDE_DIR ${ARROW_PROTOBUF_LIBPROTOBUF}
                       INTERFACE_INCLUDE_DIRECTORIES)
-  get_filename_component(ORC_PROTOBUF_ROOT "${ORC_PROTOBUF_ROOT}" DIRECTORY)
-
-  get_target_property(ORC_SNAPPY_INCLUDE_DIR ${Snappy_TARGET}
+  get_filename_component(Protobuf_ROOT "${PROTOBUF_INCLUDE_DIR}" DIRECTORY)
+  set(PROTOBUF_HOME
+      ${Protobuf_ROOT}
+      CACHE BOOL "" FORCE)
+  target_include_directories(${ARROW_PROTOBUF_LIBPROTOC}
+                             INTERFACE "${PROTOBUF_INCLUDE_DIR}")
+  set(PROTOBUF_EXECUTABLE $<TARGET_FILE:${ARROW_PROTOBUF_PROTOC}>)
+  set(PROTOBUF_LIBRARY $<TARGET_FILE:${ARROW_PROTOBUF_LIBPROTOBUF}>)
+  set(PROTOC_LIBRARY $<TARGET_FILE:${ARROW_PROTOBUF_LIBPROTOC}>)
+
+  get_target_property(SNAPPY_INCLUDE_DIR ${Snappy_TARGET} INTERFACE_INCLUDE_DIRECTORIES)
+  get_filename_component(Snappy_ROOT "${SNAPPY_INCLUDE_DIR}" DIRECTORY)
+  set(SNAPPY_HOME
+      ${Snappy_ROOT}
+      CACHE BOOL "" FORCE)
+  set(SNAPPY_LIBRARY
+      $<TARGET_FILE:${Snappy_TARGET}>
+      CACHE STRING "" FORCE)
+
+  get_target_property(ZLIB_INCLUDE_DIR ZLIB::ZLIB INTERFACE_INCLUDE_DIRECTORIES)
+  get_filename_component(ZLIB_ROOT "${ZLIB_INCLUDE_DIR}" DIRECTORY)
+  set(ZLIB_HOME
+      ${ZLIB_ROOT}
+      CACHE BOOL "" FORCE)
+
+  get_target_property(ZSTD_INCLUDE_DIR ${ARROW_ZSTD_LIBZSTD}
                       INTERFACE_INCLUDE_DIRECTORIES)
-  get_filename_component(ORC_SNAPPY_ROOT "${ORC_SNAPPY_INCLUDE_DIR}" DIRECTORY)
-
-  get_target_property(ORC_LZ4_ROOT LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES)
-  get_filename_component(ORC_LZ4_ROOT "${ORC_LZ4_ROOT}" DIRECTORY)
+  get_filename_component(ZSTD_ROOT "${ZSTD_INCLUDE_DIR}" DIRECTORY)
+  set(ZSTD_HOME
+      ${ZSTD_ROOT}
+      CACHE BOOL "" FORCE)
+  set(ZSTD_LIBRARY $<TARGET_FILE:${ARROW_ZSTD_LIBZSTD}>)
+  set(ZSTD_STATIC_LIBRARY $<TARGET_FILE:${ARROW_ZSTD_LIBZSTD}>)
+
+  set(BUILD_CPP_TESTS
+      OFF
+      CACHE BOOL "" FORCE)
+  set(BUILD_JAVA
+      OFF
+      CACHE BOOL "" FORCE)
+  set(BUILD_LIBHDFSPP
+      OFF
+      CACHE BOOL "" FORCE)
+  set(BUILD_TOOLS
+      OFF
+      CACHE BOOL "" FORCE)
+  set(INSTALL_VENDORED_LIBS
+      OFF
+      CACHE BOOL "" FORCE)
+  set(STOP_BUILD_ON_WARNING
+      OFF
+      CACHE BOOL "" FORCE)
 
-  get_target_property(ORC_ZSTD_ROOT ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES)
-  get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_ROOT}" DIRECTORY)
+  # TODO: This should be fixed in upstream.
+  list(PREPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_BINARY_DIR}/_deps/orc-src/cmake_modules)
 
-  set(ORC_CMAKE_ARGS
-      ${EP_COMMON_CMAKE_ARGS}
-      "-DCMAKE_INSTALL_PREFIX=${ORC_PREFIX}"
-      -DSTOP_BUILD_ON_WARNING=OFF
-      -DBUILD_LIBHDFSPP=OFF
-      -DBUILD_JAVA=OFF
-      -DBUILD_TOOLS=OFF
-      -DBUILD_CPP_TESTS=OFF
-      -DINSTALL_VENDORED_LIBS=OFF
-      "-DLZ4_HOME=${ORC_LZ4_ROOT}"
-      "-DPROTOBUF_EXECUTABLE=$<TARGET_FILE:${ARROW_PROTOBUF_PROTOC}>"
-      "-DPROTOBUF_HOME=${ORC_PROTOBUF_ROOT}"
-      "-DPROTOBUF_INCLUDE_DIR=$<TARGET_PROPERTY:${ARROW_PROTOBUF_LIBPROTOBUF},INTERFACE_INCLUDE_DIRECTORIES>"
-      "-DPROTOBUF_LIBRARY=$<TARGET_FILE:${ARROW_PROTOBUF_LIBPROTOBUF}>"
-      "-DPROTOC_LIBRARY=$<TARGET_FILE:${ARROW_PROTOBUF_LIBPROTOC}>"
-      "-DSNAPPY_HOME=${ORC_SNAPPY_ROOT}"
-      "-DSNAPPY_LIBRARY=$<TARGET_FILE:${Snappy_TARGET}>"
-      "-DLZ4_LIBRARY=$<TARGET_FILE:LZ4::lz4>"
-      "-DLZ4_STATIC_LIB=$<TARGET_FILE:LZ4::lz4>"
-      "-DLZ4_INCLUDE_DIR=${ORC_LZ4_ROOT}/include"
-      "-DSNAPPY_INCLUDE_DIR=${ORC_SNAPPY_INCLUDE_DIR}"
-      "-DZSTD_HOME=${ORC_ZSTD_ROOT}"
-      "-DZSTD_INCLUDE_DIR=$<TARGET_PROPERTY:${ARROW_ZSTD_LIBZSTD},INTERFACE_INCLUDE_DIRECTORIES>"
-      "-DZSTD_LIBRARY=$<TARGET_FILE:${ARROW_ZSTD_LIBZSTD}>")
-  if(ZLIB_ROOT)
-    set(ORC_CMAKE_ARGS ${ORC_CMAKE_ARGS} "-DZLIB_HOME=${ZLIB_ROOT}")
+  fetchcontent_makeavailable(orc)
+  if(CMAKE_VERSION VERSION_LESS 3.28)
+    set_property(DIRECTORY ${orc_SOURCE_DIR} PROPERTY EXCLUDE_FROM_ALL TRUE)
   endif()
 
-  # Work around CMake bug
-  file(MAKE_DIRECTORY ${ORC_INCLUDE_DIR})
+  set(ORC_VENDORED
+      TRUE
+      PARENT_SCOPE)
 
-  externalproject_add(orc_ep
-                      ${EP_COMMON_OPTIONS}
-                      URL ${ORC_SOURCE_URL}
-                      URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}"
-                      BUILD_BYPRODUCTS ${ORC_STATIC_LIB}
-                      CMAKE_ARGS ${ORC_CMAKE_ARGS}
-                      DEPENDS ${ARROW_PROTOBUF_LIBPROTOBUF}
-                              ${ARROW_PROTOBUF_PROTOC}
-                              ${ARROW_ZSTD_LIBZSTD}
-                              ${Snappy_TARGET}
-                              LZ4::lz4
-                              ZLIB::ZLIB)
-
-  set(ORC_VENDORED 1)
-
-  add_library(orc::orc STATIC IMPORTED)
-  set_target_properties(orc::orc PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}")
-  target_include_directories(orc::orc BEFORE INTERFACE "${ORC_INCLUDE_DIR}")
-  target_link_libraries(orc::orc INTERFACE LZ4::lz4 ZLIB::ZLIB ${ARROW_ZSTD_LIBZSTD}
-                                           ${Snappy_TARGET})
-  # Protobuf generated files may use ABSL_DCHECK*() and
-  # absl::log_internal_check_op is needed for them.
-  if(TARGET absl::log_internal_check_op)
-    target_link_libraries(orc::orc INTERFACE absl::log_internal_check_op)
-  endif()
-  if(NOT MSVC)
-    if(NOT APPLE AND ARROW_ENABLE_THREADING)
-      target_link_libraries(orc::orc INTERFACE Threads::Threads)
-    endif()
-    target_link_libraries(orc::orc INTERFACE ${CMAKE_DL_LIBS})
-  endif()
-  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
-    if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "9")
-      target_link_libraries(orc::orc INTERFACE stdc++fs)
-    endif()
-  elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
-    if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "8")
-      target_link_libraries(orc::orc INTERFACE c++fs)
-    endif()
-  endif()
+  target_include_directories(orc INTERFACE "${orc_BINARY_DIR}/c++/include"
+                                           "${orc_SOURCE_DIR}/c++/include")
 
-  add_dependencies(orc::orc orc_ep)
+  add_library(orc::orc ALIAS orc)
 
-  list(APPEND ARROW_BUNDLED_STATIC_LIBS orc::orc)
-endmacro()
+  list(APPEND ARROW_BUNDLED_STATIC_LIBS orc)
+  set(ARROW_BUNDLED_STATIC_LIBS
+      ${ARROW_BUNDLED_STATIC_LIBS}
+      PARENT_SCOPE)
+endfunction()
 
 if(ARROW_ORC)
   resolve_dependency(orc HAVE_ALT TRUE)
-  target_link_libraries(orc::orc INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF})
   if(ORC_VENDORED)
     set(ARROW_ORC_VERSION ${ARROW_ORC_BUILD_VERSION})
   else()
+    target_link_libraries(orc::orc INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF})
     set(ARROW_ORC_VERSION ${orcAlt_VERSION})
+    message(STATUS "Found ORC static library: ${ORC_STATIC_LIB}")
+    message(STATUS "Found ORC headers: ${ORC_INCLUDE_DIR}")
   endif()
-  message(STATUS "Found ORC static library: ${ORC_STATIC_LIB}")
-  message(STATUS "Found ORC headers: ${ORC_INCLUDE_DIR}")
 endif()
 
 # ----------------------------------------------------------------------
diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index 986ac056b61a6..ab988badec145 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -90,8 +90,8 @@ ARROW_OPENTELEMETRY_BUILD_VERSION=v1.13.0
 ARROW_OPENTELEMETRY_BUILD_SHA256_CHECKSUM=7735cc56507149686e6019e06f588317099d4522480be5f38a2a09ec69af1706
 ARROW_OPENTELEMETRY_PROTO_BUILD_VERSION=v0.17.0
 ARROW_OPENTELEMETRY_PROTO_BUILD_SHA256_CHECKSUM=f269fbcb30e17b03caa1decd231ce826e59d7651c0f71c3b28eb5140b4bb5412
-ARROW_ORC_BUILD_VERSION=2.0.0
-ARROW_ORC_BUILD_SHA256_CHECKSUM=9107730919c29eb39efaff1b9e36166634d1d4d9477e5fee76bfd6a8fec317df
+ARROW_ORC_BUILD_VERSION=2.0.1
+ARROW_ORC_BUILD_SHA256_CHECKSUM=1ffac0228aa83f04a1b1cf2788a3af5953e82587ae3a77c41900e99f2557132d
 ARROW_PROTOBUF_BUILD_VERSION=v21.3
 ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM=2f723218f6cb709ae4cdc4fb5ed56a5951fc5d466f0128ce4c946b8c78c8c49f
 # Because of https://github.com/Tencent/rapidjson/pull/1323, we require