diff --git a/dev/build-thirdparty.sh b/dev/build-thirdparty.sh index ee827ef197f7..109392f4f4a7 100755 --- a/dev/build-thirdparty.sh +++ b/dev/build-thirdparty.sh @@ -12,40 +12,40 @@ ARCH=`uname -m` mkdir -p $THIRDPARTY_LIB function process_setup_ubuntu_2004 { cp /usr/lib/${ARCH}-linux-gnu/{libroken.so.18,libasn1.so.8,libcrypto.so.1.1,libnghttp2.so.14,libnettle.so.7,libhogweed.so.5,librtmp.so.1,libssh.so.4,libssl.so.1.1,liblber-2.4.so.2,libsasl2.so.2,libwind.so.0,libheimbase.so.1,libhcrypto.so.4,libhx509.so.5,libkrb5.so.26,libheimntlm.so.0,libgssapi.so.3,libldap_r-2.4.so.2,libcurl.so.4,libdouble-conversion.so.3,libevent-2.1.so.7,libgflags.so.2.2,libunwind.so.8,libglog.so.0,libidn.so.11,libntlm.so.0,libgsasl.so.7,libicudata.so.66,libicuuc.so.66,libxml2.so.2,libre2.so.5,libsnappy.so.1,libpsl.so.5,libbrotlidec.so.1,libbrotlicommon.so.1,libthrift-0.13.0.so} $THIRDPARTY_LIB/ - cp /usr/local/lib/{libprotobuf.so.32,libhdfs3.so.1,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/ + cp /usr/local/lib/{libprotobuf.so.32,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/ } function process_setup_ubuntu_2204 { cp /usr/lib/${ARCH}-linux-gnu/{libre2.so.9,libdouble-conversion.so.3,libidn.so.12,libglog.so.0,libgflags.so.2.2,libevent-2.1.so.7,libsnappy.so.1,libunwind.so.8,libcurl.so.4,libxml2.so.2,libgsasl.so.7,libicui18n.so.70,libicuuc.so.70,libnghttp2.so.14,libldap-2.5.so.0,liblber-2.5.so.0,libntlm.so.0,librtmp.so.1,libsasl2.so.2,libssh.so.4,libicudata.so.70,libthrift-0.16.0.so} $THIRDPARTY_LIB/ - cp /usr/local/lib/{libhdfs3.so.1,libprotobuf.so.32,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/ + cp /usr/local/lib/{libprotobuf.so.32,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/ } function process_setup_centos_9 { cp /lib64/{libre2.so.9,libdouble-conversion.so.3,libevent-2.1.so.7,libdwarf.so.0,libgsasl.so.7,libicudata.so.67,libicui18n.so.67,libicuuc.so.67,libidn.so.12,libntlm.so.0,libsodium.so.23} 
$THIRDPARTY_LIB/ - cp /usr/local/lib/{libhdfs3.so.1,libboost_context.so.1.84.0,libboost_filesystem.so.1.84.0,libboost_program_options.so.1.84.0,libboost_regex.so.1.84.0,libboost_system.so.1.84.0,libboost_thread.so.1.84.0,libboost_atomic.so.1.84.0,libprotobuf.so.32} $THIRDPARTY_LIB/ + cp /usr/local/lib/{libboost_context.so.1.84.0,libboost_filesystem.so.1.84.0,libboost_program_options.so.1.84.0,libboost_regex.so.1.84.0,libboost_system.so.1.84.0,libboost_thread.so.1.84.0,libboost_atomic.so.1.84.0,libprotobuf.so.32} $THIRDPARTY_LIB/ cp /usr/local/lib64/{libgflags.so.2.2,libglog.so.1} $THIRDPARTY_LIB/ } function process_setup_centos_8 { cp /usr/lib64/{libre2.so.0,libdouble-conversion.so.3,libevent-2.1.so.6,libdwarf.so.1,libgsasl.so.7,libicudata.so.60,libicui18n.so.60,libicuuc.so.60,libidn.so.11,libntlm.so.0,libsodium.so.23} $THIRDPARTY_LIB/ - cp /usr/local/lib/{libhdfs3.so.1,libboost_context.so.1.84.0,libboost_filesystem.so.1.84.0,libboost_program_options.so.1.84.0,libboost_regex.so.1.84.0,libboost_system.so.1.84.0,libboost_thread.so.1.84.0,libboost_atomic.so.1.84.0,libprotobuf.so.32} $THIRDPARTY_LIB/ + cp /usr/local/lib/{libboost_context.so.1.84.0,libboost_filesystem.so.1.84.0,libboost_program_options.so.1.84.0,libboost_regex.so.1.84.0,libboost_system.so.1.84.0,libboost_thread.so.1.84.0,libboost_atomic.so.1.84.0,libprotobuf.so.32} $THIRDPARTY_LIB/ cp /usr/local/lib64/{libgflags.so.2.2,libglog.so.1} $THIRDPARTY_LIB/ } function process_setup_centos_7 { cp /usr/local/lib64/{libgflags.so.2.2,libglog.so.0} $THIRDPARTY_LIB/ cp /usr/lib64/{libdouble-conversion.so.1,libevent-2.0.so.5,libzstd.so.1,libntlm.so.0,libgsasl.so.7,liblz4.so.1} $THIRDPARTY_LIB/ - cp /usr/local/lib/{libre2.so.10,libhdfs3.so.1,libboost_context.so.1.84.0,libboost_filesystem.so.1.84.0,libboost_program_options.so.1.84.0,libboost_system.so.1.84.0,libboost_thread.so.1.84.0,libboost_regex.so.1.84.0,libboost_atomic.so.1.84.0,libprotobuf.so.32} $THIRDPARTY_LIB/ + cp 
/usr/local/lib/{libre2.so.10,libboost_context.so.1.84.0,libboost_filesystem.so.1.84.0,libboost_program_options.so.1.84.0,libboost_system.so.1.84.0,libboost_thread.so.1.84.0,libboost_regex.so.1.84.0,libboost_atomic.so.1.84.0,libprotobuf.so.32} $THIRDPARTY_LIB/ } function process_setup_debian_11 { cp /usr/lib/x86_64-linux-gnu/{libre2.so.9,libthrift-0.13.0.so,libdouble-conversion.so.3,libevent-2.1.so.7,libgflags.so.2.2,libglog.so.0,libsnappy.so.1,libunwind.so.8,libcurl.so.4,libicui18n.so.67,libicuuc.so.67,libnghttp2.so.14,librtmp.so.1,libssh2.so.1,libpsl.so.5,libldap_r-2.4.so.2,liblber-2.4.so.2,libbrotlidec.so.1,libicudata.so.67,libsasl2.so.2,libbrotlicommon.so.1} $THIRDPARTY_LIB/ - cp /usr/local/lib/{libhdfs3.so.1,libprotobuf.so.32,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/ + cp /usr/local/lib/{libprotobuf.so.32,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/ } function process_setup_debian_12 { cp /usr/lib/x86_64-linux-gnu/{libthrift-0.17.0.so,libdouble-conversion.so.3,libevent-2.1.so.7,libgflags.so.2.2,libglog.so.1,libsnappy.so.1,libunwind.so.8,libcurl.so.4,libicui18n.so.72,libicuuc.so.72,libnghttp2.so.14,librtmp.so.1,libssh2.so.1,libpsl.so.5,libldap-2.5.so.0,liblber-2.5.so.0,libbrotlidec.so.1,libicudata.so.72,libsasl2.so.2,libbrotlicommon.so.1,libcrypto.so.3,libssl.so.3,libgssapi_krb5.so.2,libkrb5.so.3,libk5crypto.so.3,libkrb5support.so.0,libkeyutils.so.1} $THIRDPARTY_LIB/ - cp /usr/local/lib/{libprotobuf.so.32,libhdfs3.so.1,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/ + cp /usr/local/lib/{libprotobuf.so.32,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/ } if [[ "$LINUX_OS" == "ubuntu" || "$LINUX_OS" == "pop" ]]; then diff --git a/dev/build_helper_functions.sh b/dev/build_helper_functions.sh index 97e3a0993520..836ed6ca4e55 100644 --- a/dev/build_helper_functions.sh +++ b/dev/build_helper_functions.sh @@ -208,9 +208,3 @@ function setup_linux { exit 1 fi } - -function 
install_libhdfs3 { - github_checkout oap-project/libhdfs3 master - cmake_install -} - diff --git a/dev/build_libhdfs3.sh b/dev/build_libhdfs3.sh new file mode 100755 index 000000000000..b001a121c69b --- /dev/null +++ b/dev/build_libhdfs3.sh @@ -0,0 +1,57 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Builds libhdfs3 from the apache/hawq sources (depends/libhdfs3) and installs
+# it through cmake_install.
+#
+# Usage: dev/build_libhdfs3.sh
+# Environment:
+#   DEPENDENCY_DIR  Directory the sources are checked out into.
+#                   Defaults to <script dir>/../ep/_ep; created if missing.
+
+set -exu
+
+# Resolve the directory holding this script. '&&' (instead of ';') keeps a
+# failed cd from being masked by a successful pwd.
+CURRENT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# Exported before the helpers are sourced; presumably read by cmake_install to
+# run the install step with elevated privileges -- TODO confirm in the helpers.
+export SUDO=sudo
+# Expected to provide github_checkout and cmake_install used below.
+source "${CURRENT_DIR}/build_helper_functions.sh"
+DEPENDENCY_DIR=${DEPENDENCY_DIR:-${CURRENT_DIR}/../ep/_ep}
+
+# Check out apache/hawq, patch depends/libhdfs3 in place, then build and install it.
+function build_libhdfs3 {
+  # The default DEPENDENCY_DIR may not exist when this script is run standalone.
+  mkdir -p "${DEPENDENCY_DIR}"
+  cd "${DEPENDENCY_DIR}"
+  github_checkout apache/hawq master
+  cd depends/libhdfs3
+  # Drop the GoogleTest requirement from the CMake configuration.
+  sed -i "/FIND_PACKAGE(GoogleTest REQUIRED)/d" ./CMakeLists.txt
+  # Replace 'dumpversion' with 'dumpfullversion' (presumably the compiler version query -- confirm).
+  sed -i "s/dumpversion/dumpfullversion/" ./CMake/Platform.cmake
+  # Change the empty default of dfs.domain.socket.path to /var/lib/hadoop-hdfs/dn_socket.
+  sed -i "s/dfs.domain.socket.path\", \"\"/dfs.domain.socket.path\", \"\/var\/lib\/hadoop-hdfs\/dn_socket\"/g" src/common/SessionConfig.cpp
+  # Extend the 'pos < endOfCurBlock' condition with '&& pos - cursor <= 128 * 1024'.
+  sed -i "s/pos < endOfCurBlock/pos \< endOfCurBlock \&\& pos \- cursor \<\= 128 \* 1024/g" src/client/InputStreamImpl.cpp
+  cmake_install
+}
+
+echo "Start to build Libhdfs3"
+build_libhdfs3
+echo "Finished building Libhdfs3. You can find the libhdfs3.so in /usr/local/lib/libhdfs3.so.1"
diff --git a/dev/builddeps-veloxbe.sh b/dev/builddeps-veloxbe.sh index 8eb4cf4edf2c..2180db9f8dca 100755 --- a/dev/builddeps-veloxbe.sh +++ b/dev/builddeps-veloxbe.sh @@ -247,15 +247,6 @@ if [ -z "${GLUTEN_VCPKG_ENABLED:-}" ] && [ $RUN_SETUP_SCRIPT == "ON" ]; then fi ${VELOX_HOME}/scripts/setup-adapters.sh aws fi - if [ $ENABLE_HDFS == "ON" ]; then - if [ $OS == 'Darwin' ]; then - echo "HDFS is not supported on MacOS." - exit 1 - fi - pushd $VELOX_HOME - install_libhdfs3 - popd - fi if [ $ENABLE_GCS == "ON" ]; then ${VELOX_HOME}/scripts/setup-adapters.sh gcs fi diff --git a/dev/vcpkg/init.sh b/dev/vcpkg/init.sh index bae1a8ad32eb..4e7b16af821c 100755 --- a/dev/vcpkg/init.sh +++ b/dev/vcpkg/init.sh @@ -70,9 +70,6 @@ fi if [ "$ENABLE_GCS" = "ON" ]; then EXTRA_FEATURES+="--x-feature=velox-gcs " fi -if [ "$ENABLE_HDFS" = "ON" ]; then - EXTRA_FEATURES+="--x-feature=velox-hdfs " -fi if [ "$ENABLE_ABFS" = "ON" ]; then EXTRA_FEATURES+="--x-feature=velox-abfs" fi @@ -90,6 +87,3 @@ cp $VCPKG_TRIPLET_INSTALL_DIR/lib/libssl.a $VCPKG_TRIPLET_INSTALL_DIR/debug/lib cp $VCPKG_TRIPLET_INSTALL_DIR/lib/libcrypto.a $VCPKG_TRIPLET_INSTALL_DIR/debug/lib cp $VCPKG_TRIPLET_INSTALL_DIR/lib/liblzma.a $VCPKG_TRIPLET_INSTALL_DIR/debug/lib cp $VCPKG_TRIPLET_INSTALL_DIR/lib/libdwarf.a $VCPKG_TRIPLET_INSTALL_DIR/debug/lib -# Allow libhdfs3.a is not installed as build option may not enable hdfs.
-cp $VCPKG_TRIPLET_INSTALL_DIR/lib/libhdfs3.a $VCPKG_TRIPLET_INSTALL_DIR/debug/lib || true - diff --git a/dev/vcpkg/ports/libhdfs3/libhdfs3Config.cmake b/dev/vcpkg/ports/libhdfs3/libhdfs3Config.cmake deleted file mode 100644 index 93ef72da56b2..000000000000 --- a/dev/vcpkg/ports/libhdfs3/libhdfs3Config.cmake +++ /dev/null @@ -1,26 +0,0 @@ -include(CMakeFindDependencyMacro) -include(FindPkgConfig) - -find_dependency(Boost COMPONENTS thread chrono system atomic iostreams) -find_dependency(LibXml2) -find_dependency(Protobuf) -pkg_check_modules(Gsasl REQUIRED libgsasl mit-krb5-gssapi) -pkg_check_modules(UUID REQUIRED uuid) - -FUNCTION(SET_LIBRARY_TARGET NAMESPACE LIB_NAME DEBUG_LIB_FILE_NAME RELEASE_LIB_FILE_NAME INCLUDE_DIR) - ADD_LIBRARY(${NAMESPACE}::${LIB_NAME} STATIC IMPORTED) - SET_TARGET_PROPERTIES(${NAMESPACE}::${LIB_NAME} PROPERTIES - IMPORTED_CONFIGURATIONS "RELEASE;DEBUG" - IMPORTED_LOCATION_RELEASE "${RELEASE_LIB_FILE_NAME}" - IMPORTED_LOCATION_DEBUG "${DEBUG_LIB_FILE_NAME}" - INTERFACE_INCLUDE_DIRECTORIES "${INCLUDE_DIR}" - INTERFACE_LINK_LIBRARIES "protobuf::libprotobuf;LibXml2::LibXml2;${Gsasl_LINK_LIBRARIES};${UUID_LINK_LIBRARIES}" - ) - SET(${NAMESPACE}_${LIB_NAME}_FOUND 1) -ENDFUNCTION() - -GET_FILENAME_COMPONENT(ROOT "${CMAKE_CURRENT_LIST_FILE}" PATH) -GET_FILENAME_COMPONENT(ROOT "${ROOT}" PATH) -GET_FILENAME_COMPONENT(ROOT "${ROOT}" PATH) - -SET_LIBRARY_TARGET("HDFS" "hdfs3" "${ROOT}/debug/lib/libhdfs3.a" "${ROOT}/lib/libhdfs3.a" "${ROOT}/include/hdfs") \ No newline at end of file diff --git a/dev/vcpkg/ports/libhdfs3/portfile.cmake b/dev/vcpkg/ports/libhdfs3/portfile.cmake deleted file mode 100644 index 4c59c57ef9a8..000000000000 --- a/dev/vcpkg/ports/libhdfs3/portfile.cmake +++ /dev/null @@ -1,27 +0,0 @@ -vcpkg_from_github( - OUT_SOURCE_PATH SOURCE_PATH - REPO oap-project/libhdfs3 - HEAD_REF master - REF 9f234edb354ebcc99179cc6f72aefd66865f4154 - SHA512 
a1a587fdca60a39f77d36b281ad15fefd7cb4b353c982274ef3d7702e84c834525cd5a3ec2bbc4154fce58f1c7054a17789f08485eaacfbb672544398a277951 -) - -vcpkg_configure_cmake( - SOURCE_PATH ${SOURCE_PATH} - PREFER_NINJA - OPTIONS - -DCMAKE_PROGRAM_PATH=${CURRENT_HOST_INSTALLED_DIR}/tools/yasm - -DWITH_KERBEROS=on -) - -vcpkg_install_cmake() - -vcpkg_copy_pdbs() - -file(GLOB HDFS3_SHARED_LIBS ${CURRENT_PACKAGES_DIR}/debug/lib/libhdfs3.so* ${CURRENT_PACKAGES_DIR}/lib/libhdfs3.so*) -file(REMOVE ${HDFS3_SHARED_LIBS}) - -file(REMOVE_RECURSE ${CURRENT_PACKAGES_DIR}/debug/include ${CURRENT_PACKAGES_DIR}/debug/share) -file(INSTALL ${SOURCE_PATH}/LICENSE.txt DESTINATION ${CURRENT_PACKAGES_DIR}/share/${PORT} RENAME copyright) -FILE(INSTALL ${CMAKE_CURRENT_LIST_DIR}/libhdfs3Config.cmake DESTINATION ${CURRENT_PACKAGES_DIR}/share/${PORT}) -FILE(INSTALL ${CMAKE_CURRENT_LIST_DIR}/usage DESTINATION ${CURRENT_PACKAGES_DIR}/share/${PORT}) diff --git a/dev/vcpkg/ports/libhdfs3/usage b/dev/vcpkg/ports/libhdfs3/usage deleted file mode 100644 index 780d82d25683..000000000000 --- a/dev/vcpkg/ports/libhdfs3/usage +++ /dev/null @@ -1,4 +0,0 @@ -The package libhdfs3 is compatible with built-in CMake targets: - - FIND_PACKAGE(libhdfs3 REQUIRED) - TARGET_LINK_LIBRARIES(main PRIVATE HDFS::hdfs3) diff --git a/dev/vcpkg/ports/libhdfs3/vcpkg.json b/dev/vcpkg/ports/libhdfs3/vcpkg.json deleted file mode 100644 index 495615cf9077..000000000000 --- a/dev/vcpkg/ports/libhdfs3/vcpkg.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "name": "libhdfs3", - "version-date": "2019-11-05", - "port-version": 3, - "description": "Native Hadoop RPC protocol and HDFS data transfer protocol implementation", - "homepage": "https://github.com/erikmuttersbach/libhdfs3", - "supports": "!windows", - "dependencies": [ - "boost-thread", - "boost-chrono", - "boost-system", - "boost-atomic", - "boost-iostreams", - "boost-function", - "boost-bind", - "boost-crc", - "boost-functional", - { - "name": "libuuid", - "platform": "!windows & !osx" - }, - 
"libxml2", - "protobuf", - "krb5", - "gsasl", - { - "name": "yasm", - "host": true, - "features": [ - "tools" - ] - } - ] -} diff --git a/dev/vcpkg/vcpkg.json b/dev/vcpkg/vcpkg.json index c0123cfbe998..a6a70ec913bf 100644 --- a/dev/vcpkg/vcpkg.json +++ b/dev/vcpkg/vcpkg.json @@ -80,12 +80,6 @@ } ] }, - "velox-hdfs": { - "description": "Velox HDFS Support", - "dependencies": [ - "libhdfs3" - ] - }, "velox-abfs": { "description": "Velox ABFS Support", "dependencies": [ diff --git a/docs/get-started/Velox.md b/docs/get-started/Velox.md index 81bb88c75aec..8c8cb7bbd818 100644 --- a/docs/get-started/Velox.md +++ b/docs/get-started/Velox.md @@ -127,6 +127,10 @@ libraries list from the third-party jar. Gluten supports dynamically loading both libhdfs.so and libhdfs3.so at runtime by using dlopen, allowing the JVM to load the appropriate shared library file as needed. This means you do not need to set the library path during the compilation phase. To enable this functionality, you must set the JAVA_HOME and HADOOP_HOME environment variables. Gluten will then locate and load the ${HADOOP_HOME}/lib/native/libhdfs.so file at runtime. If you prefer to use libhdfs3.so instead, simply replace the ${HADOOP_HOME}/lib/native/libhdfs.so file with libhdfs3.so. +### Build libhdfs3 + +If you want to run Gluten with libhdfs3.so, you need to manually compile libhdfs3 to obtain the libhdfs3.so file. We provide the script dev/build_libhdfs3.sh in Gluten to help you compile libhdfs3.so. 
+ ### Build with HDFS support To build Gluten with HDFS support, below command is suggested: diff --git a/ep/build-velox/src/setup-centos7.sh b/ep/build-velox/src/setup-centos7.sh index 45880161a4a5..dbac575fbb47 100755 --- a/ep/build-velox/src/setup-centos7.sh +++ b/ep/build-velox/src/setup-centos7.sh @@ -166,17 +166,6 @@ function install_boost { $SUDO ./b2 "-j$(nproc)" -d0 install threading=multi } -function install_libhdfs3 { - cd "${DEPENDENCY_DIR}" - github_checkout apache/hawq master - cd depends/libhdfs3 - sed -i "/FIND_PACKAGE(GoogleTest REQUIRED)/d" ./CMakeLists.txt - sed -i "s/dumpversion/dumpfullversion/" ./CMake/Platform.cmake - sed -i "s/dfs.domain.socket.path\", \"\"/dfs.domain.socket.path\", \"\/var\/lib\/hadoop-hdfs\/dn_socket\"/g" src/common/SessionConfig.cpp - sed -i "s/pos < endOfCurBlock/pos \< endOfCurBlock \&\& pos \- cursor \<\= 128 \* 1024/g" src/client/InputStreamImpl.cpp - cmake_install -} - function install_protobuf { cd "${DEPENDENCY_DIR}" wget https://github.com/protocolbuffers/protobuf/releases/download/v21.4/protobuf-all-21.4.tar.gz