Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GLUTEN-8020][VL] Remove the libhdfs3 installation script required for static linking #8013

Merged
merged 4 commits into from
Jan 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions dev/build-thirdparty.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,40 +12,40 @@ ARCH=`uname -m`
mkdir -p $THIRDPARTY_LIB
function process_setup_ubuntu_2004 {
cp /usr/lib/${ARCH}-linux-gnu/{libroken.so.18,libasn1.so.8,libcrypto.so.1.1,libnghttp2.so.14,libnettle.so.7,libhogweed.so.5,librtmp.so.1,libssh.so.4,libssl.so.1.1,liblber-2.4.so.2,libsasl2.so.2,libwind.so.0,libheimbase.so.1,libhcrypto.so.4,libhx509.so.5,libkrb5.so.26,libheimntlm.so.0,libgssapi.so.3,libldap_r-2.4.so.2,libcurl.so.4,libdouble-conversion.so.3,libevent-2.1.so.7,libgflags.so.2.2,libunwind.so.8,libglog.so.0,libidn.so.11,libntlm.so.0,libgsasl.so.7,libicudata.so.66,libicuuc.so.66,libxml2.so.2,libre2.so.5,libsnappy.so.1,libpsl.so.5,libbrotlidec.so.1,libbrotlicommon.so.1,libthrift-0.13.0.so} $THIRDPARTY_LIB/
cp /usr/local/lib/{libprotobuf.so.32,libhdfs3.so.1,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/
cp /usr/local/lib/{libprotobuf.so.32,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/
}

function process_setup_ubuntu_2204 {
cp /usr/lib/${ARCH}-linux-gnu/{libre2.so.9,libdouble-conversion.so.3,libidn.so.12,libglog.so.0,libgflags.so.2.2,libevent-2.1.so.7,libsnappy.so.1,libunwind.so.8,libcurl.so.4,libxml2.so.2,libgsasl.so.7,libicui18n.so.70,libicuuc.so.70,libnghttp2.so.14,libldap-2.5.so.0,liblber-2.5.so.0,libntlm.so.0,librtmp.so.1,libsasl2.so.2,libssh.so.4,libicudata.so.70,libthrift-0.16.0.so} $THIRDPARTY_LIB/
cp /usr/local/lib/{libhdfs3.so.1,libprotobuf.so.32,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/
cp /usr/local/lib/{libprotobuf.so.32,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/
}

function process_setup_centos_9 {
cp /lib64/{libre2.so.9,libdouble-conversion.so.3,libevent-2.1.so.7,libdwarf.so.0,libgsasl.so.7,libicudata.so.67,libicui18n.so.67,libicuuc.so.67,libidn.so.12,libntlm.so.0,libsodium.so.23} $THIRDPARTY_LIB/
cp /usr/local/lib/{libhdfs3.so.1,libboost_context.so.1.84.0,libboost_filesystem.so.1.84.0,libboost_program_options.so.1.84.0,libboost_regex.so.1.84.0,libboost_system.so.1.84.0,libboost_thread.so.1.84.0,libboost_atomic.so.1.84.0,libprotobuf.so.32} $THIRDPARTY_LIB/
cp /usr/local/lib/{libboost_context.so.1.84.0,libboost_filesystem.so.1.84.0,libboost_program_options.so.1.84.0,libboost_regex.so.1.84.0,libboost_system.so.1.84.0,libboost_thread.so.1.84.0,libboost_atomic.so.1.84.0,libprotobuf.so.32} $THIRDPARTY_LIB/
cp /usr/local/lib64/{libgflags.so.2.2,libglog.so.1} $THIRDPARTY_LIB/
}

function process_setup_centos_8 {
cp /usr/lib64/{libre2.so.0,libdouble-conversion.so.3,libevent-2.1.so.6,libdwarf.so.1,libgsasl.so.7,libicudata.so.60,libicui18n.so.60,libicuuc.so.60,libidn.so.11,libntlm.so.0,libsodium.so.23} $THIRDPARTY_LIB/
cp /usr/local/lib/{libhdfs3.so.1,libboost_context.so.1.84.0,libboost_filesystem.so.1.84.0,libboost_program_options.so.1.84.0,libboost_regex.so.1.84.0,libboost_system.so.1.84.0,libboost_thread.so.1.84.0,libboost_atomic.so.1.84.0,libprotobuf.so.32} $THIRDPARTY_LIB/
cp /usr/local/lib/{libboost_context.so.1.84.0,libboost_filesystem.so.1.84.0,libboost_program_options.so.1.84.0,libboost_regex.so.1.84.0,libboost_system.so.1.84.0,libboost_thread.so.1.84.0,libboost_atomic.so.1.84.0,libprotobuf.so.32} $THIRDPARTY_LIB/
cp /usr/local/lib64/{libgflags.so.2.2,libglog.so.1} $THIRDPARTY_LIB/
}

function process_setup_centos_7 {
cp /usr/local/lib64/{libgflags.so.2.2,libglog.so.0} $THIRDPARTY_LIB/
cp /usr/lib64/{libdouble-conversion.so.1,libevent-2.0.so.5,libzstd.so.1,libntlm.so.0,libgsasl.so.7,liblz4.so.1} $THIRDPARTY_LIB/
cp /usr/local/lib/{libre2.so.10,libhdfs3.so.1,libboost_context.so.1.84.0,libboost_filesystem.so.1.84.0,libboost_program_options.so.1.84.0,libboost_system.so.1.84.0,libboost_thread.so.1.84.0,libboost_regex.so.1.84.0,libboost_atomic.so.1.84.0,libprotobuf.so.32} $THIRDPARTY_LIB/
cp /usr/local/lib/{libre2.so.10,libboost_context.so.1.84.0,libboost_filesystem.so.1.84.0,libboost_program_options.so.1.84.0,libboost_system.so.1.84.0,libboost_thread.so.1.84.0,libboost_regex.so.1.84.0,libboost_atomic.so.1.84.0,libprotobuf.so.32} $THIRDPARTY_LIB/
}

function process_setup_debian_11 {
cp /usr/lib/x86_64-linux-gnu/{libre2.so.9,libthrift-0.13.0.so,libdouble-conversion.so.3,libevent-2.1.so.7,libgflags.so.2.2,libglog.so.0,libsnappy.so.1,libunwind.so.8,libcurl.so.4,libicui18n.so.67,libicuuc.so.67,libnghttp2.so.14,librtmp.so.1,libssh2.so.1,libpsl.so.5,libldap_r-2.4.so.2,liblber-2.4.so.2,libbrotlidec.so.1,libicudata.so.67,libsasl2.so.2,libbrotlicommon.so.1} $THIRDPARTY_LIB/
cp /usr/local/lib/{libhdfs3.so.1,libprotobuf.so.32,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/
cp /usr/local/lib/{libprotobuf.so.32,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/
}

function process_setup_debian_12 {
cp /usr/lib/x86_64-linux-gnu/{libthrift-0.17.0.so,libdouble-conversion.so.3,libevent-2.1.so.7,libgflags.so.2.2,libglog.so.1,libsnappy.so.1,libunwind.so.8,libcurl.so.4,libicui18n.so.72,libicuuc.so.72,libnghttp2.so.14,librtmp.so.1,libssh2.so.1,libpsl.so.5,libldap-2.5.so.0,liblber-2.5.so.0,libbrotlidec.so.1,libicudata.so.72,libsasl2.so.2,libbrotlicommon.so.1,libcrypto.so.3,libssl.so.3,libgssapi_krb5.so.2,libkrb5.so.3,libk5crypto.so.3,libkrb5support.so.0,libkeyutils.so.1} $THIRDPARTY_LIB/
cp /usr/local/lib/{libprotobuf.so.32,libhdfs3.so.1,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/
cp /usr/local/lib/{libprotobuf.so.32,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/
}

if [[ "$LINUX_OS" == "ubuntu" || "$LINUX_OS" == "pop" ]]; then
Expand Down
6 changes: 0 additions & 6 deletions dev/build_helper_functions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -208,9 +208,3 @@ function setup_linux {
exit 1
fi
}

function install_libhdfs3 {
github_checkout oap-project/libhdfs3 master
cmake_install
}

37 changes: 37 additions & 0 deletions dev/build_libhdfs3.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/bin/bash
JkSelf marked this conversation as resolved.
Show resolved Hide resolved
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -exu

CURRENT_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd)
export SUDO=sudo
source ${CURRENT_DIR}/build_helper_functions.sh
DEPENDENCY_DIR=${DEPENDENCY_DIR:-$CURRENT_DIR/../ep/_ep}

function build_libhdfs3 {
cd "${DEPENDENCY_DIR}"
github_checkout apache/hawq master
cd depends/libhdfs3
sed -i "/FIND_PACKAGE(GoogleTest REQUIRED)/d" ./CMakeLists.txt
sed -i "s/dumpversion/dumpfullversion/" ./CMake/Platform.cmake
sed -i "s/dfs.domain.socket.path\", \"\"/dfs.domain.socket.path\", \"\/var\/lib\/hadoop-hdfs\/dn_socket\"/g" src/common/SessionConfig.cpp
sed -i "s/pos < endOfCurBlock/pos \< endOfCurBlock \&\& pos \- cursor \<\= 128 \* 1024/g" src/client/InputStreamImpl.cpp
cmake_install
}

echo "Start to build Libhdfs3"
build_libhdfs3
echo "Finished building Libhdfs3. You can find the libhdfs3.so in /usr/local/lib/libhdfs3.so.1"
9 changes: 0 additions & 9 deletions dev/builddeps-veloxbe.sh
Original file line number Diff line number Diff line change
Expand Up @@ -247,15 +247,6 @@ if [ -z "${GLUTEN_VCPKG_ENABLED:-}" ] && [ $RUN_SETUP_SCRIPT == "ON" ]; then
fi
${VELOX_HOME}/scripts/setup-adapters.sh aws
fi
if [ $ENABLE_HDFS == "ON" ]; then
if [ $OS == 'Darwin' ]; then
echo "HDFS is not supported on MacOS."
exit 1
fi
pushd $VELOX_HOME
install_libhdfs3
popd
fi
if [ $ENABLE_GCS == "ON" ]; then
${VELOX_HOME}/scripts/setup-adapters.sh gcs
fi
Expand Down
6 changes: 0 additions & 6 deletions dev/vcpkg/init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,6 @@ fi
if [ "$ENABLE_GCS" = "ON" ]; then
EXTRA_FEATURES+="--x-feature=velox-gcs "
fi
if [ "$ENABLE_HDFS" = "ON" ]; then
EXTRA_FEATURES+="--x-feature=velox-hdfs "
fi
if [ "$ENABLE_ABFS" = "ON" ]; then
EXTRA_FEATURES+="--x-feature=velox-abfs"
fi
Expand All @@ -90,6 +87,3 @@ cp $VCPKG_TRIPLET_INSTALL_DIR/lib/libssl.a $VCPKG_TRIPLET_INSTALL_DIR/debug/lib
cp $VCPKG_TRIPLET_INSTALL_DIR/lib/libcrypto.a $VCPKG_TRIPLET_INSTALL_DIR/debug/lib
cp $VCPKG_TRIPLET_INSTALL_DIR/lib/liblzma.a $VCPKG_TRIPLET_INSTALL_DIR/debug/lib
cp $VCPKG_TRIPLET_INSTALL_DIR/lib/libdwarf.a $VCPKG_TRIPLET_INSTALL_DIR/debug/lib
# Allow libhdfs3.a is not installed as build option may not enable hdfs.
cp $VCPKG_TRIPLET_INSTALL_DIR/lib/libhdfs3.a $VCPKG_TRIPLET_INSTALL_DIR/debug/lib || true

26 changes: 0 additions & 26 deletions dev/vcpkg/ports/libhdfs3/libhdfs3Config.cmake

This file was deleted.

27 changes: 0 additions & 27 deletions dev/vcpkg/ports/libhdfs3/portfile.cmake

This file was deleted.

4 changes: 0 additions & 4 deletions dev/vcpkg/ports/libhdfs3/usage

This file was deleted.

34 changes: 0 additions & 34 deletions dev/vcpkg/ports/libhdfs3/vcpkg.json

This file was deleted.

6 changes: 0 additions & 6 deletions dev/vcpkg/vcpkg.json
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,6 @@
}
]
},
"velox-hdfs": {
"description": "Velox HDFS Support",
"dependencies": [
"libhdfs3"
]
},
JkSelf marked this conversation as resolved.
Show resolved Hide resolved
"velox-abfs": {
"description": "Velox ABFS Support",
"dependencies": [
Expand Down
4 changes: 4 additions & 0 deletions docs/get-started/Velox.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,10 @@ libraries list from the third-party jar.
Gluten supports dynamically loading both libhdfs.so and libhdfs3.so at runtime by using dlopen, allowing the JVM to load the appropriate shared library file as needed. This means you do not need to set the library path during the compilation phase.
To enable this functionality, you must set the JAVA_HOME and HADOOP_HOME environment variables. Gluten will then locate and load the ${HADOOP_HOME}/lib/native/libhdfs.so file at runtime. If you prefer to use libhdfs3.so instead, simply replace the ${HADOOP_HOME}/lib/native/libhdfs.so file with libhdfs3.so.

### Build libhdfs3

If you want to run Gluten with libhdfs3.so, you need to manually compile libhdfs3 to obtain the libhdfs3.so file. We provide the script dev/build_libhdfs3.sh in Gluten to help you compile libhdfs3.so.

### Build with HDFS support

To build Gluten with HDFS support, below command is suggested:
Expand Down
11 changes: 0 additions & 11 deletions ep/build-velox/src/setup-centos7.sh
Original file line number Diff line number Diff line change
Expand Up @@ -166,17 +166,6 @@ function install_boost {
$SUDO ./b2 "-j$(nproc)" -d0 install threading=multi
}

function install_libhdfs3 {
cd "${DEPENDENCY_DIR}"
github_checkout apache/hawq master
cd depends/libhdfs3
sed -i "/FIND_PACKAGE(GoogleTest REQUIRED)/d" ./CMakeLists.txt
sed -i "s/dumpversion/dumpfullversion/" ./CMake/Platform.cmake
sed -i "s/dfs.domain.socket.path\", \"\"/dfs.domain.socket.path\", \"\/var\/lib\/hadoop-hdfs\/dn_socket\"/g" src/common/SessionConfig.cpp
sed -i "s/pos < endOfCurBlock/pos \< endOfCurBlock \&\& pos \- cursor \<\= 128 \* 1024/g" src/client/InputStreamImpl.cpp
cmake_install
}

function install_protobuf {
cd "${DEPENDENCY_DIR}"
wget https://github.com/protocolbuffers/protobuf/releases/download/v21.4/protobuf-all-21.4.tar.gz
Expand Down
Loading