Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GLUTEN-7746] Support static link libhdfs3 in velox #7697

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ option(ENABLE_IAA "Enable IAA for de/compression" OFF)
option(ENABLE_GCS "Enable GCS" OFF)
option(ENABLE_S3 "Enable S3" OFF)
option(ENABLE_HDFS "Enable HDFS" OFF)
option(ENABLE_HDFS3 "Enable HDFS3" OFF)
option(ENABLE_ORC "Enable ORC" OFF)
option(ENABLE_ABFS "Enable ABFS" OFF)

Expand Down
7 changes: 7 additions & 0 deletions cpp/compile.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ ENABLE_HBM=OFF
ENABLE_GCS=OFF
ENABLE_S3=OFF
ENABLE_HDFS=OFF
ENABLE_HDFS3=OFF
ENABLE_ABFS=OFF
VELOX_HOME=
# set default number of threads as cpu cores minus 2
Expand Down Expand Up @@ -97,6 +98,10 @@ for arg in "$@"; do
ENABLE_HDFS=("${arg#*=}")
shift # Remove argument name from processing
;;
--enable_hdfs3=*)
ENABLE_HDFS3=("${arg#*=}")
shift # Remove argument name from processing
;;
*)
OTHER_ARGUMENTS+=("$1")
shift # Remove generic argument from processing
Expand Down Expand Up @@ -127,6 +132,7 @@ echo "ENABLE_HBM=${ENABLE_HBM}"
echo "ENABLE_GCS=${ENABLE_GCS}"
echo "ENABLE_S3=${ENABLE_S3}"
echo "ENABLE_HDFS=${ENABLE_HDFS}"
echo "ENABLE_HDFS3=${ENABLE_HDFS3}"
echo "ENABLE_ABFS=${ENABLE_ABFS}"

if [ -d build ]; then
Expand All @@ -147,5 +153,6 @@ cmake .. \
-DENABLE_GCS=${ENABLE_GCS} \
-DENABLE_S3=${ENABLE_S3} \
-DENABLE_HDFS=${ENABLE_HDFS} \
-DENABLE_HDFS3=${ENABLE_HDFS3} \
-DENABLE_ABFS=${ENABLE_ABFS}
make -j$NPROC
28 changes: 28 additions & 0 deletions cpp/velox/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,34 @@ if(ENABLE_HDFS)
add_definitions(-DENABLE_HDFS)
endif()

# find_libhdfs3()
#
# Locates libhdfs3 and ensures the imported target HDFS::hdfs3 exists.
#
# Lookup order:
#   1. A CMake package config for libhdfs3 that already defines HDFS::hdfs3;
#      in that case LIBHDFS3_LIBRARY is set to the target name.
#   2. Otherwise a manual search for hdfs/hdfs.h and a shared "hdfs3" library,
#      from which a SHARED IMPORTED HDFS::hdfs3 target is created.
#
# Stops configuration with FATAL_ERROR when libhdfs3 cannot be found.
#
# This is a macro, so it runs in the caller's variable scope: libhdfs3_FOUND,
# libhdfs3_INCLUDE_DIR and libhdfs3_LIBRARY remain visible to the caller.
macro(find_libhdfs3)
  # Provides find_package_handle_standard_args(); including it more than once
  # is harmless.
  include(FindPackageHandleStandardArgs)

  find_package(libhdfs3 CONFIG)
  if(libhdfs3_FOUND AND TARGET HDFS::hdfs3)
    set(LIBHDFS3_LIBRARY HDFS::hdfs3)
  else()
    find_path(libhdfs3_INCLUDE_DIR hdfs/hdfs.h)

    # Restrict only this lookup to ".so". Because a macro shares the caller's
    # scope, the previous suffix list is restored afterwards so that later
    # find_library() calls in the caller are not silently affected.
    set(libhdfs3_saved_library_suffixes "${CMAKE_FIND_LIBRARY_SUFFIXES}")
    set(CMAKE_FIND_LIBRARY_SUFFIXES ".so")
    find_library(libhdfs3_LIBRARY NAMES hdfs3)
    set(CMAKE_FIND_LIBRARY_SUFFIXES "${libhdfs3_saved_library_suffixes}")
    unset(libhdfs3_saved_library_suffixes)

    find_package_handle_standard_args(libhdfs3 DEFAULT_MSG libhdfs3_INCLUDE_DIR
                                      libhdfs3_LIBRARY)

    # Create the imported target only when both paths are valid, and only
    # once, so that calling the macro again does not hit a duplicate-target
    # error and the target never carries *-NOTFOUND paths.
    if(libhdfs3_FOUND AND NOT TARGET HDFS::hdfs3)
      add_library(HDFS::hdfs3 SHARED IMPORTED)
      set_target_properties(
        HDFS::hdfs3
        PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${libhdfs3_INCLUDE_DIR}"
                   IMPORTED_LOCATION "${libhdfs3_LIBRARY}")
    endif()
  endif()

  if(NOT libhdfs3_FOUND)
    message(FATAL_ERROR "LIBHDFS3 Library Not Found")
  endif()
endmacro()

# Optional libhdfs3 support: define ENABLE_HDFS3 for the sources in this
# directory, resolve the HDFS::hdfs3 target, and link it into velox.
if(ENABLE_HDFS3)
  add_definitions(-DENABLE_HDFS3)
  find_libhdfs3()
  target_link_libraries(velox PUBLIC HDFS::hdfs3)
endif()

if(ENABLE_S3)
add_definitions(-DENABLE_S3)
find_awssdk()
Expand Down
2 changes: 1 addition & 1 deletion cpp/velox/compute/VeloxBackend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ void VeloxBackend::init(const std::unordered_map<std::string, std::string>& conf
// Setup and register.
velox::filesystems::registerLocalFileSystem();

#ifdef ENABLE_HDFS
#if (defined(ENABLE_HDFS) || defined(ENABLE_HDFS3))
velox::filesystems::registerHdfsFileSystem();
#endif
#ifdef ENABLE_S3
Expand Down
11 changes: 8 additions & 3 deletions dev/builddeps-veloxbe.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ ENABLE_HBM=OFF
ENABLE_GCS=OFF
ENABLE_S3=OFF
ENABLE_HDFS=OFF
ENABLE_HDFS3=OFF
ENABLE_ABFS=OFF
ENABLE_EP_CACHE=OFF
ENABLE_VCPKG=OFF
Expand Down Expand Up @@ -91,6 +92,10 @@ do
--enable_hdfs=*)
ENABLE_HDFS=("${arg#*=}")
shift # Remove argument name from processing
;;
--enable_hdfs3=*)
ENABLE_HDFS3=("${arg#*=}")
shift # Remove argument name from processing
;;
--enable_abfs=*)
ENABLE_ABFS=("${arg#*=}")
Expand Down Expand Up @@ -167,7 +172,7 @@ function concat_velox_param {
if [ "$ENABLE_VCPKG" = "ON" ]; then
# vcpkg will install static depends and init build environment
BUILD_OPTIONS="--build_tests=$BUILD_TESTS --enable_s3=$ENABLE_S3 --enable_gcs=$ENABLE_GCS \
--enable_hdfs=$ENABLE_HDFS --enable_abfs=$ENABLE_ABFS"
--enable_hdfs=$ENABLE_HDFS --enable_hdfs3=$ENABLE_HDFS3 --enable_abfs=$ENABLE_ABFS"
source ./dev/vcpkg/env.sh ${BUILD_OPTIONS}
fi

Expand All @@ -191,7 +196,7 @@ function build_velox {
echo "Start to build Velox"
cd $GLUTEN_DIR/ep/build-velox/src
# When BUILD_TESTS is on for gluten cpp, we need turn on VELOX_BUILD_TEST_UTILS via build_test_utils.
./build_velox.sh --enable_s3=$ENABLE_S3 --enable_gcs=$ENABLE_GCS --build_type=$BUILD_TYPE --enable_hdfs=$ENABLE_HDFS \
./build_velox.sh --enable_s3=$ENABLE_S3 --enable_gcs=$ENABLE_GCS --build_type=$BUILD_TYPE --enable_hdfs=$ENABLE_HDFS --enable_hdfs3=$ENABLE_HDFS3 \
--enable_abfs=$ENABLE_ABFS --enable_ep_cache=$ENABLE_EP_CACHE --build_test_utils=$BUILD_TESTS \
--build_tests=$BUILD_VELOX_TESTS --build_benchmarks=$BUILD_VELOX_BENCHMARKS --num_threads=$NUM_THREADS \
--velox_home=$VELOX_HOME
Expand All @@ -207,7 +212,7 @@ function build_gluten_cpp {
-DVELOX_HOME=${VELOX_HOME} \
-DBUILD_TESTS=$BUILD_TESTS -DBUILD_EXAMPLES=$BUILD_EXAMPLES -DBUILD_BENCHMARKS=$BUILD_BENCHMARKS -DENABLE_JEMALLOC_STATS=$ENABLE_JEMALLOC_STATS \
-DENABLE_HBM=$ENABLE_HBM -DENABLE_QAT=$ENABLE_QAT -DENABLE_IAA=$ENABLE_IAA -DENABLE_GCS=$ENABLE_GCS \
-DENABLE_S3=$ENABLE_S3 -DENABLE_HDFS=$ENABLE_HDFS -DENABLE_ABFS=$ENABLE_ABFS ..
-DENABLE_S3=$ENABLE_S3 -DENABLE_HDFS=$ENABLE_HDFS -DENABLE_HDFS3=$ENABLE_HDFS3 -DENABLE_ABFS=$ENABLE_ABFS ..
make -j $NUM_THREADS
}

Expand Down
2 changes: 1 addition & 1 deletion dev/package-vcpkg.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ fi
# build gluten with velox backend, prompt always respond y
export PROMPT_ALWAYS_RESPOND=y

./dev/buildbundle-veloxbe.sh --enable_vcpkg=ON --build_tests=ON --build_arrow=OFF --build_benchmarks=ON --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON "$@"
./dev/buildbundle-veloxbe.sh --enable_vcpkg=ON --build_tests=ON --build_arrow=OFF --build_benchmarks=ON --enable_s3=ON --enable_gcs=ON --enable_hdfs=OFF --enable_hdfs3=ON "$@"
7 changes: 6 additions & 1 deletion dev/vcpkg/init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ BUILD_TESTS=OFF
ENABLE_S3=OFF
ENABLE_GCS=OFF
ENABLE_HDFS=OFF
ENABLE_HDFS3=OFF
ENABLE_ABFS=OFF

for arg in "$@"; do
Expand All @@ -26,6 +27,10 @@ for arg in "$@"; do
ENABLE_HDFS=("${arg#*=}")
shift # Remove argument name from processing
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this allow people to enable both the ENABLE_HDFS and ENABLE_HDFS3 flags?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we can't see a performance gain from compile-time linking, we will close the PR.

;;
--enable_hdfs3=*)
ENABLE_HDFS3=("${arg#*=}")
shift # Remove argument name from processing
;;
--enable_abfs=*)
ENABLE_ABFS=("${arg#*=}")
shift # Remove argument name from processing
Expand Down Expand Up @@ -70,7 +75,7 @@ fi
if [ "$ENABLE_GCS" = "ON" ]; then
EXTRA_FEATURES+="--x-feature=velox-gcs "
fi
if [ "$ENABLE_HDFS" = "ON" ]; then
if [ "$ENABLE_HDFS3" = "ON" ]; then
EXTRA_FEATURES+="--x-feature=velox-hdfs "
fi
if [ "$ENABLE_ABFS" = "ON" ]; then
Expand Down
9 changes: 9 additions & 0 deletions ep/build-velox/src/build_velox.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ ENABLE_S3=OFF
ENABLE_GCS=OFF
# Enable HDFS connector.
ENABLE_HDFS=OFF
# Enable HDFS3 connector.
ENABLE_HDFS3=OFF
# Enable ABFS connector.
ENABLE_ABFS=OFF
BUILD_TYPE=release
Expand Down Expand Up @@ -57,6 +59,10 @@ for arg in "$@"; do
ENABLE_HDFS=("${arg#*=}")
shift # Remove argument name from processing
;;
--enable_hdfs3=*)
ENABLE_HDFS3=("${arg#*=}")
shift # Remove argument name from processing
;;
--enable_abfs=*)
ENABLE_ABFS=("${arg#*=}")
shift # Remove argument name from processing
Expand Down Expand Up @@ -104,6 +110,9 @@ function compile {
if [ $ENABLE_HDFS == "ON" ]; then
COMPILE_OPTION="$COMPILE_OPTION -DVELOX_ENABLE_HDFS=ON"
fi
if [ $ENABLE_HDFS3 == "ON" ]; then
COMPILE_OPTION="$COMPILE_OPTION -DVELOX_ENABLE_HDFS3=ON"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the PR facebookincubator/velox@10cdf6f, Velox has removed VELOX_ENABLE_HDFS3.

fi
if [ $ENABLE_S3 == "ON" ]; then
COMPILE_OPTION="$COMPILE_OPTION -DVELOX_ENABLE_S3=ON"
fi
Expand Down
4 changes: 2 additions & 2 deletions ep/build-velox/src/get_velox.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@

set -exu

VELOX_REPO=https://github.com/oap-project/velox.git
VELOX_BRANCH=2024_11_17
VELOX_REPO=https://github.com/JkSelf/velox.git
VELOX_BRANCH=static-link-libhdfs3.bak
VELOX_HOME=""

OS=`uname -s`
Expand Down
Loading