diff --git a/.circleci/dist_compile.yml b/.circleci/dist_compile.yml
index 796111369d40a..e0f89a9c6d656 100644
--- a/.circleci/dist_compile.yml
+++ b/.circleci/dist_compile.yml
@@ -388,6 +388,7 @@ jobs:
               "-DVELOX_ENABLE_HDFS=ON"
               "-DVELOX_ENABLE_S3=ON"
               "-DVELOX_ENABLE_GCS=ON"
+              "-DVELOX_ENABLE_ABFS=ON"
               "-DVELOX_ENABLE_SUBSTRAIT=ON"
               "-DVELOX_ENABLE_REMOTE_FUNCTIONS=ON"
             )
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 948fb4bb78312..31df59045c044 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -75,6 +75,7 @@ option(VELOX_ENABLE_BENCHMARKS "Enable Velox top level benchmarks." OFF)
 option(VELOX_ENABLE_BENCHMARKS_BASIC "Enable Velox basic benchmarks." OFF)
 option(VELOX_ENABLE_S3 "Build S3 Connector" OFF)
 option(VELOX_ENABLE_GCS "Build GCS Connector" OFF)
+option(VELOX_ENABLE_ABFS "Build Abfs Connector" OFF)
 option(VELOX_ENABLE_HDFS "Build Hdfs Connector" OFF)
 option(VELOX_ENABLE_PARQUET "Enable Parquet support" OFF)
 option(VELOX_ENABLE_ARROW "Enable Arrow support" OFF)
@@ -104,6 +105,7 @@ if(${VELOX_BUILD_MINIMAL})
   set(VELOX_ENABLE_EXAMPLES OFF)
   set(VELOX_ENABLE_S3 OFF)
   set(VELOX_ENABLE_GCS OFF)
+  set(VELOX_ENABLE_ABFS OFF)
   set(VELOX_ENABLE_SUBSTRAIT OFF)
   set(VELOX_CODEGEN_SUPPORT OFF)
 endif()
@@ -138,6 +140,7 @@ if(${VELOX_BUILD_BENCHMARKS})
   set(VELOX_BUILD_TESTING OFF)
   set(VELOX_ENABLE_EXAMPLES OFF)
   set(VELOX_ENABLE_GCS OFF)
+  set(VELOX_ENABLE_ABFS OFF)
   set(VELOX_ENABLE_SUBSTRAIT OFF)
   set(VELOX_CODEGEN_SUPPORT OFF)
 endif()
@@ -156,6 +159,7 @@ if(${VELOX_BUILD_PYTHON_PACKAGE})
   set(VELOX_ENABLE_EXAMPLES OFF)
   set(VELOX_ENABLE_S3 OFF)
   set(VELOX_ENABLE_GCS OFF)
+  set(VELOX_ENABLE_ABFS OFF)
   set(VELOX_ENABLE_SUBSTRAIT OFF)
   set(VELOX_CODEGEN_SUPPORT OFF)
   set(VELOX_ENABLE_BENCHMARKS_BASIC OFF)
@@ -198,6 +202,16 @@ if(VELOX_ENABLE_GCS)
   add_definitions(-DVELOX_ENABLE_GCS)
 endif()
 
+if(VELOX_ENABLE_ABFS)
+  # Set AZURESDK_ROOT_DIR if you have a custom install location of Azure Storage
+  # SDK CPP.
+  if(AZURESDK_ROOT_DIR)
+    list(APPEND CMAKE_PREFIX_PATH ${AZURESDK_ROOT_DIR})
+  endif()
+  find_package(azure-storage-blobs-cpp CONFIG REQUIRED)
+  add_definitions(-DVELOX_ENABLE_ABFS)
+endif()
+
 if(VELOX_ENABLE_HDFS)
   find_library(
     LIBHDFS3
diff --git a/Makefile b/Makefile
index 8fc57e7347b64..c6cdc3a714554 100644
--- a/Makefile
+++ b/Makefile
@@ -46,6 +46,10 @@ ifdef GCSSDK_ROOT_DIR
 CMAKE_FLAGS += -DGCSSDK_ROOT_DIR=$(GCSSDK_ROOT_DIR)
 endif
 
+ifdef AZURESDK_ROOT_DIR
+CMAKE_FLAGS += -DAZURESDK_ROOT_DIR=$(AZURESDK_ROOT_DIR)
+endif
+
 # Use Ninja if available. If Ninja is used, pass through parallelism control flags.
 USE_NINJA ?= 1
 ifeq ($(USE_NINJA), 1)
diff --git a/scripts/setup-adapters.sh b/scripts/setup-adapters.sh
index bc8e4f44e78d7..5bc9c81e4605f 100755
--- a/scripts/setup-adapters.sh
+++ b/scripts/setup-adapters.sh
@@ -61,6 +61,35 @@ function install_gcs-sdk-cpp {
     -DGOOGLE_CLOUD_CPP_ENABLE=storage
 }
 
+function install_azure-storage-sdk-cpp {
+  github_checkout azure/azure-sdk-for-cpp azure-storage-blobs_12.8.0
+
+  cd sdk/core/azure-core
+  if ! grep -q "baseline" vcpkg.json; then
+    # build and install azure-core with the version compatible with system pre-installed openssl
+    openssl_version=$(openssl version -v | awk '{print $2}')
+    if [[ "$openssl_version" == 1.1.1* ]]; then
+      openssl_version="1.1.1n"
+    fi
+    sed -i 's/"version-string"/"builtin-baseline": "dafef74af53669ef1cc9015f55e0ce809ead62aa","version-string"/' vcpkg.json
+    sed -i "s/\"version-string\"/\"overrides\": [{ \"name\": \"openssl\", \"version-string\": \"$openssl_version\" }],\"version-string\"/" vcpkg.json
+  fi
+  cmake_install -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF
+
+  cd -
+  # install azure-storage-common
+  cd sdk/storage/azure-storage-common
+  cmake_install -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF
+
+  cd -
+  # install azure-storage-blobs
+  cd sdk/storage/azure-storage-blobs
+  if ! grep -q "baseline" vcpkg.json; then
+    sed -i 's/"version-semver"/"builtin-baseline": "dafef74af53669ef1cc9015f55e0ce809ead62aa","version-semver"/' vcpkg.json
+  fi
+  cmake_install -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF
+}
+
 function install_libhdfs3 {
   github_checkout apache/hawq master
   cd $DEPENDENCY_DIR/hawq/depends/libhdfs3
@@ -87,10 +116,15 @@ if [[ "$OSTYPE" == "linux-gnu"* ]]; then
       apt install -y --no-install-recommends libxml2-dev libgsasl7-dev uuid-dev
       # Dependencies of GCS, probably a workaround until the docker image is rebuilt
       apt install -y --no-install-recommends libc-ares-dev libcurl4-openssl-dev
+      # Dependencies of Azure Storage Blob cpp
+      apt install -y openssl
    else # Assume Fedora/CentOS
       yum -y install libxml2-devel libgsasl-devel libuuid-devel
       # Dependencies of GCS, probably a workaround until the docker image is rebuilt
       yum -y install curl-devel c-ares-devel
+      # Dependencies of Azure Storage Blob Cpp
+      yum -y install perl-IPC-Cmd
+      yum -y install openssl
    fi
 fi
 
@@ -101,12 +135,14 @@ fi
 install_aws=0
 install_gcs=0
 install_hdfs=0
+install_abfs=0
 
 if [ "$#" -eq 0 ]; then
   # Install all adapters by default
   install_aws=1
   install_gcs=1
   install_hdfs=1
+  install_abfs=1
 fi
 
 while [[ $# -gt 0 ]]; do
@@ -123,6 +159,10 @@ while [[ $# -gt 0 ]]; do
       install_hdfs=1
       shift # past argument
       ;;
+    abfs)
+      install_abfs=1
+      shift # past argument
+      ;;
     *)
       echo "ERROR: Unknown option $1! will be ignored!"
       shift
@@ -139,6 +179,9 @@ fi
 if [ $install_hdfs -eq 1 ]; then
   install_libhdfs3
 fi
+if [ $install_abfs -eq 1 ]; then
+  install_azure-storage-sdk-cpp
+fi
 
 _ret=$?
 if [ $_ret -eq 0 ] ; then