From d5ba84938f7813dee0b6a1eb259bac03d1f25334 Mon Sep 17 00:00:00 2001 From: Yangyang Gao Date: Tue, 5 Sep 2023 18:41:44 +0800 Subject: [PATCH 1/8] install dependencies needed to support Azure Storage ABFS Connector --- .circleci/dist_compile.yml | 1 + CMakeLists.txt | 9 +++++++++ scripts/setup-adapters.sh | 27 +++++++++++++++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/.circleci/dist_compile.yml b/.circleci/dist_compile.yml index d6260666b2f4..43d2c67cc4ba 100644 --- a/.circleci/dist_compile.yml +++ b/.circleci/dist_compile.yml @@ -388,6 +388,7 @@ jobs: "-DVELOX_ENABLE_HDFS=ON" "-DVELOX_ENABLE_S3=ON" "-DVELOX_ENABLE_GCS=ON" + "-DVELOX_ENABLE_ABFS=ON" "-DVELOX_ENABLE_SUBSTRAIT=ON" "-DVELOX_ENABLE_REMOTE_FUNCTIONS=ON" ) diff --git a/CMakeLists.txt b/CMakeLists.txt index 948fb4bb7831..abeeec25c3b5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -75,6 +75,7 @@ option(VELOX_ENABLE_BENCHMARKS "Enable Velox top level benchmarks." OFF) option(VELOX_ENABLE_BENCHMARKS_BASIC "Enable Velox basic benchmarks." 
OFF) option(VELOX_ENABLE_S3 "Build S3 Connector" OFF) option(VELOX_ENABLE_GCS "Build GCS Connector" OFF) +option(VELOX_ENABLE_ABFS "Build Abfs Connector" OFF) option(VELOX_ENABLE_HDFS "Build Hdfs Connector" OFF) option(VELOX_ENABLE_PARQUET "Enable Parquet support" OFF) option(VELOX_ENABLE_ARROW "Enable Arrow support" OFF) @@ -104,6 +105,7 @@ if(${VELOX_BUILD_MINIMAL}) set(VELOX_ENABLE_EXAMPLES OFF) set(VELOX_ENABLE_S3 OFF) set(VELOX_ENABLE_GCS OFF) + set(VELOX_ENABLE_ABFS OFF) set(VELOX_ENABLE_SUBSTRAIT OFF) set(VELOX_CODEGEN_SUPPORT OFF) endif() @@ -138,6 +140,7 @@ if(${VELOX_BUILD_BENCHMARKS}) set(VELOX_BUILD_TESTING OFF) set(VELOX_ENABLE_EXAMPLES OFF) set(VELOX_ENABLE_GCS OFF) + set(VELOX_ENABLE_ABFS OFF) set(VELOX_ENABLE_SUBSTRAIT OFF) set(VELOX_CODEGEN_SUPPORT OFF) endif() @@ -156,6 +159,7 @@ if(${VELOX_BUILD_PYTHON_PACKAGE}) set(VELOX_ENABLE_EXAMPLES OFF) set(VELOX_ENABLE_S3 OFF) set(VELOX_ENABLE_GCS OFF) + set(VELOX_ENABLE_ABFS OFF) set(VELOX_ENABLE_SUBSTRAIT OFF) set(VELOX_CODEGEN_SUPPORT OFF) set(VELOX_ENABLE_BENCHMARKS_BASIC OFF) @@ -198,6 +202,11 @@ if(VELOX_ENABLE_GCS) add_definitions(-DVELOX_ENABLE_GCS) endif() +if(VELOX_ENABLE_ABFS) + find_package(azure-storage-blobs-cpp CONFIG REQUIRED) + add_definitions(-DVELOX_ENABLE_ABFS) +endif() + if(VELOX_ENABLE_HDFS) find_library( LIBHDFS3 diff --git a/scripts/setup-adapters.sh b/scripts/setup-adapters.sh index bc8e4f44e78d..27c0801d5bd6 100755 --- a/scripts/setup-adapters.sh +++ b/scripts/setup-adapters.sh @@ -61,6 +61,24 @@ function install_gcs-sdk-cpp { -DGOOGLE_CLOUD_CPP_ENABLE=storage } +function install_azure-storage-sdk-cpp { + github_checkout azure/azure-sdk-for-cpp azure-storage-blobs_12.8.0 + + # install azure-storage-common + cd sdk/storage/azure-storage-common + echo "install storage-common" + cmake_install -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF + + cd - + # install azure-storage-blobs + cd sdk/storage/azure-storage-blobs + + sed -i 's/"name": "azure-storage-common-cpp",/"name": 
"azure-storage-common-cpp"/' vcpkg.json + sed -i 's/"default-features": false,//' vcpkg.json + sed -i 's/"version>=": "12\.3\.1"//' vcpkg.json + cmake_install -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF +} + function install_libhdfs3 { github_checkout apache/hawq master cd $DEPENDENCY_DIR/hawq/depends/libhdfs3 @@ -101,12 +119,14 @@ fi install_aws=0 install_gcs=0 install_hdfs=0 +install_abfs=0 if [ "$#" -eq 0 ]; then # Install all adapters by default install_aws=1 install_gcs=1 install_hdfs=1 + install_abfs=1 fi while [[ $# -gt 0 ]]; do @@ -123,6 +143,10 @@ while [[ $# -gt 0 ]]; do install_hdfs=1 shift # past argument ;; + abfs) + install_abfs=1 + shift # past argument + ;; *) echo "ERROR: Unknown option $1! will be ignored!" shift @@ -139,6 +163,9 @@ fi if [ $install_hdfs -eq 1 ]; then install_libhdfs3 fi +if [ $install_abfs -eq 1 ]; then + install_azure-storage-sdk-cpp +fi _ret=$? if [ $_ret -eq 0 ] ; then From 76f33a55c3ed380e54c6abad6be909ec35576a47 Mon Sep 17 00:00:00 2001 From: Yangyang Gao Date: Wed, 6 Sep 2023 10:52:06 +0800 Subject: [PATCH 2/8] try fix build issue in centos --- scripts/setup-adapters.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/setup-adapters.sh b/scripts/setup-adapters.sh index 27c0801d5bd6..13f345a04672 100755 --- a/scripts/setup-adapters.sh +++ b/scripts/setup-adapters.sh @@ -66,7 +66,6 @@ function install_azure-storage-sdk-cpp { # install azure-storage-common cd sdk/storage/azure-storage-common - echo "install storage-common" cmake_install -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF cd - @@ -109,6 +108,9 @@ if [[ "$OSTYPE" == "linux-gnu"* ]]; then yum -y install libxml2-devel libgsasl-devel libuuid-devel # Dependencies of GCS, probably a workaround until the docker image is rebuilt yum -y install curl-devel c-ares-devel + # Dependencies of Azure Storage Blob Cpp + yum -y install gcc-c++ + yum -y install perl-IPC-Cmd fi fi From 36957c26e0b311cf7cf4b3359776edd5c5f6f90c Mon Sep 17 00:00:00 
2001 From: Yangyang Gao Date: Wed, 6 Sep 2023 16:52:46 +0800 Subject: [PATCH 3/8] fix azure-storage-blobs dependency not found issue --- CMakeLists.txt | 4 ++++ Makefile | 4 ++++ scripts/setup-adapters.sh | 6 +++++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index abeeec25c3b5..62b8df855ad3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -203,6 +203,10 @@ if(VELOX_ENABLE_GCS) endif() if(VELOX_ENABLE_ABFS) + # Set AZURESDK_ROOT_DIR if you have a custom install location of Azure Storage SDK CPP. + if(AZURESDK_ROOT_DIR) + list(APPEND CMAKE_PREFIX_PATH ${AZURESDK_ROOT_DIR}) + endif() find_package(azure-storage-blobs-cpp CONFIG REQUIRED) add_definitions(-DVELOX_ENABLE_ABFS) endif() diff --git a/Makefile b/Makefile index 8fc57e7347b6..c6cdc3a71455 100644 --- a/Makefile +++ b/Makefile @@ -46,6 +46,10 @@ ifdef GCSSDK_ROOT_DIR CMAKE_FLAGS += -DGCSSDK_ROOT_DIR=$(GCSSDK_ROOT_DIR) endif +ifdef AZURESDK_ROOT_DIR +CMAKE_FLAGS += -DAZURESDK_ROOT_DIR=$(AZURESDK_ROOT_DIR) +endif + # Use Ninja if available. If Ninja is used, pass through parallelism control flags. 
USE_NINJA ?= 1 ifeq ($(USE_NINJA), 1) diff --git a/scripts/setup-adapters.sh b/scripts/setup-adapters.sh index 13f345a04672..7e256f328848 100755 --- a/scripts/setup-adapters.sh +++ b/scripts/setup-adapters.sh @@ -64,6 +64,11 @@ function install_gcs-sdk-cpp { function install_azure-storage-sdk-cpp { github_checkout azure/azure-sdk-for-cpp azure-storage-blobs_12.8.0 + #install azure-core + cd sdk/core/azure-core + cmake_install -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF + + cd - # install azure-storage-common cd sdk/storage/azure-storage-common cmake_install -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF @@ -71,7 +76,6 @@ function install_azure-storage-sdk-cpp { cd - # install azure-storage-blobs cd sdk/storage/azure-storage-blobs - sed -i 's/"name": "azure-storage-common-cpp",/"name": "azure-storage-common-cpp"/' vcpkg.json sed -i 's/"default-features": false,//' vcpkg.json sed -i 's/"version>=": "12\.3\.1"//' vcpkg.json From cffea4bb9dff9804c362ee0a5f636307256e0fdd Mon Sep 17 00:00:00 2001 From: Yangyang Gao Date: Wed, 6 Sep 2023 17:31:31 +0800 Subject: [PATCH 4/8] fix format checking --- CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 62b8df855ad3..31df59045c04 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -203,7 +203,8 @@ if(VELOX_ENABLE_GCS) endif() if(VELOX_ENABLE_ABFS) - # Set AZURESDK_ROOT_DIR if you have a custom install location of Azure Storage SDK CPP. + # Set AZURESDK_ROOT_DIR if you have a custom install location of Azure Storage + # SDK CPP. 
if(AZURESDK_ROOT_DIR) list(APPEND CMAKE_PREFIX_PATH ${AZURESDK_ROOT_DIR}) endif() From 3e424cb5e4a7e9c01b77635f4881109d648c7ef0 Mon Sep 17 00:00:00 2001 From: Yangyang Gao Date: Fri, 8 Sep 2023 15:16:18 +0800 Subject: [PATCH 5/8] install azure-core compatible with system pre-installed openssl --- scripts/setup-adapters.sh | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/scripts/setup-adapters.sh b/scripts/setup-adapters.sh index 7e256f328848..89d390b66bfe 100755 --- a/scripts/setup-adapters.sh +++ b/scripts/setup-adapters.sh @@ -64,8 +64,14 @@ function install_gcs-sdk-cpp { function install_azure-storage-sdk-cpp { github_checkout azure/azure-sdk-for-cpp azure-storage-blobs_12.8.0 - #install azure-core + # build and install azure-core with the version compatible with system pre-installed openssl + openssl_version=$(openssl version -v | awk '{print $2}') + if [[ "$openssl_version" == 1.1.1* ]]; then + openssl_version="1.1.1n" + fi cd sdk/core/azure-core + sed -i 's/"version-string"/"builtin-baseline": "dafef74af53669ef1cc9015f55e0ce809ead62aa","version-string"/' vcpkg.json + sed -i "s/\"version-string\"/\"overrides\": [{ \"name\": \"openssl\", \"version-string\": \"$openssl_version\" }],\"version-string\"/" vcpkg.json cmake_install -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF cd - @@ -76,9 +82,7 @@ function install_azure-storage-sdk-cpp { cd - # install azure-storage-blobs cd sdk/storage/azure-storage-blobs - sed -i 's/"name": "azure-storage-common-cpp",/"name": "azure-storage-common-cpp"/' vcpkg.json - sed -i 's/"default-features": false,//' vcpkg.json - sed -i 's/"version>=": "12\.3\.1"//' vcpkg.json + sed -i 's/"version-semver"/"builtin-baseline": "dafef74af53669ef1cc9015f55e0ce809ead62aa","version-semver"/' vcpkg.json cmake_install -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF } From c88d6e0c097a57c8927d6645e86b8ce2dc3b4229 Mon Sep 17 00:00:00 2001 From: Yangyang Gao Date: Fri, 8 Sep 2023 16:52:13 +0800 Subject: [PATCH 
6/8] install openssl --- scripts/setup-adapters.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/setup-adapters.sh b/scripts/setup-adapters.sh index 89d390b66bfe..b91620d4d64b 100755 --- a/scripts/setup-adapters.sh +++ b/scripts/setup-adapters.sh @@ -119,6 +119,7 @@ if [[ "$OSTYPE" == "linux-gnu"* ]]; then # Dependencies of Azure Storage Blob Cpp yum -y install gcc-c++ yum -y install perl-IPC-Cmd + yum -y install openssl fi fi From ca08ea19cbc225168d6cc4d7daaf86c8a1d67abc Mon Sep 17 00:00:00 2001 From: Yangyang Gao Date: Fri, 8 Sep 2023 17:04:28 +0800 Subject: [PATCH 7/8] install openssl on apt-based systems as well --- scripts/setup-adapters.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/setup-adapters.sh b/scripts/setup-adapters.sh index b91620d4d64b..e8c1e8b22d03 100755 --- a/scripts/setup-adapters.sh +++ b/scripts/setup-adapters.sh @@ -112,6 +112,8 @@ if [[ "$OSTYPE" == "linux-gnu"* ]]; then apt install -y --no-install-recommends libxml2-dev libgsasl7-dev uuid-dev # Dependencies of GCS, probably a workaround until the docker image is rebuilt apt install -y --no-install-recommends libc-ares-dev libcurl4-openssl-dev + # Dependencies of Azure Storage Blob cpp + apt install -y openssl else # Assume Fedora/CentOS yum -y install libxml2-devel libgsasl-devel libuuid-devel # Dependencies of GCS, probably a workaround until the docker image is rebuilt From 2e7fca5043a3968192c1d9b58295cd39da8a2900 Mon Sep 17 00:00:00 2001 From: Yangyang Gao Date: Fri, 8 Sep 2023 18:35:15 +0800 Subject: [PATCH 8/8] skip vcpkg.json edits when a baseline is already present; drop gcc-c++ install --- scripts/setup-adapters.sh | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/scripts/setup-adapters.sh b/scripts/setup-adapters.sh index e8c1e8b22d03..5bc9c81e4605 100755 --- a/scripts/setup-adapters.sh +++ b/scripts/setup-adapters.sh @@ -64,14 +64,16 @@ function install_gcs-sdk-cpp { function install_azure-storage-sdk-cpp { github_checkout azure/azure-sdk-for-cpp azure-storage-blobs_12.8.0 - # build and install azure-core with the 
version compatible with system pre-installed openssl - openssl_version=$(openssl version -v | awk '{print $2}') - if [[ "$openssl_version" == 1.1.1* ]]; then - openssl_version="1.1.1n" - fi cd sdk/core/azure-core - sed -i 's/"version-string"/"builtin-baseline": "dafef74af53669ef1cc9015f55e0ce809ead62aa","version-string"/' vcpkg.json - sed -i "s/\"version-string\"/\"overrides\": [{ \"name\": \"openssl\", \"version-string\": \"$openssl_version\" }],\"version-string\"/" vcpkg.json + if ! grep -q "baseline" vcpkg.json; then + # build and install azure-core with the version compatible with system pre-installed openssl + openssl_version=$(openssl version -v | awk '{print $2}') + if [[ "$openssl_version" == 1.1.1* ]]; then + openssl_version="1.1.1n" + fi + sed -i 's/"version-string"/"builtin-baseline": "dafef74af53669ef1cc9015f55e0ce809ead62aa","version-string"/' vcpkg.json + sed -i "s/\"version-string\"/\"overrides\": [{ \"name\": \"openssl\", \"version-string\": \"$openssl_version\" }],\"version-string\"/" vcpkg.json + fi cmake_install -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF cd - @@ -82,7 +84,9 @@ function install_azure-storage-sdk-cpp { cd - # install azure-storage-blobs cd sdk/storage/azure-storage-blobs - sed -i 's/"version-semver"/"builtin-baseline": "dafef74af53669ef1cc9015f55e0ce809ead62aa","version-semver"/' vcpkg.json + if ! grep -q "baseline" vcpkg.json; then + sed -i 's/"version-semver"/"builtin-baseline": "dafef74af53669ef1cc9015f55e0ce809ead62aa","version-semver"/' vcpkg.json + fi cmake_install -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF } @@ -119,7 +123,6 @@ if [[ "$OSTYPE" == "linux-gnu"* ]]; then # Dependencies of GCS, probably a workaround until the docker image is rebuilt yum -y install curl-devel c-ares-devel # Dependencies of Azure Storage Blob Cpp - yum -y install gcc-c++ yum -y install perl-IPC-Cmd yum -y install openssl fi