diff --git a/.github/workflows/cpp_full_node_workflow.yml b/.github/workflows/cpp_full_node_workflow.yml index fee8d67a..4512a8e2 100644 --- a/.github/workflows/cpp_full_node_workflow.yml +++ b/.github/workflows/cpp_full_node_workflow.yml @@ -22,6 +22,8 @@ concurrency: env: RUST_BACKTRACE: 1 ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true + ACTIONS_RUNNER_FORCED_INTERNAL_NODE_VERSION: node16 + ACTIONS_RUNNER_FORCE_ACTIONS_NODE_VERSION: node16 jobs: build: @@ -99,23 +101,37 @@ jobs: if: runner.os != 'Windows' run: | cd cpp/build && CTEST_OUTPUT_ON_FAILURE=TRUE make test ARGS="--output-on-failure" + - name: Publish Error + if: always() + uses: actions/upload-artifact@v4 + with: + name: build-x64-linux-dbg-err.log + path: /home/runner/work/WeDPR-Component/WeDPR-Component/vcpkg/buildtrees/gsasl/build-x64-linux-dbg-err.log + build_centos: name: build_centos full node - runs-on: ${{ matrix.os }} + runs-on: ubuntu-latest continue-on-error: true strategy: fail-fast: false matrix: - os: [ubuntu-latest] + container: + - centos-7.9 container: image: docker.io/centos:7 volumes: - /usr/local/share/vcpkg:/usr/local/share/vcpkg + - /node20217:/node20217:rw,rshared + - ${{ matrix.container == 'centos-7.9' && '/node20217:/__e/node20:ro,rshared' || ' ' }} steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 5 + - name: install nodejs20glibc2.17 + if: ${{ matrix.container == 'centos-7.9' }} + run: | + curl -LO https://unofficial-builds.nodejs.org/download/release/v20.9.0/node-v20.9.0-linux-x64-glibc-217.tar.xz + tar -xf node-v20.9.0-linux-x64-glibc-217.tar.xz --strip-components 1 -C /node20217 + - name: Check out the repo + uses: actions/checkout@v4 - uses: actions/cache@v3 id: deps_cache with: @@ -148,6 +164,10 @@ jobs: with: toolchain: nightly-2022-07-28 override: true + - name: Prepare vcpkg + if: runner.os != 'Windows' + uses: friendlyanon/setup-vcpkg@v1 + with: { committish: 51b14cd4e1230dd51c11ffeff6f7d53c61cc5297 } - name: Check disk space run: df . 
-h - name: Free disk space @@ -169,7 +189,7 @@ jobs: rm -rf python mkdir -p cpp/build cd cpp/build - cmake3 -DBUILD_STATIC=ON -DCMAKE_BUILD_TYPE=Release -DTESTS=ON -DCMAKE_TOOLCHAIN_FILE=/usr/local/share/vcpkg/scripts/buildsystems/vcpkg.cmake ../ + cmake3 -DCMAKE_TOOLCHAIN_FILE=${{ env.VCPKG_ROOT }}/scripts/buildsystems/vcpkg.cmake -DBUILD_STATIC=ON -DCMAKE_BUILD_TYPE=Release -DTESTS=ON -DCMAKE_TOOLCHAIN_FILE=/usr/local/share/vcpkg/scripts/buildsystems/vcpkg.cmake ../ - name: FreeDiskSpace run: | df -lh diff --git a/.github/workflows/cpp_sdk_workflow.yml b/.github/workflows/cpp_sdk_workflow.yml index e75cc273..4c361914 100644 --- a/.github/workflows/cpp_sdk_workflow.yml +++ b/.github/workflows/cpp_sdk_workflow.yml @@ -22,6 +22,8 @@ concurrency: env: RUST_BACKTRACE: 1 ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true + ACTIONS_RUNNER_FORCED_INTERNAL_NODE_VERSION: node16 + ACTIONS_RUNNER_FORCE_ACTIONS_NODE_VERSION: node16 jobs: build_sdk: @@ -105,20 +107,27 @@ jobs: build_centos_for_sdk: name: build_centos_for_sdk - runs-on: ${{ matrix.os }} + runs-on: ubuntu-latest continue-on-error: true strategy: fail-fast: false matrix: - os: [ubuntu-latest] + container: + - centos-7.9 container: image: docker.io/centos:7 volumes: - /usr/local/share/vcpkg:/usr/local/share/vcpkg + - /node20217:/node20217:rw,rshared + - ${{ matrix.container == 'centos-7.9' && '/node20217:/__e/node20:ro,rshared' || ' ' }} steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 5 + - name: install nodejs20glibc2.17 + if: ${{ matrix.container == 'centos-7.9' }} + run: | + curl -LO https://unofficial-builds.nodejs.org/download/release/v20.9.0/node-v20.9.0-linux-x64-glibc-217.tar.xz + tar -xf node-v20.9.0-linux-x64-glibc-217.tar.xz --strip-components 1 -C /node20217 + - name: Check out the repo + uses: actions/checkout@v4 - uses: actions/cache@v3 id: deps_cache with: @@ -149,11 +158,15 @@ jobs: yum install -y java-11-openjdk-devel git make gcc gcc-c++ glibc-static glibc-devel openssl cmake3 ccache 
devtoolset-11 llvm-toolset-7.0 rh-perl530-perl libzstd-devel zlib-devel flex bison python-devel python3-devel yum install -y rh-perl530-perl cmake3 zlib-devel ccache lcov python-devel python3-devel yum install -y git + - name: Prepare vcpkg + if: runner.os != 'Windows' + uses: friendlyanon/setup-vcpkg@v1 + with: { committish: 51b14cd4e1230dd51c11ffeff6f7d53c61cc5297 } - name: Build run: | alias cmake='cmake3' . /opt/rh/devtoolset-11/enable mkdir -p cpp/build cd cpp/build - cmake3 -DCMAKE_BUILD_TYPE=Release -DBUILD_SDK=ON -DCMAKE_TOOLCHAIN_FILE=/usr/local/share/vcpkg/scripts/buildsystems/vcpkg.cmake ../ + cmake3 -DCMAKE_TOOLCHAIN_FILE=${{ env.VCPKG_ROOT }}/scripts/buildsystems/vcpkg.cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SDK=ON -DCMAKE_TOOLCHAIN_FILE=/usr/local/share/vcpkg/scripts/buildsystems/vcpkg.cmake ../ cmake3 --build . --parallel 3 diff --git a/.github/workflows/cpp_toolkit_workflow.yml b/.github/workflows/cpp_toolkit_workflow.yml index 267d8c51..04635c77 100644 --- a/.github/workflows/cpp_toolkit_workflow.yml +++ b/.github/workflows/cpp_toolkit_workflow.yml @@ -22,6 +22,8 @@ concurrency: env: RUST_BACKTRACE: 1 ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true + ACTIONS_RUNNER_FORCED_INTERNAL_NODE_VERSION: node16 + ACTIONS_RUNNER_FORCE_ACTIONS_NODE_VERSION: node16 jobs: build_wedpr_toolkit: @@ -111,20 +113,27 @@ jobs: build_centos_for_toolkit: name: build_centos_for_toolkit - runs-on: ${{ matrix.os }} + runs-on: ubuntu-latest continue-on-error: true strategy: fail-fast: false matrix: - os: [ubuntu-latest] + container: + - centos-7.9 container: image: docker.io/centos:7 volumes: - /usr/local/share/vcpkg:/usr/local/share/vcpkg + - /node20217:/node20217:rw,rshared + - ${{ matrix.container == 'centos-7.9' && '/node20217:/__e/node20:ro,rshared' || ' ' }} steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 5 + - name: install nodejs20glibc2.17 + if: ${{ matrix.container == 'centos-7.9' }} + run: | + curl -LO 
https://unofficial-builds.nodejs.org/download/release/v20.9.0/node-v20.9.0-linux-x64-glibc-217.tar.xz + tar -xf node-v20.9.0-linux-x64-glibc-217.tar.xz --strip-components 1 -C /node20217 + - name: Check out the repo + uses: actions/checkout@v4 - uses: actions/cache@v3 id: deps_cache with: @@ -155,13 +164,17 @@ jobs: yum install -y java-11-openjdk-devel git make gcc gcc-c++ glibc-static glibc-devel openssl cmake3 ccache devtoolset-11 llvm-toolset-7.0 rh-perl530-perl libzstd-devel zlib-devel flex bison python-devel python3-devel yum install -y rh-perl530-perl cmake3 zlib-devel ccache lcov python-devel python3-devel yum install -y git + - name: Prepare vcpkg + if: runner.os != 'Windows' + uses: friendlyanon/setup-vcpkg@v1 + with: { committish: 51b14cd4e1230dd51c11ffeff6f7d53c61cc5297 } - name: Build run: | alias cmake='cmake3' . /opt/rh/devtoolset-11/enable mkdir -p cpp/build cd cpp/build - cmake3 -DCMAKE_BUILD_TYPE=Release -DBUILD_WEDPR_TOOLKIT=ON -DBUILD_PYTHON=ON -DCMAKE_TOOLCHAIN_FILE=/usr/local/share/vcpkg/scripts/buildsystems/vcpkg.cmake ../ + cmake3 -DCMAKE_TOOLCHAIN_FILE=${{ env.VCPKG_ROOT }}/scripts/buildsystems/vcpkg.cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_WEDPR_TOOLKIT=ON -DBUILD_PYTHON=ON -DCMAKE_TOOLCHAIN_FILE=/usr/local/share/vcpkg/scripts/buildsystems/vcpkg.cmake ../ cmake3 --build . 
--parallel 3 - uses: actions/upload-artifact@v3 with: diff --git a/cpp/cmake/CompilerSettings.cmake b/cpp/cmake/CompilerSettings.cmake index 1d2efdc8..e6a91d9b 100644 --- a/cpp/cmake/CompilerSettings.cmake +++ b/cpp/cmake/CompilerSettings.cmake @@ -197,7 +197,7 @@ endif() # rust static library linking requirements for macos if(APPLE) - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -framework Security") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -framework Security -framework Kerberos") else() set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -ldl") endif() diff --git a/cpp/cmake/TargetSettings.cmake b/cpp/cmake/TargetSettings.cmake index 1eabf08e..e66f019b 100644 --- a/cpp/cmake/TargetSettings.cmake +++ b/cpp/cmake/TargetSettings.cmake @@ -87,7 +87,7 @@ set(AIR_BINARY_NAME ppc-air-node) set(CEM_BINARY_NAME wedpr-cem) set(MPC_BINARY_NAME wedpr-mpc) -set(HDFS_LIB libhdfs3-static) +set(HDFS_LIB libhdfs3) # set cpu-info set(CPU_FEATURES_LIB "") diff --git a/cpp/ppc-framework/protocol/Krb5AuthConfig.h b/cpp/ppc-framework/protocol/Krb5AuthConfig.h new file mode 100644 index 00000000..a7ed09b3 --- /dev/null +++ b/cpp/ppc-framework/protocol/Krb5AuthConfig.h @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2022 WeDPR. + * SPDX-License-Identifier: Apache-2.0 + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * @file Krb5AuthConfig.h + * @author: yujiechen + * @date 2024-12-2 + */ +#pragma once +#include "ppc-framework/Common.h" +#include +#include +#include + +namespace ppc::protocol +{ +struct Krb5AuthConfig +{ + using Ptr = std::shared_ptr; + std::string principal; + std::string password; + std::string ccachePath; + std::string authConfigFilePath = "./conf/krb5.conf"; + void check() const + { + if (principal.size() == 0) + { + BOOST_THROW_EXCEPTION(WeDPRException() << bcos::errinfo_comment( + "Invalid krb5 auth config: Must set the principal!")); + } + if (password.size() == 0) + { + BOOST_THROW_EXCEPTION(WeDPRException() << bcos::errinfo_comment( + "Invalid krb5 auth config: Must set the password!")); + } + if (ccachePath.size() == 0) + { + BOOST_THROW_EXCEPTION(WeDPRException() << bcos::errinfo_comment( + "Invalid krb5 auth config: Must set the ccachePath!")); + } + if (authConfigFilePath.size() == 0) + { + BOOST_THROW_EXCEPTION( + WeDPRException() << bcos::errinfo_comment( + "Invalid krb5 auth config: Must set the authConfigFilePath!")); + } + } + + inline std::string desc() const + { + std::stringstream oss; + oss << LOG_KV("principal", principal) << LOG_KV("ccachePath", ccachePath) + << LOG_KV("authConfigFilePath", authConfigFilePath); + return oss.str(); + } +}; +} // namespace ppc::protocol \ No newline at end of file diff --git a/cpp/ppc-framework/protocol/Protocol.h b/cpp/ppc-framework/protocol/Protocol.h index f16ec21a..ee893d81 100644 --- a/cpp/ppc-framework/protocol/Protocol.h +++ b/cpp/ppc-framework/protocol/Protocol.h @@ -23,6 +23,7 @@ #define WIN32_LEAN_AND_MEAN #include #endif +#include "Krb5AuthConfig.h" #include "ppc-framework/Common.h" #include #include @@ -452,6 +453,7 @@ struct FileStorageConnectionOption bool replaceDataNodeOnFailure = false; // the default connection-timeout for the hdfs is 1000ms uint16_t connectionTimeout = 1000; + Krb5AuthConfig::Ptr authConfig; void check() const { @@ -470,6 +472,10 @@ struct 
FileStorageConnectionOption BOOST_THROW_EXCEPTION(WeDPRException() << bcos::errinfo_comment( "Invalid HDFS Option, Must set valid namenodeport!")); } + if (authConfig) + { + authConfig->check(); + } } inline std::string desc() const { @@ -477,7 +483,8 @@ struct FileStorageConnectionOption oss << LOG_KV("nameNode", nameNode) << LOG_KV("nameNodePort", nameNodePort) << LOG_KV("user", userName) << LOG_KV("token", token) << LOG_KV("replace-datanode-on-failure", replaceDataNodeOnFailure) - << LOG_KV("connectionTimeout", connectionTimeout); + << LOG_KV("connectionTimeout", connectionTimeout) + << LOG_KV("authInfo", authConfig ? authConfig->desc() : "null"); return oss.str(); } }; diff --git a/cpp/tools/build_ppc.sh b/cpp/tools/build_ppc.sh index 401169ad..621f5c99 100644 --- a/cpp/tools/build_ppc.sh +++ b/cpp/tools/build_ppc.sh @@ -476,7 +476,7 @@ generate_node_config_ini() { [hdfs_storage] ; the hdfs configuration - user = app + user = root name_node = 127.0.0.1 name_node_port = 9000 token = @@ -484,6 +484,16 @@ generate_node_config_ini() { replace-datanode-on-failure = false ; the connection-timeout, in ms, default is 1000ms connection-timeout = 1000 + ; enable auth or not, default is false + ; enable_krb5_auth = false + ; the hdfs kerberos auth principal, used when enable_krb5_auth + ; auth_principal = root@NODE.DC1.CONSUL + ; the hdfs kerberos auth password, used when enable_krb5_auth + ; auth_password = + ; the ccache path, used when enable_krb5_auth + ; ccache_path = /tmp/krb5cc_ppc_node + ; the krb5.conf path + ; krb5_conf_path = conf/krb5.conf [ra2018psi] @@ -541,6 +551,31 @@ generate_node_config_ini() { EOF } +generate_krb5_file_template() +{ + local filepath=$1 + mkdir -p $(dirname $filepath) + cat << EOF > "${filepath}" +[libdefaults] + default_realm = NODE.DC1.CONSUL + dns_lookup_realm = false + dns_lookup_kdc = false + ticket_lifetime = 24h + renew_lifetime = 7d + forwardable = true + +[realms] + NODE.DC1.CONSUL = { + kdc = + admin_server = + } + 
+[domain_realm] + .node.dc1.consul = NODE.DC1.CONSUL + node.dc1.consul = NODE.DC1.CONSUL +EOF +} + generate_script_template() { local filepath=$1 @@ -915,6 +950,7 @@ deploy_nodes() private_key=$(generate_private_key "${node_dir}/conf") node_id=$(cat "${node_dir}/conf/node.nodeid") generate_node_config_ini "${node_dir}/config.ini" "${listen_ip}" "${gateway_port}" "${listen_ip}" "${rpc_port}" "${listen_ip}" "${grpc_port}" ${agency_id} "${count}" "${node_id}" + generate_krb5_file_template "${node_dir}/conf/krb5.conf" generate_p2p_connected_conf "${node_dir}/${p2p_connected_conf_name}" "${connected_nodes}" "false" set_value ${ip//./}_count $(($(get_value ${ip//./}_count) + 1)) ((++count)) diff --git a/cpp/tools/build_wedpr_cem.sh b/cpp/tools/build_wedpr_cem.sh index c972e559..8299c61e 100644 --- a/cpp/tools/build_wedpr_cem.sh +++ b/cpp/tools/build_wedpr_cem.sh @@ -354,7 +354,7 @@ generate_config_ini() { [hdfs_storage] ; the hdfs configuration - user = ppc + user = root name_node = 127.0.0.1 name_node_port = 9900 token = @@ -362,6 +362,16 @@ generate_config_ini() { replace-datanode-on-failure = false ; the connection-timeout, in ms, default is 1000ms connection-timeout = 2000 + ; enable auth or not, default is false + ; enable_krb5_auth = false + ; the hdfs kerberos auth principal, used when enable_krb5_auth + ; auth_principal = root@NODE.DC1.CONSUL + ; the hdfs kerberos auth password, used when enable_krb5_auth + ; auth_password = + ; the ccache path, used when enable_krb5_auth + ; ccache_path = /tmp/krb5cc_ppc_node + ; the krb5.conf path + ; krb5_conf_path = conf/krb5.conf [cert] ; directory the certificates located in diff --git a/cpp/tools/build_wedpr_mpc.sh b/cpp/tools/build_wedpr_mpc.sh index 938b9184..c3d109dd 100644 --- a/cpp/tools/build_wedpr_mpc.sh +++ b/cpp/tools/build_wedpr_mpc.sh @@ -358,7 +358,7 @@ generate_config_ini() { [hdfs_storage] ; the hdfs configuration - user = ppc + user = root name_node = 127.0.0.1 name_node_port = 9900 token = @@ -366,6 +366,16 
@@ generate_config_ini() { replace-datanode-on-failure = false ; the connection-timeout, in ms, default is 1000ms connection-timeout = 2000 + ; enable auth or not, default is false + ; enable_krb5_auth = false + ; the hdfs kerberos auth principal, used when enable_krb5_auth + ; auth_principal = root@NODE.DC1.CONSUL + ; the hdfs kerberos auth password, used when enable_krb5_auth + ; auth_password = + ; the ccache path, used when enable_krb5_auth + ; ccache_path = /tmp/krb5cc_ppc_node + ; the krb5.conf path + ; krb5_conf_path = conf/krb5.conf [transport] ; the endpoint information @@ -396,6 +406,31 @@ generate_config_ini() { EOF } +generate_krb5_file_template() +{ + local filepath=$1 + mkdir -p $(dirname $filepath) + cat << EOF > "${filepath}" +[libdefaults] + default_realm = NODE.DC1.CONSUL + dns_lookup_realm = false + dns_lookup_kdc = false + ticket_lifetime = 24h + renew_lifetime = 7d + forwardable = true + +[realms] + NODE.DC1.CONSUL = { + kdc = + admin_server = + } + +[domain_realm] + .node.dc1.consul = NODE.DC1.CONSUL + node.dc1.consul = NODE.DC1.CONSUL +EOF +} + generate_script_template() { local filepath=$1 @@ -723,6 +758,7 @@ deploy_nodes() private_key=$(generate_private_key "${output_dir}/conf") node_id=$(cat "${output_dir}/conf/node.nodeid") generate_config_ini "${output_dir}/config.ini" "${listen_ip}" "${rpc_port}" "${agency_info}" ${agency_id} "${listen_ip}" "${grpc_port}" "${node_id}" + generate_krb5_file_template "${output_dir}/conf/krb5.conf" print_result } diff --git a/cpp/tools/ppc-builder/conf/config-example.toml b/cpp/tools/ppc-builder/conf/config-example.toml index a27685dd..867f113a 100644 --- a/cpp/tools/ppc-builder/conf/config-example.toml +++ b/cpp/tools/ppc-builder/conf/config-example.toml @@ -90,10 +90,20 @@ holding_msg_minutes = 30 database = "" # the hdfs storage config [agency.node.hdfs_storage] - user = "" + user = "root" name_node = "127.0.0.1" name_node_port = 9000 token = "" + # enable auth or not, default is false + enable_krb5_auth 
= false + # the hdfs kerberos auth principal, used when enable_krb5_auth + auth_principal = "root@NODE.DC1.CONSUL" + # the hdfs kerberos auth password, used when enable_krb5_auth + auth_password = "" + # the ccache path, used when enable_krb5_auth + ccache_path = "/tmp/krb5cc_ppc_node" + # the krb5 conf path + krb5_conf_path = "conf/krb5.conf" # the gateway config [agency.node.gateway] gateway_grpc_target = ["127.0.0.1:40600", "127.0.0.1:40601"] @@ -175,10 +185,21 @@ holding_msg_minutes = 30 database = "" # the hdfs storage config [agency.node.hdfs_storage] - user = "" + user = "root" name_node = "127.0.0.1" name_node_port = 9000 token = "" + # enable auth or not, default is false + enable_krb5_auth = false + # the hdfs kerberos auth principal, used when enable_krb5_auth + auth_principal = "root@NODE.DC1.CONSUL" + # the hdfs kerberos auth password, used when enable_krb5_auth + auth_password = "" + # the ccache path, used when enable_krb5_auth + ccache_path = "/tmp/krb5cc_ppc_node" + # the krb5 conf path + krb5_conf_path = "conf/krb5.conf" + # the gateway config [agency.node.gateway] gateway_grpc_target = ["127.0.0.1:40620", "127.0.0.1:40621"] diff --git a/cpp/tools/ppc-builder/src/common/utilities.py b/cpp/tools/ppc-builder/src/common/utilities.py index 5e4b3c32..5462a9db 100644 --- a/cpp/tools/ppc-builder/src/common/utilities.py +++ b/cpp/tools/ppc-builder/src/common/utilities.py @@ -29,6 +29,8 @@ class ConfigInfo: pwd_path, tpl_abs_path, "config.ini.node") gateway_config_tpl_path = os.path.join( pwd_path, tpl_abs_path, "config.ini.gateway") + krb5_config_tpl_path = os.path.join( + pwd_path, tpl_abs_path, "krb5.conf") ppc_gateway_binary_name = "ppc-gateway-service" ppc_node_binary_name = "ppc-pro-node" diff --git a/cpp/tools/ppc-builder/src/config/ppc_deploy_config.py b/cpp/tools/ppc-builder/src/config/ppc_deploy_config.py index 88bae36f..d8c642e4 100644 --- a/cpp/tools/ppc-builder/src/config/ppc_deploy_config.py +++ 
b/cpp/tools/ppc-builder/src/config/ppc_deploy_config.py @@ -116,6 +116,28 @@ def __init__(self, config, config_section, must_exist): self.config, "name_node_port", None, must_exist, config_section) self.token = utilities.get_item_value( self.config, "token", "", False, config_section) + # enable auth or not + enable_krb5_auth = utilities.get_item_value( + self.config, "enable_krb5_auth", "", + False, config_section) + self.enable_krb5_auth_str = utilities.convert_bool_to_str( + enable_krb5_auth) + # auth principal + self.auth_principal = utilities.get_item_value( + self.config, "auth_principal", + "", enable_krb5_auth, config_section) + # auth password + self.auth_password = utilities.get_item_value( + self.config, "auth_password", + "", enable_krb5_auth, config_section) + # cacche path + self.ccache_path = utilities.get_item_value( + self.config, "ccache_path", + "", enable_krb5_auth, config_section) + # the krb5.conf + self.krb5_conf_path = utilities.get_item_value( + self.config, "krb5_conf_path", + "conf/krb5.conf", enable_krb5_auth, config_section) class RA2018PSIConfig: diff --git a/cpp/tools/ppc-builder/src/config/ppc_node_config_generator.py b/cpp/tools/ppc-builder/src/config/ppc_node_config_generator.py index 615d1d70..8a835ae1 100644 --- a/cpp/tools/ppc-builder/src/config/ppc_node_config_generator.py +++ b/cpp/tools/ppc-builder/src/config/ppc_node_config_generator.py @@ -109,8 +109,8 @@ def __generate_single_node_inner_config__(self, tpl_config_path, node_path, priv self.__generate_storage_config__( config_content, node_config.storage_config) # load the hdfs_storage_config - self.__generate_hdfs_storage_config__( - config_content, node_config.hdfs_storage_config) + self.__generate_hdfs_storage_config__(node_path, utilities.ConfigInfo.krb5_config_tpl_path, + config_content, node_config.hdfs_storage_config) # load the ra2018psi config self.__generate_ra2018psi_config__( config_content, node_config.ra2018psi_config) @@ -182,7 +182,7 @@ def 
__generate_storage_config__(self, config_content, storage_config): config_content[section_name]["password"] = storage_config.password config_content[section_name]["database"] = storage_config.database - def __generate_hdfs_storage_config__(self, config_content, hdfs_storage_config): + def __generate_hdfs_storage_config__(self, node_path, krb5_tpl_file_path, config_content, hdfs_storage_config): if hdfs_storage_config is None: return section_name = "hdfs_storage" @@ -191,8 +191,26 @@ def __generate_hdfs_storage_config__(self, config_content, hdfs_storage_config): config_content[section_name]["name_node_port"] = str( hdfs_storage_config.name_node_port) config_content[section_name]["token"] = hdfs_storage_config.token + config_content[section_name]["enable_krb5_auth"] = hdfs_storage_config.enable_krb5_auth_str + config_content[section_name]["auth_principal"] = hdfs_storage_config.auth_principal + config_content[section_name]["auth_password"] = hdfs_storage_config.auth_password + config_content[section_name]["ccache_path"] = hdfs_storage_config.ccache_path + config_content[section_name]["krb5_conf_path"] = hdfs_storage_config.krb5_conf_path + # copy krb5.conf to krb5_conf_path specified path + dst_path = os.path.join(node_path, hdfs_storage_config.krb5_conf_path) + if hdfs_storage_config.krb5_conf_path.startswith("/"): + dst_path = hdfs_storage_config.krb5_conf_path + command = "cp %s %s" % (krb5_tpl_file_path, dst_path) + (ret, output) = utilities.execute_command_and_getoutput(command) + if ret is False: + utilities.log_error("copy krb5 configuration from %s to %s failed, error: %s" % ( + krb5_tpl_file_path, dst_path, output)) + return False + return True - def __generate_transport_config__(self, config_content, node_config, node_id, deploy_ip, node_index): + def __generate_transport_config__(self, config_content, + node_config, node_id, + deploy_ip, node_index): """_summary_ Args: diff --git a/cpp/tools/ppc-builder/src/tpl/config.ini.node 
b/cpp/tools/ppc-builder/src/tpl/config.ini.node index 2f0b7ab6..5bbb29b3 100644 --- a/cpp/tools/ppc-builder/src/tpl/config.ini.node +++ b/cpp/tools/ppc-builder/src/tpl/config.ini.node @@ -53,7 +53,7 @@ [hdfs_storage] ; the hdfs configuration - user = app + user = root name_node = 127.0.0.1 name_node_port = 9000 token = @@ -61,7 +61,16 @@ replace-datanode-on-failure = false ; the connection-timeout, in ms, default is 1000ms connection-timeout = 1000 - + ; enable auth or not, default is false + ; enable_krb5_auth = false + ; the hdfs kerberos auth principal, used when enable_krb5_auth + ; auth_principal = root@NODE.DC1.CONSUL + ; the hdfs kerberos auth password, used when enable_krb5_auth + ; auth_password = + ; the ccache path, used when enable_krb5_auth + ; ccache_path = /tmp/krb5cc_ppc_node + ; the krb5.conf path + ; krb5_conf_path = conf/krb5.conf [ra2018psi] ; The database used to store cuckoo-filter diff --git a/cpp/tools/ppc-builder/src/tpl/krb5.conf b/cpp/tools/ppc-builder/src/tpl/krb5.conf new file mode 100644 index 00000000..6e612c20 --- /dev/null +++ b/cpp/tools/ppc-builder/src/tpl/krb5.conf @@ -0,0 +1,17 @@ +[libdefaults] + default_realm = NODE.DC1.CONSUL + dns_lookup_realm = false + dns_lookup_kdc = false + ticket_lifetime = 24h + renew_lifetime = 7d + forwardable = true + +[realms] + NODE.DC1.CONSUL = { + kdc = + admin_server = + } + +[domain_realm] + .node.dc1.consul = NODE.DC1.CONSUL + node.dc1.consul = NODE.DC1.CONSUL diff --git a/cpp/vcpkg-configuration.json b/cpp/vcpkg-configuration.json index 0a2e904f..3c205926 100644 --- a/cpp/vcpkg-configuration.json +++ b/cpp/vcpkg-configuration.json @@ -3,14 +3,16 @@ { "kind": "git", "repository": "https://github.com/FISCO-BCOS/registry", - "baseline": "489334d1f374134163a3d9719c3345fdd1d268d3", + "baseline": "535c4fc74badd509b94e8a736452fa8a76bff944", "packages": [ "openssl", "bcos-utilities", "bcos-boostssl", "tarscpp", "libhdfs3", - "tbb" + "tbb", + "gsasl", + "krb5" ] } ] diff --git a/cpp/vcpkg.json 
b/cpp/vcpkg.json index 6b2f1f75..a3a838c8 100644 --- a/cpp/vcpkg.json +++ b/cpp/vcpkg.json @@ -1,8 +1,8 @@ { - "name": "fiscobcos", - "version-string": "3.7.3", + "name": "wedpr-component", + "version-string": "3.0.0", "homepage": "https://github.com/WeBankBlockchain/WeDPR-Component", - "description": "FISCO BCOS", + "description": "wedpr component", "dependencies": [ { "name": "vcpkg-cmake", @@ -30,7 +30,7 @@ }, { "name": "libhdfs3", - "version": "2024-04-27#1" + "version": "2024-11-29#0" }, { "name": "tbb", diff --git a/cpp/wedpr-helper/ppc-tools/src/config/PPCConfig.cpp b/cpp/wedpr-helper/ppc-tools/src/config/PPCConfig.cpp index 27234303..59b34c2a 100644 --- a/cpp/wedpr-helper/ppc-tools/src/config/PPCConfig.cpp +++ b/cpp/wedpr-helper/ppc-tools/src/config/PPCConfig.cpp @@ -580,8 +580,37 @@ void PPCConfig::loadHDFSConfig(boost::property_tree::ptree const& _pt) // connection-timeout option->connectionTimeout = _pt.get("hdfs_storage.connection-timeout", 1000); m_storageConfig.fileStorageConnectionOpt = option; + auto enableAuth = _pt.get("hdfs_storage.enable_krb5_auth", false); + if (enableAuth) + { + loadKrb5AuthConfig(_pt); + } PPCConfig_LOG(INFO) << LOG_DESC("loadStorageConfig: load hdfs connection option success") - << option->desc(); + << LOG_KV("enableAuth", enableAuth) << option->desc(); +} + +void PPCConfig::loadKrb5AuthConfig(boost::property_tree::ptree const& pt) +{ + auto authConfig = std::make_shared(); + // the principal + authConfig->principal = pt.get("hdfs_storage.auth_principal", ""); + // the password + authConfig->password = pt.get("hdfs_storage.auth_password", ""); + // the ccachePath + authConfig->ccachePath = + pt.get("hdfs_storage.ccache_path", "/tmp/krb5cc_ppc_node"); + // the krb5.conf path + authConfig->authConfigFilePath = + pt.get("hdfs_storage.krb5_conf_path", "./conf/krb5.conf"); + // relative path case + if (!authConfig->authConfigFilePath.starts_with("/")) + { + auto joinedPath = + 
boost::filesystem::absolute(boost::filesystem::path(authConfig->authConfigFilePath)); + authConfig->authConfigFilePath = joinedPath.string(); + } + m_storageConfig.fileStorageConnectionOpt->authConfig = authConfig; + PPCConfig_LOG(INFO) << LOG_DESC("loadKrb5AuthConfig") << LOG_KV("config", authConfig->desc()); } void PPCConfig::loadSQLConfig(boost::property_tree::ptree const& _pt) @@ -633,6 +662,5 @@ void PPCConfig::loadMPCConfig(boost::property_tree::ptree const& _pt) { m_mpcConfig.threadPoolSize = std::thread::hardware_concurrency(); } - loadHDFSConfig(_pt); } \ No newline at end of file diff --git a/cpp/wedpr-helper/ppc-tools/src/config/PPCConfig.h b/cpp/wedpr-helper/ppc-tools/src/config/PPCConfig.h index 6ac74457..aa620731 100644 --- a/cpp/wedpr-helper/ppc-tools/src/config/PPCConfig.h +++ b/cpp/wedpr-helper/ppc-tools/src/config/PPCConfig.h @@ -264,7 +264,7 @@ class PPCConfig std::string const& sectionName, boost::property_tree::ptree const& pt); virtual void loadHDFSConfig(boost::property_tree::ptree const& _pt); - + virtual void loadKrb5AuthConfig(boost::property_tree::ptree const& pt); virtual void loadSQLConfig(boost::property_tree::ptree const& _pt); diff --git a/cpp/wedpr-helper/ppc-tools/src/config/StorageConfig.h b/cpp/wedpr-helper/ppc-tools/src/config/StorageConfig.h index 8e3d7033..e9bd4dce 100644 --- a/cpp/wedpr-helper/ppc-tools/src/config/StorageConfig.h +++ b/cpp/wedpr-helper/ppc-tools/src/config/StorageConfig.h @@ -18,6 +18,7 @@ * @date 2022-11-4 */ #pragma once +#include "ppc-framework/protocol/Krb5AuthConfig.h" #include #include #include diff --git a/cpp/wedpr-storage/ppc-storage/src/hdfs/Common.h b/cpp/wedpr-storage/ppc-storage/src/hdfs/Common.h index ba02d6e1..17f8a2a4 100644 --- a/cpp/wedpr-storage/ppc-storage/src/hdfs/Common.h +++ b/cpp/wedpr-storage/ppc-storage/src/hdfs/Common.h @@ -20,7 +20,7 @@ #pragma once #include "../Common.h" #define HDFS_STORAGE_LOG(LEVEL) BCOS_LOG(LEVEL) << LOG_BADGE("HDFS_STORAGE") - +#define HDFS_AUTH_LOG(LEVEL) 
BCOS_LOG(LEVEL) << LOG_BADGE("HDFS_STORAGE_AUTH") namespace ppc::storage { DERIVE_PPC_EXCEPTION(ConnectToHDFSFailed); diff --git a/cpp/wedpr-storage/ppc-storage/src/hdfs/HDFSStorage.cpp b/cpp/wedpr-storage/ppc-storage/src/hdfs/HDFSStorage.cpp index 8eaa5455..8d467235 100644 --- a/cpp/wedpr-storage/ppc-storage/src/hdfs/HDFSStorage.cpp +++ b/cpp/wedpr-storage/ppc-storage/src/hdfs/HDFSStorage.cpp @@ -18,6 +18,7 @@ * @date 2022-11-30 */ #include "HDFSStorage.h" +#include "auth/Krb5CredLoader.h" #include #include #include @@ -41,6 +42,8 @@ HDFSStorage::HDFSStorage(FileStorageConnectionOption::Ptr const& _option) // the default 'rpc.client.connect.retry' is 10 auto connectTimeout = std::to_string(_option->connectionTimeout); // set 1s as the connectTimeout + HDFS_STORAGE_LOG(INFO) << LOG_DESC("create HDFSStorage") << _option->desc() + << LOG_KV("connectTimeout", connectTimeout); hdfsBuilderConfSetStr(m_builder.get(), "rpc.client.connect.timeout", connectTimeout.c_str()); // default disable output.replace-datanode-on-failure, set to false to resolve append data error @@ -53,8 +56,6 @@ HDFSStorage::HDFSStorage(FileStorageConnectionOption::Ptr const& _option) hdfsBuilderConfSetStr(m_builder.get(), "output.replace-datanode-on-failure", "false"); } - HDFS_STORAGE_LOG(INFO) << LOG_DESC("create HDFSStorage") << _option->desc() - << LOG_KV("connectTimeout", connectTimeout); // set name node hdfsBuilderSetNameNode(m_builder.get(), _option->nameNode.c_str()); hdfsBuilderSetNameNodePort(m_builder.get(), _option->nameNodePort); @@ -66,6 +67,19 @@ HDFSStorage::HDFSStorage(FileStorageConnectionOption::Ptr const& _option) { hdfsBuilderSetToken(m_builder.get(), _option->token.c_str()); } + // init the auth information + if (_option->authConfig) + { + // set auth type to Kerberos + hdfsBuilderConfSetStr(m_builder.get(), "hadoop.security.authentication", "kerberos"); + // init and store the auth information into the cache + auto ctx = std::make_shared<Krb5Context>(_option->authConfig); + ctx->init(); 
+ HDFS_STORAGE_LOG(INFO) << LOG_DESC("SetKerbTicketCachePath") + << LOG_KV("ccachePath", _option->authConfig->ccachePath); + // set the ccache file path + hdfsBuilderSetKerbTicketCachePath(m_builder.get(), _option->authConfig->ccachePath.c_str()); + } // connect to the hdfs, Note: the m_fs is a pointer m_fs = std::shared_ptr( hdfsBuilderConnect(m_builder.get()), HDFSFSDeleter()); @@ -75,6 +89,8 @@ HDFSStorage::HDFSStorage(FileStorageConnectionOption::Ptr const& _option) ConnectToHDFSFailed() << errinfo_comment( "Connect to hdfs failed! error: " + std::string(hdfsGetLastError()))); } + HDFS_STORAGE_LOG(INFO) << LOG_DESC("create HDFSStorage success") << _option->desc() + << LOG_KV("connectTimeout", connectTimeout); } diff --git a/cpp/wedpr-storage/ppc-storage/src/hdfs/auth/Krb5CredLoader.cpp b/cpp/wedpr-storage/ppc-storage/src/hdfs/auth/Krb5CredLoader.cpp new file mode 100644 index 00000000..498aaa7e --- /dev/null +++ b/cpp/wedpr-storage/ppc-storage/src/hdfs/auth/Krb5CredLoader.cpp @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2022 WeDPR. + * SPDX-License-Identifier: Apache-2.0 + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * @file Krb5CredLoader.cpp + * @author: yujiechen + * @date 2024-12-1 + */ +#include "Krb5CredLoader.h" +#include "../Common.h" + +using namespace ppc::storage; +using namespace ppc; +using namespace ppc::protocol; +using namespace bcos; + +// Load the krb5 profile, obtain initial credentials for the configured principal and store +// them into the configured credential cache; throws WeDPRException when any step fails. +void Krb5Context::init() +{ + HDFS_AUTH_LOG(INFO) << LOG_DESC("init Krb5Context") << m_config->desc(); + + // init the profile + auto ret = profile_init_path(m_config->authConfigFilePath.c_str(), &m_profile); + if (ret) + { + BOOST_THROW_EXCEPTION(WeDPRException() << errinfo_comment( + "load Krb5Context failed for profile_init_path failed!")); + } + m_profilePtr = &m_profile; + // load krb5 ctx + auto error = krb5_init_context_profile(m_profile, 1, &m_ctx); + checkResult(error, "krb5_init_context_profile"); + + // init the principal + error = krb5_parse_name(m_ctx, m_config->principal.c_str(), &m_principal); + checkResult(error, "krb5_parse_name"); + // init credential + error = krb5_get_init_creds_password( + m_ctx, &m_credsObj, m_principal, m_config->password.c_str(), NULL, NULL, 0, NULL, NULL); + checkResult(error, "krb5_get_init_creds_password"); + m_creds = &m_credsObj; + // init the ccache + error = krb5_cc_resolve(m_ctx, m_config->ccachePath.c_str(), &m_ccache); + checkResult(error, "krb5_cc_resolve"); + + error = krb5_cc_initialize(m_ctx, m_ccache, m_principal); + checkResult(error, "krb5_cc_initialize"); + // store the credential + error = krb5_cc_store_cred(m_ctx, m_ccache, m_creds); + checkResult(error, "krb5_cc_store_cred"); + HDFS_AUTH_LOG(INFO) << LOG_DESC("init Krb5Context success") << m_config->desc(); +} + +// Log and return when error is 0; otherwise log the krb5 error text and throw a +// WeDPRException that names the failed method. 
+void Krb5Context::checkResult(krb5_error_code const& error, std::string const& method) +{ + if (!error) + { + HDFS_AUTH_LOG(INFO) << LOG_DESC("init Krb5Context: ") << method << " success"; + return; + } + auto msg = krb5_get_error_message(m_ctx, error); + // copy the text first: the message is allocated by the library and must be released + std::string reason = msg ? msg : "unknown error"; + krb5_free_error_message(m_ctx, msg); + HDFS_AUTH_LOG(ERROR) << LOG_DESC("init Krb5Context failed") << LOG_KV("method", method) + << LOG_KV("reason", reason); + BOOST_THROW_EXCEPTION( + WeDPRException() << errinfo_comment( + "load 
Krb5Context failed, method: " + method + ", reason: " + reason)); +} \ No newline at end of file diff --git a/cpp/wedpr-storage/ppc-storage/src/hdfs/auth/Krb5CredLoader.h b/cpp/wedpr-storage/ppc-storage/src/hdfs/auth/Krb5CredLoader.h new file mode 100644 index 00000000..e2caeb30 --- /dev/null +++ b/cpp/wedpr-storage/ppc-storage/src/hdfs/auth/Krb5CredLoader.h @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2022 WeDPR. + * SPDX-License-Identifier: Apache-2.0 + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * @file Krb5CredLoader.h + * @author: yujiechen + * @date 2024-12-1 + */ +#pragma once +#include "ppc-framework/protocol/Krb5AuthConfig.h" +#include <krb5.h> +#include <profile.h> +#include <memory> + +namespace ppc::storage +{ +// Owns the krb5 handles (profile, context, principal, credentials, ccache) acquired by init() +// and releases them in the destructor. +class Krb5Context +{ +public: + using Ptr = std::shared_ptr<Krb5Context>; + Krb5Context(ppc::protocol::Krb5AuthConfig::Ptr const& config) : m_config(config) {} + + virtual ~Krb5Context() + { + // the handles below are released through m_ctx, so the context itself is freed after them + if (m_ccache) + { + krb5_cc_close(m_ctx, m_ccache); + } + if (m_principal) + { + krb5_free_principal(m_ctx, m_principal); + } + if (m_creds) + { + krb5_free_cred_contents(m_ctx, m_creds); + } + if (m_ctx) + { + krb5_free_context(m_ctx); + } + if (m_profilePtr) + { + profile_release(m_profile); + } + } + + virtual void init(); + +private: + void checkResult(krb5_error_code const& error, std::string const& method); + +protected: + ppc::protocol::Krb5AuthConfig::Ptr m_config; + krb5_context m_ctx = NULL; + profile_t m_profile; + profile_t* m_profilePtr = NULL; + krb5_principal m_principal = NULL; + krb5_creds m_credsObj; + krb5_creds* m_creds = NULL; + krb5_ccache m_ccache = NULL; +}; + +// Factory for Krb5Context objects. 
+class Krb5CredLoader +{ +public: + using Ptr = std::shared_ptr<Krb5CredLoader>; + Krb5CredLoader() = default; + virtual ~Krb5CredLoader() = default; + + // only constructs the context from config; init() is not called here + virtual Krb5Context::Ptr load(ppc::protocol::Krb5AuthConfig::Ptr const& config) const + { + return std::make_shared<Krb5Context>(config); + } +}; +} // namespace ppc::storage \ No newline at end of file diff --git a/cpp/wedpr-transport/ppc-rpc/src/RpcMemory.cpp b/cpp/wedpr-transport/ppc-rpc/src/RpcMemory.cpp index 31f923d1..0cd35e2a 100644 --- a/cpp/wedpr-transport/ppc-rpc/src/RpcMemory.cpp +++ b/cpp/wedpr-transport/ppc-rpc/src/RpcMemory.cpp @@ -86,7 +86,7 @@ bcos::Error::Ptr RpcMemory::insertTask(protocol::Task::Ptr _task) RPC_STATUS_LOG(INFO) << LOG_DESC("find the existed not running-task") << LOG_KV("task", _task->id()) << LOG_KV("status", taskResult->status()); - if (taskResult->status() != toString(TaskState::COMPLETED)) + if (taskResult->status() != toString(TaskStatus::COMPLETED)) { // erase the task_id 
m_front->eraseTaskInfo(_task->id()); diff --git a/python/ppc_common/deps_services/hdfs_storage.py b/python/ppc_common/deps_services/hdfs_storage.py index 67f2d81b..aae0d851 100644 --- a/python/ppc_common/deps_services/hdfs_storage.py +++ b/python/ppc_common/deps_services/hdfs_storage.py @@ -3,27 +3,20 @@ from hdfs.client import InsecureClient from ppc_common.ppc_utils import common_func -from ppc_common.deps_services.storage_api import StorageApi, StorageType +from ppc_common.deps_services.storage_api import StorageApi, StorageType, HDFSStorageConfig from ppc_common.ppc_utils import utils class HdfsStorage(StorageApi): - DEFAULT_HDFS_USER = "ppc" - DEFAULT_HDFS_USER_PATH = "/user/" - # endpoint: http://127.0.0.1:50070 - def __init__(self, endpoint, hdfs_user, hdfs_home=None): - self.endpoint = endpoint - self._user = common_func.get_config_value( - "HDFS_USER", HdfsStorage.DEFAULT_HDFS_USER, hdfs_user, False) - self._hdfs_storage_path = hdfs_home - if hdfs_home is None: - self._hdfs_storage_path = os.path.join( - HdfsStorage.DEFAULT_HDFS_USER_PATH, self._user) - - self.client = InsecureClient(endpoint, user=self._user) + def __init__(self, hdfs_config: HDFSStorageConfig, init_client: bool = True): + self.endpoint = hdfs_config.hdfs_url + self._user = hdfs_config.hdfs_user + self._hdfs_storage_path = hdfs_config.hdfs_home + if init_client: + self.client = InsecureClient(self.endpoint, user=self._user) def get_home_path(self): return self._hdfs_storage_path diff --git a/python/ppc_common/deps_services/krb5_hdfs_storage.py b/python/ppc_common/deps_services/krb5_hdfs_storage.py new file mode 100644 index 00000000..fbc643c8 --- /dev/null +++ b/python/ppc_common/deps_services/krb5_hdfs_storage.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +import os +from krbcontext.context import krbContext +from hdfs.ext.kerberos import KerberosClient +from ppc_common.deps_services.hdfs_storage import HdfsStorage +from ppc_common.deps_services.storage_api import HDFSStorageConfig + + 
+class Krb5HdfsStorage(HdfsStorage): + """HdfsStorage variant whose client is hdfs.ext.kerberos.KerberosClient.""" + def __init__(self, hdfs_config: HDFSStorageConfig): + super().__init__(hdfs_config, False) + self.hdfs_config = hdfs_config + self.krb5_ctx = krbContext( + using_keytab=True, + principal=self.hdfs_config.hdfs_auth_principal, + keytab_file=self.hdfs_config.hdfs_auth_secret_file_path) + # NOTE(review): krbContext is a context manager and is never entered here, so it + # presumably does not acquire a ticket by itself - confirm whether it must be entered. + + # KerberosClient requires the WebHDFS url as its first argument; build the client once + # from the configured url. + self.client = KerberosClient(self.hdfs_config.hdfs_url) diff --git a/python/ppc_common/deps_services/storage_api.py b/python/ppc_common/deps_services/storage_api.py index 1f623fb1..a3e85b3f 100644 --- a/python/ppc_common/deps_services/storage_api.py +++ b/python/ppc_common/deps_services/storage_api.py @@ -1,12 +1,63 @@ from abc import ABC, abstractmethod from enum import Enum from typing import AnyStr +from ppc_common.ppc_utils import common_func +import os class StorageType(Enum): HDFS = 'HDFS' +class HDFSStorageConfig: + DEFAULT_HDFS_USER = "ppc" + DEFAULT_HDFS_USER_PATH = "/user/" + + def __init__(self, hdfs_url: str = None, + hdfs_user: str = None, + hdfs_home: str = None, + enable_krb5_auth: bool = False, + hdfs_auth_principal: str = None, + hdfs_auth_secret_file_path: str = None): + self.hdfs_url = hdfs_url + self.hdfs_user = hdfs_user + self.hdfs_home = hdfs_home + self.enable_krb5_auth = enable_krb5_auth + self.hdfs_auth_principal = hdfs_auth_principal + self.hdfs_auth_secret_file_path = hdfs_auth_secret_file_path + + def __repr__(self): + return f"hdfs_user: {self.hdfs_user}, hdfs_home: {self.hdfs_home}, hdfs_url: {self.hdfs_url}, " \ + f"enable_krb5_auth: {self.enable_krb5_auth}, hdfs_auth_principal: {self.hdfs_auth_principal}, " \ + f"hdfs_auth_secret_file_path: 
{self.hdfs_auth_secret_file_path}" + + def load_config(self, config: dict, logger): + self.hdfs_url = common_func.get_config_value( + 
'HDFS_URL', None, config, True) + self.hdfs_user = common_func.get_config_value( + 'HDFS_USER', self.DEFAULT_HDFS_USER, config, False) + self.hdfs_home = common_func.get_config_value( + "HDFS_HOME", os.path.join(self.DEFAULT_HDFS_USER_PATH, self.hdfs_user), config, False) + # the auth information + self.enable_krb5_auth = common_func.get_config_value( + "HDFS_ENABLE_AUTH", False, config, False) + require_auth_info = False + if self.enable_krb5_auth is True: + require_auth_info = True + # the principal + self.hdfs_auth_principal = common_func.get_config_value( + "HDFS_AUTH_PRINCIPAL", None, config, require_auth_info + ) + # the keytab file path + self.hdfs_auth_secret_file_path = common_func.get_config_value( + "HDFS_AUTH_KEYTAB_PATH", None, config, require_auth_info + ) + if logger is not None: + logger.info(f"*** load hdfs storage config : {self}") + else: + print(f"*** load hdfs storage config : {self}") + + class StorageApi(ABC): @abstractmethod def download_file(self, storage_path: str, local_file_path: str, enable_cache=False): diff --git a/python/ppc_common/deps_services/storage_loader.py b/python/ppc_common/deps_services/storage_loader.py index 0688648a..991ce37e 100644 --- a/python/ppc_common/deps_services/storage_loader.py +++ b/python/ppc_common/deps_services/storage_loader.py @@ -1,14 +1,21 @@ from ppc_common.deps_services.storage_api import StorageType +from ppc_common.deps_services.storage_api import HDFSStorageConfig from ppc_common.deps_services.hdfs_storage import HdfsStorage -from ppc_common.ppc_utils import common_func +from ppc_common.deps_services.krb5_hdfs_storage import Krb5HdfsStorage + + +class HDFSStorageLoader: + @staticmethod + def load(hdfs_config: HDFSStorageConfig): + if hdfs_config.enable_krb5_auth is False: + return HdfsStorage(hdfs_config) + return Krb5HdfsStorage(hdfs_config) def load(config: dict, logger): if config['STORAGE_TYPE'] == StorageType.HDFS.value: - hdfs_user = common_func.get_config_value( - 'HDFS_USER', None, config, 
False) - hdfs_home = common_func.get_config_value( - "HDFS_HOME", None, config, False) - return HdfsStorage(config['HDFS_URL'], hdfs_user, hdfs_home) + hdfs_config = HDFSStorageConfig() + hdfs_config.load_config(config, logger) + return HDFSStorageLoader.load(hdfs_config) else: raise Exception('unsupported storage type') diff --git a/python/ppc_common/deps_services/tests/hdfs_storage_test.py b/python/ppc_common/deps_services/tests/hdfs_storage_test.py new file mode 100644 index 00000000..755322be --- /dev/null +++ b/python/ppc_common/deps_services/tests/hdfs_storage_test.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +import unittest +from ppc_common.deps_services.storage_api import HDFSStorageConfig +from ppc_common.deps_services.storage_loader import HDFSStorageLoader + + +class HDFSStorageWrapper: + def __init__(self): + # use the default config + hdfs_url = "hdfs://127.0.0.1:9900" + hdfs_user = "wedpr" + hdfs_home = "/user/ppc" + enable_krb5_auth = False + hdfs_auth_principal = "" + hdfs_auth_secret_file_path = "" + self.hdfs_config = HDFSStorageConfig( + hdfs_url=hdfs_url, hdfs_user=hdfs_user, + hdfs_home=hdfs_home, enable_krb5_auth=enable_krb5_auth, + hdfs_auth_principal=hdfs_auth_principal, + hdfs_auth_secret_file_path=hdfs_auth_secret_file_path) + self.hdfs_storage = HDFSStorageLoader.load(self.hdfs_config) + + def test_file_op(self, file_path): + hdfs_file_path = f"test/{file_path}" + print(f"*** upload file test ***") + self.hdfs_storage.upload_file( + local_file_path=file_path, hdfs_path=hdfs_file_path) + print(f"*** upload file test success***") + print(f"*** download file test ***") + local_file_path = f"{file_path}.download" + self.hdfs_storage.download_file( + hdfs_path=hdfs_file_path, local_file_path=local_file_path) + print(f"*** download file test success ***") + + +class TestHDFSStorage(unittest.TestCase): + def test_file_op(self): + file_path = "test.csv" + wrapper = HDFSStorageWrapper() + wrapper.test_file_op(file_path) + + +if __name__ == 
'__main__': + unittest.main() diff --git a/python/ppc_model/conf/application-sample.yml b/python/ppc_model/conf/application-sample.yml index 1bc97e73..a472260b 100644 --- a/python/ppc_model/conf/application-sample.yml +++ b/python/ppc_model/conf/application-sample.yml @@ -17,17 +17,14 @@ HDFS_URL: "http://127.0.0.1:50070" # HDFS, STORAGE_TYPE: "HDFS" HDFS_URL: "http://127.0.0.1:9870" -HDFS_USER: "ppc" +HDFS_USER: "root" HDFS_HOME: "/user/ppc/model/webank" +HDFS_ENABLE_AUTH: false +# the hdfs auth principal +HDFS_AUTH_PRINCIPAL: "root@NODE.DC1.CONSUL" +# the auth key-tab path +HDFS_AUTH_KEYTAB_PATH: "./hdfs-wedpr.keytab" -# ECDSA or GM -CRYPTO_TYPE: "ECDSA" -private_key: "" -public_key: "" -gm_private_key: "" -gm_public_key: "" - -UPLOAD_FOLDER: "./upload_data_folder" JOB_TEMP_DIR: ".cache/job" # the transport config diff --git a/python/requirements.txt b/python/requirements.txt index 4476487c..92919807 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -56,3 +56,5 @@ MarkupSafe>=2.1.1 urllib3==1.26.18 phe chardet +krbcontext +requests_kerberos