diff --git a/.circleci/config.yml b/.circleci/config.yml index f3aaeb59..f7d54b86 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -52,7 +52,7 @@ commands: esac GOLANG_DIR="/usr/local" - GOLANG_VERSION="1.19.17" + GOLANG_VERSION="1.19.7" GOLANG_URL="https://golang.org/dl/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz" if ! command -v go &> /dev/null; then @@ -79,7 +79,7 @@ commands: esac GOLANG_DIR="/usr/local" - GOLANG_VERSION="1.19.17" + GOLANG_VERSION="1.19.7" GOLANG_URL="https://golang.org/dl/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz" if ! command -v go &> /dev/null; then @@ -130,18 +130,18 @@ jobs: CIRCLETAG=$(echo ${CIRCLE_TAG} | sed 's/v//') IMAGE = "secretflow/kuscia" ALIYUN_IMAGE = "secretflow-registry.cn-hangzhou.cr.aliyuncs.com/secretflow/kuscia" - + # login kuscia dockerhub registry docker login -u secretflow -p ${DOCKER_DEPLOY_TOKEN} - + docker buildx create --name kuscia --platform linux/arm64,linux/amd64 --use - + docker buildx build -t ${IMAGE}:${CIRCLETAG} --platform linux/arm64,linux/amd64 -f ./build/dockerfile/kuscia-anolis.Dockerfile . --push docker buildx build -t ${IMAGE}:latest --platform linux/arm64,linux/amd64 -f ./build/dockerfile/kuscia-anolis.Dockerfile . --push - + # login kuscia aliyun registry docker login -u ${ALIYUN_DOCKER_USERNAME} -p ${ALIYUN_DOCKER_PASSWORD} secretflow-registry.cn-hangzhou.cr.aliyuncs.com - + docker buildx build -t ${ALIYUN_IMAGE}:latest --platform linux/arm64,linux/amd64 -f ./build/dockerfile/kuscia-anolis.Dockerfile . --push docker buildx build -t ${ALIYUN_IMAGE}:${CIRCLETAG} --platform linux/arm64,linux/amd64 -f ./build/dockerfile/kuscia-anolis.Dockerfile . --push diff --git a/.circleci/deps-config.yml b/.circleci/deps-config.yml index c07af3f9..c5bdc800 100644 --- a/.circleci/deps-config.yml +++ b/.circleci/deps-config.yml @@ -47,12 +47,12 @@ commands: esac GOLANG_DIR="/opt" - GOLANG_VERSION="1.22.2" + GOLANG_VERSION="1.19.7" GOLANG_URL="https://golang.org/dl/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz" if ! command -v go &> /dev/null; then wget "$GOLANG_URL" - + tar -C "$GOLANG_DIR" -xzf "go${GOLANG_VERSION}.linux-${ARCH}.tar.gz" echo 'export PATH=$PATH:/opt/go/bin' | tee -a ~/.bashrc echo 'export PATH=$PATH:/opt/go/bin' >> $BASH_ENV @@ -91,12 +91,12 @@ jobs: DEPS_IMAGE = secretflow-registry.cn-hangzhou.cr.aliyuncs.com/secretflow/kuscia-deps:<< pipeline.parameters.DEPS_Tag >> # login kuscia aliyun registry docker login -u ${ALIYUN_DOCKER_USERNAME} -p ${ALIYUN_DOCKER_PASSWORD} secretflow-registry.cn-hangzhou.cr.aliyuncs.com - + docker buildx create --name kuscia_deps --platform linux/arm64,linux/amd64 --use - + docker buildx build -t ${DEPS_IMAGE}:${CIRCLETAG} --platform linux/arm64,linux/amd64 -f ./build/dockerfile/base/kuscia-deps.Dockerfile . --push - - + + workflows: build_deps_workflow: diff --git a/.github/workflows/golangci-lint.yml b/.github/workflows/golangci-lint.yml new file mode 100644 index 00000000..879aaa88 --- /dev/null +++ b/.github/workflows/golangci-lint.yml @@ -0,0 +1,49 @@ +name: Golangci-Lint +on: + push: + branches: + - main + - release/* + pull_request: + branches: + - main + - release/* + +permissions: + contents: read + # Optional: allow read access to pull request. Use with `only-new-issues` option. 
+ # pull-requests: read + +jobs: + golangci: + strategy: + matrix: + go: ['1.19'] + # [macos-latest, windows-latest] + os: [ubuntu-latest] + name: lint + runs-on: ${{ matrix.os }} + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: '0' + + - name: Setup Go + uses: actions/setup-go@v3 + with: + go-version: ${{ matrix.go }} + cache: false + + - name: Install specific version of golangci-lint + run: | + curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v1.14.0 + golangci-lint --version + - name: Run golangci-lint + run: | + if [ "${{ github.event_name }}" = "push" ]; then + LAST_COMMIT_SHA=${{ github.event.before }} + else + LAST_COMMIT_SHA=${{ github.event.pull_request.base.sha }} + fi + golangci-lint run --new-from-rev=${LAST_COMMIT_SHA} --out-format=colored-line-number \ No newline at end of file diff --git a/.gitignore b/.gitignore index 12027ae8..28793d5a 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ /docs/_build/ /thirdparty/fate/build/apps/ /vendor/ +/run/ # bazel bazel-* diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b16afbc..b06080c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,12 @@ # Changelog + All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## Types of changes + `Added ` for new features. `Changed` for changes in existing functionality. `Deprecated` for soon-to-be removed features. @@ -13,115 +15,224 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 `Security` in case of vulnerabilities. `Breaking Changed` Breaking for backward-incompatible changes that require user intervention. +## [v0.9.0.dev240430] - 2024-04-30 + +### Added + +- [Documentation] SCQL on Kuscia Tutorial. +- [Feature] Support for ARM architecture. +- [Feature] Support for reverse tunneling multiple replicas. +- [Feature] New CGROUP restrictions under RunP and RunC for Kuscia nodes. + +### Changed + +- [Security Hardening] Change the node registration request's MD5 hash to Sha256. +- [Security Hardening] Node certificate issuance requires the CN field to be a Domain name. +- [Script] kuscia.sh upgrade retains k3s resources, memory restrictions. +- [KusciaAPI] KusciaJob's TaskStatus new field alias. +- [Integration Testing] Improved integration testing stability. +- [Optimization] Modified the Access-Domain information that allows access to the Service. +- [Upgrade] Upgraded the dependent Envoy version to 1.29.4. + +### Fixed + +- Fixed the issue where the kusciaTask status was incorrect when stopping a KusciaJob. +- Fixed the issue of nodes not scheduling applications upon deletion/upgrade. +- Fixed the issue of not being able to delete task sub-resources. +- Fixed the issue where Service name was too long, causing task creation to fail. + +## [v0.8.0.dev240331] - 2024-03-31 + +### Added + +- [Alpha] Kuscia Job now has new interface capabilities for retrying, pausing, and canceling jobs. +- [Alpha] One-way Networking: Support for collaboration where only one party needs to expose a public port, eliminating + the need for the other party to also do so. Documentation to follow. +- [Documentation] How to use the Kuscia API to run a SecretFlow Serving. +- [Documentation] Introduction to Kuscia listening ports. 
+- [Documentation] Added documentation explaining DataMesh interface.
+- [Scripts] Added kuscia.sh and init_example_data.sh scripts.
+
+### Changed
+
+- Modified the default task state in the job query results from Kuscia API.
+- Optimized processing flows for KusciaJob and KusciaTask.
+- Added isTLS field to the kuscia API for creating DomainRoute interface.
+- Kuscia API has added a Suspended state enumeration to kusciaJob.
+- Revised container launch deployment steps in point-to-point and centralized deployment documentation.
+- Modified the pre-validation logic of the master address when initiating lite mode.
+
+### Breaking Changes
+
+- Deprecated: start_standalone.sh and deploy.sh. It is recommended to use kuscia.sh instead.
+
+### Fixed
+
+- Fixed an occasional concurrent read-write map issue causing panics in the DomainRoute module.
+- Fixed an issue with image repository names in the Agent module.
+- Strengthened integration tests for improved stability.
+- Fixed an issue where changes to the Configmap in RunK mode did not take effect for Serving.
+- Fixed an error when creating DomainDataSource with the Kuscia API.
+- Fixed a potential issue with abnormal startup when the protocol is set to TLS in the Kuscia configuration file.
+
 ## [0.7.0.dev240229] - 2024-02-29
+
 ### Added
+
 - add the documents of datasource api
 - add kusciaapi errorcode doc
 - add kuscia init command generating kuscia config
+
 ### Changed
+
 - update domain register and handshake error
 - report events when Pod failed to start in RunK mode

 ### Breaking Changed
+
 - Change the mounting directory for logs and data of Kuscia deploying with Docker
-  - {{ROOT}}/kuscia-{{DEPLOY_MODE}}-{{DOMAIN_ID}}-data -> {{ROOT}}/{{DOMAIN_CONTAINER_NAME}}/data.
-  - {{ROOT}}/kuscia-{{DEPLOY_MODE}}-{{DOMAIN_ID}}-logs -> {{ROOT}}/{{DOMAIN_CONTAINER_NAME}}/logs.
-  - {{ROOT}}/kuscia-{{DEPLOY_MODE}}-{{DOMAIN_ID}}-certs was deleted.
+    - {{ROOT}}/kuscia-{{DEPLOY_MODE}}-{{DOMAIN_ID}}-data -> {{ROOT}}/{{DOMAIN_CONTAINER_NAME}}/data.
+    - {{ROOT}}/kuscia-{{DEPLOY_MODE}}-{{DOMAIN_ID}}-logs -> {{ROOT}}/{{DOMAIN_CONTAINER_NAME}}/logs.
+    - {{ROOT}}/kuscia-{{DEPLOY_MODE}}-{{DOMAIN_ID}}-certs was deleted.
+
 ### Fixed
+
 - fix some unit test case
 - fix the issue of inconsistent states among multiple parties in KusciaDeployment
 - fix the issue of ClusterDefine only having unilateral information under KusciaDeployment in P2P scenarios
 - fix kusciaapi grpc role check
 - Upgrade certain dependency packages to fix security vulnerabilities in pkg.
-
 ## [0.6.0.dev240131] - 2024-01-31
+
 ### Added
-- Upgrade interconnection protocol from kuscia-alpha to kuscia-beta to support interconnection between Kuscia-Master and Kuscia-Autonomy.
-- Kuscia monitor, Kuscia exposes a set of metric data, which can be used as data sources for collection by external monitoring tools (such as Prometheus).
-- The Kuscia API added a job approve interface ,allowing participants to review jobs .
+
+- Upgrade interconnection protocol from kuscia-alpha to kuscia-beta to support interconnection between Kuscia-Master and
+  Kuscia-Autonomy.
+- Kuscia monitor, Kuscia exposes a set of metric data, which can be used as data sources for collection by external
+  monitoring tools (such as Prometheus).
+- The Kuscia API added a job approve interface, allowing participants to review jobs.
 - Add some pre-check before kuscia running, such as health check of the connection of database.
 - Add parameter validation to the kuscia api.
 - The create job interface of kuscia API added the attribute 'customed-fields' .
-- Support configuring the application's image ID in AppImage to prevent domain's application image from being tampered with. +- Support configuring the application's image ID in AppImage to prevent domain's application image from being tampered + with. - Added the curl command example for requesting the kuscia API. - polish the agent runtime docs. + ### Changed + - Changed some kuscia-crds (KusciaJob,KusciaTask,KusciaDeployment)from cluster to namespace (cross-domain). + ### Fixed + - Correct some inaccurate descriptions in the document. ## [0.6.0.dev240115] - 2024-01-15 + ### Added + - Add network error troubleshooting document. - Add steps for pre creating data tables in the process of deploying kusica on K8s. ### Changed + - The token from lite to master supports rotation. + ### Fixed + - When deploying using deploy.sh, no kuscia API client certificate was generated. ## [0.5.0b0] - 2024-1-8 + ### Added + - Support deploying kuscia on K8s. - Support running algorithm images based on runp and runk modes. - Support configuring Path prefix in domain public URL addresses. + ### Changed + - Optimize deployment configuration and add configuration documentation. - Optimize error information of task and error logs of kuscia. ### Fixed + - When there is a duplicate node error, the node will not exit but will try again. - Change ClusterDomainRoute status to be unready when dest domain is unreachable. ## [0.5.0.dev231225] - 2023-12-25 + ### Added + - Add document of Kuscia overview. + ### Changed + - Move pod scheduling phase to the task pending phase. ## [0.5.0.dev231215] - 2023-12-15 + ### Added + - Add document for deploying Kuscia on k8s. + ### Changed + - Optimize log output. ## [0.5.0.dev231205] - 2023-12-5 + ### Changed + - Optimize Kuscia deployment configuration and add configuration documentation. - Optimize error messages due to scheduling failures. ## [v0.5.0.dev231201] - 2023-12-01 + ### Fixed + - When there is a duplicate node error, the node will not exit but will try again. - Change ClusterDomainRoute status to be unready when dest domain is unreachable. ## [v0.5.0.dev231122] - 2023-11-22 + ### Added + - Support register secretflow psi image. ## [0.4.0b0] - 2023-11-9 + ### Added + - Add KusciaDeployment operator. - Support non MTLS network communication in P2P networking mode. ## [0.3.0b0] - 2023-9-7 + ### Added + - Support the deployment of new lite domain in centralized clusters. - Support non MTLS network communication in centralized networking mode. - Supports the deployment of an autonomy domain across machines. - Add Integration Test. ## [0.2.0b2] - 2023-7-18 + ### Fixed + - Correct datamesh service name for p2p ## [0.2.0b1] - 2023-7-7 + ### Fixed + - Fix document typo. - Fix errors when installing secretpad using non-root user. - Fix the issue of token failure after restarting Kuscia. ## [0.2.0b0] - 2023-7-6 + ### Added + - Kuscia init release. diff --git a/Makefile b/Makefile index 56361936..88269a8e 100644 --- a/Makefile +++ b/Makefile @@ -87,8 +87,8 @@ gen_error_code_doc: verify_error_code ## Generate error code markdown doc. test: verify_error_code fmt vet ## Run tests. rm -rf ./test-results mkdir -p test-results - go test ./cmd/... -gcflags="all=-N -l" -coverprofile=test-results/cmd.covprofile.out | tee test-results/cmd.output.txt - go test ./pkg/... -gcflags="all=-N -l" -coverprofile=test-results/pkg.covprofile.out | tee test-results/pkg.output.txt + go test ./cmd/... 
--parallel 4 -gcflags="all=-N -l" -coverprofile=test-results/cmd.covprofile.out | tee test-results/cmd.output.txt + go test ./pkg/... --parallel 4 -gcflags="all=-N -l" -coverprofile=test-results/pkg.covprofile.out | tee test-results/pkg.output.txt cat ./test-results/cmd.output.txt | go-junit-report > ./test-results/TEST-cmd.xml cat ./test-results/pkg.output.txt | go-junit-report > ./test-results/TEST-pkg.xml @@ -118,9 +118,9 @@ docs: gen_error_code_doc ## Build docs. .PHONY: deps-build deps-build: bash hack/k3s/build.sh - mkdir -p build/linux/${ARCH} + mkdir -p build/linux/${ARCH}/k3s - cp -rp build/k3s build/linux/${ARCH} + cp -rp build/k3s/bin build/linux/${ARCH}/k3s .PHONY: deps-image diff --git a/build/dockerfile/base/kuscia-deps.Dockerfile b/build/dockerfile/base/kuscia-deps.Dockerfile index 4dc0724f..e3274671 100644 --- a/build/dockerfile/base/kuscia-deps.Dockerfile +++ b/build/dockerfile/base/kuscia-deps.Dockerfile @@ -17,7 +17,7 @@ WORKDIR /tmp COPY --from=k3s-image /bin/k3s /bin/containerd /bin/containerd-shim-runc-v2 /bin/runc /bin/cni /image/home/kuscia/bin/ COPY --from=k3s-image /bin/aux /image/bin/aux -COPY build/${TARGETPLATFORM}/k3s /image/home/kuscia/bin/ +COPY build/${TARGETPLATFORM}/k3s/bin/k3s /image/home/kuscia/bin/ -RUN wget "https://github.com/krallin/tini/releases/download/v0.19.0/tini-${TARGETARCH}" -O /image/home/kuscia/bin/tini; \ - chmod +x /image/home/kuscia/bin/tini; +RUN wget "https://github.com/krallin/tini/releases/download/v0.19.0/tini-${TARGETARCH}" -O /image/home/kuscia/bin/tini && \ + chmod +x /image/home/kuscia/bin/tini diff --git a/build/dockerfile/kuscia-anolis.Dockerfile b/build/dockerfile/kuscia-anolis.Dockerfile index ecb9fb59..0b95f32f 100644 --- a/build/dockerfile/kuscia-anolis.Dockerfile +++ b/build/dockerfile/kuscia-anolis.Dockerfile @@ -1,5 +1,5 @@ ARG DEPS_IMAGE="secretflow-registry.cn-hangzhou.cr.aliyuncs.com/secretflow/kuscia-deps:0.5.0b0" -ARG KUSCIA_ENVOY_IMAGE="secretflow-registry.cn-hangzhou.cr.aliyuncs.com/secretflow/kuscia-envoy:0.5.0b0" +ARG KUSCIA_ENVOY_IMAGE="secretflow-registry.cn-hangzhou.cr.aliyuncs.com/secretflow/kuscia-envoy:0.6.0.dev20240507" ARG PROM_NODE_EXPORTER="prom/node-exporter:v1.7.0" FROM ${DEPS_IMAGE} as deps @@ -7,13 +7,13 @@ FROM ${DEPS_IMAGE} as deps FROM ${PROM_NODE_EXPORTER} as node_exporter FROM ${KUSCIA_ENVOY_IMAGE} as kuscia_envoy -FROM openanolis/anolisos:8.8 +FROM openanolis/anolisos:23 ENV TZ=Asia/Shanghai ARG TARGETPLATFORM ARG TARGETARCH ARG ROOT_DIR="/home/kuscia" -RUN yum install -y openssl net-tools which jq logrotate && \ +RUN yum install -y openssl net-tools which jq logrotate iproute procps-ng && \ yum clean all && \ mkdir -p ${ROOT_DIR}/bin && \ mkdir -p /bin/aux && \ diff --git a/build/dockerfile/kuscia-secretflow.Dockerfile b/build/dockerfile/kuscia-secretflow.Dockerfile index 4b41e0f1..83f2c9f4 100644 --- a/build/dockerfile/kuscia-secretflow.Dockerfile +++ b/build/dockerfile/kuscia-secretflow.Dockerfile @@ -1,6 +1,6 @@ ARG KUSCIA_IMAGE="secretflow/kuscia:latest" -FROM secretflow/anolis8-python:3.8.15 as python +FROM secretflow/anolis8-python:3.10.13 as python FROM ${KUSCIA_IMAGE} @@ -12,7 +12,7 @@ RUN yum install -y protobuf libnl3 libgomp && \ grep -rl '#!/root/miniconda3/envs/secretflow/bin' /usr/local/bin/ | xargs sed -i -e 's/#!\/root\/miniconda3\/envs\/secretflow/#!\/usr\/local/g' && \ rm /usr/local/bin/openssl -ARG SF_VERSION="1.3.0b0" +ARG SF_VERSION="1.5.0b0" RUN pip install secretflow-lite==${SF_VERSION} --extra-index-url https://mirrors.aliyun.com/pypi/simple/ && rm -rf /root/.cache 
RUN kuscia image builtin secretflow-registry.cn-hangzhou.cr.aliyuncs.com/secretflow/secretflow-lite-anolis8:${SF_VERSION} --store /home/kuscia/var/images diff --git a/cmd/kuscia/autonomy/autonomy.go b/cmd/kuscia/autonomy/autonomy.go index c36d56fa..fd8bf862 100644 --- a/cmd/kuscia/autonomy/autonomy.go +++ b/cmd/kuscia/autonomy/autonomy.go @@ -47,27 +47,20 @@ func NewAutonomyCommand(ctx context.Context) *cobra.Command { } func Run(ctx context.Context, configFile string, onlyControllers bool) error { - runCtx, cancel := context.WithCancel(ctx) - defer cancel() - kusciaConf := confloader.ReadConfig(configFile, common.RunModeAutonomy) conf := modules.InitDependencies(ctx, kusciaConf) defer conf.Close() - var coreDnsModule modules.Module - if !onlyControllers { - coreDnsModule = modules.RunCoreDNS(runCtx, cancel, &kusciaConf) - } - if onlyControllers { conf.MakeClients() - modules.RunOperatorsAllinOne(runCtx, cancel, conf, true) + modules.RunOperatorsAllinOneWithDestroy(conf) utils.SetupPprof(conf.Debug, conf.CtrDebugPort, true) nlog.Info("Scheduler and controllers are all started") // wait any controller failed } else { - modules.RunK3s(runCtx, cancel, conf) + coreDnsModule := modules.RunCoreDNSWithDestroy(conf) + modules.RunK3sWithDestroy(conf) // make clients after k3s start conf.MakeClients() @@ -75,7 +68,7 @@ func Run(ctx context.Context, configFile string, onlyControllers bool) error { if !ok { return errors.New("coredns module type is invalid") } - cdsModule.StartControllers(runCtx, conf.Clients.KubeClient) + cdsModule.StartControllers(ctx, conf.Clients.KubeClient) if err := modules.CreateDefaultDomain(ctx, conf); err != nil { nlog.Error(err) @@ -88,25 +81,32 @@ func Run(ctx context.Context, configFile string, onlyControllers bool) error { } if conf.EnableContainerd { - modules.RunContainerd(runCtx, cancel, conf) + modules.RunContainerdWithDestroy(conf) } wg := sync.WaitGroup{} wg.Add(2) go func() { defer wg.Done() - modules.RunOperatorsInSubProcess(runCtx, cancel) + modules.RunOperatorsInSubProcessWithDestroy(conf) }() go func() { defer wg.Done() - modules.RunEnvoy(runCtx, cancel, conf) + modules.RunEnvoyWithDestroy(conf) }() wg.Wait() - modules.RunNodeExporter(runCtx, cancel, conf) - modules.RunSsExporter(runCtx, cancel, conf) - modules.RunMetricExporter(runCtx, cancel, conf) + modules.RunKusciaAPIWithDestroy(conf) + modules.RunAgentWithDestroy(conf) + modules.RunConfManagerWithDestroy(conf) + modules.RunDataMeshWithDestroy(conf) + modules.RunTransportWithDestroy(conf) + modules.RunNodeExporterWithDestroy(conf) + modules.RunSsExporterWithDestroy(conf) + modules.RunMetricExporterWithDestroy(conf) utils.SetupPprof(conf.Debug, conf.DebugPort, false) + + modules.SetKusciaOOMScore() } - <-runCtx.Done() + conf.WaitAllModulesDone(ctx.Done()) return nil } diff --git a/cmd/kuscia/confloader/kuscia_config.go b/cmd/kuscia/confloader/kuscia_config.go index 17958647..b4e06818 100644 --- a/cmd/kuscia/confloader/kuscia_config.go +++ b/cmd/kuscia/confloader/kuscia_config.go @@ -37,14 +37,15 @@ import ( ) type LiteKusciaConfig struct { - CommonConfig `yaml:",inline"` - LiteDeployToken string `yaml:"liteDeployToken"` - MasterEndpoint string `yaml:"masterEndpoint"` - Runtime string `yaml:"runtime"` - Runk RunkConfig `yaml:"runk"` - Capacity config.CapacityCfg `yaml:"capacity"` - Image ImageConfig `yaml:"image"` - AdvancedConfig `yaml:",inline"` + CommonConfig `yaml:",inline"` + LiteDeployToken string `yaml:"liteDeployToken"` + MasterEndpoint string `yaml:"masterEndpoint"` + Runtime string 
`yaml:"runtime"` + Runk RunkConfig `yaml:"runk"` + Capacity config.CapacityCfg `yaml:"capacity"` + ReservedResources config.ReservedResourcesCfg `yaml:"reservedResources"` + Image ImageConfig `yaml:"image"` + AdvancedConfig `yaml:",inline"` } type MasterKusciaConfig struct { @@ -56,11 +57,12 @@ type MasterKusciaConfig struct { type AutomonyKusciaConfig struct { CommonConfig `yaml:",inline"` - Runtime string `yaml:"runtime"` - Runk RunkConfig `yaml:"runk"` - Capacity config.CapacityCfg `yaml:"capacity"` - Image ImageConfig `yaml:"image"` - DatastoreEndpoint string `yaml:"datastoreEndpoint"` + Runtime string `yaml:"runtime"` + Runk RunkConfig `yaml:"runk"` + Capacity config.CapacityCfg `yaml:"capacity"` + ReservedResources config.ReservedResourcesCfg `yaml:"reservedResources"` + Image ImageConfig `yaml:"image"` + DatastoreEndpoint string `yaml:"datastoreEndpoint"` AdvancedConfig `yaml:",inline"` } @@ -157,6 +159,13 @@ func (lite *LiteKusciaConfig) OverwriteKusciaConfig(kusciaConfig *KusciaConfig) kusciaConfig.Agent.Provider.K8s = lite.Runk.overwriteK8sProviderCfg(lite.Agent.Provider.K8s) kusciaConfig.Agent.Capacity = lite.Capacity + if lite.ReservedResources.CPU != "" { + kusciaConfig.Agent.ReservedResources.CPU = lite.ReservedResources.CPU + } + if lite.ReservedResources.Memory != "" { + kusciaConfig.Agent.ReservedResources.Memory = lite.ReservedResources.Memory + } + for _, p := range lite.Agent.Plugins { for j, pp := range kusciaConfig.Agent.Plugins { if p.Name == pp.Name { @@ -167,7 +176,7 @@ func (lite *LiteKusciaConfig) OverwriteKusciaConfig(kusciaConfig *KusciaConfig) } kusciaConfig.Master.Endpoint = lite.MasterEndpoint - kusciaConfig.DomainRoute.DomainCsrData = generateCsrData(lite.DomainID, lite.DomainKeyData, lite.LiteDeployToken) + kusciaConfig.DomainRoute.DomainCsrData = GenerateCsrData(lite.DomainID, lite.DomainKeyData, lite.LiteDeployToken) kusciaConfig.Debug = lite.Debug kusciaConfig.DebugPort = lite.DebugPort @@ -207,6 +216,13 @@ func (autonomy *AutomonyKusciaConfig) OverwriteKusciaConfig(kusciaConfig *Kuscia kusciaConfig.Agent.Provider.K8s = autonomy.Runk.overwriteK8sProviderCfg(autonomy.Agent.Provider.K8s) kusciaConfig.Agent.Capacity = autonomy.Capacity + if autonomy.ReservedResources.CPU != "" { + kusciaConfig.Agent.ReservedResources.CPU = autonomy.ReservedResources.CPU + } + if autonomy.ReservedResources.Memory != "" { + kusciaConfig.Agent.ReservedResources.Memory = autonomy.ReservedResources.Memory + } + for _, p := range autonomy.Agent.Plugins { for j, pp := range kusciaConfig.Agent.Plugins { if p.Name == pp.Name { @@ -255,7 +271,7 @@ func loadConfig(configFile string, conf interface{}) { } } -func generateCsrData(domainID, domainKeyData, deployToken string) string { +func GenerateCsrData(domainID, domainKeyData, deployToken string) string { domainKeyDataDecoded, err := base64.StdEncoding.DecodeString(domainKeyData) if err != nil { nlog.Fatalf("Load domain key file error: %v", err.Error()) diff --git a/cmd/kuscia/kusciainit/init_test.go b/cmd/kuscia/kusciainit/init_test.go index a521353e..88202639 100644 --- a/cmd/kuscia/kusciainit/init_test.go +++ b/cmd/kuscia/kusciainit/init_test.go @@ -2,151 +2,151 @@ package kusciainit import ( "path" - "reflect" "testing" - "gotest.tools/v3/assert" - "github.com/secretflow/kuscia/cmd/kuscia/confloader" "github.com/secretflow/kuscia/pkg/utils/paths" "github.com/secretflow/kuscia/pkg/utils/tls" + "github.com/stretchr/testify/assert" ) -func TestKusciaInitCommand(t *testing.T) { +func TestKusciaInitCommand_ConfigConvert_Master(t 
*testing.T) { + t.Parallel() domainKeyFile := path.Join(t.TempDir(), "domain.key") - assert.NilError(t, tls.GeneratePrivateKeyToFile(domainKeyFile)) + assert.Nil(t, tls.GeneratePrivateKeyToFile(domainKeyFile)) domainKeyData, err := loadDomainKeyData(domainKeyFile) - assert.NilError(t, err) - testCases := []struct { - name string - initConfig InitConfig - kusciaConfig interface{} - }{ - { - name: "Master kuscia config", - initConfig: InitConfig{ - Mode: "Master", - DomainID: "kuscia-system", - DomainKeyFile: domainKeyFile, - LogLevel: "INFO", - Protocol: "NOTLS", - EnableWorkloadApprove: true, - }, - kusciaConfig: confloader.MasterKusciaConfig{ - CommonConfig: confloader.CommonConfig{ - Mode: "Master", - DomainID: "kuscia-system", - DomainKeyData: domainKeyData, - LogLevel: "INFO", - Protocol: "NOTLS", - }, - AdvancedConfig: confloader.AdvancedConfig{ - EnableWorkloadApprove: true, - }, - }, - }, - { - name: "Lite kuscia config", - initConfig: InitConfig{ - Mode: "Lite", - DomainID: "alice", - LogLevel: "INFO", - DomainKeyFile: domainKeyFile, - LiteDeployToken: "test", - MasterEndpoint: "https://master.svc:1080", - Runtime: "runc", - Protocol: "NOTLS", - EnableWorkloadApprove: false, - }, - kusciaConfig: confloader.LiteKusciaConfig{ - CommonConfig: confloader.CommonConfig{ - Mode: "Lite", - DomainID: "alice", - DomainKeyData: domainKeyData, - LogLevel: "INFO", - Protocol: "NOTLS", - }, - LiteDeployToken: "test", - MasterEndpoint: "https://master.svc:1080", - Runtime: "runc", - AdvancedConfig: confloader.AdvancedConfig{ - EnableWorkloadApprove: false, - }, - }, + assert.Nil(t, err) + + input := InitConfig{ + Mode: "Master", + DomainID: "kuscia-system", + DomainKeyFile: domainKeyFile, + LogLevel: "INFO", + Protocol: "NOTLS", + EnableWorkloadApprove: true, + } + + dst := confloader.MasterKusciaConfig{ + CommonConfig: confloader.CommonConfig{ + Mode: "Master", + DomainID: "kuscia-system", + DomainKeyData: domainKeyData, + LogLevel: "INFO", + Protocol: "NOTLS", }, - { - name: "Autonomy kuscia config", - initConfig: InitConfig{ - Mode: "Autonomy", - DomainID: "alice", - LogLevel: "INFO", - DomainKeyFile: domainKeyFile, - Runtime: "runc", - Protocol: "NOTLS", - }, - kusciaConfig: confloader.AutomonyKusciaConfig{ - CommonConfig: confloader.CommonConfig{ - Mode: "Autonomy", - DomainID: "alice", - DomainKeyData: domainKeyData, - LogLevel: "INFO", - Protocol: "NOTLS", - }, - Runtime: "runc", - }, + AdvancedConfig: confloader.AdvancedConfig{ + EnableWorkloadApprove: true, }, } - for _, testCase := range testCases { - t.Run(testCase.name, func(t *testing.T) { - kusciaConfig := testCase.initConfig.convert2KusciaConfig() - assert.Equal(t, reflect.DeepEqual(kusciaConfig, testCase.kusciaConfig), true) - }) - } + var exceptValue interface{} + exceptValue = dst + + assert.EqualValues(t, exceptValue, input.convert2KusciaConfig()) } -func TestLoadDomainKeyData(t *testing.T) { - validkeyFile := path.Join(t.TempDir(), "domain.key") - assert.NilError(t, tls.GeneratePrivateKeyToFile(validkeyFile)) - invalidKeyFile := path.Join(t.TempDir(), "invalid-domain.key") - assert.NilError(t, paths.WriteFile(invalidKeyFile, []byte("test"))) - - testCases := []struct { - name string - domainKeyFile string - wantErr bool - }{ - { - name: "valid domain key file exists", - domainKeyFile: validkeyFile, - wantErr: false, - }, - { - name: "domain key file not exists", - domainKeyFile: path.Join(t.TempDir(), "empty.key"), - wantErr: true, - }, - { - name: "invalid domain key file exists", - domainKeyFile: invalidKeyFile, - wantErr: 
true, +func TestKusciaInitCommand_ConfigConvert_Lite(t *testing.T) { + t.Parallel() + domainKeyFile := path.Join(t.TempDir(), "domain.key") + assert.Nil(t, tls.GeneratePrivateKeyToFile(domainKeyFile)) + domainKeyData, err := loadDomainKeyData(domainKeyFile) + assert.Nil(t, err) + + input := InitConfig{ + Mode: "Lite", + DomainID: "alice", + LogLevel: "INFO", + DomainKeyFile: domainKeyFile, + LiteDeployToken: "test", + MasterEndpoint: "https://master.svc:1080", + Runtime: "runc", + Protocol: "NOTLS", + EnableWorkloadApprove: false, + } + + dst := confloader.LiteKusciaConfig{ + CommonConfig: confloader.CommonConfig{ + Mode: "Lite", + DomainID: "alice", + DomainKeyData: domainKeyData, + LogLevel: "INFO", + Protocol: "NOTLS", }, - { - name: "domain key file is empty", - domainKeyFile: "", - wantErr: false, + LiteDeployToken: "test", + MasterEndpoint: "https://master.svc:1080", + Runtime: "runc", + AdvancedConfig: confloader.AdvancedConfig{ + EnableWorkloadApprove: false, }, } - for _, testCase := range testCases { - t.Run(testCase.name, func(t *testing.T) { - _, err := loadDomainKeyData(testCase.domainKeyFile) - if !testCase.wantErr { - assert.NilError(t, err) - } else { - assert.Error(t, err, err.Error()) - } - }) + var exceptValue interface{} + exceptValue = dst + + assert.EqualValues(t, exceptValue, input.convert2KusciaConfig()) +} + +func TestKusciaInitCommand_ConfigConvert_Autonomy(t *testing.T) { + t.Parallel() + domainKeyFile := path.Join(t.TempDir(), "domain.key") + assert.Nil(t, tls.GeneratePrivateKeyToFile(domainKeyFile)) + domainKeyData, err := loadDomainKeyData(domainKeyFile) + assert.Nil(t, err) + + input := InitConfig{ + Mode: "Autonomy", + DomainID: "alice", + LogLevel: "INFO", + DomainKeyFile: domainKeyFile, + Runtime: "runc", + Protocol: "NOTLS", + } + + dst := confloader.AutomonyKusciaConfig{ + CommonConfig: confloader.CommonConfig{ + Mode: "Autonomy", + DomainID: "alice", + DomainKeyData: domainKeyData, + LogLevel: "INFO", + Protocol: "NOTLS", + }, + Runtime: "runc", } + var exceptValue interface{} + exceptValue = dst + + assert.EqualValues(t, exceptValue, input.convert2KusciaConfig()) +} + +func TestLoadDomainKeyData_FileExists(t *testing.T) { + t.Parallel() + validkeyFile := path.Join(t.TempDir(), "domain.key") + assert.Nil(t, tls.GeneratePrivateKeyToFile(validkeyFile)) + + _, err := loadDomainKeyData(validkeyFile) + assert.Nil(t, err) +} + +func TestLoadDomainKeyData_FileNotExists(t *testing.T) { + t.Parallel() + + _, err := loadDomainKeyData(path.Join(t.TempDir(), "empty.key")) + assert.NotNil(t, err) +} + +func TestLoadDomainKeyData_InvalidateFileExists(t *testing.T) { + t.Parallel() + invalidKeyFile := path.Join(t.TempDir(), "invalid-domain.key") + assert.Nil(t, paths.WriteFile(invalidKeyFile, []byte("test"))) + + _, err := loadDomainKeyData(invalidKeyFile) + assert.NotNil(t, err) +} + +func TestLoadDomainKeyData_EmptyFile(t *testing.T) { + t.Parallel() + + _, err := loadDomainKeyData("") + assert.Nil(t, err) } diff --git a/cmd/kuscia/lite/lite.go b/cmd/kuscia/lite/lite.go index 764d2c14..c4563848 100644 --- a/cmd/kuscia/lite/lite.go +++ b/cmd/kuscia/lite/lite.go @@ -26,6 +26,7 @@ import ( "github.com/secretflow/kuscia/cmd/kuscia/modules" "github.com/secretflow/kuscia/cmd/kuscia/utils" "github.com/secretflow/kuscia/pkg/common" + "github.com/secretflow/kuscia/pkg/utils/nlog" ) func NewLiteCommand(ctx context.Context) *cobra.Command { @@ -44,32 +45,30 @@ func NewLiteCommand(ctx context.Context) *cobra.Command { } func Run(ctx context.Context, configFile string) error { - 
runCtx, cancel := context.WithCancel(ctx) - defer cancel() kusciaConf := confloader.ReadConfig(configFile, common.RunModeLite) conf := modules.InitDependencies(ctx, kusciaConf) defer conf.Close() - coreDnsModule := modules.RunCoreDNS(runCtx, cancel, &kusciaConf) + coreDnsModule := modules.RunCoreDNSWithDestroy(conf) conf.MakeClients() if conf.EnableContainerd { - modules.RunContainerd(runCtx, cancel, conf) + modules.RunContainerdWithDestroy(conf) } wg := sync.WaitGroup{} wg.Add(3) go func() { defer wg.Done() - modules.RunDomainRoute(runCtx, cancel, conf) + modules.RunEnvoyWithDestroy(conf) }() go func() { defer wg.Done() - modules.RunEnvoy(runCtx, cancel, conf) + modules.RunDomainRouteWithDestroy(conf) }() go func() { defer wg.Done() - modules.RunTransport(runCtx, cancel, conf) + modules.RunTransportWithDestroy(conf) }() wg.Wait() @@ -77,17 +76,18 @@ func Run(ctx context.Context, configFile string) error { if !ok { return errors.New("coredns module type is invalid") } - cdsModule.StartControllers(runCtx, conf.Clients.KubeClient) + cdsModule.StartControllers(ctx, conf.Clients.KubeClient) - modules.RunKusciaAPI(runCtx, cancel, conf) - modules.RunAgent(runCtx, cancel, conf) - modules.RunConfManager(runCtx, cancel, conf) - modules.RunDataMesh(runCtx, cancel, conf) - modules.RunNodeExporter(runCtx, cancel, conf) - modules.RunSsExporter(runCtx, cancel, conf) - modules.RunMetricExporter(runCtx, cancel, conf) + modules.RunKusciaAPIWithDestroy(conf) + modules.RunAgentWithDestroy(conf) + modules.RunConfManagerWithDestroy(conf) + modules.RunDataMeshWithDestroy(conf) + modules.RunNodeExporterWithDestroy(conf) + modules.RunSsExporterWithDestroy(conf) + modules.RunMetricExporterWithDestroy(conf) utils.SetupPprof(conf.Debug, conf.DebugPort, false) - - <-runCtx.Done() + modules.SetKusciaOOMScore() + conf.WaitAllModulesDone(ctx.Done()) + nlog.Errorf("Lite shut down......") return nil } diff --git a/cmd/kuscia/main.go b/cmd/kuscia/main.go index 5e6884f0..16928263 100644 --- a/cmd/kuscia/main.go +++ b/cmd/kuscia/main.go @@ -18,7 +18,6 @@ package main import ( "fmt" "os" - "strings" _ "github.com/coredns/caddy/onevent" _ "github.com/coredns/coredns/plugin/acl" @@ -92,27 +91,9 @@ func main() { rootCmd.AddCommand(image.NewImageCommand(ctx)) rootCmd.AddCommand(start.NewStartCommand(ctx)) rootCmd.AddCommand(kusciainit.NewInitCommand(ctx)) - initKubeEnv() rootCmd.AddCommand(kubectlcmd.NewDefaultKubectlCommand()) if err := rootCmd.Execute(); err != nil { fmt.Println(err) os.Exit(1) } } - -func initKubeEnv() { - kubenv := os.Getenv("KUBECONFIG") - for i, arg := range os.Args { - if strings.HasPrefix(arg, "--kubeconfig=") { - kubenv = strings.Split(arg, "=")[1] - } else if strings.HasPrefix(arg, "--kubeconfig") && i+1 < len(os.Args) { - kubenv = os.Args[i+1] - } - } - if kubenv == "" { - config := "/home/kuscia/etc/kubeconfig" - if _, err := os.Stat(config); err == nil { - os.Setenv("KUBECONFIG", config) - } - } -} diff --git a/cmd/kuscia/master/master.go b/cmd/kuscia/master/master.go index 44971659..5a942c0c 100644 --- a/cmd/kuscia/master/master.go +++ b/cmd/kuscia/master/master.go @@ -55,27 +55,20 @@ func NewMasterCommand(ctx context.Context) *cobra.Command { } func Run(ctx context.Context, configFile string, onlyControllers bool) error { - runCtx, cancel := context.WithCancel(ctx) - defer cancel() - kusciaConf := confloader.ReadConfig(configFile, common.RunModeMaster) conf := modules.InitDependencies(ctx, kusciaConf) defer conf.Close() - var coreDnsModule modules.Module - if !onlyControllers { - coreDnsModule = 
modules.RunCoreDNS(runCtx, cancel, &kusciaConf) - } - if onlyControllers { conf.MakeClients() - modules.RunOperatorsAllinOne(runCtx, cancel, conf, false) + modules.RunOperatorsAllinOneWithDestroy(conf) utils.SetupPprof(conf.Debug, conf.CtrDebugPort, true) nlog.Info("Scheduler and controllers are all started") // wait any controller failed } else { - if err := modules.RunK3s(runCtx, cancel, conf); err != nil { + coreDnsModule := modules.RunCoreDNSWithDestroy(conf) + if err := modules.RunK3sWithDestroy(conf); err != nil { nlog.Errorf("k3s start failed: %s", err) return err } @@ -86,7 +79,7 @@ func Run(ctx context.Context, configFile string, onlyControllers bool) error { if !ok { return errors.New("coredns module type is invalid") } - cdsModule.StartControllers(runCtx, conf.Clients.KubeClient) + cdsModule.StartControllers(ctx, conf.Clients.KubeClient) if err := modules.CreateDefaultDomain(ctx, conf); err != nil { nlog.Error(err) @@ -102,22 +95,25 @@ func Run(ctx context.Context, configFile string, onlyControllers bool) error { wg.Add(3) go func() { defer wg.Done() - modules.RunOperatorsInSubProcess(runCtx, cancel) + modules.RunOperatorsInSubProcessWithDestroy(conf) }() go func() { defer wg.Done() - modules.RunEnvoy(runCtx, cancel, conf) + modules.RunEnvoyWithDestroy(conf) }() go func() { defer wg.Done() - modules.RunConfManager(runCtx, cancel, conf) + modules.RunConfManagerWithDestroy(conf) }() wg.Wait() - modules.RunNodeExporter(runCtx, cancel, conf) - modules.RunSsExporter(runCtx, cancel, conf) - modules.RunMetricExporter(runCtx, cancel, conf) + modules.RunKusciaAPIWithDestroy(conf) + modules.RunNodeExporterWithDestroy(conf) + modules.RunSsExporterWithDestroy(conf) + modules.RunMetricExporterWithDestroy(conf) utils.SetupPprof(conf.Debug, conf.DebugPort, false) + + modules.SetKusciaOOMScore() } - <-runCtx.Done() + conf.WaitAllModulesDone(ctx.Done()) return nil } diff --git a/cmd/kuscia/modules/agent.go b/cmd/kuscia/modules/agent.go index 61889aee..c022483e 100644 --- a/cmd/kuscia/modules/agent.go +++ b/cmd/kuscia/modules/agent.go @@ -18,6 +18,7 @@ import ( "context" "fmt" "os" + "os/exec" "path" "path/filepath" "time" @@ -52,6 +53,9 @@ func NewAgent(i *Dependencies) Module { if conf.Node.NodeName == "" { conf.Node.NodeName = hostname } + if conf.Provider.Runtime == config.ContainerRuntime { + conf.Node.KeepNodeOnExit = true + } conf.APIVersion = k8sVersion conf.AgentVersion = fmt.Sprintf("%v", meta.AgentVersionString()) conf.DomainCACert = i.CACert @@ -88,11 +92,24 @@ func NewAgent(i *Dependencies) Module { } func (agent *agentModule) Run(ctx context.Context) error { + if agent.conf.Provider.Runtime == config.ProcessRuntime { + err := agent.execPreCmds(ctx) + if err != nil { + nlog.Warn(err) + } + } return commands.RunRootCommand(ctx, agent.conf, agent.clients.KubeClient) } +func (agent *agentModule) execPreCmds(ctx context.Context) error { + cmd := exec.CommandContext(ctx, "sh", "-c", filepath.Join(agent.conf.RootDir, "scripts/deploy/cgroup_pre_detect.sh")) + cmd.Stderr = os.Stderr + cmd.Stdout = os.Stdout + return cmd.Run() +} + func (agent *agentModule) WaitReady(ctx context.Context) error { - ticker := time.NewTicker(300 * time.Second) + ticker := time.NewTicker(60 * time.Second) select { case <-commands.ReadyChan: return nil @@ -107,19 +124,34 @@ func (agent *agentModule) Name() string { return "agent" } -func RunAgent(ctx context.Context, cancel context.CancelFunc, conf *Dependencies) Module { +func RunAgentWithDestroy(conf *Dependencies) { + runCtx, cancel := 
context.WithCancel(context.Background()) + shutdownEntry := NewShutdownHookEntry(2 * time.Second) + conf.RegisterDestroyFunc(DestroyFunc{ + Name: "agent", + DestroyCh: runCtx.Done(), + DestroyFn: cancel, + ShutdownHookEntry: shutdownEntry, + }) + RunAgent(runCtx, cancel, conf, shutdownEntry) +} + +func RunAgent(ctx context.Context, cancel context.CancelFunc, conf *Dependencies, shutdownEntry *shutdownHookEntry) Module { m := NewAgent(conf) go func() { + defer func() { + if shutdownEntry != nil { + shutdownEntry.RunShutdown() + } + }() if err := m.Run(ctx); err != nil { nlog.Error(err) cancel() } }() if err := m.WaitReady(ctx); err != nil { - nlog.Error(err) - cancel() - } else { - nlog.Info("Agent is ready") + nlog.Fatalf("Agent wait ready failed: %v", err) } + nlog.Info("Agent is ready") return m } diff --git a/cmd/kuscia/modules/allinone_operator.go b/cmd/kuscia/modules/allinone_operator.go index a0bb09cd..d92c17c4 100644 --- a/cmd/kuscia/modules/allinone_operator.go +++ b/cmd/kuscia/modules/allinone_operator.go @@ -25,21 +25,21 @@ import ( ) // TODO: The definition of this function is very ugly, we need to reconsider it later -func RunOperatorsAllinOne(runctx context.Context, cancel context.CancelFunc, conf *Dependencies, startAgent bool) error { - RunInterConn(runctx, cancel, conf) - RunController(runctx, cancel, conf) - RunScheduler(runctx, cancel, conf) - RunDomainRoute(runctx, cancel, conf) - RunKusciaAPI(runctx, cancel, conf) - - if startAgent { - RunAgent(runctx, cancel, conf) - RunConfManager(runctx, cancel, conf) - RunDataMesh(runctx, cancel, conf) - RunTransport(runctx, cancel, conf) - } +func RunOperatorsAllinOneWithDestroy(conf *Dependencies) { + RunInterConnWithDestroy(conf) + RunControllerWithDestroy(conf) + RunSchedulerWithDestroy(conf) + RunDomainRouteWithDestroy(conf) +} - return nil +func RunOperatorsInSubProcessWithDestroy(conf *Dependencies) error { + runCtx, cancel := context.WithCancel(context.Background()) + conf.RegisterDestroyFunc(DestroyFunc{ + Name: "operators", + DestroyCh: runCtx.Done(), + DestroyFn: cancel, + }) + return RunOperatorsInSubProcess(runCtx, cancel) } func RunOperatorsInSubProcess(ctx context.Context, cancel context.CancelFunc) error { @@ -59,7 +59,10 @@ func RunOperatorsInSubProcess(ctx context.Context, cancel context.CancelFunc) er cmd.Stdin = os.Stdin cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr - return cmd + return &ModuleCMD{ + cmd: cmd, + score: &controllersOOMScore, + } }) nlog.Infof("Controllers subprocess finished with error: %v", err) diff --git a/cmd/kuscia/modules/common.go b/cmd/kuscia/modules/common.go new file mode 100644 index 00000000..f225dba4 --- /dev/null +++ b/cmd/kuscia/modules/common.go @@ -0,0 +1,26 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package modules + +var ( + initProcessOOMScore = -999 + kusciaOOMScore = -900 + k3sOOMScore = -800 + controllersOOMScore = -700 + containerdOOMScore = -700 + envoyOOMScore = -700 + transportOOMScore = -700 + nodeExporterOOMScore = -600 +) diff --git a/cmd/kuscia/modules/confmanager.go b/cmd/kuscia/modules/confmanager.go index ea7b360f..a4555bc1 100644 --- a/cmd/kuscia/modules/confmanager.go +++ b/cmd/kuscia/modules/confmanager.go @@ -185,7 +185,19 @@ func (m confManagerModule) readyZ() bool { return true } -func RunConfManager(ctx context.Context, cancel context.CancelFunc, conf *Dependencies) Module { +func RunConfManagerWithDestroy(conf *Dependencies) { + runCtx, cancel := context.WithCancel(context.Background()) + shutdownEntry := NewShutdownHookEntry(1 * time.Second) + conf.RegisterDestroyFunc(DestroyFunc{ + Name: "confmanager", + DestroyCh: runCtx.Done(), + DestroyFn: cancel, + ShutdownHookEntry: shutdownEntry, + }) + RunConfManager(runCtx, cancel, conf, shutdownEntry) +} + +func RunConfManager(ctx context.Context, cancel context.CancelFunc, conf *Dependencies, shutdownEntry *shutdownHookEntry) Module { m, err := NewConfManager(ctx, conf) if err != nil { nlog.Error(err) @@ -193,16 +205,19 @@ func RunConfManager(ctx context.Context, cancel context.CancelFunc, conf *Depend return m } go func() { + defer func() { + if shutdownEntry != nil { + shutdownEntry.RunShutdown() + } + }() if err := m.Run(ctx); err != nil { nlog.Error(err) cancel() } }() if err := m.WaitReady(ctx); err != nil { - nlog.Error(err) - cancel() - } else { - nlog.Info("confmanager is ready") + nlog.Fatalf("ConfManager wait ready failed: %v", err) } + nlog.Info("ConfManager is ready") return m } diff --git a/cmd/kuscia/modules/containerd.go b/cmd/kuscia/modules/containerd.go index fbe1ae4d..121ccca5 100644 --- a/cmd/kuscia/modules/containerd.go +++ b/cmd/kuscia/modules/containerd.go @@ -79,12 +79,23 @@ func (s *containerdModule) Run(ctx context.Context) error { cmd := exec.CommandContext(ctx, filepath.Join(s.Root, "bin/containerd"), args...) 
cmd.Stderr = n cmd.Stdout = n - return cmd + return &ModuleCMD{ + cmd: cmd, + score: &containerdOOMScore, + } }) } func (s *containerdModule) execPreCmds(ctx context.Context) error { - cmd := exec.Command("sh", "-c", filepath.Join(s.Root, "scripts/deploy/containerd_pre_detect.sh")) + cmd := exec.CommandContext(ctx, "sh", "-c", filepath.Join(s.Root, "scripts/deploy/iptables_pre_detect.sh")) + cmd.Stderr = os.Stderr + cmd.Stdout = os.Stdout + err := cmd.Run() + if err != nil { + return err + } + + cmd = exec.CommandContext(ctx, "sh", "-c", filepath.Join(s.Root, "scripts/deploy/cgroup_pre_detect.sh")) cmd.Stderr = os.Stderr cmd.Stdout = os.Stdout return cmd.Run() @@ -135,19 +146,34 @@ func (s *containerdModule) Name() string { return "containerd" } -func RunContainerd(ctx context.Context, cancel context.CancelFunc, conf *Dependencies) Module { +func RunContainerdWithDestroy(conf *Dependencies) { + runCtx, cancel := context.WithCancel(context.Background()) + shutdownEntry := NewShutdownHookEntry(1 * time.Second) + conf.RegisterDestroyFunc(DestroyFunc{ + Name: "containerd", + DestroyCh: runCtx.Done(), + DestroyFn: cancel, + ShutdownHookEntry: shutdownEntry, + }) + RunContainerd(runCtx, cancel, conf, shutdownEntry) +} + +func RunContainerd(ctx context.Context, cancel context.CancelFunc, conf *Dependencies, shutdownEntry *shutdownHookEntry) Module { m := NewContainerd(conf) go func() { + defer func() { + if shutdownEntry != nil { + shutdownEntry.RunShutdown() + } + }() if err := m.Run(ctx); err != nil { nlog.Error(err) cancel() } }() if err := m.WaitReady(ctx); err != nil { - nlog.Error(err) - cancel() - } else { - nlog.Info("containerd is ready") + nlog.Fatalf("Containerd wait ready failed: %v", err) } + nlog.Info("Containerd is ready") return m } diff --git a/cmd/kuscia/modules/controllers.go b/cmd/kuscia/modules/controllers.go index 7ad4c33b..c1246ac6 100644 --- a/cmd/kuscia/modules/controllers.go +++ b/cmd/kuscia/modules/controllers.go @@ -17,6 +17,7 @@ package modules import ( "context" + "time" "github.com/secretflow/kuscia/pkg/controllers" "github.com/secretflow/kuscia/pkg/controllers/clusterdomainroute" @@ -84,20 +85,34 @@ func NewControllersModule(i *Dependencies) Module { ) } -func RunController(ctx context.Context, cancel context.CancelFunc, conf *Dependencies) Module { +func RunControllerWithDestroy(conf *Dependencies) { + runCtx, cancel := context.WithCancel(context.Background()) + shutdownEntry := NewShutdownHookEntry(1 * time.Second) + conf.RegisterDestroyFunc(DestroyFunc{ + Name: "controllers", + DestroyCh: runCtx.Done(), + DestroyFn: cancel, + ShutdownHookEntry: shutdownEntry, + }) + RunController(runCtx, cancel, conf, shutdownEntry) +} + +func RunController(ctx context.Context, cancel context.CancelFunc, conf *Dependencies, shutdownEntry *shutdownHookEntry) Module { m := NewControllersModule(conf) go func() { + defer func() { + if shutdownEntry != nil { + shutdownEntry.RunShutdown() + } + }() if err := m.Run(ctx); err != nil { nlog.Error(err) cancel() } }() if err := m.WaitReady(ctx); err != nil { - nlog.Error(err) - cancel() - } else { - nlog.Info("controllers is ready") + nlog.Fatalf("Controllers wait ready failed: %v", err) } - + nlog.Info("Controllers is ready") return m } diff --git a/cmd/kuscia/modules/coredns.go b/cmd/kuscia/modules/coredns.go index 131d77f0..4d815288 100644 --- a/cmd/kuscia/modules/coredns.go +++ b/cmd/kuscia/modules/coredns.go @@ -21,6 +21,7 @@ import ( "os" "path/filepath" "strings" + "time" "github.com/coredns/caddy" 
"github.com/coredns/coredns/core/dnsserver" @@ -153,9 +154,26 @@ func (s *CorednsModule) Name() string { return "coredns" } -func RunCoreDNS(ctx context.Context, cancel context.CancelFunc, conf *confloader.KusciaConfig) Module { +func RunCoreDNSWithDestroy(conf *Dependencies) Module { + runCtx, cancel := context.WithCancel(context.Background()) + shutdownEntry := NewShutdownHookEntry(1 * time.Second) + conf.RegisterDestroyFunc(DestroyFunc{ + Name: "coredns", + DestroyCh: runCtx.Done(), + DestroyFn: cancel, + ShutdownHookEntry: shutdownEntry, + }) + return RunCoreDNS(runCtx, cancel, &conf.KusciaConfig, shutdownEntry) +} + +func RunCoreDNS(ctx context.Context, cancel context.CancelFunc, conf *confloader.KusciaConfig, shutdownEntry *shutdownHookEntry) Module { m := NewCoreDNS(conf) go func() { + defer func() { + if shutdownEntry != nil { + shutdownEntry.RunShutdown() + } + }() if err := m.Run(ctx); err != nil { nlog.Error(err) cancel() @@ -163,12 +181,9 @@ func RunCoreDNS(ctx context.Context, cancel context.CancelFunc, conf *confloader }() if err := m.WaitReady(ctx); err != nil { - nlog.Error(err) - cancel() - } else { - nlog.Info("coredns is ready") + nlog.Fatalf("CoreDNS wait ready failed: %v", err) } - + nlog.Info("CoreDNS is ready") return m } diff --git a/cmd/kuscia/modules/datamesh.go b/cmd/kuscia/modules/datamesh.go index 05e7c2f5..ba18b016 100644 --- a/cmd/kuscia/modules/datamesh.go +++ b/cmd/kuscia/modules/datamesh.go @@ -71,7 +71,7 @@ func (m dataMeshModule) Run(ctx context.Context) error { func (m dataMeshModule) WaitReady(ctx context.Context) error { timeoutTicker := time.NewTicker(30 * time.Second) defer timeoutTicker.Stop() - checkTicker := time.NewTicker(1 * time.Second) + checkTicker := time.NewTicker(100 * time.Millisecond) defer checkTicker.Stop() for { select { @@ -141,7 +141,19 @@ func (m dataMeshModule) readyZ() bool { return true } -func RunDataMesh(ctx context.Context, cancel context.CancelFunc, conf *Dependencies) Module { +func RunDataMeshWithDestroy(conf *Dependencies) { + runCtx, cancel := context.WithCancel(context.Background()) + shutdownEntry := NewShutdownHookEntry(1 * time.Second) + conf.RegisterDestroyFunc(DestroyFunc{ + Name: "datamesh", + DestroyCh: runCtx.Done(), + DestroyFn: cancel, + ShutdownHookEntry: shutdownEntry, + }) + RunDataMesh(runCtx, cancel, conf, shutdownEntry) +} + +func RunDataMesh(ctx context.Context, cancel context.CancelFunc, conf *Dependencies, shutdownEntry *shutdownHookEntry) Module { m, err := NewDataMesh(conf) if err != nil { nlog.Error(err) @@ -149,16 +161,19 @@ func RunDataMesh(ctx context.Context, cancel context.CancelFunc, conf *Dependenc return m } go func() { + defer func() { + if shutdownEntry != nil { + shutdownEntry.RunShutdown() + } + }() if err := m.Run(ctx); err != nil { nlog.Error(err) cancel() } }() if err := m.WaitReady(ctx); err != nil { - nlog.Error(err) - cancel() - } else { - nlog.Info("datamesh is ready") + nlog.Fatalf("DataMesh wait ready failed: %v", err) } + nlog.Info("DataMesh is ready") return m } diff --git a/cmd/kuscia/modules/datamesh_test.go b/cmd/kuscia/modules/datamesh_test.go index 79d4f3d3..d6c720db 100644 --- a/cmd/kuscia/modules/datamesh_test.go +++ b/cmd/kuscia/modules/datamesh_test.go @@ -22,5 +22,5 @@ import ( func Test_RunDataMesh(t *testing.T) { runCtx, cancel := context.WithCancel(context.Background()) dependency := mockDependency(t) - RunDataMesh(runCtx, cancel, dependency) + RunDataMesh(runCtx, cancel, dependency, nil) } diff --git a/cmd/kuscia/modules/domainroute.go 
b/cmd/kuscia/modules/domainroute.go index b2b86961..fd5727fb 100644 --- a/cmd/kuscia/modules/domainroute.go +++ b/cmd/kuscia/modules/domainroute.go @@ -128,20 +128,34 @@ func (d *domainRouteModule) Name() string { return "domainroute" } -func RunDomainRoute(ctx context.Context, cancel context.CancelFunc, conf *Dependencies) Module { +func RunDomainRouteWithDestroy(conf *Dependencies) { + runCtx, cancel := context.WithCancel(context.Background()) + shutdownEntry := NewShutdownHookEntry(2 * time.Second) + conf.RegisterDestroyFunc(DestroyFunc{ + Name: "domainroute", + DestroyCh: runCtx.Done(), + DestroyFn: cancel, + ShutdownHookEntry: shutdownEntry, + }) + RunDomainRoute(runCtx, cancel, conf, shutdownEntry) +} + +func RunDomainRoute(ctx context.Context, cancel context.CancelFunc, conf *Dependencies, shutdownEntry *shutdownHookEntry) Module { m := NewDomainRoute(conf) go func() { + defer func() { + if shutdownEntry != nil { + shutdownEntry.RunShutdown() + } + }() if err := m.Run(ctx); err != nil { nlog.Error(err) cancel() } }() if err := m.WaitReady(ctx); err != nil { - nlog.Errorf("domain route wait ready failed with error: %v", err) - cancel() - } else { - nlog.Info("domainroute is ready") + nlog.Fatalf("DomainRoute wait ready failed: %v", err) } - + nlog.Info("DomainRoute is ready") return m } diff --git a/cmd/kuscia/modules/domainroute_test.go b/cmd/kuscia/modules/domainroute_test.go index 7f24083a..6d52d080 100644 --- a/cmd/kuscia/modules/domainroute_test.go +++ b/cmd/kuscia/modules/domainroute_test.go @@ -57,5 +57,5 @@ func Test_RunDomainRoute(t *testing.T) { }, } runCtx, cancel := context.WithCancel(context.Background()) - RunDomainRoute(runCtx, cancel, dependency) + RunDomainRoute(runCtx, cancel, dependency, nil) } diff --git a/cmd/kuscia/modules/envoy.go b/cmd/kuscia/modules/envoy.go index 7a3b54de..1e13f68a 100644 --- a/cmd/kuscia/modules/envoy.go +++ b/cmd/kuscia/modules/envoy.go @@ -123,7 +123,10 @@ func (s *envoyModule) Run(ctx context.Context) error { cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr cmd.Env = os.Environ() - return cmd + return &ModuleCMD{ + cmd: cmd, + score: &envoyOOMScore, + } }) } @@ -161,7 +164,7 @@ func (s *envoyModule) logRotate(ctx context.Context, filePath string) { func (s *envoyModule) WaitReady(ctx context.Context) error { ticker := time.NewTicker(60 * time.Second) defer ticker.Stop() - tickerReady := time.NewTicker(time.Second) + tickerReady := time.NewTicker(100 * time.Millisecond) defer tickerReady.Stop() for { select { @@ -192,19 +195,34 @@ func (s *envoyModule) readCommandArgs() (*EnvoyCommandLineConfig, error) { return &config, err } -func RunEnvoy(ctx context.Context, cancel context.CancelFunc, conf *Dependencies) Module { +func RunEnvoyWithDestroy(conf *Dependencies) { + runCtx, cancel := context.WithCancel(context.Background()) + shutdownEntry := NewShutdownHookEntry(1 * time.Second) + conf.RegisterDestroyFunc(DestroyFunc{ + Name: "envoy", + DestroyCh: runCtx.Done(), + DestroyFn: cancel, + ShutdownHookEntry: shutdownEntry, + }) + RunEnvoy(runCtx, cancel, conf, shutdownEntry) +} + +func RunEnvoy(ctx context.Context, cancel context.CancelFunc, conf *Dependencies, shutdownEntry *shutdownHookEntry) Module { m := NewEnvoy(conf) go func() { + defer func() { + if shutdownEntry != nil { + shutdownEntry.RunShutdown() + } + }() if err := m.Run(ctx); err != nil { nlog.Error(err) cancel() } }() if err := m.WaitReady(ctx); err != nil { - nlog.Error(err) - cancel() - } else { - nlog.Info("Envoy is ready") + nlog.Fatalf("Envoy wait ready failed: %v", err) } + 
nlog.Info("Envoy is ready") return m } diff --git a/cmd/kuscia/modules/interconn.go b/cmd/kuscia/modules/interconn.go index 66cba119..25a28f33 100644 --- a/cmd/kuscia/modules/interconn.go +++ b/cmd/kuscia/modules/interconn.go @@ -16,6 +16,7 @@ package modules import ( "context" + "time" "github.com/secretflow/kuscia/pkg/interconn" "github.com/secretflow/kuscia/pkg/utils/nlog" @@ -25,7 +26,19 @@ func NewInterConn(ctx context.Context, deps *Dependencies) (Module, error) { return interconn.NewServer(ctx, deps.Clients) } -func RunInterConn(ctx context.Context, cancel context.CancelFunc, conf *Dependencies) { +func RunInterConnWithDestroy(conf *Dependencies) { + runCtx, cancel := context.WithCancel(context.Background()) + shutdownEntry := NewShutdownHookEntry(1 * time.Second) + conf.RegisterDestroyFunc(DestroyFunc{ + Name: "interconn", + DestroyCh: runCtx.Done(), + DestroyFn: cancel, + ShutdownHookEntry: shutdownEntry, + }) + RunInterConn(runCtx, cancel, conf, shutdownEntry) +} + +func RunInterConn(ctx context.Context, cancel context.CancelFunc, conf *Dependencies, shutdownEntry *shutdownHookEntry) { m, err := NewInterConn(ctx, conf) if err != nil { nlog.Error(err) @@ -34,15 +47,18 @@ func RunInterConn(ctx context.Context, cancel context.CancelFunc, conf *Dependen } go func() { + defer func() { + if shutdownEntry != nil { + shutdownEntry.RunShutdown() + } + }() if err := m.Run(ctx); err != nil { nlog.Error(err) cancel() } }() if err := m.WaitReady(ctx); err != nil { - nlog.Error(err) - cancel() - } else { - nlog.Info("interconn is ready") + nlog.Fatalf("InterConn wait ready failed: %v", err) } + nlog.Info("InterConn is ready") } diff --git a/cmd/kuscia/modules/k3s.go b/cmd/kuscia/modules/k3s.go index b88809f9..f43e6a67 100644 --- a/cmd/kuscia/modules/k3s.go +++ b/cmd/kuscia/modules/k3s.go @@ -217,7 +217,10 @@ func (s *k3sModule) Run(ctx context.Context) error { envs := os.Environ() envs = append(envs, "CATTLE_NEW_SIGNED_CERT_EXPIRATION_DAYS=3650") cmd.Env = envs - return cmd + return &ModuleCMD{ + cmd: cmd, + score: &k3sOOMScore, + } }) } @@ -244,7 +247,19 @@ func (s *k3sModule) Name() string { return "k3s" } -func RunK3s(ctx context.Context, cancel context.CancelFunc, conf *Dependencies) error { +func RunK3sWithDestroy(conf *Dependencies) error { + runCtx, cancel := context.WithCancel(context.Background()) + shutdownEntry := NewShutdownHookEntry(1 * time.Second) + conf.RegisterDestroyFunc(DestroyFunc{ + Name: "k3s", + DestroyCh: runCtx.Done(), + DestroyFn: cancel, + ShutdownHookEntry: shutdownEntry, + }) + return RunK3s(runCtx, cancel, conf, shutdownEntry) +} + +func RunK3s(ctx context.Context, cancel context.CancelFunc, conf *Dependencies, shutdownEntry *shutdownHookEntry) error { // check DatastoreEndpoint if err := datastore.CheckDatastoreEndpoint(conf.Master.DatastoreEndpoint); err != nil { nlog.Error(err) @@ -254,6 +269,11 @@ func RunK3s(ctx context.Context, cancel context.CancelFunc, conf *Dependencies) m := NewK3s(conf) go func() { + defer func() { + if shutdownEntry != nil { + shutdownEntry.RunShutdown() + } + }() if err := m.Run(ctx); err != nil { nlog.Error(err) cancel() diff --git a/cmd/kuscia/modules/kusciaapi.go b/cmd/kuscia/modules/kusciaapi.go index be2c3ce6..08fdee36 100644 --- a/cmd/kuscia/modules/kusciaapi.go +++ b/cmd/kuscia/modules/kusciaapi.go @@ -68,20 +68,15 @@ func NewKusciaAPI(d *Dependencies) (Module, error) { kusciaAPIConfig.RunMode = d.RunMode kusciaAPIConfig.DomainCertValue = &d.DomainCertByMasterValue kusciaAPIConfig.DomainID = d.DomainID + 
kusciaAPIConfig.Protocol = d.Protocol - if d.Protocol == "" { - d.Protocol = common.MTLS - } - switch d.Protocol { - case common.NOTLS: + protocol := kusciaAPIConfig.Protocol + if protocol == "" { + kusciaAPIConfig.Protocol = common.MTLS + protocol = common.MTLS + } else if protocol == common.NOTLS { kusciaAPIConfig.TLS = nil kusciaAPIConfig.Token = nil - case common.TLS: - kusciaAPIConfig.TLS.RootCA = nil - } - - if kusciaAPIConfig.Protocol == "" { - kusciaAPIConfig.Protocol = d.Protocol } if kusciaAPIConfig.TLS != nil { @@ -121,7 +116,7 @@ func (m kusciaAPIModule) Run(ctx context.Context) error { func (m kusciaAPIModule) WaitReady(ctx context.Context) error { timeoutTicker := time.NewTicker(30 * time.Second) defer timeoutTicker.Stop() - checkTicker := time.NewTicker(1 * time.Second) + checkTicker := time.NewTicker(100 * time.Millisecond) defer checkTicker.Stop() for { select { @@ -244,7 +239,19 @@ func (m kusciaAPIModule) readyZ() bool { return res.Data.Ready } -func RunKusciaAPI(ctx context.Context, cancel context.CancelFunc, conf *Dependencies) Module { +func RunKusciaAPIWithDestroy(conf *Dependencies) { + runCtx, cancel := context.WithCancel(context.Background()) + shutdownEntry := NewShutdownHookEntry(1 * time.Second) + conf.RegisterDestroyFunc(DestroyFunc{ + Name: "kusciaapi", + DestroyCh: runCtx.Done(), + DestroyFn: cancel, + ShutdownHookEntry: shutdownEntry, + }) + RunKusciaAPI(runCtx, cancel, conf, shutdownEntry) +} + +func RunKusciaAPI(ctx context.Context, cancel context.CancelFunc, conf *Dependencies, shutdownEntry *shutdownHookEntry) Module { m, err := NewKusciaAPI(conf) if err != nil { nlog.Error(err) @@ -252,16 +259,19 @@ func RunKusciaAPI(ctx context.Context, cancel context.CancelFunc, conf *Dependen return m } go func() { + defer func() { + if shutdownEntry != nil { + shutdownEntry.RunShutdown() + } + }() if err := m.Run(ctx); err != nil { nlog.Error(err) cancel() } }() if err := m.WaitReady(ctx); err != nil { - nlog.Error(err) - cancel() - } else { - nlog.Info("kuscia api is ready") + nlog.Fatalf("KusciaApi wait ready failed: %v", err) } + nlog.Info("KusciaApi is ready") return m } diff --git a/cmd/kuscia/modules/kusciaapi_test.go b/cmd/kuscia/modules/kusciaapi_test.go index 8b24e601..3d0a82bd 100644 --- a/cmd/kuscia/modules/kusciaapi_test.go +++ b/cmd/kuscia/modules/kusciaapi_test.go @@ -17,10 +17,47 @@ package modules import ( "context" "testing" + + "github.com/secretflow/kuscia/pkg/common" ) func Test_RunKusciaAPI(t *testing.T) { runCtx, cancel := context.WithCancel(context.Background()) dependency := mockDependency(t) - RunKusciaAPI(runCtx, cancel, dependency) + _ = RunKusciaAPI(runCtx, cancel, dependency, nil) + cancel() + runCtx.Done() +} + +func Test_RunKusciaAPIWithTLS(t *testing.T) { + runCtx, cancel := context.WithCancel(context.Background()) + dependency := mockDependency(t) + dependency.KusciaAPI.HTTPPort = 8010 + dependency.KusciaAPI.GRPCPort = 8011 + dependency.KusciaAPI.HTTPInternalPort = 8012 + dependency.Protocol = common.TLS + RunKusciaAPI(runCtx, cancel, dependency, nil) + cancel() +} + +func Test_RunKusciaAPIWithMTLS(t *testing.T) { + runCtx, cancel := context.WithCancel(context.Background()) + dependency := mockDependency(t) + dependency.KusciaAPI.HTTPPort = 8020 + dependency.KusciaAPI.GRPCPort = 8021 + dependency.KusciaAPI.HTTPInternalPort = 8022 + dependency.Protocol = common.MTLS + RunKusciaAPI(runCtx, cancel, dependency, nil) + cancel() +} + +func Test_RunKusciaAPIWithNOTLS(t *testing.T) { + runCtx, cancel := 
context.WithCancel(context.Background()) + dependency := mockDependency(t) + dependency.KusciaAPI.HTTPPort = 8030 + dependency.KusciaAPI.GRPCPort = 8031 + dependency.KusciaAPI.HTTPInternalPort = 8032 + dependency.Protocol = common.NOTLS + RunKusciaAPI(runCtx, cancel, dependency, nil) + cancel() } diff --git a/cmd/kuscia/modules/metricexporter.go b/cmd/kuscia/modules/metricexporter.go index 78db6f4e..a6bd42fc 100644 --- a/cmd/kuscia/modules/metricexporter.go +++ b/cmd/kuscia/modules/metricexporter.go @@ -101,19 +101,34 @@ func (exporter *metricExporterModule) Name() string { return "metricexporter" } -func RunMetricExporter(ctx context.Context, cancel context.CancelFunc, conf *Dependencies) Module { +func RunMetricExporterWithDestroy(conf *Dependencies) { + runCtx, cancel := context.WithCancel(context.Background()) + shutdownEntry := NewShutdownHookEntry(1 * time.Second) + conf.RegisterDestroyFunc(DestroyFunc{ + Name: "metricexporter", + DestroyCh: runCtx.Done(), + DestroyFn: cancel, + ShutdownHookEntry: shutdownEntry, + }) + RunMetricExporter(runCtx, cancel, conf, shutdownEntry) +} + +func RunMetricExporter(ctx context.Context, cancel context.CancelFunc, conf *Dependencies, shutdownEntry *shutdownHookEntry) Module { m := NewMetricExporter(conf) go func() { + defer func() { + if shutdownEntry != nil { + shutdownEntry.RunShutdown() + } + }() if err := m.Run(ctx); err != nil { nlog.Error(err) cancel() } }() if err := m.WaitReady(ctx); err != nil { - nlog.Error(err) - cancel() - } else { - nlog.Info("Metric exporter is ready") + nlog.Fatalf("MetricTransport wait ready failed: %v", err) } + nlog.Info("Metric exporter is ready") return m } diff --git a/cmd/kuscia/modules/modules.go b/cmd/kuscia/modules/modules.go index 59030a6d..78a3b674 100644 --- a/cmd/kuscia/modules/modules.go +++ b/cmd/kuscia/modules/modules.go @@ -21,8 +21,11 @@ import ( "crypto/x509" "encoding/base64" "os" + "os/exec" "path/filepath" + "sync" "sync/atomic" + "time" corev1 "k8s.io/api/core/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" @@ -37,6 +40,7 @@ import ( "github.com/secretflow/kuscia/pkg/utils/kubeconfig" "github.com/secretflow/kuscia/pkg/utils/nlog" "github.com/secretflow/kuscia/pkg/utils/nlog/zlogwriter" + "github.com/secretflow/kuscia/pkg/utils/process" tlsutils "github.com/secretflow/kuscia/pkg/utils/tls" "github.com/secretflow/kuscia/pkg/web/logs" ) @@ -69,6 +73,106 @@ type Dependencies struct { DomainCertByMasterValue atomic.Value // the value is <*x509.Certificate> LogConfig *nlog.LogConfig Logrorate confloader.LogrotateConfig + destroyFuncs []DestroyFunc + stopCh chan struct{} + stopped int32 // 0: not stopped 1: stopped + lock sync.Mutex +} + +type DestroyFunc struct { + Name string + DestroyFn func() + DestroyCh <-chan struct{} + ShutdownHookEntry *shutdownHookEntry +} + +func (d *Dependencies) Destroy() { + d.lock.Lock() + defer d.lock.Unlock() + nlog.Infof("Begin to destroy modules......") + size := len(d.destroyFuncs) + if size > 0 { + for i := size - 1; i >= 0; i-- { + destroy := d.destroyFuncs[i] + nlog.Infof("Module [%s] begin to destroy", destroy.Name) + destroy.DestroyFn() + shutdownEntry := destroy.ShutdownHookEntry + if shutdownEntry != nil { + nlog.Infof("module[%s] shutdown entry found", destroy.Name) + shutdownEntry.WaitDown() + } + } + } + nlog.Infof("End to destroy modules......") +} + +func (d *Dependencies) RegisterDestroyFunc(destroyFunc DestroyFunc) { + d.lock.Lock() + defer d.lock.Unlock() + nlog.Infof("Add destroyFunc [%s]", destroyFunc.Name) + d.destroyFuncs = 
append(d.destroyFuncs, destroyFunc) +} + +func (d *Dependencies) Stop() <-chan struct{} { + d.lock.Lock() + defer d.lock.Unlock() + for _, destroy := range d.destroyFuncs { + go func(f DestroyFunc) { + <-f.DestroyCh + nlog.Errorf("Module[%s] receive stop", f.Name) + if atomic.CompareAndSwapInt32(&d.stopped, 0, 1) { + close(d.stopCh) + } + }(destroy) + } + return d.stopCh +} + +func (d *Dependencies) WaitAllModulesDone(stop <-chan struct{}) { + select { + // receive signal exit + case <-stop: + nlog.Infof("receive signal exit....") + // receive module exit + case <-d.Stop(): + nlog.Infof("receive module exit....") + } + d.Destroy() +} + +type shutdownHookEntry struct { + lock sync.Mutex + closed bool + closeCh chan struct{} + timeOut time.Duration +} + +func NewShutdownHookEntry(timeout time.Duration) *shutdownHookEntry { + return &shutdownHookEntry{ + timeOut: timeout, + closeCh: make(chan struct{}), + } +} + +func (s *shutdownHookEntry) RunShutdown() { + s.lock.Lock() + defer s.lock.Unlock() + if !s.closed { + close(s.closeCh) + s.closed = true + } +} + +func (s *shutdownHookEntry) WaitDown() { + timeout := s.timeOut + timeoutTicker := time.NewTicker(timeout) + defer timeoutTicker.Stop() + select { + case <-timeoutTicker.C: + nlog.Warnf("exceed timeout: %s", timeout.String()) + case <-s.closeCh: + nlog.Infof("shut down with closeCh close") + } } func (d *Dependencies) MakeClients() { @@ -204,6 +308,7 @@ func InitDependencies(ctx context.Context, kusciaConf confloader.KusciaConfig) * dependencies := &Dependencies{ KusciaConfig: kusciaConf, } + dependencies.stopCh = make(chan struct{}) // init log logConfig := &nlog.LogConfig{ LogLevel: kusciaConf.LogLevel, @@ -285,3 +390,40 @@ func InitDependencies(ctx context.Context, kusciaConf confloader.KusciaConfig) * dependencies.MetricExportPort = "9091" return dependencies } + +type ModuleCMD struct { + cmd *exec.Cmd + score *int +} + +func (c *ModuleCMD) Start() error { + return c.cmd.Start() +} + +func (c *ModuleCMD) Wait() error { + return c.cmd.Wait() +} + +func (c *ModuleCMD) Pid() int { + if c.cmd != nil && c.cmd.Process != nil { + return c.cmd.Process.Pid + } + return 0 +} + +func (c *ModuleCMD) SetOOMScore() error { + if c.score == nil { + return nil + } + return process.SetOOMScore(c.Pid(), *c.score) +} + +func SetKusciaOOMScore() { + if err := process.SetOOMScore(1, initProcessOOMScore); err != nil { + nlog.Warnf("Set init process oom_score_adj failed, %v, skip setting it", err) + } + + if err := process.SetOOMScore(os.Getpid(), kusciaOOMScore); err != nil { + nlog.Warnf("Set kuscia controllers process oom_score_adj failed, %v, skip setting it", err) + } +} diff --git a/cmd/kuscia/modules/nodeexporter.go b/cmd/kuscia/modules/nodeexporter.go index 1db5ca31..16ddd6e0 100644 --- a/cmd/kuscia/modules/nodeexporter.go +++ b/cmd/kuscia/modules/nodeexporter.go @@ -54,7 +54,10 @@ func (exporter *nodeExporterModule) Run(ctx context.Context) error { cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr cmd.Env = os.Environ() - return cmd + return &ModuleCMD{ + cmd: cmd, + score: &nodeExporterOOMScore, + } }) } @@ -107,17 +110,34 @@ func (exporter *nodeExporterModule) Name() string { return "nodeexporter" } -func RunNodeExporter(ctx context.Context, cancel context.CancelFunc, conf *Dependencies) Module { +func RunNodeExporterWithDestroy(conf *Dependencies) { + runCtx, cancel := context.WithCancel(context.Background()) + shutdownEntry := NewShutdownHookEntry(1 * time.Second) + conf.RegisterDestroyFunc(DestroyFunc{ + Name: "nodeexporter", + DestroyCh: 
runCtx.Done(), + DestroyFn: cancel, + ShutdownHookEntry: shutdownEntry, + }) + RunNodeExporter(runCtx, cancel, conf, shutdownEntry) +} + +func RunNodeExporter(ctx context.Context, cancel context.CancelFunc, conf *Dependencies, shutdownEntry *shutdownHookEntry) Module { m := NewNodeExporter(conf) go func() { + defer func() { + if shutdownEntry != nil { + shutdownEntry.RunShutdown() + } + }() if err := m.Run(ctx); err != nil { nlog.Error(err) cancel() } }() if err := m.WaitReady(ctx); err != nil { - nlog.Error(err) cancel() + nlog.Fatalf("NodeExporter wait ready failed: %v", err) } else { nlog.Info("Node_exporter is ready") } diff --git a/cmd/kuscia/modules/scheduler.go b/cmd/kuscia/modules/scheduler.go index e6268c94..a090a27b 100644 --- a/cmd/kuscia/modules/scheduler.go +++ b/cmd/kuscia/modules/scheduler.go @@ -110,20 +110,34 @@ func (s *schedulerModule) Name() string { return "kusciascheduler" } -func RunScheduler(ctx context.Context, cancel context.CancelFunc, conf *Dependencies) Module { +func RunSchedulerWithDestroy(conf *Dependencies) { + runCtx, cancel := context.WithCancel(context.Background()) + shutdownEntry := NewShutdownHookEntry(1 * time.Second) + conf.RegisterDestroyFunc(DestroyFunc{ + Name: "kusciascheduler", + DestroyCh: runCtx.Done(), + DestroyFn: cancel, + ShutdownHookEntry: shutdownEntry, + }) + RunScheduler(runCtx, cancel, conf, shutdownEntry) +} + +func RunScheduler(ctx context.Context, cancel context.CancelFunc, conf *Dependencies, shutdownEntry *shutdownHookEntry) Module { m := NewScheduler(conf) go func() { + defer func() { + if shutdownEntry != nil { + shutdownEntry.RunShutdown() + } + }() if err := m.Run(ctx); err != nil { nlog.Error(err) cancel() } }() if err := m.WaitReady(ctx); err != nil { - nlog.Error(err) - cancel() - } else { - nlog.Info("scheduler is ready") + nlog.Fatalf("Scheduler wait ready failed: %v", err) } - + nlog.Info("scheduler is ready") return m } diff --git a/cmd/kuscia/modules/ssexporter.go b/cmd/kuscia/modules/ssexporter.go index 999d4ccb..5e8abbfb 100644 --- a/cmd/kuscia/modules/ssexporter.go +++ b/cmd/kuscia/modules/ssexporter.go @@ -97,19 +97,34 @@ func (exporter *ssExporterModule) Name() string { return "ssexporter" } -func RunSsExporter(ctx context.Context, cancel context.CancelFunc, conf *Dependencies) Module { +func RunSsExporterWithDestroy(conf *Dependencies) { + runCtx, cancel := context.WithCancel(context.Background()) + shutdownEntry := NewShutdownHookEntry(500 * time.Millisecond) + conf.RegisterDestroyFunc(DestroyFunc{ + Name: "ssexporter", + DestroyCh: runCtx.Done(), + DestroyFn: cancel, + ShutdownHookEntry: shutdownEntry, + }) + RunSsExporter(runCtx, cancel, conf, shutdownEntry) +} + +func RunSsExporter(ctx context.Context, cancel context.CancelFunc, conf *Dependencies, shutdownEntry *shutdownHookEntry) Module { m := NewSsExporter(conf) go func() { + defer func() { + if shutdownEntry != nil { + shutdownEntry.RunShutdown() + } + }() if err := m.Run(ctx); err != nil { nlog.Error(err) cancel() } }() if err := m.WaitReady(ctx); err != nil { - nlog.Error(err) - cancel() - } else { - nlog.Info("Ss exporter is ready") + nlog.Fatalf("SsTransport wait ready failed: %v", err) } + nlog.Info("Ss exporter is ready") return m } diff --git a/cmd/kuscia/modules/transport.go b/cmd/kuscia/modules/transport.go index add60670..c6c8bded 100644 --- a/cmd/kuscia/modules/transport.go +++ b/cmd/kuscia/modules/transport.go @@ -73,7 +73,10 @@ func (t *transportModule) runAsSubProcess(ctx context.Context) error { return sp.Run(ctx, func(ctx 
context.Context) supervisor.Cmd { cmd := exec.CommandContext(ctx, filepath.Join(t.rootDir, transportBinPath), args...) cmd.Env = os.Environ() - return cmd + return &ModuleCMD{ + cmd: cmd, + score: &transportOOMScore, + } }) } @@ -111,20 +114,34 @@ func (t *transportModule) readyz(address string) error { return nil } -func RunTransport(ctx context.Context, cancel context.CancelFunc, conf *Dependencies) Module { +func RunTransportWithDestroy(conf *Dependencies) { + runCtx, cancel := context.WithCancel(context.Background()) + shutdownEntry := NewShutdownHookEntry(1 * time.Second) + conf.RegisterDestroyFunc(DestroyFunc{ + Name: "transport", + DestroyCh: runCtx.Done(), + DestroyFn: cancel, + }) + RunTransport(runCtx, cancel, conf, shutdownEntry) +} + +func RunTransport(ctx context.Context, cancel context.CancelFunc, conf *Dependencies, shutdownEntry *shutdownHookEntry) Module { m := NewTransport(conf) go func() { + defer func() { + if shutdownEntry != nil { + shutdownEntry.RunShutdown() + } + }() if err := m.Run(ctx); err != nil { nlog.Error(err) cancel() } }() if err := m.WaitReady(ctx); err != nil { - nlog.Error(err) - cancel() - } else { - nlog.Info("transport is ready") + nlog.Fatalf("Transport wait ready failed: %v", err) } + nlog.Info("transport is ready") return m } diff --git a/docs/deployment/Docker_deployment_kuscia/deploy_master_lite_cn.md b/docs/deployment/Docker_deployment_kuscia/deploy_master_lite_cn.md index ea93dc68..b05125ff 100644 --- a/docs/deployment/Docker_deployment_kuscia/deploy_master_lite_cn.md +++ b/docs/deployment/Docker_deployment_kuscia/deploy_master_lite_cn.md @@ -2,11 +2,11 @@ ## 前言 -本教程帮助你在多台机器上使用 [中心化组网模式](../reference/architecture_cn.md#中心化组网模式) 来部署 Kuscia 集群。 +本教程帮助你在多台机器上使用 [中心化组网模式](../../reference/architecture_cn.md#中心化组网模式) 来部署 Kuscia 集群。 ## 前置准备 -在部署 Kuscia 之前,请确保环境准备齐全,包括所有必要的软件、资源、操作系统版本和网络环境等满足要求,以确保部署过程顺畅进行,详情参考[部署要求](../deployment/deploy_check.md) +在部署 Kuscia 之前,请确保环境准备齐全,包括所有必要的软件、资源、操作系统版本和网络环境等满足要求,以确保部署过程顺畅进行,详情参考[部署要求](../deploy_check.md) ## 部署流程(基于TOKEN认证) @@ -35,13 +35,15 @@ docker run -it --rm ${KUSCIA_IMAGE} kuscia init --mode master --domain "antgroup ```bash # -p 参数传递的是 master 容器映射到主机的端口,保证和主机上现有的端口不冲突即可 # -k 参数传递的是 master 容器 KusciaAPI 映射到主机的 HTTP 端口,保证和主机上现有的端口不冲突即可 +# -a 指定自动导入的引擎镜像,-a none: 不自动导入引擎镜像,-a secretflow(默认): 自动导入 secretflow 引擎镜像 +# -m 或者 --memory-limit 参数给节点容器设置适当的内存限制。例如,'-m 4GiB 或 --memory-limit=4GiB' 表示限制最大内存 4GiB,'-m -1 或 --memory-limit=-1'表示没有限制,不设置默认 master 为 2GiB,lite 节点 4GiB,autonomy 节点 6GiB。 ./kuscia.sh start -c kuscia_master.yaml -p 18080 -k 18081 ``` - -注意:
-1、如果 master 的入口网络存在网关时,为了确保节点与 master 之间通信正常,需要网关符合一些要求,详情请参考[这里](./networkrequirements.md)
-2、master 节点默认使用 sqlite 作为存储,如果生产部署,需要配置链接到 mysql 数据库的连接串,具体配置可以参考[这里](./kuscia_config_cn.md#id3)
-3、需要对合作方暴露的 Kuscia 端口,可参考 [Kuscia 端口介绍](../kuscia_ports_cn.md)
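+
+如需在启动时同时指定引擎镜像导入方式与内存限制,可参考如下示例(仅为示意:`-a none`、`-m 2GiB` 为假设取值,端口与内存大小请按实际环境调整):
+
+```bash
+# 示例:跳过引擎镜像自动导入,并将 master 容器内存限制为 2GiB
+./kuscia.sh start -c kuscia_master.yaml -p 18080 -k 18081 -a none -m 2GiB
+```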
+> 注意事项:
+> - 目前 kuscia.sh 脚本仅支持导入 SecretFlow 镜像,SCQL、Serving 以及其他自定义镜像请移步至[注册自定义算法镜像](../../development/register_custom_image.md)<br>
+> - 如果 master 的入口网络存在网关时,为了确保节点与 master 之间通信正常,需要网关符合一些要求,详情请参考[这里](../networkrequirements.md)
+> - master 节点默认使用 sqlite 作为存储,如果生产部署,需要配置连接到 mysql 数据库的连接串,具体配置可以参考[这里](../kuscia_config_cn.md#id3)<br>
+> - 需要对合作方暴露的 Kuscia 端口,可参考 [Kuscia 端口介绍](../kuscia_ports_cn.md) 建议使用 curl -kvvv https://ip:port; 检查一下是否访问能通,正常情况下返回的 HTTP 错误码是 401,内容是:unauthorized。 示例如下: diff --git a/docs/deployment/Docker_deployment_kuscia/deploy_p2p_cn.md b/docs/deployment/Docker_deployment_kuscia/deploy_p2p_cn.md index 41272620..86241b9c 100644 --- a/docs/deployment/Docker_deployment_kuscia/deploy_p2p_cn.md +++ b/docs/deployment/Docker_deployment_kuscia/deploy_p2p_cn.md @@ -2,13 +2,13 @@ ## 前言 -本教程帮助你在多台机器上使用 [点对点组网模式](../reference/architecture_cn.md#点对点组网模式) 来部署 Kuscia 集群。 +本教程帮助你在多台机器上使用 [点对点组网模式](../../reference/architecture_cn.md#点对点组网模式) 来部署 Kuscia 集群。 当前 Kuscia 节点之间只支持 Token 的身份认证方式,在跨机器部署的场景下流程较为繁琐,后续本教程会持续更新优化。 ## 前置准备 -在部署 Kuscia 之前,请确保环境准备齐全,包括所有必要的软件、资源、操作系统版本和网络环境等满足要求,以确保部署过程顺畅进行,详情参考[部署要求](../deployment/deploy_check.md) +在部署 Kuscia 之前,请确保环境准备齐全,包括所有必要的软件、资源、操作系统版本和网络环境等满足要求,以确保部署过程顺畅进行,详情参考[部署要求](../deploy_check.md) ## 部署流程(基于 TOKEN 认证) @@ -38,14 +38,17 @@ docker run -it --rm ${KUSCIA_IMAGE} kuscia init --mode autonomy --domain "alice" ```bash # -p 参数传递的是节点容器映射到主机的 HTTPS 端口,保证和主机上现有的端口不冲突即可 # -k 参数传递的是节点容器 KusciaAPI 映射到主机的 MTLS 端口,保证和主机上现有的端口不冲突即可 +# -a 指定自动导入的引擎镜像,-a none: 不自动导入引擎镜像,-a secretflow(默认): 自动导入 secretflow 引擎镜像 +# -m 或者 --memory-limit 参数给节点容器设置适当的内存限制。例如,'-m 4GiB 或 --memory-limit=4GiB' 表示限制最大内存 4GiB,'-m -1 或 --memory-limit=-1'表示没有限制,不设置默认 master 为 2GiB,lite 节点 4GiB,autonomy 节点 6GiB。 ./kuscia.sh start -c autonomy_alice.yaml -p 11080 -k 11081 ``` -> 如果多个 lite 节点部署在同一个物理机上,可以用 -p -k -g -q 参数指定下端口号(例如:./kuscia.sh start -c autonomy_alice.yaml -p 11080 -k 11081 -g 11082 -q 11083),防止出现端口冲突。 +> 注意事项:
+> - 如果多个 lite 节点部署在同一个物理机上,可以用 -p -k -g -q 参数指定下端口号(例如:./kuscia.sh start -c autonomy_alice.yaml -p 11080 -k 11081 -g 11082 -q 11083),防止出现端口冲突。
+> - 目前 kuscia.sh 脚本仅支持导入 SecretFlow 镜像,SCQL、Serving 以及其他自定义镜像请移步至[注册自定义算法镜像](../../development/register_custom_image.md)<br>
+> - 如果节点之间的入口网络存在网关时,为了确保节点之间通信正常,需要网关符合一些要求,详情请参考[这里](../networkrequirements.md)<br>
+> - alice、bob 节点默认使用 sqlite 作为存储,如果生产部署,需要配置连接到 mysql 数据库的连接串,具体配置可以参考[这里](../kuscia_config_cn.md#id3)<br>
+> - 需要对合作方暴露的 Kuscia 端口,可参考 [Kuscia 端口介绍](../kuscia_ports_cn.md) -注意:
-1、如果节点之间的入口网络存在网关时,为了确保节点与 master 之间通信正常,需要网关符合一些要求,详情请参考[这里](./networkrequirements.md)
-2、alice、bob 节点默认使用 sqlite 作为存储,如果生产部署,需要配置链接到 mysql 数据库的连接串,具体配置可以参考[这里](./kuscia_config_cn.md#id3)
-3、需要对合作方暴露的 Kuscia 端口,可参考 [Kuscia 端口介绍](../kuscia_ports_cn.md)
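+
+同机部署多个节点并同时限制内存时,可参考如下示例(仅为示意:端口取值沿用上文说明,`-a secretflow`、`-m 6GiB` 为假设取值,请按实际环境调整):
+
+```bash
+# 示例:为同一台机器上的多个节点指定不同端口,自动导入 secretflow 引擎镜像,并限制容器内存为 6GiB
+./kuscia.sh start -c autonomy_alice.yaml -p 11080 -k 11081 -g 11082 -q 11083 -a secretflow -m 6GiB
+```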
### 部署 bob 节点 @@ -97,7 +100,7 @@ docker cp bob.domain.crt ${USER}-kuscia-autonomy-alice:/home/kuscia/var/certs/ 在 alice 里添加 bob 的证书等信息: ```bash -# [alice 机器] 添加 alice 的证书等信息 +# [alice 机器] 添加 bob 的证书等信息 docker exec -it ${USER}-kuscia-autonomy-alice scripts/deploy/add_domain.sh bob p2p ``` diff --git a/docs/deployment/K8s_deployment_kuscia/K8s_master_lite_cn.md b/docs/deployment/K8s_deployment_kuscia/K8s_master_lite_cn.md index 51fa35e6..c0d0dc35 100644 --- a/docs/deployment/K8s_deployment_kuscia/K8s_master_lite_cn.md +++ b/docs/deployment/K8s_deployment_kuscia/K8s_master_lite_cn.md @@ -39,11 +39,9 @@ ConfigMap 是用来配置 Kuscia 的配置文件,详细的配置文件介绍 domainID、私钥以及 datastoreEndpoint 字段里的数据库连接串(user、password、host、database)需要替换成真实有效的信息,私钥可以通过命令 `docker run -it --rm secretflow-registry.cn-hangzhou.cr.aliyuncs.com/secretflow/kuscia scripts/deploy/generate_rsa_key.sh` 生成 > 注意:
-1、database 名称暂不支持 "-" 特殊字符
-2、目前节点私钥仅支持 pkcs#1 格式: "BEGIN RSA PRIVATE KEY/END RSA PRIVATE KEY"
-3、修改 Configmap 配置后,需执行 kubectl delete po pod-name -n namespace 重新拉起 Pod 生效 - -注意:节点 ID 需要符合 DNS 子域名规则要求,详情请参考[这里](https://kubernetes.io/zh-cn/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names) +> - database 名称暂不支持 "-" 特殊字符
+> - 修改 Configmap 配置后,需执行 kubectl delete po {pod-name} -n {namespace} 重新拉起 Pod 生效
+> - 节点 ID 需要符合 DNS 子域名规则要求,详情请参考[这里](https://kubernetes.io/zh-cn/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names) 获取 [configmap.yaml](https://github.com/secretflow/kuscia/blob/main/hack/k8s/master/configmap.yaml) 文件,创建 Configmap;因为这里面涉及很多敏感配置,请在生产时务必重新配置,不使用默认配置。 @@ -87,12 +85,11 @@ ConfigMap 是用来配置 Kuscia 的配置文件,详细的配置文件介绍 部署 Configmap 需要提前在 Master 节点 Pod 内生成 domainID 以及 Token,并填写到 Configmap 的 domainID 和 liteDeployToken 字段中,私钥可以通过命令 `docker run -it --rm secretflow-registry.cn-hangzhou.cr.aliyuncs.com/secretflow/kuscia scripts/deploy/generate_rsa_key.sh` 生成并填写到 domainKeyData 字段中 > 注意:
-1、目前节点私钥仅支持 pkcs#1 格式: "BEGIN RSA PRIVATE KEY/END RSA PRIVATE KEY"
-2、修改 Configmap 配置后,需执行 kubectl delete po pod-name -n namespace 重新拉起 Pod 生效 - -注意:节点 ID 需要符合 DNS 子域名规则要求,详情请参考[这里](https://kubernetes.io/zh-cn/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names) +> - 目前节点私钥仅支持 pkcs#1 格式: "BEGIN RSA PRIVATE KEY/END RSA PRIVATE KEY"
+> - 修改 Configmap 配置后,需执行 kubectl delete po {pod-name} -n {namespace} 重新拉起 Pod 生效<br>
+> - 节点 ID 需要符合 DNS 子域名规则要求,详情请参考[这里](https://kubernetes.io/zh-cn/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names) -lite-bob 配置与 lite-alice 一样,下面以 alice 为例: +lite-bob 配置与 lite-alice 一样,下面以 Alice 为例: ```bash kubectl exec -it ${master_pod_name} bash -n kuscia-master # [pod 内部] 获取节点 Token @@ -159,18 +156,18 @@ kubectl create -f deployement.yaml > PS:目前因为安全性和时间因素,节点之间授权还是需要很多手动的操作,未来会优化。 ```bash -# 登录 master +# 登录 Master kubectl exec -it ${master_pod_name} bash -n kuscia-master -# [pod 内部] 创建 alice 到 bob 的授权 +# [pod 内部] 创建 Alice 到 Bob 的授权 scripts/deploy/create_cluster_domain_route.sh alice bob http://kuscia-lite-bob.lite-bob.svc.cluster.local:1080 -# [pod 内部] 创建 bob 到 alice 的授权 +# [pod 内部] 创建 Bob 到 Alice 的授权 scripts/deploy/create_cluster_domain_route.sh bob alice http://kuscia-lite-alice.lite-alice.svc.cluster.local:1080 -# [pod 内部] 执行以下命令,查看是否有内容,如果有说明 alice 到 bob 授权建立成功。 +# [pod 内部] 执行以下命令,查看是否有内容,如果有说明 Alice 到 Bob 授权建立成功。 kubectl get cdr alice-bob -o=jsonpath="{.status.tokenStatus.sourceTokens[*]}" -# [pod 内部] 执行以下命令,查看是否有内容,如果有说明 bob 到 alice 授权建立成功 +# [pod 内部] 执行以下命令,查看是否有内容,如果有说明 Bob 到 Alice 授权建立成功 kubectl get cdr bob-alice -o=jsonpath="{.status.tokenStatus.sourceTokens[*]}" ``` -`pod 内部` 在执行 master Pod 内执行 `kubectl get cdr` 返回 Ready 为 True 时,表示授权成功,示例如下: +`pod 内部` 在执行 Master Pod 内执行 `kubectl get cdr` 返回 Ready 为 True 时,表示授权成功,示例如下: ```bash NAME SOURCE DESTINATION HOST AUTHENTICATION READY alice-kuscia-system alice kuscia-system Token True @@ -181,71 +178,771 @@ bob-alice bob alice kuscia-lite-alice.lite-alice.svc. 授权失败,请参考[授权错误排查](../../reference/troubleshoot/networkauthorizationcheck.md)文档 ## 确认部署成功 -### 检查 pod 状态 +### 检查 Pod 状态 pod 处于 running 状态表示部署成功 ```bash kuebctl get po -n kuscia-master kubectl get po -n lite-alice ``` ### 检查数据库连接状态 -数据库内生成表格 kine 并且有数据表示数据库连接成功 +数据库内生成表格 Kine 并且有数据表示数据库连接成功 ## 运行任务 -### 准备测试数据 +> RunK 模式不支持使用本地数据训练,请准备[OSS数据](K8s_master_lite_cn.md#准备-oss-测试数据)。使用本地数据请先切换至 RunP 模式,详情请参考 [使用 RunP 运行时部署节点](./deploy_with_runp_cn.md)。 -- 登录 master pod +### 准备本地测试数据 +#### Alice 节点准备本地测试数据 + +登录到 Alice 节点的 Pod 中 ```bash -kubectl exec -it ${master_pod_name} bash -n kuscia-master +kubectl exec -it ${alice_pod_name} bash -n lite-alice ``` -为 alice 节点准备测试数据 +为 Alice 节点创建本地数据源 + +创建 DomainData 的时候要指定 datasource_id,所以要先创建数据源,再创建 DomainData,示例如下: +```bash +# 在容器内执行示例 +export CTR_CERTS_ROOT=/home/kuscia/var/certs +curl -k -X POST 'https://localhost:8082/api/v1/domaindatasource/create' \ + --header 'Content-Type: application/json' \ + --cacert ${CTR_CERTS_ROOT}/ca.crt \ + -d '{ + "domain_id": "alice", + "datasource_id":"default-data-source", + "type":"localfs", + "name": "DemoDataSource", + "info": { + "localfs": { + "path": "/home/kuscia/var/storage/data" + } + }, + "access_directly": true +}' +``` -`pod 内部`为 alice 的测试数据创建 domaindata +为 Alice 的测试数据创建 DomainData ```bash -scripts/deploy/create_domaindata_alice_table.sh alice +# 在 alice 容器内执行示例 +export CTR_CERTS_ROOT=/home/kuscia/var/certs +curl -X POST 'http://127.0.0.1:8082/api/v1/domaindata/create' \ +--header 'Content-Type: application/json' \ +--cacert ${CTR_CERTS_ROOT}/ca.crt \ +-d '{ + "domaindata_id": "alice-table", + "name": "alice.csv", + "type": "table", + "relative_uri": "alice.csv", + "domain_id": "alice", + "datasource_id": "default-data-source", + "attributes": { + "description": "alice demo data" + }, + "columns": [ + { + "comment": "", + "name": "id1", + "type": "str" + }, + { + "comment": "", + "name": "age", + "type": "float" + }, + { + "comment": "", + "name": "education", + "type": "float" 
+ }, + { + "comment": "", + "name": "default", + "type": "float" + }, + { + "comment": "", + "name": "balance", + "type": "float" + }, + { + "comment": "", + "name": "housing", + "type": "float" + }, + { + "comment": "", + "name": "loan", + "type": "float" + }, + { + "comment": "", + "name": "day", + "type": "float" + }, + { + "comment": "", + "name": "duration", + "type": "float" + }, + { + "comment": "", + "name": "campaign", + "type": "float" + }, + { + "comment": "", + "name": "pdays", + "type": "float" + }, + { + "comment": "", + "name": "previous", + "type": "float" + }, + { + "comment": "", + "name": "job_blue-collar", + "type": "float" + }, + { + "comment": "", + "name": "job_entrepreneur", + "type": "float" + }, + { + "comment": "", + "name": "job_housemaid", + "type": "float" + }, + { + "comment": "", + "name": "job_management", + "type": "float" + }, + { + "comment": "", + "name": "job_retired", + "type": "float" + }, + { + "comment": "", + "name": "job_self-employed", + "type": "float" + }, + { + "comment": "", + "name": "job_services", + "type": "float" + }, + { + "comment": "", + "name": "job_student", + "type": "float" + }, + { + "comment": "", + "name": "job_technician", + "type": "float" + }, + { + "comment": "", + "name": "job_unemployed", + "type": "float" + }, + { + "comment": "", + "name": "marital_divorced", + "type": "float" + }, + { + "comment": "", + "name": "marital_married", + "type": "float" + }, + { + "comment": "", + "name": "marital_single", + "type": "float" + } + ], + "vendor": "manual", + "author": "alice" +}' ``` -`pod 内部`为 alice 的测试数据创建 domaindatagrant +将 Alice 的 DomainData 授权给 Bob ```bash +# 在 alice 容器内执行示例 +export CTR_CERTS_ROOT=/home/kuscia/var/certs curl -X POST 'http://127.0.0.1:8082/api/v1/domaindatagrant/create' \ - --cert /home/kuscia/var/certs/kusciaapi-server.crt \ - --key /home/kuscia/var/certs/kusciaapi-server.key \ - --cacert /home/kuscia/var/certs/ca.crt \ - --header "Token: $(cat /home/kuscia/var/certs/token)" \ - --header 'Content-Type: application/json' \ - -d '{ "grant_domain": "bob", - "description": {"domaindatagrant":"alice-bob"}, - "domain_id": "alice", - "domaindata_id": "alice-table" - }' +--cacert ${CTR_CERTS_ROOT}/ca.crt \ +--header 'Content-Type: application/json' \ +-d '{ "grant_domain": "bob", + "description": {"domaindatagrant":"alice-bob"}, + "domain_id": "alice", + "domaindata_id": "alice-table" +}' ``` -为 bob 节点准备测试数据 +#### Bob 节点准备本地测试数据 -`pod 内部`为 bob 的测试数据创建 domaindata +登录到 Bob 节点的 Pod 中 ```bash -scripts/deploy/create_domaindata_bob_table.sh bob +kubectl exec -it ${bob_pod_name} bash -n lite-bob ``` -`pod 内部`为 bob 的测试数据创建 domaindatagrant +为 Bob 节点创建本地数据源 + +创建 DomainData 的时候要指定 datasource_id,所以要先创建数据源,再创建 DomainData,示例如下: +```bash +export CTR_CERTS_ROOT=/home/kuscia/var/certs +curl -k -X POST 'https://localhost:8082/api/v1/domaindatasource/create' \ + --header 'Content-Type: application/json' \ + --cacert ${CTR_CERTS_ROOT}/ca.crt \ + -d '{ + "domain_id": "bob", + "datasource_id":"default-data-source", + "type":"localfs", + "name": "DemoDataSource", + "info": { + "localfs": { + "path": "/home/kuscia/var/storage/data" + } + }, + "access_directly": true +}' +``` + +为 Bob 的测试数据创建 domaindata ```bash +# 在 bob 容器内执行示例 +export CTR_CERTS_ROOT=/home/kuscia/var/certs +curl -X POST 'http://127.0.0.1:8082/api/v1/domaindata/create' \ +--header 'Content-Type: application/json' \ +--cacert ${CTR_CERTS_ROOT}/ca.crt \ +-d '{ + "domaindata_id": "bob-table", + "name": "bob.csv", + "type": "table", + "relative_uri": "bob.csv", + 
"domain_id": "bob", + "datasource_id": "default-data-source", + "attributes": { + "description": "bob demo data" + }, + "columns": [ + { + "comment": "", + "name": "id2", + "type": "str" + }, + { + "comment": "", + "name": "contact_cellular", + "type": "float" + }, + { + "comment": "", + "name": "contact_telephone", + "type": "float" + }, + { + "comment": "", + "name": "contact_unknown", + "type": "float" + }, + { + "comment": "", + "name": "month_apr", + "type": "float" + }, + { + "comment": "", + "name": "month_aug", + "type": "float" + }, + { + "comment": "", + "name": "month_dec", + "type": "float" + }, + { + "comment": "", + "name": "month_feb", + "type": "float" + }, + { + "comment": "", + "name": "month_jan", + "type": "float" + }, + { + "comment": "", + "name": "month_jul", + "type": "float" + }, + { + "comment": "", + "name": "month_jun", + "type": "float" + }, + { + "comment": "", + "name": "month_mar", + "type": "float" + }, + { + "comment": "", + "name": "month_may", + "type": "float" + }, + { + "comment": "", + "name": "month_nov", + "type": "float" + }, + { + "comment": "", + "name": "month_oct", + "type": "float" + }, + { + "comment": "", + "name": "month_sep", + "type": "float" + }, + { + "comment": "", + "name": "poutcome_failure", + "type": "float" + }, + { + "comment": "", + "name": "poutcome_other", + "type": "float" + }, + { + "comment": "", + "name": "poutcome_success", + "type": "float" + }, + { + "comment": "", + "name": "poutcome_unknown", + "type": "float" + }, + { + "comment": "", + "name": "y", + "type": "int" + } + ], + "vendor": "manual", + "author": "bob" +}' +``` +将 Bob 的 DomainData 授权给 Alice + +```bash +# 在 bob 容器内执行示例 +export CTR_CERTS_ROOT=/home/kuscia/var/certs curl -X POST 'http://127.0.0.1:8082/api/v1/domaindatagrant/create' \ - --cert /home/kuscia/var/certs/kusciaapi-server.crt \ - --key /home/kuscia/var/certs/kusciaapi-server.key \ - --cacert /home/kuscia/var/certs/ca.crt \ - --header "Token: $(cat /home/kuscia/var/certs/token)" \ - --header 'Content-Type: application/json' \ - -d '{ "grant_domain": "alice", - "description": {"domaindatagrant":"bob-alice"}, - "domain_id": "bob", - "domaindata_id": "bob-table" - }' +--cacert ${CTR_CERTS_ROOT}/ca.crt \ +--header 'Content-Type: application/json' \ +-d '{ "grant_domain": "alice", + "description": {"domaindatagrant":"bob-alice"}, + "domain_id": "bob", + "domaindata_id": "bob-table" +}' +``` + +### 准备 OSS 测试数据 +#### Alice 节点准备 OSS 数据 + +请先将 Alice 节点测试数据 [alice.csv](https://github.com/secretflow/kuscia/blob/main/testdata/alice.csv) 上传至 OSS + +登录到 Alice 节点的 Pod 中 +```bash +kubectl exec -it ${alice_pod_name} bash -n lite-alice +``` + +为 Alice 节点创建 OSS 数据源 + +创建 DomainData 的时候要指定 datasource_id,所以要先创建数据源,再创建 DomainData,示例如下: +```bash +# 在 alice 容器内执行示例 +export CTR_CERTS_ROOT=/home/kuscia/var/certs +curl -k -X POST 'http://localhost:8082/api/v1/domaindatasource/create' \ +--header 'Content-Type: application/json' \ +--cacert ${CTR_CERTS_ROOT}/ca.crt \ +-d '{ + "domain_id": "alice", + "datasource_id":"default-data-source", + "type":"oss", + "name": "DemoDataSource", + "info": { + "oss": { + "endpoint": "https://oss.xxx.cn-xxx.com", + "bucket": "secretflow", + "prefix": "kuscia/", + "access_key_id":"ak-xxxx", + "access_key_secret" :"sk-xxxx" +# "virtualhost": true (阿里云 OSS 需要配置此项) +# "storage_type": "minio" (Minio 需要配置此项) + } + }, + "access_directly": true +}' +``` + +为 Alice 的测试数据创建 DomainData +```bash +# 在 alice 容器内执行示例 +export CTR_CERTS_ROOT=/home/kuscia/var/certs +curl -X POST 
'http://127.0.0.1:8082/api/v1/domaindata/create' \ +--header 'Content-Type: application/json' \ +--cacert ${CTR_CERTS_ROOT}/ca.crt \ +-d '{ + "domaindata_id": "alice-table", + "name": "alice.csv", + "type": "table", + "relative_uri": "alice.csv", + "domain_id": "alice", + "datasource_id": "default-data-source", + "attributes": { + "description": "alice demo data" + }, + "columns": [ + { + "comment": "", + "name": "id1", + "type": "str" + }, + { + "comment": "", + "name": "age", + "type": "float" + }, + { + "comment": "", + "name": "education", + "type": "float" + }, + { + "comment": "", + "name": "default", + "type": "float" + }, + { + "comment": "", + "name": "balance", + "type": "float" + }, + { + "comment": "", + "name": "housing", + "type": "float" + }, + { + "comment": "", + "name": "loan", + "type": "float" + }, + { + "comment": "", + "name": "day", + "type": "float" + }, + { + "comment": "", + "name": "duration", + "type": "float" + }, + { + "comment": "", + "name": "campaign", + "type": "float" + }, + { + "comment": "", + "name": "pdays", + "type": "float" + }, + { + "comment": "", + "name": "previous", + "type": "float" + }, + { + "comment": "", + "name": "job_blue-collar", + "type": "float" + }, + { + "comment": "", + "name": "job_entrepreneur", + "type": "float" + }, + { + "comment": "", + "name": "job_housemaid", + "type": "float" + }, + { + "comment": "", + "name": "job_management", + "type": "float" + }, + { + "comment": "", + "name": "job_retired", + "type": "float" + }, + { + "comment": "", + "name": "job_self-employed", + "type": "float" + }, + { + "comment": "", + "name": "job_services", + "type": "float" + }, + { + "comment": "", + "name": "job_student", + "type": "float" + }, + { + "comment": "", + "name": "job_technician", + "type": "float" + }, + { + "comment": "", + "name": "job_unemployed", + "type": "float" + }, + { + "comment": "", + "name": "marital_divorced", + "type": "float" + }, + { + "comment": "", + "name": "marital_married", + "type": "float" + }, + { + "comment": "", + "name": "marital_single", + "type": "float" + } + ], + "vendor": "manual", + "author": "alice" +}' +``` + +将 Alice 的 DomainData 授权给 Bob +```bash +# 在 alice 容器内执行示例 +export CTR_CERTS_ROOT=/home/kuscia/var/certs +curl -X POST 'http://127.0.0.1:8082/api/v1/domaindatagrant/create' \ +--cacert ${CTR_CERTS_ROOT}/ca.crt \ +--header 'Content-Type: application/json' \ +-d '{ "grant_domain": "bob", + "description": {"domaindatagrant":"alice-bob"}, + "domain_id": "alice", + "domaindata_id": "alice-table" +}' +``` + +#### Bob 节点准备 OSS 测试数据 + +请先将 Bob 节点测试数据 [bob.csv](https://github.com/secretflow/kuscia/blob/main/testdata/bob.csv) 上传至 OSS + +登录到 Bob 节点的 Pod 中 +```bash +kubectl exec -it ${bob_pod_name} bash -n lite-bob +``` + +为 Bob 节点创建 OSS 数据源 + +创建 DomainData 的时候要指定 datasource_id,所以要先创建数据源,再创建 DomainData,示例如下: +```bash +# 在 bob 容器内执行示例 +export CTR_CERTS_ROOT=/home/kuscia/var/certs +curl -k -X POST 'http://localhost:8082/api/v1/domaindatasource/create' \ +--header 'Content-Type: application/json' \ +--cacert ${CTR_CERTS_ROOT}/ca.crt \ +-d '{ + "domain_id": "bob", + "datasource_id":"default-data-source", + "type":"oss", + "name": "DemoDataSource", + "info": { + "oss": { + "endpoint": "https://oss.xxx.cn-xxx.com", + "bucket": "secretflow", + "prefix": "kuscia/", + "access_key_id":"ak-xxxx", + "access_key_secret" :"sk-xxxx" +# "virtualhost": true (阿里云 OSS 需要配置此项) +# "storage_type": "minio" (Minio 需要配置此项) + } + }, + "access_directly": true +}' +``` + +为 Bob 的测试数据创建 DomainData +```bash +# 在 bob 容器内执行示例 
+export CTR_CERTS_ROOT=/home/kuscia/var/certs +curl -X POST 'http://127.0.0.1:8082/api/v1/domaindata/create' \ +--header 'Content-Type: application/json' \ +--cacert ${CTR_CERTS_ROOT}/ca.crt \ +-d '{ + "domaindata_id": "bob-table", + "name": "bob.csv", + "type": "table", + "relative_uri": "bob.csv", + "domain_id": "bob", + "datasource_id": "default-data-source", + "attributes": { + "description": "bob demo data" + }, + "columns": [ + { + "comment": "", + "name": "id2", + "type": "str" + }, + { + "comment": "", + "name": "contact_cellular", + "type": "float" + }, + { + "comment": "", + "name": "contact_telephone", + "type": "float" + }, + { + "comment": "", + "name": "contact_unknown", + "type": "float" + }, + { + "comment": "", + "name": "month_apr", + "type": "float" + }, + { + "comment": "", + "name": "month_aug", + "type": "float" + }, + { + "comment": "", + "name": "month_dec", + "type": "float" + }, + { + "comment": "", + "name": "month_feb", + "type": "float" + }, + { + "comment": "", + "name": "month_jan", + "type": "float" + }, + { + "comment": "", + "name": "month_jul", + "type": "float" + }, + { + "comment": "", + "name": "month_jun", + "type": "float" + }, + { + "comment": "", + "name": "month_mar", + "type": "float" + }, + { + "comment": "", + "name": "month_may", + "type": "float" + }, + { + "comment": "", + "name": "month_nov", + "type": "float" + }, + { + "comment": "", + "name": "month_oct", + "type": "float" + }, + { + "comment": "", + "name": "month_sep", + "type": "float" + }, + { + "comment": "", + "name": "poutcome_failure", + "type": "float" + }, + { + "comment": "", + "name": "poutcome_other", + "type": "float" + }, + { + "comment": "", + "name": "poutcome_success", + "type": "float" + }, + { + "comment": "", + "name": "poutcome_unknown", + "type": "float" + }, + { + "comment": "", + "name": "y", + "type": "int" + } + ], + "vendor": "manual", + "author": "bob" +}' +``` + +将 Bob 的 DomainData 授权给 Alice +```bash +# 在 bob 容器内执行示例 +export CTR_CERTS_ROOT=/home/kuscia/var/certs +curl -X POST 'http://127.0.0.1:8082/api/v1/domaindatagrant/create' \ +--cacert ${CTR_CERTS_ROOT}/ca.crt \ +--header 'Content-Type: application/json' \ +-d '{ "grant_domain": "alice", + "description": {"domaindatagrant":"bob-alice"}, + "domain_id": "bob", + "domaindata_id": "bob-table" +}' ``` ### 创建 AppImage -- 登录到 master pod +- 登录到 Master pod ```bash kubectl exec -it ${master_pod_name} bash -n kuscia-master ``` @@ -255,7 +952,7 @@ kubectl apply -f AppImage.yaml ``` ### 执行测试作业 -- 登录到 master pod +- 登录到 Master pod ```bash kubectl exec -it ${master_pod_name} bash -n kuscia-master ``` @@ -268,4 +965,9 @@ scripts/user/create_example_job.sh `pod 内部`查看作业状态 ```bash kubectl get kj -n cross-domain +``` + +Runk 模式可以在 Kuscia Pod 所在集群中执行如下命令查看引擎日志 +```bash +kubectl logs ${engine_pod_name} -n kuscia-master ``` \ No newline at end of file diff --git a/docs/deployment/K8s_deployment_kuscia/K8s_p2p_cn.md b/docs/deployment/K8s_deployment_kuscia/K8s_p2p_cn.md index b6c40dd2..e69a45ba 100644 --- a/docs/deployment/K8s_deployment_kuscia/K8s_p2p_cn.md +++ b/docs/deployment/K8s_deployment_kuscia/K8s_p2p_cn.md @@ -39,11 +39,9 @@ ConfigMap 是用来配置 Kuscia 的配置文件,详细的配置文件介绍 domainID、私钥以及 datastoreEndpoint 字段里的数据库连接串(user、password、host、database)需要替换成真实有效的信息,私钥可以通过命令 `docker run -it --rm secretflow-registry.cn-hangzhou.cr.aliyuncs.com/secretflow/kuscia scripts/deploy/generate_rsa_key.sh`生成 > 注意:
-1、database 名称暂不支持 "-" 特殊字符
-2、目前节点私钥仅支持 pkcs#1 格式: "BEGIN RSA PRIVATE KEY/END RSA PRIVATE KEY"
-3、修改 Configmap 配置后,需执行 kubectl delete po pod-name -n namespace 重新拉起 Pod 生效 - -注意:节点 ID 需要符合 DNS 子域名规则要求,详情请参考[这里](https://kubernetes.io/zh-cn/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names) +> - database 名称暂不支持 "-" 特殊字符
+> - 修改 Configmap 配置后,需执行 kubectl delete po {pod-name} -n {namespace} 重新拉起 Pod 生效
+> - 节点 ID 需要符合 DNS 子域名规则要求,详情请参考[这里](https://kubernetes.io/zh-cn/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names) 特殊说明:为了使 ServiceAccount 具有创建、查看、删除等资源权限,RunK 模式提供两种方式: - 方式一:在 Configmap 的 KubeconfigFile 字段配置具有同等权限的 Kubeconfig @@ -71,54 +69,54 @@ kubectl create -f deployment.yaml ### 创建 autonomy-alice、autonomy-bob 之间的授权 > PS:目前因为安全性和时间因素,节点之间授权还是需要很多手动的操作,未来会优化。 -alice 和 bob 授权之前可以先检测下相互之间的通信是否正常 +Alice 和 Bob 授权之前可以先检测下相互之间的通信是否正常 建议使用 curl -kvvv http://kuscia-autonomy-bob.autonomy-bob.svc.cluster.local:1080;(此处以 HTTP 为例,HTTPS 可以删除 Configmap 里的 Protocol: NOTLS 字段,重启 Pod 生效。[LoadBalancer](https://kubernetes.io/zh-cn/docs/concepts/services-networking/service/#loadbalancer) 或者 [NodePort](https://kubernetes.io/zh-cn/docs/concepts/services-networking/service/#type-nodeport) 方式可以用 curl -kvvv http://ip:port)检查一下是否访问能通,正常情况下返回的 HTTP 错误码是 401,内容是:unauthorized 示例参考[这里](../K8s_deployment_kuscia/K8s_master_lite_cn.md#id6) -注意:如果 alice/bob 的入口网络存在网关时,为了确保节点之间通信正常,需要网关符合一些要求,详情请参考[这里](../networkrequirements.md) +注意:如果 Alice/Bob 的入口网络存在网关时,为了确保节点之间通信正常,需要网关符合一些要求,详情请参考[这里](../networkrequirements.md) -建立 alice 到 bob 授权 +建立 Alice 到 Bob 授权 ```bash -# 将 alice 节点的 domain.crt 证书 cp 到 跳板机当前目录并改名 alice.domain.crt +# 将 Alice 节点的 domain.crt 证书 cp 到 跳板机当前目录并改名 alice.domain.crt kubectl cp autonomy-alice/kuscia-autonomy-alice-686d6747c-gc2kk:var/certs/domain.crt alice.domain.crt -# 将 alice.domain.crt 证书 cp 到 bob 节点的里 +# 将 alice.domain.crt 证书 cp 到 Bob 节点的里 kubectl cp alice.domain.crt autonomy-bob/kuscia-autonomy-bob-89cf8bc77-cvn9f:var/certs/ -# 登录到 bob 节点 +# 登录到 Bob 节点 kubectl exec -it kuscia-autonomy-bob-89cf8bc77-cvn9f bash -n autonomy-bob -# [pod 内部] 在 bob 里添加 alice 的证书等信息 +# [pod 内部] 在 Bob 里添加 Alice 的证书等信息 scripts/deploy/add_domain.sh alice p2p -# 登录到 alice 节点 +# 登录到 Alice 节点 kubectl exec -it kuscia-autonomy-alice-686d6747c-gc2kk bash -n autonomy-alice -# [pod 内部] 建立 alice 到 bob 的通信 +# [pod 内部] 建立 Alice 到 Bob 的通信 scripts/deploy/join_to_host.sh alice bob http://kuscia-autonomy-bob.autonomy-bob.svc.cluster.local:1080 ``` -建立 bob 到 alice 授权 +建立 Bob 到 Alice 授权 ```bash -# 将 bob 节点的 domain.crt 证书 cp 到 跳板机当前目录并改 bob.domain.crt +# 将 Bob 节点的 domain.crt 证书 cp 到 跳板机当前目录并改 bob.domain.crt kubectl cp autonomy-bob/kuscia-autonomy-bob-89cf8bc77-cvn9f:var/certs/domain.crt bob.domain.crt -# 将 bob.domain.crt 证书 cp 到 alice 节点的里 +# 将 bob.domain.crt 证书 cp 到 Alice 节点的里 kubectl cp bob.domain.crt autonomy-alice/kuscia-autonomy-alice-686d6747c-h78lr:var/certs/ -# 登录到 alice 节点 +# 登录到 Alice 节点 kubectl exec -it kuscia-autonomy-alice-686d6747c-h78lr bash -n autonomy-alice -# [pod 内部] 在 alice 里添加 bob 的证书等信息 +# [pod 内部] 在 Alice 里添加 Bob 的证书等信息 scripts/deploy/add_domain.sh bob p2p -# 登录到 bob 节点 +# 登录到 Bob 节点 kubectl exec -it kuscia-autonomy-bob-89cf8bc77-cvn9f bash -n autonomy-bob -# [pod 内部] 建立 bob 到 alice 的通信 +# [pod 内部] 建立 Bob 到 Alice 的通信 scripts/deploy/join_to_host.sh bob alice http://kuscia-autonomy-alice.autonomy-alice.svc.cluster.local:1080 ``` 检查双方授权状态 -`pod 内部`在 alice 节点内执行 `kubectl get cdr alice-bob -o=jsonpath="{.status.tokenStatus.sourceTokens[*]}"`,在 bob 节点内执行 `kubectl get cdr bob-alice -o=jsonpath="{.status.tokenStatus.sourceTokens[*]}"` 得到下面示例返回结果表示授权成功 +`pod 内部`在 Alice 节点内执行 `kubectl get cdr alice-bob -o=jsonpath="{.status.tokenStatus.sourceTokens[*]}"`,在 Bob 节点内执行 `kubectl get cdr bob-alice -o=jsonpath="{.status.tokenStatus.sourceTokens[*]}"` 得到下面示例返回结果表示授权成功 ```bash 
{"effectiveInstances":["kuscia-autonomy-alice-686d6747c-h78lr","kuscia-autonomy-alice-686d6747c-qlh2m"],"expirationTime":"2123-11-24T02:42:12Z","isReady":true,"revision":1,"revisionTime":"2023-11-24T02:42:12Z","token":"dVYZ4Ld/i7msNwuLoT+F8kFaCXbgXk6FziaU5PMASl8ReFfOVpsUt0qijlQaKTLm+OKzABfMQEI4jGeJ/Qsmhr6XOjc+7rkSCa5bmCxw5YVq+UtIFwNnjyRDaBV6A+ViiEMZwuaLIiFMtsPLki2SXzcA7LiLZY3oZvHfgf0m8LenMfU9tmZEptRoTBeL3kKagMBhxLxXL4rZzmI1bBwi49zxwOmg3c/MbDP8JiI6zIM7/NdIAEJhqsbzC5/Yw1qajr7D+NLXhsdrtTDSHN8gSB8D908FxYvcxeUTHqDQJT1mWcXs2N4r/Z/3OydkwJiQQokpjfZsR0T4xmbVTJd5qw=="} ``` -`pod 内部`在 alice、bob 节点 pod 内执行 `kubectl get cdr` 返回 Ready 为 True 时,表示授权成功,示例如下: +`pod 内部`在 Alice、Bob 节点 pod 内执行 `kubectl get cdr` 返回 Ready 为 True 时,表示授权成功,示例如下: ```bash NAME SOURCE DESTINATION HOST AUTHENTICATION READY alice-bob alice bob kuscia-autonomy-bob.autonomy-bob.svc.cluster.local Token True @@ -137,65 +135,759 @@ kubectl get po -n autonomy-alice ## 运行任务 -### 准备测试数据 -- alice 节点准备测试数据 +> RunK 模式不支持使用本地数据训练,请准备[OSS数据](K8s_p2p_cn.md#准备-oss-测试数据)。使用本地数据请先切换至 RunP 模式,详情请参考 [使用 RunP 运行时部署节点](./deploy_with_runp_cn.md)。 + +### 准备本地测试数据 +#### Alice 节点准备本地测试数据 -登录 alice pod +登录到 Alice 节点的 Pod 中 ```bash kubectl exec -it ${alice_pod_name} bash -n autonomy-alice ``` +为 Alice 节点创建本地数据源 + +创建 DomainData 的时候要指定 datasource_id,所以要先创建数据源,再创建 DomainData,示例如下: +```bash +# 在容器内执行示例 +export CTR_CERTS_ROOT=/home/kuscia/var/certs +curl -k -X POST 'https://localhost:8082/api/v1/domaindatasource/create' \ + --header 'Content-Type: application/json' \ + --cacert ${CTR_CERTS_ROOT}/ca.crt \ + -d '{ + "domain_id": "alice", + "datasource_id":"default-data-source", + "type":"localfs", + "name": "DemoDataSource", + "info": { + "localfs": { + "path": "/home/kuscia/var/storage/data" + } + }, + "access_directly": true +}' +``` -`pod 内部`为 alice 的测试数据创建 domaindata +为 Alice 的测试数据创建 DomainData ```bash -scripts/deploy/create_domaindata_alice_table.sh alice +# 在 alice 容器内执行示例 +export CTR_CERTS_ROOT=/home/kuscia/var/certs +curl -X POST 'http://127.0.0.1:8082/api/v1/domaindata/create' \ +--header 'Content-Type: application/json' \ +--cacert ${CTR_CERTS_ROOT}/ca.crt \ +-d '{ + "domaindata_id": "alice-table", + "name": "alice.csv", + "type": "table", + "relative_uri": "alice.csv", + "domain_id": "alice", + "datasource_id": "default-data-source", + "attributes": { + "description": "alice demo data" + }, + "columns": [ + { + "comment": "", + "name": "id1", + "type": "str" + }, + { + "comment": "", + "name": "age", + "type": "float" + }, + { + "comment": "", + "name": "education", + "type": "float" + }, + { + "comment": "", + "name": "default", + "type": "float" + }, + { + "comment": "", + "name": "balance", + "type": "float" + }, + { + "comment": "", + "name": "housing", + "type": "float" + }, + { + "comment": "", + "name": "loan", + "type": "float" + }, + { + "comment": "", + "name": "day", + "type": "float" + }, + { + "comment": "", + "name": "duration", + "type": "float" + }, + { + "comment": "", + "name": "campaign", + "type": "float" + }, + { + "comment": "", + "name": "pdays", + "type": "float" + }, + { + "comment": "", + "name": "previous", + "type": "float" + }, + { + "comment": "", + "name": "job_blue-collar", + "type": "float" + }, + { + "comment": "", + "name": "job_entrepreneur", + "type": "float" + }, + { + "comment": "", + "name": "job_housemaid", + "type": "float" + }, + { + "comment": "", + "name": "job_management", + "type": "float" + }, + { + "comment": "", + "name": "job_retired", + "type": "float" + }, + { + "comment": "", + "name": 
"job_self-employed", + "type": "float" + }, + { + "comment": "", + "name": "job_services", + "type": "float" + }, + { + "comment": "", + "name": "job_student", + "type": "float" + }, + { + "comment": "", + "name": "job_technician", + "type": "float" + }, + { + "comment": "", + "name": "job_unemployed", + "type": "float" + }, + { + "comment": "", + "name": "marital_divorced", + "type": "float" + }, + { + "comment": "", + "name": "marital_married", + "type": "float" + }, + { + "comment": "", + "name": "marital_single", + "type": "float" + } + ], + "vendor": "manual", + "author": "alice" +}' ``` -`pod 内部`为 alice 的测试数据创建 domaindatagrant +将 Alice 的 DomainData 授权给 Bob ```bash +# 在容器内执行示例 +export CTR_CERTS_ROOT=/home/kuscia/var/certs curl -X POST 'http://127.0.0.1:8082/api/v1/domaindatagrant/create' \ - --cert /home/kuscia/var/certs/kusciaapi-server.crt \ - --key /home/kuscia/var/certs/kusciaapi-server.key \ - --cacert /home/kuscia/var/certs/ca.crt \ - --header "Token: $(cat /home/kuscia/var/certs/token)" \ - --header 'Content-Type: application/json' \ - -d '{ "grant_domain": "bob", - "description": {"domaindatagrant":"alice-bob"}, - "domain_id": "alice", - "domaindata_id": "alice-table" - }' +--cacert ${CTR_CERTS_ROOT}/ca.crt \ +--header 'Content-Type: application/json' \ +-d '{ "grant_domain": "bob", + "description": {"domaindatagrant":"alice-bob"}, + "domain_id": "alice", + "domaindata_id": "alice-table" +}' ``` -- bob 节点准备测试数据 +#### Bob 节点准备本地测试数据 -登录 bob pod +登录到 Bob 节点的 Pod 中 ```bash kubectl exec -it ${bob_pod_name} bash -n autonomy-bob ``` -`pod 内部`为 bob 的测试数据创建 domaindata +为 Bob 节点创建本地数据源 + +创建 DomainData 的时候要指定 datasource_id,所以要先创建数据源,再创建 DomainData,示例如下: +```bash +export CTR_CERTS_ROOT=/home/kuscia/var/certs +curl -k -X POST 'https://localhost:8082/api/v1/domaindatasource/create' \ + --header 'Content-Type: application/json' \ + --cacert ${CTR_CERTS_ROOT}/ca.crt \ + -d '{ + "domain_id": "bob", + "datasource_id":"default-data-source", + "type":"localfs", + "name": "DemoDataSource", + "info": { + "localfs": { + "path": "/home/kuscia/var/storage/data" + } + }, + "access_directly": true +}' +``` + +为 Bob 的测试数据创建 DomainData ```bash -scripts/deploy/create_domaindata_bob_table.sh bob +# 在 bob 容器内执行示例 +export CTR_CERTS_ROOT=/home/kuscia/var/certs +curl -X POST 'http://127.0.0.1:8082/api/v1/domaindata/create' \ +--header 'Content-Type: application/json' \ +--cacert ${CTR_CERTS_ROOT}/ca.crt \ +-d '{ + "domaindata_id": "bob-table", + "name": "bob.csv", + "type": "table", + "relative_uri": "bob.csv", + "domain_id": "bob", + "datasource_id": "default-data-source", + "attributes": { + "description": "bob demo data" + }, + "columns": [ + { + "comment": "", + "name": "id2", + "type": "str" + }, + { + "comment": "", + "name": "contact_cellular", + "type": "float" + }, + { + "comment": "", + "name": "contact_telephone", + "type": "float" + }, + { + "comment": "", + "name": "contact_unknown", + "type": "float" + }, + { + "comment": "", + "name": "month_apr", + "type": "float" + }, + { + "comment": "", + "name": "month_aug", + "type": "float" + }, + { + "comment": "", + "name": "month_dec", + "type": "float" + }, + { + "comment": "", + "name": "month_feb", + "type": "float" + }, + { + "comment": "", + "name": "month_jan", + "type": "float" + }, + { + "comment": "", + "name": "month_jul", + "type": "float" + }, + { + "comment": "", + "name": "month_jun", + "type": "float" + }, + { + "comment": "", + "name": "month_mar", + "type": "float" + }, + { + "comment": "", + "name": "month_may", + "type": "float" + 
}, + { + "comment": "", + "name": "month_nov", + "type": "float" + }, + { + "comment": "", + "name": "month_oct", + "type": "float" + }, + { + "comment": "", + "name": "month_sep", + "type": "float" + }, + { + "comment": "", + "name": "poutcome_failure", + "type": "float" + }, + { + "comment": "", + "name": "poutcome_other", + "type": "float" + }, + { + "comment": "", + "name": "poutcome_success", + "type": "float" + }, + { + "comment": "", + "name": "poutcome_unknown", + "type": "float" + }, + { + "comment": "", + "name": "y", + "type": "int" + } + ], + "vendor": "manual", + "author": "bob" +}' ``` -`pod 内部`为 bob 的测试数据创建 domaindatagrant +将 Bob 的 DomainData 授权给 Alice ```bash +# 在容器内执行示例 +export CTR_CERTS_ROOT=/home/kuscia/var/certs curl -X POST 'http://127.0.0.1:8082/api/v1/domaindatagrant/create' \ - --cert /home/kuscia/var/certs/kusciaapi-server.crt \ - --key /home/kuscia/var/certs/kusciaapi-server.key \ - --cacert /home/kuscia/var/certs/ca.crt \ - --header "Token: $(cat /home/kuscia/var/certs/token)" \ - --header 'Content-Type: application/json' \ - -d '{ "grant_domain": "alice", - "description": {"domaindatagrant":"bob-alice"}, - "domain_id": "bob", - "domaindata_id": "bob-table" - }' +--cacert ${CTR_CERTS_ROOT}/ca.crt \ +--header 'Content-Type: application/json' \ +-d '{ "grant_domain": "alice", + "description": {"domaindatagrant":"bob-alice"}, + "domain_id": "bob", + "domaindata_id": "bob-table" +}' +``` + +### 准备 OSS 测试数据 +#### Alice 节点准备 OSS 数据 + +请先将 Alice 节点测试数据 [alice.csv](https://github.com/secretflow/kuscia/blob/main/testdata/alice.csv) 上传至 OSS + +登录到 Alice 节点的 Pod 中 +```bash +kubectl exec -it ${alice_pod_name} bash -n autonomy-alice +``` +为 Alice 节点创建 OSS 数据源 + +创建 DomainData 的时候要指定 datasource_id,所以要先创建数据源,再创建 DomainData,示例如下: +```bash +# 在 alice 容器内执行示例 +export CTR_CERTS_ROOT=/home/kuscia/var/certs +curl -k -X POST 'http://localhost:8082/api/v1/domaindatasource/create' \ +--header 'Content-Type: application/json' \ +--cacert ${CTR_CERTS_ROOT}/ca.crt \ +-d '{ + "domain_id": "alice", + "datasource_id":"default-data-source", + "type":"oss", + "name": "DemoDataSource", + "info": { + "oss": { + "endpoint": "https://oss.xxx.cn-xxx.com", + "bucket": "secretflow", + "prefix": "kuscia/", + "access_key_id":"ak-xxxx", + "access_key_secret" :"sk-xxxx" +# "virtualhost": true (阿里云 OSS 需要配置此项) +# "storage_type": "minio" (Minio 需要配置此项) + } + }, + "access_directly": true +}' +``` + +为 Alice 的测试数据创建 DomainData +```bash +# 在 alice 容器内执行示例 +export CTR_CERTS_ROOT=/home/kuscia/var/certs +curl -X POST 'http://127.0.0.1:8082/api/v1/domaindata/create' \ +--header 'Content-Type: application/json' \ +--cacert ${CTR_CERTS_ROOT}/ca.crt \ +-d '{ + "domaindata_id": "alice-table", + "name": "alice.csv", + "type": "table", + "relative_uri": "alice.csv", + "domain_id": "alice", + "datasource_id": "default-data-source", + "attributes": { + "description": "alice demo data" + }, + "columns": [ + { + "comment": "", + "name": "id1", + "type": "str" + }, + { + "comment": "", + "name": "age", + "type": "float" + }, + { + "comment": "", + "name": "education", + "type": "float" + }, + { + "comment": "", + "name": "default", + "type": "float" + }, + { + "comment": "", + "name": "balance", + "type": "float" + }, + { + "comment": "", + "name": "housing", + "type": "float" + }, + { + "comment": "", + "name": "loan", + "type": "float" + }, + { + "comment": "", + "name": "day", + "type": "float" + }, + { + "comment": "", + "name": "duration", + "type": "float" + }, + { + "comment": "", + "name": "campaign", + "type": 
"float" + }, + { + "comment": "", + "name": "pdays", + "type": "float" + }, + { + "comment": "", + "name": "previous", + "type": "float" + }, + { + "comment": "", + "name": "job_blue-collar", + "type": "float" + }, + { + "comment": "", + "name": "job_entrepreneur", + "type": "float" + }, + { + "comment": "", + "name": "job_housemaid", + "type": "float" + }, + { + "comment": "", + "name": "job_management", + "type": "float" + }, + { + "comment": "", + "name": "job_retired", + "type": "float" + }, + { + "comment": "", + "name": "job_self-employed", + "type": "float" + }, + { + "comment": "", + "name": "job_services", + "type": "float" + }, + { + "comment": "", + "name": "job_student", + "type": "float" + }, + { + "comment": "", + "name": "job_technician", + "type": "float" + }, + { + "comment": "", + "name": "job_unemployed", + "type": "float" + }, + { + "comment": "", + "name": "marital_divorced", + "type": "float" + }, + { + "comment": "", + "name": "marital_married", + "type": "float" + }, + { + "comment": "", + "name": "marital_single", + "type": "float" + } + ], + "vendor": "manual", + "author": "alice" +}' +``` + +将 Alice 的 DomainData 授权给 Bob +```bash +# 在 alice 容器内执行示例 +export CTR_CERTS_ROOT=/home/kuscia/var/certs +curl -X POST 'http://127.0.0.1:8082/api/v1/domaindatagrant/create' \ +--cacert ${CTR_CERTS_ROOT}/ca.crt \ +--header 'Content-Type: application/json' \ +-d '{ "grant_domain": "bob", + "description": {"domaindatagrant":"alice-bob"}, + "domain_id": "alice", + "domaindata_id": "alice-table" +}' +``` + +#### Bob 节点准备 OSS 测试数据 + +请先将 Bob 节点测试数据 [bob.csv](https://github.com/secretflow/kuscia/blob/main/testdata/bob.csv) 上传至 OSS + +登录到 Bob 节点的 Pod 中 +```bash +kubectl exec -it ${bob_pod_name} bash -n autonomy-bob +``` + +为 Bob 节点创建 OSS 数据源 + +创建 DomainData 的时候要指定 datasource_id,所以要先创建数据源,再创建 DomainData,示例如下: +```bash +# 在 bob 容器内执行示例 +export CTR_CERTS_ROOT=/home/kuscia/var/certs +curl -k -X POST 'http://localhost:8082/api/v1/domaindatasource/create' \ +--header 'Content-Type: application/json' \ +--cacert ${CTR_CERTS_ROOT}/ca.crt \ +-d '{ + "domain_id": "bob", + "datasource_id":"default-data-source", + "type":"oss", + "name": "DemoDataSource", + "info": { + "oss": { + "endpoint": "https://oss.xxx.cn-xxx.com", + "bucket": "secretflow", + "prefix": "kuscia/", + "access_key_id":"ak-xxxx", + "access_key_secret" :"sk-xxxx" +# "virtualhost": true (阿里云 OSS 需要配置此项) +# "storage_type": "minio" (Minio 需要配置此项) + } + }, + "access_directly": true +}' +``` + +为 Bob 的测试数据创建 DomainData +```bash +# 在 bob 容器内执行示例 +export CTR_CERTS_ROOT=/home/kuscia/var/certs +curl -X POST 'http://127.0.0.1:8082/api/v1/domaindata/create' \ +--header 'Content-Type: application/json' \ +--cacert ${CTR_CERTS_ROOT}/ca.crt \ +-d '{ + "domaindata_id": "bob-table", + "name": "bob.csv", + "type": "table", + "relative_uri": "bob.csv", + "domain_id": "bob", + "datasource_id": "default-data-source", + "attributes": { + "description": "bob demo data" + }, + "columns": [ + { + "comment": "", + "name": "id2", + "type": "str" + }, + { + "comment": "", + "name": "contact_cellular", + "type": "float" + }, + { + "comment": "", + "name": "contact_telephone", + "type": "float" + }, + { + "comment": "", + "name": "contact_unknown", + "type": "float" + }, + { + "comment": "", + "name": "month_apr", + "type": "float" + }, + { + "comment": "", + "name": "month_aug", + "type": "float" + }, + { + "comment": "", + "name": "month_dec", + "type": "float" + }, + { + "comment": "", + "name": "month_feb", + "type": "float" + }, + { + "comment": "", + 
"name": "month_jan", + "type": "float" + }, + { + "comment": "", + "name": "month_jul", + "type": "float" + }, + { + "comment": "", + "name": "month_jun", + "type": "float" + }, + { + "comment": "", + "name": "month_mar", + "type": "float" + }, + { + "comment": "", + "name": "month_may", + "type": "float" + }, + { + "comment": "", + "name": "month_nov", + "type": "float" + }, + { + "comment": "", + "name": "month_oct", + "type": "float" + }, + { + "comment": "", + "name": "month_sep", + "type": "float" + }, + { + "comment": "", + "name": "poutcome_failure", + "type": "float" + }, + { + "comment": "", + "name": "poutcome_other", + "type": "float" + }, + { + "comment": "", + "name": "poutcome_success", + "type": "float" + }, + { + "comment": "", + "name": "poutcome_unknown", + "type": "float" + }, + { + "comment": "", + "name": "y", + "type": "int" + } + ], + "vendor": "manual", + "author": "bob" +}' +``` + +将 Bob 的 DomainData 授权给 Alice +```bash +# 在 bob 容器内执行示例 +export CTR_CERTS_ROOT=/home/kuscia/var/certs +curl -X POST 'http://127.0.0.1:8082/api/v1/domaindatagrant/create' \ +--cacert ${CTR_CERTS_ROOT}/ca.crt \ +--header 'Content-Type: application/json' \ +-d '{ "grant_domain": "alice", + "description": {"domaindatagrant":"bob-alice"}, + "domain_id": "bob", + "domaindata_id": "bob-table" +}' ``` ### 创建 AppImage -- [alice 节点] +- [Alice 节点] -登录到 alice pod +登录到 Alice pod ```bash kubectl exec -it ${alice_pod_name} bash -n autonomy-alice ``` @@ -204,9 +896,9 @@ kubectl exec -it ${alice_pod_name} bash -n autonomy-alice kubectl apply -f AppImage.yaml ``` -- [bob 节点] +- [Bob 节点] -登录到 bob pod +登录到 Bob 节点的 Pod 内 ```bash kubectl exec -it ${bob_pod_name} bash -n autonomy-bob ``` @@ -216,7 +908,7 @@ kubectl apply -f AppImage.yaml ``` ### 执行测试作业 -登录到 alice pod +登录到 Alice 节点 的 Pod 内 ```bash kubectl exec -it ${alice_pod_name} bash -n autonomy-alice ``` @@ -229,4 +921,9 @@ scripts/user/create_example_job.sh `pod 内部`查看作业状态 ```bash kubectl get kj -n cross-domain +``` + +`pod 外部`Runk 模式可以在 Kuscia Pod 所在集群中执行如下命令查看引擎日志 +```bash +kubectl logs ${engine_pod_name} -n autonomy-alice ``` \ No newline at end of file diff --git a/docs/deployment/kuscia_config_cn.md b/docs/deployment/kuscia_config_cn.md index 8962cdcc..135b1655 100644 --- a/docs/deployment/kuscia_config_cn.md +++ b/docs/deployment/kuscia_config_cn.md @@ -111,7 +111,7 @@ enableWorkloadApprove: false - 自动建表:如果提供的数据库账号有建表权限(账号具有`DDL+DML`权限),并且数据表不存在,kuscia 会尝试自动建表,如果创建失败 kuscia 会启动失败。 - 数据库账户对表中字段至少具有 select、insert、update、delete 操作权限。 - `protocol`: KusciaAPI 以及节点对外网关使用的通信协议,有三种通信协议可供选择:NOTLS/TLS/MTLS(不区分大小写)。 - - `NOTLS`: 不使用 TLS 协议,即数据通过未加密的 HTTP 传输,比较安全的内部网络环境或者 Kuscia 已经存在外部网关的情况可以使用该模式。 + - `NOTLS`: 此模式下,通信并未采用 TLS 协议进行加密,即数据通过未加密的 HTTP 传输。在高度信任且严格管控的内部网络环境,或是已具备外部安全网关防护措施的情况下,可以使用该模式,但在一般情况下,由于存在安全隐患,不推荐使用。 - `TLS`: 通过 TLS 协议进行加密,即使用 HTTPS 进行安全传输,不需要手动配置证书。 - `MTLS`: 使用 HTTPS 进行通信,支持双向 TLS 验证,需要手动交换证书以建立安全连接。 - `enableWorkloadApprove`: 是否开启工作负载审核,默认为 false,即关闭审核。取值范围:[true, false]。 diff --git a/docs/deployment/kuscia_ports_cn.md b/docs/deployment/kuscia_ports_cn.md index 228f8b5d..5b2a9464 100644 --- a/docs/deployment/kuscia_ports_cn.md +++ b/docs/deployment/kuscia_ports_cn.md @@ -16,4 +16,4 @@ | HTTP | 80 | 访问节点中应用的端口。例如:可通过此端口访问 Serving 服务进行预测打分,可参考[使用 SecretFlow Serving 进行预测](../tutorial/run_sf_serving_with_api_cn.md#使用-secretflow-serving-进行预测) | 否 | | HTTP/HTTPS | 8082 | 节点 KusciaAPI 的访问端口,可参考[如何使用 KusciaAPI](../reference/apis/summary_cn.md#如何使用-kuscia-api) | 否 | | GRPC/GRPCS | 8083 | 节点 KusciaAPI 的访问端口,可参考[如何使用 
KusciaAPI](../reference/apis/summary_cn.md#如何使用-kuscia-api) | 否 | -| HTTP | 9091 | 节点 Metrics 指标采集端口,可参考 [Kuscia 监控](./kuscia_monitor) | 否 | +| HTTP | 9091 | 节点 Metrics 指标采集端口,可参考 [Kuscia 监控](./kuscia_monitor) | 否 | \ No newline at end of file diff --git a/docs/development/build_kuscia_cn.md b/docs/development/build_kuscia_cn.md index aef7e527..50085cc5 100644 --- a/docs/development/build_kuscia_cn.md +++ b/docs/development/build_kuscia_cn.md @@ -70,6 +70,22 @@ Kuscia 镜像的构建依赖 Kuscia-Envoy 镜像,Kuscia 提供默认的 [Kusci 如果你使用的是 arm 架构的 macOS,请修改`build/dockerfile/kuscia-anolis.Dockerfile`文件,将`FROM openanolis/anolisos:8.8`修改为`FROM openanolis/anolisos:8.4-x86_64`,然后再执行`make build`命令。 +### 构建 Kuscia-Secretflow Image + +在 kuscia/build/dockerfile 目录下: + +执行`docker build -f ./kuscia-secretflow.Dockerfile .`命令会构建出 Kuscia-Secretflow 镜像。Kuscia-Secretflow 镜像在 Kuscia 镜像的基础上集成了 Secretflow 镜像。 + +需要注意的是,仅 `RunP` 模式下需要构建 kuscia-secretflow 镜像。 + +kuscia-secretflow.Dockerfile 文件里默认的 Kuscia 镜像版本是 latest,Secretflow 版本是 1.5.0b0,如果需要指定其他版本,可以使用如下命令: + +此处以 Kuscia 0.8.0b0,Secretflow 1.5.0b0 版本为例 + +```bash +docker build --build-arg KUSCIA_IMAGE=secretflow-registry.cn-hangzhou.cr.aliyuncs.com/secretflow/kuscia:0.8.0b0 --build-arg SF_VERSION=1.5.0b0 -f ./kuscia-secretflow.Dockerfile . +``` + ### 编译文档 在 Kuscia 项目根目录下: diff --git a/docs/development/register_custom_image.md b/docs/development/register_custom_image.md index f8708781..8da9054f 100644 --- a/docs/development/register_custom_image.md +++ b/docs/development/register_custom_image.md @@ -86,35 +86,4 @@ docker cp ${USER}-kuscia-autonomy-alice:/home/kuscia/scripts/tools/register_app_ 通过前面步骤注册完自定义算法镜像后,你可以获取算法镜像对应的 AppImage 资源名称。后续使用自定义算法镜像运行任务时,只需修改相应的字段即可。 -下面以名称为`secretflow-image`的 AppImage 为例,使用自定义算法镜像运行 [KusciaJob](../reference/concepts/kusciajob_cn.md) 作业。 - -- 修改 KusciaJob 下 `spec.tasks[].appImage`字段的值。 - -```yaml -apiVersion: kuscia.secretflow/v1alpha1 -kind: KusciaJob -metadata: - name: job-best-effort-linear -spec: - initiator: alice - scheduleMode: BestEffort - maxParallelism: 2 - tasks: - - taskID: job-psi - alias: job-psi - priority: 100 - taskInputConfig: '{"sf_datasource_config":{"alice":{"id":"default-data-source"},"bob":{"id":"default-data-source"}},"sf_cluster_desc":{"parties":["alice","bob"],"devices":[{"name":"spu","type":"spu","parties":["alice","bob"],"config":"{\"runtime_config\":{\"protocol\":\"REF2K\",\"field\":\"FM64\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}"},{"name":"heu","type":"heu","parties":["alice","bob"],"config":"{\"mode\": \"PHEU\", \"schema\": \"paillier\", \"key_size\": 2048}"}],"ray_fed_config":{"cross_silo_comm_backend":"brpc_link"}},"sf_node_eval_param":{"domain":"data_prep","name":"psi","version":"0.0.1","attr_paths":["input/receiver_input/key","input/sender_input/key","protocol","precheck_input","bucket_size","curve_type"],"attrs":[{"ss":["id1"]},{"ss":["id2"]},{"s":"ECDH_PSI_2PC"},{"b":true},{"i64":"1048576"},{"s":"CURVE_FOURQ"}]},"sf_input_ids":["alice-table","bob-table"],"sf_output_ids":["psi-output"],"sf_output_uris":["psi-output.csv"]}' - appImage: secretflow-image - parties: - - domainID: alice - - domainID: bob - - taskID: job-split - alias: job-split - priority: 100 - dependencies: ['job-psi'] - taskInputConfig: 
'{"sf_datasource_config":{"alice":{"id":"default-data-source"},"bob":{"id":"default-data-source"}},"sf_cluster_desc":{"parties":["alice","bob"],"devices":[{"name":"spu","type":"spu","parties":["alice","bob"],"config":"{\"runtime_config\":{\"protocol\":\"REF2K\",\"field\":\"FM64\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}"},{"name":"heu","type":"heu","parties":["alice","bob"],"config":"{\"mode\": \"PHEU\", \"schema\": \"paillier\", \"key_size\": 2048}"}],"ray_fed_config":{"cross_silo_comm_backend":"brpc_link"}},"sf_node_eval_param":{"domain":"data_prep","name":"train_test_split","version":"0.0.1","attr_paths":["train_size","test_size","random_state","shuffle"],"attrs":[{"f":0.75},{"f":0.25},{"i64":1234},{"b":true}]},"sf_output_uris":["train-dataset.csv","test-dataset.csv"],"sf_output_ids":["train-dataset","test-dataset"],"sf_input_ids":["psi-output"]}' - appImage: secretflow-image - parties: - - domainID: alice - - domainID: bob -``` +以名称为`secretflow-image`的 AppImage 为例,使用自定义算法镜像运行 [KusciaJob](../reference/concepts/kusciajob_cn.md) 作业,修改[KusciaJob 示例](../reference/concepts/kusciajob_cn.md#创建-kusciajob) 中 `spec.tasks[].appImage`字段的值。 \ No newline at end of file diff --git a/docs/getting_started/quickstart_cn.md b/docs/getting_started/quickstart_cn.md index b5a99584..ab467773 100644 --- a/docs/getting_started/quickstart_cn.md +++ b/docs/getting_started/quickstart_cn.md @@ -50,6 +50,7 @@ macOS 默认给单个 docker container 分配了 2G 内存,请参考[官方文 此外,Kuscia 当前不支持 M1/M2 芯片的 Mac。 ## 部署体验 +> 本文旨在帮助您快速体验 Kuscia,不涉及任何宿主机端口暴露配置。如需暴露端口,请前往[多机部署](../deployment/Docker_deployment_kuscia/deploy_p2p_cn.md) ### 前置操作 diff --git a/docs/getting_started/run_secretflow_cn.md b/docs/getting_started/run_secretflow_cn.md index 984d2807..6989f466 100644 --- a/docs/getting_started/run_secretflow_cn.md +++ b/docs/getting_started/run_secretflow_cn.md @@ -57,61 +57,13 @@ docker exec -it ${USER}-kuscia-master bash ### 使用 Kuscia 示例数据配置 KusciaJob -下面的示例展示了一个 KusciaJob, 该任务流完成 2 个任务: +此处以[KusciaJob 示例](../reference/concepts/kusciajob_cn.md#创建-kusciajob)作为展示,该任务流完成 2 个任务: 1. job-psi 读取 alice 和 bob 的数据文件,进行隐私求交,求交的结果分别保存为两个参与方的 `psi-output.csv`。 2. 
job-split 读取 alice 和 bob 上一步中求交的结果文件,并拆分成训练集和测试集,分别保存为两个参与方的 `train-dataset.csv`、`test-dataset.csv`。 这个 KusciaJob 的名称为 job-best-effort-linear,在一个 Kuscia 集群中,这个名称必须是唯一的,由 `.metadata.name` 指定。 -在 kuscia-master 容器中,在任意路径创建文件 job-best-effort-linear.yaml,内容如下: - -```yaml -apiVersion: kuscia.secretflow/v1alpha1 -kind: KusciaJob -metadata: - name: job-best-effort-linear - namespace: cross-domain -spec: - initiator: alice - scheduleMode: BestEffort - maxParallelism: 2 - tasks: - - taskID: job-psi - alias: job-psi - priority: 100 - taskInputConfig: '{"sf_datasource_config":{"alice":{"id":"default-data-source"},"bob":{"id":"default-data-source"}},"sf_cluster_desc":{"parties":["alice","bob"],"devices":[{"name":"spu","type":"spu","parties":["alice","bob"],"config":"{\"runtime_config\":{\"protocol\":\"REF2K\",\"field\":\"FM64\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}"},{"name":"heu","type":"heu","parties":["alice","bob"],"config":"{\"mode\": \"PHEU\", \"schema\": \"paillier\", \"key_size\": 2048}"}],"ray_fed_config":{"cross_silo_comm_backend":"brpc_link"}},"sf_node_eval_param":{"domain":"data_prep","name":"psi","version":"0.0.1","attr_paths":["input/receiver_input/key","input/sender_input/key","protocol","precheck_input","bucket_size","curve_type"],"attrs":[{"ss":["id1"]},{"ss":["id2"]},{"s":"ECDH_PSI_2PC"},{"b":true},{"i64":"1048576"},{"s":"CURVE_FOURQ"}]},"sf_input_ids":["alice-table","bob-table"],"sf_output_ids":["psi-output"],"sf_output_uris":["psi-output.csv"]}' - appImage: secretflow-image - parties: - - domainID: alice - - domainID: bob - - taskID: job-split - alias: job-split - priority: 100 - dependencies: ['job-psi'] - taskInputConfig: '{"sf_datasource_config":{"alice":{"id":"default-data-source"},"bob":{"id":"default-data-source"}},"sf_cluster_desc":{"parties":["alice","bob"],"devices":[{"name":"spu","type":"spu","parties":["alice","bob"],"config":"{\"runtime_config\":{\"protocol\":\"REF2K\",\"field\":\"FM64\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}"},{"name":"heu","type":"heu","parties":["alice","bob"],"config":"{\"mode\": \"PHEU\", \"schema\": \"paillier\", \"key_size\": 2048}"}],"ray_fed_config":{"cross_silo_comm_backend":"brpc_link"}},"sf_node_eval_param":{"domain":"data_prep","name":"train_test_split","version":"0.0.1","attr_paths":["train_size","test_size","random_state","shuffle"],"attrs":[{"f":0.75},{"f":0.25},{"i64":1234},{"b":true}]},"sf_output_uris":["train-dataset.csv","test-dataset.csv"],"sf_output_ids":["train-dataset","test-dataset"],"sf_input_ids":["psi-output"]}' - appImage: secretflow-image - parties: - - domainID: alice - - domainID: bob -``` - -:::{tip} - -更多有关 KusciaJob 配置的信息,请查看 [KusciaJob](../reference/concepts/kusciajob_cn.md)。 - -KusciaJob 的算子参数由 `taskInputConfig` 字段定义,对于不同的算子,算子的参数不同。 - -本教程使用的是 SecretFlow 的算子参数定义,以 SecretFlow 引擎任务为例: -- `sf_datasource_config`:表示 SecretFlow 输入输出所需要的节点数据源信息。 -- `sf_cluster_desc`:表示 SecretFlow 集群信息,详情请查阅 [SecretFlow 集群文档](https://www.secretflow.org.cn/docs/secretflow/latest/zh-Hans/component/comp_spec_design#sfclusterdesc)。 -- `sf_node_eval_param`:表示 SecretFlow 算子的详细配置,详情请查阅 [SecretFlow 算子运行配置文档](https://www.secretflow.org.cn/docs/spec/latest/zh-Hans/intro#nodeevalparam)。 -- 
`sf_input_ids`:表示 SecretFlow 输入数据 `id` ,SecretFlow 引擎会将 Kuscia 定义的输入数据 [DomainData](../reference/concepts/domaindata_cn.md) 转换成引擎所需要的 [DistData](https://www.secretflow.org.cn/docs/spec/latest/zh-Hans/spec#distdata)。 -- `sf_output_ids`:表示 SecretFlow 输出数据 `id` ,SecretFlow 引擎会将输出的 [DistData](https://www.secretflow.org.cn/docs/spec/latest/zh-Hans/spec#distdata) 转换成 Kuscia 的 [DomainData](../reference/concepts/domaindata_cn.md)。 -- `sf_output_uris`:表示 SecretFlow 输出数据路径。 - -::: - ### 使用你自己的数据配置 KusciaJob 如果你要使用你自己的数据,可以将两个算子中的 `taskInputConfig.sf_input_ids` 的数据文件 `id` 修改为你在 [准备你自己的数据](#prepare-your-own-data) 中的 `domaindata_id` 即可。 @@ -175,7 +127,7 @@ spec: priority: 100 taskID: job-psi alias: job-psi - taskInputConfig: '{"sf_datasource_config":{"alice":{"id":"default-data-source"},"bob":{"id":"default-data-source"}},"sf_cluster_desc":{"parties":["alice","bob"],"devices":[{"name":"spu","type":"spu","parties":["alice","bob"],"config":"{\"runtime_config\":{\"protocol\":\"REF2K\",\"field\":\"FM64\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}"},{"name":"heu","type":"heu","parties":["alice","bob"],"config":"{\"mode\": \"PHEU\", \"schema\": \"paillier\", \"key_size\": 2048}"}],"ray_fed_config":{"cross_silo_comm_backend":"brpc_link"}},"sf_node_eval_param":{"domain":"data_prep","name":"psi","version":"0.0.1","attr_paths":["input/receiver_input/key","input/sender_input/key","protocol","precheck_input","bucket_size","curve_type"],"attrs":[{"ss":["id1"]},{"ss":["id2"]},{"s":"ECDH_PSI_2PC"},{"b":true},{"i64":"1048576"},{"s":"CURVE_FOURQ"}]},"sf_input_ids":["alice-table","bob-table"],"sf_output_ids":["psi-output"],"sf_output_uris":["psi-output.csv"]}' + taskInputConfig: '{"sf_datasource_config":{"alice":{"id":"default-data-source"},"bob":{"id":"default-data-source"}},"sf_cluster_desc":{"parties":["alice","bob"],"devices":[{"name":"spu","type":"spu","parties":["alice","bob"],"config":"{\"runtime_config\":{\"protocol\":\"REF2K\",\"field\":\"FM64\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}"},{"name":"heu","type":"heu","parties":["alice","bob"],"config":"{\"mode\": \"PHEU\", \"schema\": \"paillier\", \"key_size\": 2048}"}],"ray_fed_config":{"cross_silo_comm_backend":"brpc_link"}},"sf_node_eval_param":{"domain":"data_prep","name":"psi","version":"0.0.4","attr_paths":["input/receiver_input/key","input/sender_input/key","protocol","precheck_input","bucket_size","curve_type","left_side"],"attrs":[{"ss":["id1"]},{"ss":["id2"]},{"s":"PROTOCOL_ECDH"},{"b":true},{"i64":"1048576"},{"s":"CURVE_FOURQ"},{"is_na": false,"ss": ["alice"]}]},"sf_input_ids":["alice-table","bob-table"],"sf_output_ids":["psi-output"],"sf_output_uris":["psi-output.csv"]}' tolerable: false - appImage: secretflow-image dependencies: diff --git a/docs/reference/apis/domaindatasource_cn.md b/docs/reference/apis/domaindatasource_cn.md index 0182ff14..c877b4ff 100644 --- a/docs/reference/apis/domaindatasource_cn.md +++ b/docs/reference/apis/domaindatasource_cn.md @@ -98,6 +98,8 @@ curl -k -X POST 'https://localhost:8082/api/v1/domaindatasource/create' \ "prefix": "kuscia/", "access_key_id":"ak-xxxx", "access_key_secret" :"sk-xxxx" +# "virtualhost": true (阿里云 OSS 需要配置此项) +# "storage_type": "minio" (Minio 需要配置此项) } }, 
"access_directly": true diff --git a/docs/reference/apis/kusciajob_cn.md b/docs/reference/apis/kusciajob_cn.md index 0a90589e..4f0a6805 100644 --- a/docs/reference/apis/kusciajob_cn.md +++ b/docs/reference/apis/kusciajob_cn.md @@ -81,7 +81,7 @@ curl -k -X POST 'https://localhost:8082/api/v1/job/create' \ ], "alias": "job-psi", "dependencies": [], - "task_input_config": "{\"sf_datasource_config\":{\"alice\":{\"id\":\"default-data-source\"},\"bob\":{\"id\":\"default-data-source\"}},\"sf_cluster_desc\":{\"parties\":[\"alice\",\"bob\"],\"devices\":[{\"name\":\"spu\",\"type\":\"spu\",\"parties\":[\"alice\",\"bob\"],\"config\":\"{\\\"runtime_config\\\":{\\\"protocol\\\":\\\"REF2K\\\",\\\"field\\\":\\\"FM64\\\"},\\\"link_desc\\\":{\\\"connect_retry_times\\\":60,\\\"connect_retry_interval_ms\\\":1000,\\\"brpc_channel_protocol\\\":\\\"http\\\",\\\"brpc_channel_connection_type\\\":\\\"pooled\\\",\\\"recv_timeout_ms\\\":1200000,\\\"http_timeout_ms\\\":1200000}}\"},{\"name\":\"heu\",\"type\":\"heu\",\"parties\":[\"alice\",\"bob\"],\"config\":\"{\\\"mode\\\": \\\"PHEU\\\", \\\"schema\\\": \\\"paillier\\\", \\\"key_size\\\": 2048}\"}],\"ray_fed_config\":{\"cross_silo_comm_backend\":\"brpc_link\"}},\"sf_node_eval_param\":{\"domain\":\"preprocessing\",\"name\":\"psi\",\"version\":\"0.0.1\",\"attr_paths\":[\"input/receiver_input/key\",\"input/sender_input/key\",\"protocol\",\"precheck_input\",\"bucket_size\",\"curve_type\"],\"attrs\":[{\"ss\":[\"id1\"]},{\"ss\":[\"id2\"]},{\"s\":\"ECDH_PSI_2PC\"},{\"b\":true},{\"i64\":\"1048576\"},{\"s\":\"CURVE_FOURQ\"}]},\"sf_input_ids\":[\"alice-table\",\"bob-table\"],\"sf_output_ids\":[\"psi-output\"],\"sf_output_uris\":[\"psi-output.csv\"]}", + "task_input_config": "{\"sf_datasource_config\":{\"alice\":{\"id\":\"default-data-source\"},\"bob\":{\"id\":\"default-data-source\"}},\"sf_cluster_desc\":{\"parties\":[\"alice\",\"bob\"],\"devices\":[{\"name\":\"spu\",\"type\":\"spu\",\"parties\":[\"alice\",\"bob\"],\"config\":\"{\\\"runtime_config\\\":{\\\"protocol\\\":\\\"REF2K\\\",\\\"field\\\":\\\"FM64\\\"},\\\"link_desc\\\":{\\\"connect_retry_times\\\":60,\\\"connect_retry_interval_ms\\\":1000,\\\"brpc_channel_protocol\\\":\\\"http\\\",\\\"brpc_channel_connection_type\\\":\\\"pooled\\\",\\\"recv_timeout_ms\\\":1200000,\\\"http_timeout_ms\\\":1200000}}\"},{\"name\":\"heu\",\"type\":\"heu\",\"parties\":[\"alice\",\"bob\"],\"config\":\"{\\\"mode\\\": \\\"PHEU\\\", \\\"schema\\\": \\\"paillier\\\", \\\"key_size\\\": 2048}\"}],\"ray_fed_config\":{\"cross_silo_comm_backend\":\"brpc_link\"}},\"sf_node_eval_param\":{\"domain\":\"data_prep\",\"name\":\"psi\",\"version\":\"0.0.4\",\"attr_paths\":[\"input/receiver_input/key\",\"input/sender_input/key\",\"protocol\",\"precheck_input\",\"bucket_size\",\"curve_type\",\"left_side\"],\"attrs\":[{\"ss\":[\"id1\"]},{\"ss\":[\"id2\"]},{\"s\":\"PROTOCOL_ECDH\"},{\"b\":true},{\"i64\":\"1048576\"},{\"s\":\"CURVE_FOURQ\"},{\"is_na\": false,\"ss\": [\"alice\"]}]},\"sf_input_ids\":[\"alice-table\",\"bob-table\"],\"sf_output_ids\":[\"psi-output\"],\"sf_output_uris\":[\"psi-output.csv\"]}", "priority": 100 }, { @@ -101,7 +101,7 @@ curl -k -X POST 'https://localhost:8082/api/v1/job/create' \ "dependencies": [ "job-psi" ], - "task_input_config": 
"{\"sf_datasource_config\":{\"alice\":{\"id\":\"default-data-source\"},\"bob\":{\"id\":\"default-data-source\"}},\"sf_cluster_desc\":{\"parties\":[\"alice\",\"bob\"],\"devices\":[{\"name\":\"spu\",\"type\":\"spu\",\"parties\":[\"alice\",\"bob\"],\"config\":\"{\\\"runtime_config\\\":{\\\"protocol\\\":\\\"REF2K\\\",\\\"field\\\":\\\"FM64\\\"},\\\"link_desc\\\":{\\\"connect_retry_times\\\":60,\\\"connect_retry_interval_ms\\\":1000,\\\"brpc_channel_protocol\\\":\\\"http\\\",\\\"brpc_channel_connection_type\\\":\\\"pooled\\\",\\\"recv_timeout_ms\\\":1200000,\\\"http_timeout_ms\\\":1200000}}\"},{\"name\":\"heu\",\"type\":\"heu\",\"parties\":[\"alice\",\"bob\"],\"config\":\"{\\\"mode\\\": \\\"PHEU\\\", \\\"schema\\\": \\\"paillier\\\", \\\"key_size\\\": 2048}\"}],\"ray_fed_config\":{\"cross_silo_comm_backend\":\"brpc_link\"}},\"sf_node_eval_param\":{\"domain\":\"preprocessing\",\"name\":\"train_test_split\",\"version\":\"0.0.1\",\"attr_paths\":[\"train_size\",\"test_size\",\"random_state\",\"shuffle\"],\"attrs\":[{\"f\":0.75},{\"f\":0.25},{\"i64\":1234},{\"b\":true}]},\"sf_output_uris\":[\"train-dataset.csv\",\"test-dataset.csv\"],\"sf_output_ids\":[\"train-dataset\",\"test-dataset\"],\"sf_input_ids\":[\"psi-output\"]}", + "task_input_config": "{\"sf_datasource_config\":{\"alice\":{\"id\":\"default-data-source\"},\"bob\":{\"id\":\"default-data-source\"}},\"sf_cluster_desc\":{\"parties\":[\"alice\",\"bob\"],\"devices\":[{\"name\":\"spu\",\"type\":\"spu\",\"parties\":[\"alice\",\"bob\"],\"config\":\"{\\\"runtime_config\\\":{\\\"protocol\\\":\\\"REF2K\\\",\\\"field\\\":\\\"FM64\\\"},\\\"link_desc\\\":{\\\"connect_retry_times\\\":60,\\\"connect_retry_interval_ms\\\":1000,\\\"brpc_channel_protocol\\\":\\\"http\\\",\\\"brpc_channel_connection_type\\\":\\\"pooled\\\",\\\"recv_timeout_ms\\\":1200000,\\\"http_timeout_ms\\\":1200000}}\"},{\"name\":\"heu\",\"type\":\"heu\",\"parties\":[\"alice\",\"bob\"],\"config\":\"{\\\"mode\\\": \\\"PHEU\\\", \\\"schema\\\": \\\"paillier\\\", \\\"key_size\\\": 2048}\"}],\"ray_fed_config\":{\"cross_silo_comm_backend\":\"brpc_link\"}},\"sf_node_eval_param\":{\"domain\":\"data_prep\",\"name\":\"train_test_split\",\"version\":\"0.0.1\",\"attr_paths\":[\"train_size\",\"test_size\",\"random_state\",\"shuffle\"],\"attrs\":[{\"f\":0.75},{\"f\":0.25},{\"i64\":1234},{\"b\":true}]},\"sf_output_uris\":[\"train-dataset.csv\",\"test-dataset.csv\"],\"sf_output_ids\":[\"train-dataset\",\"test-dataset\"],\"sf_input_ids\":[\"psi-output\"]}", "priority": 100 } ] @@ -123,6 +123,12 @@ curl -k -X POST 'https://localhost:8082/api/v1/job/create' \ } ``` +:::{tip} + +上述请求示例中的引擎镜像基于 SecretFlow `1.5.0b0` 版本。算子参数的 `taskInputConfig` 内容可参考[KusciaJob](../concepts/kusciajob_cn.md#创建-kusciajob) + +::: + {#query-job} ### 查询 Job @@ -200,7 +206,7 @@ curl -k -X POST 'https://localhost:8082/api/v1/job/query' \ "dependencies": [ "" ], - "task_input_config": 
"{\"sf_datasource_config\":{\"alice\":{\"id\":\"default-data-source\"},\"bob\":{\"id\":\"default-data-source\"}},\"sf_cluster_desc\":{\"parties\":[\"alice\",\"bob\"],\"devices\":[{\"name\":\"spu\",\"type\":\"spu\",\"parties\":[\"alice\",\"bob\"],\"config\":\"{\\\"runtime_config\\\":{\\\"protocol\\\":\\\"REF2K\\\",\\\"field\\\":\\\"FM64\\\"},\\\"link_desc\\\":{\\\"connect_retry_times\\\":60,\\\"connect_retry_interval_ms\\\":1000,\\\"brpc_channel_protocol\\\":\\\"http\\\",\\\"brpc_channel_connection_type\\\":\\\"pooled\\\",\\\"recv_timeout_ms\\\":1200000,\\\"http_timeout_ms\\\":1200000}}\"},{\"name\":\"heu\",\"type\":\"heu\",\"parties\":[\"alice\",\"bob\"],\"config\":\"{\\\"mode\\\": \\\"PHEU\\\", \\\"schema\\\": \\\"paillier\\\", \\\"key_size\\\": 2048}\"}],\"ray_fed_config\":{\"cross_silo_comm_backend\":\"brpc_link\"}},\"sf_node_eval_param\":{\"domain\":\"preprocessing\",\"name\":\"psi\",\"version\":\"0.0.1\",\"attr_paths\":[\"input/receiver_input/key\",\"input/sender_input/key\",\"protocol\",\"precheck_input\",\"bucket_size\",\"curve_type\"],\"attrs\":[{\"ss\":[\"id1\"]},{\"ss\":[\"id2\"]},{\"s\":\"ECDH_PSI_2PC\"},{\"b\":true},{\"i64\":\"1048576\"},{\"s\":\"CURVE_FOURQ\"}]},\"sf_input_ids\":[\"alice-table\",\"bob-table\"],\"sf_output_ids\":[\"psi-output\"],\"sf_output_uris\":[\"psi-output.csv\"]}", + "task_input_config": "{\"sf_datasource_config\":{\"alice\":{\"id\":\"default-data-source\"},\"bob\":{\"id\":\"default-data-source\"}},\"sf_cluster_desc\":{\"parties\":[\"alice\",\"bob\"],\"devices\":[{\"name\":\"spu\",\"type\":\"spu\",\"parties\":[\"alice\",\"bob\"],\"config\":\"{\\\"runtime_config\\\":{\\\"protocol\\\":\\\"REF2K\\\",\\\"field\\\":\\\"FM64\\\"},\\\"link_desc\\\":{\\\"connect_retry_times\\\":60,\\\"connect_retry_interval_ms\\\":1000,\\\"brpc_channel_protocol\\\":\\\"http\\\",\\\"brpc_channel_connection_type\\\":\\\"pooled\\\",\\\"recv_timeout_ms\\\":1200000,\\\"http_timeout_ms\\\":1200000}}\"},{\"name\":\"heu\",\"type\":\"heu\",\"parties\":[\"alice\",\"bob\"],\"config\":\"{\\\"mode\\\": \\\"PHEU\\\", \\\"schema\\\": \\\"paillier\\\", \\\"key_size\\\": 2048}\"}],\"ray_fed_config\":{\"cross_silo_comm_backend\":\"brpc_link\"}},\"sf_node_eval_param\":{\"domain\":\"data_prep\",\"name\":\"psi\",\"version\":\"0.0.4\",\"attr_paths\":[\"input/receiver_input/key\",\"input/sender_input/key\",\"protocol\",\"precheck_input\",\"bucket_size\",\"curve_type\",\"left_side\"],\"attrs\":[{\"ss\":[\"id1\"]},{\"ss\":[\"id2\"]},{\"s\":\"PROTOCOL_ECDH\"},{\"b\":true},{\"i64\":\"1048576\"},{\"s\":\"CURVE_FOURQ\"},{\"is_na\": false,\"ss\": [\"alice\"]}]},\"sf_input_ids\":[\"alice-table\",\"bob-table\"],\"sf_output_ids\":[\"psi-output\"],\"sf_output_uris\":[\"psi-output.csv\"]}", "priority": 100 }, { @@ -220,7 +226,7 @@ curl -k -X POST 'https://localhost:8082/api/v1/job/query' \ "dependencies": [ "job-psi" ], - "task_input_config": 
"{\"sf_datasource_config\":{\"alice\":{\"id\":\"default-data-source\"},\"bob\":{\"id\":\"default-data-source\"}},\"sf_cluster_desc\":{\"parties\":[\"alice\",\"bob\"],\"devices\":[{\"name\":\"spu\",\"type\":\"spu\",\"parties\":[\"alice\",\"bob\"],\"config\":\"{\\\"runtime_config\\\":{\\\"protocol\\\":\\\"REF2K\\\",\\\"field\\\":\\\"FM64\\\"},\\\"link_desc\\\":{\\\"connect_retry_times\\\":60,\\\"connect_retry_interval_ms\\\":1000,\\\"brpc_channel_protocol\\\":\\\"http\\\",\\\"brpc_channel_connection_type\\\":\\\"pooled\\\",\\\"recv_timeout_ms\\\":1200000,\\\"http_timeout_ms\\\":1200000}}\"},{\"name\":\"heu\",\"type\":\"heu\",\"parties\":[\"alice\",\"bob\"],\"config\":\"{\\\"mode\\\": \\\"PHEU\\\", \\\"schema\\\": \\\"paillier\\\", \\\"key_size\\\": 2048}\"}],\"ray_fed_config\":{\"cross_silo_comm_backend\":\"brpc_link\"}},\"sf_node_eval_param\":{\"domain\":\"preprocessing\",\"name\":\"train_test_split\",\"version\":\"0.0.1\",\"attr_paths\":[\"train_size\",\"test_size\",\"random_state\",\"shuffle\"],\"attrs\":[{\"f\":0.75},{\"f\":0.25},{\"i64\":1234},{\"b\":true}]},\"sf_output_uris\":[\"train-dataset.csv\",\"test-dataset.csv\"],\"sf_output_ids\":[\"train-dataset\",\"test-dataset\"],\"sf_input_ids\":[\"psi-output\"]}", + "task_input_config": "{\"sf_datasource_config\":{\"alice\":{\"id\":\"default-data-source\"},\"bob\":{\"id\":\"default-data-source\"}},\"sf_cluster_desc\":{\"parties\":[\"alice\",\"bob\"],\"devices\":[{\"name\":\"spu\",\"type\":\"spu\",\"parties\":[\"alice\",\"bob\"],\"config\":\"{\\\"runtime_config\\\":{\\\"protocol\\\":\\\"REF2K\\\",\\\"field\\\":\\\"FM64\\\"},\\\"link_desc\\\":{\\\"connect_retry_times\\\":60,\\\"connect_retry_interval_ms\\\":1000,\\\"brpc_channel_protocol\\\":\\\"http\\\",\\\"brpc_channel_connection_type\\\":\\\"pooled\\\",\\\"recv_timeout_ms\\\":1200000,\\\"http_timeout_ms\\\":1200000}}\"},{\"name\":\"heu\",\"type\":\"heu\",\"parties\":[\"alice\",\"bob\"],\"config\":\"{\\\"mode\\\": \\\"PHEU\\\", \\\"schema\\\": \\\"paillier\\\", \\\"key_size\\\": 2048}\"}],\"ray_fed_config\":{\"cross_silo_comm_backend\":\"brpc_link\"}},\"sf_node_eval_param\":{\"domain\":\"data_prep\",\"name\":\"train_test_split\",\"version\":\"0.0.1\",\"attr_paths\":[\"train_size\",\"test_size\",\"random_state\",\"shuffle\"],\"attrs\":[{\"f\":0.75},{\"f\":0.25},{\"i64\":1234},{\"b\":true}]},\"sf_output_uris\":[\"train-dataset.csv\",\"test-dataset.csv\"],\"sf_output_ids\":[\"train-dataset\",\"test-dataset\"],\"sf_input_ids\":[\"psi-output\"]}", "priority": 100 } ], @@ -725,26 +731,26 @@ curl -k -X POST 'https://localhost:8082/api/v1/job/cancel' \ ### PartyStatus -| 字段 | 类型 | 选填 | 描述 | -|-----------|-------------------------------------------|----|---------------| -| domain_id | string | 必填 | 节点 ID | +| 字段 | 类型 | 选填 | 描述 | +|-----------|-------------------------------------------|----|-----------------------------| +| domain_id | string | 必填 | 节点 ID | | state | string | 必填 | 参与方任务状态, 参考 [State](#state) | -| err_msg | string | 可选 | 错误信息 | -| endpoints | [JobPartyEndpoint](#job-party-endpoint)[] | 必填 | 应用对外暴露的访问地址信息 | +| err_msg | string | 可选 | 错误信息 | +| endpoints | [JobPartyEndpoint](#job-party-endpoint)[] | 必填 | 应用对外暴露的访问地址信息 | {#task} ### Task -| 字段 | 类型 | 选填 | 描述 | -|-------------------|-------------------|----|----------------------------------------------------------------------------------------------------------------------------| -| app_image | string | 必填 | 任务镜像 | -| parties | [Party](#party)[] | 必填 | 参与方节点 ID | -| alias | string | 必填 | 任务别名 | -| task_id | string | 必填 | 任务 ID,满足 [DNS 
子域名规则要求](https://kubernetes.io/zh-cn/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names) | -| dependencies | string[] | 必填 | 依赖任务 | -| task_input_config | string | 必填 | 任务配置 | -| priority | string | 可选 | 优先级,值越大优先级越高 | +| 字段 | 类型 | 选填 | 描述 | +|-------------------|-------------------|----|----------------------------------------------------------------------------------------------------------------------------------------------------------| +| app_image | string | 必填 | 任务镜像 | +| parties | [Party](#party)[] | 必填 | 参与方节点 ID | +| alias | string | 必填 | 任务别名,同一个 Job 中唯一,满足 [DNS 子域名规则要求](https://kubernetes.io/zh-cn/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names) | +| task_id | string | 可选 | 任务 ID,如果不填,Kuscia 将随机生成唯一的 task_id ,满足 [DNS 子域名规则要求](https://kubernetes.io/zh-cn/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names) | +| dependencies | string[] | 必填 | 依赖任务,通过 alias 字段来编排 Job 中 Task 之间的依赖关系 | +| task_input_config | string | 必填 | 任务配置 | +| priority | string | 可选 | 优先级,值越大优先级越高 | {#task-config} @@ -755,7 +761,7 @@ curl -k -X POST 'https://localhost:8082/api/v1/job/cancel' \ | app_image | string | 必填 | 任务镜像 | | parties | [Party](#party)[] | 必填 | 参与方 | | alias | string | 必填 | 任务别名 | -| task_id | string | 必填 | 任务 ID | +| task_id | string | 可选 | 任务 ID | | dependencies | string[] | 必填 | 依赖任务 | | task_input_config | string | 必填 | 任务配置 | | priority | string | 可选 | 优先级,值越大优先级越高 | @@ -764,15 +770,16 @@ curl -k -X POST 'https://localhost:8082/api/v1/job/cancel' \ ### TaskStatus -| 字段 | 类型 | 选填 | 描述 | -|-------------|--------------------------------|----|---------------------------| -| task_id | string | 可选 | 任务 ID | -| state | string | 必填 | 任务状态,参考 [State](#state) | -| err_msg | string | 可选 | 错误信息 | -| create_time | string | 必填 | 创建事件 | -| start_time | string | 必填 | 开始事件 | -| end_time | string | 可选 | 结束事件 | -| parties | [PartyStatus](#party-status)[] | 必填 | 参与方 | +| 字段 | 类型 | 选填 | 描述 | +|-------------|--------------------------------|----|-------------------------| +| task_id | string | 可选 | 任务 ID | +| alias | string | 必填 | 任务别名 | +| state | string | 必填 | 任务状态,参考 [State](#state) | +| err_msg | string | 可选 | 错误信息 | +| create_time | string | 必填 | 创建事件 | +| start_time | string | 必填 | 开始事件 | +| end_time | string | 可选 | 结束事件 | +| parties | [PartyStatus](#party-status)[] | 必填 | 参与方 | {#event-type} diff --git a/docs/reference/concepts/kusciajob_cn.md b/docs/reference/concepts/kusciajob_cn.md index ea44cf09..219e2d97 100644 --- a/docs/reference/concepts/kusciajob_cn.md +++ b/docs/reference/concepts/kusciajob_cn.md @@ -64,7 +64,7 @@ spec: - taskID: job-psi alias: job-psi priority: 100 - taskInputConfig: '{"sf_datasource_config":{"alice":{"id":"default-data-source"},"bob":{"id":"default-data-source"}},"sf_cluster_desc":{"parties":["alice","bob"],"devices":[{"name":"spu","type":"spu","parties":["alice","bob"],"config":"{\"runtime_config\":{\"protocol\":\"REF2K\",\"field\":\"FM64\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}"},{"name":"heu","type":"heu","parties":["alice","bob"],"config":"{\"mode\": \"PHEU\", \"schema\": \"paillier\", \"key_size\": 
2048}"}],"ray_fed_config":{"cross_silo_comm_backend":"brpc_link"}},"sf_node_eval_param":{"domain":"data_prep","name":"psi","version":"0.0.1","attr_paths":["input/receiver_input/key","input/sender_input/key","protocol","precheck_input","bucket_size","curve_type"],"attrs":[{"ss":["id1"]},{"ss":["id2"]},{"s":"ECDH_PSI_2PC"},{"b":true},{"i64":"1048576"},{"s":"CURVE_FOURQ"}]},"sf_input_ids":["alice-table","bob-table"],"sf_output_ids":["psi-output"],"sf_output_uris":["psi-output.csv"]}' + taskInputConfig: '{"sf_datasource_config":{"alice":{"id":"default-data-source"},"bob":{"id":"default-data-source"}},"sf_cluster_desc":{"parties":["alice","bob"],"devices":[{"name":"spu","type":"spu","parties":["alice","bob"],"config":"{\"runtime_config\":{\"protocol\":\"REF2K\",\"field\":\"FM64\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}"},{"name":"heu","type":"heu","parties":["alice","bob"],"config":"{\"mode\": \"PHEU\", \"schema\": \"paillier\", \"key_size\": 2048}"}],"ray_fed_config":{"cross_silo_comm_backend":"brpc_link"}},"sf_node_eval_param":{"domain":"data_prep","name":"psi","version":"0.0.4","attr_paths":["input/receiver_input/key","input/sender_input/key","protocol","precheck_input","bucket_size","curve_type","left_side"],"attrs":[{"ss":["id1"]},{"ss":["id2"]},{"s":"PROTOCOL_ECDH"},{"b":true},{"i64":"1048576"},{"s":"CURVE_FOURQ"},{"is_na": false,"ss": ["alice"]}]},"sf_input_ids":["alice-table","bob-table"],"sf_output_ids":["psi-output"],"sf_output_uris":["psi-output.csv"]}' appImage: secretflow-image parties: - domainID: alice @@ -94,6 +94,20 @@ spec: 计算出两方的交集数据并分别保存为 alice 和 bob 的 psi-output.csv ,而 job-split 将会读取新产生的两个求交数据,并进行随机分割, 随机分割的结果也会保存在 alice 和 bob 两方。 +:::{tip} + +KusciaJob 的算子参数由 `taskInputConfig` 字段定义,对于不同的算子,算子的参数不同。 + +本教程使用的是 SecretFlow 的算子参数定义,以 SecretFlow `1.5.0b0` 版本引擎任务为例,其他版本请参考[SecretFlow 官网](https://www.secretflow.org.cn/zh-CN/docs/secretflow/main/getting_started): +- `sf_datasource_config`:表示 SecretFlow 输入输出所需要的节点数据源信息。 +- `sf_cluster_desc`:表示 SecretFlow 集群信息,详情请查阅 [SecretFlow 集群文档](https://www.secretflow.org.cn/docs/secretflow/latest/zh-Hans/component/comp_spec_design#sfclusterdesc)。 +- `sf_node_eval_param`:表示 SecretFlow 算子的详细配置,详情请查阅 [SecretFlow 算子运行配置文档](https://www.secretflow.org.cn/docs/spec/latest/zh-Hans/intro#nodeevalparam)。 +- `sf_input_ids`:表示 SecretFlow 输入数据 `id`,SecretFlow 引擎会将 Kuscia 定义的输入数据 [DomainData](../reference/concepts/domaindata_cn.md) 转换成引擎所需要的 [DistData](https://www.secretflow.org.cn/docs/spec/latest/zh-Hans/spec#distdata)。 +- `sf_output_ids`:表示 SecretFlow 输出数据 `id`,SecretFlow 引擎会将输出的 [DistData](https://www.secretflow.org.cn/docs/spec/latest/zh-Hans/spec#distdata) 转换成 Kuscia 的 [DomainData](../reference/concepts/domaindata_cn.md)。 +- `sf_output_uris`:表示 SecretFlow 输出数据路径。 + +::: + ## KusciaJob 的调度 {#task-classification} @@ -155,7 +169,7 @@ spec: - alias: job-psi taskID: job-psi priority: 100 - taskInputConfig: 
'{"sf_datasource_config":{"alice":{"id":"default-data-source"},"bob":{"id":"default-data-source"}},"sf_cluster_desc":{"parties":["alice","bob"],"devices":[{"name":"spu","type":"spu","parties":["alice","bob"],"config":"{\"runtime_config\":{\"protocol\":\"REF2K\",\"field\":\"FM64\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}"},{"name":"heu","type":"heu","parties":["alice","bob"],"config":"{\"mode\": \"PHEU\", \"schema\": \"paillier\", \"key_size\": 2048}"}],"ray_fed_config":{"cross_silo_comm_backend":"brpc_link"}},"sf_node_eval_param":{"domain":"data_prep","name":"psi","version":"0.0.1","attr_paths":["input/receiver_input/key","input/sender_input/key","protocol","precheck_input","bucket_size","curve_type"],"attrs":[{"ss":["id1"]},{"ss":["id2"]},{"s":"ECDH_PSI_2PC"},{"b":true},{"i64":"1048576"},{"s":"CURVE_FOURQ"}]},"sf_input_ids":["alice-table","bob-table"],"sf_output_ids":["psi-output"],"sf_output_uris":["psi-output.csv"]}' + taskInputConfig: '{"sf_datasource_config":{"alice":{"id":"default-data-source"},"bob":{"id":"default-data-source"}},"sf_cluster_desc":{"parties":["alice","bob"],"devices":[{"name":"spu","type":"spu","parties":["alice","bob"],"config":"{\"runtime_config\":{\"protocol\":\"REF2K\",\"field\":\"FM64\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}"},{"name":"heu","type":"heu","parties":["alice","bob"],"config":"{\"mode\": \"PHEU\", \"schema\": \"paillier\", \"key_size\": 2048}"}],"ray_fed_config":{"cross_silo_comm_backend":"brpc_link"}},"sf_node_eval_param":{"domain":"data_prep","name":"psi","version":"0.0.4","attr_paths":["input/receiver_input/key","input/sender_input/key","protocol","precheck_input","bucket_size","curve_type","left_side"],"attrs":[{"ss":["id1"]},{"ss":["id2"]},{"s":"PROTOCOL_ECDH"},{"b":true},{"i64":"1048576"},{"s":"CURVE_FOURQ"},{"is_na": false,"ss": ["alice"]}]},"sf_input_ids":["alice-table","bob-table"],"sf_output_ids":["psi-output"],"sf_output_uris":["psi-output.csv"]}' appImage: secretflow-image parties: - domainID: alice @@ -198,7 +212,7 @@ spec: - alias: job-psi1 taskID: job-psi1 priority: 100 - taskInputConfig: '{"sf_datasource_config":{"alice":{"id":"default-data-source"},"bob":{"id":"default-data-source"}},"sf_cluster_desc":{"parties":["alice","bob"],"devices":[{"name":"spu","type":"spu","parties":["alice","bob"],"config":"{\"runtime_config\":{\"protocol\":\"REF2K\",\"field\":\"FM64\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}"},{"name":"heu","type":"heu","parties":["alice","bob"],"config":"{\"mode\": \"PHEU\", \"schema\": \"paillier\", \"key_size\": 2048}"}],"ray_fed_config":{"cross_silo_comm_backend":"brpc_link"}},"sf_node_eval_param":{"domain":"data_prep","name":"psi","version":"0.0.1","attr_paths":["input/receiver_input/key","input/sender_input/key","protocol","precheck_input","bucket_size","curve_type"],"attrs":[{"ss":["id1"]},{"ss":["id2"]},{"s":"ECDH_PSI_2PC"},{"b":true},{"i64":"1048576"},{"s":"CURVE_FOURQ"}]},"sf_input_ids":["alice-table","bob-table"],"sf_output_ids":["psi-output"],"sf_output_uris":["psi-output.csv"]}' + 
taskInputConfig: '{"sf_datasource_config":{"alice":{"id":"default-data-source"},"bob":{"id":"default-data-source"}},"sf_cluster_desc":{"parties":["alice","bob"],"devices":[{"name":"spu","type":"spu","parties":["alice","bob"],"config":"{\"runtime_config\":{\"protocol\":\"REF2K\",\"field\":\"FM64\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}"},{"name":"heu","type":"heu","parties":["alice","bob"],"config":"{\"mode\": \"PHEU\", \"schema\": \"paillier\", \"key_size\": 2048}"}],"ray_fed_config":{"cross_silo_comm_backend":"brpc_link"}},"sf_node_eval_param":{"domain":"data_prep","name":"psi","version":"0.0.4","attr_paths":["input/receiver_input/key","input/sender_input/key","protocol","precheck_input","bucket_size","curve_type","left_side"],"attrs":[{"ss":["id1"]},{"ss":["id2"]},{"s":"PROTOCOL_ECDH"},{"b":true},{"i64":"1048576"},{"s":"CURVE_FOURQ"},{"is_na": false,"ss": ["alice"]}]},"sf_input_ids":["alice-table","bob-table"],"sf_output_ids":["psi-output"],"sf_output_uris":["psi-output.csv"]}' appImage: secretflow-image parties: - domainID: alice @@ -206,7 +220,7 @@ spec: - alias: job-psi2 taskID: job-psi2 priority: 80 - taskInputConfig: '{"sf_datasource_config":{"alice":{"id":"default-data-source"},"bob":{"id":"default-data-source"}},"sf_cluster_desc":{"parties":["alice","bob"],"devices":[{"name":"spu","type":"spu","parties":["alice","bob"],"config":"{\"runtime_config\":{\"protocol\":\"REF2K\",\"field\":\"FM64\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}"},{"name":"heu","type":"heu","parties":["alice","bob"],"config":"{\"mode\": \"PHEU\", \"schema\": \"paillier\", \"key_size\": 2048}"}],"ray_fed_config":{"cross_silo_comm_backend":"brpc_link"}},"sf_node_eval_param":{"domain":"data_prep","name":"psi","version":"0.0.1","attr_paths":["input/receiver_input/key","input/sender_input/key","protocol","precheck_input","bucket_size","curve_type"],"attrs":[{"ss":["id1"]},{"ss":["id2"]},{"s":"ECDH_PSI_2PC"},{"b":true},{"i64":"1048576"},{"s":"CURVE_FOURQ"}]},"sf_input_ids":["alice-table","bob-table"],"sf_output_ids":["psi-output2"],"sf_output_uris":["psi-output2.csv"]}' + taskInputConfig: '{"sf_datasource_config":{"alice":{"id":"default-data-source"},"bob":{"id":"default-data-source"}},"sf_cluster_desc":{"parties":["alice","bob"],"devices":[{"name":"spu","type":"spu","parties":["alice","bob"],"config":"{\"runtime_config\":{\"protocol\":\"REF2K\",\"field\":\"FM64\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}"},{"name":"heu","type":"heu","parties":["alice","bob"],"config":"{\"mode\": \"PHEU\", \"schema\": \"paillier\", \"key_size\": 2048}"}],"ray_fed_config":{"cross_silo_comm_backend":"brpc_link"}},"sf_node_eval_param":{"domain":"data_prep","name":"psi","version":"0.0.4","attr_paths":["input/receiver_input/key","input/sender_input/key","protocol","precheck_input","bucket_size","curve_type","left_side"],"attrs":[{"ss":["id1"]},{"ss":["id2"]},{"s":"PROTOCOL_ECDH"},{"b":true},{"i64":"1048576"},{"s":"CURVE_FOURQ"},{"is_na": false,"ss": 
["alice"]}]},"sf_input_ids":["alice-table","bob-table"],"sf_output_ids":["psi-output2"],"sf_output_uris":["psi-output2.csv"]}' appImage: secretflow-image parties: - domainID: alice @@ -236,7 +250,7 @@ spec: - alias: job-psi1 taskID: job-psi1 priority: 100 - taskInputConfig: '{"sf_datasource_config":{"alice":{"id":"default-data-source"},"bob":{"id":"default-data-source"}},"sf_cluster_desc":{"parties":["alice","bob"],"devices":[{"name":"spu","type":"spu","parties":["alice","bob"],"config":"{\"runtime_config\":{\"protocol\":\"REF2K\",\"field\":\"FM64\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}"},{"name":"heu","type":"heu","parties":["alice","bob"],"config":"{\"mode\": \"PHEU\", \"schema\": \"paillier\", \"key_size\": 2048}"}],"ray_fed_config":{"cross_silo_comm_backend":"brpc_link"}},"sf_node_eval_param":{"domain":"data_prep","name":"psi","version":"0.0.1","attr_paths":["input/receiver_input/key","input/sender_input/key","protocol","precheck_input","bucket_size","curve_type"],"attrs":[{"ss":["id1"]},{"ss":["id2"]},{"s":"ECDH_PSI_2PC"},{"b":true},{"i64":"1048576"},{"s":"CURVE_FOURQ"}]},"sf_input_ids":["alice-table","bob-table"],"sf_output_ids":["psi-output"],"sf_output_uris":["psi-output.csv"]}' + taskInputConfig: '{"sf_datasource_config":{"alice":{"id":"default-data-source"},"bob":{"id":"default-data-source"}},"sf_cluster_desc":{"parties":["alice","bob"],"devices":[{"name":"spu","type":"spu","parties":["alice","bob"],"config":"{\"runtime_config\":{\"protocol\":\"REF2K\",\"field\":\"FM64\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}"},{"name":"heu","type":"heu","parties":["alice","bob"],"config":"{\"mode\": \"PHEU\", \"schema\": \"paillier\", \"key_size\": 2048}"}],"ray_fed_config":{"cross_silo_comm_backend":"brpc_link"}},"sf_node_eval_param":{"domain":"data_prep","name":"psi","version":"0.0.4","attr_paths":["input/receiver_input/key","input/sender_input/key","protocol","precheck_input","bucket_size","curve_type","left_side"],"attrs":[{"ss":["id1"]},{"ss":["id2"]},{"s":"PROTOCOL_ECDH"},{"b":true},{"i64":"1048576"},{"s":"CURVE_FOURQ"},{"is_na": false,"ss": ["alice"]}]},"sf_input_ids":["alice-table","bob-table"],"sf_output_ids":["psi-output"],"sf_output_uris":["psi-output.csv"]}' appImage: secretflow-image parties: - domainID: alice @@ -244,7 +258,7 @@ spec: - alias: job-psi2 taskID: job-psi2 priority: 80 - taskInputConfig: '{"sf_datasource_config":{"alice":{"id":"default-data-source"},"bob":{"id":"default-data-source"}},"sf_cluster_desc":{"parties":["alice","bob"],"devices":[{"name":"spu","type":"spu","parties":["alice","bob"],"config":"{\"runtime_config\":{\"protocol\":\"REF2K\",\"field\":\"FM64\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}"},{"name":"heu","type":"heu","parties":["alice","bob"],"config":"{\"mode\": \"PHEU\", \"schema\": \"paillier\", \"key_size\": 
2048}"}],"ray_fed_config":{"cross_silo_comm_backend":"brpc_link"}},"sf_node_eval_param":{"domain":"data_prep","name":"psi","version":"0.0.1","attr_paths":["input/receiver_input/key","input/sender_input/key","protocol","precheck_input","bucket_size","curve_type"],"attrs":[{"ss":["id1"]},{"ss":["id2"]},{"s":"ECDH_PSI_2PC"},{"b":true},{"i64":"1048576"},{"s":"CURVE_FOURQ"}]},"sf_input_ids":["alice-table","bob-table"],"sf_output_ids":["psi-output1"],"sf_output_uris":["psi-output1.csv"]}' + taskInputConfig: '{"sf_datasource_config":{"alice":{"id":"default-data-source"},"bob":{"id":"default-data-source"}},"sf_cluster_desc":{"parties":["alice","bob"],"devices":[{"name":"spu","type":"spu","parties":["alice","bob"],"config":"{\"runtime_config\":{\"protocol\":\"REF2K\",\"field\":\"FM64\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}"},{"name":"heu","type":"heu","parties":["alice","bob"],"config":"{\"mode\": \"PHEU\", \"schema\": \"paillier\", \"key_size\": 2048}"}],"ray_fed_config":{"cross_silo_comm_backend":"brpc_link"}},"sf_node_eval_param":{"domain":"data_prep","name":"psi","version":"0.0.4","attr_paths":["input/receiver_input/key","input/sender_input/key","protocol","precheck_input","bucket_size","curve_type","left_side"],"attrs":[{"ss":["id1"]},{"ss":["id2"]},{"s":"PROTOCOL_ECDH"},{"b":true},{"i64":"1048576"},{"s":"CURVE_FOURQ"},{"is_na": false,"ss": ["alice"]}]},"sf_input_ids":["alice-table","bob-table"],"sf_output_ids":["psi-output1"],"sf_output_uris":["psi-output1.csv"]}' appImage: secretflow-image parties: - domainID: alice @@ -305,7 +319,7 @@ spec: - alias: job-psi taskID: job-psi priority: 100 - taskInputConfig: '{"sf_datasource_config":{"alice":{"id":"default-data-source"},"bob":{"id":"default-data-source"}},"sf_cluster_desc":{"parties":["alice","bob"],"devices":[{"name":"spu","type":"spu","parties":["alice","bob"],"config":"{\"runtime_config\":{\"protocol\":\"REF2K\",\"field\":\"FM64\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}"},{"name":"heu","type":"heu","parties":["alice","bob"],"config":"{\"mode\": \"PHEU\", \"schema\": \"paillier\", \"key_size\": 2048}"}],"ray_fed_config":{"cross_silo_comm_backend":"brpc_link"}},"sf_node_eval_param":{"domain":"data_prep","name":"psi","version":"0.0.1","attr_paths":["input/receiver_input/key","input/sender_input/key","protocol","precheck_input","bucket_size","curve_type"],"attrs":[{"ss":["id1"]},{"ss":["id2"]},{"s":"ECDH_PSI_2PC"},{"b":true},{"i64":"1048576"},{"s":"CURVE_FOURQ"}]},"sf_input_ids":["alice-table","bob-table"],"sf_output_ids":["psi-output"],"sf_output_uris":["psi-output.csv"]}' + taskInputConfig: '{"sf_datasource_config":{"alice":{"id":"default-data-source"},"bob":{"id":"default-data-source"}},"sf_cluster_desc":{"parties":["alice","bob"],"devices":[{"name":"spu","type":"spu","parties":["alice","bob"],"config":"{\"runtime_config\":{\"protocol\":\"REF2K\",\"field\":\"FM64\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}"},{"name":"heu","type":"heu","parties":["alice","bob"],"config":"{\"mode\": \"PHEU\", \"schema\": \"paillier\", 
\"key_size\": 2048}"}],"ray_fed_config":{"cross_silo_comm_backend":"brpc_link"}},"sf_node_eval_param":{"domain":"data_prep","name":"psi","version":"0.0.4","attr_paths":["input/receiver_input/key","input/sender_input/key","protocol","precheck_input","bucket_size","curve_type","left_side"],"attrs":[{"ss":["id1"]},{"ss":["id2"]},{"s":"PROTOCOL_ECDH"},{"b":true},{"i64":"1048576"},{"s":"CURVE_FOURQ"},{"is_na": false,"ss": ["alice"]}]},"sf_input_ids":["alice-table","bob-table"],"sf_output_ids":["psi-output"],"sf_output_uris":["psi-output.csv"]}' appImage: secretflow-image parties: - domainID: alice diff --git a/docs/reference/concepts/kusciatask_cn.md b/docs/reference/concepts/kusciatask_cn.md index 3c68f0d9..f946cdb1 100644 --- a/docs/reference/concepts/kusciatask_cn.md +++ b/docs/reference/concepts/kusciatask_cn.md @@ -29,7 +29,7 @@ spec: domainID: alice - appImageRef: secretflow-image domainID: bob - taskInputConfig: '{"sf_datasource_config":{"alice":{"id":"default-data-source"},"bob":{"id":"default-data-source"}},"sf_cluster_desc":{"parties":["alice","bob"],"devices":[{"name":"spu","type":"spu","parties":["alice","bob"],"config":"{\"runtime_config\":{\"protocol\":\"REF2K\",\"field\":\"FM64\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}"},{"name":"heu","type":"heu","parties":["alice","bob"],"config":"{\"mode\": \"PHEU\", \"schema\": \"paillier\", \"key_size\": 2048}"}],"ray_fed_config":{"cross_silo_comm_backend":"brpc_link"}},"sf_node_eval_param":{"domain":"data_prep","name":"psi","version":"0.0.1","attr_paths":["input/receiver_input/key","input/sender_input/key","protocol","precheck_input","bucket_size","curve_type"],"attrs":[{"ss":["id1"]},{"ss":["id2"]},{"s":"ECDH_PSI_2PC"},{"b":true},{"i64":"1048576"},{"s":"CURVE_FOURQ"}]},"sf_input_ids":["alice-table","bob-table"],"sf_output_ids":["psi-output"],"sf_output_uris":["psi-output.csv"]}' + taskInputConfig: '{"sf_datasource_config":{"alice":{"id":"default-data-source"},"bob":{"id":"default-data-source"}},"sf_cluster_desc":{"parties":["alice","bob"],"devices":[{"name":"spu","type":"spu","parties":["alice","bob"],"config":"{\"runtime_config\":{\"protocol\":\"REF2K\",\"field\":\"FM64\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}"},{"name":"heu","type":"heu","parties":["alice","bob"],"config":"{\"mode\": \"PHEU\", \"schema\": \"paillier\", \"key_size\": 2048}"}],"ray_fed_config":{"cross_silo_comm_backend":"brpc_link"}},"sf_node_eval_param":{"domain":"data_prep","name":"psi","version":"0.0.4","attr_paths":["input/receiver_input/key","input/sender_input/key","protocol","precheck_input","bucket_size","curve_type","left_side"],"attrs":[{"ss":["id1"]},{"ss":["id2"]},{"s":"PROTOCOL_ECDH"},{"b":true},{"i64":"1048576"},{"s":"CURVE_FOURQ"},{"is_na": false,"ss": ["alice"]}]},"sf_input_ids":["alice-table","bob-table"],"sf_output_ids":["psi-output"],"sf_output_uris":["psi-output.csv"]}' ``` 在该示例中: @@ -43,165 +43,6 @@ spec: - `.spec.parties[1].appImageRef`:表示节点标识为 `alice` 的任务参与方所依赖的应用镜像 AppImage 名称为 `secretflow-image` 。 - `.spec.taskInputConfig`:表示任务输入参数配置。 -1. 运行以下命令创建 KusciaTask。 - -```shell -kubectl apply -f secretflow-task-psi.yaml -``` - -## 查看 KusciaTask - -下面以 KusciaTask `secretflow-task-psi` 为例,介绍如何查看任务运行状态。 - -1. 
运行以下命令查看 KusciaTask。 - -```shell -kubectl get kt secretflow-task-psi -n cross-domain -NAME STARTTIME COMPLETIONTIME LASTRECONCILETIME PHASE -secretflow-task-psi 7s 7s 7s Succeeded -``` - -上述命令输出内容,各个列字段的含义如下: -- `NAME`:表示 KusciaTask 的名称,当前示例为 `secretflow-task-psi` 。 -- `STARTTIME`:表示 KusciaTask 从开始执行到现在经历的时间。 -- `COMPLETIONTIME`:表示 KusciaTask 从完成执行到现在经历的时间。 -- `LASTRECONCILETIME`:表示 KusciaTask 从上次被更新到现在经历的时间。 -- `PHASE`:表示 KusciaTask 当前所处的阶段。当前示例阶段为 `Succeeded` 。 - -2. 运行以下命令查看 KusciaTask 详细的状态信息。 - -```shell -kubectl get kt secretflow-task-psi -n cross-domain -o jsonpath={.status} | jq -{ - "completionTime": "2023-08-21T07:43:34Z", - "conditions": [ - { - "lastTransitionTime": "2023-08-21T07:43:15Z", - "status": "True", - "type": "ResourceCreated" - }, - { - "lastTransitionTime": "2023-08-21T07:43:15Z", - "status": "True", - "type": "Running" - }, - { - "lastTransitionTime": "2023-08-21T07:43:34Z", - "status": "True", - "type": "Success" - } - ], - "lastReconcileTime": "2023-08-21T07:43:34Z", - "partyTaskStatus": [ - { - "domainID": "alice", - "phase": "Succeeded" - }, - { - "domainID": "bob", - "phase": "Succeeded" - } - ], - "phase": "Succeeded", - "podStatuses": { - "alice/secretflow-task-psi-0": { - "createTime": "2023-08-21T07:43:15Z", - "namespace": "alice", - "nodeName": "070a9fc7ff24", - "podName": "secretflow-task-psi-0", - "podPhase": "Succeeded", - "readyTime": "2023-08-21T07:43:18Z", - "reason": "Completed", - "startTime": "2023-08-21T07:43:17Z" - }, - "bob/secretflow-task-psi-0": { - "createTime": "2023-08-21T07:43:15Z", - "namespace": "bob", - "nodeName": "dd3bdda2b853", - "podName": "secretflow-task-psi-0", - "podPhase": "Succeeded", - "readyTime": "2023-08-21T07:43:18Z", - "reason": "Completed", - "startTime": "2023-08-21T07:43:17Z" - } - }, - "serviceStatuses": { - "alice/secretflow-task-psi-0-fed": { - "createTime": "2023-08-21T07:43:15Z", - "namespace": "alice", - "portName": "fed", - "portNumber": 8080, - "readyTime": "2023-08-21T07:43:18Z", - "scope": "Cluster", - "serviceName": "secretflow-task-psi-0-fed" - }, - "alice/secretflow-task-psi-0-global": { - "createTime": "2023-08-21T07:43:15Z", - "namespace": "alice", - "portName": "global", - "portNumber": 8081, - "readyTime": "2023-08-21T07:43:18Z", - "scope": "Domain", - "serviceName": "secretflow-task-psi-0-global" - }, - "alice/secretflow-task-psi-0-spu": { - "createTime": "2023-08-21T07:43:15Z", - "namespace": "alice", - "portName": "spu", - "portNumber": 54509, - "readyTime": "2023-08-21T07:43:18Z", - "scope": "Cluster", - "serviceName": "secretflow-task-psi-0-spu" - }, - "bob/secretflow-task-psi-0-fed": { - "createTime": "2023-08-21T07:43:15Z", - "namespace": "bob", - "portName": "fed", - "portNumber": 8080, - "readyTime": "2023-08-21T07:43:18Z", - "scope": "Cluster", - "serviceName": "secretflow-task-psi-0-fed" - }, - "bob/secretflow-task-psi-0-global": { - "createTime": "2023-08-21T07:43:15Z", - "namespace": "bob", - "portName": "global", - "portNumber": 8081, - "readyTime": "2023-08-21T07:43:18Z", - "scope": "Domain", - "serviceName": "secretflow-task-psi-0-global" - }, - "bob/secretflow-task-psi-0-spu": { - "createTime": "2023-08-21T07:43:15Z", - "namespace": "bob", - "portName": "spu", - "portNumber": 54509, - "readyTime": "2023-08-21T07:43:18Z", - "scope": "Cluster", - "serviceName": "secretflow-task-psi-0-spu" - } - }, - "startTime": "2023-08-21T07:43:15Z" -} -``` - -## 清理 KusciaTask - -下面以 KusciaTask `secretflow-task-psi` 为例,介绍如何清理 KusciaTask。 - -1. 
运行以下命令清理 KusciaTask。 - -```shell -kubectl delete kt secretflow-task-psi -n cross-domain -``` - -2. 检查 KusciaTask 是否已被清理。 - -```shell -kubectl get kt secretflow-task-psi -n cross-domain -Error from server (NotFound): kusciatasks.kuscia.secretflow "secretflow-task-psi" not found -``` - ## 参考 下面以 `task-template` 模版为例,介绍 KusciaTask 所包含的完整字段。 @@ -219,7 +60,7 @@ spec: resourceReservedSeconds: 30 lifecycleSeconds: 300 retryIntervalSeconds: 15 - taskInputConfig: '{"sf_datasource_config":{"alice":{"id":"default-data-source"},"bob":{"id":"default-data-source"}},"sf_cluster_desc":{"parties":["alice","bob"],"devices":[{"name":"spu","type":"spu","parties":["alice","bob"],"config":"{\"runtime_config\":{\"protocol\":\"REF2K\",\"field\":\"FM64\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}"},{"name":"heu","type":"heu","parties":["alice","bob"],"config":"{\"mode\": \"PHEU\", \"schema\": \"paillier\", \"key_size\": 2048}"}],"ray_fed_config":{"cross_silo_comm_backend":"brpc_link"}},"sf_node_eval_param":{"domain":"data_prep","name":"psi","version":"0.0.1","attr_paths":["input/receiver_input/key","input/sender_input/key","protocol","precheck_input","bucket_size","curve_type"],"attrs":[{"ss":["id1"]},{"ss":["id2"]},{"s":"ECDH_PSI_2PC"},{"b":true},{"i64":"1048576"},{"s":"CURVE_FOURQ"}]},"sf_input_ids":["alice-table","bob-table"],"sf_output_ids":["psi-output"],"sf_output_uris":["psi-output.csv"]}' + taskInputConfig: '{"sf_datasource_config":{"alice":{"id":"default-data-source"},"bob":{"id":"default-data-source"}},"sf_cluster_desc":{"parties":["alice","bob"],"devices":[{"name":"spu","type":"spu","parties":["alice","bob"],"config":"{\"runtime_config\":{\"protocol\":\"REF2K\",\"field\":\"FM64\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}"},{"name":"heu","type":"heu","parties":["alice","bob"],"config":"{\"mode\": \"PHEU\", \"schema\": \"paillier\", \"key_size\": 2048}"}],"ray_fed_config":{"cross_silo_comm_backend":"brpc_link"}},"sf_node_eval_param":{"domain":"data_prep","name":"psi","version":"0.0.4","attr_paths":["input/receiver_input/key","input/sender_input/key","protocol","precheck_input","bucket_size","curve_type","left_side"],"attrs":[{"ss":["id1"]},{"ss":["id2"]},{"s":"PROTOCOL_ECDH"},{"b":true},{"i64":"1048576"},{"s":"CURVE_FOURQ"},{"is_na": false,"ss": ["alice"]}]},"sf_input_ids":["alice-table","bob-table"],"sf_output_ids":["psi-output"],"sf_output_uris":["psi-output.csv"]}' parties: - domainID: alice appImageRef: app-template diff --git a/docs/reference/overview.md b/docs/reference/overview.md index ce1e123c..67cdd9a0 100644 --- a/docs/reference/overview.md +++ b/docs/reference/overview.md @@ -51,6 +51,7 @@ Kuscia 编排任务时期,即当前开源的 Kuscia 。经历了 Coordinator Kuscia Layer 通过这样的分层设计,让每一层聚焦于自己本层的核心职责。平台层专注于终端用户体验及实际的业务场景适配。Kuscia 层专注于解决隐私计算技术生产落地遇到的问题,引擎开发者专注于隐私计算任务核心逻辑的开发,无需考虑复杂的跨域网络环境及不同机构可能存在的异构数据源等问题。 + {#why-kuscia} ## 为什么需要 Kuscia 从一个隐私计算引擎开发完成到真正的生产级可用有多远,Kuscia 在隐私计算任务部署、执行、运维等整个生命周期中解决了哪些问题? 
diff --git a/docs/reference/troubleshoot/docker_memory_limit.md b/docs/reference/troubleshoot/docker_memory_limit.md new file mode 100644 index 00000000..417ee644 --- /dev/null +++ b/docs/reference/troubleshoot/docker_memory_limit.md @@ -0,0 +1,14 @@ +# 如何通过 Docker 命令对已部署的节点进行 Memory 扩容 + +## 背景 +在使用脚本部署 Kuscia 时,可以使用 -m 或者 --memory-limit 参数给节点容器设置适当的内存限制。例如,"-m 4GiB 或 --memory-limit=4GiB" 表示限制最大内存 4GiB,"-m -1 或 --memory-limit=-1"表示没有限制,不设置默认 master 为 2GiB,lite 节点 4GiB,autonomy 节点 6GiB。如果当您的节点已经部署好了,但是遇到内存限制不符合需求时,您可以通过 Docker 命令对已部署的节点进行 Memory 扩容。 + +## 步骤 +1. 运行以下命令来查看当前节点的内存限制: +```bash +docker inspect ${container_name} --format '{{.HostConfig.Memory}}' +``` +2. 根据需要调整内存限制。例如,如果您需要增加内存限制到 20GiB,您可以运行以下命令: +```bash +docker update ${container_name} --memory=20GiB --memory-swap=20GiB +``` \ No newline at end of file diff --git a/docs/reference/troubleshoot/index.rst b/docs/reference/troubleshoot/index.rst index fa0a6fd6..e432de56 100644 --- a/docs/reference/troubleshoot/index.rst +++ b/docs/reference/troubleshoot/index.rst @@ -15,4 +15,5 @@ userdefinedserviceroute private_key_loss protocol_describe - docker_cpp_copy \ No newline at end of file + docker_cpp_copy + docker_memory_limit \ No newline at end of file diff --git a/docs/tutorial/index.rst b/docs/tutorial/index.rst index 63c0f0ca..9c4b1149 100644 --- a/docs/tutorial/index.rst +++ b/docs/tutorial/index.rst @@ -8,6 +8,7 @@ run_sf_serving_with_api_cn run_bfia_job_cn run_fate_cn + run_scql_on_kuscia_cn .. toctree:: :maxdepth: 1 diff --git a/docs/tutorial/run_scql_on_kuscia_cn.md b/docs/tutorial/run_scql_on_kuscia_cn.md new file mode 100644 index 00000000..a36d4157 --- /dev/null +++ b/docs/tutorial/run_scql_on_kuscia_cn.md @@ -0,0 +1,692 @@ +# 如何在 Kuscia 上运行 SCQL 联合分析任务 +本教程将以 [KusciaAPI](../reference/apis/summary_cn.md) 创建本地数据源作为示例,介绍如何在 Kuscia 上运行 SCQL 联合分析任务。 + +## 准备节点 +- 体验部署请选择[快速入门](../getting_started/quickstart_cn.md)。 +- 生产部署请选择[多机部署](../deployment/Docker_deployment_kuscia/index.rst)。 + +本示例在**点对点组网模式**下完成。在中心化组网模式下,证书的配置会有所不同。 + +{#cert-and-token} + +## 获取 KusciaAPI 证书和 Token + +在下面[准备数据](./run_scql_on_kuscia_cn.md#alice-准备测试数据)步骤中需要使用到 KusciaAPI,如果 KusciaAPI 启用了 MTLS 协议,则需要提前准备好 MTLS 证书和 Token。协议参考[这里](../reference/troubleshoot/protocol_describe.md)。 + +### 点对点组网模式 + +证书的配置参考[配置授权](../deployment/Docker_deployment_kuscia/deploy_p2p_cn.md#配置授权) + +这里以 alice 节点为例,接口需要的证书文件在 ${USER}-kuscia-autonomy-alice 节点的`/home/kuscia/var/certs/`目录下: + +| 文件名 | 文件功能 | +| -------------------- | ------------------------------------------------------- | +| kusciaapi-server.key | 服务端私钥文件 | +| kusciaapi-server.crt | 服务端证书文件 | +| ca.crt | CA 证书文件 | +| token | 认证 Token ,在 headers 中添加 Token: { token 文件内容} | + +### 中心化组网模式 + +证书文件在 ${USER}-kuscia-master 节点的`/home/kuscia/var/certs/`目录下: + +| 文件名 | 文件功能 | +| -------------------- | ------------------------------------------------------- | +| kusciaapi-server.key | 服务端私钥文件 | +| kusciaapi-server.crt | 服务端证书文件 | +| ca.crt | CA 证书文件 | +| token | 认证 Token ,在 headers 中添加 Token: { token 文件内容} | + +## 准备数据 + +你可以使用本文示例的测试数据文件,或者使用你自己的数据文件。 + +在 Kuscia 中,在节点容器的 `/home/kuscia/var/storage` 目录存放内置测试数据文件,下面 alice 和 bob 节点分别使用的是 scql-alice.csv 和 scql-bob.csv,你可以在容器中查看这两个数据文件。 +### 准备测试数据 +#### Alice 准备测试数据 +1.这里以 Docker 部署模式为例,登录到 alice 节点中 +```bash +docker exec -it ${USER}-kuscia-autonomy-alice bash +``` + +2.创建 DomainDataSource + +下面 datasource_id 名称以 scql-demo-local-datasource 为例: +```bash +export CTR_CERTS_ROOT=/home/kuscia/var/certs +curl -k -X POST 'https://localhost:8082/api/v1/domaindatasource/create' \ + 
--header "Token: $(cat ${CTR_CERTS_ROOT}/token)" \ + --header 'Content-Type: application/json' \ + --cert ${CTR_CERTS_ROOT}/kusciaapi-server.crt \ + --key ${CTR_CERTS_ROOT}/kusciaapi-server.key \ + --cacert ${CTR_CERTS_ROOT}/ca.crt \ + -d '{ + "domain_id": "alice", + "datasource_id":"scql-demo-local-datasource", + "type":"localfs", + "name": "DemoDataSource", + "info": { + "localfs": { + "path": "/home/kuscia/var/storage/data" + } + }, + "access_directly": true +}' +``` +3.创建 DomainData + +下面 domaindata_id 名称以 scql-alice-table 为例: +```bash +export CTR_CERTS_ROOT=/home/kuscia/var/certs +curl -k -X POST 'https://localhost:8082/api/v1/domaindata/create' \ + --header "Token: $(cat ${CTR_CERTS_ROOT}/token)" \ + --header 'Content-Type: application/json' \ + --cert ${CTR_CERTS_ROOT}/kusciaapi-server.crt \ + --key ${CTR_CERTS_ROOT}/kusciaapi-server.key \ + --cacert ${CTR_CERTS_ROOT}/ca.crt \ + -d '{ + "domain_id": "alice", + "domaindata_id": "scql-alice-table", + "datasource_id": "scql-demo-local-datasource", + "name": "alice001", + "type": "table", + "relative_uri": "scql-alice.csv", + "columns": [ + { + "name": "ID", + "type": "str" + }, + { + "name": "credit_rank", + "type": "int" + }, + { + "name": "income", + "type": "int" + }, + { + "name": "age", + "type": "int" + } + ] +}' +``` +#### Bob 准备测试数据 +1.这里以 Docker 部署模式为例,登录到 bob 节点中 +```bash +docker exec -it ${USER}-kuscia-autonomy-bob bash +``` + +2.创建 DomainDataSource + +下面 datasource_id 名称以 scql-demo-local-datasource 为例: +```bash +export CTR_CERTS_ROOT=/home/kuscia/var/certs +curl -k -X POST 'https://localhost:8082/api/v1/domaindatasource/create' \ + --header "Token: $(cat ${CTR_CERTS_ROOT}/token)" \ + --header 'Content-Type: application/json' \ + --cert ${CTR_CERTS_ROOT}/kusciaapi-server.crt \ + --key ${CTR_CERTS_ROOT}/kusciaapi-server.key \ + --cacert ${CTR_CERTS_ROOT}/ca.crt \ + -d '{ + "domain_id": "bob", + "datasource_id":"scql-demo-local-datasource", + "type":"localfs", + "name": "DemoDataSource", + "info": { + "localfs": { + "path": "/home/kuscia/var/storage/data" + } + }, + "access_directly": true +}' +``` +3.创建 DomainData + +下面 domaindata_id 名称以 scql-bob-table 为例: +```bash +export CTR_CERTS_ROOT=/home/kuscia/var/certs +curl -k -X POST 'https://localhost:8082/api/v1/domaindata/create' \ + --header "Token: $(cat ${CTR_CERTS_ROOT}/token)" \ + --header 'Content-Type: application/json' \ + --cert ${CTR_CERTS_ROOT}/kusciaapi-server.crt \ + --key ${CTR_CERTS_ROOT}/kusciaapi-server.key \ + --cacert ${CTR_CERTS_ROOT}/ca.crt \ + -d '{ + "domain_id": "bob", + "domaindata_id": "scql-bob-table", + "datasource_id": "scql-demo-local-datasource", + "name": "bob001", + "type": "table", + "relative_uri": "scql-bob.csv", + "columns": [ + { + "name": "ID", + "type": "str" + }, + { + "name": "order_amount", + "type": "int" + }, + { + "name": "is_active", + "type": "int" + } + ] +}' +``` + +## 部署 SCQL +### Alice 部署 SCQL +1.登陆到 alice 节点容器中 +```bash +docker exec -it ${USER}-kuscia-autonomy-alice bash +``` +如果是中心化组网模式,则需要登录到 master 节点容器中。 +```bash +docker exec -it ${USER}-kuscia-master bash +``` + +2.获取 SCQL 应用的镜像模版 AppImage + +从 SCQL 官方文档中,获取 AppImage 具体内容,并将其内容保存到 scql-image.yaml 文件中。 具体模版内容,可参考 [SCQL AppImage](https://www.secretflow.org.cn/zh-CN/docs/scql/main/topics/deployment/run-scql-on-kuscia)。 + +> 注意: +> +> 1. 如果 `secretflow/scql` 仓库访问网速较慢,可以替换为 `secretflow-registry.cn-hangzhou.cr.aliyuncs.com/secretflow/scql`。 +> 2. 请删除 `#--datasource_router=kusciadatamesh` 代码行前面的 # 符号,以启用 Datamesh 本地数据源配置。 +> 3. 
在 `engineConf` 字段加上 `--enable_restricted_read_path=false` 限制 csv 文件的读取路径 + +3.创建 SCQL 应用的镜像模版 AppImage +```bash +kubectl apply -f scql-image.yaml +``` + +4.部署 Broker +```bash +kubectl apply -f /home/kuscia/scripts/templates/scql/broker_alice.yaml +``` + +### Bob 部署 SCQL +1.登陆到 bob 节点容器中 +```bash +docker exec -it ${USER}-kuscia-autonomy-bob bash +``` +如果是中心化组网模式,则需要登录到 master 节点容器中。 +```bash +docker exec -it ${USER}-kuscia-master bash +``` + +2.获取 SCQL 应用的镜像模版 AppImage + +从 SCQL 官方文档中,获取 AppImage 具体内容,并将其内容保存到 scql-image.yaml 文件中。 具体模版内容,可参考 [SCQL AppImage](https://www.secretflow.org.cn/zh-CN/docs/scql/main/topics/deployment/run-scql-on-kuscia)。 + +> 注意: +> +> 1. 如果 `secretflow/scql` 仓库访问网速较慢,可以替换为 `secretflow-registry.cn-hangzhou.cr.aliyuncs.com/secretflow/scql`。 +> 2. 请删除 `#--datasource_router=kusciadatamesh` 代码行前面的 # 符号,以启用 Datamesh 本地数据源配置。 +> 3. 在 `engineConf` 字段加上 `--enable_restricted_read_path=false` 限制 csv 文件的读取路径 + +3.创建 SCQL 应用的镜像模版 AppImage +```bash +kubectl apply -f appimage.yaml +``` + +4.部署 Broker +```bash +kubectl apply -f /home/kuscia/scripts/templates/scql/broker_bob.yaml +``` + +### 查看 broker 是否部署成功 +下面以 alice 节点为例,bob 节点类似 +```bash +docker exec -it ${USER}-kuscia-autonomy-alice kubectl get po -A + +# Pod 状态为 Running 时,表示部署成功: +NAMESPACE NAME READY STATUS RESTARTS AGE +alice scql-broker-6f4f85b64f-fsgq8 1/1 Running 0 2m42s +``` + +## 使用 SCQL 进行联合分析 +下面仅以流程步骤作为示例展示,更多接口参数请参考 [SCQL API](https://www.secretflow.org.cn/zh-CN/docs/scql/main/reference/broker-api)。 + +### 创建项目并邀请参与方加入 +#### Alice 创建项目,并邀请 Bob 加入 +1.登录到 alice 节点容器中 +```bash +docker exec -it ${USER}-kuscia-autonomy-alice bash +``` + +2.创建项目 + +下面项目名称以 "demo" 为例: +```bash +curl -X POST http://127.0.0.1:80/intra/project/create \ +--header "host: scql-broker-intra.alice.svc" \ +--header "kuscia-source: alice" \ +-d '{ + "project_id":"demo", + "name":"demo", + "conf":{ + "spu_runtime_cfg":{ + "protocol":"SEMI2K", + "field":"FM64" + } + }, + "description":"this is a project" +}' +``` + +3.查看项目 +```bash +curl -X POST http://127.0.0.1:80/intra/project/list \ +--header "host: scql-broker-intra.alice.svc" \ +--header "kuscia-source: alice" +``` + +4.邀请 bob 加入到 "demo" 项目中 +```bash +curl -X POST http://127.0.0.1:80/intra/member/invite \ +--header "host: scql-broker-intra.alice.svc" \ +--header "kuscia-source: alice" \ +-d '{ + "invitee": "bob", + "project_id": "demo" +}' +``` + +5.查看邀请状态 +```bash +curl -X POST http://127.0.0.1:80/intra/invitation/list \ +--header "host: scql-broker-intra.alice.svc" \ +--header "kuscia-source: alice" +``` + +#### Bob 接受邀请 +1.登录到 bob 节点容器中 +```bash +docker exec -it ${USER}-kuscia-autonomy-bob bash +``` +2.bob 接受 alice 的入项邀请 +```bash +curl -X POST http://127.0.0.1:80/intra/invitation/process \ +--header "host: scql-broker-intra.bob.svc" \ +--header "kuscia-source: bob" \ +-d '{ + "invitation_id":1, + "respond":0 +}' +``` + +### 创建数据表 +#### Alice 创建数据表 +1.登录到 alice 节点容器中 +```bash +docker exec -it ${USER}-kuscia-autonomy-alice bash +``` + +2.创建数据表 + +下面 tabel_name 以 ta 为例,ref_table 参数的值为[创建 DomainData](./run_scql_on_kuscia_cn.md#alice-准备测试数据)时的 `domaindata_id` +```bash +curl -X POST http://127.0.0.1:80/intra/table/create \ +--header "host: scql-broker-intra.alice.svc" \ +--header "kuscia-source: alice" \ +-H "Content-Type: application/json" \ +-d '{ + "project_id": "demo", + "table_name": "ta", + "ref_table": "scql-alice-table", + "db_type": "csvdb", + "columns": [ + {"name":"ID","dtype":"string"}, + {"name":"credit_rank","dtype":"int"}, + {"name":"income","dtype":"int"}, + 
{"name":"age","dtype":"int"} + ] +}' +``` + +#### Bob 创建数据表 +1.登录到 bob 节点容器中 +```bash + docker exec -it ${USER}-kuscia-autonomy-bob bash +``` + +2.创建数据表 + +下面 tabel_name 以 ta 为例,ref_table 参数的值为[创建 DomainData](./run_scql_on_kuscia_cn.md#bob-准备测试数据)时的 `domaindata_id` +```bash +curl -X POST http://127.0.0.1:80/intra/table/create \ +--header "host: scql-broker-intra.bob.svc" \ +--header "kuscia-source: bob" \ +-H "Content-Type: application/json" \ +-d '{ + "project_id": "demo", + "table_name": "tb", + "ref_table": "scql-bob-table", + "db_type": "csvdb", + "columns": [ + {"name":"ID","dtype":"string"}, + {"name":"order_amount","dtype":"double"}, + {"name":"is_active","dtype":"int"} + ] +}' +``` + +### 查看数据表 +下面以 alice 为例,bob 节点类似 +```bash +curl -X POST http://127.0.0.1:80/intra/table/list \ +--header "host: scql-broker-intra.alice.svc" \ +--header "kuscia-source: alice" \ +-H "Content-Type: application/json" \ +-d '{ + "project_id": "demo" +}' +``` + +### 删除数据表 +若想删除创建的数据表时,可以参考下面命令。以 alice 节点为例,bob 节点类似。 +```bash +curl -X POST http://127.0.0.1:80/intra/table/drop \ +--header "host: scql-broker-intra.alice.svc" \ +--header "kuscia-source: alice" \ +-H "Content-Type: application/json" \ +-d '{ + "project_id": "demo", + "table_name":"ta" +}' +``` + +### 数据表授权 +#### Alice 的数据表授权 +1.将 ta 数据表授权给 alice +```bash +curl -X POST http://127.0.0.1:80/intra/ccl/grant \ +--header "host: scql-broker-intra.alice.svc" \ +--header "kuscia-source: alice" \ +-H "Content-Type: application/json" \ +-d '{ + "project_id": "demo", + "column_control_list":[ + {"col":{"column_name":"ID","table_name":"ta"},"party_code":"alice","constraint":1}, + {"col":{"column_name":"age","table_name":"ta"},"party_code":"alice","constraint":1}, + {"col":{"column_name":"income","table_name":"ta"},"party_code":"alice","constraint":1}, + {"col":{"column_name":"credit_rank","table_name":"ta"},"party_code":"alice","constraint":1} + ] +}' +``` + +2.将 ta 表授权给 bob 节点 +```bash +curl -X POST http://127.0.0.1:80/intra/ccl/grant \ +--header "host: scql-broker-intra.alice.svc" \ +--header "kuscia-source: alice" \ +-H "Content-Type: application/json" \ +-d '{ + "project_id": "demo", + "column_control_list":[ + {"col":{"column_name":"ID","table_name":"ta"},"party_code":"bob","constraint":1}, + {"col":{"column_name":"age","table_name":"ta"},"party_code":"bob","constraint":1}, + {"col":{"column_name":"income","table_name":"ta"},"party_code":"bob","constraint":1}, + {"col":{"column_name":"credit_rank","table_name":"ta"},"party_code":"bob","constraint":1} + ] +}' +``` +#### Bob 的数据表授权 +1.将 tb 表授权给 alice 节点 +```bash +curl -X POST http://127.0.0.1:80/intra/ccl/grant \ +--header "host: scql-broker-intra.bob.svc" \ +--header "kuscia-source: bob" \ +-H "Content-Type: application/json" \ +-d '{ + "project_id": "demo", + "column_control_list":[ + {"col":{"column_name":"ID","table_name":"tb"},"party_code":"alice","constraint":1}, + {"col":{"column_name":"is_active","table_name":"tb"},"party_code":"alice","constraint":1}, + {"col":{"column_name":"order_amount","table_name":"tb"},"party_code":"alice","constraint":1} + ] +}' +``` + +2.将 tb 表授权给 bob 节点 +```bash +curl -X POST http://127.0.0.1:80/intra/ccl/grant \ +--header "host: scql-broker-intra.bob.svc" \ +--header "kuscia-source: bob" \ +-H "Content-Type: application/json" \ +-d '{ + "project_id": "demo", + "column_control_list":[ + {"col":{"column_name":"ID","table_name":"tb"},"party_code":"bob","constraint":1}, + {"col":{"column_name":"is_active","table_name":"tb"},"party_code":"bob","constraint":1}, + 
{"col":{"column_name":"order_amount","table_name":"tb"},"party_code":"bob","constraint":1} + ] +}' +``` + +### 查看数据表授权 +下面以 alice 为例,bob 节点类似 +```bash +curl -X POST http://127.0.0.1:80/intra/ccl/show \ +--header "host: scql-broker-intra.alice.svc" \ +--header "kuscia-source: alice" \ +-H "Content-Type: application/json" \ +-d '{ + "project_id": "demo", + "tables":["ta"], + "dest_parties":["alice"] +}' +``` + +### 撤销数据表授权 +若想撤销数据表授权,那么可以参考下面命令。以 alice 节点为例,bob 节点类似。 +```bash +curl -X POST http://127.0.0.1:80/intra/ccl/revoke \ +--header "host: scql-broker-intra.alice.svc" \ +--header "kuscia-source: alice" \ +-H "Content-Type: application/json" \ +-d '{ + "project_id": "demo", + "column_control_list":[ + {"col":{"column_name":"ID","table_name":"ta"},"party_code":"alice","constraint":1}, + {"col":{"column_name":"age","table_name":"ta"},"party_code":"alice","constraint":1}, + {"col":{"column_name":"income","table_name":"ta"},"party_code":"alice","constraint":1}, + {"col":{"column_name":"credit_rank","table_name":"ta"},"party_code":"alice","constraint":1} + ] +}' +``` + +### 进行联合分析 +#### 同步查询 +下面以 alice 节点查询为例,bob 节点类似。 +```bash +curl -X POST http://127.0.0.1:80/intra/query \ +--header "host: scql-broker-intra.alice.svc" \ +--header "kuscia-source: alice" \ +-H "Content-Type: application/json" \ +-d '{ + "project_id": "demo", + "query":"SELECT ta.credit_rank, COUNT(*) as cnt, AVG(ta.income) as avg_income, AVG(tb.order_amount) as avg_amount FROM ta INNER JOIN tb ON ta.ID = tb.ID WHERE ta.age >= 20 AND ta.age <= 30 AND tb.is_active=1 GROUP BY ta.credit_rank;" +}' +``` +返回的成功结果如下: +```bash +{ + "status": { + "code": 0, + "message": "", + "details": [] + }, + "affected_rows": "0", + "warnings": [], + "cost_time_s": 7.171298774, + "out_columns": [{ + "name": "credit_rank", + "shape": { + "dim": [{ + "dim_value": "2" + }, { + "dim_value": "1" + }] + }, + "elem_type": "INT64", + "option": "VALUE", + "annotation": { + "status": "TENSORSTATUS_UNKNOWN" + }, + "int32_data": [], + "int64_data": ["6", "5"], + "float_data": [], + "double_data": [], + "bool_data": [], + "string_data": [], + "ref_num": 0 + }, { + "name": "cnt", + "shape": { + "dim": [{ + "dim_value": "2" + }, { + "dim_value": "1" + }] + }, + "elem_type": "INT64", + "option": "VALUE", + "annotation": { + "status": "TENSORSTATUS_UNKNOWN" + }, + "int32_data": [], + "int64_data": ["3", "1"], + "float_data": [], + "double_data": [], + "bool_data": [], + "string_data": [], + "ref_num": 0 + }, { + "name": "avg_income", + "shape": { + "dim": [{ + "dim_value": "2" + }, { + "dim_value": "1" + }] + }, + "elem_type": "FLOAT64", + "option": "VALUE", + "annotation": { + "status": "TENSORSTATUS_UNKNOWN" + }, + "int32_data": [], + "int64_data": [], + "float_data": [], + "double_data": [438000, 30070], + "bool_data": [], + "string_data": [], + "ref_num": 0 + }, { + "name": "avg_amount", + "shape": { + "dim": [{ + "dim_value": "2" + }, { + "dim_value": "1" + }] + }, + "elem_type": "FLOAT64", + "option": "VALUE", + "annotation": { + "status": "TENSORSTATUS_UNKNOWN" + }, + "int32_data": [], + "int64_data": [], + "float_data": [], + "double_data": [4060.6666666666665, 3598], + "bool_data": [], + "string_data": [], + "ref_num": 0 + }] +} +``` + +#### 异步查询 +下面以 alice 节点为例,bob 节点类似。 + +1.提交 query +```bash +curl -X POST http://127.0.0.1:80/intra/query/submit \ +--header "host: scql-broker-intra.alice.svc" \ +--header "kuscia-source: alice" \ +-H "Content-Type: application/json" \ +-d '{ + "project_id": "demo", + "query":"SELECT ta.credit_rank, COUNT(*) as cnt, 
AVG(ta.income) as avg_income, AVG(tb.order_amount) as avg_amount FROM ta INNER JOIN tb ON ta.ID = tb.ID WHERE ta.age >= 20 AND ta.age <= 30 AND tb.is_active=1 GROUP BY ta.credit_rank;" +}' +``` + +2.获取结果 +```bash +curl -X POST http://127.0.0.1:80/intra/query/fetch \ +--header "host: scql-broker-intra.alice.svc" \ +--header "kuscia-source: alice" \ +-H "Content-Type: application/json" \ +-d '{ + "job_id":"3c4723fb-9afa-11ee-8934-0242ac12000" +}' +``` + +## 参考 +### 常用命令 +查看 broker kd 状态: +```bash +docker exec -it ${USER}-kuscia-autonomy-alice kubectl get kd -n cross-domain +``` + +查看 broker deployment 状态 +```bash +docker exec -it ${USER}-kuscia-autonomy-alice kubectl get deployment -A +``` + +查看 broker 应用状态 +```bash +docker exec -it ${USER}-kuscia-autonomy-alice kubectl get po -A +``` + +查看 broker configmap +```bash +docker exec -it ${USER}-kuscia-autonomy-alice kubectl get cm scql-broker-configtemplate -n alice -oyaml +``` + +查看 appimage +```bash +docker exec -it ${USER}-kuscia-autonomy-alice kubectl get appimage +``` + +删除 broker +```bash +docker exec -it ${USER}-kuscia-autonomy-alice kubectl delete kd scql -n cross-domain +``` + +### 如何查看 SCQL 应用容器日志 +在 Kuscia 中,可以登陆到节点容器内查看 SCQL 应用容器的日志。具体方法如下。 + +1.登陆到节点容器中 + +下面以 alice 节点为例: +```bash +docker exec -it ${USER}-kuscia-autonomy-alice bash +``` + +2.查看日志 + +在目录 `/home/kuscia/var/stdout/pods` 下可以看到对应 SCQL Broker 和 Engine 应用容器的目录。后续进入到相应目录下,即可查看应用的日志。 +```bash +# 查看当前应用容器的目录 +ls /home/kuscia/var/stdout/pods + +# 查看应用容器的日志,示例如下: +cat /home/kuscia/var/stdout/pods/alice_xxxx_engine_xxxx/secretflow/0.log +cat /home/kuscia/var/stdout/pods/alice_xxxx_broker_xxxx/secretflow/0.log +``` \ No newline at end of file diff --git a/docs/tutorial/run_sf_job_with_api_cn.md b/docs/tutorial/run_sf_job_with_api_cn.md index 856dcd76..ffa97ee1 100644 --- a/docs/tutorial/run_sf_job_with_api_cn.md +++ b/docs/tutorial/run_sf_job_with_api_cn.md @@ -25,7 +25,7 @@ Kuscia API 使用双向 HTTPS,所以需要配置你的客户端库的双向 HT ### 点对点组网模式 -证书的配置参考[配置授权](../deployment/deploy_p2p_cn.md#配置授权) +证书的配置参考[配置授权](../deployment/Docker_deployment_kuscia/deploy_p2p_cn.md#配置授权) 这里以 alice 节点为例,接口需要的证书文件在 ${USER}-kuscia-autonomy-alice 节点的`/home/kuscia/var/certs/`目录下: @@ -94,7 +94,7 @@ docker exec -it ${USER}-kuscia-autonomy-alice ### 使用 Kuscia 示例数据配置 KusciaJob -下面的示例展示了一个 KusciaJob,该任务流完成 2 个任务: +此处以[KusciaJob 示例](../reference/apis/kusciajob_cn.md#请求示例)作为任务示例展示,该任务流完成 2 个任务: 1. job-psi 读取 alice 和 bob 的数据文件,进行隐私求交,求交的结果分别保存为两个参与方的`psi-output.csv`。 2. 
job-split 读取 alice 和 bob 上一步中求交的结果文件,并拆分成训练集和测试集,分别保存为两个参与方的`train-dataset.csv`、`test-dataset.csv`。 @@ -103,39 +103,6 @@ docker exec -it ${USER}-kuscia-autonomy-alice 我们请求[创建 Job](../reference/apis/kusciajob_cn.md#请求createjobrequest) 接口来创建并运行这个 KusciaJob。 -在 kuscia-master 容器终端中,执行以下命令,内容如下: - -```shell -curl -k -X POST 'https://localhost:8082/api/v1/job/create' \ ---header "Token: $(cat /home/kuscia/var/certs/token)" \ ---header 'Content-Type: application/json' \ ---cert '/home/kuscia/var/certs/kusciaapi-server.crt' \ ---key '/home/kuscia/var/certs/kusciaapi-server.key' \ ---cacert '/home/kuscia/var/certs/ca.crt' \ --d '{ - "job_id": "job-best-effort-linear", - "initiator": "alice", - "max_parallelism": 2, - "tasks": [{ - "app_image": "secretflow-image", - "parties": [{"domain_id": "alice"},{"domain_id": "bob"}], - "alias": "job-psi", - "task_id": "job-psi", - "task_input_config": "{\"sf_datasource_config\":{\"alice\":{\"id\":\"default-data-source\"},\"bob\":{\"id\":\"default-data-source\"}},\"sf_cluster_desc\":{\"parties\":[\"alice\",\"bob\"],\"devices\":[{\"name\":\"spu\",\"type\":\"spu\",\"parties\":[\"alice\",\"bob\"],\"config\":\"{\\\"runtime_config\\\":{\\\"protocol\\\":\\\"REF2K\\\",\\\"field\\\":\\\"FM64\\\"},\\\"link_desc\\\":{\\\"connect_retry_times\\\":60,\\\"connect_retry_interval_ms\\\":1000,\\\"brpc_channel_protocol\\\":\\\"http\\\",\\\"brpc_channel_connection_type\\\":\\\"pooled\\\",\\\"recv_timeout_ms\\\":1200000,\\\"http_timeout_ms\\\":1200000}}\"},{\"name\":\"heu\",\"type\":\"heu\",\"parties\":[\"alice\",\"bob\"],\"config\":\"{\\\"mode\\\": \\\"PHEU\\\", \\\"schema\\\": \\\"paillier\\\", \\\"key_size\\\": 2048}\"}],\"ray_fed_config\":{\"cross_silo_comm_backend\":\"brpc_link\"}},\"sf_node_eval_param\":{\"domain\":\"data_prep\",\"name\":\"psi\",\"version\":\"0.0.1\",\"attr_paths\":[\"input/receiver_input/key\",\"input/sender_input/key\",\"protocol\",\"precheck_input\",\"bucket_size\",\"curve_type\"],\"attrs\":[{\"ss\":[\"id1\"]},{\"ss\":[\"id2\"]},{\"s\":\"ECDH_PSI_2PC\"},{\"b\":true},{\"i64\":\"1048576\"},{\"s\":\"CURVE_FOURQ\"}]},\"sf_input_ids\":[\"alice-table\",\"bob-table\"],\"sf_output_ids\":[\"psi-output\"],\"sf_output_uris\":[\"psi-output.csv\"]}", - "priority": "100" - }, { - "app_image": "secretflow-image", - "parties": [{"domain_id": "alice"},{"domain_id": "bob"}], - "alias": "job-split", - "task_id": "job-split", - "dependencies": ["job-psi"], - "task_input_config": "{\"sf_datasource_config\":{\"alice\":{\"id\":\"default-data-source\"},\"bob\":{\"id\":\"default-data-source\"}},\"sf_cluster_desc\":{\"parties\":[\"alice\",\"bob\"],\"devices\":[{\"name\":\"spu\",\"type\":\"spu\",\"parties\":[\"alice\",\"bob\"],\"config\":\"{\\\"runtime_config\\\":{\\\"protocol\\\":\\\"REF2K\\\",\\\"field\\\":\\\"FM64\\\"},\\\"link_desc\\\":{\\\"connect_retry_times\\\":60,\\\"connect_retry_interval_ms\\\":1000,\\\"brpc_channel_protocol\\\":\\\"http\\\",\\\"brpc_channel_connection_type\\\":\\\"pooled\\\",\\\"recv_timeout_ms\\\":1200000,\\\"http_timeout_ms\\\":1200000}}\"},{\"name\":\"heu\",\"type\":\"heu\",\"parties\":[\"alice\",\"bob\"],\"config\":\"{\\\"mode\\\": \\\"PHEU\\\", \\\"schema\\\": \\\"paillier\\\", \\\"key_size\\\": 
2048}\"}],\"ray_fed_config\":{\"cross_silo_comm_backend\":\"brpc_link\"}},\"sf_node_eval_param\":{\"domain\":\"data_prep\",\"name\":\"train_test_split\",\"version\":\"0.0.1\",\"attr_paths\":[\"train_size\",\"test_size\",\"random_state\",\"shuffle\"],\"attrs\":[{\"f\":0.75},{\"f\":0.25},{\"i64\":1234},{\"b\":true}]},\"sf_output_uris\":[\"train-dataset.csv\",\"test-dataset.csv\"],\"sf_output_ids\":[\"train-dataset\",\"test-dataset\"],\"sf_input_ids\":[\"psi-output\"]}", - "priority": "100" - } - ] -}' -``` - 具体字段数据格式和含义请参考[创建 Job](../reference/apis/kusciajob_cn.md#请求createjobrequest) ,本文不再赘述。 如果你成功了,你将得到如下返回: diff --git a/etc/conf/domain-cluster-res.yaml b/etc/conf/domain-cluster-res.yaml index 1ae714d5..6df560ee 100644 --- a/etc/conf/domain-cluster-res.yaml +++ b/etc/conf/domain-cluster-res.yaml @@ -12,6 +12,7 @@ rules: - get - patch - update + - delete - apiGroups: - "" resources: diff --git a/go.mod b/go.mod index 16ee5830..88f30237 100644 --- a/go.mod +++ b/go.mod @@ -6,26 +6,28 @@ require ( github.com/apache/arrow/go/v13 v13.0.0 github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e github.com/casbin/casbin/v2 v2.77.2 + github.com/containerd/cgroups/v3 v3.0.3 github.com/coredns/caddy v1.1.1 github.com/coredns/coredns v1.10.0 github.com/docker/distribution v2.8.2+incompatible github.com/envoyproxy/go-control-plane v0.11.1 - github.com/fsnotify/fsnotify v1.6.0 + github.com/fsnotify/fsnotify v1.7.0 github.com/gin-gonic/gin v1.9.1 github.com/go-sql-driver/mysql v1.6.0 github.com/golang-jwt/jwt/v5 v5.0.0 github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da github.com/golang/mock v1.6.0 - github.com/golang/protobuf v1.5.3 + github.com/golang/protobuf v1.5.4 github.com/google/go-cmp v0.6.0 - github.com/google/uuid v1.3.1 + github.com/google/uuid v1.5.0 github.com/json-iterator/go v1.1.12 github.com/miekg/dns v1.1.50 github.com/mitchellh/go-homedir v1.1.0 github.com/mitchellh/mapstructure v1.5.0 github.com/moby/sys/mount v0.3.3 - github.com/moby/sys/mountinfo v0.6.2 - github.com/opencontainers/image-spec v1.0.2 + github.com/moby/sys/mountinfo v0.7.1 + github.com/opencontainers/image-spec v1.1.0-rc5 + github.com/opencontainers/runtime-spec v1.1.1-0.20230823135140-4fec88fd00a4 github.com/opencontainers/selinux v1.11.0 github.com/patrickmn/go-cache v2.1.0+incompatible github.com/pkg/errors v0.9.1 @@ -35,34 +37,35 @@ require ( github.com/shirou/gopsutil/v3 v3.22.6 github.com/spf13/cobra v1.6.1 github.com/spf13/pflag v1.0.5 + github.com/stathat/consistent v1.0.0 github.com/stretchr/testify v1.8.4 github.com/tidwall/match v1.1.1 gitlab.com/jonas.jasas/condchan v0.0.0-20190210165812-36637ad2b5bc go.uber.org/atomic v1.9.0 go.uber.org/zap v1.24.0 - golang.org/x/net v0.17.0 - google.golang.org/genproto/googleapis/rpc v0.0.0-20231030173426-d783a09b4405 - google.golang.org/grpc v1.59.0 + golang.org/x/net v0.19.0 + google.golang.org/genproto/googleapis/rpc v0.0.0-20231212172506-995d672761c0 + google.golang.org/grpc v1.60.1 google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.3.0 - google.golang.org/protobuf v1.31.0 + google.golang.org/protobuf v1.33.0 gopkg.in/natefinch/lumberjack.v2 v2.0.0 gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 gotest.tools/v3 v3.0.3 - k8s.io/api v0.26.11 + k8s.io/api v0.28.4 k8s.io/apiextensions-apiserver v0.26.11 - k8s.io/apimachinery v0.26.11 - k8s.io/apiserver v0.26.11 - k8s.io/client-go v0.26.11 + k8s.io/apimachinery v0.28.4 + k8s.io/apiserver v0.28.2 + k8s.io/client-go v0.28.4 k8s.io/code-generator v0.26.11 - k8s.io/component-base v0.26.11 + k8s.io/component-base 
v0.28.4 k8s.io/component-helpers v0.26.11 - k8s.io/cri-api v0.17.3 - k8s.io/klog/v2 v2.80.1 + k8s.io/cri-api v0.28.2 + k8s.io/klog/v2 v2.100.1 k8s.io/kubectl v0.0.0 - k8s.io/kubelet v0.26.11 + k8s.io/kubelet v0.28.2 k8s.io/kubernetes v1.26.11 - k8s.io/utils v0.0.0-20221107191617-1a15be271d1d + k8s.io/utils v0.0.0-20230406110748-d93618cff8a2 sigs.k8s.io/controller-tools v0.9.2 sigs.k8s.io/yaml v1.3.0 @@ -72,17 +75,17 @@ require ( github.com/lufia/plan9stats v0.0.0-20220517141722-cf486979b281 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect - sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2 // indirect + sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect ) require ( - cloud.google.com/go/compute v1.23.1 // indirect + cloud.google.com/go/compute v1.23.3 // indirect cloud.google.com/go/compute/metadata v0.2.3 // indirect github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect github.com/JeffAshton/win_pdh v0.0.0-20161109143554-76bb4ee9f0ab // indirect github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible // indirect github.com/MakeNowJust/heredoc v1.0.0 // indirect - github.com/Microsoft/go-winio v0.5.1 // indirect + github.com/Microsoft/go-winio v0.6.1 // indirect github.com/NYTimes/gziphandler v1.1.1 // indirect github.com/antlr/antlr4/runtime/Go/antlr v1.4.10 // indirect github.com/apparentlymart/go-cidr v1.1.0 // indirect @@ -100,7 +103,7 @@ require ( github.com/cilium/ebpf v0.11.0 // indirect github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4 // indirect github.com/containerd/console v1.0.3 // indirect - github.com/containerd/ttrpc v1.1.0 // indirect + github.com/containerd/ttrpc v1.2.2 // indirect github.com/coreos/go-semver v0.3.0 // indirect github.com/coreos/go-systemd/v22 v22.5.0 // indirect github.com/cyphar/filepath-securejoin v0.2.4 // indirect @@ -122,7 +125,7 @@ require ( github.com/gabriel-vasile/mimetype v1.4.2 // indirect github.com/gin-contrib/sse v0.1.0 // indirect github.com/go-errors/errors v1.0.1 // indirect - github.com/go-logr/logr v1.3.0 // indirect + github.com/go-logr/logr v1.4.1 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.2.6 // indirect github.com/go-openapi/jsonpointer v0.19.5 // indirect @@ -144,6 +147,7 @@ require ( github.com/google/gofuzz v1.2.0 // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7 // indirect + github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 // indirect github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.0 // indirect github.com/grpc-ecosystem/grpc-opentracing v0.0.0-20180507213350-8e809c8a8645 // indirect @@ -155,8 +159,8 @@ require ( github.com/jonboulle/clockwork v0.2.2 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/karrick/godirwalk v1.17.0 // indirect - github.com/klauspost/compress v1.15.15 // indirect - github.com/klauspost/cpuid/v2 v2.2.4 // indirect + github.com/klauspost/compress v1.17.4 // indirect + github.com/klauspost/cpuid/v2 v2.2.5 // indirect github.com/leodido/go-urn v1.2.4 // indirect github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect github.com/lithammer/dedent v1.1.0 // indirect @@ -176,10 +180,9 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect github.com/opencontainers/go-digest v1.0.0 // indirect - github.com/opencontainers/runc v1.1.6 // indirect - github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 // indirect + github.com/opencontainers/runc v1.1.12 // indirect github.com/opentracing/opentracing-go v1.2.0 // indirect - github.com/pelletier/go-toml/v2 v2.0.8 // indirect + github.com/pelletier/go-toml/v2 v2.1.1 // indirect github.com/peterbourgon/diskv v2.0.1+incompatible // indirect github.com/pierrec/lz4/v4 v4.1.17 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect @@ -189,7 +192,7 @@ require ( github.com/rosedblabs/wal v1.3.3 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646 // indirect - github.com/sirupsen/logrus v1.8.1 // indirect + github.com/sirupsen/logrus v1.9.3 // indirect github.com/stoewer/go-strcase v1.2.0 // indirect github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect github.com/tidwall/gjson v1.14.4 // indirect @@ -199,41 +202,42 @@ require ( github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/ugorji/go/codec v1.2.11 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect - github.com/vishvananda/netlink v1.1.0 // indirect - github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae // indirect + github.com/vishvananda/netlink v1.2.1-beta.2 // indirect + github.com/vishvananda/netns v0.0.4 // indirect github.com/xlab/treeprint v1.1.0 // indirect github.com/yusufpapurcu/wmi v1.2.2 // indirect github.com/zeebo/xxh3 v1.0.2 // indirect + go.etcd.io/bbolt v1.3.8 // indirect go.etcd.io/etcd/api/v3 v3.5.6 // indirect go.etcd.io/etcd/client/pkg/v3 v3.5.6 // indirect go.etcd.io/etcd/client/v2 v2.305.6 // indirect go.etcd.io/etcd/client/v3 v3.5.6 // indirect - go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.46.0 // indirect - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.44.0 // indirect - go.opentelemetry.io/otel v1.20.0 // indirect + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.46.1 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0 // indirect + go.opentelemetry.io/otel v1.21.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0 // indirect - go.opentelemetry.io/otel/metric v1.20.0 // indirect - go.opentelemetry.io/otel/sdk v1.20.0 // indirect - go.opentelemetry.io/otel/trace v1.20.0 // indirect + go.opentelemetry.io/otel/metric v1.21.0 // indirect + go.opentelemetry.io/otel/sdk v1.21.0 // indirect + go.opentelemetry.io/otel/trace v1.21.0 // indirect go.opentelemetry.io/proto/otlp v1.0.0 // indirect go.starlark.net v0.0.0-20200306205701-8dd3e2ee1dd5 // indirect go.uber.org/multierr v1.8.0 // indirect golang.org/x/arch v0.3.0 // indirect golang.org/x/crypto v0.17.0 // indirect - golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1 // indirect - golang.org/x/mod v0.12.0 // indirect - golang.org/x/oauth2 v0.12.0 // indirect - golang.org/x/sync v0.5.0 // indirect - golang.org/x/sys v0.15.0 // indirect + golang.org/x/exp v0.0.0-20231214170342-aacd6d4b4611 // indirect + golang.org/x/mod v0.14.0 // indirect + golang.org/x/oauth2 v0.13.0 // indirect + golang.org/x/sync v0.6.0 // indirect + golang.org/x/sys v0.18.0 // indirect golang.org/x/term v0.15.0 // 
indirect golang.org/x/text v0.14.0 // indirect golang.org/x/time v0.3.0 // indirect - golang.org/x/tools v0.12.0 // indirect + golang.org/x/tools v0.16.0 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect - google.golang.org/appengine v1.6.7 // indirect - google.golang.org/genproto v0.0.0-20231016165738-49dd2c1f3d0b // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20231012201019-e917dd12ba7a // indirect + google.golang.org/appengine v1.6.8 // indirect + google.golang.org/genproto v0.0.0-20231211222908-989df2bf70f3 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20231120223509-83a465c0220f // indirect gopkg.in/inf.v0 v0.9.1 // indirect k8s.io/cli-runtime v0.26.11 // indirect k8s.io/cloud-provider v0.0.0 // indirect @@ -250,11 +254,10 @@ require ( sigs.k8s.io/kustomize/kustomize/v4 v4.5.7 // indirect sigs.k8s.io/kustomize/kyaml v0.13.9 // indirect sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect + stathat.com/c/consistent v1.0.0 // indirect ) replace ( - github.com/opencontainers/runc => github.com/opencontainers/runc v1.1.12 - //google.golang.org/protobuf => google.golang.org/protobuf v1.28.1 k8s.io/api => k8s.io/api v0.26.11 k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.26.11 k8s.io/apimachinery => k8s.io/apimachinery v0.26.11 diff --git a/go.sum b/go.sum index f2dc5788..ee932ea4 100644 --- a/go.sum +++ b/go.sum @@ -19,8 +19,8 @@ cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvf cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= -cloud.google.com/go/compute v1.23.1 h1:V97tBoDaZHb6leicZ1G6DLK2BAaZLJ/7+9BB/En3hR0= -cloud.google.com/go/compute v1.23.1/go.mod h1:CqB3xpmPKKt3OJpW2ndFIXnA9A4xAy/F3Xp1ixncW78= +cloud.google.com/go/compute v1.23.3 h1:6sVlXXBmbd7jNX0Ipq0trII3e4n1/MsADLK6a+aiVlk= +cloud.google.com/go/compute v1.23.3/go.mod h1:VCgBUoMnIVIR0CscqQiPJLAG25E3ZRZMzcFZeQ+h8CI= cloud.google.com/go/compute/metadata v0.2.3 h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY= cloud.google.com/go/compute/metadata v0.2.3/go.mod h1:VAV5nSsACxMJvgaAuX6Pk2AawlZn8kiOGuCv6gTkwuA= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= @@ -47,8 +47,8 @@ github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ= github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE= github.com/Microsoft/go-winio v0.4.15/go.mod h1:tTuCMEN+UleMWgg9dVx4Hu52b1bJo+59jBh3ajtinzw= -github.com/Microsoft/go-winio v0.5.1 h1:aPJp2QD7OOrhO5tQXqQoGSJc+DjDtWTGLOmNyAm6FgY= -github.com/Microsoft/go-winio v0.5.1/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84= +github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= +github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= github.com/NYTimes/gziphandler v1.1.1 h1:ZUDjpQae29j0ryrS0u/B8HZfJBtBQHjqw2rQ2cqUQ3I= github.com/NYTimes/gziphandler v1.1.1/go.mod h1:n/CVRwUEOgIxrgPvAQhUUr9oeUtvrhMomdKFjzJNB0c= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= @@ -70,6 +70,7 @@ github.com/aws/aws-sdk-go v1.35.24/go.mod 
h1:tlPOdRjfxPBpNIwqDj61rmsnA85v9jc0Ps9 github.com/aws/aws-sdk-go v1.44.116 h1:NpLIhcvLWXJZAEwvPj3TDHeqp7DleK6ZUVYyW01WNHY= github.com/aws/aws-sdk-go v1.44.116/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= +github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= @@ -118,10 +119,13 @@ github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWH github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4 h1:/inchEIKaYC1Akx+H+gqO04wryn5h75LSazbRlnya1k= github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/container-storage-interface/spec v1.7.0 h1:gW8eyFQUZWWrMWa8p1seJ28gwDoN5CVJ4uAbQ+Hdycw= +github.com/containerd/cgroups/v3 v3.0.3 h1:S5ByHZ/h9PMe5IOQoN7E+nMc2UcLEM/V48DGDJ9kip0= +github.com/containerd/cgroups/v3 v3.0.3/go.mod h1:8HBe7V3aWGLFPd/k03swSIsGjZhHI2WzJmticMgVuz0= github.com/containerd/console v1.0.3 h1:lIr7SlA5PxZyMV30bDW0MGbiOPXwc63yRuCP0ARubLw= github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U= -github.com/containerd/ttrpc v1.1.0 h1:GbtyLRxb0gOLR0TYQWt3O6B0NvT8tMdorEHqIQo/lWI= github.com/containerd/ttrpc v1.1.0/go.mod h1:XX4ZTnoOId4HklF4edwc4DcqskFZuvXB1Evzy5KFQpQ= +github.com/containerd/ttrpc v1.2.2 h1:9vqZr0pxwOF5koz6N0N3kJ0zDHokrcPxIR/ZR2YFtOs= +github.com/containerd/ttrpc v1.2.2/go.mod h1:sIT6l32Ph/H9cvnJsfXM5drIVzTr5A2flTf1G5tYZak= github.com/containerd/typeurl v1.0.2 h1:Chlt8zIieDbzQFzXzAeBEF92KhExuE4p9p92/QmY7aY= github.com/containerd/typeurl v1.0.2/go.mod h1:9trJWW2sRlGub4wZJRTW83VtbOLS6hwcDZXTn6oPz9s= github.com/coredns/caddy v1.1.1 h1:2eYKZT7i6yxIfGP3qLJoJ7HAsDJqYB+X68g4NYjSrE0= @@ -138,6 +142,7 @@ github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46t github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.11 h1:07n33Z8lZxZ2qwegKbObQohDhXDQxiMMz1NOUGYlesw= github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= github.com/cyphar/filepath-securejoin v0.2.4 h1:Ugdm7cg7i6ZK6x3xDF1oEu1nfkyfH53EtKeQYTC3kyg= github.com/cyphar/filepath-securejoin v0.2.4/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -194,8 +199,8 @@ github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI github.com/form3tech-oss/jwt-go v3.2.3+incompatible h1:7ZaBxOI7TMoYBfyA3cQHErNNyAWIKUMIwqxEtgHOs5c= github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= github.com/frankban/quicktest v1.14.5 h1:dfYrrRyLtiqT9GyKXgdh+k4inNeTvmGbuSgZ3lx3GhA= -github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= -github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= +github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= +github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= github.com/fvbommel/sortorder 
v1.0.1 h1:dSnXLt4mJYH25uDDGa3biZNQsozaUWDSWeKJ0qqFfzE= github.com/fvbommel/sortorder v1.0.1/go.mod h1:uk88iVf1ovNn1iLfgUVU2F9o5eO30ui720w+kxuqRs0= github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= @@ -222,8 +227,8 @@ github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7 github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.3.0 h1:2y3SDp0ZXuc6/cjLSZ+Q3ir+QB9T/iG5yYRXqsagWSY= -github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= +github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= @@ -294,8 +299,8 @@ github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= -github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golangplus/bytes v0.0.0-20160111154220-45c989fe5450/go.mod h1:Bk6SMAONeMXrxql8uvOKuAZSu8aM5RUGv+1C6IJaEho= github.com/golangplus/bytes v1.0.0/go.mod h1:AdRaCFwmc/00ZzELMWb01soso6W1R/++O1XL80yAn+A= github.com/golangplus/fmt v1.0.0/go.mod h1:zpM0OfbMCjPtd2qkTD/jX2MgiFCqklhSUFyDW44gVQE= @@ -344,15 +349,16 @@ github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaU github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= -github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU= +github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc= github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7 h1:pdN6V1QBWetyv/0+wjACpqVH+eVULgEjkurDLq3goeM= github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= -github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 
h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= +github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 h1:UH//fgunKIs4JdUbpDl1VZCDaL56wXCB/5+wF6uHfaI= +github.com/grpc-ecosystem/go-grpc-middleware v1.4.0/go.mod h1:g5qyo/la0ALbONm6Vbp88Yd8NsDy6rZz+RcrMPxvld8= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= @@ -394,11 +400,11 @@ github.com/karrick/godirwalk v1.17.0 h1:b4kY7nqDdioR/6qnbHQyDvmA17u5G1cZ6J+CZXwS github.com/karrick/godirwalk v1.17.0/go.mod h1:j4mkqPuvaLI8mp1DroR3P6ad7cyYd4c1qeJ3RV7ULlk= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.15.15 h1:EF27CXIuDsYJ6mmvtBRlEuB2UVOqHG1tAXgZ7yIO+lw= -github.com/klauspost/compress v1.15.15/go.mod h1:ZcK2JAFqKOpnBlxcLsJzYfrS9X1akm9fHZNnD9+Vo/4= +github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4= +github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= -github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk= -github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= +github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg= +github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= @@ -455,8 +461,9 @@ github.com/moby/spdystream v0.2.0/go.mod h1:f7i0iNDQJ059oMTcWxx8MA/zKFIuD/lY+0Gq github.com/moby/sys/mount v0.3.3 h1:fX1SVkXFJ47XWDoeFW4Sq7PdQJnV2QIDZAqjNqgEjUs= github.com/moby/sys/mount v0.3.3/go.mod h1:PBaEorSNTLG5t/+4EgukEQVlAvVEc6ZjTySwKdqp5K0= github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU= -github.com/moby/sys/mountinfo v0.6.2 h1:BzJjoreD5BMFNmD9Rus6gdd1pLuecOFPt8wC+Vygl78= github.com/moby/sys/mountinfo v0.6.2/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI= +github.com/moby/sys/mountinfo v0.7.1 h1:/tTvQaSJRr2FshkhXiIpux6fQ2Zvc4j7tAhMTStAG2g= +github.com/moby/sys/mountinfo v0.7.1/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI= github.com/moby/term v0.0.0-20220808134915-39b0c02b01ae h1:O4SWKdcHVCvYqyDV+9CJA1fcDN2L11Bule0iFy3YlAI= github.com/moby/term v0.0.0-20220808134915-39b0c02b01ae/go.mod h1:E2VnQOmVuvZB6UYnnDB0qG5Nq/1tD9acaOpo6xmt0Kw= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -469,6 +476,7 @@ github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjY github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 h1:n6/2gBQ3RWajuToeY6ZtZTIKv2v7ThUy5KKusIT0yc0= github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00/go.mod h1:Pm3mSP3c5uWn86xMLZ5Sa7JB9GsEZySvHYXCTK4E9q4= github.com/morikuni/aec v1.0.0/go.mod 
h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= +github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= github.com/mrunalp/fileutils v0.5.1 h1:F+S7ZlNKnrwHfSwdlgNSkKo67ReVf8o9fel6C3dkm/Q= github.com/mrunalp/fileutils v0.5.1/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= @@ -484,21 +492,25 @@ github.com/onsi/ginkgo/v2 v2.4.0 h1:+Ig9nvqgS5OBSACXNk15PLdp0U9XPYROt9CFzVdFGIs= github.com/onsi/gomega v1.23.0 h1:/oxKu9c2HVap+F3PfKort2Hw5DEU+HGlW8n+tguWsys= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= -github.com/opencontainers/image-spec v1.0.2 h1:9yCKha/T5XdGtO0q9Q9a6T5NUCsTn/DrBg0D7ufOcFM= github.com/opencontainers/image-spec v1.0.2/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= +github.com/opencontainers/image-spec v1.1.0-rc5 h1:Ygwkfw9bpDvs+c9E34SdgGOj41dX/cbdlwvlWt0pnFI= +github.com/opencontainers/image-spec v1.1.0-rc5/go.mod h1:X4pATf0uXsnn3g5aiGIsVnJBR4mxhKzfwmvK/B2NTm8= +github.com/opencontainers/runc v1.1.4/go.mod h1:1J5XiS+vdZ3wCyZybsuxXZWGrgSr8fFJHLXuG2PsnNg= github.com/opencontainers/runc v1.1.12 h1:BOIssBaW1La0/qbNZHXOOa71dZfZEQOzW7dqQf3phss= github.com/opencontainers/runc v1.1.12/go.mod h1:S+lQwSfncpBha7XTy/5lBwWgm5+y5Ma/O44Ekby9FK8= -github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 h1:3snG66yBm59tKhhSPQrQ/0bCrv1LQbKt40LnUPiUxdc= github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/opencontainers/runtime-spec v1.1.1-0.20230823135140-4fec88fd00a4 h1:EctkgBjZ1y4q+sibyuuIgiKpa0QSd2elFtSSdNvBVow= +github.com/opencontainers/runtime-spec v1.1.1-0.20230823135140-4fec88fd00a4/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/selinux v1.10.0/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI= github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU= github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec= +github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs= github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc= github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ= -github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ= -github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= +github.com/pelletier/go-toml/v2 v2.1.1 h1:LWAJwfNvjQZCFIDKWYQaM62NcYeYViCmWIwmOStowAI= +github.com/pelletier/go-toml/v2 v2.1.1/go.mod h1:tJU2Z3ZkXwnxa4DPO899bsyIoywizdUvyaeZurnPPDc= github.com/peterbourgon/diskv v2.0.1+incompatible h1:UBdAOUP5p4RWqPBg048CAvpKN+vxiaj6gdUUzhl4XmI= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= github.com/pierrec/lz4/v4 v4.1.17 h1:kV4Ip+/hUBC+8T6+2EgburRtkE9ef4nbY3f4dFhGjMc= @@ -563,9 +575,9 @@ github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPx github.com/sirupsen/logrus v1.4.1/go.mod 
h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMBDgk/93Q= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= -github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= -github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE= github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= +github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= +github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js= github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= github.com/spf13/cobra v1.6.1 h1:o94oiPyS4KD1mPy2fmcYYHHfCxLqYjJOhGsCHFZtEzA= @@ -573,6 +585,8 @@ github.com/spf13/cobra v1.6.1/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUq github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stathat/consistent v1.0.0 h1:ZFJ1QTRn8npNBKW065raSZ8xfOqhpb8vLOkfp4CcL/U= +github.com/stathat/consistent v1.0.0/go.mod h1:uajTPbgSygZBJ+V+0mY7meZ8i0XAcZs7AQ6V121XSxw= github.com/stoewer/go-strcase v1.2.0 h1:Z2iHWqGXH00XYgqDmNgQbIBxf3wrNq0F3feEy0ainaU= github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -591,7 +605,6 @@ github.com/stretchr/testify v1.7.5/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI= @@ -614,11 +627,13 @@ github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZ github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= -github.com/vishvananda/netlink v1.1.0 h1:1iyaYNBLmP6L0220aDnYQpo1QEV4t4hJ+xEEhhJH8j0= github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= +github.com/vishvananda/netlink v1.2.1-beta.2 h1:Llsql0lnQEbHj0I1OuKyp8otXp0r3q0mPkuhwHfStVs= +github.com/vishvananda/netlink v1.2.1-beta.2/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho= github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= -github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae h1:4hwBBUfQCFe3Cym0ZtKyq7L16eZUtYKs+BaHDN6mAns= github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= 
+github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8= +github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8= github.com/xlab/treeprint v1.1.0 h1:G/1DjNkPpfZCFt9CSh6b5/nY4VimlbHF3Rh4obvtzDk= github.com/xlab/treeprint v1.1.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0= @@ -635,7 +650,8 @@ github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= gitlab.com/jonas.jasas/condchan v0.0.0-20190210165812-36637ad2b5bc h1:zCsu+odZEHb2f8U8WWhDgY5N5w3JCLHxuCIqVqCsLcQ= gitlab.com/jonas.jasas/condchan v0.0.0-20190210165812-36637ad2b5bc/go.mod h1:4JS8TdA7HSdK+x43waOdTGodqY/VKsj4w+8pWDL0E88= -go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU= +go.etcd.io/bbolt v1.3.8 h1:xs88BrvEv273UsB79e0hcVrlUWmS0a8upikMFhSyAtA= +go.etcd.io/bbolt v1.3.8/go.mod h1:N9Mkw9X8x5fupy0IKsmuqVtoGDyxsaDlbk4Rd05IAQw= go.etcd.io/etcd/api/v3 v3.5.6 h1:Cy2qx3npLcYqTKqGJzMypnMv2tiRyifZJ17BlWIWA7A= go.etcd.io/etcd/api/v3 v3.5.6/go.mod h1:KFtNaxGDw4Yx/BA4iPPwevUTAuqcsPxzyX8PHydchN8= go.etcd.io/etcd/client/pkg/v3 v3.5.6 h1:TXQWYceBKqLp4sa87rcPs11SXxUA/mHwH975v+BDvLU= @@ -652,22 +668,22 @@ go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.46.0 h1:PzIubN4/sjByhDRHLviCjJuweBXWFZWhghjg7cS28+M= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.46.0/go.mod h1:Ct6zzQEuGK3WpJs2n4dn+wfJYzd/+hNnxMRTWjGn30M= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.44.0 h1:KfYpVmrjI7JuToy5k8XV3nkapjWx48k4E4JOtVstzQI= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.44.0/go.mod h1:SeQhzAEccGVZVEy7aH87Nh0km+utSpo1pTv6eMMop48= -go.opentelemetry.io/otel v1.20.0 h1:vsb/ggIY+hUjD/zCAQHpzTmndPqv/ml2ArbsbfBYTAc= -go.opentelemetry.io/otel v1.20.0/go.mod h1:oUIGj3D77RwJdM6PPZImDpSZGDvkD9fhesHny69JFrs= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.46.1 h1:SpGay3w+nEwMpfVnbqOLH5gY52/foP8RE8UzTZ1pdSE= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.46.1/go.mod h1:4UoMYEZOC0yN/sPGH76KPkkU7zgiEWYWL9vwmbnTJPE= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0 h1:x8Z78aZx8cOF0+Kkazoc7lwUNMGy0LrzEMxTm4BbTxg= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0/go.mod h1:62CPTSry9QZtOaSsE3tOzhx6LzDhHnXJ6xHeMNNiM6Q= +go.opentelemetry.io/otel v1.21.0 h1:hzLeKBZEL7Okw2mGzZ0cc4k/A7Fta0uoPgaJCr8fsFc= +go.opentelemetry.io/otel v1.21.0/go.mod h1:QZzNPQPm1zLX4gZK4cMi+71eaorMSGT3A4znnUvNNEo= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 h1:Mne5On7VWdx7omSrSSZvM4Kw7cS7NQkOOmLcgscI51U= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0/go.mod h1:IPtUMKL4O3tH5y+iXVyAXqpAwMuzC1IrxVS81rummfE= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0 h1:3d+S281UTjM+AbF31XSOYn1qXn3BgIdWl8HNEpx08Jk= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0/go.mod h1:0+KuTDyKL4gjKCF75pHOX4wuzYDUZYfAQdSu43o+Z2I= 
-go.opentelemetry.io/otel/metric v1.20.0 h1:ZlrO8Hu9+GAhnepmRGhSU7/VkpjrNowxRN9GyKR4wzA= -go.opentelemetry.io/otel/metric v1.20.0/go.mod h1:90DRw3nfK4D7Sm/75yQ00gTJxtkBxX+wu6YaNymbpVM= -go.opentelemetry.io/otel/sdk v1.20.0 h1:5Jf6imeFZlZtKv9Qbo6qt2ZkmWtdWx/wzcCbNUlAWGM= -go.opentelemetry.io/otel/sdk v1.20.0/go.mod h1:rmkSx1cZCm/tn16iWDn1GQbLtsW/LvsdEEFzCSRM6V0= -go.opentelemetry.io/otel/trace v1.20.0 h1:+yxVAPZPbQhbC3OfAkeIVTky6iTFpcr4SiY9om7mXSQ= -go.opentelemetry.io/otel/trace v1.20.0/go.mod h1:HJSK7F/hA5RlzpZ0zKDCHCDHm556LCDtKaAo6JmBFUU= +go.opentelemetry.io/otel/metric v1.21.0 h1:tlYWfeo+Bocx5kLEloTjbcDwBuELRrIFxwdQ36PlJu4= +go.opentelemetry.io/otel/metric v1.21.0/go.mod h1:o1p3CA8nNHW8j5yuQLdc1eeqEaPfzug24uvsyIEJRWM= +go.opentelemetry.io/otel/sdk v1.21.0 h1:FTt8qirL1EysG6sTQRZ5TokkU8d0ugCj8htOgThZXQ8= +go.opentelemetry.io/otel/sdk v1.21.0/go.mod h1:Nna6Yv7PWTdgJHVRD9hIYywQBRx7pbox6nwBnZIxl/E= +go.opentelemetry.io/otel/trace v1.21.0 h1:WD9i5gzvoUPuXIXH24ZNBudiarZDKuekPqi/E8fpfLc= +go.opentelemetry.io/otel/trace v1.21.0/go.mod h1:LGbsEB0f9LGjN+OZaQQ26sohbOmiMR+BaslueVtS/qQ= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.opentelemetry.io/proto/otlp v1.0.0 h1:T0TX0tmXU8a3CbNXzEKGeU5mIVOdf0oykP+u2lIVU/I= go.opentelemetry.io/proto/otlp v1.0.0/go.mod h1:Sy6pihPLfYHkr3NkUbEhGHFhINUSI/v80hjKIs5JXpM= @@ -676,11 +692,13 @@ go.starlark.net v0.0.0-20200306205701-8dd3e2ee1dd5/go.mod h1:nmDLcffg48OtT/PSW0H go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= +go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= go.uber.org/goleak v1.2.1 h1:NBol2c7O1ZokfZ0LEU9K6Whx/KnwvepVetCUhtKja4A= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= go.uber.org/multierr v1.8.0 h1:dg6GjLku4EH+249NNmoIciG9N/jURbDG+pFlTkhzIC8= go.uber.org/multierr v1.8.0/go.mod h1:7EAYxJLBy9rStEaz58O2t4Uvip6FSURkq8/ppBp95ak= go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo= +go.uber.org/zap v1.18.1/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= go.uber.org/zap v1.24.0 h1:FiJd5l1UOLj0wCgbSE0rwwXHzEdAZS6hiiSnxJN/D60= go.uber.org/zap v1.24.0/go.mod h1:2kMP+WWQ8aoFoedH3T2sq6iJ2yDWpHbP0f6MQbS9Gkg= golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= @@ -705,8 +723,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= -golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1 h1:k/i9J1pBpvlfR+9QsetwPyERsqu1GIbi967PQMq3Ivc= -golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1/go.mod h1:V1LtkGg67GoY2N1AnLN78QLrzxkLyJw7RJb1gzOOz9w= +golang.org/x/exp v0.0.0-20231214170342-aacd6d4b4611 h1:qCEDpW1G+vcj3Y7Fy52pEM1AWm3abj8WimGYejI3SC4= +golang.org/x/exp v0.0.0-20231214170342-aacd6d4b4611/go.mod h1:iRJReGqOEeBhDZGkGbynYwcHlctCvnjTYIamk7uXpHI= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod 
h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= @@ -730,9 +748,8 @@ golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc= -golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.14.0 h1:dGoOF9QVLYng8IHTm7BAyWqCqSheQ5pYWGhzW00YJr0= +golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -763,6 +780,7 @@ golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81R golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= @@ -770,10 +788,8 @@ golang.org/x/net v0.0.0-20210726213435-c6fcb2dbf985/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= -golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= -golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= +golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c= +golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -781,8 +797,8 @@ golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4Iltr golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 
v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20220223155221-ee480838109b/go.mod h1:DAh4E804XQdzx2j+YRIaUnCqCV2RuMz24cGBJ5QYIrc= -golang.org/x/oauth2 v0.12.0 h1:smVPGxink+n1ZI5pkQa8y6fZT0RW0MgCO5bFpepy4B4= -golang.org/x/oauth2 v0.12.0/go.mod h1:A74bZ3aGXgCY0qaIC9Ahg6Lglin4AMAco8cIv9baba4= +golang.org/x/oauth2 v0.13.0 h1:jDDenyj+WgFtmV3zYVoi8aE2BwtXFLWOA67ZfNWftiY= +golang.org/x/oauth2 v0.13.0/go.mod h1:/JMhi4ZRXAf4HG9LiNmxvk+45+96RUlVThiH8FzNBn0= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -796,9 +812,8 @@ golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220601150217-0de741cfad7f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE= -golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -837,6 +852,7 @@ golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200728102440-3e129f6d46b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -853,22 +869,21 @@ golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210906170528-6f6e22806c34/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220128215802-99c3d69c2c27/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc= -golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= +golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= -golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= golang.org/x/term v0.15.0 h1:y/Oo/a/q3IXu26lQgl04j/gjuBDOBlx7X6Om1j2CPW4= golang.org/x/term v0.15.0/go.mod h1:BDl952bC7+uMoWR75FIrCDx79TPU9oHkTZ9yRbYOrX0= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -879,8 +894,7 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -904,6 +918,7 @@ golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgw golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191108193012-7d206e10da11/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod 
h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= @@ -937,9 +952,8 @@ golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.6-0.20210726203631-07bc1bf47fb2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/tools v0.12.0 h1:YW6HUoUmYBpwSgyaGaZq1fHjrBjX1rlpZ54T6mu2kss= -golang.org/x/tools v0.12.0/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM= +golang.org/x/tools v0.16.0 h1:GO788SKMRunPIBCXiQyo2AaexLstOrVhuAL5YwsckQM= +golang.org/x/tools v0.16.0/go.mod h1:kYVVN6I1mBNoB1OX+noeBjbRk4IUEPa7JJ+TJMEooJ0= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -969,8 +983,8 @@ google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7 google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= -google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM= +google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= @@ -992,6 +1006,7 @@ google.golang.org/genproto v0.0.0-20200228133532-8c2c7df3a383/go.mod h1:55QSHmfG google.golang.org/genproto v0.0.0-20200305110556-506484158171/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= @@ -1003,12 +1018,12 @@ google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6D google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20201019141844-1ed22bb0c154/go.mod 
h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20210602131652-f16073e35f0c/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0= -google.golang.org/genproto v0.0.0-20231016165738-49dd2c1f3d0b h1:+YaDE2r2OG8t/z5qmsh7Y+XXwCbvadxxZ0YY6mTdrVA= -google.golang.org/genproto v0.0.0-20231016165738-49dd2c1f3d0b/go.mod h1:CgAqfJo+Xmu0GwA0411Ht3OU3OntXwsGmrmjI8ioGXI= -google.golang.org/genproto/googleapis/api v0.0.0-20231012201019-e917dd12ba7a h1:myvhA4is3vrit1a6NZCWBIwN0kNEnX21DJOJX/NvIfI= -google.golang.org/genproto/googleapis/api v0.0.0-20231012201019-e917dd12ba7a/go.mod h1:SUBoKXbI1Efip18FClrQVGjWcyd0QZd8KkvdP34t7ww= -google.golang.org/genproto/googleapis/rpc v0.0.0-20231030173426-d783a09b4405 h1:AB/lmRny7e2pLhFEYIbl5qkDAUt2h0ZRO4wGPhZf+ik= -google.golang.org/genproto/googleapis/rpc v0.0.0-20231030173426-d783a09b4405/go.mod h1:67X1fPuzjcrkymZzZV1vvkFeTn2Rvc6lYF9MYFGCcwE= +google.golang.org/genproto v0.0.0-20231211222908-989df2bf70f3 h1:1hfbdAfFbkmpg41000wDVqr7jUpK/Yo+LPnIxxGzmkg= +google.golang.org/genproto v0.0.0-20231211222908-989df2bf70f3/go.mod h1:5RBcpGRxr25RbDzY5w+dmaqpSEvl8Gwl1x2CICf60ic= +google.golang.org/genproto/googleapis/api v0.0.0-20231120223509-83a465c0220f h1:2yNACc1O40tTnrsbk9Cv6oxiW8pxI/pXj0wRtdlYmgY= +google.golang.org/genproto/googleapis/api v0.0.0-20231120223509-83a465c0220f/go.mod h1:Uy9bTZJqmfrw2rIBxgGLnamc78euZULUBrLZ9XTITKI= +google.golang.org/genproto/googleapis/rpc v0.0.0-20231212172506-995d672761c0 h1:/jFB8jK5R3Sq3i/lmeZO0cATSzFfZaJq1J2Euan3XKU= +google.golang.org/genproto/googleapis/rpc v0.0.0-20231212172506-995d672761c0/go.mod h1:FUoWkonphQm3RhTS+kOEhF8h0iDpm4tdXolVCeZ9KKA= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= @@ -1026,8 +1041,8 @@ google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAG google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= google.golang.org/grpc v1.41.0/go.mod h1:U3l9uK9J0sini8mHphKoXyaqDA/8VyGnDee1zzIUK6k= google.golang.org/grpc v1.49.0/go.mod h1:ZgQEeidpAuNRZ8iRrlBKXZQP1ghovWIVhdJRyCDK+GI= -google.golang.org/grpc v1.59.0 h1:Z5Iec2pjwb+LEOqzpB2MR12/eKFhDPhuqW91O+4bwUk= -google.golang.org/grpc v1.59.0/go.mod h1:aUPDwccQo6OTjy7Hct4AfBPD1GptF4fyUjIkQ9YtF98= +google.golang.org/grpc v1.60.1 h1:26+wFr+cNqSGFcOXcabYC0lUVJVRa2Sb2ortSK7VrEU= +google.golang.org/grpc v1.60.1/go.mod h1:OlCHIeLYqSSsLi6i49B5QGdzaMZK9+M7LXN2FKz4eGM= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.3.0 h1:rNBFJjBCOgVr9pWD7rs/knKL4FRTKgpZmsRfV214zcA= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.3.0/go.mod h1:Dk1tviKTvMCz5tvh7t+fh94dhmQVHuCt2OzJB3CTW9Y= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= @@ -1044,8 +1059,8 @@ google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp0 google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= -google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= -google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +google.golang.org/protobuf v1.33.0 
h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= +google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= @@ -1112,8 +1127,9 @@ k8s.io/gengo v0.0.0-20220902162205-c0856e24416d h1:U9tB195lKdzwqicbJvyJeOXV7Klv+ k8s.io/gengo v0.0.0-20220902162205-c0856e24416d/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E= k8s.io/klog/v2 v2.0.0/go.mod h1:PBfzABfn139FHAV07az/IF9Wp1bkk3vpT2XSJ76fSDE= k8s.io/klog/v2 v2.2.0/go.mod h1:Od+F08eJP+W3HUb4pSrPpgp9DGU4GzlpG/TmITuYh/Y= -k8s.io/klog/v2 v2.80.1 h1:atnLQ121W371wYYFawwYx1aEY2eUfs4l3J72wtgAwV4= k8s.io/klog/v2 v2.80.1/go.mod h1:y1WjHnz7Dj687irZUWR/WLkLc5N1YHtjLdmgWjndZn0= +k8s.io/klog/v2 v2.100.1 h1:7WCHKK6K8fNhTqfBhISHQ97KrnJNFZMcQvKp7gP/tmg= +k8s.io/klog/v2 v2.100.1/go.mod h1:y1WjHnz7Dj687irZUWR/WLkLc5N1YHtjLdmgWjndZn0= k8s.io/kms v0.26.11 h1:nZ35T+DK5IqPiUTWws/I/RRe35VJrCRBkz1bGK7UbhA= k8s.io/kms v0.26.11/go.mod h1:xhWeNhaLRA9ThEOrFFnJnUR1w50HvarqaGr0uoReFoU= k8s.io/kube-openapi v0.0.0-20221012153701-172d655c2280 h1:+70TFaan3hfJzs+7VK2o+OGxg8HsuBr/5f6tVAjDu6E= @@ -1131,8 +1147,8 @@ k8s.io/metrics v0.26.11/go.mod h1:pTlFeyDb3pyIGF4eZD6AdER1+syCx3q4cMSShh5msL0= k8s.io/mount-utils v0.26.11 h1:wt0TyLv1YhRAxHvQB6w3GikdeHnMm1hpwzOVLweRGyI= k8s.io/mount-utils v0.26.11/go.mod h1:huSg2NI5P8ZNfE8PkQmm5a9fFZ9iHCXFxP/rasMCgYA= k8s.io/utils v0.0.0-20211116205334-6203023598ed/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= -k8s.io/utils v0.0.0-20221107191617-1a15be271d1d h1:0Smp/HP1OH4Rvhe+4B8nWGERtlqAGSftbSbbmm45oFs= -k8s.io/utils v0.0.0-20221107191617-1a15be271d1d/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +k8s.io/utils v0.0.0-20230406110748-d93618cff8a2 h1:qY1Ad8PODbnymg2pRbkyMT/ylpTrCM8P2RJ0yroCyIk= +k8s.io/utils v0.0.0-20230406110748-d93618cff8a2/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= @@ -1141,8 +1157,8 @@ sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.37 h1:fAPTNEpzQMOLM sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.37/go.mod h1:vfnxT4FXNT8eGvO+xi/DsyC/qHmdujqwrUa1WSspCsk= sigs.k8s.io/controller-tools v0.9.2 h1:AkTE3QAdz9LS4iD3EJvHyYxBkg/g9fTbgiYsrcsFCcM= sigs.k8s.io/controller-tools v0.9.2/go.mod h1:NUkn8FTV3Sad3wWpSK7dt/145qfuQ8CKJV6j4jHC5rM= -sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2 h1:iXTIw73aPyC+oRdyqqvVJuloN1p0AC/kzH07hu3NE+k= -sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= sigs.k8s.io/kustomize/api v0.12.1 h1:7YM7gW3kYBwtKvoY216ZzY+8hM+lV53LUayghNRJ0vM= sigs.k8s.io/kustomize/api v0.12.1/go.mod h1:y3JUhimkZkR6sbLNwfJHxvo1TCLwuwm14sCYnkH6S1s= sigs.k8s.io/kustomize/kustomize/v4 v4.5.7 h1:cDW6AVMl6t/SLuQaezMET8hgnadZGIAr8tUrxFVOrpg= @@ -1154,3 +1170,5 @@ sigs.k8s.io/structured-merge-diff/v4 v4.2.3/go.mod h1:qjx8mGObPmV2aSZepjQjbmb2ih sigs.k8s.io/yaml 
v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= +stathat.com/c/consistent v1.0.0 h1:ezyc51EGcRPJUxfHGSgJjWzJdj3NiMU9pNfLNGiXV0c= +stathat.com/c/consistent v1.0.0/go.mod h1:QkzMWzcbB+yQBL2AttO6sgsQS/JSTapcDISJalmCDS0= diff --git a/hack/k8s/AppImage.yaml b/hack/k8s/AppImage.yaml index c152799b..67a3e2bb 100644 --- a/hack/k8s/AppImage.yaml +++ b/hack/k8s/AppImage.yaml @@ -56,4 +56,4 @@ spec: id: abc name: secretflow-registry.cn-hangzhou.cr.aliyuncs.com/secretflow/secretflow-lite-anolis8 sign: abc - tag: 1.4.0.dev20240105 \ No newline at end of file + tag: 1.5.0b0 \ No newline at end of file diff --git a/pkg/agent/config/agent_config.go b/pkg/agent/config/agent_config.go index 8ee07cd7..7b8318b0 100644 --- a/pkg/agent/config/agent_config.go +++ b/pkg/agent/config/agent_config.go @@ -39,6 +39,9 @@ const ( defaultK8sClientMaxQPS = 250 defaultPodsCapacity = "500" + DefaultReservedCPU = "0.5" + DefaultReservedMemory = "500Mi" + defaultCRIRemoteEndpoint = "unix:///home/kuscia/containerd/run/containerd.sock" defaultResolvConfig = "/etc/resolv.conf" @@ -76,6 +79,11 @@ type CapacityCfg struct { Storage string `yaml:"storage"` } +type ReservedResourcesCfg struct { + CPU string `yaml:"cpu"` + Memory string `yaml:"memory"` +} + type KubeConnCfg struct { KubeconfigFile string `yaml:"kubeconfigFile,omitempty"` Endpoint string `yaml:"endpoint,omitempty"` @@ -241,15 +249,16 @@ type AgentConfig struct { // AllowPrivileged if true, securityContext.Privileged will work for container. AllowPrivileged bool `yaml:"allowPrivileged,omitempty"` - Capacity CapacityCfg `yaml:"capacity,omitempty"` - Log AgentLogCfg `yaml:"log,omitempty"` - Source SourceCfg `yaml:"source,omitempty"` - Framework FrameworkCfg `yaml:"framework,omitempty"` - Provider ProviderCfg `yaml:"provider,omitempty"` - Node NodeCfg `yaml:"node,omitempty"` - Registry RegistryCfg `yaml:"registry,omitempty"` - Cert CertCfg `yaml:"cert,omitempty"` - Plugins []PluginCfg `yaml:"plugins,omitempty"` + Capacity CapacityCfg `yaml:"capacity,omitempty"` + ReservedResources ReservedResourcesCfg `yaml:"reservedResources"` + Log AgentLogCfg `yaml:"log,omitempty"` + Source SourceCfg `yaml:"source,omitempty"` + Framework FrameworkCfg `yaml:"framework,omitempty"` + Provider ProviderCfg `yaml:"provider,omitempty"` + Node NodeCfg `yaml:"node,omitempty"` + Registry RegistryCfg `yaml:"registry,omitempty"` + Cert CertCfg `yaml:"cert,omitempty"` + Plugins []PluginCfg `yaml:"plugins,omitempty"` } func DefaultStaticAgentConfig() *AgentConfig { @@ -262,6 +271,10 @@ func DefaultStaticAgentConfig() *AgentConfig { Capacity: CapacityCfg{ Pods: defaultPodsCapacity, }, + ReservedResources: ReservedResourcesCfg{ + CPU: DefaultReservedCPU, + Memory: DefaultReservedMemory, + }, AllowPrivileged: false, Log: AgentLogCfg{ LogLevel: "INFO", diff --git a/pkg/agent/framework/node_controller.go b/pkg/agent/framework/node_controller.go index 0382356b..e19d6e22 100644 --- a/pkg/agent/framework/node_controller.go +++ b/pkg/agent/framework/node_controller.go @@ -325,7 +325,6 @@ func (nc *NodeController) exit() error { nodeFromMaster, err := nc.nodeStub.Get(context.Background(), nc.nmt.Name, emptyGetOptions) if err == nil { nodeFromMaster.Spec.Unschedulable = true - if err := retry.OnError(retry.DefaultRetry, retriable, func() error { _, err = nc.nodeStub.Update(context.Background(), nodeFromMaster, metav1.UpdateOptions{}) return err @@ 
-351,7 +350,7 @@ func (nc *NodeController) exit() error { func retriable(err error) bool { return k8serrors.IsInternalError(err) || k8serrors.IsServiceUnavailable(err) || - net.IsConnectionRefused(err) + net.IsConnectionRefused(err) || k8serrors.IsConflict(err) } func (nc *NodeController) Stop() { diff --git a/pkg/agent/framework/node_controller_test.go b/pkg/agent/framework/node_controller_test.go index 596a2303..7b09b367 100644 --- a/pkg/agent/framework/node_controller_test.go +++ b/pkg/agent/framework/node_controller_test.go @@ -171,14 +171,14 @@ func TestNodeRun(t *testing.T) { t.Fatal(err) // if this returns at all it is an error regardless if err is nil case err := <-waitForEvent(eCtx, nr, func(e watch.Event) bool { node := e.Object.(*corev1.Node) - if len(node.Status.Conditions) == 0 { + if len(node.Status.Conditions) != len(n.Status.Conditions) { return false } // Check if this is a node update we are looking for // Since node updates happen periodically there could be some that occur // before the status update that we are looking for happens. - c := node.Status.Conditions[len(n.Status.Conditions)-1] + c := node.Status.Conditions[len(node.Status.Conditions)-1] if !c.LastTransitionTime.Equal(&newCondition.LastTransitionTime) { return false } diff --git a/pkg/agent/local/runtime/process/container/container.go b/pkg/agent/local/runtime/process/container/container.go index ce4b0917..c62dd3b5 100644 --- a/pkg/agent/local/runtime/process/container/container.go +++ b/pkg/agent/local/runtime/process/container/container.go @@ -32,9 +32,11 @@ import ( "github.com/secretflow/kuscia/pkg/agent/local/store" "github.com/secretflow/kuscia/pkg/agent/local/store/kii" "github.com/secretflow/kuscia/pkg/agent/local/store/layout" + "github.com/secretflow/kuscia/pkg/utils/cgroup" "github.com/secretflow/kuscia/pkg/utils/common" "github.com/secretflow/kuscia/pkg/utils/nlog" "github.com/secretflow/kuscia/pkg/utils/paths" + "github.com/secretflow/kuscia/pkg/utils/process" ) const ( @@ -99,7 +101,7 @@ func NewContainer(config *runtime.ContainerConfig, logDirectory, sandboxID strin return nil, errors.New("container config must include metadata") } - cid := common.GenerateID() + cid := common.GenerateID(16) name := makeContainerName(metadata, cid) imageName, err := kii.NewImageName(config.Image.Image) @@ -189,6 +191,9 @@ func (c *Container) Start() (retErr error) { c.status.StartedAt = time.Now().UnixNano() c.status.Pid = starter.Command().Process.Pid + c.addCgroup(c.status.Pid) + process.SetOOMScore(c.status.Pid, 0) + go c.signalOnExit(starter) return nil @@ -237,6 +242,73 @@ func (c *Container) buildStarter() (st.Starter, error) { return st.NewRawStarter(initConfig) } +func (c *Container) addCgroup(pid int) { + if !cgroup.HasPermission() || pid <= 0 { + return + } + + var ( + cpuQuota *int64 + cpuPeriod *uint64 + memoryLimit *int64 + ) + + if c.Config != nil && c.Config.Linux != nil && c.Config.Linux.Resources != nil { + if c.Config.Linux.Resources.CpuQuota > 0 { + cpuQuota = &c.Config.Linux.Resources.CpuQuota + } + if c.Config.Linux.Resources.CpuPeriod > 0 { + period := uint64(c.Config.Linux.Resources.CpuPeriod) + cpuPeriod = &period + } + if c.Config.Linux.Resources.MemoryLimitInBytes > 0 { + memoryLimit = &c.Config.Linux.Resources.MemoryLimitInBytes + } + } + + cgroupConfig := &cgroup.Config{ + Group: fmt.Sprintf("%s/%s", cgroup.KusciaAppsGroup, c.ID), + Pid: uint64(pid), + CPUQuota: cpuQuota, + CPUPeriod: cpuPeriod, + MemoryLimit: memoryLimit, + } + m, err := cgroup.NewManager(cgroupConfig) + if err != 
nil { + nlog.Warnf("New cgroup manager for container[%v] process[%v] failed, details -> %v, skip adding process into cgroup", c.Name, pid, err) + return + } + + if err = m.AddCgroup(); err != nil { + nlog.Warnf("Add cgroup for container[%v] process[%v] failed, details -> %v, skip adding process into cgroup", c.Name, pid, err) + } +} + +func (c *Container) deleteCgroup(pid int) { + if !cgroup.HasPermission() || pid <= 0 { + return + } + + cgroupConfig := &cgroup.Config{ + Group: fmt.Sprintf("%s/%s", cgroup.KusciaAppsGroup, c.ID), + Pid: uint64(pid), + } + m, err := cgroup.NewManager(cgroupConfig) + if err != nil { + nlog.Warnf("New cgroup manager for container[%v] process[%v] failed, details -> %v, skip deleting process from cgroup", c.Name, pid, err) + return + } + + for i := 0; i < 5; i++ { + err = m.DeleteCgroup() + if err == nil { + return + } + nlog.Warnf("Delete cgroup for container[%v] process[%v] failed, details -> %v, max retry count[5], current retry count[%v]", c.Name, pid, err, i+1) + time.Sleep(2 * time.Second) + } +} + // The final execution command follows the following rules: // 1. If you do not supply command or args for a Container, the defaults defined in the // Docker image are used. @@ -306,6 +378,8 @@ func (c *Container) signalOnExit(starter st.Starter) { } } + go c.deleteCgroup(c.status.Pid) + if c.status.FinishedAt == 0 { c.status.Pid = 0 c.status.FinishedAt = time.Now().UnixNano() diff --git a/pkg/agent/local/runtime/process/container/container_test.go b/pkg/agent/local/runtime/process/container/container_test.go index 8dfd32bf..282aec00 100644 --- a/pkg/agent/local/runtime/process/container/container_test.go +++ b/pkg/agent/local/runtime/process/container/container_test.go @@ -59,7 +59,7 @@ func createTestContainer(t *testing.T) *Container { return container } -func TestContainer_Start(t *testing.T) { +func TestContainerStart(t *testing.T) { t.Run("Container normal exited ", func(t *testing.T) { container := createTestContainer(t) assert.NoError(t, container.Create(kii.Plain)) @@ -89,7 +89,7 @@ func TestContainer_Start(t *testing.T) { } -func Test_Container_generateCmdLine(t *testing.T) { +func TestContainerGenerateCmdLine(t *testing.T) { tests := []struct { ImageEntrypoint []string ImageCommand []string @@ -148,5 +148,14 @@ func Test_Container_generateCmdLine(t *testing.T) { assert.Equal(t, tt.ExpectedCmd, strings.Join(cmdLine, " ")) }) } +} + +func TestAddCgroup(t *testing.T) { + container := createTestContainer(t) + container.addCgroup(0) +} +func TestDeleteCgroup(t *testing.T) { + container := createTestContainer(t) + container.deleteCgroup(0) } diff --git a/pkg/agent/local/runtime/process/process_test.go b/pkg/agent/local/runtime/process/process_test.go index dff110a1..b542e423 100644 --- a/pkg/agent/local/runtime/process/process_test.go +++ b/pkg/agent/local/runtime/process/process_test.go @@ -128,6 +128,9 @@ func Test_RuntimeSandboxAndContainers(t *testing.T) { assert.NoError(t, runtime.StopContainer(ctx, containerID, 0)) + // make sure process killed + time.Sleep(100 * time.Millisecond) + assert.NoError(t, runtime.StopPodSandbox(ctx, sandboxID)) time.Sleep(100 * time.Millisecond) diff --git a/pkg/agent/local/runtime/process/sandbox/sandbox.go b/pkg/agent/local/runtime/process/sandbox/sandbox.go index 4f3a3534..9a4795f7 100644 --- a/pkg/agent/local/runtime/process/sandbox/sandbox.go +++ b/pkg/agent/local/runtime/process/sandbox/sandbox.go @@ -65,7 +65,7 @@ type Sandbox struct { func NewSandbox(config *runtime.PodSandboxConfig, ip, rootDir string) (s 
*Sandbox, retErr error) { s = &Sandbox{ MetaData: MetaData{ - ID: common.GenerateID(), + ID: common.GenerateID(16), Config: config, IP: ip, CreatedAt: time.Now(), diff --git a/pkg/agent/provider/node/base_node_test.go b/pkg/agent/provider/node/base_node_test.go index 40197b04..b88e1f6a 100644 --- a/pkg/agent/provider/node/base_node_test.go +++ b/pkg/agent/provider/node/base_node_test.go @@ -26,7 +26,7 @@ import ( func TestBaseNode_configureCommonNode(t *testing.T) { agentConfig := config.DefaultStaticAgentConfig() agentConfig.RootDir = "." - capacityManager, err := NewCapacityManager(&agentConfig.Capacity, ".", true) + capacityManager, err := NewCapacityManager(config.ContainerRuntime, &agentConfig.Capacity, nil, ".", true) assert.NoError(t, err) dep := &BaseNodeDependence{ Runtime: config.ContainerRuntime, diff --git a/pkg/agent/provider/node/capacity_manager.go b/pkg/agent/provider/node/capacity_manager.go index d89e9783..e989a860 100644 --- a/pkg/agent/provider/node/capacity_manager.go +++ b/pkg/agent/provider/node/capacity_manager.go @@ -25,6 +25,7 @@ import ( "k8s.io/apimachinery/pkg/api/resource" "github.com/secretflow/kuscia/pkg/agent/config" + "github.com/secretflow/kuscia/pkg/utils/cgroup" "github.com/secretflow/kuscia/pkg/utils/nlog" ) @@ -44,11 +45,16 @@ type CapacityManager struct { podTotal resource.Quantity podAvailable resource.Quantity + + cgroupCPUQuota *int64 + cgroupCPUPeriod *uint64 + cgroupMemoryLimit *int64 } -func NewCapacityManager(cfg *config.CapacityCfg, rootDir string, localCapacity bool) (*CapacityManager, error) { +func NewCapacityManager(runtime string, cfg *config.CapacityCfg, reservedResCfg *config.ReservedResourcesCfg, rootDir string, localCapacity bool) (*CapacityManager, error) { pa := &CapacityManager{} - nlog.Infof("Capacity Manager, cfg:%v, rootDir: %s, localCapacity:%v", cfg, rootDir, localCapacity) + nlog.Infof("Capacity Manager, runtime: %v, capacityCfg:%v, reservedResCfg: %v, rootDir: %s, localCapacity:%v", + runtime, cfg, reservedResCfg, rootDir, localCapacity) if localCapacity { memStat, err := mem.VirtualMemory() if err != nil { @@ -57,7 +63,13 @@ func NewCapacityManager(cfg *config.CapacityCfg, rootDir string, localCapacity b if cfg.Memory == "" { pa.memTotal = *resource.NewQuantity(int64(memStat.Total), resource.BinarySI) } + pa.memAvailable = *resource.NewQuantity(int64(memStat.Available), resource.BinarySI) + memoryLimit, err := cgroup.GetMemoryLimit(cgroup.DefaultMountPoint) + if err == nil && memoryLimit > 0 && memoryLimit < int64(memStat.Available) { + pa.memTotal = *resource.NewQuantity(memoryLimit, resource.BinarySI) + pa.memAvailable = pa.memTotal.DeepCopy() + } if cfg.CPU == "" { // One cpu, in Kubernetes, is equivalent to 1 vCPU/Core for cloud providers @@ -69,6 +81,14 @@ func NewCapacityManager(cfg *config.CapacityCfg, rootDir string, localCapacity b cfg.CPU = strconv.Itoa(cpus) pa.cpuTotal = *resource.NewQuantity(int64(cpus), resource.BinarySI) pa.cpuAvailable = pa.cpuTotal.DeepCopy() + cpuQuota, cpuPeriod, err := cgroup.GetCPUQuotaAndPeriod(cgroup.DefaultMountPoint) + if err == nil && cpuQuota > 0 && cpuPeriod > 0 { + availableCPU := cpuQuota / cpuPeriod + if availableCPU > 0 && availableCPU < pa.cpuAvailable.Value() { + pa.cpuTotal = *resource.NewQuantity(availableCPU, resource.BinarySI) + pa.cpuAvailable = pa.cpuTotal.DeepCopy() + } + } } if cfg.Storage == "" { @@ -106,6 +126,7 @@ func NewCapacityManager(cfg *config.CapacityCfg, rootDir string, localCapacity b pa.memAvailable = memory.DeepCopy() } } + if 
pa.memTotal.Cmp(pa.memAvailable) < 0 { // total memory in config is smaller than available memory pa.memAvailable = pa.memTotal.DeepCopy() @@ -132,9 +153,68 @@ func NewCapacityManager(cfg *config.CapacityCfg, rootDir string, localCapacity b pa.podTotal = pods.DeepCopy() pa.podAvailable = pods.DeepCopy() + err = pa.buildCgroupResource(runtime, reservedResCfg) + if err != nil { + return nil, err + } + return pa, nil } +func (pa *CapacityManager) buildCgroupResource(runtime string, reservedResCfg *config.ReservedResourcesCfg) error { + if reservedResCfg == nil { + return nil + } + + if runtime != config.ProcessRuntime && runtime != config.ContainerRuntime { + return nil + } + + reservedCPU, err := resource.ParseQuantity(reservedResCfg.CPU) + if err != nil { + return fmt.Errorf("failed to parse reserved cpu %q, detail-> %v", reservedResCfg.CPU, err) + } + + if reservedCPU.MilliValue() <= 0 { + reservedCPU, _ = resource.ParseQuantity(config.DefaultReservedCPU) + } + + if pa.cpuAvailable.Cmp(reservedCPU) < 0 { + return fmt.Errorf("available cpu %v is less than reserved cpu %v", pa.cpuAvailable.String(), reservedCPU.String()) + } + + cpuPeriod := uint64(100000) + availableCPU := pa.cpuAvailable.MilliValue() - reservedCPU.MilliValue() + cpuQuota := availableCPU * 100 + pa.cgroupCPUQuota = &cpuQuota + pa.cgroupCPUPeriod = &cpuPeriod + if reservedCPU.MilliValue() > 500 { + pa.cpuAvailable.SetMilli(availableCPU) + } + + nlog.Infof("Total cpu: %v, available cpu: %v, cpu quota: %v, cpu period: %v", pa.cpuTotal.String(), pa.cpuAvailable.String(), cpuQuota, *pa.cgroupCPUPeriod) + + reservedMemory, err := resource.ParseQuantity(reservedResCfg.Memory) + if err != nil { + return fmt.Errorf("failed to parse reserved memory %q, detail-> %v", reservedResCfg.Memory, err) + } + + if reservedMemory.MilliValue() <= 0 { + reservedMemory, _ = resource.ParseQuantity(config.DefaultReservedMemory) + } + + if pa.memAvailable.Cmp(reservedMemory) < 0 { + return fmt.Errorf("available memory %d is less than reserved memory %d", pa.memAvailable.Value(), reservedMemory.Value()) + } + + availableMemory := pa.memAvailable.Value() - reservedMemory.Value() + pa.cgroupMemoryLimit = &availableMemory + pa.memAvailable.Set(availableMemory) + + nlog.Infof("Total memory: %v, available memory: %v", pa.memTotal.Value(), pa.memAvailable.Value()) + return nil +} + // Capacity returns a resource list containing the capacity limits.
func (pa *CapacityManager) Capacity() v1.ResourceList { return v1.ResourceList{ @@ -153,3 +233,15 @@ func (pa *CapacityManager) Allocatable() v1.ResourceList { "pods": pa.podAvailable, } } + +func (pa *CapacityManager) GetCgroupCPUQuota() *int64 { + return pa.cgroupCPUQuota +} + +func (pa *CapacityManager) GetCgroupCPUPeriod() *uint64 { + return pa.cgroupCPUPeriod +} + +func (pa *CapacityManager) GetCgroupMemoryLimit() *int64 { + return pa.cgroupMemoryLimit +} diff --git a/pkg/agent/provider/node/capacity_manager_test.go b/pkg/agent/provider/node/capacity_manager_test.go index 088518a4..cd81970b 100644 --- a/pkg/agent/provider/node/capacity_manager_test.go +++ b/pkg/agent/provider/node/capacity_manager_test.go @@ -16,25 +16,26 @@ package node import ( "fmt" + "strconv" "testing" "github.com/stretchr/testify/assert" + "k8s.io/apimachinery/pkg/api/resource" "github.com/secretflow/kuscia/pkg/agent/config" ) func TestNewGenericNodeProvider(t *testing.T) { - nonEmptyCfg := &config.CapacityCfg{ + nonEmptyCfg := config.CapacityCfg{ CPU: "1", Memory: "1000000000", Pods: "100", Storage: "100G", } - emptyCfg := &config.CapacityCfg{} tests := []struct { localCapacity bool - cfg *config.CapacityCfg + cfg config.CapacityCfg hasErr bool useCfg bool }{ @@ -45,7 +46,7 @@ func TestNewGenericNodeProvider(t *testing.T) { }, { localCapacity: true, - cfg: emptyCfg, + cfg: config.CapacityCfg{}, useCfg: false, }, { @@ -55,7 +56,7 @@ func TestNewGenericNodeProvider(t *testing.T) { }, { localCapacity: false, - cfg: emptyCfg, + cfg: config.CapacityCfg{}, hasErr: true, }, } @@ -64,21 +65,102 @@ func TestNewGenericNodeProvider(t *testing.T) { for i, tt := range tests { t.Run(fmt.Sprintf("Test %d", i), func(t *testing.T) { - cp, err := NewCapacityManager(tt.cfg, rootDir, tt.localCapacity) + cp, err := NewCapacityManager(config.ContainerRuntime, &tt.cfg, nil, rootDir, tt.localCapacity) if tt.hasErr { assert.Error(t, err) return } assert.NoError(t, err) - assert.True(t, cp.cpuAvailable.Equal(cp.cpuTotal)) + assert.True(t, cp.cpuAvailable.Equal(cp.cpuTotal) || cp.cpuAvailable.Cmp(cp.cpuTotal) < 0) assert.True(t, cp.podAvailable.Equal(cp.podTotal)) if tt.useCfg { assert.True(t, cp.storageAvailable.Equal(cp.storageTotal)) assert.True(t, cp.memAvailable.Equal(cp.memTotal) || cp.memAvailable.Cmp(cp.memTotal) < 0) - assert.Equal(t, tt.cfg.CPU, cp.cpuAvailable.String()) + cfgCPU, _ := strconv.Atoi(tt.cfg.CPU) + cpuAvailable, _ := strconv.Atoi(cp.cpuAvailable.String()) + assert.True(t, cfgCPU >= cpuAvailable) assert.Equal(t, tt.cfg.Storage, cp.storageAvailable.String()) assert.Equal(t, tt.cfg.Pods, cp.podAvailable.String()) } }) } } + +func TestBuildCgroupResource(t *testing.T) { + pointerToInt64 := func(i *int64) int64 { + if i == nil { + return 0 + } + return *i + } + + pointerToUint64 := func(i *uint64) int64 { + if i == nil { + return 0 + } + return int64(*i) + } + + tests := []struct { + runtime string + reservedResCfg *config.ReservedResourcesCfg + wantCPUAvailable int64 + wantMemAvailable int64 + wantCgroupCPUQuota int64 + wantCgroupCPUPeriod int64 + wantCgroupMemoryLimit int64 + }{ + {config.ContainerRuntime, nil, 4, 838860800, 0, 0, 0}, + {"", &config.ReservedResourcesCfg{CPU: "500m", Memory: "500Mi"}, 4, 838860800, 0, 0, 0}, + {config.K8sRuntime, &config.ReservedResourcesCfg{CPU: "500m", Memory: "500Mi"}, 4, 838860800, 0, 0, 0}, + {config.ContainerRuntime, &config.ReservedResourcesCfg{CPU: "500m", Memory: "500Mi"}, 4, 314572800, 350000, 100000, 314572800}, + {config.ProcessRuntime, &config.ReservedResourcesCfg{CPU: 
"600m", Memory: "500Mi"}, 4, 314572800, 340000, 100000, 314572800}, + } + + for _, tt := range tests { + pa := &CapacityManager{ + cpuTotal: *resource.NewQuantity(4, resource.BinarySI), + cpuAvailable: *resource.NewQuantity(4, resource.BinarySI), + memTotal: *resource.NewQuantity(1073741824, resource.BinarySI), // 1024Mi + memAvailable: *resource.NewQuantity(838860800, resource.BinarySI), // 800Mi + } + + err := pa.buildCgroupResource(tt.runtime, tt.reservedResCfg) + assert.Nil(t, err) + assert.Equal(t, tt.wantCPUAvailable, pa.cpuAvailable.Value()) + assert.Equal(t, tt.wantMemAvailable, pa.memAvailable.Value()) + assert.Equal(t, tt.wantCgroupCPUQuota, pointerToInt64(pa.cgroupCPUQuota)) + assert.Equal(t, tt.wantCgroupCPUPeriod, pointerToUint64(pa.cgroupCPUPeriod)) + assert.Equal(t, tt.wantCgroupMemoryLimit, pointerToInt64(pa.cgroupMemoryLimit)) + } +} + +func TestGetCgroupCPUQuota(t *testing.T) { + quota := int64(100000) + pa := &CapacityManager{ + cgroupCPUQuota: "a, + } + + got := pa.GetCgroupCPUQuota() + assert.Equal(t, quota, *got) +} + +func TestGetCgroupCPUPeriod(t *testing.T) { + period := uint64(100000) + pa := &CapacityManager{ + cgroupCPUPeriod: &period, + } + + got := pa.GetCgroupCPUPeriod() + assert.Equal(t, period, *got) +} + +func TestGetCgroupMemoryLimit(t *testing.T) { + limit := int64(100000) + pa := &CapacityManager{ + cgroupMemoryLimit: &limit, + } + + got := pa.GetCgroupMemoryLimit() + assert.Equal(t, limit, *got) +} diff --git a/pkg/agent/provider/node/generic_node_test.go b/pkg/agent/provider/node/generic_node_test.go index efd57055..c4908954 100644 --- a/pkg/agent/provider/node/generic_node_test.go +++ b/pkg/agent/provider/node/generic_node_test.go @@ -26,7 +26,7 @@ import ( func TestGenericNode_ConfigureNode(t *testing.T) { agentConfig := config.DefaultStaticAgentConfig() agentConfig.RootDir = t.TempDir() - capacityManager, err := NewCapacityManager(&agentConfig.Capacity, ".", true) + capacityManager, err := NewCapacityManager(config.ContainerRuntime, &agentConfig.Capacity, nil, ".", true) assert.NoError(t, err) dep := &GenericNodeDependence{ BaseNodeDependence: BaseNodeDependence{ diff --git a/pkg/agent/provider/provider_factory.go b/pkg/agent/provider/provider_factory.go index 9eae7975..6ad8b71a 100644 --- a/pkg/agent/provider/provider_factory.go +++ b/pkg/agent/provider/provider_factory.go @@ -16,6 +16,8 @@ package provider import ( "fmt" + "os" + "time" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" @@ -27,6 +29,7 @@ import ( "github.com/secretflow/kuscia/pkg/agent/provider/node" "github.com/secretflow/kuscia/pkg/agent/provider/pod" "github.com/secretflow/kuscia/pkg/agent/resource" + "github.com/secretflow/kuscia/pkg/utils/cgroup" "github.com/secretflow/kuscia/pkg/utils/kubeconfig" "github.com/secretflow/kuscia/pkg/utils/nlog" ) @@ -81,11 +84,17 @@ type containerRuntimeFactory struct { func (f *containerRuntimeFactory) BuildNodeProvider() (kri.NodeProvider, error) { providerCfg := &f.agentConfig.Provider - cm, err := node.NewCapacityManager(&f.agentConfig.Capacity, f.agentConfig.RootDir, true) + cm, err := node.NewCapacityManager(f.agentConfig.Provider.Runtime, + &f.agentConfig.Capacity, + &f.agentConfig.ReservedResources, + f.agentConfig.RootDir, + true) if err != nil { return nil, err } + initCgroup(cm, f.agentConfig.Provider.Runtime) + nodeDep := &node.GenericNodeDependence{ BaseNodeDependence: node.BaseNodeDependence{ Runtime: providerCfg.Runtime, @@ -101,6 +110,73 @@ func (f *containerRuntimeFactory) BuildNodeProvider() (kri.NodeProvider, error) 
return nodeProvider, nil } +func initCgroup(cm *node.CapacityManager, runtime string) { + if cm == nil || (runtime != config.ProcessRuntime && runtime != config.ContainerRuntime) { + return + } + + if !cgroup.HasPermission() { + return + } + + set := func(cm *node.CapacityManager, runtime string) { + for i := 0; ; i++ { + err := setCgroup(cm, runtime) + if err == nil { + nlog.Infof("Finish initializing cgroup") + return + } + + if !os.IsNotExist(err) { + nlog.Warnf("Init cgroup failed. details -> %v", err) + return + } + + time.Sleep(5 * time.Second) + } + } + + switch runtime { + case config.ProcessRuntime: + set(cm, runtime) + case config.ContainerRuntime: + // Asynchronously waiting for containerd to create cgroup + go set(cm, runtime) + default: + } +} + +func setCgroup(cm *node.CapacityManager, runtime string) error { + switch runtime { + case config.ProcessRuntime: + m, err := newCgroupManager(cm, cgroup.KusciaAppsGroup) + if err != nil { + return err + } + return m.AddCgroup() + case config.ContainerRuntime: + m, err := newCgroupManager(cm, cgroup.K8sIOGroup) + if err != nil { + return err + } + return m.UpdateCgroup() + default: + nlog.Warnf("Unknown runtime %q, skip initializing cgroup", runtime) + } + return nil +} + +func newCgroupManager(cm *node.CapacityManager, group string) (cgroup.Manager, error) { + conf := &cgroup.Config{ + Group: group, + Pid: 0, + CPUQuota: cm.GetCgroupCPUQuota(), + CPUPeriod: cm.GetCgroupCPUPeriod(), + MemoryLimit: cm.GetCgroupMemoryLimit(), + } + return cgroup.NewManager(conf) +} + func (f *containerRuntimeFactory) BuildPodProvider(nodeName string, eventRecorder record.EventRecorder, resourceManager *resource.KubeResourceManager, podsController *framework.PodsController) (kri.PodProvider, error) { podProviderDep := &pod.CRIProviderDependence{ Namespace: f.agentConfig.Namespace, @@ -130,7 +206,11 @@ type k8sRuntimeFactory struct { func (f *k8sRuntimeFactory) BuildNodeProvider() (kri.NodeProvider, error) { providerCfg := &f.agentConfig.Provider - cm, err := node.NewCapacityManager(&f.agentConfig.Capacity, f.agentConfig.RootDir, false) + cm, err := node.NewCapacityManager(f.agentConfig.Provider.Runtime, + &f.agentConfig.Capacity, + &f.agentConfig.ReservedResources, + f.agentConfig.RootDir, + false) if err != nil { return nil, err } diff --git a/pkg/common/constants.go b/pkg/common/constants.go index 6b4de8c1..4ae0125e 100644 --- a/pkg/common/constants.go +++ b/pkg/common/constants.go @@ -97,6 +97,7 @@ const ( KusciaCrossDomain = "cross-domain" JobCustomFieldsLabelPrefix = "kuscia.job.custom-fields/" ReceiverServiceName = "receiver" + ControllerKusciaTask = "kusciatask" ) // annotations @@ -232,3 +233,7 @@ const ( DeployTokenUsedState = "used" DeployTokenUnusedState = "unused" ) + +const ( + K3sRegex = `^[a-z0-9]([a-z0-9.-]{0,61}[a-z0-9])?$` +) diff --git a/pkg/common/gen.go b/pkg/common/gen.go index cf78a74f..11e67bb2 100644 --- a/pkg/common/gen.go +++ b/pkg/common/gen.go @@ -19,13 +19,14 @@ import ( "regexp" "strings" - "github.com/google/uuid" + "github.com/secretflow/kuscia/pkg/utils/common" ) func GenDomainDataID(dataName string) (dataID string) { // reserve the valid characters in the string reg, _ := regexp.Compile("[^a-zA-Z0-9/-]+") s1 := reg.ReplaceAllString(dataName, "") + s1 = strings.ToLower(s1) // remove the invalid characters ['0-9' and '-'] at the beginning of the string reg, _ = regexp.Compile("^[0-9/-]+") prefix := reg.ReplaceAllString(s1, "") @@ -37,7 +38,15 @@ func GenDomainDataID(dataName string) (dataID string) { prefix = prefix + "-" 
} - return prefix + uuid.NewString() + return prefix + common.GenerateID(16) +} + +// GenDomainDataSourceID generates data source id +func GenDomainDataSourceID(domainDataSourceType string) string { + if len(domainDataSourceType) == 0 { + return common.GenerateID(16) + } + return fmt.Sprintf("%s-%s", domainDataSourceType, common.GenerateID(16)) } func GenDomainRouteName(src, dest string) string { diff --git a/pkg/common/gen_test.go b/pkg/common/gen_test.go new file mode 100644 index 00000000..21bfb66d --- /dev/null +++ b/pkg/common/gen_test.go @@ -0,0 +1,92 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package common + +import ( + "github.com/stretchr/testify/assert" + "regexp" + "testing" +) + +func TestGenDomainDataID(t *testing.T) { + + testCases := []struct { //nolint:typecheck + name string + match bool + }{ + { + name: "example.com", + match: true, + }, + { + name: "-example-com", + match: true, + }, + { + name: "TestExample.DomainDataSource", + match: true, + }, + { + name: "", + match: true, + }, { + name: "Test$%^&*(data_source-", + match: true, + }, + { + name: "中文测试", + match: true, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + genDomainDataID := GenDomainDataID(tc.name) + match, _ := regexp.MatchString(K3sRegex, genDomainDataID) + assert.True(t, match, "domain data id should be valid: %s", genDomainDataID) + }) + } +} + +func TestGenDomainDataSourceID(t *testing.T) { + + testCases := []struct { //nolint:typecheck + name string + match bool + }{ + { + name: DomainDataSourceTypeOSS, + match: true, + }, + { + name: DomainDataSourceTypeMysql, + match: true, + }, + { + name: DomainDataSourceTypeLocalFS, + match: true, + }, + { + name: "", + match: true, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + genDomainDataID := GenDomainDataSourceID(tc.name) + match, _ := regexp.MatchString(K3sRegex, genDomainDataID) + assert.True(t, match, "domain data source id should be valid: %s", genDomainDataID) + }) + } +} diff --git a/pkg/confmanager/bean/grpc_server_bean.go b/pkg/confmanager/bean/grpc_server_bean.go index 639385eb..8837c761 100644 --- a/pkg/confmanager/bean/grpc_server_bean.go +++ b/pkg/confmanager/bean/grpc_server_bean.go @@ -67,7 +67,8 @@ func (s *grpcServerBean) Start(ctx context.Context, e framework.ConfBeanRegistry serverTLSConfig, err := tls.BuildServerTLSConfig(s.config.TLS.RootCA, s.config.TLS.ServerCert, s.config.TLS.ServerKey) if err != nil { - nlog.Fatalf("Failed to init server tls config: %v", err) + nlog.Errorf("Failed to init server tls config: %v", err) + return err } creds := credentials.NewTLS(serverTLSConfig) opts = append(opts, grpc.Creds(creds)) @@ -76,7 +77,8 @@ func (s *grpcServerBean) Start(ctx context.Context, e framework.ConfBeanRegistry addr := fmt.Sprintf(":%d", s.config.GRPCPort) lis, err := net.Listen("tcp", addr) if err != nil { - nlog.Fatalf("Failed to listen on addr[%s]: %v", addr, err) + nlog.Errorf("Failed to listen on 
addr[%s]: %v", addr, err) + return err } // register grpc server diff --git a/pkg/confmanager/service/certificate_test.go b/pkg/confmanager/service/certificate_test.go index 29c6f1b5..1d23cf9e 100644 --- a/pkg/confmanager/service/certificate_test.go +++ b/pkg/confmanager/service/certificate_test.go @@ -23,6 +23,7 @@ import ( "github.com/secretflow/kuscia/pkg/utils/tls" "github.com/secretflow/kuscia/pkg/web/asserts" "github.com/secretflow/kuscia/proto/api/v1alpha1/confmanager" + "github.com/stretchr/testify/assert" ) func newNewCertificateService() (ICertificateService, error) { @@ -39,139 +40,81 @@ func newNewCertificateService() (ICertificateService, error) { } func TestNewCertificateService(t *testing.T) { - tests := []struct { - name string - want ICertificateService - }{ - { - name: "cm new certificate service should success", - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - certService, err := newNewCertificateService() - asserts.NotNil(err, "new certificate service failed") - asserts.IsNil(certService, "new certificate service return nil") - }) - } + t.Parallel() + certService, err := newNewCertificateService() + asserts.NotNil(err, "new certificate service failed") + asserts.IsNil(certService, "new certificate service return nil") } -func Test_certificateService_GenerateKeyCerts(t *testing.T) { - type args struct { - ctx context.Context - request *confmanager.GenerateKeyCertsRequest - } - tests := []struct { - name string - args args - wantCode int32 - wantCertChainLen int - }{ - { - name: "cm generate pkcs#1 key certs should return success", - args: args{ - ctx: context.Background(), - request: &confmanager.GenerateKeyCertsRequest{ - CommonName: "test", - KeyType: KeyTypeForPCKS1, - }, - }, - wantCode: 0, - wantCertChainLen: 2, - }, - { - name: "cm generate pkcs#8 key certs should return success", - args: args{ - ctx: context.Background(), - request: &confmanager.GenerateKeyCertsRequest{ - CommonName: "test", - KeyType: KeyTypeForPCKS8, - }, - }, - wantCode: 0, - wantCertChainLen: 2, - }, - } - for _, tt := range tests { - certService, err := newNewCertificateService() - asserts.NotNil(err, "new certificate service failed") - asserts.IsNil(certService, "new certificate service return nil") - t.Run(tt.name, func(t *testing.T) { - got := certService.GenerateKeyCerts(tt.args.ctx, tt.args.request) - if got.Status.Code != tt.wantCode { - t.Errorf("GenerateKeyCerts() = %v, wantCode %v", got, tt.wantCode) - } - if got.Key == "" { - t.Errorf("GenerateKeyCerts() = %v, key empty", got) - } - if len(got.CertChain) != tt.wantCertChainLen { - t.Errorf("GenerateKeyCerts() = %v, cert chain len %v", got, tt.wantCertChainLen) - } - }) - } +func Test_certificateService_GenerateKeyCerts_PKCS1(t *testing.T) { + t.Parallel() + certService, err := newNewCertificateService() + assert.Nil(t, err) + assert.NotNil(t, certService) + got := certService.GenerateKeyCerts(context.Background(), &confmanager.GenerateKeyCertsRequest{ + CommonName: "test", + KeyType: KeyTypeForPCKS1, + }) + + assert.Equal(t, 0, int(got.Status.Code)) + assert.NotEmpty(t, got.Key) + assert.Equal(t, int(2), len(got.CertChain)) +} + +func Test_certificateService_GenerateKeyCerts_PKCS8(t *testing.T) { + t.Parallel() + certService, err := newNewCertificateService() + assert.Nil(t, err) + assert.NotNil(t, certService) + got := certService.GenerateKeyCerts(context.Background(), &confmanager.GenerateKeyCertsRequest{ + CommonName: "test", + KeyType: KeyTypeForPCKS8, + }) + + assert.Equal(t, 0, 
int(got.Status.Code)) + assert.NotEmpty(t, got.Key) + assert.Equal(t, 2, len(got.CertChain)) } -func Test_certificateService_ValidateGenerateKeyCertsRequest(t *testing.T) { - type args struct { - ctx context.Context - request *confmanager.GenerateKeyCertsRequest - } - tests := []struct { - name string - args args - wantErr bool - wantErrLen int - }{ - { - name: "cm generate key certs request validate should return success", - args: args{ - ctx: context.Background(), - request: &confmanager.GenerateKeyCertsRequest{ - CommonName: "test", - KeyType: KeyTypeForPCKS1, - }, - }, - wantErr: false, - wantErrLen: 0, - }, - { - name: "cm generate key certs request validate should return 1 error", - args: args{ - ctx: context.Background(), - request: &confmanager.GenerateKeyCertsRequest{ - KeyType: KeyTypeForPCKS1, - }, - }, - wantErr: true, - wantErrLen: 1, - }, - { - name: "cm generate key certs request validate should return 3 error", - args: args{ - ctx: context.Background(), - request: &confmanager.GenerateKeyCertsRequest{ - KeyType: "123", - DurationSec: -123, - }, - }, - wantErr: true, - wantErrLen: 3, - }, - } - for _, tt := range tests { - certService, err := newNewCertificateService() - asserts.NotNil(err, "new certificate service failed") - asserts.IsNil(certService, "new certificate service return nil") - t.Run(tt.name, func(t *testing.T) { - got := certService.ValidateGenerateKeyCertsRequest(tt.args.ctx, tt.args.request) - if (got != nil) != tt.wantErr { - t.Errorf("ValidateGenerateKeyCertsRequest() = %v, wantErr %v", got, tt.wantErr) - } - if got != nil { - if len(*got) != tt.wantErrLen { - t.Errorf("ValidateGenerateKeyCertsRequest() = %v, wantErr %v", got, tt.wantErr) - } - } - }) - } +func Test_certificateService_ValidateGenerateKeyCertsRequest_PKCS1_Error(t *testing.T) { + t.Parallel() + certService, err := newNewCertificateService() + assert.Nil(t, err) + assert.NotNil(t, certService) + + got := certService.ValidateGenerateKeyCertsRequest(context.Background(), &confmanager.GenerateKeyCertsRequest{ + CommonName: "test", + KeyType: KeyTypeForPCKS1, + }) + + assert.Nil(t, got) +} + +func Test_certificateService_ValidateGenerateKeyCertsRequest_PKCS1(t *testing.T) { + t.Parallel() + certService, err := newNewCertificateService() + assert.Nil(t, err) + assert.NotNil(t, certService) + + got := certService.ValidateGenerateKeyCertsRequest(context.Background(), &confmanager.GenerateKeyCertsRequest{ + KeyType: KeyTypeForPCKS1, + }) + + assert.NotNil(t, got) + assert.Equal(t, 1, len(*got)) +} + +func Test_certificateService_ValidateGenerateKeyCertsRequest_3(t *testing.T) { + t.Parallel() + certService, err := newNewCertificateService() + assert.Nil(t, err) + assert.NotNil(t, certService) + + got := certService.ValidateGenerateKeyCertsRequest(context.Background(), &confmanager.GenerateKeyCertsRequest{ + KeyType: "123", + DurationSec: -123, + }) + + assert.NotNil(t, got) + assert.Equal(t, 3, len(*got)) } diff --git a/pkg/controllers/domain/authorization_resource.go b/pkg/controllers/domain/authorization_resource.go index c252a5b7..33fa91c7 100644 --- a/pkg/controllers/domain/authorization_resource.go +++ b/pkg/controllers/domain/authorization_resource.go @@ -38,7 +38,7 @@ const ( clusterRoleKind = "ClusterRole" authCompleted = "completed" tokenExpiredSeconds = 3650 * 24 * 3600 - defaultRollingSeconds = 600 + defaultRollingSeconds = 86400 ) // 1. 
P2P Kusica partner master -> rolebinding + clusterdomainroute + status-update diff --git a/pkg/controllers/kusciadeployment/reconcile.go b/pkg/controllers/kusciadeployment/reconcile.go index 37d01a21..85df5596 100644 --- a/pkg/controllers/kusciadeployment/reconcile.go +++ b/pkg/controllers/kusciadeployment/reconcile.go @@ -42,14 +42,20 @@ const ( // ProcessKusciaDeployment processes kuscia deployment resource. func (c *Controller) ProcessKusciaDeployment(ctx context.Context, kd *kusciav1alpha1.KusciaDeployment) (err error) { - updated, err := c.updateKusciaDeploymentAnnotations(kd) + conditionNeedUpdate, err := c.updateKusciaDeploymentAnnotations(kd) if err != nil { nlog.Errorf("UpdateKusciaDeploymentSpec kd=%s/%s failed: %s", kd.Namespace, kd.Name, err) return err } + preKdStatus := kd.Status.DeepCopy() + partyKitInfos, kitNeedUpdate, err := c.buildPartyKitInfos(kd) + if err != nil { + return c.handleError(ctx, partyKitInfos, preKdStatus, kd, err) + } + // We update the spec and status separately. - if updated { + if conditionNeedUpdate || kitNeedUpdate { _, err = c.kusciaClient.KusciaV1alpha1().KusciaDeployments(kd.Namespace).Update(ctx, kd, metav1.UpdateOptions{}) if err != nil && !k8serrors.IsConflict(err) { return fmt.Errorf("failed to updating kuscia deployment %v, %v", kd.Name, err) @@ -57,12 +63,6 @@ func (c *Controller) ProcessKusciaDeployment(ctx context.Context, kd *kusciav1al return nil } - preKdStatus := kd.Status.DeepCopy() - partyKitInfos, err := c.buildPartyKitInfos(kd) - if err != nil { - return c.handleError(ctx, partyKitInfos, preKdStatus, kd, err) - } - if err = c.syncResources(ctx, partyKitInfos); err != nil { return c.handleError(ctx, partyKitInfos, preKdStatus, kd, err) } @@ -601,11 +601,17 @@ func (c *Controller) generateDeployment(partyKitInfo *PartyKitInfo) (*appsv1.Dep } for _, port := range ctr.Ports { - resCtr.Ports = append(resCtr.Ports, corev1.ContainerPort{ + namedPort, ok := partyKitInfo.dkInfo.ports[port.Name] + if !ok { + return nil, fmt.Errorf("port %s is not allocated for deployment %s", port.Name, partyKitInfo.dkInfo.deploymentName) + } + resPort := corev1.ContainerPort{ Name: port.Name, - ContainerPort: port.Port, + ContainerPort: namedPort.Port, Protocol: corev1.ProtocolTCP, - }) + } + + resCtr.Ports = append(resCtr.Ports, resPort) } protoJSONOptions := protojson.MarshalOptions{EmitUnpopulated: true} diff --git a/pkg/controllers/kusciadeployment/util.go b/pkg/controllers/kusciadeployment/util.go index 377fad9f..946dcaab 100644 --- a/pkg/controllers/kusciadeployment/util.go +++ b/pkg/controllers/kusciadeployment/util.go @@ -17,6 +17,7 @@ package kusciadeployment import ( "context" + "encoding/json" "fmt" "reflect" "strings" @@ -24,6 +25,8 @@ import ( k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "github.com/secretflow/kuscia/pkg/common" + pkgport "github.com/secretflow/kuscia/pkg/controllers/portflake/port" kusciav1alpha1 "github.com/secretflow/kuscia/pkg/crd/apis/kuscia/v1alpha1" utilsres "github.com/secretflow/kuscia/pkg/utils/resources" proto "github.com/secretflow/kuscia/proto/api/v1alpha1/appconfig" @@ -34,6 +37,7 @@ type KdStatusReason string const ( buildPartyKitInfoFailed KdStatusReason = "BuildPartyKitInfoFailed" fillPartyClusterDefinesFailed KdStatusReason = "FillPartyClusterDefinesFailed" + allocatePortsFailed KdStatusReason = "AllocatePortsFailed" getSelfPartyKitInfoFailed KdStatusReason = "GetSelfPartyKitInfoFailed" createConfigMapFailed KdStatusReason = "CreateConfigMapFailed" 
createServiceFailed KdStatusReason = "CreateServiceFailed" @@ -69,7 +73,7 @@ type DeploymentKitInfo struct { allocatedPorts *proto.AllocatedPorts } -func (c *Controller) buildPartyKitInfos(kd *kusciav1alpha1.KusciaDeployment) (map[string]*PartyKitInfo, error) { +func (c *Controller) buildPartyKitInfos(kd *kusciav1alpha1.KusciaDeployment) (map[string]*PartyKitInfo, bool, error) { partyKitInfos := make(map[string]*PartyKitInfo) for _, party := range kd.Spec.Parties { kitInfo, err := c.buildPartyKitInfo(kd, &party) @@ -77,22 +81,15 @@ func (c *Controller) buildPartyKitInfos(kd *kusciav1alpha1.KusciaDeployment) (ma kd.Status.Phase = kusciav1alpha1.KusciaDeploymentPhaseFailed kd.Status.Reason = string(buildPartyKitInfoFailed) kd.Status.Message = fmt.Sprintf("failed to build domain %v kit info, %v", party.DomainID, err) - return nil, err + return nil, false, err } key := party.DomainID + "/" + party.Role partyKitInfos[key] = kitInfo } - if err := c.fillPartyClusterDefines(partyKitInfos); err != nil { - kd.Status.Phase = kusciav1alpha1.KusciaDeploymentPhaseFailed - kd.Status.Reason = string(fillPartyClusterDefinesFailed) - kd.Status.Message = fmt.Sprintf("failed to fill party cluster defines, %v", err) - return nil, err - } - selfParties, err := c.selfParties(kd) if err != nil { - return nil, err + return nil, false, err } selfPartyKitInfos := make(map[string]*PartyKitInfo) @@ -103,12 +100,27 @@ func (c *Controller) buildPartyKitInfos(kd *kusciav1alpha1.KusciaDeployment) (ma kd.Status.Phase = kusciav1alpha1.KusciaDeploymentPhaseFailed kd.Status.Reason = string(getSelfPartyKitInfoFailed) kd.Status.Message = err.Error() - return nil, err + return nil, false, err } selfPartyKitInfos[key] = partyKitInfos[key] } - return selfPartyKitInfos, nil + needUpdate, err := allocatePorts(kd, selfPartyKitInfos) + if err != nil { + kd.Status.Phase = kusciav1alpha1.KusciaDeploymentPhaseFailed + kd.Status.Reason = string(allocatePortsFailed) + kd.Status.Message = fmt.Sprintf("failed to allocate ports, %v", err) + return nil, false, err + } + + if err := c.fillPartyClusterDefines(partyKitInfos); err != nil { + kd.Status.Phase = kusciav1alpha1.KusciaDeploymentPhaseFailed + kd.Status.Reason = string(fillPartyClusterDefinesFailed) + kd.Status.Message = fmt.Sprintf("failed to fill party cluster defines, %v", err) + return nil, false, err + } + + return selfPartyKitInfos, needUpdate, nil } func (c *Controller) buildPartyKitInfo(kd *kusciav1alpha1.KusciaDeployment, party *kusciav1alpha1.KusciaDeploymentParty) (*PartyKitInfo, error) { @@ -157,8 +169,8 @@ func (c *Controller) buildPartyKitInfo(kd *kusciav1alpha1.KusciaDeployment, part dkInfo: dkInfo, } - if baseDeployTemplate.NetworkPolicy != nil { - kit.portAccessDomains = generatePortAccessDomains(kd.Spec.Parties, baseDeployTemplate.NetworkPolicy) + if len(kd.Spec.Parties) > 1 { + kit.portAccessDomains = generatePortAccessDomains(kd.Spec.Parties, baseDeployTemplate.NetworkPolicy, ports) } return kit, nil @@ -208,6 +220,7 @@ func (c *Controller) fillPartyClusterDefines(partyKitInfos map[string]*PartyKitI return err } } + return nil } @@ -225,7 +238,6 @@ func fillPartyClusterDefine(kitInfo *PartyKitInfo, parties []*proto.Party) error } fillClusterDefine(kitInfo.dkInfo, parties, *selfPartyIndex, 0) - fillAllocatedPorts(kitInfo.dkInfo) return nil } @@ -237,18 +249,108 @@ func fillClusterDefine(dkInfo *DeploymentKitInfo, parties []*proto.Party, partyI } } -func fillAllocatedPorts(dkInfo *DeploymentKitInfo) { +func allocatePorts(kd *kusciav1alpha1.KusciaDeployment, 
partyKitInfos map[string]*PartyKitInfo) (bool, error) { + needUpdate := false + + var partyAllocatedPorts []kusciav1alpha1.PartyAllocatedPorts + if kd.Annotations == nil { + kd.Annotations = map[string]string{} + } else if kd.Annotations[common.AllocatedPortsAnnotationKey] != "" { + if err := json.Unmarshal([]byte(kd.Annotations[common.AllocatedPortsAnnotationKey]), &partyAllocatedPorts); err != nil { + return false, err + } + } + + if len(partyAllocatedPorts) == 0 { + needCounts := map[string]int{} + for _, partyKit := range partyKitInfos { + ns := partyKit.domainID + count := needCounts[ns] + count += len(partyKit.dkInfo.ports) + needCounts[ns] = count + } + + retPorts, err := pkgport.AllocatePort(needCounts) + if err != nil { + return false, err + } + + for _, partyKit := range partyKitInfos { + ns := partyKit.domainID + ports, ok := retPorts[ns] + if !ok { + return false, fmt.Errorf("allocated ports not found for domain %s", ns) + } + index := 0 + partyPorts := kusciav1alpha1.PartyAllocatedPorts{ + DomainID: partyKit.domainID, + Role: partyKit.role, + NamedPort: map[string]int32{}, + } + + for portName := range partyKit.dkInfo.ports { + if index >= len(ports) { + return false, fmt.Errorf("allocated ports are not enough for domain %s", ns) + } + + partyPorts.NamedPort[portName] = ports[index] + index++ + } + + partyAllocatedPorts = append(partyAllocatedPorts, partyPorts) + } + + allocatedPortsContent, err := json.Marshal(partyAllocatedPorts) + if err != nil { + return false, err + } + + kd.Annotations[common.AllocatedPortsAnnotationKey] = string(allocatedPortsContent) + needUpdate = true + } + + for _, partyKit := range partyKitInfos { + var partyPorts *kusciav1alpha1.PartyAllocatedPorts + for _, ports := range partyAllocatedPorts { + if ports.DomainID == partyKit.domainID && ports.Role == partyKit.role { + partyPorts = &ports + break + } + } + if partyPorts == nil { + return false, fmt.Errorf("allocated ports not found for party %s/%s", partyKit.domainID, partyKit.role) + } + + if err := fillAllocatedPorts(partyPorts, partyKit.dkInfo); err != nil { + return false, fmt.Errorf("failed to fill allocated ports for party %s/%s, detail->%v", partyKit.domainID, partyKit.role, err) + } + + } + + return needUpdate, nil +} + +func fillAllocatedPorts(partyPorts *kusciav1alpha1.PartyAllocatedPorts, dkInfo *DeploymentKitInfo) error { resPorts := make([]*proto.Port, 0, len(dkInfo.ports)) - for _, port := range dkInfo.ports { + for name, port := range dkInfo.ports { + realPort, ok := partyPorts.NamedPort[port.Name] + if !ok { + return fmt.Errorf("not found allocated port for %v", port.Name) + } + resPorts = append(resPorts, &proto.Port{ Name: port.Name, - Port: port.Port, + Port: realPort, Scope: string(port.Scope), Protocol: string(port.Protocol), }) + + port.Port = realPort + dkInfo.ports[name] = port } dkInfo.allocatedPorts = &proto.AllocatedPorts{Ports: resPorts} + return nil } func (c *Controller) generateClusterDefineParties(partyKitInfos map[string]*PartyKitInfo) ([]*proto.Party, error) { @@ -344,42 +446,59 @@ func generatePortServices(deploymentName string, servicedPorts []string) PortSer return portService } -func generatePortAccessDomains(parties []kusciav1alpha1.KusciaDeploymentParty, networkPolicy *kusciav1alpha1.NetworkPolicy) map[string]string { - roleDomains := map[string][]string{} - for _, party := range parties { - if domains, ok := roleDomains[party.Role]; ok { - roleDomains[party.Role] = append(domains, party.DomainID) - } else { - roleDomains[party.Role] = 
[]string{party.DomainID} +func generatePortAccessDomains(parties []kusciav1alpha1.KusciaDeploymentParty, networkPolicy *kusciav1alpha1.NetworkPolicy, ports NamedPorts) map[string]string { + portAccessDomains := map[string]string{} + if networkPolicy == nil { + domainMap := map[string]struct{}{} + for _, party := range parties { + domainMap[party.DomainID] = struct{}{} } - } - portAccessRoles := map[string][]string{} - for _, item := range networkPolicy.Ingresses { - for _, port := range item.Ports { - if domains, ok := portAccessRoles[port.Port]; ok { - portAccessRoles[port.Port] = append(domains, item.From.Roles...) + domainSlice := make([]string, 0, len(domainMap)) + for domain := range domainMap { + domainSlice = append(domainSlice, domain) + } + + for _, port := range ports { + if port.Scope == kusciav1alpha1.ScopeCluster { + portAccessDomains[port.Name] = strings.Join(domainSlice, ",") + } + } + } else { + roleDomains := map[string][]string{} + for _, party := range parties { + if domains, ok := roleDomains[party.Role]; ok { + roleDomains[party.Role] = append(domains, party.DomainID) } else { - portAccessRoles[port.Port] = item.From.Roles + roleDomains[party.Role] = []string{party.DomainID} } } - } - portAccessDomains := map[string]string{} - for port, roles := range portAccessRoles { - domainMap := map[string]struct{}{} - for _, role := range roles { - for _, domain := range roleDomains[role] { - domainMap[domain] = struct{}{} + portAccessRoles := map[string][]string{} + for _, item := range networkPolicy.Ingresses { + for _, port := range item.Ports { + if domains, ok := portAccessRoles[port.Port]; ok { + portAccessRoles[port.Port] = append(domains, item.From.Roles...) + } else { + portAccessRoles[port.Port] = item.From.Roles + } } } - domainSlice := make([]string, 0, len(domainMap)) - for domain := range domainMap { - domainSlice = append(domainSlice, domain) + + for port, roles := range portAccessRoles { + domainMap := map[string]struct{}{} + for _, role := range roles { + for _, domain := range roleDomains[role] { + domainMap[domain] = struct{}{} + } + } + domainSlice := make([]string, 0, len(domainMap)) + for domain := range domainMap { + domainSlice = append(domainSlice, domain) + } + portAccessDomains[port] = strings.Join(domainSlice, ",") } - portAccessDomains[port] = strings.Join(domainSlice, ",") } - return portAccessDomains } diff --git a/pkg/controllers/kusciadeployment/util_test.go b/pkg/controllers/kusciadeployment/util_test.go index f8cc065c..c73bac0f 100644 --- a/pkg/controllers/kusciadeployment/util_test.go +++ b/pkg/controllers/kusciadeployment/util_test.go @@ -23,6 +23,7 @@ import ( "github.com/stretchr/testify/assert" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "github.com/secretflow/kuscia/pkg/common" kusciaapisv1alpha1 "github.com/secretflow/kuscia/pkg/crd/apis/kuscia/v1alpha1" kusciafake "github.com/secretflow/kuscia/pkg/crd/clientset/versioned/fake" kusciainformers "github.com/secretflow/kuscia/pkg/crd/informers/externalversions" @@ -93,14 +94,81 @@ func TestFillClusterDefine(t *testing.T) { } } +func TestAllocatePorts(t *testing.T) { + partyKitInfos := map[string]*PartyKitInfo{ + "alice": { + domainID: "alice", + dkInfo: &DeploymentKitInfo{ + ports: NamedPorts{ + "domain": kusciaapisv1alpha1.ContainerPort{ + Name: "domain", + Port: 8080, + Protocol: "HTTP", + Scope: kusciaapisv1alpha1.ScopeDomain, + }, + }, + allocatedPorts: nil, + }, + }, + "bob": { + domainID: "bob", + dkInfo: &DeploymentKitInfo{ + ports: NamedPorts{ + "domain": 
kusciaapisv1alpha1.ContainerPort{ + Name: "domain", + Port: 9080, + Protocol: "HTTP", + Scope: kusciaapisv1alpha1.ScopeDomain, + }, + "cluster": kusciaapisv1alpha1.ContainerPort{ + Name: "cluster", + Port: 9081, + Protocol: "HTTP", + Scope: kusciaapisv1alpha1.ScopeCluster, + }, + }, + allocatedPorts: nil, + }, + }, + "carol": { + domainID: "carol", + dkInfo: &DeploymentKitInfo{ + ports: NamedPorts{}, + allocatedPorts: nil, + }, + }, + } + + kd := makeTestKusciaDeployment("kd-1", 2, 1, 1) + + needUpdate, err := allocatePorts(kd, partyKitInfos) + assert.NoError(t, err) + assert.True(t, needUpdate) + assert.True(t, kd.Annotations[common.AllocatedPortsAnnotationKey] != "") + + alicePorts := partyKitInfos["alice"].dkInfo.allocatedPorts + assert.NotNil(t, alicePorts) + assert.Equal(t, 1, len(alicePorts.Ports)) + + bobPorts := partyKitInfos["bob"].dkInfo.allocatedPorts + assert.NotNil(t, bobPorts) + assert.Equal(t, 2, len(bobPorts.Ports)) + + carolPorts := partyKitInfos["carol"].dkInfo.allocatedPorts + assert.NotNil(t, carolPorts) + assert.Equal(t, 0, len(carolPorts.Ports)) +} + func TestFillAllocatedPorts(t *testing.T) { tests := []struct { - name string - dkInfo *DeploymentKitInfo - want *proto.AllocatedPorts + name string + partyPorts *kusciaapisv1alpha1.PartyAllocatedPorts + dkInfo *DeploymentKitInfo + want *proto.AllocatedPorts }{ { - name: "ports is empty", + name: "ports is empty", + partyPorts: &kusciaapisv1alpha1.PartyAllocatedPorts{NamedPort: map[string]int32{}}, dkInfo: &DeploymentKitInfo{ deploymentName: "deploy-1", ports: nil, @@ -111,7 +179,8 @@ func TestFillAllocatedPorts(t *testing.T) { }, }, { - name: "ports is not empty", + name: "ports is not empty", + partyPorts: &kusciaapisv1alpha1.PartyAllocatedPorts{NamedPort: map[string]int32{"domain": 10000}}, dkInfo: &DeploymentKitInfo{ deploymentName: "deploy-1", ports: NamedPorts{ @@ -128,7 +197,7 @@ func TestFillAllocatedPorts(t *testing.T) { Ports: []*proto.Port{ { Name: "domain", - Port: 8080, + Port: 10000, Scope: string(kusciaapisv1alpha1.ScopeDomain), Protocol: "HTTP", }, @@ -139,7 +208,7 @@ func TestFillAllocatedPorts(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - fillAllocatedPorts(tt.dkInfo) + assert.NoError(t, fillAllocatedPorts(tt.partyPorts, tt.dkInfo)) assert.Equal(t, tt.want, tt.dkInfo.allocatedPorts) }) } @@ -446,7 +515,7 @@ func TestGeneratePortServices(t *testing.T) { } func TestGeneratePortAccessDomains(t *testing.T) { - deploymentParties := []kusciaapisv1alpha1.KusciaDeploymentParty{ + parties := []kusciaapisv1alpha1.KusciaDeploymentParty{ { DomainID: "domain-a", Role: "server", @@ -461,6 +530,21 @@ func TestGeneratePortAccessDomains(t *testing.T) { }, } + ports := NamedPorts{ + "port-10000": kusciaapisv1alpha1.ContainerPort{ + Name: "port-10000", + Port: 10000, + Protocol: "HTTP", + Scope: kusciaapisv1alpha1.ScopeCluster, + }, + "port-10001": kusciaapisv1alpha1.ContainerPort{ + Name: "port-10001", + Port: 10001, + Protocol: "HTTP", + Scope: kusciaapisv1alpha1.ScopeDomain, + }, + } + tests := []struct { name string parties []kusciaapisv1alpha1.KusciaDeploymentParty @@ -469,7 +553,7 @@ func TestGeneratePortAccessDomains(t *testing.T) { }{ { name: "domain-b,domain-c can access all port, domain-a only can access one port", - parties: deploymentParties, + parties: parties, networkPolicy: &kusciaapisv1alpha1.NetworkPolicy{ Ingresses: []kusciaapisv1alpha1.Ingress{ { @@ -504,11 +588,19 @@ func TestGeneratePortAccessDomains(t *testing.T) { "port-10002": {"domain-a", "domain-b", "domain-c"}, 
}, }, + { + name: "domain-a, domain-b and domain-c can access cluster port", + parties: parties, + networkPolicy: nil, + wantPortAccessDomains: map[string][]string{ + "port-10000": {"domain-a", "domain-b", "domain-c"}, + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - accessDomains := generatePortAccessDomains(tt.parties, tt.networkPolicy) + accessDomains := generatePortAccessDomains(tt.parties, tt.networkPolicy, ports) accessDomainsConverted := map[string][]string{} for port, domains := range accessDomains { domainSlice := strings.Split(domains, ",") diff --git a/pkg/controllers/kusciajob/handler/scheduler.go b/pkg/controllers/kusciajob/handler/scheduler.go index 26c6e90d..ddf52f60 100644 --- a/pkg/controllers/kusciajob/handler/scheduler.go +++ b/pkg/controllers/kusciajob/handler/scheduler.go @@ -22,7 +22,6 @@ import ( "strconv" "strings" - v1alpha1 "github.com/secretflow/kuscia/pkg/crd/apis/kuscia/v1alpha1" corev1 "k8s.io/api/core/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" k8sresource "k8s.io/apimachinery/pkg/api/resource" @@ -32,6 +31,8 @@ import ( "k8s.io/apimachinery/pkg/util/uuid" corelisters "k8s.io/client-go/listers/core/v1" + "github.com/secretflow/kuscia/pkg/crd/apis/kuscia/v1alpha1" + "github.com/secretflow/kuscia/pkg/common" kusciaapisv1alpha1 "github.com/secretflow/kuscia/pkg/crd/apis/kuscia/v1alpha1" "github.com/secretflow/kuscia/pkg/crd/clientset/versioned" @@ -185,7 +186,8 @@ func (h *JobScheduler) handleStageCmdStop(now metav1.Time, job *kusciaapisv1alph } // set job phase to failed reason := fmt.Sprintf("Party: %s execute the cmd: %s.", cmdTrigger, cmd) - setKusciaJobStatus(now, &job.Status, kusciaapisv1alpha1.KusciaJobFailed, reason, "") + setKusciaJobStatus(now, &job.Status, kusciaapisv1alpha1.KusciaJobFailed, reason, reason) + setRunningTaskStatusToFailed(&job.Status) return nil } @@ -1061,22 +1063,22 @@ func (h *RunningHandler) buildPartyTemplate(p kusciaapisv1alpha1.Party, appImage if err != nil { nlog.Warnf("Can not get suitable deployTemplate. 
err: %s", err.Error()) return v1alpha1.PartyTemplate{} - } else { - deployTemplate = *ptrDT - ctrNumber = len(deployTemplate.Spec.Containers) - rplNumber = int(*(deployTemplate.Replicas)) } - var everyCpu, everyMemory k8sresource.Quantity + deployTemplate = *ptrDT + ctrNumber = len(deployTemplate.Spec.Containers) + rplNumber = int(*(deployTemplate.Replicas)) + + var everyCPU, everyMemory k8sresource.Quantity var ptr *k8sresource.Quantity var limitResource = corev1.ResourceList{} if !utilsres.IsEmpty(p.Resources) && !utilsres.IsEmpty(p.Resources.Limits[corev1.ResourceCPU]) { ptrValue := p.Resources.Limits[corev1.ResourceCPU] ptr = &ptrValue - stringEveryCpu, _ := utilsres.SplitRSC(ptr.String(), ctrNumber*rplNumber) - everyCpu = k8sresource.MustParse(stringEveryCpu) - limitResource[corev1.ResourceCPU] = everyCpu + stringEveryCPU, _ := utilsres.SplitRSC(ptr.String(), ctrNumber*rplNumber) + everyCPU = k8sresource.MustParse(stringEveryCPU) + limitResource[corev1.ResourceCPU] = everyCPU } if !utilsres.IsEmpty(p.Resources) && !utilsres.IsEmpty(p.Resources.Limits[corev1.ResourceMemory]) { ptrValue := p.Resources.Limits[corev1.ResourceMemory] @@ -1087,7 +1089,7 @@ func (h *RunningHandler) buildPartyTemplate(p kusciaapisv1alpha1.Party, appImage } containers := deployTemplate.Spec.Containers - for ctrIdx, _ := range containers { + for ctrIdx := range containers { containers[ctrIdx].Resources = corev1.ResourceRequirements{ Limits: limitResource, } @@ -1107,18 +1109,13 @@ func (h *RunningHandler) buildPartyTemplate(p kusciaapisv1alpha1.Party, appImage // findMatchedDeployTemplate will get the best matched deployTemplate func (h *RunningHandler) findMatchedDeployTemplate(p kusciaapisv1alpha1.Party, appImageName string) (*v1alpha1.DeployTemplate, error) { - if appImage, err := h.kusciaClient.KusciaV1alpha1().AppImages().Get(context.Background(), appImageName, metav1.GetOptions{}); err == nil { - var deployTemplate *v1alpha1.DeployTemplate - deployTemplate, err := utilsres.SelectDeployTemplate(appImage.Spec.DeployTemplates, p.Role) - if err != nil { - return nil, err - } else { - return deployTemplate, nil - } - } else { + appImage, err := h.kusciaClient.KusciaV1alpha1().AppImages().Get(context.Background(), appImageName, metav1.GetOptions{}) + if err != nil { nlog.Warnf("Can not get appImage %s. error: %s", appImageName, err.Error()) return nil, err } + + return utilsres.SelectDeployTemplate(appImage.Spec.DeployTemplates, p.Role) } // jobTaskSelector will make selector of kuscia task which kuscia job generate. 
@@ -1255,6 +1252,15 @@ func setKusciaJobStatus(now metav1.Time, status *kusciaapisv1alpha1.KusciaJobSta } } +// setRunningTaskStatusToFailed +func setRunningTaskStatusToFailed(status *kusciaapisv1alpha1.KusciaJobStatus) { + for k, v := range status.TaskStatus { + if v == kusciaapisv1alpha1.TaskPending || v == kusciaapisv1alpha1.TaskRunning { + status.TaskStatus[k] = kusciaapisv1alpha1.TaskFailed + } + } +} + func setKusciaTaskStatus(now metav1.Time, status *kusciaapisv1alpha1.KusciaTaskStatus, phase kusciaapisv1alpha1.KusciaTaskPhase, reason, message string) { status.Phase = phase status.LastReconcileTime = &now diff --git a/pkg/controllers/kusciajob/handler/scheduler_test.go b/pkg/controllers/kusciajob/handler/scheduler_test.go index 0c71157a..aa1999c0 100644 --- a/pkg/controllers/kusciajob/handler/scheduler_test.go +++ b/pkg/controllers/kusciajob/handler/scheduler_test.go @@ -19,9 +19,6 @@ import ( "testing" "time" - v1alpha1 "github.com/secretflow/kuscia/pkg/crd/apis/kuscia/v1alpha1" - kusciafake "github.com/secretflow/kuscia/pkg/crd/clientset/versioned/fake" - kusciainformers "github.com/secretflow/kuscia/pkg/crd/informers/externalversions" "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" k8sresource "k8s.io/apimachinery/pkg/api/resource" @@ -33,6 +30,8 @@ import ( "github.com/secretflow/kuscia/pkg/common" kusciaapisv1alpha1 "github.com/secretflow/kuscia/pkg/crd/apis/kuscia/v1alpha1" + kusciafake "github.com/secretflow/kuscia/pkg/crd/clientset/versioned/fake" + kusciainformers "github.com/secretflow/kuscia/pkg/crd/informers/externalversions" ) const ( @@ -875,7 +874,7 @@ func TestRunningHandler_buildPartyTemplate(t *testing.T) { }, appImageName: "", }, - want: v1alpha1.PartyTemplate{}, + want: kusciaapisv1alpha1.PartyTemplate{}, }, { name: "Inexistent appImage should return no-resource-config party info", @@ -892,7 +891,7 @@ func TestRunningHandler_buildPartyTemplate(t *testing.T) { }, appImageName: "test-image-1", }, - want: v1alpha1.PartyTemplate{}, + want: kusciaapisv1alpha1.PartyTemplate{}, }, { name: "Existent appImage should return resource-config party info", @@ -908,9 +907,9 @@ func TestRunningHandler_buildPartyTemplate(t *testing.T) { }, appImageName: "mockImage", }, - want: v1alpha1.PartyTemplate{ - Spec: v1alpha1.PodSpec{ - Containers: []v1alpha1.Container{ + want: kusciaapisv1alpha1.PartyTemplate{ + Spec: kusciaapisv1alpha1.PodSpec{ + Containers: []kusciaapisv1alpha1.Container{ { Name: "mock-Container", Resources: corev1.ResourceRequirements{ @@ -986,22 +985,22 @@ func TestRunningHandler_buildPartyTemplate(t *testing.T) { } } -func makeMockAppImage(name string) *v1alpha1.AppImage { +func makeMockAppImage(name string) *kusciaapisv1alpha1.AppImage { replicas := int32(1) - return &v1alpha1.AppImage{ + return &kusciaapisv1alpha1.AppImage{ ObjectMeta: metav1.ObjectMeta{Name: name}, - Spec: v1alpha1.AppImageSpec{ - Image: v1alpha1.AppImageInfo{ + Spec: kusciaapisv1alpha1.AppImageSpec{ + Image: kusciaapisv1alpha1.AppImageInfo{ Name: "mock-AppImage", Tag: "latest", }, - DeployTemplates: []v1alpha1.DeployTemplate{ + DeployTemplates: []kusciaapisv1alpha1.DeployTemplate{ { Name: "mock-DeployTemplate", Role: "server", Replicas: &replicas, - Spec: v1alpha1.PodSpec{ - Containers: []v1alpha1.Container{ + Spec: kusciaapisv1alpha1.PodSpec{ + Containers: []kusciaapisv1alpha1.Container{ { Name: "mock-Container", Resources: corev1.ResourceRequirements{ diff --git a/pkg/controllers/kusciatask/controller.go b/pkg/controllers/kusciatask/controller.go index 26589ba1..82c52676 
100644 --- a/pkg/controllers/kusciatask/controller.go +++ b/pkg/controllers/kusciatask/controller.go @@ -274,7 +274,7 @@ func (c *Controller) enqueueKusciaTask(obj interface{}) { ) if key, err = cache.DeletionHandlingMetaNamespaceKeyFunc(obj); err != nil { - nlog.Errorf("Error building key of kusciatask: %v", err) + nlog.Errorf("Error building key of kusciaTask: %v", err) } c.taskQueue.Add(key) nlog.Debugf("Enqueue kusciaTask %q", key) @@ -293,69 +293,82 @@ func (c *Controller) handleDeletedKusciaTask(obj interface{}) { } } - c.taskDeleteQueue.Add(fmt.Sprintf("%s/%s", kt.Name, kt.UID)) + c.enqueueDeletedKusciaTask(kt.Name, string(kt.UID)) } // handleTaskResourceGroupObject enqueue the KusciaTask which the task resource group belongs. func (c *Controller) handleTaskResourceGroupObject(obj interface{}) { var ( - object metav1.Object - ok bool + trg *kusciaapisv1alpha1.TaskResourceGroup + ok bool ) - if object, ok = obj.(metav1.Object); !ok { + if trg, ok = obj.(*kusciaapisv1alpha1.TaskResourceGroup); !ok { tombstone, ok := obj.(cache.DeletedFinalStateUnknown) if !ok { nlog.Errorf("Error decoding object, invalid type %T", obj) return } - object, ok = tombstone.Obj.(metav1.Object) + trg, ok = tombstone.Obj.(*kusciaapisv1alpha1.TaskResourceGroup) if !ok { nlog.Errorf("Error decoding object tombstone, invalid type %T", tombstone.Obj) return } - nlog.Debugf("Recovered deleted object %q from tombstone", object.GetName()) + nlog.Debugf("Recovered deleted object %q from tombstone", trg.Name) } - kusciaTask, err := c.kusciaTaskLister.KusciaTasks(common.KusciaCrossDomain).Get(object.GetName()) + kt, err := c.kusciaTaskLister.KusciaTasks(common.KusciaCrossDomain).Get(trg.Name) if err != nil { - nlog.Debugf("Get kuscia task %v fail, %v, skip processing it", object.GetName(), err) + if k8serrors.IsNotFound(err) { + kt, err = c.handleNotFoundKusciaTask(trg.Name, trg.Labels) + if err == nil { + return + } + } + nlog.Debugf("Get kusciaTask %v failed, %v, skip processing it", trg.Name, err) return } - c.enqueueKusciaTask(kusciaTask) + c.enqueueKusciaTask(kt) } // handlePodObject enqueue the KusciaTask which the pod belongs. 
func (c *Controller) handlePodObject(obj interface{}) { var ( - object metav1.Object - ok bool + pod *v1.Pod + ok bool ) - if object, ok = obj.(metav1.Object); !ok { + if pod, ok = obj.(*v1.Pod); !ok { tombstone, ok := obj.(cache.DeletedFinalStateUnknown) if !ok { nlog.Error("Error decoding object, invalid type") return } - object, ok = tombstone.Obj.(metav1.Object) + pod, ok = tombstone.Obj.(*v1.Pod) if !ok { nlog.Errorf("Error decoding object tombstone, invalid type") return } - nlog.Debugf("Recovered deleted object %q from tombstone", object.GetName()) + nlog.Debugf("Recovered deleted object %q from tombstone", pod.Name) } - annotations := object.GetAnnotations() + annotations := pod.Annotations if annotations != nil && annotations[common.TaskIDAnnotationKey] != "" { - kusciaTask, err := c.kusciaTaskLister.KusciaTasks(common.KusciaCrossDomain).Get(annotations[common.TaskIDAnnotationKey]) + taskID := annotations[common.TaskIDAnnotationKey] + kt, err := c.kusciaTaskLister.KusciaTasks(common.KusciaCrossDomain).Get(taskID) if err != nil { - nlog.Debugf("Get pod %v/%v related kusciaTask %v fail, %v, skip processing it", object.GetNamespace(), object.GetName(), - annotations[common.TaskIDAnnotationKey], err) + if k8serrors.IsNotFound(err) { + kt, err = c.handleNotFoundKusciaTask(taskID, pod.Labels) + if err == nil { + return + } + } + nlog.Debugf("Get kusciaTask %v failed, %v, skip processing it", taskID, err) return } - c.enqueueKusciaTask(kusciaTask) + + c.enqueueKusciaTask(kt) } } @@ -401,6 +414,26 @@ func (c *Controller) handleServiceObject(obj interface{}) { } } +func (c *Controller) handleNotFoundKusciaTask(taskID string, labels map[string]string) (*kusciaapisv1alpha1.KusciaTask, error) { + kt, err := c.kusciaClient.KusciaV1alpha1().KusciaTasks(common.KusciaCrossDomain).Get(context.Background(), taskID, metav1.GetOptions{}) + if err == nil { + return kt, err + } + + if k8serrors.IsNotFound(err) && labels != nil { + c.enqueueDeletedKusciaTask(taskID, labels[common.LabelTaskUID]) + } + return kt, err +} + +func (c *Controller) enqueueDeletedKusciaTask(taskName, taskUID string) { + if taskName == "" || taskUID == "" { + return + } + c.taskDeleteQueue.Add(fmt.Sprintf("%s/%s", taskName, taskUID)) + nlog.Debugf("Enqueue deleted kusciaTask %q", taskName) +} + // runWorker is a long-running function that will continually call the // processNextWorkItem function in order to read and process a message on the taskQueue. 
func (c *Controller) runWorker() { diff --git a/pkg/controllers/kusciatask/controller_test.go b/pkg/controllers/kusciatask/controller_test.go index 91aef7a2..9d3fa208 100644 --- a/pkg/controllers/kusciatask/controller_test.go +++ b/pkg/controllers/kusciatask/controller_test.go @@ -248,7 +248,7 @@ func TestHandlePodObject(t *testing.T) { cc.kusciaTaskLister = ktInformer.Lister() pod1 := st.MakePod().Name("pod1").Obj() - pod2 := st.MakePod().Name("pod2").Annotation(common.TaskIDAnnotationKey, kt.Name) + pod2 := st.MakePod().Name("pod2").Annotation(common.TaskIDAnnotationKey, kt.Name).Obj() tests := []struct { name string diff --git a/pkg/controllers/kusciatask/handler/common.go b/pkg/controllers/kusciatask/handler/common.go index 5c999f4e..0c2d55a2 100644 --- a/pkg/controllers/kusciatask/handler/common.go +++ b/pkg/controllers/kusciatask/handler/common.go @@ -34,8 +34,6 @@ const ( labelKusciaTaskPodIdentity = "kuscia.secretflow/pod-identity" labelKusciaTaskPodRole = "kuscia.secretflow/pod-role" - kusciaTaskLabelValue = "kusciatask" - configTemplateVolumeName = "config-template" ) diff --git a/pkg/controllers/kusciatask/handler/pending_handler.go b/pkg/controllers/kusciatask/handler/pending_handler.go index 388fdfd9..3e19fd1e 100644 --- a/pkg/controllers/kusciatask/handler/pending_handler.go +++ b/pkg/controllers/kusciatask/handler/pending_handler.go @@ -165,6 +165,7 @@ func (h *PendingHandler) prepareTaskResources(now metav1.Time, kusciaTask *kusci // 2) only local party is controlled in participant cluster func (h *PendingHandler) createTaskResources(kusciaTask *kusciaapisv1alpha1.KusciaTask) error { partyKitInfos := map[string]*PartyKitInfo{} + selfPartyKitInfos := map[string]*PartyKitInfo{} for i, party := range kusciaTask.Spec.Parties { kit, err := h.buildPartyKitInfo(kusciaTask, &kusciaTask.Spec.Parties[i]) if err != nil { @@ -172,9 +173,13 @@ func (h *PendingHandler) createTaskResources(kusciaTask *kusciaapisv1alpha1.Kusc } partyKitInfos[party.DomainID+party.Role] = kit + + if !utilsres.IsPartnerDomain(h.namespacesLister, kit.domainID) { + selfPartyKitInfos[party.DomainID+party.Role] = kit + } } - _, err := allocatePorts(kusciaTask, partyKitInfos) + _, err := allocatePorts(kusciaTask, selfPartyKitInfos) if err != nil { return err } @@ -186,11 +191,7 @@ func (h *PendingHandler) createTaskResources(kusciaTask *kusciaapisv1alpha1.Kusc podStatuses := make(map[string]*kusciaapisv1alpha1.PodStatus) serviceStatuses := make(map[string]*kusciaapisv1alpha1.ServiceStatus) - for _, partyKitInfo := range partyKitInfos { - if utilsres.IsPartnerDomain(h.namespacesLister, partyKitInfo.domainID) { - continue - } - + for _, partyKitInfo := range selfPartyKitInfos { ps, ss, err := h.createResourceForParty(partyKitInfo) if err != nil { return fmt.Errorf("failed to create resource for party '%v/%v', %v", partyKitInfo.domainID, partyKitInfo.role, err) @@ -331,8 +332,9 @@ func (h *PendingHandler) buildPartyKitInfo(kusciaTask *kusciaapisv1alpha1.Kuscia pods: pods, } - if deployTemplate.NetworkPolicy != nil { - kit.portAccessDomains = generatePortAccessDomains(kusciaTask.Spec.Parties, deployTemplate.NetworkPolicy) + // Todo: Consider how to limit the communication between single-party jobs between multiple parties. 
+ if len(kusciaTask.Spec.Parties) > 1 { + kit.portAccessDomains = generatePortAccessDomains(kusciaTask.Spec.Parties, deployTemplate.NetworkPolicy, ports) } return kit, nil @@ -441,42 +443,59 @@ func generatePortServices(podName string, servicedPorts []string) map[string]str } // generatePortAccessDomains generates domain list with access permission according to the role that has access to a port. -func generatePortAccessDomains(parties []kusciaapisv1alpha1.PartyInfo, networkPolicy *kusciaapisv1alpha1.NetworkPolicy) map[string]string { - roleDomains := map[string][]string{} - for _, party := range parties { - if domains, ok := roleDomains[party.Role]; ok { - roleDomains[party.Role] = append(domains, party.DomainID) - } else { - roleDomains[party.Role] = []string{party.DomainID} +func generatePortAccessDomains(parties []kusciaapisv1alpha1.PartyInfo, networkPolicy *kusciaapisv1alpha1.NetworkPolicy, ports NamedPorts) map[string]string { + portAccessDomains := map[string]string{} + if networkPolicy == nil { + domainMap := map[string]struct{}{} + for _, party := range parties { + domainMap[party.DomainID] = struct{}{} } - } - portAccessRoles := map[string][]string{} - for _, item := range networkPolicy.Ingresses { - for _, port := range item.Ports { - if domains, ok := portAccessRoles[port.Port]; ok { - portAccessRoles[port.Port] = append(domains, item.From.Roles...) + domainSlice := make([]string, 0, len(domainMap)) + for domain := range domainMap { + domainSlice = append(domainSlice, domain) + } + + for _, port := range ports { + if port.Scope == kusciaapisv1alpha1.ScopeCluster { + portAccessDomains[port.Name] = strings.Join(domainSlice, ",") + } + } + } else { + roleDomains := map[string][]string{} + for _, party := range parties { + if domains, ok := roleDomains[party.Role]; ok { + roleDomains[party.Role] = append(domains, party.DomainID) } else { - portAccessRoles[port.Port] = item.From.Roles + roleDomains[party.Role] = []string{party.DomainID} } } - } - portAccessDomains := map[string]string{} - for port, roles := range portAccessRoles { - domainMap := map[string]struct{}{} - for _, role := range roles { - for _, domain := range roleDomains[role] { - domainMap[domain] = struct{}{} + portAccessRoles := map[string][]string{} + for _, item := range networkPolicy.Ingresses { + for _, port := range item.Ports { + if domains, ok := portAccessRoles[port.Port]; ok { + portAccessRoles[port.Port] = append(domains, item.From.Roles...) 
+ } else { + portAccessRoles[port.Port] = item.From.Roles + } } } - domainSlice := make([]string, 0, len(domainMap)) - for domain := range domainMap { - domainSlice = append(domainSlice, domain) + + for port, roles := range portAccessRoles { + domainMap := map[string]struct{}{} + for _, role := range roles { + for _, domain := range roleDomains[role] { + domainMap[domain] = struct{}{} + } + } + domainSlice := make([]string, 0, len(domainMap)) + for domain := range domainMap { + domainSlice = append(domainSlice, domain) + } + portAccessDomains[port] = strings.Join(domainSlice, ",") } - portAccessDomains[port] = strings.Join(domainSlice, ",") } - return portAccessDomains } @@ -802,7 +821,7 @@ func (h *PendingHandler) generateTaskResourceGroup(kusciaTask *kusciaapisv1alpha common.KusciaPartyMasterDomainAnnotationKey: kusciaTask.Annotations[common.KusciaPartyMasterDomainAnnotationKey], }, Labels: map[string]string{ - common.LabelController: kusciaTaskLabelValue, + common.LabelController: common.ControllerKusciaTask, common.LabelJobUID: jobUID, common.LabelTaskUID: string(kusciaTask.UID), }, @@ -823,7 +842,7 @@ func (h *PendingHandler) generateTaskResourceGroup(kusciaTask *kusciaapisv1alpha func generateConfigMap(partyKit *PartyKitInfo) *v1.ConfigMap { labels := map[string]string{ - common.LabelController: kusciaTaskLabelValue, + common.LabelController: common.ControllerKusciaTask, common.LabelTaskUID: string(partyKit.kusciaTask.UID), } @@ -869,7 +888,7 @@ func (h *PendingHandler) submitConfigMap(cm *v1.ConfigMap) error { func (h *PendingHandler) generatePod(partyKit *PartyKitInfo, podKit *PodKitInfo) (*v1.Pod, error) { labels := map[string]string{ - common.LabelController: kusciaTaskLabelValue, + common.LabelController: common.ControllerKusciaTask, common.LabelCommunicationRoleServer: "true", common.LabelCommunicationRoleClient: "true", labelKusciaTaskPodIdentity: podKit.podIdentity, @@ -1113,6 +1132,7 @@ func generateServices(partyKit *PartyKitInfo, pod *v1.Pod, serviceName string, p svc.Labels = map[string]string{ common.LabelPortName: port.Name, common.LabelPortScope: string(port.Scope), + common.LabelTaskUID: string(partyKit.kusciaTask.UID), } var protocolType string @@ -1132,6 +1152,7 @@ func generateServices(partyKit *PartyKitInfo, pod *v1.Pod, serviceName string, p common.InitiatorAnnotationKey: partyKit.kusciaTask.Spec.Initiator, common.ProtocolAnnotationKey: string(port.Protocol), common.AccessDomainAnnotationKey: partyKit.portAccessDomains[port.Name], + common.TaskIDAnnotationKey: partyKit.kusciaTask.Name, } return svc, nil diff --git a/pkg/controllers/kusciatask/handler/pending_handler_test.go b/pkg/controllers/kusciatask/handler/pending_handler_test.go index 56d285af..65a673df 100644 --- a/pkg/controllers/kusciatask/handler/pending_handler_test.go +++ b/pkg/controllers/kusciatask/handler/pending_handler_test.go @@ -161,7 +161,7 @@ func Test_mergeDeployTemplate(t *testing.T) { } func Test_generatePortAccessDomains(t *testing.T) { - testParties := []kusciaapisv1alpha1.PartyInfo{ + parties := []kusciaapisv1alpha1.PartyInfo{ { DomainID: "domain-a", Role: "server", @@ -176,13 +176,30 @@ func Test_generatePortAccessDomains(t *testing.T) { }, } + ports := NamedPorts{ + "port-10000": kusciaapisv1alpha1.ContainerPort{ + Name: "port-10000", + Port: 10000, + Protocol: "HTTP", + Scope: kusciaapisv1alpha1.ScopeCluster, + }, + "port-10001": kusciaapisv1alpha1.ContainerPort{ + Name: "port-10001", + Port: 10001, + Protocol: "HTTP", + Scope: kusciaapisv1alpha1.ScopeDomain, + }, + } + tests := 
[]struct { + name string parties []kusciaapisv1alpha1.PartyInfo networkPolicy *kusciaapisv1alpha1.NetworkPolicy wantPortAccessDomains map[string][]string }{ { - parties: testParties, + name: "domain-b,domain-c can access all port, domain-a only can access one port", + parties: parties, networkPolicy: &kusciaapisv1alpha1.NetworkPolicy{ Ingresses: []kusciaapisv1alpha1.Ingress{ { @@ -216,11 +233,19 @@ func Test_generatePortAccessDomains(t *testing.T) { "port-10002": {"domain-a", "domain-b", "domain-c"}, }, }, + { + name: "domain-a, domain-b and domain-c can access cluster ports", + parties: parties, + networkPolicy: nil, + wantPortAccessDomains: map[string][]string{ + "port-10000": {"domain-a", "domain-b", "domain-c"}, + }, + }, } - for i, tt := range tests { - t.Run(fmt.Sprintf("TestCase %d", i), func(t *testing.T) { - accessDomains := generatePortAccessDomains(tt.parties, tt.networkPolicy) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + accessDomains := generatePortAccessDomains(tt.parties, tt.networkPolicy, ports) accessDomainsConverted := map[string][]string{} for port, domains := range accessDomains { diff --git a/pkg/controllers/portflake/contoller.go b/pkg/controllers/portflake/contoller.go index 9b7fabe8..96edaf95 100644 --- a/pkg/controllers/portflake/contoller.go +++ b/pkg/controllers/portflake/contoller.go @@ -159,15 +159,19 @@ func (c *PortController) handleDeploymentEvent(deployment *appsv1.Deployment, ev portProvider := port.GetPortProvider(deployment.Namespace) switch event { case ResourceEventAdd: - portProvider.AddIndeed(deployment.Name, podPorts) + portProvider.AddIndeed(buildDeploymentOwnerName(deployment.Name), podPorts) case ResourceEventDelete: - portProvider.DeleteIndeed(deployment.Name, podPorts) + portProvider.DeleteIndeed(buildDeploymentOwnerName(deployment.Name), podPorts) default: nlog.Errorf("Unsupported deployment event: %v", event) } } func (c *PortController) handlePodEvent(pod *corev1.Pod, event ResourceEvent) { + if pod.Labels == nil || pod.Labels[common.LabelController] != common.ControllerKusciaTask { + return + } + nlog.Debugf("Receive pod event [%v], namespace=%v, pod name=%v", event, pod.Namespace, pod.Name) // fetch pod ports @@ -181,9 +185,9 @@ func (c *PortController) handlePodEvent(pod *corev1.Pod, event ResourceEvent) { portProvider := port.GetPortProvider(pod.Namespace) switch event { case ResourceEventAdd: - portProvider.AddIndeed(pod.Name, podPorts) + portProvider.AddIndeed(buildPodOwnerName(pod.Name), podPorts) case ResourceEventDelete: - portProvider.DeleteIndeed(pod.Name, podPorts) + portProvider.DeleteIndeed(buildPodOwnerName(pod.Name), podPorts) default: nlog.Errorf("Unsupported pod event: %v", event) } @@ -236,3 +240,11 @@ func (c *PortController) Stop() { func (c *PortController) Name() string { return controllerName } + +func buildDeploymentOwnerName(deploymentName string) string { + return fmt.Sprintf("deployment:%s", deploymentName) +} + +func buildPodOwnerName(podName string) string { + return fmt.Sprintf("pod:%s", podName) +} diff --git a/pkg/controllers/portflake/controller_test.go b/pkg/controllers/portflake/controller_test.go index bb8622c6..c9ba6dd0 100644 --- a/pkg/controllers/portflake/controller_test.go +++ b/pkg/controllers/portflake/controller_test.go @@ -62,7 +62,7 @@ func TestPortController(t *testing.T) { pod.Name = "pod_a" pod.Namespace = "ns_a" pod.Labels = map[string]string{ - common.LabelController: "111", + common.LabelController: common.ControllerKusciaTask, } pod.Spec.Containers = 
[]corev1.Container{ { diff --git a/pkg/controllers/portflake/port/port_provider.go b/pkg/controllers/portflake/port/port_provider.go index 0e706989..990dcac5 100644 --- a/pkg/controllers/portflake/port/port_provider.go +++ b/pkg/controllers/portflake/port/port_provider.go @@ -23,7 +23,7 @@ import ( ) const ( - DefaultMaxNotVerifiedPortAge = 60 * 5 + DefaultMaxNotVerifiedPortAge = 30 ) type BaseInfo struct { @@ -65,6 +65,9 @@ func (pp *Provider) addPort(port int) { func (pp *Provider) Allocate(count int) ([]int32, error) { nlog.Debugf("Allocate port, count=%v, namespace=%v", count, pp.namespace) + if count <= 0 { + return []int32{}, nil + } choosePorts := make(map[int]bool, count) @@ -125,7 +128,7 @@ func (pp *Provider) addPortIndeed(owner string, port int) error { if !ok { info = &BaseInfo{owner: owner} pp.ports[port] = info - } else if len(info.owner) > 0 { + } else if info.owner != "" && info.owner != owner { return fmt.Errorf("port conflict, current owner=%v, new owner=%v, namespace=%v", info.owner, owner, pp.namespace) } else { info.owner = owner diff --git a/pkg/coredns/handler.go b/pkg/coredns/handler.go index 32f584ca..6e451e0d 100644 --- a/pkg/coredns/handler.go +++ b/pkg/coredns/handler.go @@ -1,17 +1,3 @@ -// Copyright 2023 Ant Group Co., Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - package coredns import ( diff --git a/pkg/coredns/xfr.go b/pkg/coredns/xfr.go index 17b06f3e..0d4d6427 100644 --- a/pkg/coredns/xfr.go +++ b/pkg/coredns/xfr.go @@ -1,17 +1,3 @@ -// Copyright 2023 Ant Group Co., Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- package coredns import ( diff --git a/pkg/gateway/commands/root.go b/pkg/gateway/commands/root.go index a6b5e131..8592664c 100644 --- a/pkg/gateway/commands/root.go +++ b/pkg/gateway/commands/root.go @@ -110,8 +110,10 @@ func Run(ctx context.Context, gwConfig *config.GatewayConfig, clients *kubeconfi kusciaInformerFactory := informers.NewSharedInformerFactoryWithOptions(clients.KusciaClient, defaultResync, informers.WithNamespace(gwConfig.DomainID)) + gatewayInformer := kusciaInformerFactory.Kuscia().V1alpha1().Gateways() + // start GatewayController - gwc, err := controller.NewGatewayController(gwConfig.DomainID, prikey, clients.KusciaClient, kusciaInformerFactory.Kuscia().V1alpha1().Gateways()) + gwc, err := controller.NewGatewayController(gwConfig.DomainID, prikey, clients.KusciaClient, gatewayInformer) if err != nil { return fmt.Errorf("failed to new gateway controller, detail-> %v", err) } @@ -149,7 +151,7 @@ func Run(ctx context.Context, gwConfig *config.GatewayConfig, clients *kubeconfi drc := controller.NewDomainRouteController(drConfig, clients.KubeClient, clients.KusciaClient, drInformer) go drc.Run(ctx, concurrentSyncs*2, ctx.Done()) - pm, err := poller.NewPollManager(isMaster, gwConfig.DomainID, gwc.GatewayName(), serviceInformer, drInformer) + pm, err := poller.NewPollManager(isMaster, gwConfig.DomainID, gwc.GatewayName(), serviceInformer, drInformer, gatewayInformer) go pm.Run(concurrentSyncs, ctx.Done()) // start runtime metrics collector diff --git a/pkg/gateway/controller/domain_route.go b/pkg/gateway/controller/domain_route.go index 312432f3..19b22cb4 100644 --- a/pkg/gateway/controller/domain_route.go +++ b/pkg/gateway/controller/domain_route.go @@ -24,6 +24,7 @@ import ( "sync" "time" + "github.com/golang/protobuf/ptypes/duration" gocache "github.com/patrickmn/go-cache" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" @@ -254,18 +255,27 @@ func (c *DomainRouteController) syncHandler(ctx context.Context, key string) err } // try to update poller && receiver rule first - if err := c.updatePollerReceiverFilter(dr); err != nil { - nlog.Warnf("Failed to update poller or receiver filter: %v", err) + if utils.IsReverseTunnelTransit(dr.Spec.Transit) { + if err := c.updatePollerReceiverXds(dr); err != nil { + nlog.Warnf("Failed to update poller or receiver filter: %v", err) + return err + } } is3rdParty := utils.IsThirdPartyTransit(dr.Spec.Transit) - if dr.Spec.Source == c.gateway.Namespace && !is3rdParty && dr.Spec.Destination != c.masterNamespace { - if err := c.addClusterWithEnvoy(dr); err != nil { - return fmt.Errorf("add envoy cluster failed with %s", err.Error()) + if dr.Spec.Source == c.gateway.Namespace && dr.Spec.Destination != c.masterNamespace { + if is3rdParty { + if err := c.updateTransitVh(dr); err != nil { + return fmt.Errorf("add transit route failed with %s", err.Error()) + } + } else { + if err := c.addClusterWithEnvoy(dr); err != nil { + return fmt.Errorf("add envoy cluster failed with %s", err.Error()) + } } } - if (dr.Spec.BodyEncryption != nil || (dr.Spec.AuthenticationType == kusciaapisv1alpha1.DomainAuthenticationToken && !is3rdParty)) && + if (dr.Spec.BodyEncryption != nil || (dr.Spec.AuthenticationType == kusciaapisv1alpha1.DomainAuthenticationToken)) && (dr.Spec.TokenConfig.TokenGenMethod == kusciaapisv1alpha1.TokenGenMethodRSA || dr.Spec.TokenConfig.TokenGenMethod == kusciaapisv1alpha1.TokenGenUIDRSA) { if dr.Spec.Source == c.gateway.Namespace && dr.Status.TokenStatus.RevisionInitializer == c.gateway.Name { if 
dr.Status.TokenStatus.RevisionToken.Token == "" { @@ -428,29 +438,27 @@ func (c *DomainRouteController) addClusterWithEnvoy(dr *kusciaapisv1alpha1.Domai return nil } -func (c *DomainRouteController) updatePollerReceiverFilter(dr *kusciaapisv1alpha1.DomainRoute) error { +func (c *DomainRouteController) updatePollerReceiverXds(dr *kusciaapisv1alpha1.DomainRoute) error { if dr.Spec.Source == c.gateway.Namespace { // internal - if utils.IsGatewayTceTransit(dr.Spec.Transit) { - rule := kusciareceiver.ReceiverRule{ - Source: dr.Spec.Source, - Destination: dr.Spec.Destination, - } - if err := xds.UpdateReceiverRules(&rule, true); err != nil { - return err - } + rule := kusciareceiver.ReceiverRule{ + Source: dr.Spec.Source, + Destination: dr.Spec.Destination, + } + if err := xds.UpdateReceiverRules(&rule, true); err != nil { + return err + } + if err := xds.AddOrUpdateVirtualHost(generateReceiverExternalVh(dr), xds.ExternalRoute); err != nil { + return err + } + if err := xds.AddOrUpdateVirtualHost(generateReceiverInternalVh(dr), xds.InternalRoute); err != nil { + return err } - } else if dr.Spec.Destination == c.gateway.Namespace { // external - if !utils.IsThirdPartyTransit(dr.Spec.Transit) { - if utils.IsGatewayTceTransit(dr.Spec.Transit) { - pollHeader := generatePollHeaders(dr) - if err := xds.UpdatePoller(pollHeader, true); err != nil { - return err - } - } + pollHeader := generatePollHeaders(dr) + if err := xds.UpdatePoller(pollHeader, true); err != nil { + return err } } - return nil } @@ -493,10 +501,10 @@ func (c *DomainRouteController) updateEnvoyRule(dr *kusciaapisv1alpha1.DomainRou // next step with two cases // case1: transit route, just clone routing rule from source-to-transitDomainID + // move up to sync handler if utils.IsThirdPartyTransit(dr.Spec.Transit) { - return c.updateRoutingRule(dr) + return nil } - // case2: direct route, add virtualhost: source-to-dest-Protocol if err := xds.AddOrUpdateVirtualHost(generateInternalVirtualHost(dr, token.Token, grpcDegrade), xds.InternalRoute); err != nil { @@ -535,7 +543,7 @@ func (c *DomainRouteController) deleteEnvoyRule(dr *kusciaapisv1alpha1.DomainRou if err := xds.DeleteVirtualHost(name, xds.InternalRoute); err != nil { return fmt.Errorf("delete virtual host %s failed with %v", name, err) } - if utils.IsGatewayTceTransit(dr.Spec.Transit) { + if utils.IsReverseTunnelTransit(dr.Spec.Transit) { rule := kusciareceiver.ReceiverRule{ Source: dr.Spec.Source, Destination: dr.Spec.Destination, @@ -576,7 +584,7 @@ func (c *DomainRouteController) deleteEnvoyRule(dr *kusciaapisv1alpha1.DomainRou if err := xds.UpdateTokenAuthAndHeaderDecorator(sourceToken, sourceHeader, false); err != nil { return err } - if utils.IsGatewayTceTransit(dr.Spec.Transit) { + if utils.IsReverseTunnelTransit(dr.Spec.Transit) { sourceHeader := &kusciapoller.Poller_SourceHeader{ Source: dr.Spec.Source, } @@ -590,7 +598,7 @@ func (c *DomainRouteController) deleteEnvoyRule(dr *kusciaapisv1alpha1.DomainRou return nil } -func (c *DomainRouteController) updateRoutingRule(dr *kusciaapisv1alpha1.DomainRoute) error { +func (c *DomainRouteController) updateTransitVh(dr *kusciaapisv1alpha1.DomainRoute) error { ns := dr.Spec.Transit.Domain.DomainID vhName := fmt.Sprintf("%s-to-%s", dr.Spec.Source, ns) if ns == c.masterNamespace && c.masterNamespace != c.gateway.Namespace { @@ -647,6 +655,104 @@ func updateDecryptFilter(dr *kusciaapisv1alpha1.DomainRoute, tokens []*Token) er return xds.UpdateDecryptRules(rule, true) } +func generateReceiverExternalVh(dr 
*kusciaapisv1alpha1.DomainRoute) *route.VirtualHost { + return &route.VirtualHost{ + Name: fmt.Sprintf("%s-to-%s-receiver-external", dr.Spec.Source, dr.Spec.Destination), + Domains: []string{fmt.Sprintf("receiver.%s.svc", dr.Spec.Source)}, + Routes: []*route.Route{ + { + Match: &route.RouteMatch{ + PathSpecifier: &route.RouteMatch_Prefix{ + Prefix: "/", + }, + }, + Action: &route.Route_Route{ + Route: &route.RouteAction{ + ClusterSpecifier: &route.RouteAction_Cluster{ + Cluster: utils.EnvoyClusterName, + }, + HashPolicy: []*route.RouteAction_HashPolicy{ + { + PolicySpecifier: &route.RouteAction_HashPolicy_Header_{ + Header: &route.RouteAction_HashPolicy_Header{ + HeaderName: utils.HeaderTransitHash, + }, + }, + Terminal: true, + }, + }, + Timeout: &duration.Duration{ + Seconds: 300, + }, + IdleTimeout: &duration.Duration{ + Seconds: 300, + }, + }, + }, + RequestHeadersToAdd: []*core.HeaderValueOption{ + { + Header: &core.HeaderValue{ + Key: utils.HeaderTransitFlag, + Value: "true", + }, + AppendAction: core.HeaderValueOption_OVERWRITE_IF_EXISTS_OR_ADD, + }, + }, + // prevent poll request terminated by envoy + RequestHeadersToRemove: []string{"x-envoy-expected-rq-timeout-ms"}, + }, + }, + } +} + +func generateReceiverInternalVh(dr *kusciaapisv1alpha1.DomainRoute) *route.VirtualHost { + return &route.VirtualHost{ + Name: fmt.Sprintf("%s-to-%s-receiver-internal", dr.Spec.Source, dr.Spec.Destination), + Domains: []string{fmt.Sprintf("kuscia-handshake.%s.svc", dr.Spec.Destination)}, + Routes: []*route.Route{ + { + Match: &route.RouteMatch{ + PathSpecifier: &route.RouteMatch_Prefix{ + Prefix: "/", + }, + }, + Action: &route.Route_Route{ + Route: &route.RouteAction{ + ClusterSpecifier: &route.RouteAction_Cluster{ + Cluster: utils.EnvoyClusterName, + }, + HashPolicy: []*route.RouteAction_HashPolicy{ + { + PolicySpecifier: &route.RouteAction_HashPolicy_Header_{ + Header: &route.RouteAction_HashPolicy_Header{ + HeaderName: ":authority", + }, + }, + Terminal: true, + }, + }, + Timeout: &duration.Duration{ + Seconds: 300, + }, + IdleTimeout: &duration.Duration{ + Seconds: 300, + }, + }, + }, + RequestHeadersToAdd: []*core.HeaderValueOption{ + { + Header: &core.HeaderValue{ + Key: utils.HeaderTransitFlag, + Value: "true", + }, + AppendAction: core.HeaderValueOption_OVERWRITE_IF_EXISTS_OR_ADD, + }, + }, + }, + }, + } +} + func generateInternalRoute(dr *kusciaapisv1alpha1.DomainRoute, dp kusciaapisv1alpha1.DomainPort, token string, isDefaultRoute bool, grpcDegrade bool) []*route.Route { httpRoutes := interconn.Decorator.GenerateInternalRoute(dr, dp, token) @@ -884,6 +990,9 @@ func (c *DomainRouteController) getClusterNamesByDomainRoute(dr *kusciaapisv1alp } func (c *DomainRouteController) getDefaultClusterNameByDomainRoute(dr *kusciaapisv1alpha1.DomainRoute) string { + if utils.IsThirdPartyTransit(dr.Spec.Transit) { + return "" + } if dr.Spec.Destination == c.masterNamespace && c.masterNamespace != c.gateway.Namespace { return clusters.GetMasterClusterName() } @@ -902,8 +1011,5 @@ func (c *DomainRouteController) getDefaultClusterNameByDomainRoute(dr *kusciaapi func getHandshakeHost(dr *kusciaapisv1alpha1.DomainRoute) string { ns := dr.Spec.Destination - if utils.IsThirdPartyTransit(dr.Spec.Transit) { - ns = dr.Spec.Transit.Domain.DomainID - } return fmt.Sprintf("%s.%s.svc", utils.ServiceHandshake, ns) } diff --git a/pkg/gateway/controller/gateway.go b/pkg/gateway/controller/gateway.go index a9835287..18f7307e 100644 --- a/pkg/gateway/controller/gateway.go +++ b/pkg/gateway/controller/gateway.go @@ -19,19 
+19,27 @@ import ( "crypto/rsa" "encoding/base64" "fmt" + "reflect" + "sort" "sync" "time" k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/client-go/tools/cache" + envoycluster "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" + core "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" + endpoint "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3" + kusciaapisv1alpha1 "github.com/secretflow/kuscia/pkg/crd/apis/kuscia/v1alpha1" kusciaclientset "github.com/secretflow/kuscia/pkg/crd/clientset/versioned" kusciaextv1alpha1 "github.com/secretflow/kuscia/pkg/crd/informers/externalversions/kuscia/v1alpha1" kuscialistersv1alpha1 "github.com/secretflow/kuscia/pkg/crd/listers/kuscia/v1alpha1" "github.com/secretflow/kuscia/pkg/gateway/utils" + "github.com/secretflow/kuscia/pkg/gateway/xds" "github.com/secretflow/kuscia/pkg/utils/meta" "github.com/secretflow/kuscia/pkg/utils/network" "github.com/secretflow/kuscia/pkg/utils/nlog" @@ -42,6 +50,10 @@ const ( heartbeatPeriod = 15 * time.Second ) +var ( + gwAddrs []string +) + // GatewayController sync gateway status periodically to master. type GatewayController struct { namespace string @@ -154,7 +166,6 @@ func (c *GatewayController) syncHandler() error { c.lock.Lock() defer c.lock.Unlock() - status.NetworkStatus = make([]kusciaapisv1alpha1.GatewayEndpointStatus, len(c.networkStatus)) status.NetworkStatus = append(status.NetworkStatus, c.networkStatus...) } @@ -164,8 +175,69 @@ func (c *GatewayController) syncHandler() error { _, err = client.UpdateStatus(context.Background(), gatewayCopy, metav1.UpdateOptions{}) if err != nil { nlog.Errorf("update gateway(name:%s namespace:%s) fail: %v", c.hostname, c.namespace, err) + return err + } + gws, err := c.gatewayLister.Gateways(c.namespace).List(labels.Everything()) + if err != nil { + nlog.Errorf("get gateway list(namespace:%s) fail: %v", c.namespace, err) + return err + } + thresh := time.Now().Add(-2 * heartbeatPeriod) + var ga []string + for _, gw := range gws { + if gw.Status.HeartbeatTime.After(thresh) { + ga = append(ga, gw.Status.Address) + } + } + sort.Strings(ga) + if !reflect.DeepEqual(gwAddrs, ga) { + nlog.Infof("Envoy cluster changed, old: %+v new: %+v", gwAddrs, ga) + + gwAddrs = ga + xds.AddOrUpdateCluster(c.createEnvoyCluster(utils.EnvoyClusterName, gwAddrs, 80)) + } + return nil +} + +func (c *GatewayController) createEnvoyCluster(name string, addrs []string, port uint32) *envoycluster.Cluster { + exists := map[string]bool{} + var endpoints []*endpoint.LbEndpoint + for _, addr := range addrs { + key := fmt.Sprintf("%s:%d", addr, port) + if exists[key] { + continue + } + exists[key] = true + endpoints = append(endpoints, &endpoint.LbEndpoint{ + HostIdentifier: &endpoint.LbEndpoint_Endpoint{ + Endpoint: &endpoint.Endpoint{ + Address: &core.Address{ + Address: &core.Address_SocketAddress{ + SocketAddress: &core.SocketAddress{ + Address: addr, + PortSpecifier: &core.SocketAddress_PortValue{ + PortValue: port, + }, + }, + }, + }, + }, + }, + }) + } + cluster := &envoycluster.Cluster{ + Name: name, + LoadAssignment: &endpoint.ClusterLoadAssignment{ + ClusterName: name, + Endpoints: []*endpoint.LocalityLbEndpoints{ + { + LbEndpoints: endpoints, + }, + }, + }, } - return err + xds.DecorateCluster(cluster) + return cluster } func (c *GatewayController) UpdateStatus(status []*kusciaapisv1alpha1.GatewayEndpointStatus) { diff --git 
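A note on the NetworkStatus change in gateway.go above: pre-allocating the slice with make before appending leaves len(c.networkStatus) zero-value entries at the front of the reported status, which is presumably why the make line is dropped. A minimal standalone sketch (not Kuscia code) of the difference:

package main

import "fmt"

func main() {
	src := []string{"gw-a", "gw-b"}

	// Buggy pattern: make pre-fills len(src) zero values, append adds after them.
	dst := make([]string, len(src))
	dst = append(dst, src...)
	fmt.Println(len(dst), dst) // 4 ["" "" gw-a gw-b]

	// Fixed pattern: append onto a nil (or zero-length) slice.
	var fixed []string
	fixed = append(fixed, src...)
	fmt.Println(len(fixed), fixed) // 2 [gw-a gw-b]
}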
a/pkg/gateway/controller/handshake.go b/pkg/gateway/controller/handshake.go index c893dc93..a8b53734 100644 --- a/pkg/gateway/controller/handshake.go +++ b/pkg/gateway/controller/handshake.go @@ -135,6 +135,7 @@ func (c *DomainRouteController) checkConnectionStatus(dr *kusciaapisv1alpha1.Dom ClusterName: clusterName, KusciaHost: getHandshakeHost(dr), KusciaSource: dr.Spec.Source, + Transit: utils.IsTransit(dr.Spec.Transit), Headers: headers} err := utils.DoHTTP(nil, out, hp) if err != nil { @@ -234,6 +235,7 @@ func (c *DomainRouteController) sourceInitiateHandShake(dr *kusciaapisv1alpha1.D KusciaSource: dr.Spec.Source, ClusterName: clusterName, KusciaHost: getHandshakeHost(dr), + Transit: utils.IsTransit(dr.Spec.Transit), }) if err != nil { nlog.Errorf("DomainRoute %s: handshake fail:%v", dr.Name, err) @@ -293,6 +295,7 @@ func (c *DomainRouteController) sourceInitiateHandShake(dr *kusciaapisv1alpha1.D KusciaSource: dr.Spec.Source, ClusterName: clusterName, KusciaHost: getHandshakeHost(dr), + Transit: utils.IsTransit(dr.Spec.Transit), }) if err != nil { nlog.Errorf("DomainRoute %s: handshake fail:%v", dr.Name, err) diff --git a/pkg/gateway/controller/interconn/kuscia_handler.go b/pkg/gateway/controller/interconn/kuscia_handler.go index da79cddf..06d67307 100644 --- a/pkg/gateway/controller/interconn/kuscia_handler.go +++ b/pkg/gateway/controller/interconn/kuscia_handler.go @@ -19,6 +19,7 @@ import ( "strings" "time" + "github.com/golang/protobuf/ptypes/duration" "google.golang.org/protobuf/types/known/durationpb" "google.golang.org/protobuf/types/known/wrapperspb" @@ -36,7 +37,69 @@ type KusciaHandler struct { func (handler *KusciaHandler) GenerateInternalRoute(dr *kusciaapisv1alpha1.DomainRoute, dp kusciaapisv1alpha1.DomainPort, token string) []*route.Route { clusterName := fmt.Sprintf("%s-to-%s-%s", dr.Spec.Source, dr.Spec.Destination, dp.Name) - action := generateDefaultRouteAction(dr, clusterName) + requestToAdd := []*core.HeaderValueOption{ + { + Header: &core.HeaderValue{ + Key: interConnProtocolHeader, + Value: string(kusciaapisv1alpha1.InterConnKuscia), + }, + AppendAction: core.HeaderValueOption_OVERWRITE_IF_EXISTS_OR_ADD, + }, + { + Header: &core.HeaderValue{ + Key: "Kuscia-Host", + Value: "%REQ(:authority)%", + }, + AppendAction: core.HeaderValueOption_OVERWRITE_IF_EXISTS_OR_ADD, + }, + { + Header: &core.HeaderValue{ + Key: "Kuscia-Source", + Value: dr.Spec.Source, + }, + AppendAction: core.HeaderValueOption_OVERWRITE_IF_EXISTS_OR_ADD, + }, + { + Header: &core.HeaderValue{ + Key: "Kuscia-Token", + Value: token, + }, + AppendAction: core.HeaderValueOption_OVERWRITE_IF_EXISTS_OR_ADD, + }, + } + isReverseTunnel := utils.IsReverseTunnelTransit(dr.Spec.Transit) + if isReverseTunnel { + clusterName = utils.EnvoyClusterName + requestToAdd = append(requestToAdd, &core.HeaderValueOption{ + Header: &core.HeaderValue{ + Key: utils.HeaderTransitFlag, + Value: "true", + }, + AppendAction: core.HeaderValueOption_OVERWRITE_IF_EXISTS_OR_ADD, + }) + } + action := xds.AddDefaultTimeout(generateDefaultRouteAction(dr, clusterName)) + if isReverseTunnel { + action.HostRewriteSpecifier = &route.RouteAction_AutoHostRewrite{ + AutoHostRewrite: wrapperspb.Bool(false), + } + action.HashPolicy = []*route.RouteAction_HashPolicy{ + { + PolicySpecifier: &route.RouteAction_HashPolicy_Header_{ + Header: &route.RouteAction_HashPolicy_Header{ + HeaderName: ":authority", + }, + }, + Terminal: true, + }, + } + action.Timeout = &duration.Duration{ + Seconds: 300, + } + action.IdleTimeout = &duration.Duration{ + 
Seconds: 300, + } + } if len(dp.PathPrefix) > 0 { action.PrefixRewrite = strings.TrimSuffix(dp.PathPrefix, "/") + "/" } @@ -46,40 +109,10 @@ func (handler *KusciaHandler) GenerateInternalRoute(dr *kusciaapisv1alpha1.Domai Prefix: "/", }, }, - Action: &route.Route_Route{ - Route: xds.AddDefaultTimeout(action), - }, - RequestHeadersToAdd: []*core.HeaderValueOption{ - { - Header: &core.HeaderValue{ - Key: interConnProtocolHeader, - Value: string(kusciaapisv1alpha1.InterConnKuscia), - }, - AppendAction: core.HeaderValueOption_OVERWRITE_IF_EXISTS_OR_ADD, - }, - { - Header: &core.HeaderValue{ - Key: "Kuscia-Host", - Value: "%REQ(:authority)%", - }, - AppendAction: core.HeaderValueOption_OVERWRITE_IF_EXISTS_OR_ADD, - }, - { - Header: &core.HeaderValue{ - Key: "Kuscia-Source", - Value: dr.Spec.Source, - }, - AppendAction: core.HeaderValueOption_OVERWRITE_IF_EXISTS_OR_ADD, - }, - { - Header: &core.HeaderValue{ - Key: "Kuscia-Token", - Value: token, - }, - AppendAction: core.HeaderValueOption_OVERWRITE_IF_EXISTS_OR_ADD, - }, + Route: action, }, + RequestHeadersToAdd: requestToAdd, } return []*route.Route{httpRoute} } diff --git a/pkg/gateway/controller/poller/poll_client.go b/pkg/gateway/controller/poller/poll_client.go index 8fcd9c28..f64d093a 100644 --- a/pkg/gateway/controller/poller/poll_client.go +++ b/pkg/gateway/controller/poller/poll_client.go @@ -24,6 +24,7 @@ import ( "time" "github.com/secretflow/kuscia/pkg/common" + "github.com/secretflow/kuscia/pkg/gateway/utils" "github.com/secretflow/kuscia/pkg/utils/nlog" ) @@ -39,6 +40,7 @@ type PollConnection struct { connID string client *http.Client receiverAddress string + hashPolicyValue string serviceName string connected chan struct{} disconnected chan struct{} @@ -51,11 +53,12 @@ type PollConnection struct { pollRetryInterval time.Duration } -func NewPollConnection(index int, client *http.Client, receiverDomain, serviceName string) *PollConnection { +func NewPollConnection(index int, client *http.Client, receiverDomain, pollerDomain, serviceName string) *PollConnection { return &PollConnection{ connID: fmt.Sprintf("%s:%s:%d", serviceName, receiverDomain, index), client: client, receiverAddress: fmt.Sprintf("%s.%s.svc", common.ReceiverServiceName, receiverDomain), + hashPolicyValue: fmt.Sprintf("%s.%s.svc", serviceName, pollerDomain), serviceName: serviceName, connected: make(chan struct{}), disconnected: make(chan struct{}), @@ -119,6 +122,7 @@ func (conn *PollConnection) connect(ctx context.Context) error { if err != nil { return err } + req.Header.Set(utils.HeaderTransitHash, conn.hashPolicyValue) resp, err := conn.client.Do(req) if err != nil { @@ -142,14 +146,16 @@ type PollClient struct { client *http.Client serviceName string receiverDomain string + pollerDomain string } -func newPollClient(client *http.Client, serviceName, receiverDomain string) *PollClient { +func newPollClient(client *http.Client, serviceName, receiverDomain, pollerDomain string) *PollClient { return &PollClient{ client: client, serviceName: serviceName, stopCh: make(chan struct{}), receiverDomain: receiverDomain, + pollerDomain: pollerDomain, } } @@ -169,7 +175,7 @@ func (pc *PollClient) pollReceiver() { nlog.Infof("Stop poll connection %v", buildPollerKey(pc.serviceName, pc.receiverDomain)) return default: - conn := NewPollConnection(index, pc.client, pc.receiverDomain, pc.serviceName) + conn := NewPollConnection(index, pc.client, pc.receiverDomain, pc.pollerDomain, pc.serviceName) conn.Start(ctx) index++ if index > 999999 { diff --git 
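One observation on the 300-second timeouts added to kuscia_handler.go above: the file already imports durationpb as a context line, and on golang/protobuf v1.4+ the ptypes/duration type is only a deprecated alias of durationpb.Duration, so the extra import could likely be avoided. A hedged sketch, assuming that protobuf version range applies here:

package main

import (
	"fmt"
	"time"

	"google.golang.org/protobuf/types/known/durationpb"
)

func main() {
	// Equivalent to &duration.Duration{Seconds: 300} from the deprecated
	// ptypes/duration package, built from the durationpb package instead.
	timeout := durationpb.New(300 * time.Second)
	fmt.Println(timeout.GetSeconds()) // 300
}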
a/pkg/gateway/controller/poller/poll_client_test.go b/pkg/gateway/controller/poller/poll_client_test.go index 0fa39449..ae28c92b 100644 --- a/pkg/gateway/controller/poller/poll_client_test.go +++ b/pkg/gateway/controller/poller/poll_client_test.go @@ -52,7 +52,7 @@ func createTestPollConnection(index int) *PollConnection { DialContext: dialer.DialContext, }, } - pc := NewPollConnection(index, client, "test", "test.alice.svc") + pc := NewPollConnection(index, client, "test", "test.alice.svc", "") pc.receiverAddress = "127.0.0.1:12345" pc.forceReconnectIntervalBase = time.Millisecond * 1000 pc.forceReconnectIntervalMaxJitter = 1 diff --git a/pkg/gateway/controller/poller/poll_manager.go b/pkg/gateway/controller/poller/poll_manager.go index 4a77e3a7..64ffe8a0 100644 --- a/pkg/gateway/controller/poller/poll_manager.go +++ b/pkg/gateway/controller/poller/poll_manager.go @@ -23,6 +23,7 @@ import ( "sync" "time" + "github.com/stathat/consistent" v1 "k8s.io/api/core/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" apismetav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -44,9 +45,10 @@ import ( ) const ( - processPeriod = time.Second - defaultSyncPeriod = 10 * time.Minute - maxRetries = 16 + processPeriod = time.Second + defaultSyncPeriod = 10 * time.Minute + maxRetries = 16 + defaultGatewayTickTime = 15 * time.Second svcPollQueueName = "service-poll-queue" drPollQueueName = "domain-route-poll-queue" @@ -54,17 +56,13 @@ const ( type PollState int -const ( - PollStateUnknown PollState = iota - PollStateNotPoll - PollStatePolling -) - type PollManager struct { serviceLister corelisters.ServiceLister serviceListerSynced cache.InformerSynced domainRouteLister kuscialistersv1alpha1.DomainRouteLister domainRouteListerSynced cache.InformerSynced + gatewayLister kuscialistersv1alpha1.GatewayLister + gatewayListerSynced cache.InformerSynced svcQueue workqueue.RateLimitingInterface drQueue workqueue.RateLimitingInterface gatewayName string @@ -75,11 +73,14 @@ type PollManager struct { pollers map[string]map[string]*PollClient pollersLock sync.Mutex - sourcePollState map[string]PollState - pollStateLock sync.RWMutex + receiverDomains map[string]bool + receiverDomainsLock sync.RWMutex + + consist *consistent.Consistent } -func NewPollManager(isMaster bool, selfNamespace, gatewayName string, serviceInformer corev1informers.ServiceInformer, domainRouteInformer kusciaextv1alpha1.DomainRouteInformer) (*PollManager, error) { +func NewPollManager(isMaster bool, selfNamespace, gatewayName string, serviceInformer corev1informers.ServiceInformer, + domainRouteInformer kusciaextv1alpha1.DomainRouteInformer, gatewayInformer kusciaextv1alpha1.GatewayInformer) (*PollManager, error) { dialer := &net.Dialer{ Timeout: 10 * time.Second, } @@ -96,11 +97,14 @@ func NewPollManager(isMaster bool, selfNamespace, gatewayName string, serviceInf serviceListerSynced: serviceInformer.Informer().HasSynced, domainRouteLister: domainRouteInformer.Lister(), domainRouteListerSynced: domainRouteInformer.Informer().HasSynced, + gatewayLister: gatewayInformer.Lister(), + gatewayListerSynced: gatewayInformer.Informer().HasSynced, svcQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), svcPollQueueName), drQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), drPollQueueName), client: client, pollers: map[string]map[string]*PollClient{}, - sourcePollState: map[string]PollState{}, + receiverDomains: map[string]bool{}, + consist: consistent.New(), } if err := 
pm.addServiceEventHandler(serviceInformer); err != nil { @@ -121,8 +125,18 @@ func (pm *PollManager) Run(workers int, stopCh <-chan struct{}) { }() nlog.Info("Waiting for informer caches to sync") - if !cache.WaitForNamedCacheSync("poll manager", stopCh, pm.serviceListerSynced, pm.domainRouteListerSynced) { - nlog.Fatal("failed to wait for caches to sync") + if !cache.WaitForNamedCacheSync("poll manager", stopCh, pm.serviceListerSynced, pm.domainRouteListerSynced, pm.gatewayListerSynced) { + nlog.Fatal("Failed to wait for caches to sync") + } + + gatewayList, err := pm.gatewayLister.List(labels.Everything()) + if err != nil { + nlog.Fatalf("Failed to list gateways: %v", err) + } + for _, gateway := range gatewayList { + if isGatewayAlive(gateway) { + pm.consist.Add(gateway.Name) + } } nlog.Info("Starting poll manager ") @@ -130,13 +144,14 @@ func (pm *PollManager) Run(workers int, stopCh <-chan struct{}) { go wait.Until(pm.runServiceWorker, processPeriod, stopCh) go wait.Until(pm.runDomainRouteWorker, processPeriod, stopCh) } + go pm.runGatewayTicker(stopCh) <-stopCh nlog.Info("Shutting down poll manager") } func (pm *PollManager) runServiceWorker() { - for queue.HandleQueueItem(context.Background(), svcPollQueueName, pm.svcQueue, pm.syncHandlerService, maxRetries) { + for queue.HandleQueueItem(context.Background(), svcPollQueueName, pm.svcQueue, pm.syncHandleService, maxRetries) { } } @@ -145,6 +160,80 @@ func (pm *PollManager) runDomainRouteWorker() { } } +func (pm *PollManager) runGatewayTicker(stopCh <-chan struct{}) { + ticker := time.NewTicker(defaultGatewayTickTime) + defer ticker.Stop() + for { + select { + case <-stopCh: + return + case <-ticker.C: + if err := pm.updateAliveGateways(); err != nil { + nlog.Errorf("Failed to update alive gateways: %v", err) + } + } + } +} + +func (pm *PollManager) updateAliveGateways() error { + gatewayList, err := pm.gatewayLister.List(labels.Everything()) + if err != nil { + return err + } + + members := pm.consist.Members() + aliveGateways := map[string]bool{} + for _, gateway := range gatewayList { + if isGatewayAlive(gateway) { + aliveGateways[gateway.Name] = true + } + } + + membersChanged := false + if len(members) != len(aliveGateways) { + nlog.Infof("Current gateway member count is %d, while alive gateway count is %d", len(members), len(aliveGateways)) + membersChanged = true + } else { + for _, member := range members { + if !aliveGateways[member] { + nlog.Infof("Gateway Member %q is not alive now", member) + membersChanged = true + } + } + } + + if !membersChanged { + return nil + } + + var newMembers []string + for gatewayName := range aliveGateways { + newMembers = append(newMembers, gatewayName) + } + + nlog.Infof("Gateway Members changed, old: %+v new: %+v", members, newMembers) + + pm.consist.Set(newMembers) + pm.handleAllServices() + + return nil +} + +func isGatewayAlive(gateway *kusciaapisv1alpha1.Gateway) bool { + return time.Since(gateway.Status.HeartbeatTime.Time) <= 2*defaultGatewayTickTime +} + +func (pm *PollManager) memberExist(gatewayName string) bool { + members := pm.consist.Members() + for _, member := range members { + if member == gatewayName { + return true + } + } + + return false +} + func (pm *PollManager) addServiceEventHandler(serviceInformer corev1informers.ServiceInformer) error { _, err := serviceInformer.Informer().AddEventHandlerWithResyncPeriod( cache.FilteringResourceEventHandler{ @@ -249,7 +338,7 @@ func (pm *PollManager) domainRouteFilter(dr *kusciaapisv1alpha1.DomainRoute) boo return true } -func (pm 
*PollManager) syncHandlerService(ctx context.Context, key string) error { +func (pm *PollManager) syncHandleService(ctx context.Context, key string) error { startTime := time.Now() defer func() { nlog.Debugf("Finished syncing service %q (%v)", key, time.Since(startTime)) @@ -260,37 +349,56 @@ func (pm *PollManager) syncHandlerService(ctx context.Context, key string) error return err } - service, err := pm.serviceLister.Services(namespace).Get(name) - if k8serrors.IsNotFound(err) { - pm.removePollConnection(name, nil) - return nil - } else if err != nil { - return err - } + domains := map[string]bool{} - domains := pm.buildReceiverDomainsByService(service) + if isDefaultService(key) { + domains = pm.buildReceiverDomainsAll(name) + } else { + service, err := pm.serviceLister.Services(namespace).Get(name) + if k8serrors.IsNotFound(err) { + pm.setPollConnection(name, nil) + return nil + } else if err != nil { + return err + } - pm.addPollConnection(name, domains) + domains = pm.buildReceiverDomainsByService(service) + } + + pm.setPollConnection(name, domains) return nil } -func (pm *PollManager) buildReceiverDomainsByService(service *v1.Service) []string { - var retDomains []string +func (pm *PollManager) buildReceiverDomainsByService(service *v1.Service) map[string]bool { if service.Annotations != nil && service.Annotations[common.AccessDomainAnnotationKey] != "" { + retDomains := map[string]bool{} + domains := strings.Split(service.Annotations[common.AccessDomainAnnotationKey], ",") for _, domain := range domains { - state := pm.getDomainPollState(domain) - if !isPollingState(state) { + if !pm.isReceiverDomain(domain) { nlog.Debugf("Need not to poll domain %s", domain) continue } - // TODO Check if this instance needs to be processed - retDomains = append(retDomains, domain) + if pm.needSelfPoll(service.Name, domain) { + retDomains[domain] = true + } + } + return retDomains + } + + return pm.buildReceiverDomainsAll(service.Name) +} + +func (pm *PollManager) buildReceiverDomainsAll(serviceName string) map[string]bool { + retDomains := map[string]bool{} + + receiverDomains := pm.getReceiverDomains() + for _, domain := range receiverDomains { + if pm.needSelfPoll(serviceName, domain) { + retDomains[domain] = true } - } else { - retDomains = pm.getPollingDomains() } return retDomains @@ -309,14 +417,14 @@ func (pm *PollManager) syncHandleDomainRoute(ctx context.Context, key string) er dr, err := pm.domainRouteLister.DomainRoutes(pm.selfNamespace).Get(name) if k8serrors.IsNotFound(err) { - pm.domainRouteDeleted(sourceDomain) + pm.needNotPollSource(sourceDomain) return nil } else if err != nil { return err } if dr.Spec.Transit == nil || dr.Spec.Transit.TransitMethod != kusciaapisv1alpha1.TransitMethodReverseTunnel { - pm.needNotPollSource(dr) + pm.needNotPollSource(dr.Spec.Source) return nil } @@ -327,84 +435,35 @@ func (pm *PollManager) syncHandleDomainRoute(ctx context.Context, key string) er return nil } -func (pm *PollManager) getDomainPollState(domain string) PollState { - pm.pollStateLock.RLock() - defer pm.pollStateLock.RUnlock() - - return pm.sourcePollState[domain] -} - -func (pm *PollManager) getPollingDomains() []string { - pm.pollStateLock.RLock() - defer pm.pollStateLock.RUnlock() +func (pm *PollManager) needNotPollSource(sourceDomain string) { + pm.receiverDomainsLock.Lock() + defer pm.receiverDomainsLock.Unlock() - var domains []string - for domain, state := range pm.sourcePollState { - if isPollingState(state) { - domains = append(domains, domain) - } - } - - return domains -} - 
-func (pm *PollManager) removeDRPollConnection(receiverDomain string) { - pm.removePollConnection(utils.ServiceAPIServer, []string{receiverDomain}) - pm.removePollConnection(utils.ServiceHandshake, []string{receiverDomain}) - - pm.handleServicesByDomainRoute(receiverDomain, func(serviceName string, receiverDomain string) { - nlog.Infof("Prepare to remove poll connection %q", buildPollerKey(serviceName, receiverDomain)) - pm.removePollConnection(serviceName, []string{receiverDomain}) - }) -} - -func (pm *PollManager) domainRouteDeleted(sourceDomain string) { - pm.pollStateLock.Lock() - defer pm.pollStateLock.Unlock() - - if isPollingState(pm.sourcePollState[sourceDomain]) { - delete(pm.sourcePollState, sourceDomain) - pm.removeDRPollConnection(sourceDomain) - } -} - -func (pm *PollManager) needNotPollSource(dr *kusciaapisv1alpha1.DomainRoute) { - pm.pollStateLock.Lock() - defer pm.pollStateLock.Unlock() - - if isPollingState(pm.sourcePollState[dr.Spec.Source]) { - pm.sourcePollState[dr.Spec.Source] = PollStateNotPoll - pm.removeDRPollConnection(dr.Spec.Source) + if pm.receiverDomains[sourceDomain] { + delete(pm.receiverDomains, sourceDomain) + pm.handleAllServices() } } func (pm *PollManager) needPollSource(dr *kusciaapisv1alpha1.DomainRoute) error { - pm.pollStateLock.Lock() - defer pm.pollStateLock.Unlock() + pm.receiverDomainsLock.Lock() + defer pm.receiverDomainsLock.Unlock() source := dr.Spec.Source - if pm.sourcePollState[source] == PollStatePolling { + if pm.receiverDomains[source] { nlog.Debugf("DomainRoute %s has triggered polling", dr.Name) return nil } - pm.sourcePollState[source] = PollStatePolling - - receiverDomains := []string{source} + pm.receiverDomains[source] = true - if pm.isMaster { - pm.addPollConnection(utils.ServiceAPIServer, receiverDomains) - } - pm.addPollConnection(utils.ServiceHandshake, receiverDomains) + nlog.Infof("DomainRoute %s triggered polling", dr.Name) - pm.handleServicesByDomainRoute(source, func(serviceName string, receiverDomain string) { - nlog.Infof("Prepare to add poll connection %q", buildPollerKey(serviceName, receiverDomain)) - pm.addPollConnection(serviceName, []string{receiverDomain}) - }) + pm.handleAllServices() return nil } -func (pm *PollManager) handleServicesByDomainRoute(sourceDomain string, pollConnectionHandler func(string, string)) { +func (pm *PollManager) handleAllServices() { services, err := pm.serviceLister.Services(pm.selfNamespace).List(labels.Everything()) if err != nil { nlog.Errorf("Failed to list services: %v", err) @@ -415,74 +474,104 @@ func (pm *PollManager) handleServicesByDomainRoute(sourceDomain string, pollConn if !serviceFilter(service) { continue } - if service.Annotations != nil && service.Annotations[common.AccessDomainAnnotationKey] != "" { - domains := strings.Split(service.Annotations[common.AccessDomainAnnotationKey], ",") - for _, domain := range domains { - if domain == sourceDomain { - pollConnectionHandler(service.Name, domain) - break - } - } - } else { - pollConnectionHandler(service.Name, sourceDomain) - } + + pm.enqueueService(service) + } + + defaultServices := []string{utils.ServiceHandshake} + if pm.isMaster { + defaultServices = append(defaultServices, utils.ServiceAPIServer) + } + + for _, service := range defaultServices { + pm.svcQueue.Add(service) } } -func (pm *PollManager) addPollConnection(serviceName string, domainList []string) { - if len(domainList) == 0 { - nlog.Debugf("No address for serviceName: %v, skip", serviceName) - return +func (pm *PollManager) needSelfPoll(serviceName 
string, receiverDomain string) bool { + key := buildPollerKey(serviceName, receiverDomain) + expected, err := pm.consist.Get(key) + if err != nil { + nlog.Errorf("Unable to select consistent hash ring for key %q: %v", key, err) + return false } + if expected != pm.gatewayName { + nlog.Debugf("Unexpected gateway %q for key %q, expected gateway %q, skip", pm.gatewayName, key, expected) + return false + } + + return true +} + +func (pm *PollManager) setPollConnection(serviceName string, domains map[string]bool) { pm.pollersLock.Lock() defer pm.pollersLock.Unlock() + if domains == nil { + domains = map[string]bool{} + } + svcPoller, ok := pm.pollers[serviceName] if !ok { + if len(domains) == 0 { + return + } svcPoller = map[string]*PollClient{} pm.pollers[serviceName] = svcPoller } - for _, domain := range domainList { - if _, ok := svcPoller[domain]; ok { - continue + var addDomainList, removeDomainList []string + for domain := range svcPoller { + if _, ok := domains[domain]; !ok { + removeDomainList = append(removeDomainList, domain) } + } + for domain := range domains { + if _, ok := svcPoller[domain]; !ok { + addDomainList = append(addDomainList, domain) + } + } - poller := newPollClient(pm.client, serviceName, domain) + for _, domain := range removeDomainList { + if poller, ok := svcPoller[domain]; ok { + poller.Stop() + delete(svcPoller, domain) + nlog.Infof("Remove poll connection: %v", buildPollerKey(serviceName, domain)) + } + } + for _, domain := range addDomainList { + poller := newPollClient(pm.client, serviceName, domain, pm.selfNamespace) svcPoller[domain] = poller poller.Start() nlog.Infof("Add poll connection: %v", buildPollerKey(serviceName, domain)) } -} -func (pm *PollManager) removePollConnection(serviceName string, domainList []string) { - pm.pollersLock.Lock() - defer pm.pollersLock.Unlock() + if len(svcPoller) == 0 { + delete(pm.pollers, serviceName) - svcPoller, ok := pm.pollers[serviceName] - if !ok { - return + nlog.Infof("Poll connections of service %q were all removed", serviceName) } +} - if len(domainList) == 0 { - for domain, poller := range svcPoller { - poller.Stop() - nlog.Infof("Remove poll connection: %v", buildPollerKey(serviceName, domain)) - } +func (pm *PollManager) getReceiverDomains() []string { + pm.receiverDomainsLock.RLock() + defer pm.receiverDomainsLock.RUnlock() - delete(pm.pollers, serviceName) - return + var domains []string + for domain := range pm.receiverDomains { + domains = append(domains, domain) } - for _, domain := range domainList { - if poller, ok := svcPoller[domain]; ok { - poller.Stop() - delete(svcPoller, domain) - nlog.Infof("Remove poll connection: %v", buildPollerKey(serviceName, domain)) - } - } + return domains +} + +func (pm *PollManager) isReceiverDomain(domain string) bool { + pm.receiverDomainsLock.RLock() + defer pm.receiverDomainsLock.RUnlock() + + return pm.receiverDomains[domain] } func buildPollerKey(svcName string, receiverDomain string) string { @@ -501,6 +590,6 @@ func splitDomainRouteKey(key string) (string, string, error) { return arr[0], arr[1], nil } -func isPollingState(state PollState) bool { - return state == PollStatePolling +func isDefaultService(service string) bool { + return service == utils.ServiceHandshake || service == utils.ServiceAPIServer } diff --git a/pkg/gateway/controller/poller/poll_manager_test.go b/pkg/gateway/controller/poller/poll_manager_test.go index 6f5cf837..6677523c 100644 --- a/pkg/gateway/controller/poller/poll_manager_test.go +++ 
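The needSelfPoll logic above leans on the stathat/consistent ring: each poller key (service plus receiver domain) maps to exactly one alive gateway, so replicas split the polling work and hand it over when membership changes. A standalone sketch of that assignment; the member names and the key format below are illustrative, not the exact output of buildPollerKey:

package main

import (
	"fmt"

	"github.com/stathat/consistent"
)

func main() {
	ring := consistent.New()
	ring.Set([]string{"alice-01", "alice-02"}) // alive gateway names

	keys := []string{
		"kuscia-handshake:bob",
		"kusciaapi:bob",
		"my-serving:carol", // illustrative service/domain pair
	}
	for _, key := range keys {
		owner, err := ring.Get(key) // the one gateway expected to poll this key
		if err != nil {
			fmt.Println("hash ring error:", err)
			continue
		}
		fmt.Printf("%-22s -> %s\n", key, owner)
	}
}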
b/pkg/gateway/controller/poller/poll_manager_test.go @@ -37,15 +37,19 @@ import ( func TestPollManager_Run(t *testing.T) { testServices := makeTestServices() + testGateways := makeTestGateways() kubeClient := kubefake.NewSimpleClientset(testServices...) kusciaClient := kusciafake.NewSimpleClientset(makeTestDomainRoutes()...) + _, err := kusciaClient.KusciaV1alpha1().Gateways("alice").Create(context.Background(), &testGateways[0], metav1.CreateOptions{}) + assert.NoError(t, err) kubeInformersFactory := kubeinformers.NewSharedInformerFactory(kubeClient, 0) kusciaInformerFactory := kusciainformers.NewSharedInformerFactory(kusciaClient, 0) svcInformer := kubeInformersFactory.Core().V1().Services() drInformer := kusciaInformerFactory.Kuscia().V1alpha1().DomainRoutes() + gatewayInformer := kusciaInformerFactory.Kuscia().V1alpha1().Gateways() - pm, err := NewPollManager(true, "alice", "alice-01", svcInformer, drInformer) + pm, err := NewPollManager(true, "alice", "alice-01", svcInformer, drInformer, gatewayInformer) assert.NoError(t, err) stopCh := make(chan struct{}) defer close(stopCh) @@ -53,7 +57,7 @@ func TestPollManager_Run(t *testing.T) { go kubeInformersFactory.Start(stopCh) go kusciaInformerFactory.Start(stopCh) - cache.WaitForNamedCacheSync("poll manager", stopCh, pm.serviceListerSynced, pm.domainRouteListerSynced) + cache.WaitForNamedCacheSync("poll manager", stopCh, pm.serviceListerSynced, pm.domainRouteListerSynced, pm.gatewayListerSynced) pollerExist := func(serviceName, receiverDomain string) bool { pm.pollersLock.Lock() @@ -103,6 +107,54 @@ func TestPollManager_Run(t *testing.T) { assert.True(t, pollClientExist) }) + pollerCounts := func() int { + pm.pollersLock.Lock() + defer pm.pollersLock.Unlock() + counts := 0 + for _, svcPollers := range pm.pollers { + counts += len(svcPollers) + } + + return counts + } + + t.Run("Add and delete gateway", func(t *testing.T) { + prePollerCounts := pollerCounts() + gatewayInterface := kusciaClient.KusciaV1alpha1().Gateways("alice") + assert.NoError(t, err) + + gw, err := gatewayInterface.Create(context.Background(), &testGateways[1], metav1.CreateOptions{}) + assert.NoError(t, err) + + curPollerCounts := 0 + for i := 0; i < 50; i++ { + assert.NoError(t, pm.updateAliveGateways()) + time.Sleep(100 * time.Millisecond) + curPollerCounts = pollerCounts() + if prePollerCounts != curPollerCounts { + break + } + } + assert.NotEqual(t, prePollerCounts, curPollerCounts) + + err = kusciaClient.KusciaV1alpha1().Gateways("alice").Delete(context.Background(), gw.Name, metav1.DeleteOptions{}) + assert.NoError(t, err) + + curPollerCounts = 0 + for i := 0; i < 50; i++ { + assert.NoError(t, pm.updateAliveGateways()) + + time.Sleep(100 * time.Millisecond) + + curPollerCounts = pollerCounts() + if prePollerCounts == curPollerCounts { + break + } + } + assert.Equal(t, prePollerCounts, curPollerCounts) + + }) + t.Run("update domain route", func(t *testing.T) { dr, err := kusciaClient.KusciaV1alpha1().DomainRoutes("alice").Get(context.Background(), "carol-alice", metav1.GetOptions{}) assert.NoError(t, err) @@ -255,3 +307,30 @@ func makeTestDomainRoutes() []runtime.Object { }, } } + +func makeTestGateways() []kusciaapisv1alpha1.Gateway { + return []kusciaapisv1alpha1.Gateway{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "alice-01", + Namespace: "alice", + }, + Status: kusciaapisv1alpha1.GatewayStatus{ + HeartbeatTime: metav1.Time{ + Time: time.Now(), + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "alice-02", + Namespace: "alice", + }, + Status: 
kusciaapisv1alpha1.GatewayStatus{ + HeartbeatTime: metav1.Time{ + Time: time.Now(), + }, + }, + }, + } +} diff --git a/pkg/gateway/controller/regitser_node.go b/pkg/gateway/controller/register_node.go similarity index 75% rename from pkg/gateway/controller/regitser_node.go rename to pkg/gateway/controller/register_node.go index a904f267..57daec46 100644 --- a/pkg/gateway/controller/regitser_node.go +++ b/pkg/gateway/controller/register_node.go @@ -19,6 +19,7 @@ import ( "crypto/md5" "crypto/rand" "crypto/rsa" + "crypto/sha256" "crypto/x509" "encoding/base64" "encoding/json" @@ -30,7 +31,7 @@ import ( "strings" "time" - jwt "github.com/golang-jwt/jwt/v5" + "github.com/golang-jwt/jwt/v5" k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" @@ -44,44 +45,33 @@ import ( "github.com/secretflow/kuscia/proto/api/v1alpha1/handshake" ) -func getRegisterRequestHash(regReq *handshake.RegisterRequest) [16]byte { +func getRegisterRequestHashSha256(regReq *handshake.RegisterRequest) [32]byte { + return sha256.Sum256([]byte(fmt.Sprintf("%s_%s_%d", regReq.DomainId, regReq.Csr, regReq.RequestTime))) +} + +// getRegisterRequestHashMd5 deprecated: md5 hash would deprecate soon +func getRegisterRequestHashMd5(regReq *handshake.RegisterRequest) [16]byte { return md5.Sum([]byte(fmt.Sprintf("%s_%s_%d", regReq.DomainId, regReq.Csr, regReq.RequestTime))) } type RegisterJwtClaims struct { - ReqHash [16]byte `json:"req"` + ReqHash [16]byte `json:"req"` // deprecate soon + ReqHashSha256 [32]byte `json:"req_hash"` jwt.RegisteredClaims } -func RegisterDomain(namespace, path, csrData string, prikey *rsa.PrivateKey, afterRegisterHook AfterRegisterDomainHook) error { - regReq := &handshake.RegisterRequest{ - DomainId: namespace, - Csr: base64.StdEncoding.EncodeToString([]byte(csrData)), - RequestTime: int64(time.Now().Nanosecond()), - } - - rjc := &RegisterJwtClaims{ - ReqHash: getRegisterRequestHash(regReq), - RegisteredClaims: jwt.RegisteredClaims{ - ExpiresAt: jwt.NewNumericDate(time.Now().Add(5 * time.Minute)), - IssuedAt: jwt.NewNumericDate(time.Now()), - Issuer: namespace, - Subject: namespace, - }, - } - token := jwt.NewWithClaims(jwt.SigningMethodRS256, rjc) - tokenstr, err := token.SignedString(prikey) +func RegisterDomain(namespace, path, csrData string, priKey *rsa.PrivateKey, afterRegisterHook AfterRegisterDomainHook) error { + req, token, err := generateJwtToken(namespace, csrData, priKey) if err != nil { return err } regResp := &handshake.RegisterResponse{} headers := map[string]string{ - "jwt-token": tokenstr, + "jwt-token": token, } - registerPath := fmt.Sprintf("%s%s", strings.TrimSuffix(path, "/"), "/register") - err = doHTTPWithDefaultRetry(regReq, regResp, &utils.HTTPParam{ + err = doHTTPWithDefaultRetry(req, regResp, &utils.HTTPParam{ Method: http.MethodPost, - Path: registerPath, + Path: fmt.Sprintf("%s/register", strings.TrimSuffix(path, "/")), KusciaSource: namespace, ClusterName: clusters.GetMasterClusterName(), KusciaHost: fmt.Sprintf("%s.master.svc", utils.ServiceHandshake), @@ -96,6 +86,31 @@ func RegisterDomain(namespace, path, csrData string, prikey *rsa.PrivateKey, aft return nil } +func generateJwtToken(namespace, csrData string, prikey *rsa.PrivateKey) (req *handshake.RegisterRequest, token string, err error) { + req = &handshake.RegisterRequest{ + DomainId: namespace, + Csr: base64.StdEncoding.EncodeToString([]byte(csrData)), + RequestTime: int64(time.Now().Nanosecond()), + } + + rjc := &RegisterJwtClaims{ + ReqHash: 
getRegisterRequestHashMd5(req), + ReqHashSha256: getRegisterRequestHashSha256(req), + RegisteredClaims: jwt.RegisteredClaims{ + ExpiresAt: jwt.NewNumericDate(time.Now().Add(5 * time.Minute)), + IssuedAt: jwt.NewNumericDate(time.Now()), + Issuer: namespace, + Subject: namespace, + }, + } + tokenData := jwt.NewWithClaims(jwt.SigningMethodRS256, rjc) + token, err = tokenData.SignedString(prikey) + if err != nil { + nlog.Errorf("Signed token failed, error: %s.", err.Error()) + } + return +} + func (c *DomainRouteController) registerHandle(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodPost { w.WriteHeader(http.StatusMethodNotAllowed) @@ -116,13 +131,10 @@ func (c *DomainRouteController) registerHandle(w http.ResponseWriter, r *http.Re httpErrWrapped(w, err, http.StatusBadRequest) return } - - // Use jwt verify first. - // Jwt's token must be signed by domain's private key. - // This handler will verify it by public key in csr. - err = c.registerVerify(r.Header.Get("jwt-token"), certRequest.PublicKey, &req) + // verify jwt token and csr + err = verifyRegisterRequest(&req, r.Header.Get("jwt-token")) if err != nil { - httpErrWrapped(w, fmt.Errorf(`request jwt verify error, detail -> %s`, err.Error()), http.StatusBadRequest) + httpErrWrapped(w, err, http.StatusBadRequest) return } @@ -259,7 +271,7 @@ func isCertMatch(certString string, c *x509.Certificate) bool { return true } -func (c *DomainRouteController) registerVerify(jwtTokenStr string, pubKey interface{}, req *handshake.RegisterRequest) error { +func verifyJwtToken(jwtTokenStr string, pubKey interface{}, req *handshake.RegisterRequest) error { rjc := &RegisterJwtClaims{} jwtToken, err := jwt.ParseWithClaims(jwtTokenStr, rjc, func(token *jwt.Token) (interface{}, error) { return pubKey, nil @@ -268,19 +280,50 @@ func (c *DomainRouteController) registerVerify(jwtTokenStr string, pubKey interf return err } if !jwtToken.Valid { - return fmt.Errorf("%s", "jwt token decrpted fail") + return fmt.Errorf("%s", "verify jwt failed, detail: jwt token decrpted fail") } if time.Since(rjc.ExpiresAt.Time) > 0 { - return fmt.Errorf("%s", "jwt verify error, token expired") + return fmt.Errorf("%s", "verify jwt failed, detail: token expired") } - hash := getRegisterRequestHash(req) - if len(hash) != len(rjc.ReqHash) { - return fmt.Errorf("%s", "request body verify error, hash not match") + // check sha256 hash + if reflect.DeepEqual(getRegisterRequestHashSha256(req), rjc.ReqHashSha256) { + return nil } - for i := 0; i < len(hash); i++ { - if hash[i] != rjc.ReqHash[i] { - return fmt.Errorf("%s", "request body verify error, hash not match") - } + // check md5 hash + if reflect.DeepEqual(getRegisterRequestHashMd5(req), rjc.ReqHash) { + return nil + } + return fmt.Errorf("verify jwt failed, detail: the request content doesn't match the hash") +} + +// verifyCSR verify the CN of CSR must be equal with domainID +func verifyCSR(csr *x509.CertificateRequest, domainID string) error { + + if csr == nil { + return fmt.Errorf("csr is nil") + } + if csr.Subject.CommonName != domainID { + return fmt.Errorf("the csr subject common name must be domainID: %s not %s", csr.Subject.CommonName, domainID) + } + return nil +} + +func verifyRegisterRequest(req *handshake.RegisterRequest, token string) error { + // Csr in request must be base64 encoded string + // Raw data must be pem format + certRequest, err := parseCertRequest(req.Csr) + if err != nil { + return fmt.Errorf("parse cert request failed, detail: %s", err.Error()) + } + // verify the CN of CSR must 
be equal with domainID + if err = verifyCSR(certRequest, req.DomainId); err != nil { + return fmt.Errorf("verify csr failed, detail: %s", err.Error()) + } + // Use jwt verify first. + // JWT token must be signed by domain's private key. + // This handler will verify it by public key in csr. + if err = verifyJwtToken(token, certRequest.PublicKey, req); err != nil { + return fmt.Errorf("verify jwt failed, detail: %s", err.Error()) } return nil } @@ -312,7 +355,7 @@ func parseCertRequest(certStr string) (*x509.CertificateRequest, error) { } csr, err := x509.ParseCertificateRequest(p.Bytes) if err != nil { - err = fmt.Errorf(`csr pem data parse err, %s`, err.Error()) + err = fmt.Errorf("csr pem data parse error: %s", err.Error()) return nil, err } return csr, nil diff --git a/pkg/gateway/controller/register_node_test.go b/pkg/gateway/controller/register_node_test.go new file mode 100644 index 00000000..abf554cc --- /dev/null +++ b/pkg/gateway/controller/register_node_test.go @@ -0,0 +1,240 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package controller + +import ( + "crypto/rsa" + "encoding/base64" + "fmt" + "reflect" + "testing" + "time" + + "github.com/golang-jwt/jwt/v5" + "github.com/stretchr/testify/assert" + + "github.com/secretflow/kuscia/cmd/kuscia/confloader" + "github.com/secretflow/kuscia/pkg/utils/nlog" + "github.com/secretflow/kuscia/pkg/utils/tls" + "github.com/secretflow/kuscia/proto/api/v1alpha1/handshake" +) + +var ( + unitTestDeployToken = "FwBvarrLUpfACr00v8AiIbHbFcYguNqvu92XRJ2YysU=" + utAlice = "alice" + utBob = "bob" +) + +type RegisterJwtClaimsMd5 struct { + ReqHash [16]byte `json:"req"` // deprecate soon + jwt.RegisteredClaims +} + +type RegisterJwtClaimsSha256 struct { + ReqHashSha256 [32]byte `json:"req_hash"` + jwt.RegisteredClaims +} + +func generateJwtTokenMd5(namespace, csrData string, prikey *rsa.PrivateKey) (req *handshake.RegisterRequest, token string, err error) { + req = &handshake.RegisterRequest{ + DomainId: namespace, + Csr: base64.StdEncoding.EncodeToString([]byte(csrData)), + RequestTime: int64(time.Now().Nanosecond()), + } + + rjc := &RegisterJwtClaimsMd5{ + ReqHash: getRegisterRequestHashMd5(req), + RegisteredClaims: jwt.RegisteredClaims{ + ExpiresAt: jwt.NewNumericDate(time.Now().Add(5 * time.Minute)), + IssuedAt: jwt.NewNumericDate(time.Now()), + Issuer: namespace, + Subject: namespace, + }, + } + tokenData := jwt.NewWithClaims(jwt.SigningMethodRS256, rjc) + token, err = tokenData.SignedString(prikey) + if err != nil { + nlog.Errorf("Signed token failed, error: %s.", err.Error()) + } + return +} + +func generateJwtTokenSha256(namespace, csrData string, prikey *rsa.PrivateKey) (req *handshake.RegisterRequest, token string, err error) { + req = &handshake.RegisterRequest{ + DomainId: namespace, + Csr: base64.StdEncoding.EncodeToString([]byte(csrData)), + RequestTime: int64(time.Now().Nanosecond()), + } + + rjc := &RegisterJwtClaimsSha256{ + ReqHashSha256: getRegisterRequestHashSha256(req), + 
RegisteredClaims: jwt.RegisteredClaims{ + ExpiresAt: jwt.NewNumericDate(time.Now().Add(5 * time.Minute)), + IssuedAt: jwt.NewNumericDate(time.Now()), + Issuer: namespace, + Subject: namespace, + }, + } + tokenData := jwt.NewWithClaims(jwt.SigningMethodRS256, rjc) + token, err = tokenData.SignedString(prikey) + if err != nil { + nlog.Errorf("Signed token failed, error: %s.", err.Error()) + } + return +} + +func generateTestKey(t *testing.T, namespace string) (csr string, key *rsa.PrivateKey) { + keyStr, err := tls.GenerateKeyData() + if err != nil { + t.Errorf("Generate key data failed, error: %s.", err.Error()) + } + rawKey, err := base64.StdEncoding.DecodeString(keyStr) + key, err = tls.ParseKey(rawKey, "") + if err != nil { + t.Errorf("Parse key data failed, error: %s.", err.Error()) + } + csr = confloader.GenerateCsrData(namespace, keyStr, unitTestDeployToken) + return +} + +func TestRegisterDomain(t *testing.T) { + csr, key := generateTestKey(t, utAlice) + _ = RegisterDomain("alice", "test", csr, key, nil) +} + +func TestVerifyRequest(t *testing.T) { + csr, key := generateTestKey(t, utAlice) + req, token, err := generateJwtToken(utAlice, csr, key) + assert.NoError(t, err, "generateJwtToken failed") + err = verifyRegisterRequest(req, token) + assert.NoError(t, err, "verifyRegisterRequest failed") +} + +func TestVerifyCSRcn(t *testing.T) { + // request domain is alice but csr is bob + csr, key := generateTestKey(t, utBob) + req, token, err := generateJwtToken(utAlice, csr, key) + assert.NoError(t, err, "generateJwtToken failed") + err = verifyRegisterRequest(req, token) + assert.Error(t, err, "verifyRegisterRequest failed") +} + +func TestCompatibility(t *testing.T) { + // token md5 vs claim (ma5\sha256) + csr, key := generateTestKey(t, utAlice) + req, token, err := generateJwtTokenMd5(utAlice, csr, key) + assert.NoError(t, err, "generateJwtToken failed") + err = verifyRegisterRequest(req, token) + assert.NoError(t, err, "verifyRegisterRequest failed") + + // token (md5/sha256) vs claim (ma5) + req, token, err = generateJwtToken(utAlice, csr, key) + assert.NoError(t, err, "generateJwtToken failed") + err = verifyRegisterRequestMd5(req, token) + assert.NoError(t, err, "generateJwtToken failed") + + // token (md5/sha256) vs claim (sha256) + err = verifyRegisterRequestSha256(req, token) + assert.NoError(t, err, "generateJwtToken failed") + + // token sha256 vs claim (ma5\sha256) + req, token, err = generateJwtTokenSha256(utAlice, csr, key) + assert.NoError(t, err, "generateJwtToken failed") + err = verifyRegisterRequest(req, token) + assert.NoError(t, err, "generateJwtToken failed") + +} + +func verifyRegisterRequestMd5(req *handshake.RegisterRequest, token string) error { + // Csr in request must be base64 encoded string + // Raw data must be pem format + certRequest, err := parseCertRequest(req.Csr) + if err != nil { + return fmt.Errorf("parse cert request failed, detail: %s", err.Error()) + } + // verify the CN of CSR must be equal with domainID + if err = verifyCSR(certRequest, req.DomainId); err != nil { + return fmt.Errorf("verify csr failed, detail: %s", err.Error()) + } + // Use jwt verify first. + // JWT token must be signed by domain's private key. + // This handler will verify it by public key in csr. 
+ if err = verifyJwtTokenMd5(token, certRequest.PublicKey, req); err != nil { + return fmt.Errorf(`verify jwt failed, detail: %s`, err.Error()) + } + return nil +} + +func verifyJwtTokenMd5(jwtTokenStr string, pubKey interface{}, req *handshake.RegisterRequest) error { + rjc := &RegisterJwtClaimsMd5{} + jwtToken, err := jwt.ParseWithClaims(jwtTokenStr, rjc, func(token *jwt.Token) (interface{}, error) { + return pubKey, nil + }) + if err != nil { + return err + } + if !jwtToken.Valid { + return fmt.Errorf("%s", "jwt token decrpted fail") + } + if time.Since(rjc.ExpiresAt.Time) > 0 { + return fmt.Errorf("%s", "verify jwt failed, detail: token expired") + } + // check md5 hash + if reflect.DeepEqual(getRegisterRequestHashMd5(req), rjc.ReqHash) { + return nil + } + return fmt.Errorf("verify request failed, detail: the request content doesn't match the hash") +} + +func verifyRegisterRequestSha256(req *handshake.RegisterRequest, token string) error { + // Csr in request must be base64 encoded string + // Raw data must be pem format + certRequest, err := parseCertRequest(req.Csr) + if err != nil { + return fmt.Errorf("parse cert request failed, detail: %s", err.Error()) + } + // verify the CN of CSR must be equal with domainID + if err = verifyCSR(certRequest, req.DomainId); err != nil { + return fmt.Errorf("verify csr failed, detail: %s", err.Error()) + } + // Use jwt verify first. + // JWT token must be signed by domain's private key. + // This handler will verify it by public key in csr. + if err = verifyJwtTokenSha256(token, certRequest.PublicKey, req); err != nil { + return fmt.Errorf("verify jwt failed, detail: %s", err.Error()) + } + return nil +} + +func verifyJwtTokenSha256(jwtTokenStr string, pubKey interface{}, req *handshake.RegisterRequest) error { + rjc := &RegisterJwtClaimsSha256{} + jwtToken, err := jwt.ParseWithClaims(jwtTokenStr, rjc, func(token *jwt.Token) (interface{}, error) { + return pubKey, nil + }) + if err != nil { + return err + } + if !jwtToken.Valid { + return fmt.Errorf("%s", "jwt token decrpted fail") + } + if time.Since(rjc.ExpiresAt.Time) > 0 { + return fmt.Errorf("%s", "jwt verify error, token expired") + } + // check sha256 hash + if reflect.DeepEqual(getRegisterRequestHashSha256(req), rjc.ReqHashSha256) { + return nil + } + return fmt.Errorf("verify request failed, detail: the request content doesn't match the hash") +} diff --git a/pkg/gateway/utils/clusters.go b/pkg/gateway/utils/clusters.go index e8c92489..6066b736 100644 --- a/pkg/gateway/utils/clusters.go +++ b/pkg/gateway/utils/clusters.go @@ -25,4 +25,5 @@ const ( ServiceKusciaStorage = "kusciastorage" ServiceHandshake = "kuscia-handshake" ServiceKusciaAPI = "kusciaapi" + EnvoyClusterName = "envoy-cluster" ) diff --git a/pkg/gateway/utils/http.go b/pkg/gateway/utils/http.go index 41eaf0ab..6c16641b 100644 --- a/pkg/gateway/utils/http.go +++ b/pkg/gateway/utils/http.go @@ -36,6 +36,7 @@ type HTTPParam struct { ClusterName string KusciaSource string KusciaHost string + Transit bool Headers map[string]string } @@ -89,10 +90,16 @@ func DoHTTPWithRetry(in interface{}, out interface{}, hp *HTTPParam, waitTime ti } func DoHTTP(in interface{}, out interface{}, hp *HTTPParam) error { + var handshakeHost string var req *http.Request var err error + + handshakeHost = InternalServer + if hp.Transit { + handshakeHost = "http://" + hp.KusciaHost + } if hp.Method == http.MethodGet { - req, err = http.NewRequest(http.MethodGet, InternalServer+hp.Path, nil) + req, err = http.NewRequest(http.MethodGet, 
handshakeHost+hp.Path, nil) if err != nil { return fmt.Errorf("invalid request, detail -> %s", err.Error()) } @@ -101,19 +108,21 @@ func DoHTTP(in interface{}, out interface{}, hp *HTTPParam) error { if err != nil { return fmt.Errorf("invalid request, detail -> %s", err.Error()) } - req, err = http.NewRequest(hp.Method, InternalServer+hp.Path, bytes.NewBuffer(inbody)) + req, err = http.NewRequest(hp.Method, handshakeHost+hp.Path, bytes.NewBuffer(inbody)) if err != nil { return fmt.Errorf("invalid request, detail -> %s", err.Error()) } } - + if !hp.Transit { + req.Header.Set(fmt.Sprintf("%s-Cluster", ServiceHandshake), hp.ClusterName) + } req.Header.Set("Content-Type", "application/json") - req.Header.Set(fmt.Sprintf("%s-Cluster", ServiceHandshake), hp.ClusterName) req.Header.Set("Kuscia-Source", hp.KusciaSource) req.Header.Set("kuscia-Host", hp.KusciaHost) for key, val := range hp.Headers { req.Header.Set(key, val) } + client := &http.Client{ Timeout: time.Second * 10, } diff --git a/pkg/gateway/utils/transit.go b/pkg/gateway/utils/transit.go index 1cf5eb46..c0bed889 100644 --- a/pkg/gateway/utils/transit.go +++ b/pkg/gateway/utils/transit.go @@ -18,10 +18,19 @@ import ( "github.com/secretflow/kuscia/pkg/crd/apis/kuscia/v1alpha1" ) +const ( + HeaderTransitHash = "Kuscia-Transit-Hash" + HeaderTransitFlag = "Kuscia-Transit-Flag" +) + +func IsTransit(transit *v1alpha1.Transit) bool { + return transit != nil && (transit.TransitMethod == v1alpha1.TransitMethodReverseTunnel || transit.TransitMethod == v1alpha1.TransitMethodThirdDomain) +} + func IsThirdPartyTransit(transit *v1alpha1.Transit) bool { return transit != nil && (transit.TransitMethod == "" || transit.TransitMethod == v1alpha1.TransitMethodThirdDomain) } -func IsGatewayTceTransit(transit *v1alpha1.Transit) bool { +func IsReverseTunnelTransit(transit *v1alpha1.Transit) bool { return transit != nil && transit.TransitMethod == v1alpha1.TransitMethodReverseTunnel } diff --git a/pkg/gateway/utils/transit_test.go b/pkg/gateway/utils/transit_test.go index fd19c9ee..b733b4ac 100644 --- a/pkg/gateway/utils/transit_test.go +++ b/pkg/gateway/utils/transit_test.go @@ -20,10 +20,29 @@ import ( "github.com/secretflow/kuscia/pkg/crd/apis/kuscia/v1alpha1" ) -func TestIsThirdPartyTransit(t *testing.T) { - type args struct { - transit *v1alpha1.Transit +type args struct { + transit *v1alpha1.Transit +} + +var ( + argsWithReverseTunnel = args{ + transit: &v1alpha1.Transit{ + TransitMethod: v1alpha1.TransitMethodReverseTunnel, + }, + } + argsWithThirdDomain = args{ + transit: &v1alpha1.Transit{ + TransitMethod: v1alpha1.TransitMethodThirdDomain, + }, } + argsWithEmpty = args{ + transit: &v1alpha1.Transit{}, + } + argsWithNil = args{nil} +) + +func TestIsThirdPartyTransit(t *testing.T) { + tests := []struct { name string args args @@ -32,34 +51,22 @@ func TestIsThirdPartyTransit(t *testing.T) { // TODO: Add test cases. 
{ name: "case 0", - args: args{ - &v1alpha1.Transit{ - TransitMethod: v1alpha1.TransitMethodReverseTunnel, - }, - }, + args: argsWithReverseTunnel, want: false, }, { name: "case 1", - args: args{ - &v1alpha1.Transit{ - TransitMethod: v1alpha1.TransitMethodThirdDomain, - }, - }, + args: argsWithThirdDomain, want: true, }, { name: "case 2", - args: args{ - nil, - }, + args: argsWithNil, want: false, }, { name: "case 3", - args: args{ - &v1alpha1.Transit{}, - }, + args: argsWithEmpty, want: true, }, } @@ -72,10 +79,8 @@ func TestIsThirdPartyTransit(t *testing.T) { } } -func TestIsGatewayTceTransit(t *testing.T) { - type args struct { - transit *v1alpha1.Transit - } +func TestIsReverseTunnelTransit(t *testing.T) { + tests := []struct { name string args args @@ -84,41 +89,67 @@ func TestIsGatewayTceTransit(t *testing.T) { // TODO: Add test cases. { name: "case 0", - args: args{ - &v1alpha1.Transit{ - TransitMethod: v1alpha1.TransitMethodReverseTunnel, - }, - }, + args: argsWithReverseTunnel, want: true, }, { name: "case 1", - args: args{ - &v1alpha1.Transit{ - TransitMethod: v1alpha1.TransitMethodThirdDomain, - }, - }, + args: argsWithThirdDomain, + want: false, + }, + { + name: "case 2", + args: argsWithNil, want: false, }, + { + name: "case 3", + args: argsWithEmpty, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := IsReverseTunnelTransit(tt.args.transit); got != tt.want { + t.Errorf("IsReverseTunnelTransit() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestIsTransit(t *testing.T) { + + tests := []struct { + name string + args args + want bool + }{ + // TODO: Add test cases. + { + name: "case 0", + args: argsWithReverseTunnel, + want: true, + }, + { + name: "case 1", + args: argsWithThirdDomain, + want: true, + }, { name: "case 2", - args: args{ - nil, - }, + args: argsWithNil, want: false, }, { name: "case 3", - args: args{ - &v1alpha1.Transit{}, - }, + args: argsWithEmpty, want: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := IsGatewayTceTransit(tt.args.transit); got != tt.want { - t.Errorf("IsGatewayTceTransit() = %v, want %v", got, tt.want) + if got := IsTransit(tt.args.transit); got != tt.want { + t.Errorf("IsTransit() = %v, want %v", got, tt.want) } }) } diff --git a/pkg/kusciaapi/bean/grpc_server_bean.go b/pkg/kusciaapi/bean/grpc_server_bean.go index 3148b9f5..35291b0b 100644 --- a/pkg/kusciaapi/bean/grpc_server_bean.go +++ b/pkg/kusciaapi/bean/grpc_server_bean.go @@ -16,10 +16,12 @@ package bean import ( "context" + "crypto/tls" "fmt" "net" "time" + "github.com/secretflow/kuscia/pkg/common" cmservice "github.com/secretflow/kuscia/pkg/confmanager/service" "google.golang.org/grpc" @@ -31,9 +33,10 @@ import ( "github.com/secretflow/kuscia/pkg/kusciaapi/service" "github.com/secretflow/kuscia/pkg/kusciaapi/utils" "github.com/secretflow/kuscia/pkg/utils/nlog" - "github.com/secretflow/kuscia/pkg/utils/tls" + tlsutil "github.com/secretflow/kuscia/pkg/utils/tls" "github.com/secretflow/kuscia/pkg/web/errorcode" "github.com/secretflow/kuscia/pkg/web/framework" + frameworkconfig "github.com/secretflow/kuscia/pkg/web/framework/config" "github.com/secretflow/kuscia/pkg/web/interceptor" "github.com/secretflow/kuscia/proto/api/v1alpha1/kusciaapi" ) @@ -62,7 +65,7 @@ func (s *grpcServerBean) Start(ctx context.Context, e framework.ConfBeanRegistry grpc.ConnectionTimeout(time.Duration(s.config.ConnectTimeout) * time.Second), } if s.config.TLS != nil { - serverTLSConfig, err := 
tls.BuildServerTLSConfig(s.config.TLS.RootCA, s.config.TLS.ServerCert, s.config.TLS.ServerKey) + serverTLSConfig, err := buildServerTLSConfig(s.config.TLS, s.config.Protocol) if err != nil { nlog.Fatalf("Failed to init server tls config: %v", err) } @@ -115,3 +118,14 @@ func (s *grpcServerBean) Start(ctx context.Context, e framework.ConfBeanRegistry func (s *grpcServerBean) ServerName() string { return "kusciaAPIGrpcServer" } + +func buildServerTLSConfig(config *frameworkconfig.TLSServerConfig, protocol common.Protocol) (*tls.Config, error) { + + if config == nil { + return nil, fmt.Errorf("tls config is empty") + } + if protocol == common.MTLS { + return tlsutil.BuildServerTLSConfig(config.RootCA, config.ServerCert, config.ServerKey) + } + return tlsutil.BuildServerTLSConfig(nil, config.ServerCert, config.ServerKey) +} diff --git a/pkg/kusciaapi/bean/http_server_bean.go b/pkg/kusciaapi/bean/http_server_bean.go index bf0d56cc..80a9dc03 100644 --- a/pkg/kusciaapi/bean/http_server_bean.go +++ b/pkg/kusciaapi/bean/http_server_bean.go @@ -477,10 +477,17 @@ func protoDecorator(e framework.ConfBeanRegistry, handler api.ProtoHandler) gin. func convertToGinConf(conf *apiconfig.KusciaAPIConfig) beans.GinBeanConfig { var tlsConfig *beans.TLSServerConfig if conf.TLS != nil { - tlsConfig = &beans.TLSServerConfig{ - CACert: conf.TLS.RootCA, - ServerCert: conf.TLS.ServerCert, - ServerKey: conf.TLS.ServerKey, + if conf.Protocol == common.MTLS { + tlsConfig = &beans.TLSServerConfig{ + CACert: conf.TLS.RootCA, + ServerCert: conf.TLS.ServerCert, + ServerKey: conf.TLS.ServerKey, + } + } else { + tlsConfig = &beans.TLSServerConfig{ + ServerCert: conf.TLS.ServerCert, + ServerKey: conf.TLS.ServerKey, + } } } return beans.GinBeanConfig{ diff --git a/pkg/kusciaapi/handler/httphandler/domaindata/create.go b/pkg/kusciaapi/handler/httphandler/domaindata/create.go index d433dabd..8c141e84 100644 --- a/pkg/kusciaapi/handler/httphandler/domaindata/create.go +++ b/pkg/kusciaapi/handler/httphandler/domaindata/create.go @@ -16,7 +16,6 @@ package domaindata import ( - "errors" "reflect" "github.com/secretflow/kuscia/pkg/kusciaapi/service" @@ -36,16 +35,6 @@ func NewCreateDomainDataHandler(domainDataService service.IDomainDataService) ap } func (h *createDomainDataHandler) Validate(context *api.BizContext, request api.ProtoRequest, errs *errorcode.Errs) { - createReq, _ := request.(*kusciaapi.CreateDomainDataRequest) - if createReq.DomainId == "" { - errs.AppendErr(errors.New("domainID should not be empty")) - } - if createReq.RelativeUri == "" { - errs.AppendErr(errors.New("relative uri should not be empty")) - } - if createReq.Type == "" { - errs.AppendErr(errors.New("type should not be empty")) - } } func (h *createDomainDataHandler) Handle(context *api.BizContext, request api.ProtoRequest) api.ProtoResponse { diff --git a/pkg/kusciaapi/handler/httphandler/domaindata/delete.go b/pkg/kusciaapi/handler/httphandler/domaindata/delete.go index 9bdb6bc0..3d0f5538 100644 --- a/pkg/kusciaapi/handler/httphandler/domaindata/delete.go +++ b/pkg/kusciaapi/handler/httphandler/domaindata/delete.go @@ -16,7 +16,6 @@ package domaindata import ( - "errors" "reflect" "github.com/secretflow/kuscia/pkg/kusciaapi/service" @@ -37,13 +36,6 @@ func NewDeleteDomainDataHandler(domainDataService service.IDomainDataService) ap } func (h *deleteDomainDataHandler) Validate(context *api.BizContext, request api.ProtoRequest, errs *errorcode.Errs) { - deleteReq, _ := request.(*kusciaapi.DeleteDomainDataRequest) - if deleteReq.DomainId == "" { - 
errs.AppendErr(errors.New("domain id should not be empty")) - } - if deleteReq.DomaindataId == "" { - errs.AppendErr(errors.New("domaindata id should not be empty")) - } } func (h *deleteDomainDataHandler) Handle(context *api.BizContext, request api.ProtoRequest) api.ProtoResponse { diff --git a/pkg/kusciaapi/handler/httphandler/domaindata/query.go b/pkg/kusciaapi/handler/httphandler/domaindata/query.go index b2068f58..34dd9828 100644 --- a/pkg/kusciaapi/handler/httphandler/domaindata/query.go +++ b/pkg/kusciaapi/handler/httphandler/domaindata/query.go @@ -36,17 +36,6 @@ func NewQueryDomainDataHandler(domainDataService service.IDomainDataService) api } func (h *queryDomainDataHandler) Validate(context *api.BizContext, request api.ProtoRequest, errs *errorcode.Errs) { - req, _ := request.(*kusciaapi.QueryDomainDataRequest) - if req.Data == nil { - errs.AppendErr(errors.New("request data should not be nil")) - return - } - if req.Data.DomainId == "" { - errs.AppendErr(errors.New("request domainID should not be empty")) - } - if req.Data.DomaindataId == "" { - errs.AppendErr(errors.New("request domainDataID should not be empty")) - } } func (h *queryDomainDataHandler) Handle(context *api.BizContext, request api.ProtoRequest) api.ProtoResponse { diff --git a/pkg/kusciaapi/handler/httphandler/domaindata/update.go b/pkg/kusciaapi/handler/httphandler/domaindata/update.go index 536d44fe..fae87b42 100644 --- a/pkg/kusciaapi/handler/httphandler/domaindata/update.go +++ b/pkg/kusciaapi/handler/httphandler/domaindata/update.go @@ -16,7 +16,6 @@ package domaindata import ( - "errors" "reflect" "github.com/secretflow/kuscia/pkg/kusciaapi/service" @@ -37,13 +36,6 @@ func NewUpdateDomainDataHandler(domainDataService service.IDomainDataService) ap } func (h *updateDomainDataHandler) Validate(context *api.BizContext, request api.ProtoRequest, errs *errorcode.Errs) { - updateReq, _ := request.(*kusciaapi.UpdateDomainDataRequest) - if updateReq.DomainId == "" { - errs.AppendErr(errors.New("domainID should not be empty")) - } - if updateReq.DomaindataId == "" { - errs.AppendErr(errors.New("domaindata id should not be empty")) - } } func (h *updateDomainDataHandler) Handle(context *api.BizContext, request api.ProtoRequest) api.ProtoResponse { diff --git a/pkg/kusciaapi/service/domaindata_source.go b/pkg/kusciaapi/service/domaindata_source.go index cb789dd0..ac48b370 100644 --- a/pkg/kusciaapi/service/domaindata_source.go +++ b/pkg/kusciaapi/service/domaindata_source.go @@ -79,16 +79,18 @@ func (s domainDataSourceService) CreateDomainDataSource(ctx context.Context, req } } - if request.DatasourceId == "" { - name := "" - if request.Name != nil { - name = *request.Name + if err = validateDataSourceType(request.Type); err != nil { + nlog.Errorf(errCreateDomainDataSource, err.Error()) + return &kusciaapi.CreateDomainDataSourceResponse{ + Status: utils.BuildErrorResponseStatus(errorcode.ErrRequestValidate, err.Error()), } - request.DatasourceId = common.GenDomainDataID(name) } - if err = validateDataSourceType(request.Type); err != nil { - nlog.Errorf(errCreateDomainDataSource, err.Error()) + if request.DatasourceId == "" { + request.DatasourceId = common.GenDomainDataSourceID(request.Type) + } + + if err = resources.ValidateK8sName(request.DatasourceId, "datasource_id"); err != nil { return &kusciaapi.CreateDomainDataSourceResponse{ Status: utils.BuildErrorResponseStatus(errorcode.ErrRequestValidate, err.Error()), } @@ -123,7 +125,7 @@ func (s domainDataSourceService) CreateDomainDataSource(ctx context.Context, req 
dataSource.Spec.Name = *request.Name } - if request.InfoKey != nil { + if request.InfoKey != nil && *request.InfoKey != "" { datasourceInfo, err := s.getDsInfoByKey(ctx, request.Type, *request.InfoKey) if err != nil { return &kusciaapi.CreateDomainDataSourceResponse{ diff --git a/pkg/kusciaapi/service/job_service.go b/pkg/kusciaapi/service/job_service.go index f306c809..67ae919f 100644 --- a/pkg/kusciaapi/service/job_service.go +++ b/pkg/kusciaapi/service/job_service.go @@ -26,12 +26,13 @@ import ( corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + k8sresource "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/watch" "github.com/secretflow/kuscia/pkg/common" - v1alpha1 "github.com/secretflow/kuscia/pkg/crd/apis/kuscia/v1alpha1" + "github.com/secretflow/kuscia/pkg/crd/apis/kuscia/v1alpha1" kusciaclientset "github.com/secretflow/kuscia/pkg/crd/clientset/versioned" "github.com/secretflow/kuscia/pkg/kusciaapi/config" "github.com/secretflow/kuscia/pkg/kusciaapi/errorcode" @@ -42,7 +43,6 @@ import ( consts "github.com/secretflow/kuscia/pkg/web/constants" utils2 "github.com/secretflow/kuscia/pkg/web/utils" "github.com/secretflow/kuscia/proto/api/v1alpha1/kusciaapi" - k8sresource "k8s.io/apimachinery/pkg/api/resource" ) type IJobService interface { @@ -704,6 +704,7 @@ func (h *jobService) buildJobStatus(ctx context.Context, kusciaJob *v1alpha1.Kus taskID := kt.TaskID ts := &kusciaapi.TaskStatus{ TaskId: taskID, + Alias: kt.Alias, State: getTaskState(v1alpha1.TaskPending), } if phase, ok := kusciaJobStatus.TaskStatus[taskID]; ok { diff --git a/pkg/ssexporter/parse/domain.go b/pkg/ssexporter/parse/domain.go index b53886fe..17b90cb2 100644 --- a/pkg/ssexporter/parse/domain.go +++ b/pkg/ssexporter/parse/domain.go @@ -28,16 +28,25 @@ import ( ) // GetIPFromDomain get a list of IP addresses from a local domain name -func GetIPFromDomain(localDomainName string) []string { - ipAddresses, err := net.LookupIP(localDomainName) - var ipAddr []string +func GetIPFromDomain(domainName string) (ipList []string) { + if IsIP(domainName) { + ipList = append(ipList, domainName) + return + } + ipAddresses, err := net.LookupIP(domainName) if err != nil { - nlog.Error("Cannot find IP address:", err) + nlog.Warnf("Cannot find IP address: %s", err.Error()) + return } for _, ip := range ipAddresses { - ipAddr = append(ipAddr, ip.String()) + ipList = append(ipList, ip.String()) } - return ipAddr + return +} + +func IsIP(ipStr string) bool { + ip := net.ParseIP(ipStr) + return ip != nil } // GetClusterAddress get the address and port of a remote domain connected by a local domain diff --git a/pkg/transport/msq/mem_control.go b/pkg/transport/msq/mem_control.go index b1dcdde8..c14a8306 100644 --- a/pkg/transport/msq/mem_control.go +++ b/pkg/transport/msq/mem_control.go @@ -18,6 +18,7 @@ import ( "sync" "time" + "github.com/secretflow/kuscia/pkg/utils/nlog" "gitlab.com/jonas.jasas/condchan" ) @@ -45,6 +46,11 @@ func (mc *MemControl) Prefetch(byteSize uint64, timeout time.Duration) (bool, ti mc.Lock() available := mc.availableToPush(byteSize) if !available { + if byteSize > mc.totalByteSizeLimit { + nlog.Warnf("input body size(%d) max than maxByteLimit(%d), so skip it", byteSize, mc.totalByteSizeLimit) + return false, leftTimeout + } + start := time.Now() timeCh := time.After(timeout) waitTimeout := false diff --git a/pkg/transport/msq/session_queue.go b/pkg/transport/msq/session_queue.go index 
77283b2c..c5213dca 100644 --- a/pkg/transport/msq/session_queue.go +++ b/pkg/transport/msq/session_queue.go @@ -21,6 +21,7 @@ import ( "gitlab.com/jonas.jasas/condchan" "github.com/secretflow/kuscia/pkg/transport/transerr" + "github.com/secretflow/kuscia/pkg/utils/nlog" ) type SessionQueue struct { @@ -169,6 +170,11 @@ func (s *SessionQueue) waitUntil(check func() bool, cond *condchan.CondChan, tim } func (s *SessionQueue) tryPush(topic string, message *Message, timeout time.Duration) *transerr.TransError { + if message.ByteSize() > s.ByteSizeLimit { + nlog.Warnf("session queue topic(%s) new message len(%d) max than total buffer size(%d)", + topic, message.ByteSize(), s.ByteSizeLimit) + return transerr.NewTransError(transerr.BufferOverflow) + } checkFn := func() bool { return s.availableToPush(message) } @@ -180,6 +186,7 @@ func (s *SessionQueue) tryPush(topic string, message *Message, timeout time.Dura return err } if !available { + nlog.Infof("not found available buffer for topic(%s), len(%d)", topic, message.ByteSize()) return transerr.NewTransError(transerr.BufferOverflow) } s.innerPush(topic, message) diff --git a/pkg/transport/msq/session_queue_test.go b/pkg/transport/msq/session_queue_test.go index 1b0920ea..44db1245 100644 --- a/pkg/transport/msq/session_queue_test.go +++ b/pkg/transport/msq/session_queue_test.go @@ -48,7 +48,7 @@ func TestSessionQueuePushNoWait(t *testing.T) { sq.Push("topic", NewMessageByStr("12"), time.Minute) } processTime := time.Now().Sub(start) - assert.True(t, processTime < time.Second*5) + assert.Less(t, processTime, time.Millisecond*5500) } func TestSessionQueuePushTimeout(t *testing.T) { @@ -60,7 +60,7 @@ func TestSessionQueuePushTimeout(t *testing.T) { err := sq.Push("topic", msg, time.Second*5) assert.NotNil(t, err) processTime := time.Now().Sub(start) - assert.True(t, processTime >= time.Second*5) + assert.Less(t, processTime, time.Millisecond*100) } func producer(sq *SessionQueue, t *testing.T) { diff --git a/pkg/transport/server/grpc/server_test.go b/pkg/transport/server/grpc/server_test.go index aefae15d..5625ae58 100644 --- a/pkg/transport/server/grpc/server_test.go +++ b/pkg/transport/server/grpc/server_test.go @@ -379,21 +379,16 @@ func TestPerformance(t *testing.T) { var popFailCount int64 = 0 var workerCount int = 5 - stop = false wg := sync.WaitGroup{} wg.Add(workerCount * 2) produceFn := func(idx int) { - for !stop { - producer(t, &pushSucceedCount, &pushFailCount) - } + producer(t, &pushSucceedCount, &pushFailCount) wg.Done() } consumerFn := func(idx int) { - for !stop { - consumer(t, &popSucceedCount, &popFailCount) - } + consumer(t, &popSucceedCount, &popFailCount) wg.Done() } @@ -401,19 +396,7 @@ func TestPerformance(t *testing.T) { go produceFn(i) go consumerFn(i) } - start := time.Now() - go func() { - for !stop { - time.Sleep(time.Second * 30) - - fmt.Printf("-----Current time: %s, Cost: %s----\n", time.Now().Format(time.RFC3339), time.Now().Sub(start)) - fmt.Printf("pushSucceedCount=%d pushFailCount=%d\n", pushSucceedCount, pushFailCount) - fmt.Printf("popSucceedCount=%d popFailCount=%d \n\n\n", popSucceedCount, popFailCount) - } - }() - time.Sleep(time.Second * 20) - stop = true wg.Wait() var leftCount int64 = 0 @@ -433,8 +416,8 @@ func TestPerformance(t *testing.T) { assert.Equal(t, pushSucceedCount, popSucceedCount+leftCount) - fmt.Printf("pushSucceedCount=%d pushFailCount=%d\n", pushSucceedCount, pushFailCount) - fmt.Printf("popSucceedCount=%d popFailCount=%d leftCount=%d totalRecvCount=%d\n", + fmt.Printf("pushSucceedCount=%d 
pushFailCount=%d", pushSucceedCount, pushFailCount) + nlog.Infof("popSucceedCount=%d popFailCount=%d leftCount=%d totalRecvCount=%d", popSucceedCount, popFailCount, leftCount, leftCount+popSucceedCount) } @@ -475,6 +458,17 @@ func TestLoadOverrideGrpcTransConfig(t *testing.T) { newServer := NewServer(newGrpcConfig, msq.NewSessionManager()) go newServer.Start(context.Background()) + for i := 0; i < 10; i++ { + if dial, err := grpc.Dial("127.0.0.1:9091", + grpc.WithTransportCredentials(insecure.NewCredentials())); err != nil { + // because the server starts in a new goroutine, it may not have started yet + time.Sleep(10 * time.Millisecond) + } else { + assert.NoError(t, dial.Close()) + break + } + } + dial, err := grpc.Dial("127.0.0.1:9091", grpc.WithTransportCredentials(insecure.NewCredentials())) assert.NoError(t, err) diff --git a/pkg/transport/server/http/server.go b/pkg/transport/server/http/server.go index 010641c3..467cb90c 100644 --- a/pkg/transport/server/http/server.go +++ b/pkg/transport/server/http/server.go @@ -79,15 +79,15 @@ func (s *Server) Start(ctx context.Context) error { select { case err := <-errChan: - nlog.Fatalf("Transport server exit with error: %v", err) + nlog.Errorf("Transport server exit with error: %v", err) return err case <-ctx.Done(): if err := sr.Shutdown(ctx); err != nil { nlog.Warnf("Transport shutdown fail:%v ", err) } - nlog.Fatal("Transport server has been canceled") + nlog.Errorf("Transport server has been canceled") + return fmt.Errorf("transport server has been canceled") } - return nil } func Run(ctx context.Context, configFile string) error { diff --git a/pkg/transport/server/http/server_test.go b/pkg/transport/server/http/server_test.go index 3814eee9..163d060f 100644 --- a/pkg/transport/server/http/server_test.go +++ b/pkg/transport/server/http/server_test.go @@ -49,12 +49,18 @@ var httpConfig *config.ServerConfig var msqConfig *msq.Config func NewRandomStr(l int) []byte { - str := "0123456789abcdefghijklmnopqrstuvwxyz" - bytes := []byte(str) + const str = "0123456789abcdefghijklmnopqrstuvwxyz" content := make([]byte, l, l) - r := rand.New(rand.NewSource(time.Now().UnixNano())) - for i := 0; i < l; i++ { - content[i] = bytes[r.Intn(len(bytes))] + for l > 0 { + c := len(str) + if l < len(str) { + c = l + } + + for i := 0; i < c; i++ { + l-- + content[l] = str[i] + } } return content } @@ -71,7 +77,7 @@ func verifyResponse(t *testing.T, req *http.Request, code transerr.ErrorCode) *c body, err := io.ReadAll(resp.Body) assert.NoError(t, err) outbound, err := server.codec.UnMarshal(body) - assert.Equal(t, outbound.Code, string(code)) + assert.Equal(t, string(code), outbound.Code) return outbound } @@ -83,7 +89,8 @@ func TestMain(m *testing.M) { msq.Init(msqConfig) server = NewServer(httpConfig, msq.NewSessionManager()) go server.Start(context.Background()) - time.Sleep(time.Second * 2) + // wait for the server to start up + time.Sleep(time.Millisecond * 200) os.Exit(m.Run()) } @@ -117,11 +124,6 @@ func TestPeek(t *testing.T) { func TestPopWithData(t *testing.T) { server.sm = msq.NewSessionManager() - go func() { - time.Sleep(time.Second * 1) - err := server.sm.Push("session3", "node0-topic2", &msq.Message{Content: NewRandomStr(10)}, time.Second) - assert.Nil(t, err) - }() popReq, _ := http.NewRequest("POST", generatePath(pop), bytes.NewBuffer(nil)) popReq.Header.Set(codec.PtpTopicID, "topic2") @@ -131,12 +133,18 @@ func TestPopWithData(t *testing.T) { params.Add("timeout", "5") popReq.URL.RawQuery = params.Encode() + go func() { + time.Sleep(time.Millisecond * 100) + err := 
server.sm.Push("session3", "node0-topic2", &msq.Message{Content: NewRandomStr(10)}, time.Second) + assert.Nil(t, err) + }() + start := time.Now() outbound := verifyResponse(t, popReq, transerr.Success) assert.Equal(t, len(outbound.Payload), 10) processTime := time.Now().Sub(start) - assert.True(t, processTime >= time.Second) + assert.True(t, processTime >= time.Millisecond*50) } func TestPopTimeout(t *testing.T) { @@ -153,7 +161,8 @@ func TestPopTimeout(t *testing.T) { assert.True(t, outbound.Payload == nil) processTime := time.Now().Sub(start) - assert.True(t, processTime >= time.Second*2 && processTime <= time.Second*3) + assert.Greater(t, processTime, time.Millisecond*1500) // 1.5s + assert.Less(t, processTime, time.Millisecond*2500) // 2.5s } func TestReleaseTopic(t *testing.T) { @@ -195,12 +204,22 @@ func TestReleaseSession(t *testing.T) { } func TestTooLargeBody(t *testing.T) { - pushReq, _ := http.NewRequest("POST", generatePath(invoke), bytes.NewBuffer(NewRandomStr(int(httpConfig. - ReqBodyMaxSize+1)))) - pushReq.Header.Set(codec.PtpTopicID, "topic2") - pushReq.Header.Set(codec.PtpSessionID, "session7") - pushReq.Header.Set(codec.PtpSourceNodeID, "node0") - verifyResponse(t, pushReq, transerr.BodyTooLarge) + { + pushReq, _ := http.NewRequest("POST", generatePath(invoke), bytes.NewBuffer(NewRandomStr(int(httpConfig. + ReqBodyMaxSize+1)))) + pushReq.Header.Set(codec.PtpTopicID, "topic2") + pushReq.Header.Set(codec.PtpSessionID, "session7") + pushReq.Header.Set(codec.PtpSourceNodeID, "node0") + verifyResponse(t, pushReq, transerr.BodyTooLarge) + } + + { // session buffer is too small + pushReq, _ := http.NewRequest("POST", generatePath(invoke), bytes.NewBuffer(NewRandomStr(int(msq.DefaultMsgConfig().PerSessionByteSizeLimit+1)))) + pushReq.Header.Set(codec.PtpTopicID, "topic2") + pushReq.Header.Set(codec.PtpSessionID, "session7") + pushReq.Header.Set(codec.PtpSourceNodeID, "node0") + verifyResponse(t, pushReq, transerr.BufferOverflow) + } } func TestPushWait(t *testing.T) { @@ -223,8 +242,8 @@ func TestPushWait(t *testing.T) { start := time.Now() verifyResponse(t, pushReq, transerr.Success) processTime := time.Now().Sub(start) - assert.True(t, processTime >= time.Second && processTime <= time.Second*2) - + assert.Greater(t, processTime, time.Millisecond*500) // 0.5s + assert.Less(t, processTime, time.Millisecond*2500) // 2.5s } func TestBadRequestParam(t *testing.T) { @@ -239,12 +258,13 @@ var topicCount int = 5 var stop bool = false -func producer(t *testing.T, sendSucceedCount, sendFailCount *int64) { +func producer(t *testing.T, sendSucceedCount, sendFailCount *int64, sessionIdx, topicIdx int) { msgLength := 256 * 1024 r := rand.New(rand.NewSource(time.Now().UnixNano())) - sid := fmt.Sprintf("sessionx-%d", r.Intn(sessionCount)) - topic := fmt.Sprintf("topic-%d", r.Intn(topicCount)) - content := NewRandomStr(r.Intn(msgLength) + 256*1024) + sid := fmt.Sprintf("sessionx-%d", sessionIdx) + topic := fmt.Sprintf("topic-%d", topicIdx) + content := NewRandomStr(r.Intn(msgLength) + 128*1024) + nlog.Infof("new send content length=%d", len(content)) req, _ := http.NewRequest("POST", generatePath(invoke), bytes.NewBuffer(content)) req.Header.Set(codec.PtpTopicID, topic) @@ -265,16 +285,15 @@ func producer(t *testing.T, sendSucceedCount, sendFailCount *int64) { atomic.AddInt64(sendSucceedCount, 1) } else { if outbound != nil { - nlog.Warnf("%v", outbound) + nlog.Warnf("producer failed with: %v", outbound) } atomic.AddInt64(sendFailCount, 1) } } -func consumer(t *testing.T, popMsgCount, 
popFailCount *int64) { - r := rand.New(rand.NewSource(time.Now().UnixNano())) - sid := fmt.Sprintf("sessionx-%d", r.Intn(sessionCount)) - topic := fmt.Sprintf("topic-%d", r.Intn(topicCount)) +func consumer(t *testing.T, popMsgCount, popFailCount *int64, sessionIdx, topicIdx int) { + sid := fmt.Sprintf("sessionx-%d", sessionIdx) + topic := fmt.Sprintf("topic-%d", topicIdx) req, _ := http.NewRequest("POST", generatePath(pop), bytes.NewBuffer(nil)) req.Header.Set(codec.PtpTopicID, topic) @@ -307,21 +326,26 @@ func TestPerformance(t *testing.T) { var sendFailCount int64 = 0 var workerCount int = 5 - stop := false wg := sync.WaitGroup{} wg.Add(workerCount * 2) produceFn := func(idx int) { - for !stop { - producer(t, &sendSucceedCount, &sendFailCount) + for i := 0; i < sessionCount/workerCount; i++ { + for j := 0; j < topicCount; j++ { + producer(t, &sendSucceedCount, &sendFailCount, idx*workerCount+i, j) + } } + wg.Done() } consumerFn := func(idx int) { - for !stop { - consumer(t, &popMsgCount, &popFailCount) + for i := 0; i < sessionCount/workerCount; i++ { + for j := 0; j < topicCount; j++ { + consumer(t, &popMsgCount, &popFailCount, idx*workerCount+i, j) + } } + wg.Done() } @@ -329,8 +353,7 @@ func TestPerformance(t *testing.T) { go produceFn(i) go consumerFn(i) } - time.Sleep(time.Second * 20) - stop = true + wg.Wait() var leftCount int64 = 0 @@ -348,10 +371,10 @@ func TestPerformance(t *testing.T) { } } - // assert.Equal(t, sendFailCount, 0) - // assert.Equal(t, popFailCount, 0) + assert.Equal(t, int64(0), sendFailCount) + assert.Equal(t, int64(0), popFailCount) assert.Equal(t, sendSucceedCount, popMsgCount+leftCount) - fmt.Printf("sendSucceedCount=%d sendFailCount=%d\n", sendSucceedCount, sendFailCount) - fmt.Printf("popMsgCount=%d popFailCount=%d leftCount=%d totalRecvCount=%d\n", + nlog.Infof("sendSucceedCount=%d sendFailCount=%d", sendSucceedCount, sendFailCount) + nlog.Infof("popMsgCount=%d popFailCount=%d leftCount=%d totalRecvCount=%d", popMsgCount, popFailCount, leftCount, leftCount+popMsgCount) } diff --git a/pkg/utils/cgroup/cgroup_linux.go b/pkg/utils/cgroup/cgroup_linux.go new file mode 100644 index 00000000..5b68d00a --- /dev/null +++ b/pkg/utils/cgroup/cgroup_linux.go @@ -0,0 +1,303 @@ +//go:build linux +// +build linux + +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package cgroup + +import ( + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/containerd/cgroups/v3" + "github.com/containerd/cgroups/v3/cgroup1" + "github.com/containerd/cgroups/v3/cgroup2" + "github.com/opencontainers/runtime-spec/specs-go" + + "github.com/secretflow/kuscia/pkg/utils/nlog" + "github.com/secretflow/kuscia/pkg/utils/paths" +) + +func NewManager(conf *Config) (Manager, error) { + if conf == nil || conf.Group == "" { + return nil, fmt.Errorf("cgroup group can't be empty") + } + + mode := cgroups.Mode() + switch mode { + case cgroups.Unified, cgroups.Hybrid: + return newKCgroup2(conf) + case cgroups.Legacy: + return newKCgroup1(conf) + default: + return nil, fmt.Errorf("unsupported cgroup version: %v", mode) + } +} + +type KCgroup1 struct { + *Config +} + +func newKCgroup1(conf *Config) (*KCgroup1, error) { + m := &KCgroup1{conf} + return m, nil +} + +func (m *KCgroup1) AddCgroup() error { + resources := buildCgroup1Resources(m.CPUQuota, m.CPUPeriod, m.MemoryLimit) + cg, err := cgroup1.New(cgroup1.StaticPath(m.Group), resources) + if err != nil { + return err + } + + if m.Pid > 0 { + return cg.AddProc(m.Pid) + } + return nil +} + +func (m *KCgroup1) UpdateCgroup() error { + if err := paths.EnsurePath(filepath.Join(DefaultMountPoint, "/cpu", m.Group), false); err != nil { + return err + } + + cg, err := cgroup1.Load(cgroup1.StaticPath(m.Group)) + if err != nil { + return err + } + resources := buildCgroup1Resources(m.CPUQuota, m.CPUPeriod, m.MemoryLimit) + return cg.Update(resources) +} + +func (m *KCgroup1) DeleteCgroup() error { + cg, err := cgroup1.Load(cgroup1.StaticPath(m.Group)) + if err != nil { + return err + } + return cg.Delete() +} + +type KCgroup2 struct { + *Config +} + +func newKCgroup2(conf *Config) (*KCgroup2, error) { + m := &KCgroup2{conf} + return m, nil +} + +func (m *KCgroup2) AddCgroup() error { + resources := buildCgroup2Resources(m.CPUQuota, m.CPUPeriod, m.MemoryLimit) + cg, err := cgroup2.NewManager(DefaultMountPoint, m.Group, resources) + if err != nil { + return err + } + + if m.Pid > 0 { + return cg.AddProc(m.Pid) + } + return nil +} + +func (m *KCgroup2) UpdateCgroup() error { + if err := paths.EnsurePath(filepath.Join(DefaultMountPoint, m.Group), false); err != nil { + return err + } + + cg, err := cgroup2.Load(m.Group) + if err != nil { + return err + } + resources := buildCgroup2Resources(m.CPUQuota, m.CPUPeriod, m.MemoryLimit) + return cg.Update(resources) +} + +func (m *KCgroup2) DeleteCgroup() error { + cg, err := cgroup2.Load(m.Group) + if err != nil { + return err + } + return cg.Delete() +} + +func HasPermission() bool { + return IsCgroupExist(KusciaAppsGroup, true) +} + +func IsCgroupExist(group string, autoCreate bool) bool { + groupPath := "" + mode := cgroups.Mode() + switch mode { + case cgroups.Unified, cgroups.Hybrid: + groupPath = filepath.Join(DefaultMountPoint, group) + case cgroups.Legacy: + groupPath = filepath.Join(DefaultMountPoint, "/cpu", group) + default: + nlog.Warnf("Unsupported cgroup version: %v", mode) + return false + } + + err := paths.EnsurePath(groupPath, autoCreate) + if err != nil { + nlog.Infof("Cgroup path does not exist, %v", err) + return false + } + + return true +} + +func buildCgroup2Resources(cpuQuota *int64, cpuPeriod *uint64, memoryLimit *int64) *cgroup2.Resources { + resources := &cgroup2.Resources{} + if (cpuQuota != nil && *cpuQuota != 0) || (cpuPeriod != nil && *cpuPeriod != 0) { + resources.CPU = &cgroup2.CPU{ + Max: cgroup2.NewCPUMax(cpuQuota, cpuPeriod), + } + } + + if 
memoryLimit != nil && *memoryLimit != 0 { + resources.Memory = &cgroup2.Memory{ + Max: memoryLimit, + } + } + return resources +} + +func buildCgroup1Resources(cpuQuota *int64, cpuPeriod *uint64, memoryLimit *int64) *specs.LinuxResources { + resources := &specs.LinuxResources{} + if (cpuQuota != nil && *cpuQuota != 0) || (cpuPeriod != nil && *cpuPeriod != 0) { + resources.CPU = &specs.LinuxCPU{ + Quota: cpuQuota, + Period: cpuPeriod, + } + } + + if memoryLimit != nil && *memoryLimit != 0 { + resources.Memory = &specs.LinuxMemory{ + Limit: memoryLimit, + } + } + return resources +} + +func GetMemoryLimit(group string) (int64, error) { + mode := cgroups.Mode() + switch mode { + case cgroups.Unified, cgroups.Hybrid: + return parseCgroup2MemoryLimit(group) + case cgroups.Legacy: + return parseCgroup1MemoryLimit(group) + default: + return 0, fmt.Errorf("unsupported cgroup version: %v", mode) + } +} + +func parseCgroup2MemoryLimit(group string) (limit int64, err error) { + content, err := os.ReadFile(filepath.Join(group, "/memory.max")) + if err != nil { + return 0, err + } + + contentStr := strings.TrimSpace(string(content)) + if contentStr == "max" { + limit = MaxMemoryLimit + } else { + limit, err = strconv.ParseInt(strings.TrimSpace(string(content)), 10, 64) + if err != nil { + return 0, fmt.Errorf("invalid memory limit content: %s", content) + } + } + + return limit, nil +} + +func parseCgroup1MemoryLimit(group string) (int64, error) { + content, err := os.ReadFile(filepath.Join(group, "/memory/memory.limit_in_bytes")) + if err != nil { + return 0, err + } + + limit, err := strconv.ParseInt(strings.TrimSpace(string(content)), 10, 64) + if err != nil { + return 0, fmt.Errorf("invalid memory limit content: %s", content) + } + + return limit, nil +} + +func GetCPUQuotaAndPeriod(group string) (quota int64, period int64, err error) { + mode := cgroups.Mode() + switch mode { + case cgroups.Unified, cgroups.Hybrid: + return parseCgroup2CPUQuotaAndPeriod(group) + case cgroups.Legacy: + return parseCgroup1CPUQuotaAndPeriod(group) + default: + return 0, 0, fmt.Errorf("unsupported cgroup version: %v", mode) + } +} + +func parseCgroup2CPUQuotaAndPeriod(group string) (quota int64, period int64, err error) { + content, err := os.ReadFile(filepath.Join(group, "/cpu.max")) + if err != nil { + return 0, 0, err + } + parts := strings.SplitN(strings.TrimSpace(string(content)), " ", 2) + if len(parts) != 2 { + return 0, 0, fmt.Errorf("invalid cpu.max content: %s", content) + } + + if parts[0] == "max" { + quota = MaxCPUQuota + } else { + quota, err = strconv.ParseInt(parts[0], 10, 64) + if err != nil { + return 0, 0, fmt.Errorf("invalid cpu quota content: %s", parts[0]) + } + } + + period, err = strconv.ParseInt(parts[1], 10, 64) + if err != nil { + return 0, 0, fmt.Errorf("invalid cpu period content: %s", parts[1]) + } + + return quota, period, nil +} + +func parseCgroup1CPUQuotaAndPeriod(group string) (quota int64, period int64, err error) { + content, err := os.ReadFile(filepath.Join(group, "/cpu/cpu.cfs_quota_us")) + if err != nil { + return 0, 0, err + } + + quota, err = strconv.ParseInt(strings.TrimSpace(string(content)), 10, 64) + if err != nil { + return 0, 0, fmt.Errorf("invalid cpu quota content: %s", content) + } + + content, err = os.ReadFile(filepath.Join(group, "/cpu/cpu.cfs_period_us")) + if err != nil { + return 0, 0, err + } + + period, err = strconv.ParseInt(strings.TrimSpace(string(content)), 10, 64) + if err != nil { + return 0, 0, fmt.Errorf("invalid cpu period content: %s", content) + } + 
+ return quota, period, nil +} diff --git a/pkg/utils/cgroup/cgroup_linux_test.go b/pkg/utils/cgroup/cgroup_linux_test.go new file mode 100644 index 00000000..02f7ecca --- /dev/null +++ b/pkg/utils/cgroup/cgroup_linux_test.go @@ -0,0 +1,219 @@ +//go:build linux +// +build linux + +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cgroup + +import ( + "testing" + + "github.com/containerd/cgroups/v3/cgroup2" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/stretchr/testify/assert" +) + +func newMockManager() (Manager, error) { + conf := &Config{ + Group: "kuscia.test", + Pid: 0, + CPUQuota: nil, + CPUPeriod: nil, + MemoryLimit: nil, + } + return NewManager(conf) +} + +func TestNewManager(t *testing.T) { + _, err := NewManager(nil) + assert.NotNil(t, err) +} + +func TestAddCgroup(t *testing.T) { + m, _ := newMockManager() + m.AddCgroup() +} + +func TestUpdateCgroup(t *testing.T) { + m, _ := newMockManager() + m.UpdateCgroup() +} + +func TestDeleteCgroup(t *testing.T) { + m, _ := newMockManager() + m.DeleteCgroup() +} + +func TestHasPermission(t *testing.T) { + HasPermission() +} + +func TestIsCgroupExist(t *testing.T) { + IsCgroupExist("kuscia.test", false) +} + +func TestBuildCgroup2Resources(t *testing.T) { + cpuQuota := int64(100000) + cpuPeriod := uint64(100000) + memoryLimit := int64(100000) + tests := []struct { + name string + cpuQuota *int64 + cpuPeriod *uint64 + memoryLimit *int64 + want *cgroup2.Resources + }{ + { + name: "no limits", + cpuQuota: nil, + cpuPeriod: nil, + memoryLimit: nil, + want: &cgroup2.Resources{}, + }, + { + name: "cpu limit", + cpuQuota: &cpuQuota, + cpuPeriod: &cpuPeriod, + memoryLimit: nil, + want: &cgroup2.Resources{ + CPU: &cgroup2.CPU{ + Max: cgroup2.NewCPUMax(&cpuQuota, &cpuPeriod), + }, + }, + }, + { + name: "memory limit", + cpuQuota: nil, + cpuPeriod: nil, + memoryLimit: &memoryLimit, + want: &cgroup2.Resources{ + Memory: &cgroup2.Memory{ + Max: &memoryLimit, + }, + }, + }, + { + name: "both limits", + cpuQuota: &cpuQuota, + cpuPeriod: &cpuPeriod, + memoryLimit: &memoryLimit, + want: &cgroup2.Resources{ + CPU: &cgroup2.CPU{ + Max: cgroup2.NewCPUMax(&cpuQuota, &cpuPeriod), + }, + Memory: &cgroup2.Memory{ + Max: &memoryLimit, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := buildCgroup2Resources(tt.cpuQuota, tt.cpuPeriod, tt.memoryLimit) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestBuildCgroup1Resources(t *testing.T) { + cpuQuota := int64(100000) + cpuPeriod := uint64(100000) + memoryLimit := int64(100000) + + tests := []struct { + name string + cpuQuota *int64 + cpuPeriod *uint64 + memoryLimit *int64 + expectedValue *specs.LinuxResources + }{ + { + name: "no limits", + cpuQuota: nil, + cpuPeriod: nil, + memoryLimit: nil, + expectedValue: &specs.LinuxResources{}, + }, + { + name: "cpu limit", + cpuQuota: &cpuQuota, + cpuPeriod: &cpuPeriod, + memoryLimit: nil, + expectedValue: &specs.LinuxResources{ + CPU: &specs.LinuxCPU{ + 
Quota: &cpuQuota, + Period: &cpuPeriod, + }, + }, + }, + { + name: "memory limit", + cpuQuota: nil, + cpuPeriod: nil, + memoryLimit: &memoryLimit, + expectedValue: &specs.LinuxResources{ + Memory: &specs.LinuxMemory{Limit: &memoryLimit}, + }, + }, + { + name: "both limits", + cpuQuota: &cpuQuota, + cpuPeriod: &cpuPeriod, + memoryLimit: &memoryLimit, + expectedValue: &specs.LinuxResources{ + CPU: &specs.LinuxCPU{ + Quota: &cpuQuota, + Period: &cpuPeriod, + }, + Memory: &specs.LinuxMemory{Limit: &memoryLimit}}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := buildCgroup1Resources(tt.cpuQuota, tt.cpuPeriod, tt.memoryLimit) + assert.Equal(t, tt.expectedValue, got) + }) + } +} + +func TestGetMemoryLimit(t *testing.T) { + limit, err := GetMemoryLimit("kuscia.test/test") + assert.Equal(t, int64(0), limit) + assert.NotNil(t, err) +} + +func TestGetCPUQuotaAndPeriod(t *testing.T) { + quota, period, err := GetCPUQuotaAndPeriod("kuscia.test/test") + assert.Equal(t, int64(0), quota) + assert.Equal(t, int64(0), period) + assert.NotNil(t, err) +} + +func TestParseCgroup2MemoryLimit(t *testing.T) { + parseCgroup2MemoryLimit(DefaultMountPoint) +} + +func TestParseCgroup1MemoryLimit(t *testing.T) { + parseCgroup1MemoryLimit(DefaultMountPoint) +} + +func TestParseCgroup2CPUQuotaAndPeriod(t *testing.T) { + parseCgroup2CPUQuotaAndPeriod(DefaultMountPoint) +} + +func TestParseCgroup1CPUQuotaAndPeriod(t *testing.T) { + parseCgroup1CPUQuotaAndPeriod(DefaultMountPoint) +} diff --git a/pkg/utils/cgroup/cgroup_unsupported.go b/pkg/utils/cgroup/cgroup_unsupported.go new file mode 100644 index 00000000..58f35ff7 --- /dev/null +++ b/pkg/utils/cgroup/cgroup_unsupported.go @@ -0,0 +1,59 @@ +//go:build !linux +// +build !linux + +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package cgroup + +import ( + "fmt" +) + +type MockManager struct { + *Config +} + +func NewManager(conf *Config) (Manager, error) { + return &MockManager{conf}, nil +} + +func (m *MockManager) AddCgroup() error { + return fmt.Errorf("cgroup is not implemented on non-Linux OS") +} + +func (m *MockManager) UpdateCgroup() error { + return fmt.Errorf("cgroup is not implemented on non-Linux OS") +} + +func (m *MockManager) DeleteCgroup() error { + return fmt.Errorf("cgroup is not implemented on non-Linux OS") + +} + +func HasPermission() bool { + return false +} + +func IsCgroupExist(group string, autoCreate bool) bool { + return false +} + +func GetMemoryLimit(group string) (int64, error) { + return 0, fmt.Errorf("cgroup is not implemented on non-Linux OS") +} + +func GetCPUQuotaAndPeriod(group string) (quota int64, period int64, err error) { + return 0, 0, fmt.Errorf("cgroup is not implemented on non-Linux OS") +} diff --git a/pkg/utils/cgroup/cgroup_unsupported_test.go b/pkg/utils/cgroup/cgroup_unsupported_test.go new file mode 100644 index 00000000..f8fea564 --- /dev/null +++ b/pkg/utils/cgroup/cgroup_unsupported_test.go @@ -0,0 +1,67 @@ +//go:build !linux +// +build !linux + +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cgroup + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +var ( + cpuQuota = int64(100000) + cpuPeriod = uint64(100000) + memoryLimit = int64(100000) +) + +func newMockManager() (Manager, error) { + return NewManager(nil) +} + +func TestAddCgroup(t *testing.T) { + m, _ := newMockManager() + got := m.AddCgroup() + assert.NotNil(t, got) +} + +func TestUpdateCgroup(t *testing.T) { + m, _ := newMockManager() + got := m.UpdateCgroup() + assert.NotNil(t, got) +} + +func TestDeleteCgroup(t *testing.T) { + m, _ := newMockManager() + got := m.DeleteCgroup() + assert.NotNil(t, got) +} + +func TestHasPermission(t *testing.T) { + got := HasPermission() + assert.False(t, got) +} + +func TestIsCgroupExist(t *testing.T) { + got := IsCgroupExist("test", false) + assert.False(t, got) +} + +func TestGetMemoryLimit(t *testing.T) { + _, got := GetMemoryLimit("test") + assert.NotNil(t, got) +} diff --git a/pkg/utils/cgroup/common.go b/pkg/utils/cgroup/common.go new file mode 100644 index 00000000..a55980a8 --- /dev/null +++ b/pkg/utils/cgroup/common.go @@ -0,0 +1,42 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package cgroup + +const ( + DefaultMountPoint = "/sys/fs/cgroup" + KusciaAppsGroup = "/kuscia.apps" + K8sIOGroup = "/k8s.io" +) + +const ( + // MaxMemoryLimit represents the unlimited cgroup memory.limit_in_bytes value + MaxMemoryLimit int64 = 9223372036854771712 + // MaxCPUQuota represents the unlimited cgroup cpu.cfs_quota_us value + MaxCPUQuota int64 = -1 +) + +type Config struct { + Group string + Pid uint64 + CPUQuota *int64 + CPUPeriod *uint64 + MemoryLimit *int64 +} + +type Manager interface { + AddCgroup() error + UpdateCgroup() error + DeleteCgroup() error +} diff --git a/pkg/utils/common/rand_string.go b/pkg/utils/common/rand_string.go index 82173834..a60ba796 100644 --- a/pkg/utils/common/rand_string.go +++ b/pkg/utils/common/rand_string.go @@ -30,8 +30,8 @@ func GenerateRandomBytes(l int) []byte { } // GenerateID generates a random unique id. -func GenerateID() string { - b := make([]byte, 16) +func GenerateID(len int) string { + b := make([]byte, len) rand.Read(b) return hex.EncodeToString(b) } diff --git a/pkg/utils/lock/key_locker_test.go b/pkg/utils/lock/key_locker_test.go index 38e3703a..ba5ec36b 100644 --- a/pkg/utils/lock/key_locker_test.go +++ b/pkg/utils/lock/key_locker_test.go @@ -23,7 +23,8 @@ import ( "gotest.tools/v3/assert" ) -func doTestKeyLocker(t *testing.T) { +func TestKeyLocker(t *testing.T) { + t.Parallel() kl := NewKeyLocker() totalCase := 20 @@ -55,9 +56,3 @@ func doTestKeyLocker(t *testing.T) { } assert.Equal(t, len(kl.inUse), 0) } - -func TestKeyLocker(t *testing.T) { - for round := 0; round < 10; round++ { - t.Run(fmt.Sprintf("round_%v", round), doTestKeyLocker) - } -} diff --git a/pkg/utils/process/process.go b/pkg/utils/process/process.go index fa06a83b..53257714 100644 --- a/pkg/utils/process/process.go +++ b/pkg/utils/process/process.go @@ -15,13 +15,22 @@ package process import ( + "fmt" + "os" + "strconv" "strings" - "github.com/secretflow/kuscia/pkg/utils/nlog" "github.com/shirou/gopsutil/v3/process" + + "github.com/secretflow/kuscia/pkg/utils/nlog" ) -// CheckProcessExists check whether process exists by name +const ( + oomScoreAdjMax = 1000 + oomScoreAdjMin = -1000 +) + +// CheckExists check whether process exists by name func CheckExists(processName string) bool { // currently running processes. processes, err := process.Processes() @@ -46,3 +55,24 @@ func CheckExists(processName string) bool { return isExist } + +// SetOOMScore sets the oom score for the provided pid. +func SetOOMScore(pid, score int) error { + if score > oomScoreAdjMax || score < oomScoreAdjMin { + return fmt.Errorf("invalid score %v, oom score must be between %d and %d", score, oomScoreAdjMin, oomScoreAdjMax) + } + + path := fmt.Sprintf("/proc/%d/oom_score_adj", pid) + f, err := os.OpenFile(path, os.O_WRONLY, 0) + if err != nil { + return err + } + defer f.Close() + + if _, err = f.WriteString(strconv.Itoa(score)); err != nil { + return err + } + + nlog.Infof("Set pid[%v] oom score adj to %v", pid, score) + return nil +} diff --git a/cmd/kuscia/modules/envoy_test.go b/pkg/utils/process/process_test.go similarity index 65% rename from cmd/kuscia/modules/envoy_test.go rename to pkg/utils/process/process_test.go index 4541aa0b..a60d94ca 100644 --- a/cmd/kuscia/modules/envoy_test.go +++ b/pkg/utils/process/process_test.go @@ -12,24 +12,27 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package modules +package process import ( - "context" "testing" - "github.com/secretflow/kuscia/cmd/kuscia/confloader" + "github.com/stretchr/testify/assert" ) -func TestRunEnvoy(t *testing.T) { - dependency := &Dependencies{ - KusciaConfig: confloader.KusciaConfig{ - RootDir: "./", - DomainID: "alice", - }, +func TestSetOOMScore(t *testing.T) { + tests := []struct { + pid int + score int + wantErr bool + }{ + {1, 1001, true}, + {1, -1001, true}, + {-10, -1000, true}, } - runCtx, cancel := context.WithCancel(context.Background()) - - RunEnvoy(runCtx, cancel, dependency) + for _, tt := range tests { + err := SetOOMScore(tt.pid, tt.score) + assert.Equal(t, tt.wantErr, err != nil) + } } diff --git a/pkg/utils/resources/common.go b/pkg/utils/resources/common.go index fe933c32..60b5f86a 100644 --- a/pkg/utils/resources/common.go +++ b/pkg/utils/resources/common.go @@ -15,8 +15,6 @@ package resources import ( - "crypto/sha256" - "encoding/hex" "errors" "fmt" "reflect" @@ -25,14 +23,13 @@ import ( corelisters "k8s.io/client-go/listers/core/v1" + k8sresource "k8s.io/apimachinery/pkg/api/resource" + "github.com/secretflow/kuscia/pkg/common" kusciaapisv1alpha1 "github.com/secretflow/kuscia/pkg/crd/apis/kuscia/v1alpha1" kuscialistersv1alpha1 "github.com/secretflow/kuscia/pkg/crd/listers/kuscia/v1alpha1" - k8sresource "k8s.io/apimachinery/pkg/api/resource" ) -const k3sRegex = `^[a-z0-9]([a-z0-9.-]{0,61}[a-z0-9])?$` - // GetMasterDomain is used to get master domain id. func GetMasterDomain(domainLister kuscialistersv1alpha1.DomainLister, domainID string) (string, error) { domain, err := domainLister.Get(domainID) @@ -100,11 +97,11 @@ func IsOuterBFIAInterConnDomain(nsLister corelisters.NamespaceLister, domainID s } // ValidateK8sName checks dns subdomain names -func ValidateK8sName(val string, feildName string) error { +func ValidateK8sName(val string, fieldName string) error { - match, _ := regexp.MatchString(k3sRegex, val) + match, _ := regexp.MatchString(common.K3sRegex, val) if !match { - errorMsg := fmt.Sprintf("Field '%s' is invalid, Invalid value: '%s': regex used for validation is '%s' ", feildName, val, k3sRegex) + errorMsg := fmt.Sprintf("Field '%s' is invalid, Invalid value: '%s': regex used for validation is '%s' ", fieldName, val, common.K3sRegex) return errors.New(errorMsg) } @@ -126,16 +123,6 @@ func IsPartnerDomain(nsLister corelisters.NamespaceLister, domainID string) bool return false } -func HashString(input string) (string, error) { - hasher := sha256.New() - _, err := hasher.Write([]byte(input)) - if err != nil { - return "", err - } - - return hex.EncodeToString(hasher.Sum(nil))[:32], nil -} - // IsEmpty will judge whether data is empty func IsEmpty(v interface{}) bool { return reflect.DeepEqual(v, reflect.Zero(reflect.TypeOf(v)).Interface()) @@ -151,25 +138,25 @@ func SplitRSC(rsc string, n int) (string, error) { if unit == k8sresource.DecimalSI { quantity.SetMilli(quantity.MilliValue() / int64(n)) return quantity.String(), nil - } else { - bytes := quantity.Value() - bytesPerPart := bytes / int64(n) - var result string - switch { - case bytesPerPart >= 1<<60: - result = fmt.Sprintf("%.0fPi", float64(bytesPerPart)/(1<<50)) - case bytesPerPart >= 1<<50: - result = fmt.Sprintf("%.0fTi", float64(bytesPerPart)/(1<<40)) - case bytesPerPart >= 1<<40: - result = fmt.Sprintf("%.0fGi", float64(bytesPerPart)/(1<<30)) - case bytesPerPart >= 1<<30: - result = fmt.Sprintf("%.0fMi", float64(bytesPerPart)/(1<<20)) - case bytesPerPart >= 1<<20: - result = fmt.Sprintf("%.0fKi", 
float64(bytesPerPart)/(1<<10)) - default: - quantity.Set(bytesPerPart) - result = quantity.String() - } - return result, nil } + + bytes := quantity.Value() + bytesPerPart := bytes / int64(n) + var result string + switch { + case bytesPerPart >= 1<<60: + result = fmt.Sprintf("%.0fPi", float64(bytesPerPart)/(1<<50)) + case bytesPerPart >= 1<<50: + result = fmt.Sprintf("%.0fTi", float64(bytesPerPart)/(1<<40)) + case bytesPerPart >= 1<<40: + result = fmt.Sprintf("%.0fGi", float64(bytesPerPart)/(1<<30)) + case bytesPerPart >= 1<<30: + result = fmt.Sprintf("%.0fMi", float64(bytesPerPart)/(1<<20)) + case bytesPerPart >= 1<<20: + result = fmt.Sprintf("%.0fKi", float64(bytesPerPart)/(1<<10)) + default: + quantity.Set(bytesPerPart) + result = quantity.String() + } + return result, nil } diff --git a/pkg/utils/resources/common_test.go b/pkg/utils/resources/common_test.go index d80d64c2..623a75ea 100644 --- a/pkg/utils/resources/common_test.go +++ b/pkg/utils/resources/common_test.go @@ -151,12 +151,6 @@ func TestValidateK8sName(t *testing.T) { } } -func TestHashString(t *testing.T) { - input := "11111111-11111111-11111111-11111111-11111111-11111111-11111111-11111111" - output, _ := HashString(input) - assert.Equal(t, 32, len(output)) -} - func TestIsEmpty(t *testing.T) { var limitResource corev1.ResourceList assert.Equal(t, IsEmpty(limitResource), true, "IsEmpty() function cannot judge whether the data is empty. ") diff --git a/pkg/utils/resources/service.go b/pkg/utils/resources/service.go index 5c64083e..6e57e8f5 100644 --- a/pkg/utils/resources/service.go +++ b/pkg/utils/resources/service.go @@ -16,6 +16,7 @@ package resources import ( "context" + "crypto/sha256" "encoding/json" "fmt" "strings" @@ -109,18 +110,24 @@ func UpdateServiceAnnotations(kubeClient kubernetes.Interface, service *corev1.S return retry.OnError(retry.DefaultBackoff, net.IsConnectionRefused, updateFn) } +// GenerateServiceName is used to generate service name. +// Service name generation rules: +// 1. If the first character is a number, add svc- as a prefix; +// 2. If the length of the name exceeds 63 characters, it will be truncated to 63 characters. +// 3. The final name must comply with DNS subdomain naming rules. 
func GenerateServiceName(prefix, portName string) string { + prefix = strings.Trim(prefix, "-") + portName = strings.Trim(portName, "-") name := fmt.Sprintf("%s-%s", prefix, portName) - if len(name) > 62 { - value, err := HashString(name) - if err != nil { - value = strings.Trim(name[:40], "-") - } + if name[0] >= '0' && name[0] <= '9' { + name = "svc-" + name + } - name = portName + "-" + value - if len(name) > 62 { - name = strings.Trim(portName[:10], "-") + "-" + value - } + if len(name) > 63 { + hash := sha256.Sum256([]byte(name)) + hashStr := fmt.Sprintf("%x", hash) + maxPrefixLen := 63 - 16 - len(portName) - 6 + name = fmt.Sprintf("svc-%s-%s-%s", prefix[:maxPrefixLen], portName, hashStr[:16]) } return name } diff --git a/pkg/utils/resources/service_test.go b/pkg/utils/resources/service_test.go index 227c7375..0b2ab363 100644 --- a/pkg/utils/resources/service_test.go +++ b/pkg/utils/resources/service_test.go @@ -16,8 +16,10 @@ package resources import ( "context" + "strings" "testing" + "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" clientsetfake "k8s.io/client-go/kubernetes/fake" @@ -71,3 +73,56 @@ func TestPatchService(t *testing.T) { }) } } + +func TestGenerateServiceName(t *testing.T) { + tests := []struct { + name string + prefix string + portName string + expected string + }{ + { + name: "prefix-port is less than 63 characters", + prefix: "service-test-11111111", + portName: "domain", + expected: "service-test-11111111-domain", + }, + { + name: "prefix-port is equal to 63 characters", + prefix: "abc-012345678-012345678-012345678-012345678-012345678-01", + portName: "domain", + expected: "abc-012345678-012345678-012345678-012345678-012345678-01-domain", + }, + { + name: "prefix is greater than 63 characters", + prefix: "abc-012345678-012345678-012345678-012345678-012345678-012", + portName: "domain", + expected: "svc-abc-012345678-012345678-012345678-0-domain", + }, + { + name: "prefix is digital", + prefix: "123-456-789", + portName: "domain", + expected: "svc-123-456-789-domain", + }, + { + name: "prefix is digital and svc-prefix-port length greater than 63", + prefix: "12-012345678-012345678-012345678-012345678-012345678", + portName: "domain", + expected: "svc-12-012345678-012345678-012345678-012345678-012345678-domain", + }, + { + name: "prefix is digital and svc-prefix-port length greater than 63", + prefix: "123-012345678-012345678-012345678-012345678-012345678", + portName: "domain", + expected: "svc-123-012345678-012345678-012345678-0-domain", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := GenerateServiceName(tt.prefix, tt.portName) + assert.True(t, strings.HasPrefix(got, tt.expected)) + }) + } +} diff --git a/pkg/utils/signals/signal.go b/pkg/utils/signals/signal.go index a646752a..da123b12 100644 --- a/pkg/utils/signals/signal.go +++ b/pkg/utils/signals/signal.go @@ -41,7 +41,8 @@ func SetupSignalHandler() (stopCh <-chan struct{}) { sig := <-c nlog.Infof("Caught signal %v. Shutting down...", sig.String()) close(stop) - <-c + sig2 := <-c + nlog.Infof("Caught second signal %v. Exit directly.", sig2.String()) os.Exit(1) // second signal. Exit directly. 
}() diff --git a/pkg/utils/supervisor/supervisor.go b/pkg/utils/supervisor/supervisor.go index e6af36ee..ac619eb2 100644 --- a/pkg/utils/supervisor/supervisor.go +++ b/pkg/utils/supervisor/supervisor.go @@ -19,7 +19,6 @@ import ( "errors" "fmt" "math" - "os/exec" "time" "github.com/secretflow/kuscia/pkg/utils/nlog" @@ -36,6 +35,8 @@ const defaultMinRunningTimeMS = 3000 type Cmd interface { Start() error Wait() error + Pid() int + SetOOMScore() error } type Supervisor struct { @@ -117,13 +118,6 @@ func (s *Supervisor) Run(ctx context.Context, startup func(ctx context.Context) } } -func getCmdProcessID(c Cmd) int { - if cmd, ok := c.(*exec.Cmd); ok && cmd != nil && cmd.Process != nil { - return cmd.Process.Pid - } - return 0 -} - func (s *Supervisor) runProcess(ctx context.Context, cmd Cmd) error { stime := time.Now() if cmd == nil { @@ -131,18 +125,22 @@ func (s *Supervisor) runProcess(ctx context.Context, cmd Cmd) error { } if err := cmd.Start(); err != nil { - return fmt.Errorf("start process(%d) failed with %v", getCmdProcessID(cmd), err) + return fmt.Errorf("start process(%d) failed with %v", cmd.Pid(), err) + } + + if err := cmd.SetOOMScore(); err != nil { + nlog.Warnf("Set process(%d) oom_score_adj failed, %v, skip setting it", cmd.Pid(), err) } err := cmd.Wait() if err != nil { // process exit failed - nlog.Warnf("Process(%d) exit with error: %v", getCmdProcessID(cmd), err) + nlog.Warnf("Process(%d) exit with error: %v", cmd.Pid(), err) } else { - nlog.Infof("Process(%d) exit normally", getCmdProcessID(cmd)) + nlog.Infof("Process(%d) exit normally", cmd.Pid()) } if dt := time.Since(stime); dt.Milliseconds() <= int64(s.minRunningTimeMS) { - tmerr := fmt.Sprintf("process(%d) only existed %d ms, less than %d ms", getCmdProcessID(cmd), dt.Milliseconds(), s.minRunningTimeMS) + tmerr := fmt.Sprintf("process(%d) only existed %d ms, less than %d ms", cmd.Pid(), dt.Milliseconds(), s.minRunningTimeMS) if err != nil { return fmt.Errorf("%s, with error: %v", tmerr, err) } diff --git a/pkg/utils/supervisor/supervisor_test.go b/pkg/utils/supervisor/supervisor_test.go index 3a2298e9..11e89411 100644 --- a/pkg/utils/supervisor/supervisor_test.go +++ b/pkg/utils/supervisor/supervisor_test.go @@ -17,17 +17,21 @@ package supervisor import ( "context" "errors" + "sync" "testing" "time" "github.com/stretchr/testify/assert" + "gitlab.com/jonas.jasas/condchan" "github.com/secretflow/kuscia/pkg/utils/nlog" ) type FackCmd struct { - startMock func() error - waitMock func() error + startMock func() error + waitMock func() error + pidMock func() int + setOOMScoreMock func() error } func (fc *FackCmd) Start() error { @@ -46,8 +50,23 @@ func (fc *FackCmd) Wait() error { return nil } -func newMockCmd(startMock func() error, waitMock func() error) Cmd { +func (fc *FackCmd) Pid() int { + nlog.Info("fack cmd.pid") + if fc.pidMock != nil { + return fc.pidMock() + } + return 0 +} + +func (fc *FackCmd) SetOOMScore() error { + nlog.Info("fack cmd.setOOMScore") + if fc.setOOMScoreMock != nil { + return fc.setOOMScoreMock() + } + return nil +} +func newMockCmd(startMock func() error, waitMock func() error) Cmd { fack := FackCmd{ startMock: startMock, waitMock: waitMock, @@ -190,18 +209,31 @@ func TestSupervisorRun_CancelContext(t *testing.T) { sp.minRunningTimeMS = 100 ctx, cancel := context.WithCancel(context.Background()) + pv := condchan.New(&sync.Mutex{}) + count := 0 go func() { - time.Sleep(50 * time.Millisecond) + pv.L.Lock() + pv.Wait() + pv.L.Unlock() + cancel() + nlog.Infof("cancel the context") }() + // make 
sure pv.wait had called + time.Sleep(20 * time.Millisecond) + err := sp.Run(ctx, func(ctx context.Context) Cmd { return newMockCmd(nil, func() error { + pv.L.Lock() count++ + pv.Signal() + pv.L.Unlock() + select { - case <-time.After(300 * time.Millisecond): + case <-time.After(3000 * time.Millisecond): case <-ctx.Done(): } diff --git a/proto/api/v1alpha1/kusciaapi/job.pb.go b/proto/api/v1alpha1/kusciaapi/job.pb.go index 23ea8172..d5636ed2 100644 --- a/proto/api/v1alpha1/kusciaapi/job.pb.go +++ b/proto/api/v1alpha1/kusciaapi/job.pb.go @@ -2107,6 +2107,7 @@ type TaskStatus struct { StartTime string `protobuf:"bytes,5,opt,name=start_time,json=startTime,proto3" json:"start_time,omitempty"` EndTime string `protobuf:"bytes,6,opt,name=end_time,json=endTime,proto3" json:"end_time,omitempty"` Parties []*PartyStatus `protobuf:"bytes,7,rep,name=parties,proto3" json:"parties,omitempty"` + Alias string `protobuf:"bytes,8,opt,name=alias,proto3" json:"alias,omitempty"` } func (x *TaskStatus) Reset() { @@ -2190,6 +2191,13 @@ func (x *TaskStatus) GetParties() []*PartyStatus { return nil } +func (x *TaskStatus) GetAlias() string { + if x != nil { + return x.Alias + } + return "" +} + type PartyStatus struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -3063,7 +3071,7 @@ var file_kuscia_proto_api_v1alpha1_kusciaapi_job_proto_rawDesc = []byte{ 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x49, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x73, - 0x74, 0x61, 0x74, 0x65, 0x22, 0xfb, 0x01, 0x0a, 0x0a, 0x54, 0x61, 0x73, 0x6b, 0x53, 0x74, 0x61, + 0x74, 0x61, 0x74, 0x65, 0x22, 0x91, 0x02, 0x0a, 0x0a, 0x54, 0x61, 0x73, 0x6b, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x17, 0x0a, 0x07, 0x74, 0x61, 0x73, 0x6b, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x74, 0x61, 0x73, 0x6b, 0x49, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x73, 0x74, 0x61, @@ -3079,200 +3087,201 @@ var file_kuscia_proto_api_v1alpha1_kusciaapi_job_proto_rawDesc = []byte{ 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x50, 0x61, 0x72, 0x74, 0x79, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x07, 0x70, 0x61, 0x72, 0x74, 0x69, - 0x65, 0x73, 0x22, 0xae, 0x01, 0x0a, 0x0b, 0x50, 0x61, 0x72, 0x74, 0x79, 0x53, 0x74, 0x61, 0x74, - 0x75, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x5f, 0x69, 0x64, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x49, 0x64, 0x12, - 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, - 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x17, 0x0a, 0x07, 0x65, 0x72, 0x72, 0x5f, 0x6d, 0x73, 0x67, - 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x65, 0x72, 0x72, 0x4d, 0x73, 0x67, 0x12, 0x53, - 0x0a, 0x09, 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x35, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, - 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x4a, 0x6f, 0x62, 0x50, 0x61, 0x72, 0x74, 0x79, - 0x45, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 
0x52, 0x09, 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, - 0x6e, 0x74, 0x73, 0x22, 0x77, 0x0a, 0x1a, 0x42, 0x61, 0x74, 0x63, 0x68, 0x51, 0x75, 0x65, 0x72, - 0x79, 0x4a, 0x6f, 0x62, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, - 0x74, 0x12, 0x40, 0x0a, 0x06, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x0b, 0x32, 0x28, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x52, 0x65, - 0x71, 0x75, 0x65, 0x73, 0x74, 0x48, 0x65, 0x61, 0x64, 0x65, 0x72, 0x52, 0x06, 0x68, 0x65, 0x61, - 0x64, 0x65, 0x72, 0x12, 0x17, 0x0a, 0x07, 0x6a, 0x6f, 0x62, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x02, - 0x20, 0x03, 0x28, 0x09, 0x52, 0x06, 0x6a, 0x6f, 0x62, 0x49, 0x64, 0x73, 0x22, 0xb2, 0x01, 0x0a, - 0x1b, 0x42, 0x61, 0x74, 0x63, 0x68, 0x51, 0x75, 0x65, 0x72, 0x79, 0x4a, 0x6f, 0x62, 0x53, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x39, 0x0a, 0x06, - 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x6b, - 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, - 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, - 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x58, 0x0a, 0x04, 0x64, 0x61, 0x74, 0x61, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x44, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, - 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x42, 0x61, 0x74, 0x63, - 0x68, 0x51, 0x75, 0x65, 0x72, 0x79, 0x4a, 0x6f, 0x62, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, - 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x44, 0x61, 0x74, 0x61, 0x52, 0x04, 0x64, 0x61, 0x74, - 0x61, 0x22, 0x65, 0x0a, 0x1f, 0x42, 0x61, 0x74, 0x63, 0x68, 0x51, 0x75, 0x65, 0x72, 0x79, 0x4a, - 0x6f, 0x62, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, - 0x44, 0x61, 0x74, 0x61, 0x12, 0x42, 0x0a, 0x04, 0x6a, 0x6f, 0x62, 0x73, 0x18, 0x01, 0x20, 0x03, - 0x28, 0x0b, 0x32, 0x2e, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, - 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x4a, 0x6f, 0x62, 0x53, 0x74, 0x61, 0x74, - 0x75, 0x73, 0x52, 0x04, 0x6a, 0x6f, 0x62, 0x73, 0x22, 0x9e, 0x01, 0x0a, 0x11, 0x4a, 0x6f, 0x62, - 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x39, - 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, - 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, - 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, - 0x73, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x4e, 0x0a, 0x04, 0x64, 0x61, 0x74, - 0x61, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x3a, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, + 0x65, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x61, 0x6c, 0x69, 0x61, 0x73, 0x18, 0x08, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x05, 0x61, 0x6c, 0x69, 0x61, 0x73, 0x22, 0xae, 0x01, 0x0a, 0x0b, 0x50, 0x61, 0x72, + 0x74, 0x79, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x64, 0x6f, 0x6d, 0x61, + 0x69, 0x6e, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 
0x08, 0x64, 0x6f, 0x6d, + 0x61, 0x69, 0x6e, 0x49, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x17, 0x0a, 0x07, 0x65, + 0x72, 0x72, 0x5f, 0x6d, 0x73, 0x67, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x65, 0x72, + 0x72, 0x4d, 0x73, 0x67, 0x12, 0x53, 0x0a, 0x09, 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, + 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x35, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x4a, 0x6f, - 0x62, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x44, - 0x61, 0x74, 0x61, 0x52, 0x04, 0x64, 0x61, 0x74, 0x61, 0x22, 0x7c, 0x0a, 0x15, 0x4a, 0x6f, 0x62, - 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x44, 0x61, - 0x74, 0x61, 0x12, 0x15, 0x0a, 0x06, 0x6a, 0x6f, 0x62, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x49, 0x64, 0x12, 0x4c, 0x0a, 0x06, 0x73, 0x74, 0x61, - 0x74, 0x75, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x34, 0x2e, 0x6b, 0x75, 0x73, 0x63, - 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, - 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, - 0x4a, 0x6f, 0x62, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x44, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x52, - 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x70, 0x0a, 0x09, 0x4a, 0x6f, 0x62, 0x53, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x12, 0x15, 0x0a, 0x06, 0x6a, 0x6f, 0x62, 0x5f, 0x69, 0x64, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x49, 0x64, 0x12, 0x4c, 0x0a, 0x06, 0x73, - 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x34, 0x2e, 0x6b, 0x75, + 0x62, 0x50, 0x61, 0x72, 0x74, 0x79, 0x45, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x52, 0x09, + 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x73, 0x22, 0x77, 0x0a, 0x1a, 0x42, 0x61, 0x74, + 0x63, 0x68, 0x51, 0x75, 0x65, 0x72, 0x79, 0x4a, 0x6f, 0x62, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, + 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x40, 0x0a, 0x06, 0x68, 0x65, 0x61, 0x64, 0x65, + 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x28, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, + 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, + 0x68, 0x61, 0x31, 0x2e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x48, 0x65, 0x61, 0x64, 0x65, + 0x72, 0x52, 0x06, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x12, 0x17, 0x0a, 0x07, 0x6a, 0x6f, 0x62, + 0x5f, 0x69, 0x64, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x06, 0x6a, 0x6f, 0x62, 0x49, + 0x64, 0x73, 0x22, 0xb2, 0x01, 0x0a, 0x1b, 0x42, 0x61, 0x74, 0x63, 0x68, 0x51, 0x75, 0x65, 0x72, + 0x79, 0x4a, 0x6f, 0x62, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x12, 0x39, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x53, + 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x58, 0x0a, + 0x04, 0x64, 0x61, 0x74, 0x61, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x44, 0x2e, 0x6b, 0x75, 0x73, 
0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, - 0x69, 0x2e, 0x4a, 0x6f, 0x62, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x44, 0x65, 0x74, 0x61, 0x69, - 0x6c, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x7c, 0x0a, 0x0f, 0x57, 0x61, 0x74, - 0x63, 0x68, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x40, 0x0a, 0x06, - 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x28, 0x2e, 0x6b, - 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, - 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, - 0x48, 0x65, 0x61, 0x64, 0x65, 0x72, 0x52, 0x06, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x12, 0x27, - 0x0a, 0x0f, 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x5f, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, - 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0e, 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, - 0x53, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x73, 0x22, 0xa3, 0x01, 0x0a, 0x15, 0x57, 0x61, 0x74, 0x63, - 0x68, 0x4a, 0x6f, 0x62, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, - 0x65, 0x12, 0x42, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, - 0x2e, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, + 0x69, 0x2e, 0x42, 0x61, 0x74, 0x63, 0x68, 0x51, 0x75, 0x65, 0x72, 0x79, 0x4a, 0x6f, 0x62, 0x53, + 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x44, 0x61, 0x74, + 0x61, 0x52, 0x04, 0x64, 0x61, 0x74, 0x61, 0x22, 0x65, 0x0a, 0x1f, 0x42, 0x61, 0x74, 0x63, 0x68, + 0x51, 0x75, 0x65, 0x72, 0x79, 0x4a, 0x6f, 0x62, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, + 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x44, 0x61, 0x74, 0x61, 0x12, 0x42, 0x0a, 0x04, 0x6a, 0x6f, + 0x62, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2e, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, + 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, + 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x4a, + 0x6f, 0x62, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x04, 0x6a, 0x6f, 0x62, 0x73, 0x22, 0x9e, + 0x01, 0x0a, 0x11, 0x4a, 0x6f, 0x62, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x39, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, + 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, + 0x4e, 0x0a, 0x04, 0x64, 0x61, 0x74, 0x61, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x3a, 0x2e, + 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, + 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, + 0x61, 0x70, 0x69, 0x2e, 0x4a, 0x6f, 0x62, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, + 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x44, 0x61, 0x74, 0x61, 0x52, 0x04, 0x64, 0x61, 0x74, 0x61, 0x22, + 0x7c, 0x0a, 0x15, 0x4a, 0x6f, 0x62, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x44, 0x61, 0x74, 0x61, 0x12, 0x15, 0x0a, 0x06, 0x6a, 0x6f, 0x62, 0x5f, + 0x69, 0x64, 0x18, 0x01, 0x20, 
0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x49, 0x64, 0x12, + 0x4c, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x34, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, - 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, - 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x46, 0x0a, 0x06, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x2e, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, + 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x4a, 0x6f, 0x62, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x44, + 0x65, 0x74, 0x61, 0x69, 0x6c, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x70, 0x0a, + 0x09, 0x4a, 0x6f, 0x62, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x15, 0x0a, 0x06, 0x6a, 0x6f, + 0x62, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x49, + 0x64, 0x12, 0x4c, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x34, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, + 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x4a, 0x6f, 0x62, 0x53, 0x74, 0x61, 0x74, 0x75, + 0x73, 0x44, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, + 0x7c, 0x0a, 0x0f, 0x57, 0x61, 0x74, 0x63, 0x68, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x12, 0x40, 0x0a, 0x06, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x28, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x52, + 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x48, 0x65, 0x61, 0x64, 0x65, 0x72, 0x52, 0x06, 0x68, 0x65, + 0x61, 0x64, 0x65, 0x72, 0x12, 0x27, 0x0a, 0x0f, 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x5f, + 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0e, 0x74, + 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x53, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x73, 0x22, 0xa3, 0x01, + 0x0a, 0x15, 0x57, 0x61, 0x74, 0x63, 0x68, 0x4a, 0x6f, 0x62, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x52, + 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x42, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x2e, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, - 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x4a, 0x6f, 0x62, 0x53, - 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x06, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x22, 0x61, 0x0a, - 0x10, 0x4a, 0x6f, 0x62, 0x50, 0x61, 0x72, 0x74, 0x79, 0x45, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, - 0x74, 0x12, 0x1b, 0x0a, 0x09, 0x70, 0x6f, 0x72, 0x74, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x6f, 0x72, 0x74, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x14, - 0x0a, 0x05, 0x73, 0x63, 0x6f, 0x70, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x73, - 0x63, 0x6f, 0x70, 0x65, 0x12, 0x1a, 0x0a, 0x08, 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, - 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, - 0x2a, 0x91, 0x01, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 
0x12, 0x0b, 0x0a, 0x07, 0x55, 0x6e, - 0x6b, 0x6e, 0x6f, 0x77, 0x6e, 0x10, 0x00, 0x12, 0x0b, 0x0a, 0x07, 0x50, 0x65, 0x6e, 0x64, 0x69, - 0x6e, 0x67, 0x10, 0x01, 0x12, 0x0b, 0x0a, 0x07, 0x52, 0x75, 0x6e, 0x6e, 0x69, 0x6e, 0x67, 0x10, - 0x02, 0x12, 0x0d, 0x0a, 0x09, 0x53, 0x75, 0x63, 0x63, 0x65, 0x65, 0x64, 0x65, 0x64, 0x10, 0x03, - 0x12, 0x0a, 0x0a, 0x06, 0x46, 0x61, 0x69, 0x6c, 0x65, 0x64, 0x10, 0x04, 0x12, 0x14, 0x0a, 0x10, - 0x41, 0x77, 0x61, 0x69, 0x74, 0x69, 0x6e, 0x67, 0x41, 0x70, 0x70, 0x72, 0x6f, 0x76, 0x61, 0x6c, - 0x10, 0x05, 0x12, 0x12, 0x0a, 0x0e, 0x41, 0x70, 0x70, 0x72, 0x6f, 0x76, 0x61, 0x6c, 0x52, 0x65, - 0x6a, 0x65, 0x63, 0x74, 0x10, 0x06, 0x12, 0x0d, 0x0a, 0x09, 0x43, 0x61, 0x6e, 0x63, 0x65, 0x6c, - 0x6c, 0x65, 0x64, 0x10, 0x07, 0x12, 0x0d, 0x0a, 0x09, 0x53, 0x75, 0x73, 0x70, 0x65, 0x6e, 0x64, - 0x65, 0x64, 0x10, 0x08, 0x2a, 0x61, 0x0a, 0x0d, 0x41, 0x70, 0x70, 0x72, 0x6f, 0x76, 0x65, 0x52, - 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x1a, 0x0a, 0x16, 0x41, 0x50, 0x50, 0x52, 0x4f, 0x56, 0x45, - 0x5f, 0x52, 0x45, 0x53, 0x55, 0x4c, 0x54, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, - 0x00, 0x12, 0x19, 0x0a, 0x15, 0x41, 0x50, 0x50, 0x52, 0x4f, 0x56, 0x45, 0x5f, 0x52, 0x45, 0x53, - 0x55, 0x4c, 0x54, 0x5f, 0x41, 0x43, 0x43, 0x45, 0x50, 0x54, 0x10, 0x01, 0x12, 0x19, 0x0a, 0x15, - 0x41, 0x50, 0x50, 0x52, 0x4f, 0x56, 0x45, 0x5f, 0x52, 0x45, 0x53, 0x55, 0x4c, 0x54, 0x5f, 0x52, - 0x45, 0x4a, 0x45, 0x43, 0x54, 0x10, 0x02, 0x2a, 0x4b, 0x0a, 0x09, 0x45, 0x76, 0x65, 0x6e, 0x74, - 0x54, 0x79, 0x70, 0x65, 0x12, 0x09, 0x0a, 0x05, 0x41, 0x44, 0x44, 0x45, 0x44, 0x10, 0x00, 0x12, - 0x0c, 0x0a, 0x08, 0x4d, 0x4f, 0x44, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x01, 0x12, 0x0b, 0x0a, - 0x07, 0x44, 0x45, 0x4c, 0x45, 0x54, 0x45, 0x44, 0x10, 0x02, 0x12, 0x09, 0x0a, 0x05, 0x45, 0x52, - 0x52, 0x4f, 0x52, 0x10, 0x03, 0x12, 0x0d, 0x0a, 0x09, 0x48, 0x45, 0x41, 0x52, 0x54, 0x42, 0x45, - 0x41, 0x54, 0x10, 0x04, 0x32, 0x87, 0x0a, 0x0a, 0x0a, 0x4a, 0x6f, 0x62, 0x53, 0x65, 0x72, 0x76, - 0x69, 0x63, 0x65, 0x12, 0x7a, 0x0a, 0x09, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x4a, 0x6f, 0x62, - 0x12, 0x35, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, - 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, - 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x4a, 0x6f, 0x62, - 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x36, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, - 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, - 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x43, 0x72, - 0x65, 0x61, 0x74, 0x65, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, - 0x77, 0x0a, 0x08, 0x51, 0x75, 0x65, 0x72, 0x79, 0x4a, 0x6f, 0x62, 0x12, 0x34, 0x2e, 0x6b, 0x75, + 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x45, 0x76, 0x65, 0x6e, + 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x46, 0x0a, 0x06, 0x6f, + 0x62, 0x6a, 0x65, 0x63, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x2e, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, - 0x69, 0x2e, 0x51, 0x75, 0x65, 0x72, 0x79, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, - 0x74, 0x1a, 0x35, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 
0x6f, + 0x69, 0x2e, 0x4a, 0x6f, 0x62, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x06, 0x6f, 0x62, 0x6a, + 0x65, 0x63, 0x74, 0x22, 0x61, 0x0a, 0x10, 0x4a, 0x6f, 0x62, 0x50, 0x61, 0x72, 0x74, 0x79, 0x45, + 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x12, 0x1b, 0x0a, 0x09, 0x70, 0x6f, 0x72, 0x74, 0x5f, + 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x6f, 0x72, 0x74, + 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x63, 0x6f, 0x70, 0x65, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x05, 0x73, 0x63, 0x6f, 0x70, 0x65, 0x12, 0x1a, 0x0a, 0x08, 0x65, 0x6e, + 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x65, 0x6e, + 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x2a, 0x91, 0x01, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, + 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x6e, 0x6b, 0x6e, 0x6f, 0x77, 0x6e, 0x10, 0x00, 0x12, 0x0b, 0x0a, + 0x07, 0x50, 0x65, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x10, 0x01, 0x12, 0x0b, 0x0a, 0x07, 0x52, 0x75, + 0x6e, 0x6e, 0x69, 0x6e, 0x67, 0x10, 0x02, 0x12, 0x0d, 0x0a, 0x09, 0x53, 0x75, 0x63, 0x63, 0x65, + 0x65, 0x64, 0x65, 0x64, 0x10, 0x03, 0x12, 0x0a, 0x0a, 0x06, 0x46, 0x61, 0x69, 0x6c, 0x65, 0x64, + 0x10, 0x04, 0x12, 0x14, 0x0a, 0x10, 0x41, 0x77, 0x61, 0x69, 0x74, 0x69, 0x6e, 0x67, 0x41, 0x70, + 0x70, 0x72, 0x6f, 0x76, 0x61, 0x6c, 0x10, 0x05, 0x12, 0x12, 0x0a, 0x0e, 0x41, 0x70, 0x70, 0x72, + 0x6f, 0x76, 0x61, 0x6c, 0x52, 0x65, 0x6a, 0x65, 0x63, 0x74, 0x10, 0x06, 0x12, 0x0d, 0x0a, 0x09, + 0x43, 0x61, 0x6e, 0x63, 0x65, 0x6c, 0x6c, 0x65, 0x64, 0x10, 0x07, 0x12, 0x0d, 0x0a, 0x09, 0x53, + 0x75, 0x73, 0x70, 0x65, 0x6e, 0x64, 0x65, 0x64, 0x10, 0x08, 0x2a, 0x61, 0x0a, 0x0d, 0x41, 0x70, + 0x70, 0x72, 0x6f, 0x76, 0x65, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x1a, 0x0a, 0x16, 0x41, + 0x50, 0x50, 0x52, 0x4f, 0x56, 0x45, 0x5f, 0x52, 0x45, 0x53, 0x55, 0x4c, 0x54, 0x5f, 0x55, 0x4e, + 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x19, 0x0a, 0x15, 0x41, 0x50, 0x50, 0x52, 0x4f, + 0x56, 0x45, 0x5f, 0x52, 0x45, 0x53, 0x55, 0x4c, 0x54, 0x5f, 0x41, 0x43, 0x43, 0x45, 0x50, 0x54, + 0x10, 0x01, 0x12, 0x19, 0x0a, 0x15, 0x41, 0x50, 0x50, 0x52, 0x4f, 0x56, 0x45, 0x5f, 0x52, 0x45, + 0x53, 0x55, 0x4c, 0x54, 0x5f, 0x52, 0x45, 0x4a, 0x45, 0x43, 0x54, 0x10, 0x02, 0x2a, 0x4b, 0x0a, + 0x09, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x09, 0x0a, 0x05, 0x41, 0x44, + 0x44, 0x45, 0x44, 0x10, 0x00, 0x12, 0x0c, 0x0a, 0x08, 0x4d, 0x4f, 0x44, 0x49, 0x46, 0x49, 0x45, + 0x44, 0x10, 0x01, 0x12, 0x0b, 0x0a, 0x07, 0x44, 0x45, 0x4c, 0x45, 0x54, 0x45, 0x44, 0x10, 0x02, + 0x12, 0x09, 0x0a, 0x05, 0x45, 0x52, 0x52, 0x4f, 0x52, 0x10, 0x03, 0x12, 0x0d, 0x0a, 0x09, 0x48, + 0x45, 0x41, 0x52, 0x54, 0x42, 0x45, 0x41, 0x54, 0x10, 0x04, 0x32, 0x87, 0x0a, 0x0a, 0x0a, 0x4a, + 0x6f, 0x62, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x7a, 0x0a, 0x09, 0x43, 0x72, 0x65, + 0x61, 0x74, 0x65, 0x4a, 0x6f, 0x62, 0x12, 0x35, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, + 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x43, 0x72, 0x65, + 0x61, 0x74, 0x65, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x36, 0x2e, + 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, + 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, + 0x61, 0x70, 0x69, 0x2e, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x73, + 0x70, 0x6f, 0x6e, 
0x73, 0x65, 0x12, 0x77, 0x0a, 0x08, 0x51, 0x75, 0x65, 0x72, 0x79, 0x4a, 0x6f, + 0x62, 0x12, 0x34, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x51, 0x75, 0x65, 0x72, 0x79, 0x4a, 0x6f, 0x62, - 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x98, 0x01, 0x0a, 0x13, 0x42, 0x61, 0x74, - 0x63, 0x68, 0x51, 0x75, 0x65, 0x72, 0x79, 0x4a, 0x6f, 0x62, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x12, 0x3f, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, - 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, - 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x42, 0x61, 0x74, 0x63, 0x68, 0x51, 0x75, 0x65, 0x72, - 0x79, 0x4a, 0x6f, 0x62, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, - 0x74, 0x1a, 0x40, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, - 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x42, 0x61, 0x74, 0x63, 0x68, 0x51, 0x75, 0x65, - 0x72, 0x79, 0x4a, 0x6f, 0x62, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, - 0x6e, 0x73, 0x65, 0x12, 0x74, 0x0a, 0x07, 0x53, 0x74, 0x6f, 0x70, 0x4a, 0x6f, 0x62, 0x12, 0x33, - 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, - 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, - 0x61, 0x61, 0x70, 0x69, 0x2e, 0x53, 0x74, 0x6f, 0x70, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, - 0x65, 0x73, 0x74, 0x1a, 0x34, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, - 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x53, 0x74, 0x6f, 0x70, 0x4a, 0x6f, - 0x62, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x7d, 0x0a, 0x0a, 0x52, 0x65, 0x73, - 0x74, 0x61, 0x72, 0x74, 0x4a, 0x6f, 0x62, 0x12, 0x36, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, + 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x35, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, - 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x52, 0x65, - 0x73, 0x74, 0x61, 0x72, 0x74, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, - 0x37, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, - 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, - 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x52, 0x65, 0x73, 0x74, 0x61, 0x72, 0x74, 0x4a, 0x6f, 0x62, - 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x7d, 0x0a, 0x0a, 0x53, 0x75, 0x73, 0x70, - 0x65, 0x6e, 0x64, 0x4a, 0x6f, 0x62, 0x12, 0x36, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, + 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x51, 0x75, + 0x65, 0x72, 0x79, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x98, + 0x01, 0x0a, 0x13, 0x42, 0x61, 0x74, 0x63, 0x68, 0x51, 0x75, 0x65, 0x72, 0x79, 0x4a, 0x6f, 0x62, + 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x3f, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 
0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, - 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x53, 0x75, 0x73, - 0x70, 0x65, 0x6e, 0x64, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x37, - 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, - 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, - 0x61, 0x61, 0x70, 0x69, 0x2e, 0x53, 0x75, 0x73, 0x70, 0x65, 0x6e, 0x64, 0x4a, 0x6f, 0x62, 0x52, - 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x7a, 0x0a, 0x09, 0x43, 0x61, 0x6e, 0x63, 0x65, - 0x6c, 0x4a, 0x6f, 0x62, 0x12, 0x35, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, - 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x43, 0x61, 0x6e, 0x63, 0x65, - 0x6c, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x36, 0x2e, 0x6b, 0x75, - 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, - 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, - 0x69, 0x2e, 0x43, 0x61, 0x6e, 0x63, 0x65, 0x6c, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x73, 0x70, 0x6f, - 0x6e, 0x73, 0x65, 0x12, 0x7a, 0x0a, 0x09, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x4a, 0x6f, 0x62, - 0x12, 0x35, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, - 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, - 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x4a, 0x6f, 0x62, - 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x36, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, + 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x42, 0x61, 0x74, + 0x63, 0x68, 0x51, 0x75, 0x65, 0x72, 0x79, 0x4a, 0x6f, 0x62, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, + 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x40, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, - 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x44, 0x65, - 0x6c, 0x65, 0x74, 0x65, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, - 0x7e, 0x0a, 0x08, 0x57, 0x61, 0x74, 0x63, 0x68, 0x4a, 0x6f, 0x62, 0x12, 0x34, 0x2e, 0x6b, 0x75, - 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, - 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, - 0x69, 0x2e, 0x57, 0x61, 0x74, 0x63, 0x68, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, - 0x74, 0x1a, 0x3a, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, - 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x57, 0x61, 0x74, 0x63, 0x68, 0x4a, 0x6f, 0x62, - 0x45, 0x76, 0x65, 0x6e, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x30, 0x01, 0x12, - 0x7d, 0x0a, 0x0a, 0x41, 0x70, 0x70, 0x72, 0x6f, 0x76, 0x65, 0x4a, 0x6f, 0x62, 0x12, 0x36, 0x2e, + 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x42, 0x61, + 0x74, 0x63, 0x68, 0x51, 0x75, 0x65, 0x72, 0x79, 0x4a, 0x6f, 0x62, 0x53, 0x74, 0x61, 0x74, 0x75, + 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x74, 0x0a, 
0x07, 0x53, 0x74, 0x6f, + 0x70, 0x4a, 0x6f, 0x62, 0x12, 0x33, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, + 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x53, 0x74, 0x6f, 0x70, 0x4a, + 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x34, 0x2e, 0x6b, 0x75, 0x73, 0x63, + 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, + 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, + 0x53, 0x74, 0x6f, 0x70, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, + 0x7d, 0x0a, 0x0a, 0x52, 0x65, 0x73, 0x74, 0x61, 0x72, 0x74, 0x4a, 0x6f, 0x62, 0x12, 0x36, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, - 0x61, 0x70, 0x69, 0x2e, 0x41, 0x70, 0x70, 0x72, 0x6f, 0x76, 0x65, 0x4a, 0x6f, 0x62, 0x52, 0x65, + 0x61, 0x70, 0x69, 0x2e, 0x52, 0x65, 0x73, 0x74, 0x61, 0x72, 0x74, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x37, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, - 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x41, 0x70, 0x70, 0x72, - 0x6f, 0x76, 0x65, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x42, 0x5e, - 0x0a, 0x21, 0x6f, 0x72, 0x67, 0x2e, 0x73, 0x65, 0x63, 0x72, 0x65, 0x74, 0x66, 0x6c, 0x6f, 0x77, + 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x52, 0x65, 0x73, 0x74, + 0x61, 0x72, 0x74, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x7d, + 0x0a, 0x0a, 0x53, 0x75, 0x73, 0x70, 0x65, 0x6e, 0x64, 0x4a, 0x6f, 0x62, 0x12, 0x36, 0x2e, 0x6b, + 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, + 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, + 0x70, 0x69, 0x2e, 0x53, 0x75, 0x73, 0x70, 0x65, 0x6e, 0x64, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x71, + 0x75, 0x65, 0x73, 0x74, 0x1a, 0x37, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, + 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x53, 0x75, 0x73, 0x70, 0x65, + 0x6e, 0x64, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x7a, 0x0a, + 0x09, 0x43, 0x61, 0x6e, 0x63, 0x65, 0x6c, 0x4a, 0x6f, 0x62, 0x12, 0x35, 0x2e, 0x6b, 0x75, 0x73, + 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, + 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, + 0x2e, 0x43, 0x61, 0x6e, 0x63, 0x65, 0x6c, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, + 0x74, 0x1a, 0x36, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, + 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x43, 0x61, 0x6e, 0x63, 0x65, 0x6c, 0x4a, 0x6f, + 0x62, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x7a, 0x0a, 0x09, 0x44, 0x65, 0x6c, + 0x65, 0x74, 0x65, 0x4a, 0x6f, 0x62, 0x12, 0x35, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, + 0x70, 
0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, + 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x44, 0x65, 0x6c, + 0x65, 0x74, 0x65, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x36, 0x2e, + 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, - 0x61, 0x70, 0x69, 0x5a, 0x39, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, - 0x73, 0x65, 0x63, 0x72, 0x65, 0x74, 0x66, 0x6c, 0x6f, 0x77, 0x2f, 0x6b, 0x75, 0x73, 0x63, 0x69, - 0x61, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x61, 0x70, 0x69, 0x2f, 0x76, 0x31, 0x61, 0x6c, - 0x70, 0x68, 0x61, 0x31, 0x2f, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x62, 0x06, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x61, 0x70, 0x69, 0x2e, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x73, + 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x7e, 0x0a, 0x08, 0x57, 0x61, 0x74, 0x63, 0x68, 0x4a, 0x6f, + 0x62, 0x12, 0x34, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, + 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x57, 0x61, 0x74, 0x63, 0x68, 0x4a, 0x6f, 0x62, + 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x3a, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, + 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, + 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x57, 0x61, + 0x74, 0x63, 0x68, 0x4a, 0x6f, 0x62, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, + 0x6e, 0x73, 0x65, 0x30, 0x01, 0x12, 0x7d, 0x0a, 0x0a, 0x41, 0x70, 0x70, 0x72, 0x6f, 0x76, 0x65, + 0x4a, 0x6f, 0x62, 0x12, 0x36, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, + 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x2e, 0x41, 0x70, 0x70, 0x72, 0x6f, 0x76, + 0x65, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x37, 0x2e, 0x6b, 0x75, + 0x73, 0x63, 0x69, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, + 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, + 0x69, 0x2e, 0x41, 0x70, 0x70, 0x72, 0x6f, 0x76, 0x65, 0x4a, 0x6f, 0x62, 0x52, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x42, 0x5e, 0x0a, 0x21, 0x6f, 0x72, 0x67, 0x2e, 0x73, 0x65, 0x63, 0x72, + 0x65, 0x74, 0x66, 0x6c, 0x6f, 0x77, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, + 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x61, 0x70, 0x69, 0x5a, 0x39, 0x67, 0x69, 0x74, 0x68, 0x75, + 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x73, 0x65, 0x63, 0x72, 0x65, 0x74, 0x66, 0x6c, 0x6f, 0x77, + 0x2f, 0x6b, 0x75, 0x73, 0x63, 0x69, 0x61, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x61, 0x70, + 0x69, 0x2f, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2f, 0x6b, 0x75, 0x73, 0x63, 0x69, + 0x61, 0x61, 0x70, 0x69, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/proto/api/v1alpha1/kusciaapi/job.proto b/proto/api/v1alpha1/kusciaapi/job.proto index 8145cc26..8ffa6b55 100644 --- a/proto/api/v1alpha1/kusciaapi/job.proto +++ b/proto/api/v1alpha1/kusciaapi/job.proto @@ -229,6 +229,7 @@ message TaskStatus { string start_time = 5; string end_time = 6; repeated 
PartyStatus parties = 7; + string alias = 8; } message PartyStatus { diff --git a/scripts/deploy/cgroup_pre_detect.sh b/scripts/deploy/cgroup_pre_detect.sh new file mode 100755 index 00000000..638afcfe --- /dev/null +++ b/scripts/deploy/cgroup_pre_detect.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# Copyright 2024 Ant Group Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -e + +if [ -f /sys/fs/cgroup/cgroup.controllers ]; then + # move the processes from the root group to the /init group, + # otherwise writing subtree_control fails with EBUSY. + mkdir -p /sys/fs/cgroup/init + xargs -rn1 < /sys/fs/cgroup/cgroup.procs > /sys/fs/cgroup/init/cgroup.procs || : + # enable controllers + sed -e 's/ / +/g' -e 's/^/+/' <"/sys/fs/cgroup/cgroup.controllers" >"/sys/fs/cgroup/cgroup.subtree_control" +fi diff --git a/scripts/deploy/create_reverse_tunnel_test_cluster.sh b/scripts/deploy/create_reverse_tunnel_test_cluster.sh new file mode 100644 index 00000000..01eed0ba --- /dev/null +++ b/scripts/deploy/create_reverse_tunnel_test_cluster.sh @@ -0,0 +1,196 @@ +#!/bin/bash +# +# Copyright 2024 Ant Group Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +function docker_get_ctrs() { + local prefix=$1 + docker ps | grep $prefix | awk '{print $1}' +} + +function build_replica_conn() { + local hostname=$(hostname -I | awk '{print $1}') + + read -ra alice_ctrs <<< $(docker_get_ctrs kuscia-autonomy-alice) + read -ra bob_ctrs <<< $(docker_get_ctrs kuscia-autonomy-bob) + + local alice_choice_ctr=${alice_ctrs[0]} + local bob_choice_ctr=${bob_ctrs[0]} + + docker cp $alice_choice_ctr:/home/kuscia/var/certs/domain.crt alice.domain.crt + for ctr in "${bob_ctrs[@]}"; do + docker cp alice.domain.crt $ctr:/home/kuscia/var/certs/alice.domain.crt + rm_iptables $ctr + done + + docker cp $bob_choice_ctr:/home/kuscia/var/certs/domain.crt bob.domain.crt + for ctr in "${alice_ctrs[@]}"; do + docker cp bob.domain.crt $ctr:/home/kuscia/var/certs/bob.domain.crt + rm_iptables $ctr + done + + docker exec -it ${alice_choice_ctr} scripts/deploy/add_domain.sh bob p2p + docker exec -it ${bob_choice_ctr} scripts/deploy/add_domain.sh alice p2p + docker exec -it ${alice_choice_ctr} scripts/deploy/join_to_host.sh alice bob https://$hostname:24869 + docker exec -it ${bob_choice_ctr} scripts/deploy/join_to_host.sh bob alice https://$hostname:14869 + # reverse tunnel + docker exec -it ${alice_choice_ctr} kubectl patch cdr alice-bob --type=merge -p '{"spec":{"transit":{"transitMethod":"REVERSE-TUNNEL"}}}' + docker exec -it ${bob_choice_ctr} kubectl patch cdr alice-bob --type=merge -p '{"spec":{"transit":{"transitMethod":"REVERSE-TUNNEL"}}}' + # create demo data + create_domaindata_alice_table ${alice_choice_ctr} alice + create_domaindata_bob_table ${bob_choice_ctr} bob + create_domaindatagrant_alice2bob ${alice_choice_ctr} + create_domaindatagrant_bob2alice ${bob_choice_ctr} + # create secretflow app image + create_secretflow_app_image ${alice_choice_ctr} + create_secretflow_app_image ${bob_choice_ctr} +} + +function rm_iptables() { + local ctr=$1 + local pid=$(docker inspect --format '{{.State.Pid}}' $ctr) + local name=$(docker inspect --format '{{.Name}}' $ctr | cut -b 2- | cut -d '.' -f1,2 | sed 's/_/-/g') + nsenter -t $pid -n iptables -L -n -v + nsenter -t $pid -n iptables -F INPUT + nsenter -t $pid -n iptables -F OUTPUT + + # nsenter -t $pid --uts hostname $name +} + +function create_network() { + local hostname=$(hostname -I | awk '{print $1}') + + network_name="kuscia-swarm-exchange" + exists=$(docker network ls | grep $network_name | wc -l) + if [ $exists != "1" ]; then + docker swarm init --advertise-addr $hostname + docker network create -d overlay --subnet 12.0.0.0/8 --attachable $network_name + else + echo "network $network_name exists!" + fi +} + +function create_kuscia_yaml() { + local hostname=$(hostname -I | awk '{print $1}') + + if [ ! -d alice ]; then + mkdir alice + fi + docker run -it --rm ${KUSCIA_IMAGE} kuscia init --mode autonomy --domain "alice" --runtime "runp" --log-level "DEBUG" --datastore-endpoint "mysql://root:password@tcp($hostname:13307)/kine" > alice/kuscia.yaml + + if [ ! 
-d bob ]; then + mkdir bob + fi + docker run -it --rm ${KUSCIA_IMAGE} kuscia init --mode autonomy --domain "bob" --runtime "runp" --log-level "DEBUG" --datastore-endpoint "mysql://root:password@tcp($hostname:13308)/kine" > bob/kuscia.yaml +} + +function create_load() { + script_dir=$(realpath $(dirname "$0")) + cat << EOF > kuscia-autonomy.yaml + version: '3.8' + + services: + kuscia-autonomy-alice: + image: $IMAGE + command: + - bin/kuscia + - start + - -c + - etc/conf/kuscia.yaml + environment: + NAMESPACE: alice + PATH: '/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:home/kuscia/tmp/bin:/home/kuscia/bin:/bin/aux' + volumes: + - /tmp:/tmp + - $script_dir/alice/kuscia.yaml:/home/kuscia/etc/conf/kuscia.yaml + ports: + - "14869:1080/tcp" + networks: + - kuscia-swarm-exchange + depends_on: + - mysql-alice + deploy: + replicas: 3 + + kuscia-autonomy-bob: + image: $IMAGE + command: + - bin/kuscia + - start + - -c + - etc/conf/kuscia.yaml + environment: + NAMESPACE: bob + PATH: '/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:home/kuscia/tmp/bin:/home/kuscia/bin:/bin/aux' + volumes: + - /tmp:/tmp + - $script_dir/bob/kuscia.yaml:/home/kuscia/etc/conf/kuscia.yaml + ports: + - "24869:1080/tcp" + networks: + networks: + - kuscia-swarm-exchange + depends_on: + - mysql-bob + deploy: + replicas: 1 + + mysql-alice: + image: mysql:8.0 + environment: + MYSQL_ROOT_PASSWORD: password + MYSQL_DATABASE: kine + MYSQL_USER: user + MYSQL_PASSWORD: password + ports: + - "13307:3306" + networks: + - kuscia-swarm-exchange + + mysql-bob: + image: mysql:8.0 + environment: + MYSQL_ROOT_PASSWORD: password + MYSQL_DATABASE: kine + MYSQL_USER: user + MYSQL_PASSWORD: password + ports: + - "13308:3306" + networks: + - kuscia-swarm-exchange + + networks: + kuscia-swarm-exchange: + name: kuscia-swarm-exchange + external: true +EOF + docker stack deploy -c kuscia-autonomy.yaml kuscia-autonomy +} + +function clean_replica() { + docker stack rm kuscia-autonomy +} + +function run_replica() { + create_network + clean_replica + sleep 10 + create_kuscia_yaml + create_load + sleep 60 + build_replica_conn +} + +run_replica \ No newline at end of file diff --git a/scripts/deploy/create_secretflow_app_image.sh b/scripts/deploy/create_secretflow_app_image.sh new file mode 100755 index 00000000..f1dec5db --- /dev/null +++ b/scripts/deploy/create_secretflow_app_image.sh @@ -0,0 +1,46 @@ +#!/bin/bash +# +# Copyright 2023 Ant Group Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -e + +SF_IMAGE_NAME=$1 + +usage="$(basename "$0") SF_IMAGE_NAME" + +if [[ ${SF_IMAGE_NAME} == "" ]]; then + echo "missing argument: $usage" + exit 1 +fi + +if [[ "${SF_IMAGE_NAME}" == *":"* ]]; then + IMAGE_REPO=${SF_IMAGE_NAME%%:*} + IMAGE_TAG=${SF_IMAGE_NAME##*:} +fi + +APP_TYPE=$(echo "${IMAGE_REPO}" | awk -F'/' '{print $NF}' | awk -F'-' '{print $1}') +if [[ ${APP_TYPE} != "psi" ]]; then + APP_TYPE="secretflow" +fi + +ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
&& pwd -P) + +APP_IMAGE_TEMPLATE=$(sed "s!{{.SF_IMAGE_NAME}}!'${IMAGE_REPO}'!g; + s!{{.SF_IMAGE_TAG}}!'${IMAGE_TAG}'!g; + s!{{.SF_IMAGE_ID}}!'${SF_IMAGE_ID}'!g" \ + < "${ROOT}/scripts/templates/app_image.${APP_TYPE}.yaml") + +echo "${APP_IMAGE_TEMPLATE}" | kubectl apply -f - \ No newline at end of file diff --git a/scripts/deploy/deploy.sh b/scripts/deploy/deploy.sh index 8aa6cdfc..66a9104b 100755 --- a/scripts/deploy/deploy.sh +++ b/scripts/deploy/deploy.sh @@ -53,7 +53,7 @@ fi log "KUSCIA_IMAGE=${KUSCIA_IMAGE}" if [[ "$SECRETFLOW_IMAGE" == "" ]]; then - SECRETFLOW_IMAGE=secretflow-registry.cn-hangzhou.cr.aliyuncs.com/secretflow/secretflow-lite-anolis8:1.3.0b0 + SECRETFLOW_IMAGE=secretflow-registry.cn-hangzhou.cr.aliyuncs.com/secretflow/secretflow-lite-anolis8:1.5.0b0 fi log "SECRETFLOW_IMAGE=${SECRETFLOW_IMAGE}" diff --git a/scripts/deploy/import_engine_image.sh b/scripts/deploy/import_engine_image.sh new file mode 100644 index 00000000..7afc45ff --- /dev/null +++ b/scripts/deploy/import_engine_image.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# +# Copyright 2023 Ant Group Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -e + +GREEN='\033[0;32m' +NC='\033[0m' +RED='\033[31m' + +kuscia_container_name=$1 +engine_image=$2 + +if docker exec -it ${kuscia_container_name} crictl inspecti ${engine_image} >/dev/null 2>&1; then + echo -e "${GREEN}Image '${engine_image}' already exists in domain ${kuscia_container_name}${NC}" +else + if docker image inspect ${engine_image} >/dev/null 2>&1; then + echo -e "${GREEN}Found the engine image '${engine_image}' on host${NC}" + else + echo -e "${GREEN}Not found the engine image '${engine_image}' on host${NC}" + echo -e "${GREEN}Start pulling image '${engine_image}' ...${NC}" + docker pull ${engine_image} + fi + image_tag=$(echo ${engine_image} | cut -d ':' -f 2) + echo -e "${GREEN}Start importing image '${engine_image}' Please be patient...${NC}" + + image_tar=/tmp/${image_tag}.tar + docker save ${engine_image} -o ${image_tar} + docker exec -it ${kuscia_container_name} ctr -a=/home/kuscia/containerd/run/containerd.sock -n=k8s.io images import ${image_tar} + + if docker exec -it ${kuscia_container_name} crictl inspecti ${engine_image} >/dev/null 2>&1; then + rm -rf ${image_tar} + echo -e "${GREEN}image ${engine_image} import successfully${NC}" + else + echo -e "${RED}error: ${engine_image} import failed${NC}" + fi +fi \ No newline at end of file diff --git a/scripts/deploy/containerd_pre_detect.sh b/scripts/deploy/iptables_pre_detect.sh similarity index 70% rename from scripts/deploy/containerd_pre_detect.sh rename to scripts/deploy/iptables_pre_detect.sh index a64625f5..e8aac6be 100755 --- a/scripts/deploy/containerd_pre_detect.sh +++ b/scripts/deploy/iptables_pre_detect.sh @@ -33,13 +33,4 @@ if [[ $legacy_valid == true && $nft_valid != true ]]; then elif [[ $legacy_valid != true && $nft_valid == true ]]; then export IPTABLES_MODE=nft iptables -V > /dev/null -fi - -if [ -f /sys/fs/cgroup/cgroup.controllers ]; then - # move the processes from the root 
group to the /init group, - # otherwise writing subtree_control fails with EBUSY. - mkdir -p /sys/fs/cgroup/init - xargs -rn1 < /sys/fs/cgroup/cgroup.procs > /sys/fs/cgroup/init/cgroup.procs || : - # enable controllers - sed -e 's/ / +/g' -e 's/^/+/' <"/sys/fs/cgroup/cgroup.controllers" >"/sys/fs/cgroup/cgroup.subtree_control" -fi +fi \ No newline at end of file diff --git a/scripts/deploy/kuscia.sh b/scripts/deploy/kuscia.sh old mode 100644 new mode 100755 index bef999db..3ff72e53 --- a/scripts/deploy/kuscia.sh +++ b/scripts/deploy/kuscia.sh @@ -17,44 +17,36 @@ set -e -ROOT_DIR=$HOME/kuscia - GREEN='\033[0;32m' +YELLOW='\033[1;33m' NC='\033[0m' RED='\033[31m' - -if [[ ${KUSCIA_IMAGE} == "" ]]; then - KUSCIA_IMAGE=secretflow-registry.cn-hangzhou.cr.aliyuncs.com/secretflow/kuscia:latest -fi - -if [ "$SECRETFLOW_IMAGE" != "" ]; then - echo -e "SECRETFLOW_IMAGE=${SECRETFLOW_IMAGE}" -fi - -CTR_PREFIX=${USER}-kuscia CTR_ROOT=/home/kuscia CTR_CERT_ROOT=${CTR_ROOT}/var/certs -MASTER_DOMAIN="kuscia-system" -ALICE_DOMAIN="alice" -BOB_DOMAIN="bob" -MASTER_CTR=${CTR_PREFIX}-master -FORCE_START=false -MASTER_MEMORY_LIMIT=2G -LITE_MEMORY_LIMIT=4G -AUTONOMY_MEMORY_LIMIT=6G -SF_IMAGE_NAME="secretflow-registry.cn-hangzhou.cr.aliyuncs.com/secretflow/secretflow-lite-anolis8" -SF_IMAGE_TAG="1.3.0b0" SF_IMAGE_REGISTRY="" NETWORK_NAME="kuscia-exchange" -VOLUME_PATH="${ROOT_DIR}" +CLUSTER_NETWORK_NAME="kuscia-exchange-cluster" +IMPORT_SF_IMAGE=secretflow function log() { local log_content=$1 echo -e "${GREEN}${log_content}${NC}" } +if [[ ${KUSCIA_IMAGE} == "" ]]; then + KUSCIA_IMAGE=secretflow-registry.cn-hangzhou.cr.aliyuncs.com/secretflow/kuscia:latest +fi +log "KUSCIA_IMAGE=${KUSCIA_IMAGE}" + +if [[ "$SECRETFLOW_IMAGE" == "" ]]; then + SECRETFLOW_IMAGE=secretflow-registry.cn-hangzhou.cr.aliyuncs.com/secretflow/secretflow-lite-anolis8:1.5.0b0 +fi +log "SECRETFLOW_IMAGE=${SECRETFLOW_IMAGE}" + function arch_check() { - local arch=$(uname -a) + local arch + arch=$(uname -a) + if [[ $arch == *"ARM"* ]] || [[ $arch == *"aarch64"* ]]; then echo "Warning: arm64 architecture. Continuing..." elif [[ $arch == *"x86_64"* ]]; then @@ -74,6 +66,19 @@ function pre_check() { fi } +function init_k3s_data() { + if [ ! -d "${K3S_DB_PATH}" ]; then + pre_check "${K3S_DB_PATH}" + else + echo -e "${GREEN}k3s data already exists ${K3S_DB_PATH}...${NC}" + read -rp "$(echo -e "${GREEN}Whether to retain k3s data?(y/N): ${NC}")" reuse + reuse=${reuse:-N} + if [[ "${reuse}" =~ ^([nN][oO]|[nN])$ ]]; then + rm -rf "${K3S_DB_PATH:?}"/* + fi + fi +} + function init_sf_image_info() { if [ "$SECRETFLOW_IMAGE" != "" ]; then SF_IMAGE_TAG=${SECRETFLOW_IMAGE##*:} @@ -132,6 +137,7 @@ agent: } function need_start_docker_container() { + local force_start=false ctr=$1 if [[ ! "$(docker ps -a -q -f name=^/${ctr}$)" ]]; then @@ -139,7 +145,7 @@ function need_start_docker_container() { return 0 fi - if $FORCE_START; then + if $force_start; then log "Remove container '${ctr}' ..." docker rm -f $ctr >/dev/null 2>&1 # need start your container @@ -155,8 +161,8 @@ function need_start_docker_container() { return 0 ;; *) - echo -e "${RED}installation exit.${NC}" - exit 1 + echo -e "${YELLOW}installation exit.${NC}" + exit 0 ;; esac @@ -191,7 +197,7 @@ function probe_k3s() { local domain_ctr=$1 if ! do_http_probe $domain_ctr "https://127.0.0.1:6443" 60; then - echo "[Error] Probe k3s in container '$domain_ctr' failed. Please check k3s log in container, path: /home/kuscia/var/logs/k3s.log" >&2 + echo "[Error] Probe k3s in container '$domain_ctr' failed. 
Please check k3s log in container, path: ${CTR_ROOT}/var/logs/k3s.log" >&2 exit 1 fi } @@ -212,7 +218,7 @@ function probe_gateway_crd() { sleep 1 retry=$((retry + 1)) done - echo "[Error] Probe gateway in namespace '$domain' failed. Please check envoy log in container, path: /home/kuscia/var/logs/envoy" >&2 + echo "[Error] Probe gateway in namespace '$domain' failed. Please check envoy log in container, path: ${CTR_ROOT}/var/logs/envoy" >&2 exit 1 } @@ -227,6 +233,21 @@ function generate_env_flag() { echo $env_flag } +function createVolume() { + local VOLUME_NAME=$1 + if ! docker volume ls --format '{{.Name}}' | grep "^${VOLUME_NAME}$"; then + docker volume create $VOLUME_NAME + fi +} + +function generate_hostname() { + local prefix=$1 + local local_hostname + local_hostname=$(hostname) + local_hostname=$(echo "${local_hostname}" | tr '[:upper:]_.' '[:lower:]--' | sed 's/[^a-z0-9]$//g' ) + echo "${prefix}-${local_hostname}" | cut -c 1-63 +} + function copy_between_containers() { local src_file=$1 local dest_file=$2 @@ -239,17 +260,12 @@ function copy_between_containers() { echo "Copy file successfully src_file:'$src_file' to dest_file:'$dest_file'" } -function create_secretflow_app_image() { - local ctr=$1 - docker exec -it ${ctr} scripts/deploy/create_sf_app_image.sh "${SF_IMAGE_NAME}" "${SF_IMAGE_TAG}" - log "create secretflow app image done" -} - function probe_datamesh() { local domain_ctr=$1 if ! do_http_probe "$domain_ctr" "https://127.0.0.1:8070/healthZ" 30 true; then - echo "[Error] Probe datamesh in container '$domain_ctr' failed." >&2 - echo "You cloud run command that 'docker logs $domain_ctr' to check the log" >&2 + echo -e "${RED}[Error] Probe datamesh in container '$domain_ctr' failed.${NC}" >&2 + echo -e "${RED}You cloud run command that 'docker logs $domain_ctr' to check the log${NC}" >&2 + exit 1 fi log "Probe datamesh successfully" } @@ -265,70 +281,19 @@ function get_runtime() { } function generate_mount_flag() { - local mount_flag="-v ${DOMAIN_DATA_DIR}:/home/kuscia/var/storage/data -v ${DOMAIN_LOG_DIR}:/home/kuscia/var/stdout" + local mount_flag="-v ${DOMAIN_DATA_DIR}:${CTR_ROOT}/var/storage/data -v ${DOMAIN_LOG_DIR}:${CTR_ROOT}/var/stdout ${k3s_volume}" echo "$mount_flag" } function create_cluster_domain_route() { + local ctr_prefix=${USER}-kuscia + local master_ctr=${ctr_prefix}-master local src_domain=$1 local dest_domain=$2 log "Starting create cluster domain route from '${src_domain}' to '${dest_domain}'" - docker exec -it ${MASTER_CTR} scripts/deploy/create_cluster_domain_route.sh ${src_domain} ${dest_domain} http://${CTR_PREFIX}-lite-${dest_domain}:1080 - log "Cluster domain route from '${src_domain}' to '${dest_domain}' created successfully dest_endpoint: '${CTR_PREFIX}'-lite-'${dest_domain}':1080" -} - -function check_sf_image() { - local domain_id=$1 - local domain_ctr=$2 - local volume_path=$3 - local env_file=${ROOT}/env.list - local default_repo=${SF_IMAGE_REGISTRY} - local repo - if [ -e $env_file ]; then - repo=$(awk -F "=" '/REGISTRY_ENDPOINT/ {print $2}' $env_file) - fi - local sf_image="${SF_IMAGE_NAME}:${SF_IMAGE_TAG}" - if [ "$repo" != "" ]; then - sf_image="${repo}/${SF_IMAGE_NAME##*/}:${SF_IMAGE_TAG}" - elif [ "$default_repo" != "" ]; then - sf_image="${default_repo}/${SF_IMAGE_NAME##*/}:${SF_IMAGE_TAG}" - fi - if [ "$SECRETFLOW_IMAGE" != "" ]; then - sf_image=$SECRETFLOW_IMAGE - fi - - if docker exec -it $domain_ctr crictl inspecti $sf_image >/dev/null 2>&1; then - log "Image '${sf_image}' already exists in domain '${domain_id}'" - return - fi - 
- local has_sf_image=false - if docker image inspect ${sf_image} >/dev/null 2>&1; then - has_sf_image=true - fi - - if [ "$has_sf_image" == true ]; then - log "Found the secretflow image '${sf_image}' on host" - else - log "Not found the secretflow image '${sf_image}' on host" - if [ "$repo" != "" ]; then - docker login $repo - fi - log "Start pulling image '${sf_image}' ..." - docker pull ${sf_image} - fi - - log "Start importing image '${sf_image}' Please be patient..." - local image_id - image_id=$(docker images --filter="reference=${sf_image}" --format "{{.ID}}") - local image_tar - image_tar=/tmp/$(echo ${sf_image} | sed 's/\//_/g').${image_id}.tar - if [ ! -e $image_tar ]; then - docker save $sf_image -o $image_tar - fi - docker exec -it $domain_ctr ctr -a=${CTR_ROOT}/containerd/run/containerd.sock -n=k8s.io images import $image_tar - log "Successfully imported image '${sf_image}' to container '${domain_ctr}' ..." + docker exec -it ${master_ctr} scripts/deploy/create_cluster_domain_route.sh ${src_domain} ${dest_domain} http://${ctr_prefix}-lite-${dest_domain}:1080 + log "Cluster domain route from '${src_domain}' to '${dest_domain}' created successfully dest_endpoint: '${ctr_prefix}'-lite-'${dest_domain}':1080" } function build_interconn() { @@ -375,65 +340,127 @@ function init() { log "ROOT=${ROOT}" log "DOMAIN_ID=${domain_id}" log "DOMAIN_HOST_PORT=${DOMAIN_HOST_PORT}" - log "DOMAIN_HOST_INTERNAL_PORT=${DOMAIN_HOST_INTERNAL_PORT}" + log "DOMAIN_HOST_INTERNAL_PORT=${domain_host_internal_port}" log "DOMAIN_DATA_DIR=${DOMAIN_DATA_DIR}" log "DOMAIN_LOG_DIR=${DOMAIN_LOG_DIR}" log "KUSCIA_IMAGE=${KUSCIA_IMAGE}" - log "KUSCIAAPI_HTTP_PORT=${KUSCIAAPI_HTTP_PORT}" - log "KUSCIAAPI_GRPC_PORT=${KUSCIAAPI_GRPC_PORT}" - - build_kuscia_network + log "KUSCIAAPI_HTTP_PORT=${kusciaapi_http_port}" + log "KUSCIAAPI_GRPC_PORT=${kusciaapi_grpc_port}" } function start_container() { - docker run -dit${privileged_flag} --name="${domain_ctr}" --hostname="${domain_ctr}" --restart=always --network=${NETWORK_NAME} -m $LITE_MEMORY_LIMIT \ - -p "${domain_host_internal_port}":80 \ - -p "${domain_host_port}":1080 \ - -p "${kusciaapi_http_port}":8082 \ - -p "${kusciaapi_grpc_port}":8083 \ - --mount source=${domain_ctr}-containerd,target=${CTR_ROOT}/containerd \ - -v /tmp:/tmp \ - -v ${kuscia_conf_file}:/home/kuscia/etc/conf/kuscia.yaml \ + local domain_ctr=$1 + local domain_id=$2 + local env_flag=$3 + local kuscia_conf_file=$4 + local mount_flag=$5 + local memory_limit=$6 + local domain_host_port=$7 + local kusciaapi_http_port=$8 + local kusciaapi_grpc_port=$9 + local domain_host_internal_port=${10} + local mountcontainerd="" + local export_port="-p ${domain_host_internal_port}:80 \ + -p ${domain_host_port}:1080 \ + -p ${kusciaapi_http_port}:8082 \ + -p ${kusciaapi_grpc_port}:8083" + + local local_network_name=${NETWORK_NAME} + + if [[ ${mode} != "start" ]] && [[ "${EXPOSE_PORTS}" != true ]]; then + export_port="" + fi + if [[ ${domain_type} != "master" && ${runtime} == "runc" ]]; then + createVolume "${domain_ctr}-containerd" + mountcontainerd="-v ${domain_ctr}-containerd:${CTR_ROOT}/containerd" + privileged_flag=" --privileged" + fi + log "domain_hostname=${domain_hostname}" + + if [[ ${mode} == "start" ]] && [[ "${CLUSTERED}" == true ]]; then + local_network_name=${CLUSTER_NETWORK_NAME} + fi + log "network=${local_network_name}" + + docker run -dit${privileged_flag} --name="${domain_ctr}" --hostname="${domain_hostname}" --restart=always --network=${local_network_name} ${memory_limit} \ + ${export_port} 
${mountcontainerd} -v /tmp:/tmp\ + -v ${kuscia_conf_file}:${CTR_ROOT}/etc/conf/kuscia.yaml \ ${env_flag} ${mount_flag} \ - --env NAMESPACE=${domain_id} \ + --env NAMESPACE="${domain_id}" \ "${KUSCIA_IMAGE}" bin/kuscia start -c etc/conf/kuscia.yaml } function start_kuscia_container() { local domain_type=$1 - local domain_id=${2:-$DOMAIN_ID} + local domain_id=$2 local runtime=$3 local master_endpoint=$4 - local domain_host_port=${5:-$DOMAIN_HOST_PORT} - local kusciaapi_http_port=${6:-$KUSCIAAPI_HTTP_PORT} - local kusciaapi_grpc_port=${7:-$KUSCIAAPI_GRPC_PORT} - local domain_host_internal_port=${8:-$DOMAIN_HOST_INTERNAL_PORT} - local domain_ctr=$9 - local init_kuscia_conf_file_true_or_false=${10} - local mount_flag=${11} - local env_flag=$(generate_env_flag) - - if [[ ${init_kuscia_conf_file_true_or_false} = "true" ]]; then + local domain_ctr=$5 + local init_kuscia_conf_file=$6 + local mount_flag=$7 + local domain_host_port=$8 + local kusciaapi_http_port=$9 + local kusciaapi_grpc_port=${10} + local domain_host_internal_port=${11} + local env_flag + local memory_limit + local limit + env_flag=$(generate_env_flag) + + local domain_hostname + domain_hostname=$(generate_hostname "${domain_ctr}") || { echo -e "${RED}Failed to generate hostname${NC}"; exit 1; } + + if [[ ${MEMORY_LIMIT} = "-1" ]]; then + memory_limit="" + else + case "${MEMORY_LIMIT}" in + "") + case "${domain_type}" in + "lite") + limit="4GiB" + ;; + "autonomy") + limit="6GiB" + ;; + "master") + limit="2GiB" + ;; + esac + ;; + *) + limit="${MEMORY_LIMIT}" + ;; + esac + memory_limit="-m ${limit}" + fi + + build_kuscia_network + + if [[ ${init_kuscia_conf_file} = "true" ]]; then local kuscia_conf_file="${PWD}/${domain_ctr}/kuscia.yaml" - init_kuscia_conf_file ${domain_type} ${domain_id} ${domain_ctr} ${kuscia_conf_file} ${master_endpoint} + init_kuscia_conf_file "${domain_type}" "${domain_id}" "${domain_ctr}" "${kuscia_conf_file}" "${master_endpoint}" fi if need_start_docker_container "$domain_ctr"; then log "Starting container $domain_ctr ..." 
- start_container "${domain_ctr}" "${domain_id}" "${domain_host_port}" "${kusciaapi_http_port}" "${kusciaapi_grpc_port}" "${domain_host_internal_port}" "${env_flag}" "${kuscia_conf_file}" "${mount_flag}" - if [[ "$domain_type" != "lite" ]]; then - probe_gateway_crd "${domain_ctr}" "${domain_id}" "${domain_ctr}" 60 - else - probe_datamesh "${domain_ctr}" - fi + start_container "${domain_ctr}" "${domain_id}" "${env_flag}" "${kuscia_conf_file}" "${mount_flag}" "${memory_limit}" "${domain_host_port}" "${kusciaapi_http_port}" "${kusciaapi_grpc_port}" "${domain_host_internal_port}" "${domain_hostname}" + [[ "$domain_type" != "lite" ]] && probe_gateway_crd "${domain_ctr}" "${domain_id}" "${domain_hostname}" 60 + [[ "$domain_type" != "master" ]] && probe_datamesh "${domain_ctr}" fi - if [[ "$domain_type" != "master" ]] && [[ ${runtime} == "runc" ]]; then - check_sf_image "${domain_id}" "${domain_ctr}" + if [[ ${IMPORT_SF_IMAGE} = "none" ]]; then + echo -e "${GREEN}skip importing sf image${NC}" + elif [[ ${IMPORT_SF_IMAGE} = "secretflow" ]]; then + if [[ "$domain_type" != "master" ]] && [[ ${runtime} == "runc" ]]; then + docker run --rm $KUSCIA_IMAGE cat ${CTR_ROOT}/scripts/deploy/import_engine_image.sh > import_engine_image.sh && chmod u+x import_engine_image.sh + bash import_engine_image.sh ${domain_ctr} ${SECRETFLOW_IMAGE} + rm -rf import_engine_image.sh + fi fi if [[ "$domain_type" != "lite" ]]; then - create_secretflow_app_image "${domain_ctr}" + docker exec -it "${domain_ctr}" scripts/deploy/create_secretflow_app_image.sh "${SECRETFLOW_IMAGE}" + log "Create secretflow app image done" fi log "$domain_type domain '${domain_id}' deployed successfully" } @@ -441,7 +468,7 @@ function start_kuscia_container() { function get_config_value() { local config_file=$1 local key=$2 - grep "$key:" "$config_file" | awk '{ print $2 }' | tr -d '\r\n' + grep "$key:" "$config_file" | awk '{ print $2 }' | sed 's/"//g' | tr -d '\r\n' } function start_kuscia() { @@ -449,138 +476,193 @@ function start_kuscia() { local domain_id=$(get_config_value "$kuscia_conf_file" "domainID") local deploy_mode=$(get_config_value "$kuscia_conf_file" "mode") local master_endpoint=$(get_config_value "$kuscia_conf_file" "masterEndpoint") + local store_endpoint=$(get_config_value "$kuscia_conf_file" "datastoreEndpoint") local runtime=$(get_runtime "$kuscia_conf_file") local privileged_flag + local domain_host_internal_port=${DOMAIN_HOST_INTERNAL_PORT:-13081} + local kusciaapi_http_port=${KUSCIAAPI_HTTP_PORT:-13082} + local kusciaapi_grpc_port=${KUSCIAAPI_GRPC_PORT:-13083} + local k3s_volume="" wrap_kuscia_config_file ${kuscia_conf_file} - local domain_ctr="${CTR_PREFIX}-${deploy_mode}-${domain_id}" + local ctr_prefix=${USER}-kuscia + local master_ctr=${ctr_prefix}-master + local domain_ctr="${ctr_prefix}-${deploy_mode}-${domain_id}" if [[ "${deploy_mode}" == "master" ]]; then - domain_ctr="${MASTER_CTR}" + domain_ctr="${master_ctr}" fi - [[ ${runtime} == "runc" ]] && privileged_flag=" --privileged" + K3S_DB_PATH="${HOME}/kuscia/${domain_ctr}/k3s" [[ ${DOMAIN_HOST_PORT} == "" ]] && { printf "empty domain host port\n" >&2; exit 1; } - [[ ${DOMAIN_HOST_INTERNAL_PORT} == "" ]] && DOMAIN_HOST_INTERNAL_PORT=13081 - [[ ${KUSCIAAPI_HTTP_PORT} == "" ]] && KUSCIAAPI_HTTP_PORT=13082 - [[ ${KUSCIAAPI_GRPC_PORT} == "" ]] && KUSCIAAPI_GRPC_PORT=13083 - + [[ ${deploy_mode} != "lite" && ${store_endpoint} == "" ]] && { init_k3s_data; k3s_volume="-v ${K3S_DB_PATH}:${CTR_ROOT}/var/k3s"; } init ${domain_ctr} local mount_flag=$(generate_mount_flag) - 
start_kuscia_container "${deploy_mode}" "${domain_id}" "$runtime" "$master_endpoint" "${DOMAIN_HOST_PORT}" "${KUSCIAAPI_HTTP_PORT}" "${KUSCIAAPI_GRPC_PORT}" "${DOMAIN_HOST_INTERNAL_PORT}" "${domain_ctr}" "false" "${mount_flag}" + start_kuscia_container "${deploy_mode}" "${domain_id}" "$runtime" "$master_endpoint" "${domain_ctr}" "false" "${mount_flag}" "${DOMAIN_HOST_PORT}" "${kusciaapi_http_port}" "${kusciaapi_grpc_port}" "${domain_host_internal_port}" } function start_center_cluster() { + local alice_domain=alice + local bob_domain=bob + local ctr_prefix=${USER}-kuscia + local master_ctr=${ctr_prefix}-master local runtime="runc" local privileged_flag=" --privileged" - local alice_ctr=${CTR_PREFIX}-lite-${ALICE_DOMAIN} - local bob_ctr=${CTR_PREFIX}-lite-${BOB_DOMAIN} - start_kuscia_container "master" "${MASTER_DOMAIN}" "" "" "18081" "18082" "18083" "18084" "${MASTER_CTR}" "true" - start_kuscia_container "lite" "${ALICE_DOMAIN}" "${runtime}" "https://${MASTER_CTR}:1080" "28081" "28082" "28083" "28084" "${alice_ctr}" "true" - start_kuscia_container "lite" "${BOB_DOMAIN}" "${runtime}" "https://${MASTER_CTR}:1080" "38081" "38082" "38083" "38084" "${bob_ctr}" "true" - create_cluster_domain_route ${ALICE_DOMAIN} ${BOB_DOMAIN} - create_cluster_domain_route ${BOB_DOMAIN} ${ALICE_DOMAIN} - docker exec -it ${alice_ctr} scripts/deploy/init_example_data.sh ${ALICE_DOMAIN} - docker exec -it ${bob_ctr} scripts/deploy/init_example_data.sh ${BOB_DOMAIN} + local alice_ctr=${ctr_prefix}-lite-${alice_domain} + local bob_ctr=${ctr_prefix}-lite-${bob_domain} + start_kuscia_container "master" "kuscia-system" "" "" "${master_ctr}" "true" "" + start_kuscia_container "lite" "${alice_domain}" "${runtime}" "https://${master_ctr}:1080" "${alice_ctr}" "true" + start_kuscia_container "lite" "${bob_domain}" "${runtime}" "https://${master_ctr}:1080" "${bob_ctr}" "true" + create_cluster_domain_route ${alice_domain} ${bob_domain} + create_cluster_domain_route ${bob_domain} ${alice_domain} + docker exec -it ${alice_ctr} scripts/deploy/init_example_data.sh ${alice_domain} + docker exec -it ${bob_ctr} scripts/deploy/init_example_data.sh ${bob_domain} log "Kuscia ${mode} cluster started successfully" } function start_p2p_cluster() { + local alice_domain=alice + local bob_domain=bob + local ctr_prefix=${USER}-kuscia local runtime="runc" local p2p_protocol=$1 local privileged_flag=" --privileged" - local alice_ctr=${CTR_PREFIX}-autonomy-${ALICE_DOMAIN} - local bob_ctr=${CTR_PREFIX}-autonomy-${BOB_DOMAIN} - start_kuscia_container "autonomy" "${ALICE_DOMAIN}" "${runtime}" " " "11081" "11082" "11083" "11084" "${alice_ctr}" "true" - start_kuscia_container "autonomy" "${BOB_DOMAIN}" "${runtime}" " " "12081" "12082" "12083" "12084" "${bob_ctr}" "true" - build_interconn ${bob_ctr} ${alice_ctr} ${ALICE_DOMAIN} ${BOB_DOMAIN} ${p2p_protocol} - build_interconn ${alice_ctr} ${bob_ctr} ${BOB_DOMAIN} ${ALICE_DOMAIN} ${p2p_protocol} - docker exec -it ${alice_ctr} scripts/deploy/init_example_data.sh ${ALICE_DOMAIN} - docker exec -it ${bob_ctr} scripts/deploy/init_example_data.sh ${BOB_DOMAIN} + local alice_ctr=${ctr_prefix}-autonomy-${alice_domain} + local bob_ctr=${ctr_prefix}-autonomy-${bob_domain} + start_kuscia_container "autonomy" "${alice_domain}" "${runtime}" " " "${alice_ctr}" "true" + start_kuscia_container "autonomy" "${bob_domain}" "${runtime}" " " "${bob_ctr}" "true" + build_interconn ${bob_ctr} ${alice_ctr} ${alice_domain} ${bob_domain} ${p2p_protocol} + build_interconn ${alice_ctr} ${bob_ctr} ${bob_domain} ${alice_domain} 
${p2p_protocol} + docker exec -it ${alice_ctr} scripts/deploy/init_example_data.sh ${alice_domain} + docker exec -it ${bob_ctr} scripts/deploy/init_example_data.sh ${bob_domain} log "Kuscia ${mode} cluster started successfully" } function start_cxc_cluster() { + local alice_domain=alice + local bob_domain=bob + local ctr_prefix=${USER}-kuscia local runtime="runc" local privileged_flag=" --privileged" - local alice_ctr=${CTR_PREFIX}-lite-${ALICE_DOMAIN} - local bob_ctr=${CTR_PREFIX}-lite-${BOB_DOMAIN} - local alice_master_domain="master-alice" - local bob_master_domain="master-bob" - local alice_master_ctr=${CTR_PREFIX}-${alice_master_domain} - local bob_master_ctr=${CTR_PREFIX}-${bob_master_domain} + local alice_ctr=${ctr_prefix}-lite-cxc-${alice_domain} + local bob_ctr=${ctr_prefix}-lite-cxc-${bob_domain} + local alice_master_domain="master-cxc-alice" + local bob_master_domain="master-cxc-bob" + local alice_master_ctr=${ctr_prefix}-${alice_master_domain} + local bob_master_ctr=${ctr_prefix}-${bob_master_domain} local p2p_protocol="kuscia" local transit=$1 - start_kuscia_container "master" "${alice_master_domain}" "" "" "18081" "18082" "18083" "18084" "${alice_master_ctr}" "true" - start_kuscia_container "master" "${bob_master_domain}" "" "" "28081" "28082" "28083" "28084" "${bob_master_ctr}" "true" - start_kuscia_container "lite" "${ALICE_DOMAIN}" "${runtime}" "https://${alice_master_ctr}:1080" "38081" "38082" "38083" "38084" "${alice_ctr}" "true" - start_kuscia_container "lite" "${BOB_DOMAIN}" "${runtime}" "https://${bob_master_ctr}:1080" "48081" "48082" "48083" "48084" "${bob_ctr}" "true" + start_kuscia_container "master" "${alice_master_domain}" "" "" "${alice_master_ctr}" "true" + start_kuscia_container "master" "${bob_master_domain}" "" "" "${bob_master_ctr}" "true" + start_kuscia_container "lite" "${alice_domain}" "${runtime}" "https://${alice_master_ctr}:1080" "${alice_ctr}" "true" + start_kuscia_container "lite" "${bob_domain}" "${runtime}" "https://${bob_master_ctr}:1080" "${bob_ctr}" "true" build_interconn ${bob_master_ctr} ${alice_master_ctr} ${alice_master_domain} ${bob_master_domain} ${p2p_protocol} build_interconn ${alice_master_ctr} ${bob_master_ctr} ${bob_master_domain} ${alice_master_domain} ${p2p_protocol} - copy_between_containers ${alice_ctr}:${CTR_CERT_ROOT}/domain.crt ${bob_master_ctr}:${CTR_CERT_ROOT}/${ALICE_DOMAIN}.domain.crt - copy_between_containers ${bob_ctr}:${CTR_CERT_ROOT}/domain.crt ${alice_master_ctr}:${CTR_CERT_ROOT}/${BOB_DOMAIN}.domain.crt - docker exec -it ${alice_master_ctr} scripts/deploy/add_domain.sh ${BOB_DOMAIN} p2p ${p2p_protocol} ${bob_master_domain} - docker exec -it ${bob_master_ctr} scripts/deploy/add_domain.sh ${ALICE_DOMAIN} p2p ${p2p_protocol} ${alice_master_domain} + copy_between_containers ${alice_ctr}:${CTR_CERT_ROOT}/domain.crt ${bob_master_ctr}:${CTR_CERT_ROOT}/${alice_domain}.domain.crt + copy_between_containers ${bob_ctr}:${CTR_CERT_ROOT}/domain.crt ${alice_master_ctr}:${CTR_CERT_ROOT}/${bob_domain}.domain.crt + docker exec -it ${alice_master_ctr} scripts/deploy/add_domain.sh ${bob_domain} p2p ${p2p_protocol} ${bob_master_domain} + docker exec -it ${bob_master_ctr} scripts/deploy/add_domain.sh ${alice_domain} p2p ${p2p_protocol} ${alice_master_domain} if [[ $transit == false ]]; then - docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh ${ALICE_DOMAIN} ${BOB_DOMAIN} http://${bob_ctr}:1080 -i false -p ${p2p_protocol} - docker exec -it ${bob_master_ctr} scripts/deploy/join_to_host.sh ${BOB_DOMAIN} ${ALICE_DOMAIN} 
http://${alice_ctr}:1080 -i false -p ${p2p_protocol} - docker exec -it ${bob_master_ctr} scripts/deploy/join_to_host.sh ${ALICE_DOMAIN} ${BOB_DOMAIN} http://${bob_ctr}:1080 -i false -p ${p2p_protocol} - docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh ${BOB_DOMAIN} ${ALICE_DOMAIN} http://${alice_ctr}:1080 -i false -p ${p2p_protocol} + docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh ${alice_domain} ${bob_domain} http://${bob_ctr}:1080 -i false -p ${p2p_protocol} + docker exec -it ${bob_master_ctr} scripts/deploy/join_to_host.sh ${bob_domain} ${alice_domain} http://${alice_ctr}:1080 -i false -p ${p2p_protocol} + docker exec -it ${bob_master_ctr} scripts/deploy/join_to_host.sh ${alice_domain} ${bob_domain} http://${bob_ctr}:1080 -i false -p ${p2p_protocol} + docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh ${bob_domain} ${alice_domain} http://${alice_ctr}:1080 -i false -p ${p2p_protocol} else - docker exec -it ${bob_master_ctr} scripts/deploy/join_to_host.sh ${bob_master_domain} ${BOB_DOMAIN} http://${bob_ctr}:1080 -i false -p ${p2p_protocol} - docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh ${alice_master_domain} ${BOB_DOMAIN} http://${bob_ctr}:1080 -i false -x ${bob_master_domain} -p ${p2p_protocol} - docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh ${ALICE_DOMAIN} ${BOB_DOMAIN} http://${bob_ctr}:1080 -i false -x ${alice_master_domain} -p ${p2p_protocol} - docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh ${alice_master_domain} ${ALICE_DOMAIN} http://${alice_ctr}:1080 -i false -p ${p2p_protocol} - docker exec -it ${bob_master_ctr} scripts/deploy/join_to_host.sh ${bob_master_domain} ${ALICE_DOMAIN} http://${alice_ctr}:1080 -i false -x ${alice_master_domain} -p ${p2p_protocol} - docker exec -it ${bob_master_ctr} scripts/deploy/join_to_host.sh ${BOB_DOMAIN} ${ALICE_DOMAIN} http://${alice_ctr}:1080 -i false -x ${bob_master_domain} -p ${p2p_protocol} + docker exec -it ${bob_master_ctr} scripts/deploy/join_to_host.sh ${bob_master_domain} ${bob_domain} http://${bob_ctr}:1080 -i false -p ${p2p_protocol} + docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh ${alice_master_domain} ${bob_domain} http://${bob_ctr}:1080 -i false -x ${bob_master_domain} -p ${p2p_protocol} + docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh ${alice_domain} ${bob_domain} http://${bob_ctr}:1080 -i false -x ${alice_master_domain} -p ${p2p_protocol} + docker exec -it ${bob_master_ctr} scripts/deploy/join_to_host.sh ${alice_domain} ${bob_domain} http://${bob_ctr}:1080 -i false -p ${p2p_protocol} -x $alice_master_domain + docker exec -it ${bob_master_ctr} scripts/deploy/join_to_host.sh $alice_master_domain ${bob_domain} http://${bob_ctr}:1080 -i false -p ${p2p_protocol} -x $bob_master_domain + docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh ${alice_master_domain} ${alice_domain} http://${alice_ctr}:1080 -i false -p ${p2p_protocol} + docker exec -it ${bob_master_ctr} scripts/deploy/join_to_host.sh ${bob_master_domain} ${alice_domain} http://${alice_ctr}:1080 -i false -x ${alice_master_domain} -p ${p2p_protocol} + docker exec -it ${bob_master_ctr} scripts/deploy/join_to_host.sh ${bob_domain} ${alice_domain} http://${alice_ctr}:1080 -i false -x ${bob_master_domain} -p ${p2p_protocol} + docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh ${bob_domain} ${alice_domain} http://${alice_ctr}:1080 -i false -p ${p2p_protocol} -x $bob_master_domain + docker exec -it 
${alice_master_ctr} scripts/deploy/join_to_host.sh $bob_master_domain ${alice_domain} http://${alice_ctr}:1080 -i false -p ${p2p_protocol} -x $alice_master_domain fi - docker exec -it ${alice_ctr} scripts/deploy/init_example_data.sh ${ALICE_DOMAIN} - docker exec -it ${bob_ctr} scripts/deploy/init_example_data.sh ${BOB_DOMAIN} + docker exec -it ${alice_ctr} scripts/deploy/init_example_data.sh ${alice_domain} + docker exec -it ${bob_ctr} scripts/deploy/init_example_data.sh ${bob_domain} log "Kuscia ${mode} cluster started successfully" } function start_cxp_cluster() { + local alice_domain=alice + local bob_domain=bob + local ctr_prefix=${USER}-kuscia local runtime="runc" local privileged_flag=" --privileged" - local alice_ctr=${CTR_PREFIX}-lite-${ALICE_DOMAIN} - local bob_ctr=${CTR_PREFIX}-autonomy-${BOB_DOMAIN} - local alice_master_domain="master-alice" - local alice_master_ctr=${CTR_PREFIX}-${alice_master_domain} + local alice_ctr=${ctr_prefix}-lite-cxp-${alice_domain} + local bob_ctr=${ctr_prefix}-autonomy-cxp-${bob_domain} + local alice_master_domain="master-cxp-alice" + local alice_master_ctr=${ctr_prefix}-${alice_master_domain} local p2p_protocol="kuscia" local transit=$1 - start_kuscia_container "master" "${alice_master_domain}" "" "" "18081" "18082" "18083" "18084" "${alice_master_ctr}" "true" - start_kuscia_container "lite" "${ALICE_DOMAIN}" "${runtime}" "https://${alice_master_ctr}:1080" "28081" "28082" "28083" "28084" "${alice_ctr}" "true" - start_kuscia_container "autonomy" "${BOB_DOMAIN}" "${runtime}" "https://${alice_master_ctr}:1080" "12081" "12082" "12083" "12084" "${bob_ctr}" "true" + start_kuscia_container "master" "${alice_master_domain}" "" "" "${alice_master_ctr}" "true" + start_kuscia_container "lite" "${alice_domain}" "${runtime}" "https://${alice_master_ctr}:1080" "${alice_ctr}" "true" + start_kuscia_container "autonomy" "${bob_domain}" "${runtime}" "https://${alice_master_ctr}:1080" "${bob_ctr}" "true" - build_interconn ${bob_ctr} ${alice_master_ctr} ${alice_master_domain} ${BOB_DOMAIN} ${p2p_protocol} - build_interconn ${alice_master_ctr} ${bob_ctr} ${BOB_DOMAIN} ${alice_master_domain} ${p2p_protocol} - copy_between_containers ${alice_ctr}:${CTR_CERT_ROOT}/domain.crt ${bob_ctr}:${CTR_CERT_ROOT}/${ALICE_DOMAIN}.domain.crt - docker exec -it ${bob_ctr} scripts/deploy/add_domain.sh ${ALICE_DOMAIN} p2p ${p2p_protocol} ${alice_master_domain} + build_interconn ${bob_ctr} ${alice_master_ctr} ${alice_master_domain} ${bob_domain} ${p2p_protocol} + build_interconn ${alice_master_ctr} ${bob_ctr} ${bob_domain} ${alice_master_domain} ${p2p_protocol} + copy_between_containers ${alice_ctr}:${CTR_CERT_ROOT}/domain.crt ${bob_ctr}:${CTR_CERT_ROOT}/${alice_domain}.domain.crt + docker exec -it ${bob_ctr} scripts/deploy/add_domain.sh ${alice_domain} p2p ${p2p_protocol} ${alice_master_domain} if [[ $transit == false ]]; then - docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh ${ALICE_DOMAIN} ${BOB_DOMAIN} https://${bob_ctr}:1080 -i false -p ${p2p_protocol} - docker exec -it ${bob_ctr} scripts/deploy/join_to_host.sh ${BOB_DOMAIN} ${ALICE_DOMAIN} http://${alice_ctr}:1080 -i false -p ${p2p_protocol} - docker exec -it ${bob_ctr} scripts/deploy/join_to_host.sh ${ALICE_DOMAIN} ${BOB_DOMAIN} http://${bob_ctr}:1080 -i false -p ${p2p_protocol} - docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh ${BOB_DOMAIN} ${ALICE_DOMAIN} http://${alice_ctr}:1080 -i false -p ${p2p_protocol} + docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh ${alice_domain} 
${bob_domain} https://${bob_ctr}:1080 -i false -p ${p2p_protocol} + docker exec -it ${bob_ctr} scripts/deploy/join_to_host.sh ${bob_domain} ${alice_domain} http://${alice_ctr}:1080 -i false -p ${p2p_protocol} + docker exec -it ${bob_ctr} scripts/deploy/join_to_host.sh ${alice_domain} ${bob_domain} http://${bob_ctr}:1080 -i false -p ${p2p_protocol} + docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh ${bob_domain} ${alice_domain} http://${alice_ctr}:1080 -i false -p ${p2p_protocol} else - docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh ${ALICE_DOMAIN} ${BOB_DOMAIN} https://${bob_ctr}:1080 -i false -x ${alice_master_domain} -p ${p2p_protocol} - docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh $alice_master_domain ${ALICE_DOMAIN} http://${alice_ctr}:1080 -i false -p ${p2p_protocol} - docker exec -it ${bob_ctr} scripts/deploy/join_to_host.sh ${BOB_DOMAIN} ${ALICE_DOMAIN} http://${alice_ctr}:1080 -i false -x ${alice_master_domain} -p ${p2p_protocol} + docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh ${alice_domain} ${bob_domain} https://${bob_ctr}:1080 -i false -x ${alice_master_domain} -p ${p2p_protocol} + docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh $alice_master_domain ${alice_domain} http://${alice_ctr}:1080 -i false -p ${p2p_protocol} + docker exec -it ${bob_ctr} scripts/deploy/join_to_host.sh ${bob_domain} ${alice_domain} http://${alice_ctr}:1080 -i false -x ${alice_master_domain} -p ${p2p_protocol} + docker exec -it ${bob_ctr} scripts/deploy/join_to_host.sh ${alice_domain} ${bob_domain} https://${bob_ctr}:1080 -i false -p ${p2p_protocol} + docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh ${bob_domain} ${alice_domain} http://${alice_ctr}:1080 -i false -p ${p2p_protocol} fi - docker exec -it ${alice_ctr} scripts/deploy/init_example_data.sh ${ALICE_DOMAIN} - docker exec -it ${bob_ctr} scripts/deploy/init_example_data.sh ${BOB_DOMAIN} + docker exec -it ${alice_ctr} scripts/deploy/init_example_data.sh ${alice_domain} + docker exec -it ${bob_ctr} scripts/deploy/init_example_data.sh ${bob_domain} log "Kuscia ${mode} cluster started successfully" } function build_kuscia_network() { - if [[ ! "$(docker network ls -q -f name=${NETWORK_NAME})" ]]; then + if [[ ${mode} == "start" ]] && [[ "${CLUSTERED}" == true ]]; then + # Clustered mode does not create network. + pre_check_cluster_network + elif [[ ! "$(docker network ls -q -f name=^${NETWORK_NAME}$)" ]]; then docker network create ${NETWORK_NAME} fi } +# pre check docker network +function pre_check_cluster_network() { + + local container_id + local rm_container=false + if [[ ! "$(docker network ls -q -f name=${CLUSTER_NETWORK_NAME})" ]]; then + container_id=$(docker run -dit --rm --network ${CLUSTER_NETWORK_NAME} ${KUSCIA_IMAGE} bash) + rm_container=true + fi + + local network_info + network_info=$(docker network inspect "$CLUSTER_NETWORK_NAME") + + if ! (echo "${network_info}" | grep '"Driver": "overlay"' > /dev/null); then + echo -e "${RED}Network '${CLUSTER_NETWORK_NAME}' exists, but its Driver is not 'overlay'.${NC}" + exit 1 + fi + + if ! (echo "${network_info}" | grep '"Attachable": true' > /dev/null); then + echo -e "${RED}Network '${CLUSTER_NETWORK_NAME}' exists, but its Attachable is not 'true'.${NC}" + exit 1 + fi + + log "Network '${CLUSTER_NETWORK_NAME}' is overlay and swarm scope type." 
+ if [ "${rm_container}" = true ]; then + docker rm -f "${container_id}" || true + fi +} + function get_absolute_path() { echo "$( cd "$(dirname -- "$1")" >/dev/null @@ -598,18 +680,20 @@ DEPLOY_MODE: start Multi-machine mode Common Options: - -h Show this help text. - -c The host path of kuscia configure file. It will be mounted into the domain container. - -d The data directory used to store domain data. It will be mounted into the domain container. - You can set Env 'DOMAIN_DATA_DIR' instead. Default is '{{ROOT}}/{{DOMAIN_CONTAINER_NAME}}/data'. - -l The data directory used to store domain logs. It will be mounted into the domain container. - You can set Env 'DOMAIN_LOG_DIR' instead. Default is '{{ROOT}}/{{DOMAIN_CONTAINER_NAME}}/logs'. - -p The port exposed by domain. You can set Env 'DOMAIN_HOST_PORT' instead. - -q (Only used in autonomy or lite mode)The port exposed for internal use by domain. You can set Env 'DOMAIN_HOST_INTERNAL_PORT' instead. - -r The install directory. You can set Env 'ROOT' instead. Default is $(pwd). - -t (Only used in lite mode) The deploy token. You can set Env 'DOMAIN_TOKEN' instead. - -k (Only used in autonomy or master mode)The http port exposed by KusciaAPI , default is 13082. You can set Env 'KUSCIAAPI_HTTP_PORT' instead. - -g (Only used in autonomy or master mode)The grpc port exposed by KusciaAPI, default is 13083. You can set Env 'KUSCIAAPI_GRPC_PORT' instead." + -h,--help Show this help text. + -a Whether to import the secretflow image, e.g. '-a secretflow'. 'none' indicates that no image is imported. + -c The host path of the kuscia configuration file. It will be mounted into the domain container. + -d The data directory used to store domain data. It will be mounted into the domain container. + You can set Env 'DOMAIN_DATA_DIR' instead. Default is '{{ROOT}}/{{DOMAIN_CONTAINER_NAME}}/data'. + -l The data directory used to store domain logs. It will be mounted into the domain container. + You can set Env 'DOMAIN_LOG_DIR' instead. Default is '{{ROOT}}/{{DOMAIN_CONTAINER_NAME}}/logs'. + -m,--memory-limit Set an appropriate memory limit, e.g. '-m 4GiB' or '--memory-limit=4GiB'; '-1' means no limit. Defaults: master mode 2GiB, lite mode 4GiB, autonomy mode 6GiB. + -p The port exposed by domain. You can set Env 'DOMAIN_HOST_PORT' instead. + -q (Only used in autonomy or lite mode) The port exposed for internal use by domain. You can set Env 'DOMAIN_HOST_INTERNAL_PORT' instead. + -t Enable gateway transit (routing forwarding) capability. You can set Env 'transit=true' instead. + -k (Only used in autonomy or master mode) The http port exposed by KusciaAPI, default is 13082. You can set Env 'KUSCIAAPI_HTTP_PORT' instead. + -g (Only used in autonomy or master mode) The grpc port exposed by KusciaAPI, default is 13083. You can set Env 'KUSCIAAPI_GRPC_PORT' instead. + --cluster (Only used in Multi-machine mode) Use this when deploying a single node; in a multi-replica scenario the cluster overlay network will be used.
For example: '--cluster'" } mode= @@ -620,9 +704,33 @@ center | p2p | cxc | cxp | start) ;; esac +NEW_ARGS=() + +for arg in "$@"; do + case "$arg" in + --help) + usage + exit 0 + ;; + --expose-ports) + EXPOSE_PORTS=true + ;; + --memory-limit=*) + MEMORY_LIMIT="${arg#*=}" + ;; + --cluster) + CLUSTERED=true + ;; + *) + NEW_ARGS+=("$arg") + ;; + esac +done + interconn_protocol= transit=false -while getopts 'P:c:d:l:p:q:t:r:k:g:h' option; do +set -- "${NEW_ARGS[@]}" +while getopts 'P:a:c:d:l:m:p:q:tk:g:h' option; do case "$option" in P) interconn_protocol=$OPTARG @@ -631,8 +739,11 @@ while getopts 'P:c:d:l:p:q:t:r:k:g:h' option; do usage exit ;; + a) + IMPORT_SF_IMAGE=$OPTARG + ;; c) - KUSCIA_CONFIG_FILE=$(get_absolute_path $OPTARG) + KUSCIA_CONFIG_FILE=$(get_absolute_path "${OPTARG}") ;; d) DOMAIN_DATA_DIR=$OPTARG @@ -640,6 +751,9 @@ while getopts 'P:c:d:l:p:q:t:r:k:g:h' option; do l) DOMAIN_LOG_DIR=$OPTARG ;; + m) + MEMORY_LIMIT=$OPTARG + ;; p) DOMAIN_HOST_PORT=$OPTARG ;; @@ -649,9 +763,6 @@ while getopts 'P:c:d:l:p:q:t:r:k:g:h' option; do t) transit=true ;; - r) - ROOT_DIR=$OPTARG - ;; k) KUSCIAAPI_HTTP_PORT=$OPTARG ;; @@ -675,8 +786,6 @@ done shift $((OPTIND - 1)) [ "$interconn_protocol" == "bfia" ] || interconn_protocol="kuscia" -[ "$mode" == "" ] && mode=$1 -[ "$mode" == "" -o "$mode" == "centralized" ] && mode="center" if [ "$mode" == "center" -a "$interconn_protocol" != "kuscia" ]; then printf "In current quickstart script, center mode just support 'kuscia'\n" >&2 exit 1 diff --git a/scripts/deploy/start_standalone.sh b/scripts/deploy/start_standalone.sh index f92cd769..55061ae4 100755 --- a/scripts/deploy/start_standalone.sh +++ b/scripts/deploy/start_standalone.sh @@ -42,7 +42,7 @@ MASTER_MEMORY_LIMIT=2G LITE_MEMORY_LIMIT=4G AUTONOMY_MEMORY_LIMIT=6G SF_IMAGE_NAME="secretflow-registry.cn-hangzhou.cr.aliyuncs.com/secretflow/secretflow-lite-anolis8" -SF_IMAGE_TAG="1.3.0b0" +SF_IMAGE_TAG="1.5.0b0" SF_IMAGE_REGISTRY="" NETWORK_NAME="kuscia-exchange" VOLUME_PATH="${ROOT}" @@ -234,7 +234,7 @@ function copy_volume_file_to_container() { function create_secretflow_app_image() { local ctr=$1 - docker exec -it ${ctr} scripts/deploy/create_sf_app_image.sh "${SF_IMAGE_NAME}" "${SF_IMAGE_TAG}" + docker exec -it ${ctr} scripts/deploy/create_sf_app_image.sh "${SF_IMAGE_NAME}:${SF_IMAGE_TAG}" log "create secretflow app image done" } @@ -510,6 +510,9 @@ function run_hybrid_centerX2() { docker exec -it ${bob_master_ctr} scripts/deploy/join_to_host.sh $bob_master_domain $bob_domain http://${bob_ctr}:1080 -i false -p ${p2p_protocol} docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh $alice_master_domain $bob_domain http://${bob_ctr}:1080 -i false -p ${p2p_protocol} -x $bob_master_domain docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh $alice_domain $bob_domain http://${bob_ctr}:1080 -i false -p ${p2p_protocol} -x $alice_master_domain + # cdr declaration for handshake + docker exec -it ${bob_master_ctr} scripts/deploy/join_to_host.sh $alice_domain $bob_domain http://${bob_ctr}:1080 -i false -p ${p2p_protocol} -x $alice_master_domain + docker exec -it ${bob_master_ctr} scripts/deploy/join_to_host.sh $alice_master_domain $bob_domain http://${bob_ctr}:1080 -i false -p ${p2p_protocol} -x $bob_master_domain # bob to alice = # alice-master to alice + # bob-master to alice transit by alice-master + @@ -517,6 +520,9 @@ function run_hybrid_centerX2() { docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh $alice_master_domain $alice_domain http://${alice_ctr}:1080 -i 
false -p ${p2p_protocol} docker exec -it ${bob_master_ctr} scripts/deploy/join_to_host.sh $bob_master_domain $alice_domain http://${alice_ctr}:1080 -i false -p ${p2p_protocol} -x $alice_master_domain docker exec -it ${bob_master_ctr} scripts/deploy/join_to_host.sh $bob_domain $alice_domain http://${alice_ctr}:1080 -i false -p ${p2p_protocol} -x $bob_master_domain + # cdr declaration for handshake + docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh $bob_domain $alice_domain http://${alice_ctr}:1080 -i false -p ${p2p_protocol} -x $bob_master_domain + docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh $bob_master_domain $alice_domain http://${alice_ctr}:1080 -i false -p ${p2p_protocol} -x $alice_master_domain fi check_sf_image $alice_domain ${alice_ctr} ${alice_ctr} @@ -572,6 +578,9 @@ function run_hybrid_centerXp2p() { # bob to alice transit by alice-master docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh $alice_master_domain $alice_domain http://${alice_ctr}:1080 -i false -p ${p2p_protocol} docker exec -it ${bob_master_ctr} scripts/deploy/join_to_host.sh $bob_domain $alice_domain http://${alice_ctr}:1080 -i false -p ${p2p_protocol} -x $alice_master_domain + # cdr declaration for handshake + docker exec -it ${bob_master_ctr} scripts/deploy/join_to_host.sh $alice_domain $bob_domain https://${bob_ctr}:1080 -i false -p ${p2p_protocol} + docker exec -it ${alice_master_ctr} scripts/deploy/join_to_host.sh $bob_domain $alice_domain http://${alice_ctr}:1080 -i false -p ${p2p_protocol} fi check_sf_image $alice_domain ${alice_ctr} ${alice_ctr} @@ -735,7 +744,7 @@ esac interconn_protocol= transit=false -while getopts 'p:t:h' option; do +while getopts 'p:th' option; do case "$option" in p) interconn_protocol=$OPTARG diff --git a/scripts/templates/cluster_domain_route.token.transit.yaml b/scripts/templates/cluster_domain_route.token.transit.yaml index 3fb7c28e..7d7213ae 100644 --- a/scripts/templates/cluster_domain_route.token.transit.yaml +++ b/scripts/templates/cluster_domain_route.token.transit.yaml @@ -15,10 +15,14 @@ spec: isTLS: {{.ISTLS}} port: {{.PORT}} transit: + transitMethod: THIRD-DOMAIN domain: domainID: {{.TRANSIT_DOMAIN}} + bodyEncryption: + algorithm: AES authenticationType: Token tokenConfig: tokenGenMethod: RSA-GEN + rollingUpdatePeriod: 86400 requestHeadersToAdd: Authorization: Bearer {{.TOKEN}} \ No newline at end of file diff --git a/scripts/templates/cluster_domain_route.token.yaml b/scripts/templates/cluster_domain_route.token.yaml index 541e6b95..b4432165 100644 --- a/scripts/templates/cluster_domain_route.token.yaml +++ b/scripts/templates/cluster_domain_route.token.yaml @@ -17,6 +17,6 @@ spec: authenticationType: Token tokenConfig: tokenGenMethod: RSA-GEN - rollingUpdatePeriod: 600 + rollingUpdatePeriod: 86400 requestHeadersToAdd: Authorization: Bearer {{.TOKEN}} \ No newline at end of file diff --git a/scripts/templates/scql/broker_alice.yaml b/scripts/templates/scql/broker_alice.yaml new file mode 100644 index 00000000..167682f1 --- /dev/null +++ b/scripts/templates/scql/broker_alice.yaml @@ -0,0 +1,14 @@ +apiVersion: kuscia.secretflow/v1alpha1 +kind: KusciaDeployment +metadata: + labels: + kuscia.secretflow/app-type: scql + name: scql + namespace: cross-domain +spec: + initiator: alice + inputConfig: "" + parties: + - appImageRef: scql + domainID: alice + role: broker \ No newline at end of file diff --git a/scripts/templates/scql/broker_bob.yaml b/scripts/templates/scql/broker_bob.yaml new file mode 100644 index 
00000000..577b80dc --- /dev/null +++ b/scripts/templates/scql/broker_bob.yaml @@ -0,0 +1,14 @@ +apiVersion: kuscia.secretflow/v1alpha1 +kind: KusciaDeployment +metadata: + labels: + kuscia.secretflow/app-type: scql + name: scql + namespace: cross-domain +spec: + initiator: bob + inputConfig: "" + parties: + - appImageRef: scql + domainID: bob + role: broker \ No newline at end of file diff --git a/scripts/templates/scql/broker_master.yaml b/scripts/templates/scql/broker_master.yaml new file mode 100644 index 00000000..85b7f60a --- /dev/null +++ b/scripts/templates/scql/broker_master.yaml @@ -0,0 +1,17 @@ +apiVersion: kuscia.secretflow/v1alpha1 +kind: KusciaDeployment +metadata: + labels: + kuscia.secretflow/app-type: scql + name: scql + namespace: cross-domain +spec: + initiator: alice + inputConfig: "" + parties: + - appImageRef: scql + domainID: alice + role: broker + - appImageRef: scql + domainID: bob + role: broker \ No newline at end of file diff --git a/scripts/templates/task_input_config.2pc_balanced_psi.json b/scripts/templates/task_input_config.2pc_balanced_psi.json index 94409f36..dda6c4b5 100644 --- a/scripts/templates/task_input_config.2pc_balanced_psi.json +++ b/scripts/templates/task_input_config.2pc_balanced_psi.json @@ -1,55 +1,52 @@ - { - "sf_datasource_config": { - "bob": { - "id": "default-data-source" - }, - "alice": { - "id": "default-data-source" - } - }, - "sf_cluster_desc": { - "parties": ["bob", "alice"], - "devices": [{ - "name": "spu", - "type": "spu", - "parties": ["bob", "alice"], - "config": "{\"runtime_config\":{\"protocol\":\"SEMI2K\",\"field\":\"FM128\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}" - }, { - "name": "heu", - "type": "heu", - "parties": ["bob", "alice"], - "config": "{\"mode\": \"PHEU\", \"schema\": \"paillier\", \"key_size\": 2048}" - }], - "ray_fed_config": { - "cross_silo_comm_backend": "brpc_link" - } - }, - "sf_node_eval_param": { - "domain": "data_prep", - "name": "psi", - "version": "0.0.2", - "attr_paths": ["input/receiver_input/key", "input/sender_input/key", "protocol", "disable_alignment", "skip_duplicates_check", "check_hash_digest", "ecdh_curve"], - "attrs": [{ - "is_na": false, - "ss": ["id1"] - }, { - "is_na": false, - "ss": ["id2"] - }, { - "is_na": false, - "s": "PROTOCOL_ECDH" - }, { - "is_na": true - }, { - "is_na": true - }, { - "is_na": true - }, { - "is_na": false, - "s": "CURVE_FOURQ" - }] - }, - "sf_output_uris": ["hgpl-qiusyjvx-node-3-output-0"], - "sf_input_ids": ["alice-table", "bob-table"], - "sf_output_ids": ["hgpl-qiusyjvx-node-3-output-0"] - } \ No newline at end of file +{ + "sf_datasource_config": { + "alice": { + "id": "default-data-source" + }, + "bob": { + "id": "default-data-source" + } + }, + "sf_cluster_desc": { + "parties": ["alice", "bob"], + "devices": [{ + "name": "spu", + "type": "spu", + "parties": ["alice", "bob"], + "config": "{\"runtime_config\":{\"protocol\":\"REF2K\",\"field\":\"FM64\"},\"link_desc\":{\"connect_retry_times\":60,\"connect_retry_interval_ms\":1000,\"brpc_channel_protocol\":\"http\",\"brpc_channel_connection_type\":\"pooled\",\"recv_timeout_ms\":1200000,\"http_timeout_ms\":1200000}}" + }, { + "name": "heu", + "type": "heu", + "parties": ["alice", "bob"], + "config": "{\"mode\": \"PHEU\", \"schema\": \"paillier\", \"key_size\": 2048}" + }], + "ray_fed_config": { + "cross_silo_comm_backend": "brpc_link" + } + }, + 
"sf_node_eval_param": { + "domain": "data_prep", + "name": "psi", + "version": "0.0.4", + "attr_paths": ["input/receiver_input/key", "input/sender_input/key", "protocol", "precheck_input", "bucket_size", "curve_type", "left_side"], + "attrs": [{ + "ss": ["id1"] + }, { + "ss": ["id2"] + }, { + "s": "PROTOCOL_ECDH" + }, { + "b": true + }, { + "i64": "1048576" + }, { + "s": "CURVE_FOURQ" + }, { + "is_na": false, + "ss": ["alice"] + }] + }, + "sf_input_ids": ["alice-table","bob-table"], + "sf_output_ids": ["psi-output"], + "sf_output_uris": ["psi-output.csv"] +} diff --git a/scripts/templates/task_input_config.2pc_balanced_psi_dp.json b/scripts/templates/task_input_config.2pc_balanced_psi_dp.json index 16e172d2..4b4d98d3 100644 --- a/scripts/templates/task_input_config.2pc_balanced_psi_dp.json +++ b/scripts/templates/task_input_config.2pc_balanced_psi_dp.json @@ -27,8 +27,8 @@ "sf_node_eval_param": { "domain": "data_prep", "name": "psi", - "version": "0.0.2", - "attr_paths": ["input/receiver_input/key", "input/sender_input/key", "protocol", "precheck_input", "bucket_size", "curve_type"], + "version": "0.0.4", + "attr_paths": ["input/receiver_input/key", "input/sender_input/key", "protocol", "precheck_input", "bucket_size", "curve_type","left_side"], "attrs": [{ "ss": ["id1"] }, { @@ -41,6 +41,9 @@ "i64": "1048576" }, { "s": "CURVE_FOURQ" + }, { + "is_na": false, + "ss": ["alice"] }] }, "sf_input_ids": ["alice-dp-table","bob-dp-table"], diff --git a/scripts/test/suite/center/base.sh b/scripts/test/suite/center/base.sh index 067042ec..899db19a 100755 --- a/scripts/test/suite/center/base.sh +++ b/scripts/test/suite/center/base.sh @@ -48,14 +48,16 @@ function test_centralized_example_kuscia_job() { } function test_centralized_kuscia_api_http_available() { - local http_status_code=$(get_kuscia_api_healthz_http_status_code "127.0.0.1:18082" "${TEST_SUITE_RUN_KUSCIA_DIR}"/master) + local http_port=$(docker inspect --format='{{(index (index .NetworkSettings.Ports "8082/tcp") 0).HostPort}}' ${MASTER_CONTAINER}) + local http_status_code=$(get_kuscia_api_healthz_http_status_code "127.0.0.1:${http_port}" "${TEST_SUITE_RUN_KUSCIA_DIR}"/master) assertEquals "KusciaApi healthZ http code" "200" "${http_status_code}" unset http_status_code } function test_centralized_kuscia_api_grpc_available() { - local status_message=$(get_kuscia_api_healthz_grpc_status_message "${TEST_BIN_DIR}"/grpcurl "127.0.0.1:18083" "${TEST_SUITE_RUN_KUSCIA_DIR}"/master) + local grpc_port=$(docker inspect --format='{{(index (index .NetworkSettings.Ports "8083/tcp") 0).HostPort}}' ${MASTER_CONTAINER}) + local status_message=$(get_kuscia_api_healthz_grpc_status_message "${TEST_BIN_DIR}"/grpcurl "127.0.0.1:${grpc_port}" "${TEST_SUITE_RUN_KUSCIA_DIR}"/master) assertEquals "KusciaApi healthZ grpc status message" "success" "$(echo "${status_message}" | "${TEST_BIN_DIR}"/jq .status.message | sed -e 's/"//g')" unset status_message diff --git a/scripts/test/suite/core/functions.sh b/scripts/test/suite/core/functions.sh index eb990b64..c3687693 100644 --- a/scripts/test/suite/core/functions.sh +++ b/scripts/test/suite/core/functions.sh @@ -1,4 +1,4 @@ -# +#!/bin/bash # Copyright 2023 Ant Group Co., Ltd. 
# # Licensed under the Apache License, Version 2.0 (the "License"); @@ -154,7 +154,7 @@ function start_center_mode() { mkdir -p "${test_suite_run_kuscia_dir}" # Run as Center - ./kuscia.sh center + ./kuscia.sh center --expose-ports # Check centralized container Up local master_container_state=$(get_container_state "${MASTER_CONTAINER}") @@ -191,7 +191,7 @@ function start_p2p_mode() { mkdir -p "${test_suite_run_kuscia_dir}" # Run as P2P - ./kuscia.sh p2p + ./kuscia.sh p2p --expose-ports # Check p2p container Up local autonomy_alice_container_state=$(get_container_state "${AUTONOMY_ALICE_CONTAINER}") diff --git a/scripts/test/suite/p2p/base.sh b/scripts/test/suite/p2p/base.sh index cc984680..439abbd0 100755 --- a/scripts/test/suite/p2p/base.sh +++ b/scripts/test/suite/p2p/base.sh @@ -47,13 +47,15 @@ function test_p2p_kuscia_job() { } function test_p2p_kuscia_api_http_available() { - local alice_http_status_code=$(get_kuscia_api_healthz_http_status_code "127.0.0.1:11082" "${TEST_SUITE_P2P_TEST_RUN_KUSCIA_DIR}"/alice) + local alice_http_port=$(docker inspect --format='{{(index (index .NetworkSettings.Ports "8082/tcp") 0).HostPort}}' ${AUTONOMY_ALICE_CONTAINER}) + local alice_http_status_code=$(get_kuscia_api_healthz_http_status_code "127.0.0.1:${alice_http_port}" "${TEST_SUITE_P2P_TEST_RUN_KUSCIA_DIR}"/alice) assertEquals "KusciaApi healthZ http code" "200" "${alice_http_status_code}" unset alice_http_status_code local autonomy_bob_container_ip=$(get_container_ip "${AUTONOMY_BOB_CONTAINER}") - local bob_http_status_code=$(get_kuscia_api_healthz_http_status_code 127.0.0.1:12082 "${TEST_SUITE_P2P_TEST_RUN_KUSCIA_DIR}"/bob) + local bob_http_port=$(docker inspect --format='{{(index (index .NetworkSettings.Ports "8082/tcp") 0).HostPort}}' ${AUTONOMY_BOB_CONTAINER}) + local bob_http_status_code=$(get_kuscia_api_healthz_http_status_code "127.0.0.1:${bob_http_port}" "${TEST_SUITE_P2P_TEST_RUN_KUSCIA_DIR}"/bob) assertEquals "KusciaApi healthZ http code" "200" "${bob_http_status_code}" unset bob_http_status_code @@ -62,13 +64,15 @@ function test_p2p_kuscia_api_http_available() { } function test_p2p_kuscia_api_grpc_available() { - local alice_status_message=$(get_kuscia_api_healthz_grpc_status_message "${TEST_BIN_DIR}"/grpcurl "127.0.0.1:11083" "${TEST_SUITE_P2P_TEST_RUN_KUSCIA_DIR}"/alice) + local alice_grpc_port=$(docker inspect --format='{{(index (index .NetworkSettings.Ports "8083/tcp") 0).HostPort}}' ${AUTONOMY_ALICE_CONTAINER}) + local alice_status_message=$(get_kuscia_api_healthz_grpc_status_message "${TEST_BIN_DIR}"/grpcurl "127.0.0.1:${alice_grpc_port}" "${TEST_SUITE_P2P_TEST_RUN_KUSCIA_DIR}"/alice) assertEquals "KusciaApi healthZ grpc status message" "success" "$(echo "${alice_status_message}" | "${TEST_BIN_DIR}"/jq .status.message | sed -e 's/"//g')" unset alice_status_message local autonomy_bob_container_ip=$(get_container_ip "${AUTONOMY_BOB_CONTAINER}") - local bob_status_message=$(get_kuscia_api_healthz_grpc_status_message "${TEST_BIN_DIR}"/grpcurl "127.0.0.1:12083" "${TEST_SUITE_P2P_TEST_RUN_KUSCIA_DIR}"/bob) + local bob_grpc_port=$(docker inspect --format='{{(index (index .NetworkSettings.Ports "8083/tcp") 0).HostPort}}' ${AUTONOMY_BOB_CONTAINER}) + local bob_status_message=$(get_kuscia_api_healthz_grpc_status_message "${TEST_BIN_DIR}"/grpcurl "127.0.0.1:${bob_grpc_port}" "${TEST_SUITE_P2P_TEST_RUN_KUSCIA_DIR}"/bob) assertEquals "KusciaApi healthZ grpc status message" "success" "$(echo "${bob_status_message}" | "${TEST_BIN_DIR}"/jq .status.message | sed -e 's/"//g')" unset 
bob_status_message diff --git a/scripts/user/create_example_job.sh b/scripts/user/create_example_job.sh index 8f257a86..264e9c12 100755 --- a/scripts/user/create_example_job.sh +++ b/scripts/user/create_example_job.sh @@ -23,9 +23,8 @@ SUB_HOST_REGEXP="^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\\.[a-z0-9]([-a-z0-9]*[a-z0-9])? USAGE="$(basename "$0") [JOB_EXAMPLE] [JOB_NAME] JOB_EXAMPLE: - PSI run psi with default-data-source (default) - PSI_WITH_DP run psi with default-dp-data-source - NSJAIL_PSI run psi via nsjail + PSI run psi with default-data-source (default). + NSJAIL_PSI run psi via nsjail. Set env 'export ALLOW_PRIVILEGED=true' before deployment. " JOB_EXAMPLE=$1 JOB_NAME=$2 diff --git a/testdata/scql-alice.csv b/testdata/scql-alice.csv new file mode 100644 index 00000000..cc4d7676 --- /dev/null +++ b/testdata/scql-alice.csv @@ -0,0 +1,10 @@ +ID,credit_rank,income,age +id0001,6,100000,20 +id0002,5,90000,19 +id0003,6,89700,32 +id0005,6,607000,30 +id0006,5,30070,25 +id0007,6,12070,28 +id0008,6,200800,50 +id0009,6,607000,30 +id0010,5,30070,25 \ No newline at end of file diff --git a/testdata/scql-bob.csv b/testdata/scql-bob.csv new file mode 100644 index 00000000..8a7ea639 --- /dev/null +++ b/testdata/scql-bob.csv @@ -0,0 +1,11 @@ +ID,order_amount,is_active +id0001,3598.0,1 +id0002,100.0,0 +id0003,2549.0,1 +id0004,21698.5,1 +id0005,4985.5,1 +id0006,3598.0,1 +id0007,322,0 +id0008,9816.2,1 +id0009,3598.0,1 +id0010,322,0 \ No newline at end of file