From a72ab934ab333f2016523a8da98e7d90050a0e18 Mon Sep 17 00:00:00 2001 From: Yuki Iwai Date: Mon, 31 Jul 2023 04:04:50 +0900 Subject: [PATCH] Refactor katib-config using kustomize vars Signed-off-by: Yuki Iwai --- docs/developer-guide.md | 2 +- docs/new-algorithm-service.md | 22 +++---- .../components/controller/katib-config.yaml | 59 ------------------- .../components/controller/kustomization.yaml | 6 -- .../components/katib-config/katib-config.yaml | 59 +++++++++++++++++++ .../katib-config/kustomization.yaml | 21 +++++++ .../katib-config/transformer-configs.yaml | 4 ++ .../katib-cert-manager/kustomization.yaml | 2 + .../katib-external-db/kustomization.yaml | 2 + .../katib-leader-election/katib-config.yaml | 32 +++++----- .../katib-openshift/kustomization.yaml | 2 + .../kustomization.yaml | 2 + .../katib-standalone/kustomization.yaml | 2 + scripts/v1beta1/update-images.sh | 6 +- test/e2e/v1beta1/scripts/aws/setup-katib.sh | 2 +- .../v1beta1/scripts/gh-actions/build-load.sh | 8 +-- .../v1beta1/scripts/gh-actions/setup-katib.sh | 2 +- 17 files changed, 129 insertions(+), 104 deletions(-) delete mode 100644 manifests/v1beta1/components/controller/katib-config.yaml create mode 100644 manifests/v1beta1/components/katib-config/katib-config.yaml create mode 100644 manifests/v1beta1/components/katib-config/kustomization.yaml create mode 100644 manifests/v1beta1/components/katib-config/transformer-configs.yaml diff --git a/docs/developer-guide.md b/docs/developer-guide.md index e222e9e541c..0e255e145a6 100644 --- a/docs/developer-guide.md +++ b/docs/developer-guide.md @@ -29,7 +29,7 @@ make build REGISTRY= TAG= To use your custom images for the Katib components, modify [Kustomization file](https://github.com/kubeflow/katib/blob/master/manifests/v1beta1/installs/katib-standalone/kustomization.yaml) -and [Katib Config](https://github.com/kubeflow/katib/blob/master/manifests/v1beta1/components/controller/katib-config.yaml) +and [Katib 
Config](https://github.com/kubeflow/katib/blob/master/manifests/v1beta1/components/katib-config/katib-config.yaml) You can deploy Katib v1beta1 manifests into a Kubernetes cluster as follows: diff --git a/docs/new-algorithm-service.md b/docs/new-algorithm-service.md index a9c35a68f2e..6789657e88c 100644 --- a/docs/new-algorithm-service.md +++ b/docs/new-algorithm-service.md @@ -90,21 +90,17 @@ Then build the Docker image. ### Use the algorithm in Katib. -Update the [Katib config](../manifests/v1beta1/components/controller/katib-config.yaml) and [operator](../operators/katib-controller/src/suggestion.json) with the new algorithm entity: +Update the [Katib config](../manifests/v1beta1/components/katib-config/katib-config.yaml) and [operator](../operators/katib-controller/src/suggestion.json) with the new algorithm entity: ```diff - suggestion: |- - { - "tpe": { - "image": "docker.io/kubeflowkatib/suggestion-hyperopt" - }, - "random": { - "image": "docker.io/kubeflowkatib/suggestion-hyperopt" - }, -+ "": { -+ "image": "image built in the previous stage" -+ } - } + runtime: + suggestions: + - algorithmName: random + image: docker.io/kubeflowkatib/suggestion-hyperopt:$(KATIB_VERSION) + - algorithmName: tpe + image: docker.io/kubeflowkatib/suggestion-hyperopt:$(KATIB_VERSION) ++ - algorithmName: ++ image: "image built in the previous stage":$(KATIB_VERSION) ``` Learn more about Katib config in the diff --git a/manifests/v1beta1/components/controller/katib-config.yaml b/manifests/v1beta1/components/controller/katib-config.yaml deleted file mode 100644 index 1e3af3fb59b..00000000000 --- a/manifests/v1beta1/components/controller/katib-config.yaml +++ /dev/null @@ -1,59 +0,0 @@ ---- -apiVersion: config.kubeflow.org/v1beta1 -kind: KatibConfig -init: - controller: - webhookPort: 8443 - trialResources: - - Job.v1.batch - - TFJob.v1.kubeflow.org - - PyTorchJob.v1.kubeflow.org - - MPIJob.v1.kubeflow.org - - XGBoostJob.v1.kubeflow.org - - MXJob.v1.kubeflow.org -runtime: - 
metricsCollectors: - - kind: StdOut - image: docker.io/kubeflowkatib/file-metrics-collector:latest - - kind: File - image: docker.io/kubeflowkatib/file-metrics-collector:latest - - kind: TensorFlowEvent - image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest - resources: - limits: - memory: 1Gi - suggestions: - - algorithmName: random - image: docker.io/kubeflowkatib/suggestion-hyperopt:latest - - algorithmName: tpe - image: docker.io/kubeflowkatib/suggestion-hyperopt:latest - - algorithmName: grid - image: docker.io/kubeflowkatib/suggestion-optuna:latest - - algorithmName: hyperband - image: docker.io/kubeflowkatib/suggestion-hyperband:latest - - algorithmName: bayesianoptimization - image: docker.io/kubeflowkatib/suggestion-skopt:latest - - algorithmName: cmaes - image: docker.io/kubeflowkatib/suggestion-goptuna:latest - - algorithmName: sobol - image: docker.io/kubeflowkatib/suggestion-goptuna:latest - - algorithmName: multivariate-tpe - image: docker.io/kubeflowkatib/suggestion-optuna:latest - - algorithmName: enas - image: docker.io/kubeflowkatib/suggestion-enas:latest - resources: - limits: - memory: 200Mi - - algorithmName: darts - image: docker.io/kubeflowkatib/suggestion-darts:latest - - algorithmName: pbt - image: docker.io/kubeflowkatib/suggestion-pbt:latest - persistentVolumeClaimSpec: - accessModes: - - ReadWriteMany - resources: - requests: - storage: 5Gi - earlyStoppings: - - algorithmName: medianstop - image: docker.io/kubeflowkatib/earlystopping-medianstop:latest diff --git a/manifests/v1beta1/components/controller/kustomization.yaml b/manifests/v1beta1/components/controller/kustomization.yaml index bda9c9acb06..18979ddba4c 100644 --- a/manifests/v1beta1/components/controller/kustomization.yaml +++ b/manifests/v1beta1/components/controller/kustomization.yaml @@ -7,9 +7,3 @@ resources: - rbac.yaml - service.yaml - trial-templates.yaml -configMapGenerator: - - name: katib-config - files: - - katib-config.yaml - options: - 
disableNameSuffixHash: true diff --git a/manifests/v1beta1/components/katib-config/katib-config.yaml b/manifests/v1beta1/components/katib-config/katib-config.yaml new file mode 100644 index 00000000000..fcb38e92ff2 --- /dev/null +++ b/manifests/v1beta1/components/katib-config/katib-config.yaml @@ -0,0 +1,59 @@ +--- +apiVersion: config.kubeflow.org/v1beta1 +kind: KatibConfig +init: + controller: + webhookPort: 8443 + trialResources: + - Job.v1.batch + - TFJob.v1.kubeflow.org + - PyTorchJob.v1.kubeflow.org + - MPIJob.v1.kubeflow.org + - XGBoostJob.v1.kubeflow.org + - MXJob.v1.kubeflow.org +runtime: + metricsCollectors: + - kind: StdOut + image: docker.io/kubeflowkatib/file-metrics-collector:$(KATIB_VERSION) + - kind: File + image: docker.io/kubeflowkatib/file-metrics-collector:$(KATIB_VERSION) + - kind: TensorFlowEvent + image: docker.io/kubeflowkatib/tfevent-metrics-collector:$(KATIB_VERSION) + resources: + limits: + memory: 1Gi + suggestions: + - algorithmName: random + image: docker.io/kubeflowkatib/suggestion-hyperopt:$(KATIB_VERSION) + - algorithmName: tpe + image: docker.io/kubeflowkatib/suggestion-hyperopt:$(KATIB_VERSION) + - algorithmName: grid + image: docker.io/kubeflowkatib/suggestion-optuna:$(KATIB_VERSION) + - algorithmName: hyperband + image: docker.io/kubeflowkatib/suggestion-hyperband:$(KATIB_VERSION) + - algorithmName: bayesianoptimization + image: docker.io/kubeflowkatib/suggestion-skopt:$(KATIB_VERSION) + - algorithmName: cmaes + image: docker.io/kubeflowkatib/suggestion-goptuna:$(KATIB_VERSION) + - algorithmName: sobol + image: docker.io/kubeflowkatib/suggestion-goptuna:$(KATIB_VERSION) + - algorithmName: multivariate-tpe + image: docker.io/kubeflowkatib/suggestion-optuna:$(KATIB_VERSION) + - algorithmName: enas + image: docker.io/kubeflowkatib/suggestion-enas:$(KATIB_VERSION) + resources: + limits: + memory: 200Mi + - algorithmName: darts + image: docker.io/kubeflowkatib/suggestion-darts:$(KATIB_VERSION) + - algorithmName: pbt + image: 
docker.io/kubeflowkatib/suggestion-pbt:$(KATIB_VERSION) + persistentVolumeClaimSpec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 5Gi + earlyStoppings: + - algorithmName: medianstop + image: docker.io/kubeflowkatib/earlystopping-medianstop:$(KATIB_VERSION) diff --git a/manifests/v1beta1/components/katib-config/kustomization.yaml b/manifests/v1beta1/components/katib-config/kustomization.yaml new file mode 100644 index 00000000000..d49fcc753a1 --- /dev/null +++ b/manifests/v1beta1/components/katib-config/kustomization.yaml @@ -0,0 +1,21 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +configMapGenerator: + - name: katib-config + files: + - katib-config.yaml + options: + disableNameSuffixHash: true + labels: + katib.kubeflow.org/version: latest +configurations: + - transformer-configs.yaml +vars: + - name: KATIB_VERSION + fieldref: + fieldpath: metadata.labels.katib\.kubeflow\.org/version + objref: + apiVersion: v1 + kind: ConfigMap + name: katib-config diff --git a/manifests/v1beta1/components/katib-config/transformer-configs.yaml b/manifests/v1beta1/components/katib-config/transformer-configs.yaml new file mode 100644 index 00000000000..a32a457af2f --- /dev/null +++ b/manifests/v1beta1/components/katib-config/transformer-configs.yaml @@ -0,0 +1,4 @@ +--- +varReference: + - kind: ConfigMap + path: data diff --git a/manifests/v1beta1/installs/katib-cert-manager/kustomization.yaml b/manifests/v1beta1/installs/katib-cert-manager/kustomization.yaml index cd6b57f51f4..d5358eddaa3 100644 --- a/manifests/v1beta1/installs/katib-cert-manager/kustomization.yaml +++ b/manifests/v1beta1/installs/katib-cert-manager/kustomization.yaml @@ -19,6 +19,8 @@ resources: - ../../components/webhook/ # Cert-manager certificate for webhooks - certificate.yaml + # Katib Config. 
+ - ../../components/katib-config/ images: - name: docker.io/kubeflowkatib/katib-controller newName: docker.io/kubeflowkatib/katib-controller diff --git a/manifests/v1beta1/installs/katib-external-db/kustomization.yaml b/manifests/v1beta1/installs/katib-external-db/kustomization.yaml index 0f6fe165541..9d8ff99fea7 100644 --- a/manifests/v1beta1/installs/katib-external-db/kustomization.yaml +++ b/manifests/v1beta1/installs/katib-external-db/kustomization.yaml @@ -17,6 +17,8 @@ resources: - ../../components/cert-generator/ # Katib webhooks. - ../../components/webhook/ + # Katib Config. + - ../../components/katib-config/ images: - name: docker.io/kubeflowkatib/katib-controller newName: docker.io/kubeflowkatib/katib-controller diff --git a/manifests/v1beta1/installs/katib-leader-election/katib-config.yaml b/manifests/v1beta1/installs/katib-leader-election/katib-config.yaml index 579bb8922d1..f1a8503259b 100644 --- a/manifests/v1beta1/installs/katib-leader-election/katib-config.yaml +++ b/manifests/v1beta1/installs/katib-leader-election/katib-config.yaml @@ -1,4 +1,4 @@ -# This KatibConfig is mostly same as https://github.com/kubeflow/katib/manifests/v1beta1/components/controller/katib-config.yaml. +# This KatibConfig is mostly same as https://github.com/kubeflow/katib/manifests/v1beta1/components/katib-config/katib-config.yaml. # Only `.init.controller.enableLeaderElection` field is different. 
--- apiVersion: config.kubeflow.org/v1beta1 @@ -17,40 +17,40 @@ init: runtime: metricsCollectors: - kind: StdOut - image: docker.io/kubeflowkatib/file-metrics-collector:latest + image: docker.io/kubeflowkatib/file-metrics-collector:$(KATIB_VERSION) - kind: File - image: docker.io/kubeflowkatib/file-metrics-collector:latest + image: docker.io/kubeflowkatib/file-metrics-collector:$(KATIB_VERSION) - kind: TensorFlowEvent - image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest + image: docker.io/kubeflowkatib/tfevent-metrics-collector:$(KATIB_VERSION) resources: limits: memory: 1Gi suggestions: - algorithmName: random - image: docker.io/kubeflowkatib/suggestion-hyperopt:latest + image: docker.io/kubeflowkatib/suggestion-hyperopt:$(KATIB_VERSION) - algorithmName: tpe - image: docker.io/kubeflowkatib/suggestion-hyperopt:latest + image: docker.io/kubeflowkatib/suggestion-hyperopt:$(KATIB_VERSION) - algorithmName: grid - image: docker.io/kubeflowkatib/suggestion-optuna:latest + image: docker.io/kubeflowkatib/suggestion-optuna:$(KATIB_VERSION) - algorithmName: hyperband - image: docker.io/kubeflowkatib/suggestion-hyperband:latest + image: docker.io/kubeflowkatib/suggestion-hyperband:$(KATIB_VERSION) - algorithmName: bayesianoptimization - image: docker.io/kubeflowkatib/suggestion-skopt:latest + image: docker.io/kubeflowkatib/suggestion-skopt:$(KATIB_VERSION) - algorithmName: cmaes - image: docker.io/kubeflowkatib/suggestion-goptuna:latest + image: docker.io/kubeflowkatib/suggestion-goptuna:$(KATIB_VERSION) - algorithmName: sobol - image: docker.io/kubeflowkatib/suggestion-goptuna:latest + image: docker.io/kubeflowkatib/suggestion-goptuna:$(KATIB_VERSION) - algorithmName: multivariate-tpe - image: docker.io/kubeflowkatib/suggestion-optuna:latest + image: docker.io/kubeflowkatib/suggestion-optuna:$(KATIB_VERSION) - algorithmName: enas - image: docker.io/kubeflowkatib/suggestion-enas:latest + image: docker.io/kubeflowkatib/suggestion-enas:$(KATIB_VERSION) resources: 
limits: memory: 200Mi - algorithmName: darts - image: docker.io/kubeflowkatib/suggestion-darts:latest + image: docker.io/kubeflowkatib/suggestion-darts:$(KATIB_VERSION) - algorithmName: pbt - image: docker.io/kubeflowkatib/suggestion-pbt:latest + image: docker.io/kubeflowkatib/suggestion-pbt:$(KATIB_VERSION) persistentVolumeClaimSpec: accessModes: - ReadWriteMany @@ -59,4 +59,4 @@ runtime: storage: 5Gi earlyStoppings: - algorithmName: medianstop - image: docker.io/kubeflowkatib/earlystopping-medianstop:latest + image: docker.io/kubeflowkatib/earlystopping-medianstop:$(KATIB_VERSION) diff --git a/manifests/v1beta1/installs/katib-openshift/kustomization.yaml b/manifests/v1beta1/installs/katib-openshift/kustomization.yaml index 58e5bf050a1..b3ba0c97dfa 100644 --- a/manifests/v1beta1/installs/katib-openshift/kustomization.yaml +++ b/manifests/v1beta1/installs/katib-openshift/kustomization.yaml @@ -27,6 +27,8 @@ resources: - ../../components/ui/ # Katib webhooks. - ../../components/webhook/ + # Katib Config. + - ../../components/katib-config/ images: - name: docker.io/kubeflowkatib/katib-controller newName: docker.io/kubeflowkatib/katib-controller diff --git a/manifests/v1beta1/installs/katib-standalone-postgres/kustomization.yaml b/manifests/v1beta1/installs/katib-standalone-postgres/kustomization.yaml index 30a5b7193f3..1c05bf7b423 100644 --- a/manifests/v1beta1/installs/katib-standalone-postgres/kustomization.yaml +++ b/manifests/v1beta1/installs/katib-standalone-postgres/kustomization.yaml @@ -19,6 +19,8 @@ resources: - ../../components/cert-generator/ # Katib webhooks. - ../../components/webhook/ + # Katib Config. 
+ - ../../components/katib-config/ images: - name: docker.io/kubeflowkatib/katib-controller newName: docker.io/kubeflowkatib/katib-controller diff --git a/manifests/v1beta1/installs/katib-standalone/kustomization.yaml b/manifests/v1beta1/installs/katib-standalone/kustomization.yaml index 7e71b26a388..117ce4c7f45 100644 --- a/manifests/v1beta1/installs/katib-standalone/kustomization.yaml +++ b/manifests/v1beta1/installs/katib-standalone/kustomization.yaml @@ -19,6 +19,8 @@ resources: - ../../components/cert-generator/ # Katib webhooks. - ../../components/webhook/ + # Katib Config. + - ../../components/katib-config/ images: - name: docker.io/kubeflowkatib/katib-controller newName: docker.io/kubeflowkatib/katib-controller diff --git a/scripts/v1beta1/update-images.sh b/scripts/v1beta1/update-images.sh index b35892ebc5b..e5198ee8876 100755 --- a/scripts/v1beta1/update-images.sh +++ b/scripts/v1beta1/update-images.sh @@ -73,11 +73,11 @@ update_yaml_files "${INSTALLS_PATH}" "newName: ${OLD_PREFIX}" "newName: ${NEW_PR update_yaml_files "${INSTALLS_PATH}" "newTag: .*" "newTag: ${TAG}" # Katib Config images. -CONFIG_PATH="manifests/v1beta1/components/controller/katib-config.yaml" +CONFIG_PATH="manifests/v1beta1/components/katib-config/" echo -e "Update Katib Metrics Collectors, Suggestions and EarlyStopping images\n" -update_yaml_files "${CONFIG_PATH}" "${OLD_PREFIX}" "${NEW_PREFIX}" -update_yaml_files "${CONFIG_PATH}" ":[^[:space:]].*\"" ":${TAG}\"" +update_yaml_files "${CONFIG_PATH}" "image: ${OLD_PREFIX}" "image: ${NEW_PREFIX}" +update_yaml_files "${CONFIG_PATH}" "katib.kubeflow.org/version: .*" "katib.kubeflow.org/version: ${TAG}" # Katib Trial training container images. 
diff --git a/test/e2e/v1beta1/scripts/aws/setup-katib.sh b/test/e2e/v1beta1/scripts/aws/setup-katib.sh index 9fed8462e5d..009afc7a846 100755 --- a/test/e2e/v1beta1/scripts/aws/setup-katib.sh +++ b/test/e2e/v1beta1/scripts/aws/setup-katib.sh @@ -36,7 +36,7 @@ make update-images OLD_PREFIX="docker.io/kubeflowkatib/" NEW_PREFIX="${ECR_REGIS echo -e "\n The Katib will be deployed with the following configs" cat "manifests/v1beta1/installs/katib-standalone/kustomization.yaml" -cat "manifests/v1beta1/components/controller/katib-config.yaml" +cat "manifests/v1beta1/components/katib-config/katib-config.yaml" echo "Creating Kubeflow namespace" kubectl create namespace kubeflow diff --git a/test/e2e/v1beta1/scripts/gh-actions/build-load.sh b/test/e2e/v1beta1/scripts/gh-actions/build-load.sh index 0de13c26678..ca8e24ea3c5 100755 --- a/test/e2e/v1beta1/scripts/gh-actions/build-load.sh +++ b/test/e2e/v1beta1/scripts/gh-actions/build-load.sh @@ -79,8 +79,8 @@ run() { exp_path=$(find examples/v1beta1 -name "${exp_name}.yaml") algorithm_name="$(yq eval '.spec.algorithm.algorithmName' "$exp_path")" - suggestion_image_name="$(yq eval '.data.suggestion' manifests/v1beta1/components/controller/katib-config.yaml | - algorithm_name=$algorithm_name yq eval '.[env(algorithm_name)].image' | cut -d: -f1)" + suggestion_image_name="$(algorithm_name=$algorithm_name yq eval '.runtime.suggestions.[] | select(.algorithmName == env(algorithm_name)) | .image' \ + manifests/v1beta1/components/katib-config/katib-config.yaml | cut -d: -f1)" suggestion_name="$(basename "$suggestion_image_name")" suggestions+=("$suggestion_name") @@ -105,8 +105,8 @@ run() { exp_path=$(find examples/v1beta1 -name "${exp_name}.yaml") algorithm_name="$(yq eval '.spec.earlyStopping.algorithmName' "$exp_path")" - earlystopping_image_name="$(yq eval '.data.early-stopping' manifests/v1beta1/components/controller/katib-config.yaml | - algorithm_name=$algorithm_name yq eval '.[env(algorithm_name)].image' | cut -d: -f1)" + 
earlystopping_image_name="$(algorithm_name=$algorithm_name yq eval '.runtime.earlyStoppings.[] | select(.algorithmName == env(algorithm_name)) | .image' \ + manifests/v1beta1/components/katib-config/katib-config.yaml | cut -d: -f1)" earlystopping_name="$(basename "$earlystopping_image_name")" earlystoppings+=("$earlystopping_name") diff --git a/test/e2e/v1beta1/scripts/gh-actions/setup-katib.sh b/test/e2e/v1beta1/scripts/gh-actions/setup-katib.sh index e2547e2efad..0b6fc040454 100755 --- a/test/e2e/v1beta1/scripts/gh-actions/setup-katib.sh +++ b/test/e2e/v1beta1/scripts/gh-actions/setup-katib.sh @@ -53,7 +53,7 @@ yq eval -i '.spec.resources.requests.storage|="2Gi"' $PVC_FILE echo -e "\n The Katib will be deployed with the following configs" cat $KUSTOMIZATION_FILE -cat ../../../../../manifests/v1beta1/components/controller/katib-config.yaml +cat ../../../../../manifests/v1beta1/components/katib-config/katib-config.yaml # If the user wants to deploy training operator, then use the kustomization file for training operator. if "$DEPLOY_TRAINING_OPERATOR"; then