Skip to content

Commit

Permalink
Refactor katib-config using kustomize vars
Browse files Browse the repository at this point in the history
Signed-off-by: Yuki Iwai <[email protected]>
  • Loading branch information
tenzen-y committed Jul 31, 2023
1 parent 8e97a80 commit a72ab93
Show file tree
Hide file tree
Showing 17 changed files with 129 additions and 104 deletions.
2 changes: 1 addition & 1 deletion docs/developer-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ make build REGISTRY=<image-registry> TAG=<image-tag>

To use your custom images for the Katib components, modify
[Kustomization file](https://github.com/kubeflow/katib/blob/master/manifests/v1beta1/installs/katib-standalone/kustomization.yaml)
and [Katib Config](https://github.com/kubeflow/katib/blob/master/manifests/v1beta1/components/controller/katib-config.yaml)
and [Katib Config](https://github.com/kubeflow/katib/blob/master/manifests/v1beta1/components/katib-config/katib-config.yaml)

You can deploy Katib v1beta1 manifests into a Kubernetes cluster as follows:

Expand Down
22 changes: 9 additions & 13 deletions docs/new-algorithm-service.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,21 +90,17 @@ Then build the Docker image.

### Use the algorithm in Katib.

Update the [Katib config](../manifests/v1beta1/components/controller/katib-config.yaml) and [operator](../operators/katib-controller/src/suggestion.json) with the new algorithm entity:
Update the [Katib config](../manifests/v1beta1/components/katib-config/katib-config.yaml) and [operator](../operators/katib-controller/src/suggestion.json) with the new algorithm entity:

```diff
suggestion: |-
{
"tpe": {
"image": "docker.io/kubeflowkatib/suggestion-hyperopt"
},
"random": {
"image": "docker.io/kubeflowkatib/suggestion-hyperopt"
},
+ "<new-algorithm-name>": {
+ "image": "image built in the previous stage"
+ }
}
runtime:
suggestions:
- algorithmName: random
image: docker.io/kubeflowkatib/suggestion-hyperopt:$(KATIB_VERSION)
- algorithmName: tpe
image: docker.io/kubeflowkatib/suggestion-hyperopt:$(KATIB_VERSION)
+ - algorithmName: <new-algorithm-name>
+ image: <image built in the previous stage>:$(KATIB_VERSION)
```

Learn more about Katib config in the
Expand Down
59 changes: 0 additions & 59 deletions manifests/v1beta1/components/controller/katib-config.yaml

This file was deleted.

6 changes: 0 additions & 6 deletions manifests/v1beta1/components/controller/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,3 @@ resources:
- rbac.yaml
- service.yaml
- trial-templates.yaml
configMapGenerator:
- name: katib-config
files:
- katib-config.yaml
options:
disableNameSuffixHash: true
59 changes: 59 additions & 0 deletions manifests/v1beta1/components/katib-config/katib-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
---
# Katib configuration. The kustomization.yaml in this directory packages this
# file into the `katib-config` ConfigMap.
#
# `$(KATIB_VERSION)` in the image tags below is a kustomize var declared in
# that kustomization.yaml; it resolves to the value of the ConfigMap's
# `katib.kubeflow.org/version` label.
apiVersion: config.kubeflow.org/v1beta1
kind: KatibConfig
init:
  controller:
    webhookPort: 8443
    # Resource kinds listed as trial resources, in `Kind.version.group` form.
    trialResources:
      - Job.v1.batch
      - TFJob.v1.kubeflow.org
      - PyTorchJob.v1.kubeflow.org
      - MPIJob.v1.kubeflow.org
      - XGBoostJob.v1.kubeflow.org
      - MXJob.v1.kubeflow.org
runtime:
  # One entry per metrics collector kind.
  metricsCollectors:
    # StdOut and File both use the file-metrics-collector image.
    - kind: StdOut
      image: docker.io/kubeflowkatib/file-metrics-collector:$(KATIB_VERSION)
    - kind: File
      image: docker.io/kubeflowkatib/file-metrics-collector:$(KATIB_VERSION)
    - kind: TensorFlowEvent
      image: docker.io/kubeflowkatib/tfevent-metrics-collector:$(KATIB_VERSION)
      resources:
        limits:
          memory: 1Gi
  # One entry per suggestion algorithm; several algorithms share an image.
  suggestions:
    - algorithmName: random
      image: docker.io/kubeflowkatib/suggestion-hyperopt:$(KATIB_VERSION)
    - algorithmName: tpe
      image: docker.io/kubeflowkatib/suggestion-hyperopt:$(KATIB_VERSION)
    - algorithmName: grid
      image: docker.io/kubeflowkatib/suggestion-optuna:$(KATIB_VERSION)
    - algorithmName: hyperband
      image: docker.io/kubeflowkatib/suggestion-hyperband:$(KATIB_VERSION)
    - algorithmName: bayesianoptimization
      image: docker.io/kubeflowkatib/suggestion-skopt:$(KATIB_VERSION)
    - algorithmName: cmaes
      image: docker.io/kubeflowkatib/suggestion-goptuna:$(KATIB_VERSION)
    - algorithmName: sobol
      image: docker.io/kubeflowkatib/suggestion-goptuna:$(KATIB_VERSION)
    - algorithmName: multivariate-tpe
      image: docker.io/kubeflowkatib/suggestion-optuna:$(KATIB_VERSION)
    - algorithmName: enas
      image: docker.io/kubeflowkatib/suggestion-enas:$(KATIB_VERSION)
      resources:
        limits:
          memory: 200Mi
    - algorithmName: darts
      image: docker.io/kubeflowkatib/suggestion-darts:$(KATIB_VERSION)
    - algorithmName: pbt
      image: docker.io/kubeflowkatib/suggestion-pbt:$(KATIB_VERSION)
      # pbt is the only suggestion here that declares a PVC spec.
      persistentVolumeClaimSpec:
        accessModes:
          - ReadWriteMany
        resources:
          requests:
            storage: 5Gi
  # One entry per early stopping algorithm.
  earlyStoppings:
    - algorithmName: medianstop
      image: docker.io/kubeflowkatib/earlystopping-medianstop:$(KATIB_VERSION)
21 changes: 21 additions & 0 deletions manifests/v1beta1/components/katib-config/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
---
# Kustomization for the Katib Config component.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# Build the `katib-config` ConfigMap from katib-config.yaml.
configMapGenerator:
  - name: katib-config
    files:
      - katib-config.yaml
    options:
      # Keep the generated name exactly `katib-config` (no hash suffix).
      disableNameSuffixHash: true
      labels:
        # Source value for the KATIB_VERSION var declared below.
        # scripts/v1beta1/update-images.sh rewrites this value with the tag.
        katib.kubeflow.org/version: latest

# Extra transformer configuration (declares where vars may be substituted).
configurations:
  - transformer-configs.yaml

# KATIB_VERSION is read from the version label on the katib-config ConfigMap.
vars:
  - name: KATIB_VERSION
    fieldref:
      # Dots inside the label key are escaped so they are not treated as
      # path separators.
      fieldpath: metadata.labels.katib\.kubeflow\.org/version
    objref:
      apiVersion: v1
      kind: ConfigMap
      name: katib-config
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
# Allow kustomize `$(VAR)` references to be substituted inside the `data`
# field of ConfigMap objects.
varReference:
  - kind: ConfigMap
    path: data
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ resources:
- ../../components/webhook/
# Cert-manager certificate for webhooks
- certificate.yaml
# Katib Config.
- ../../components/katib-config/
images:
- name: docker.io/kubeflowkatib/katib-controller
newName: docker.io/kubeflowkatib/katib-controller
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ resources:
- ../../components/cert-generator/
# Katib webhooks.
- ../../components/webhook/
# Katib Config.
- ../../components/katib-config/
images:
- name: docker.io/kubeflowkatib/katib-controller
newName: docker.io/kubeflowkatib/katib-controller
Expand Down
32 changes: 16 additions & 16 deletions manifests/v1beta1/installs/katib-leader-election/katib-config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# This KatibConfig is mostly same as https://github.com/kubeflow/katib/manifests/v1beta1/components/controller/katib-config.yaml.
# This KatibConfig is mostly the same as https://github.com/kubeflow/katib/blob/master/manifests/v1beta1/components/katib-config/katib-config.yaml.
# Only `.init.controller.enableLeaderElection` field is different.
---
apiVersion: config.kubeflow.org/v1beta1
Expand All @@ -17,40 +17,40 @@ init:
runtime:
metricsCollectors:
- kind: StdOut
image: docker.io/kubeflowkatib/file-metrics-collector:latest
image: docker.io/kubeflowkatib/file-metrics-collector:$(KATIB_VERSION)
- kind: File
image: docker.io/kubeflowkatib/file-metrics-collector:latest
image: docker.io/kubeflowkatib/file-metrics-collector:$(KATIB_VERSION)
- kind: TensorFlowEvent
image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest
image: docker.io/kubeflowkatib/tfevent-metrics-collector:$(KATIB_VERSION)
resources:
limits:
memory: 1Gi
suggestions:
- algorithmName: random
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
image: docker.io/kubeflowkatib/suggestion-hyperopt:$(KATIB_VERSION)
- algorithmName: tpe
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
image: docker.io/kubeflowkatib/suggestion-hyperopt:$(KATIB_VERSION)
- algorithmName: grid
image: docker.io/kubeflowkatib/suggestion-optuna:latest
image: docker.io/kubeflowkatib/suggestion-optuna:$(KATIB_VERSION)
- algorithmName: hyperband
image: docker.io/kubeflowkatib/suggestion-hyperband:latest
image: docker.io/kubeflowkatib/suggestion-hyperband:$(KATIB_VERSION)
- algorithmName: bayesianoptimization
image: docker.io/kubeflowkatib/suggestion-skopt:latest
image: docker.io/kubeflowkatib/suggestion-skopt:$(KATIB_VERSION)
- algorithmName: cmaes
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
image: docker.io/kubeflowkatib/suggestion-goptuna:$(KATIB_VERSION)
- algorithmName: sobol
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
image: docker.io/kubeflowkatib/suggestion-goptuna:$(KATIB_VERSION)
- algorithmName: multivariate-tpe
image: docker.io/kubeflowkatib/suggestion-optuna:latest
image: docker.io/kubeflowkatib/suggestion-optuna:$(KATIB_VERSION)
- algorithmName: enas
image: docker.io/kubeflowkatib/suggestion-enas:latest
image: docker.io/kubeflowkatib/suggestion-enas:$(KATIB_VERSION)
resources:
limits:
memory: 200Mi
- algorithmName: darts
image: docker.io/kubeflowkatib/suggestion-darts:latest
image: docker.io/kubeflowkatib/suggestion-darts:$(KATIB_VERSION)
- algorithmName: pbt
image: docker.io/kubeflowkatib/suggestion-pbt:latest
image: docker.io/kubeflowkatib/suggestion-pbt:$(KATIB_VERSION)
persistentVolumeClaimSpec:
accessModes:
- ReadWriteMany
Expand All @@ -59,4 +59,4 @@ runtime:
storage: 5Gi
earlyStoppings:
- algorithmName: medianstop
image: docker.io/kubeflowkatib/earlystopping-medianstop:latest
image: docker.io/kubeflowkatib/earlystopping-medianstop:$(KATIB_VERSION)
2 changes: 2 additions & 0 deletions manifests/v1beta1/installs/katib-openshift/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ resources:
- ../../components/ui/
# Katib webhooks.
- ../../components/webhook/
# Katib Config.
- ../../components/katib-config/
images:
- name: docker.io/kubeflowkatib/katib-controller
newName: docker.io/kubeflowkatib/katib-controller
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ resources:
- ../../components/cert-generator/
# Katib webhooks.
- ../../components/webhook/
# Katib Config.
- ../../components/katib-config/
images:
- name: docker.io/kubeflowkatib/katib-controller
newName: docker.io/kubeflowkatib/katib-controller
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ resources:
- ../../components/cert-generator/
# Katib webhooks.
- ../../components/webhook/
# Katib Config.
- ../../components/katib-config/
images:
- name: docker.io/kubeflowkatib/katib-controller
newName: docker.io/kubeflowkatib/katib-controller
Expand Down
6 changes: 3 additions & 3 deletions scripts/v1beta1/update-images.sh
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,11 @@ update_yaml_files "${INSTALLS_PATH}" "newName: ${OLD_PREFIX}" "newName: ${NEW_PR
update_yaml_files "${INSTALLS_PATH}" "newTag: .*" "newTag: ${TAG}"

# Katib Config images.
CONFIG_PATH="manifests/v1beta1/components/controller/katib-config.yaml"
CONFIG_PATH="manifests/v1beta1/components/katib-config/"

echo -e "Update Katib Metrics Collectors, Suggestions and EarlyStopping images\n"
update_yaml_files "${CONFIG_PATH}" "${OLD_PREFIX}" "${NEW_PREFIX}"
update_yaml_files "${CONFIG_PATH}" ":[^[:space:]].*\"" ":${TAG}\""
update_yaml_files "${CONFIG_PATH}" "image: ${OLD_PREFIX}" "image: ${NEW_PREFIX}"
update_yaml_files "${CONFIG_PATH}" "katib.kubeflow.org/version: .*" "katib.kubeflow.org/version: ${TAG}"

# Katib Trial training container images.

Expand Down
2 changes: 1 addition & 1 deletion test/e2e/v1beta1/scripts/aws/setup-katib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ make update-images OLD_PREFIX="docker.io/kubeflowkatib/" NEW_PREFIX="${ECR_REGIS

echo -e "\n The Katib will be deployed with the following configs"
cat "manifests/v1beta1/installs/katib-standalone/kustomization.yaml"
cat "manifests/v1beta1/components/controller/katib-config.yaml"
cat "manifests/v1beta1/components/katib-config/katib-config.yaml"

echo "Creating Kubeflow namespace"
kubectl create namespace kubeflow
Expand Down
8 changes: 4 additions & 4 deletions test/e2e/v1beta1/scripts/gh-actions/build-load.sh
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ run() {
exp_path=$(find examples/v1beta1 -name "${exp_name}.yaml")
algorithm_name="$(yq eval '.spec.algorithm.algorithmName' "$exp_path")"

suggestion_image_name="$(yq eval '.data.suggestion' manifests/v1beta1/components/controller/katib-config.yaml |
algorithm_name=$algorithm_name yq eval '.[env(algorithm_name)].image' | cut -d: -f1)"
suggestion_image_name="$(algorithm_name=$algorithm_name yq eval '.runtime.suggestions.[] | select(.algorithmName == env(algorithm_name)) | .image' \
manifests/v1beta1/components/katib-config/katib-config.yaml | cut -d: -f1)"
suggestion_name="$(basename "$suggestion_image_name")"

suggestions+=("$suggestion_name")
Expand All @@ -105,8 +105,8 @@ run() {
exp_path=$(find examples/v1beta1 -name "${exp_name}.yaml")
algorithm_name="$(yq eval '.spec.earlyStopping.algorithmName' "$exp_path")"

earlystopping_image_name="$(yq eval '.data.early-stopping' manifests/v1beta1/components/controller/katib-config.yaml |
algorithm_name=$algorithm_name yq eval '.[env(algorithm_name)].image' | cut -d: -f1)"
earlystopping_image_name="$(algorithm_name=$algorithm_name yq eval '.runtime.earlyStoppings.[] | select(.algorithmName == env(algorithm_name)) | .image' \
manifests/v1beta1/components/katib-config/katib-config.yaml | cut -d: -f1)"
earlystopping_name="$(basename "$earlystopping_image_name")"

earlystoppings+=("$earlystopping_name")
Expand Down
2 changes: 1 addition & 1 deletion test/e2e/v1beta1/scripts/gh-actions/setup-katib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ yq eval -i '.spec.resources.requests.storage|="2Gi"' $PVC_FILE

echo -e "\n The Katib will be deployed with the following configs"
cat $KUSTOMIZATION_FILE
cat ../../../../../manifests/v1beta1/components/controller/katib-config.yaml
cat ../../../../../manifests/v1beta1/components/katib-config/katib-config.yaml

# If the user wants to deploy training operator, then use the kustomization file for training operator.
if "$DEPLOY_TRAINING_OPERATOR"; then
Expand Down

0 comments on commit a72ab93

Please sign in to comment.