From a925b2968e68f3085092adc04e9af39d4f5045f3 Mon Sep 17 00:00:00 2001 From: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> Date: Mon, 4 Mar 2024 13:04:42 +0100 Subject: [PATCH] Update to Kind 1.27 and fix the integration tests (#2637) (#2639) * update to Kind 1.27 * update katib test * update katib test * increase test pipeline timeout from 60 to 120 seconds --------- Signed-off-by: juliusvonkohout <45896133+juliusvonkohout@users.noreply.github.com> --- .github/workflows/pipeline_kind_test.yaml | 2 +- tests/gh-actions/kf-objects/katib_test.yaml | 94 +++++++------------ tests/gh-actions/kind-cluster-1-24.yaml | 27 ------ ...uster-1-25.yaml => kind-cluster-1-26.yaml} | 6 +- tests/gh-actions/kind-cluster.yaml | 6 +- 5 files changed, 43 insertions(+), 92 deletions(-) delete mode 100644 tests/gh-actions/kind-cluster-1-24.yaml rename tests/gh-actions/{kind-cluster-1-25.yaml => kind-cluster-1-26.yaml} (71%) diff --git a/.github/workflows/pipeline_kind_test.yaml b/.github/workflows/pipeline_kind_test.yaml index 24c407c322..f6d1f44e53 100644 --- a/.github/workflows/pipeline_kind_test.yaml +++ b/.github/workflows/pipeline_kind_test.yaml @@ -43,4 +43,4 @@ jobs: kubectl apply -f tests/e2e/yamls python3 ./tests/gh-actions/kf-objects/test_pipeline.py ./tests/gh-actions/install_argo_cli.sh - argo wait @latest -n kubeflow-user-example-com --request-timeout 60 + argo wait @latest -n kubeflow-user-example-com --request-timeout 120 diff --git a/tests/gh-actions/kf-objects/katib_test.yaml b/tests/gh-actions/kf-objects/katib_test.yaml index 5d0c4a97d7..1d25ef0db4 100644 --- a/tests/gh-actions/kf-objects/katib_test.yaml +++ b/tests/gh-actions/kf-objects/katib_test.yaml @@ -1,78 +1,56 @@ --- +--- apiVersion: kubeflow.org/v1beta1 kind: Experiment metadata: - name: grid-example namespace: kubeflow-user + name: grid spec: + objective: + type: minimize + goal: 0.1 + objectiveMetricName: loss + algorithm: + algorithmName: grid + parallelTrialCount: 2 + maxTrialCount: 2 + maxFailedTrialCount: 2 parameters: - name: lr parameterType: double feasibleSpace: - max: '0.01' - min: '0.001' - step: '0.001' - - name: num-layers - parameterType: int - feasibleSpace: - max: '3' - min: '2' - - name: optimizer - parameterType: categorical + min: "0.01" + step: "0.005" + max: "0.05" + - name: momentum + parameterType: double feasibleSpace: - list: - - adam - objective: - type: maximize - goal: 0.80 - objectiveMetricName: Validation-accuracy - additionalMetricNames: - - Train-accuracy - metricStrategies: - - name: Validation-accuracy - value: max - - name: Train-accuracy - value: max - algorithm: - algorithmName: grid + min: "0.5" + step: "0.1" + max: "0.9" trialTemplate: + primaryContainerName: training-container + trialParameters: + - name: learningRate + description: Learning rate for the training model + reference: lr + - name: momentum + description: Momentum for the training model + reference: momentum trialSpec: apiVersion: batch/v1 kind: Job spec: template: - metadata: - labels: - sidecar.istio.io/inject: 'false' spec: containers: - - command: - - python3 - - /opt/mxnet-mnist/mnist.py - - '--batch-size=64' - - '--lr=${trialParameters.learningRate}' - - '--num-layers=${trialParameters.numberLayers}' - - '--optimizer=${trialParameters.optimizer}' - image: docker.io/kubeflowkatib/mxnet-mnist:latest - name: training-container + - name: training-container + image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest + command: + - "python3" + - "/opt/pytorch-mnist/mnist.py" + - "--epochs=1" + - "--batch-size=16" + - "--lr=${trialParameters.learningRate}" + - "--momentum=${trialParameters.momentum}" restartPolicy: Never - trialParameters: - - name: learningRate - description: Learning rate for the training model - reference: lr - - name: numberLayers - description: Number of training model layers - reference: num-layers - - name: optimizer - description: Training model optimizer (sdg, adam or ftrl) - reference: optimizer - primaryContainerName: training-container - successCondition: status.conditions.#(type=="Complete")#|#(status=="True")# - failureCondition: status.conditions.#(type=="Failed")#|#(status=="True")# - parallelTrialCount: 1 - maxTrialCount: 1 - maxFailedTrialCount: 1 - metricsCollectorSpec: - collector: - kind: StdOut - resumePolicy: LongRunning \ No newline at end of file diff --git a/tests/gh-actions/kind-cluster-1-24.yaml b/tests/gh-actions/kind-cluster-1-24.yaml deleted file mode 100644 index bd047c160f..0000000000 --- a/tests/gh-actions/kind-cluster-1-24.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# This testing option is available for testing projects that don't yet support k8s 1.25 -apiVersion: kind.x-k8s.io/v1alpha4 -kind: Cluster -# Configure registry for KinD. -containerdConfigPatches: -- |- - [plugins."io.containerd.grpc.v1.cri".registry.mirrors."$REGISTRY_NAME:$REGISTRY_PORT"] - endpoint = ["http://$REGISTRY_NAME:$REGISTRY_PORT"] -# This is needed in order to support projected volumes with service account tokens. -# See: https://kubernetes.slack.com/archives/CEKK1KTN2/p1600268272383600 -kubeadmConfigPatches: - - | - apiVersion: kubeadm.k8s.io/v1beta2 - kind: ClusterConfiguration - metadata: - name: config - apiServer: - extraArgs: - "service-account-issuer": "kubernetes.default.svc" - "service-account-signing-key-file": "/etc/kubernetes/pki/sa.key" -nodes: -- role: control-plane - image: kindest/node:v1.24.7@sha256:577c630ce8e509131eab1aea12c022190978dd2f745aac5eb1fe65c0807eb315 -- role: worker - image: kindest/node:v1.24.7@sha256:577c630ce8e509131eab1aea12c022190978dd2f745aac5eb1fe65c0807eb315 -- role: worker - image: kindest/node:v1.24.7@sha256:577c630ce8e509131eab1aea12c022190978dd2f745aac5eb1fe65c0807eb315 diff --git a/tests/gh-actions/kind-cluster-1-25.yaml b/tests/gh-actions/kind-cluster-1-26.yaml similarity index 71% rename from tests/gh-actions/kind-cluster-1-25.yaml rename to tests/gh-actions/kind-cluster-1-26.yaml index 3a1385b5d4..84ea5a2749 100644 --- a/tests/gh-actions/kind-cluster-1-25.yaml +++ b/tests/gh-actions/kind-cluster-1-26.yaml @@ -19,8 +19,8 @@ kubeadmConfigPatches: "service-account-signing-key-file": "/etc/kubernetes/pki/sa.key" nodes: - role: control-plane - image: kindest/node:v1.25.3@sha256:f52781bc0d7a19fb6c405c2af83abfeb311f130707a0e219175677e366cc45d1 + image: kindest/node:v1.26.6@sha256:5e5d789e90c1512c8c480844e0985bc3b4da4ba66179cc5b540fe5b785ca97b5 - role: worker - image: kindest/node:v1.25.3@sha256:f52781bc0d7a19fb6c405c2af83abfeb311f130707a0e219175677e366cc45d1 + image: kindest/node:v1.26.6@sha256:5e5d789e90c1512c8c480844e0985bc3b4da4ba66179cc5b540fe5b785ca97b5 - role: worker - image: kindest/node:v1.25.3@sha256:f52781bc0d7a19fb6c405c2af83abfeb311f130707a0e219175677e366cc45d1 \ No newline at end of file + image: kindest/node:v1.26.6@sha256:5e5d789e90c1512c8c480844e0985bc3b4da4ba66179cc5b540fe5b785ca97b5 \ No newline at end of file diff --git a/tests/gh-actions/kind-cluster.yaml b/tests/gh-actions/kind-cluster.yaml index 84ea5a2749..b9630bc2e4 100644 --- a/tests/gh-actions/kind-cluster.yaml +++ b/tests/gh-actions/kind-cluster.yaml @@ -19,8 +19,8 @@ kubeadmConfigPatches: "service-account-signing-key-file": "/etc/kubernetes/pki/sa.key" nodes: - role: control-plane - image: kindest/node:v1.26.6@sha256:5e5d789e90c1512c8c480844e0985bc3b4da4ba66179cc5b540fe5b785ca97b5 + image: kindest/node:v1.27.11@sha256:ec04b9f650954c033c978db9c25a9071b449179b0e509df258350c2f3034fb57 - role: worker - image: kindest/node:v1.26.6@sha256:5e5d789e90c1512c8c480844e0985bc3b4da4ba66179cc5b540fe5b785ca97b5 + image: kindest/node:v1.27.11@sha256:ec04b9f650954c033c978db9c25a9071b449179b0e509df258350c2f3034fb57 - role: worker - image: kindest/node:v1.26.6@sha256:5e5d789e90c1512c8c480844e0985bc3b4da4ba66179cc5b540fe5b785ca97b5 \ No newline at end of file + image: kindest/node:v1.27.11@sha256:ec04b9f650954c033c978db9c25a9071b449179b0e509df258350c2f3034fb57 \ No newline at end of file