kubeflow · juliusvonkohout · Mar 4, 2024 · Mar 4, 2024 · Mar 4, 2024 · Mar 4, 2024
diff --git a/.github/workflows/pipeline_kind_test.yaml b/.github/workflows/pipeline_kind_test.yaml
@@ -49,4 +49,4 @@ jobs:
         kubectl apply -f tests/e2e/yamls
         python3 ./tests/gh-actions/kf-objects/test_pipeline.py
         ./tests/gh-actions/install_argo_cli.sh
-        argo wait @latest -n kubeflow-user-example-com --request-timeout 60
+        argo wait @latest -n kubeflow-user-example-com --request-timeout 120
diff --git a/tests/gh-actions/kf-objects/katib_test.yaml b/tests/gh-actions/kf-objects/katib_test.yaml
@@ -1,78 +1,56 @@
 ---
+---
 apiVersion: kubeflow.org/v1beta1
 kind: Experiment
 metadata:
-  name: grid-example
   namespace: kubeflow-user
+  name: grid
 spec:
+  objective:
+    type: minimize
+    goal: 0.1
+    objectiveMetricName: loss
+  algorithm:
+    algorithmName: grid
+  parallelTrialCount: 2
+  maxTrialCount: 2
+  maxFailedTrialCount: 2
   parameters:
     - name: lr
       parameterType: double
       feasibleSpace:
-        max: '0.01'
-        min: '0.001'
-        step: '0.001'
-    - name: num-layers
-      parameterType: int
-      feasibleSpace:
-        max: '3'
-        min: '2'
-    - name: optimizer
-      parameterType: categorical
+        min: "0.01"
+        step: "0.005"
+        max: "0.05"
+    - name: momentum
+      parameterType: double
       feasibleSpace:
-        list:
-          - adam
-  objective:
-    type: maximize
-    goal: 0.80
-    objectiveMetricName: Validation-accuracy
-    additionalMetricNames:
-      - Train-accuracy
-    metricStrategies:
-      - name: Validation-accuracy
-        value: max
-      - name: Train-accuracy
-        value: max
-  algorithm:
-    algorithmName: grid
+        min: "0.5"
+        step: "0.1"
+        max: "0.9"
   trialTemplate:
+    primaryContainerName: training-container
+    trialParameters:
+      - name: learningRate
+        description: Learning rate for the training model
+        reference: lr
+      - name: momentum
+        description: Momentum for the training model
+        reference: momentum
     trialSpec:
       apiVersion: batch/v1
       kind: Job
       spec:
         template:
-          metadata:
-            labels:
-              sidecar.istio.io/inject: 'false'
           spec:
             containers:
-              - command:
-                  - python3
-                  - /opt/mxnet-mnist/mnist.py
-                  - '--batch-size=64'
-                  - '--lr=${trialParameters.learningRate}'
-                  - '--num-layers=${trialParameters.numberLayers}'
-                  - '--optimizer=${trialParameters.optimizer}'
-                image: docker.io/kubeflowkatib/mxnet-mnist:latest
-                name: training-container
+              - name: training-container
+                image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
+                command:
+                  - "python3"
+                  - "/opt/pytorch-mnist/mnist.py"
+                  - "--epochs=1"
+                  - "--batch-size=16"
+                  - "--lr=${trialParameters.learningRate}"
+                  - "--momentum=${trialParameters.momentum}"
             restartPolicy: Never
-    trialParameters:
-      - name: learningRate
-        description: Learning rate for the training model
-        reference: lr
-      - name: numberLayers
-        description: Number of training model layers
-        reference: num-layers
-      - name: optimizer
-        description: Training model optimizer (sdg, adam or ftrl)
-        reference: optimizer
-    primaryContainerName: training-container
-    successCondition: status.conditions.#(type=="Complete")#|#(status=="True")#
-    failureCondition: status.conditions.#(type=="Failed")#|#(status=="True")#
-  parallelTrialCount: 1
-  maxTrialCount: 1
-  maxFailedTrialCount: 1
-  metricsCollectorSpec:
-    collector:
-      kind: StdOut
-  resumePolicy: LongRunning
diff --git a/tests/gh-actions/kind-cluster-1-24.yaml b/tests/gh-actions/kind-cluster-1-24.yaml
diff --git a/tests/gh-actions/kind-cluster-1-25.yaml b/tests/gh-actions/kind-cluster-1-26.yaml
@@ -19,8 +19,8 @@ kubeadmConfigPatches:
         "service-account-signing-key-file": "/etc/kubernetes/pki/sa.key"
 nodes:
 - role: control-plane
-  image: kindest/node:v1.25.3@sha256:f52781bc0d7a19fb6c405c2af83abfeb311f130707a0e219175677e366cc45d1
+  image: kindest/node:v1.26.6@sha256:5e5d789e90c1512c8c480844e0985bc3b4da4ba66179cc5b540fe5b785ca97b5
 - role: worker
-  image: kindest/node:v1.25.3@sha256:f52781bc0d7a19fb6c405c2af83abfeb311f130707a0e219175677e366cc45d1
+  image: kindest/node:v1.26.6@sha256:5e5d789e90c1512c8c480844e0985bc3b4da4ba66179cc5b540fe5b785ca97b5
 - role: worker
-  image: kindest/node:v1.25.3@sha256:f52781bc0d7a19fb6c405c2af83abfeb311f130707a0e219175677e366cc45d1
+  image: kindest/node:v1.26.6@sha256:5e5d789e90c1512c8c480844e0985bc3b4da4ba66179cc5b540fe5b785ca97b5
diff --git a/tests/gh-actions/kind-cluster.yaml b/tests/gh-actions/kind-cluster.yaml
@@ -19,8 +19,8 @@ kubeadmConfigPatches:
         "service-account-signing-key-file": "/etc/kubernetes/pki/sa.key"
 nodes:
 - role: control-plane
-  image: kindest/node:v1.26.6@sha256:5e5d789e90c1512c8c480844e0985bc3b4da4ba66179cc5b540fe5b785ca97b5
+  image: kindest/node:v1.27.11@sha256:ec04b9f650954c033c978db9c25a9071b449179b0e509df258350c2f3034fb57
 - role: worker
-  image: kindest/node:v1.26.6@sha256:5e5d789e90c1512c8c480844e0985bc3b4da4ba66179cc5b540fe5b785ca97b5
+  image: kindest/node:v1.27.11@sha256:ec04b9f650954c033c978db9c25a9071b449179b0e509df258350c2f3034fb57
 - role: worker
-  image: kindest/node:v1.26.6@sha256:5e5d789e90c1512c8c480844e0985bc3b4da4ba66179cc5b540fe5b785ca97b5
+  image: kindest/node:v1.27.11@sha256:ec04b9f650954c033c978db9c25a9071b449179b0e509df258350c2f3034fb57