Skip to content

Commit

Permalink
Remove MXNet examples (#2267)
Browse files Browse the repository at this point in the history
* UT: Replace MXNet example with PyTorch example

Signed-off-by: Yuki Iwai <[email protected]>

* CI: Replace MXNet examples with PyTorch examples

Signed-off-by: Yuki Iwai <[email protected]>

---------

Signed-off-by: Yuki Iwai <[email protected]>
  • Loading branch information
tenzen-y committed Mar 4, 2024
1 parent 8df3c5c commit fc858d1
Show file tree
Hide file tree
Showing 47 changed files with 384 additions and 1,205 deletions.
42 changes: 0 additions & 42 deletions .github/workflows/e2e-test-mxnet-mnist.yaml

This file was deleted.

4 changes: 4 additions & 0 deletions .github/workflows/e2e-test-pytorch-mnist.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,9 @@ jobs:
kubernetes-version: ["v1.25.12", "v1.26.6", "v1.27.3"]
# Comma Delimited
experiments:
# suggestion-hyperopt
- "long-running-resume,from-volume-resume,median-stop"
# others
- "grid,bayesian-optimization,tpe,multivariate-tpe,cma-es,hyperband"
- "file-metrics-collector,pytorchjob-mnist"
- "median-stop-with-json-format,file-metrics-collector-with-json-format"
2 changes: 1 addition & 1 deletion .github/workflows/e2e-test-ui-random-search-postgres.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
with:
experiments: random
# Comma Delimited
trial-images: mxnet-mnist
trial-images: pytorch-mnist-cpu
katib-ui: true
database-type: postgres

Expand Down
3 changes: 0 additions & 3 deletions .github/workflows/publish-trial-images.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,6 @@ jobs:
fail-fast: false
matrix:
include:
- trial-name: mxnet-mnist
platforms: linux/amd64,linux/arm64
dockerfile: examples/v1beta1/trial-images/mxnet-mnist/Dockerfile
- trial-name: pytorch-mnist-cpu
platforms: linux/amd64,linux/arm64
dockerfile: examples/v1beta1/trial-images/pytorch-mnist/Dockerfile.cpu
Expand Down
11 changes: 0 additions & 11 deletions docs/images-location.md
Original file line number Diff line number Diff line change
Expand Up @@ -238,17 +238,6 @@ The following table shows images for training containers which are used in the
<b>Location</b>
</td>
</tr>
<tr align="center">
<td>
<code>docker.io/kubeflowkatib/mxnet-mnist</code>
</td>
<td>
MXNet MNIST example with collecting metrics time
</td>
<td>
<a href="https://github.com/kubeflow/katib/blob/master/examples/v1beta1/trial-images/mxnet-mnist/Dockerfile">Dockerfile</a>
</td>
</tr>
<tr align="center">
<td>
<code>docker.io/kubeflowkatib/pytorch-mnist-cpu</code>
Expand Down
2 changes: 0 additions & 2 deletions examples/v1beta1/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,6 @@ Check the following images for the Trial containers:

- [Tensorflow MNIST with summaries](./trial-images/tf-mnist-with-summaries)

- [MXNet MNIST](./trial-images/mxnet-mnist)

- [PyTorch MNIST](./trial-images/pytorch-mnist)

- [ENAS Keras CNN CIFAR-10](./trial-images/enas-cnn-cifar10)
Expand Down
25 changes: 12 additions & 13 deletions examples/v1beta1/argo/argo-workflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,9 @@ metadata:
name: katib-argo-workflow
spec:
objective:
type: maximize
goal: 0.99
objectiveMetricName: Validation-accuracy
additionalMetricNames:
- Train-accuracy
type: minimize
goal: 0.001
objectiveMetricName: loss
algorithm:
algorithmName: random
parallelTrialCount: 2
Expand Down Expand Up @@ -50,35 +48,36 @@ spec:
- name: hp-workflow
steps:
- - name: data-preprocessing
template: gen-num-examples
template: gen-epochs
- - name: model-training
template: model-training
arguments:
parameters:
- name: num-examples
- name: epochs
value: "{{steps.data-preprocessing.outputs.result}}"

- name: gen-num-examples
- name: gen-epochs
script:
image: python:alpine3.6
command:
- python
source: |
import random
print(60000//random.randint(10, 100))
print(60000//random.randint(3000, 30000))
- name: model-training
metadata:
labels:
katib.kubeflow.org/model-training: "true"
inputs:
parameters:
- name: num-examples
- name: epochs
container:
name: model-training
image: docker.io/kubeflowkatib/mxnet-mnist:latest
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"
- "/opt/pytorch-mnist/mnist.py"
- "--lr=${trialParameters.learningRate}"
- "--num-examples={{inputs.parameters.num-examples}}"
- "--epochs={{inputs.parameters.epochs}}"
- "--batch-size=16"
33 changes: 16 additions & 17 deletions examples/v1beta1/early-stopping/median-stop.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,9 @@ metadata:
name: median-stop
spec:
objective:
type: maximize
goal: 0.99
objectiveMetricName: Validation-accuracy
additionalMetricNames:
- Train-accuracy
type: minimize
goal: 0.001
objectiveMetricName: loss
algorithm:
algorithmName: random
earlyStopping:
Expand All @@ -30,22 +28,22 @@ spec:
parameterType: double
feasibleSpace:
min: "0.01"
max: "0.5"
- name: num-epochs
parameterType: int
max: "0.05"
- name: momentum
parameterType: double
feasibleSpace:
min: "3"
max: "4"
min: "0.5"
max: "0.9"
trialTemplate:
retain: true
primaryContainerName: training-container
trialParameters:
- name: learningRate
description: Learning rate for the training model
reference: lr
- name: numberEpochs
description: Number of epochs to train the model
reference: num-epochs
- name: momentum
description: Momentum for the training model
reference: momentum
trialSpec:
apiVersion: batch/v1
kind: Job
Expand All @@ -54,11 +52,12 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"
- "--batch-size=64"
- "/opt/pytorch-mnist/mnist.py"
- "--epochs=1"
- "--batch-size=16"
- "--lr=${trialParameters.learningRate}"
- "--num-epochs=${trialParameters.numberEpochs}"
- "--momentum=${trialParameters.momentum}"
restartPolicy: Never
44 changes: 16 additions & 28 deletions examples/v1beta1/hp-tuning/bayesian-optimization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,9 @@ metadata:
name: bayesian-optimization
spec:
objective:
type: maximize
goal: 0.99
objectiveMetricName: Validation-accuracy
additionalMetricNames:
- Train-accuracy
type: minimize
goal: 0.001
objectiveMetricName: loss
algorithm:
algorithmName: bayesianoptimization
algorithmSettings:
Expand All @@ -24,31 +22,21 @@ spec:
parameterType: double
feasibleSpace:
min: "0.01"
max: "0.03"
- name: num-layers
parameterType: int
feasibleSpace:
min: "2"
max: "5"
- name: optimizer
parameterType: categorical
max: "0.05"
- name: momentum
parameterType: double
feasibleSpace:
list:
- sgd
- adam
- ftrl
min: "0.5"
max: "0.9"
trialTemplate:
primaryContainerName: training-container
trialParameters:
- name: learningRate
description: Learning rate for the training model
reference: lr
- name: numberLayers
description: Number of training model layers
reference: num-layers
- name: optimizer
description: Training model optimizer (sgd, adam or ftrl)
reference: optimizer
- name: momentum
description: Momentum for the training model
reference: momentum
trialSpec:
apiVersion: batch/v1
kind: Job
Expand All @@ -57,12 +45,12 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"
- "--batch-size=64"
- "/opt/pytorch-mnist/mnist.py"
- "--epochs=1"
- "--batch-size=16"
- "--lr=${trialParameters.learningRate}"
- "--num-layers=${trialParameters.numberLayers}"
- "--optimizer=${trialParameters.optimizer}"
- "--momentum=${trialParameters.momentum}"
restartPolicy: Never
44 changes: 16 additions & 28 deletions examples/v1beta1/hp-tuning/cma-es.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,9 @@ metadata:
name: cmaes
spec:
objective:
type: maximize
goal: 0.99
objectiveMetricName: Validation-accuracy
additionalMetricNames:
- Train-accuracy
type: minimize
goal: 0.001
objectiveMetricName: loss
algorithm:
algorithmName: cmaes
algorithmSettings:
Expand All @@ -24,31 +22,21 @@ spec:
parameterType: double
feasibleSpace:
min: "0.01"
max: "0.03"
- name: num-layers
parameterType: int
feasibleSpace:
min: "2"
max: "5"
- name: optimizer
parameterType: categorical
max: "0.05"
- name: momentum
parameterType: double
feasibleSpace:
list:
- sgd
- adam
- ftrl
min: "0.5"
max: "0.9"
trialTemplate:
primaryContainerName: training-container
trialParameters:
- name: learningRate
description: Learning rate for the training model
reference: lr
- name: numberLayers
description: Number of training model layers
reference: num-layers
- name: optimizer
description: Training model optimizer (sgd, adam or ftrl)
reference: optimizer
- name: momentum
description: Momentum for the training model
reference: momentum
trialSpec:
apiVersion: batch/v1
kind: Job
Expand All @@ -57,12 +45,12 @@ spec:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist:latest
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"
- "--batch-size=64"
- "/opt/pytorch-mnist/mnist.py"
- "--epochs=1"
- "--batch-size=16"
- "--lr=${trialParameters.learningRate}"
- "--num-layers=${trialParameters.numberLayers}"
- "--optimizer=${trialParameters.optimizer}"
- "--momentum=${trialParameters.momentum}"
restartPolicy: Never
Loading

0 comments on commit fc858d1

Please sign in to comment.