forked from kubeflow/katib
-
Notifications
You must be signed in to change notification settings - Fork 0
/
enas-gpu.yaml
149 lines (148 loc) · 4.3 KB
/
enas-gpu.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
---
# This GPU example aims to show all the possible operations, so it
# is not very likely to produce good results due to the extensive search space.
# In practice, setting up a limited search space with more common operations is more likely to give better performance.
# For example, Efficient Neural Architecture Search via Parameter Sharing (https://arxiv.org/abs/1802.03268)
# uses only 6 operations: 3x3/5x5 convolution, 3x3/5x5 separable_convolution and 3x3 max_pooling/avg_pooling.
# Katib Experiment that runs a neural architecture search with the "enas"
# algorithm. Each trial is a Kubernetes Job that runs RunTrial.py in a
# GPU-enabled training container.
apiVersion: kubeflow.org/v1beta1
kind: Experiment
metadata:
  namespace: kubeflow
  name: enas-gpu
spec:
  # Trial budget: trials run in parallel / total trials / tolerated failures.
  parallelTrialCount: 3
  maxTrialCount: 12
  maxFailedTrialCount: 3
  # Metric the search optimizes, and the target value for it.
  objective:
    type: maximize
    goal: 0.99
    objectiveMetricName: Validation-Accuracy
  algorithm:
    algorithmName: enas
  nasConfig:
    graphConfig:
      numLayers: 8
      # NOTE(review): presumably 32x32 RGB inputs and 10 output classes,
      # matching the cifar10 training image used below — confirm.
      inputSizes:
        - 32
        - 32
        - 3
      outputSizes:
        - 10
    # Search space: every operation type the search may place in a layer,
    # each with the feasible values of its own parameters.
    operations:
      - operationType: convolution
        parameters:
          - name: filter_size
            parameterType: categorical
            feasibleSpace:
              list:
                - "3"
                - "5"
                - "7"
          - name: num_filter
            parameterType: categorical
            feasibleSpace:
              list:
                - "32"
                - "48"
                - "64"
                - "96"
                - "128"
          - name: stride
            parameterType: categorical
            feasibleSpace:
              list:
                - "1"
                - "2"
      - operationType: separable_convolution
        parameters:
          - name: filter_size
            parameterType: categorical
            feasibleSpace:
              list:
                - "3"
                - "5"
                - "7"
          - name: num_filter
            parameterType: categorical
            feasibleSpace:
              list:
                - "32"
                - "48"
                - "64"
                - "96"
                - "128"
          - name: stride
            parameterType: categorical
            feasibleSpace:
              list:
                - "1"
                - "2"
          - name: depth_multiplier
            parameterType: categorical
            feasibleSpace:
              list:
                - "1"
                - "2"
      - operationType: depthwise_convolution
        parameters:
          - name: filter_size
            parameterType: categorical
            feasibleSpace:
              list:
                - "3"
                - "5"
                - "7"
          - name: stride
            parameterType: categorical
            feasibleSpace:
              list:
                - "1"
                - "2"
          - name: depth_multiplier
            parameterType: categorical
            feasibleSpace:
              list:
                - "1"
                - "2"
      - operationType: reduction
        parameters:
          - name: reduction_type
            parameterType: categorical
            feasibleSpace:
              list:
                - max_pooling
                - avg_pooling
          - name: pool_size
            parameterType: int
            feasibleSpace:
              min: "2"
              max: "3"
              step: "1"
  trialTemplate:
    # Must match the name of the container in trialSpec below.
    primaryContainerName: training-container
    # Each entry is substituted into trialSpec wherever
    # ${trialParameters.<name>} appears (see the container command).
    trialParameters:
      - name: neuralNetworkArchitecture
        description: NN architecture contains operations ID on each NN layer and skip connections between layers
        reference: architecture
      - name: neuralNetworkConfig
        description: Configuration contains NN number of layers, input and output sizes, description what each operation ID means
        reference: nn_config
    trialSpec:
      apiVersion: batch/v1
      kind: Job
      spec:
        template:
          spec:
            containers:
              - name: training-container
                image: docker.io/kubeflowkatib/enas-cnn-cifar10-gpu:latest
                command:
                  - python3
                  - -u
                  - RunTrial.py
                  - --architecture="${trialParameters.neuralNetworkArchitecture}"
                  - --nn_config="${trialParameters.neuralNetworkConfig}"
                # One GPU is allocated to each trial's training container.
                resources:
                  limits:
                    nvidia.com/gpu: 1
            restartPolicy: Never