forked from kubeflow/arena
-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathrun_arena.sh
89 lines (72 loc) · 3.29 KB
/
run_arena.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/env bash
# Setup script: checks cluster access, optionally rewrites the bundled YAML
# manifests from environment variables, applies the operator manifests with
# kubectl, copies the arena binary and charts under /host when that directory
# exists, and finally blocks forever.
#
# -x traces every command to stderr; -e aborts on the first unhandled failure
# (errexit is switched off and back on again later in the script).
set -x -e
#######################################
# Print a message prefixed with a "[YYYYMMDD HH:MM:SS]:" timestamp.
# Arguments:
#   $1 - message text, printed verbatim
# Outputs:
#   "<timestamp> <message>" followed by a newline on stdout
#######################################
function log() {
  # Both expansions are quoted so the message is not word-split or
  # glob-expanded (e.g. a literal "*" or runs of spaces survive intact).
  printf '%s %s\n' "$(date +"[%Y%m%d %H:%M:%S]:")" "${1-}"
}
# Abort early when KUBECONFIG names a path that is not a regular file.
# The expansion is quoted so a path containing whitespace or glob characters
# is tested as a single word. When KUBECONFIG is unset or empty the check is
# skipped, which is what the previous unquoted test did implicitly.
if [[ -n "${KUBECONFIG:-}" && ! -f "${KUBECONFIG}" ]]; then
  log "Failed to find $KUBECONFIG. Please mount kubeconfig file into the pod and make sure it's $KUBECONFIG"
  exit 1
fi
# Probe helm with all output discarded; a failure only produces a warning
# and the script carries on.
helm list >/dev/null 2>&1 \
  || log "Warning: Failed to run 'helm list', please check if tiller is installed appropriately."
# Best-effort section: from here until errexit is re-enabled, a failing
# command no longer aborts the script.
set +e

#######################################
# Escape text so sed inserts it literally as the replacement half of an
# s|...|...| command (backslash, "&" and the "|" delimiter are escaped).
# Arguments:
#   $1 - raw replacement text (single line)
# Outputs:
#   escaped text on stdout
#######################################
sed_escape_replacement() {
  printf '%s' "$1" | sed -e 's/[\\&|]/\\&/g'
}

#######################################
# Replace every match of a pattern with literal text, in place, in all
# *.yaml files found recursively under a directory.
# Arguments:
#   $1 - directory to search
#   $2 - sed basic regular expression to match (must not contain "|")
#   $3 - literal replacement text
# Returns:
#   exit status of find (non-zero if any sed invocation failed)
#######################################
replace_in_manifests() {
  local dir=$1 pattern=$2 replacement
  replacement=$(sed_escape_replacement "$3")
  # The glob is quoted so find, not the shell, expands it; -exec ... {} +
  # handles odd filenames and runs nothing when there is no match.
  find "$dir" -name '*.yaml' -exec sed -i "s|${pattern}|${replacement}|g" {} +
}

# registry: point both bundled registry hostnames at the supplied registry.
if [[ -n "${registry:-}" ]]; then
  for manifest_dir in /charts/ /root/kubernetes-artifacts/; do
    replace_in_manifests "$manifest_dir" 'registry\.cn-zhangjiakou\.aliyuncs\.com' "${registry}"
    replace_in_manifests "$manifest_dir" 'registry\.cn-hangzhou\.aliyuncs\.com' "${registry}"
  done
fi

# namespace: replace "arena-system" in the kubernetes artifacts only.
if [[ -n "${namespace:-}" ]]; then
  replace_in_manifests /root/kubernetes-artifacts/ 'arena-system' "${namespace}"
fi

# repo_namespace: replace "tensorflow-samples" in charts and artifacts.
if [[ -n "${repo_namespace:-}" ]]; then
  replace_in_manifests /charts/ 'tensorflow-samples' "${repo_namespace}"
  replace_in_manifests /root/kubernetes-artifacts/ 'tensorflow-samples' "${repo_namespace}"
fi

# "useLoadBlancer" is the historical (misspelt) switch and stays supported;
# the correctly spelt "useLoadBalancer" is accepted as well.
if [[ "${useLoadBlancer:-}" == "true" || "${useLoadBalancer:-}" == "true" ]]; then
  replace_in_manifests /charts/ 'NodePort' 'LoadBalancer'
  replace_in_manifests /root/kubernetes-artifacts/ 'NodePort' 'LoadBalancer'
fi
# jobmon: apply the role manifest unless the cluster-wide service account
# listing already contains a line matching "jobmon".
kubectl get serviceaccount --all-namespaces | grep jobmon \
  || kubectl apply -f /root/kubernetes-artifacts/jobmon/jobmon-role.yaml

# tf-operator: a fresh install applies the CRD and the operator. When a
# matching service account already exists, reinstall only if the tfjobs CRD
# still mentions "version: v1alpha2".
if kubectl get serviceaccount --all-namespaces | grep tf-job-operator; then
  if kubectl get crd tfjobs.kubeflow.org -oyaml |grep -i 'version: v1alpha2'; then
    kubectl delete -f /root/kubernetes-artifacts/tf-operator/tf-operator-v1alpha2.yaml
    kubectl apply -f /root/kubernetes-artifacts/tf-operator/tf-crd.yaml
    kubectl apply -f /root/kubernetes-artifacts/tf-operator/tf-operator.yaml
  fi
else
  kubectl apply -f /root/kubernetes-artifacts/tf-operator/tf-crd.yaml
  kubectl apply -f /root/kubernetes-artifacts/tf-operator/tf-operator.yaml
fi

# mpi-operator: same presence test as jobmon.
kubectl get serviceaccount --all-namespaces | grep mpi-operator \
  || kubectl apply -f /root/kubernetes-artifacts/mpi-operator/mpi-operator.yaml
# Monitoring manifests are handled only when usePrometheus is exactly "true".
case "$usePrometheus" in
  true)
    # On platform "ack", swap the accelerator/nvidia_gpu string in the
    # exporter manifest for aliyun.accelerator/nvidia_count before applying.
    if [[ "$platform" == "ack" ]]; then
      sed -i 's|accelerator/nvidia_gpu|aliyun.accelerator/nvidia_count|g' /root/kubernetes-artifacts/prometheus/gpu-exporter.yaml
    fi
    # Apply the three manifests unless a service account line already
    # matches "prometheus".
    kubectl get serviceaccount --all-namespaces | grep prometheus || {
      kubectl apply -f /root/kubernetes-artifacts/prometheus/gpu-exporter.yaml
      kubectl apply -f /root/kubernetes-artifacts/prometheus/prometheus.yaml
      kubectl apply -f /root/kubernetes-artifacts/prometheus/grafana.yaml
    }
    ;;
esac
# Re-enable errexit: failures from here on abort the script again.
set -e

#######################################
# Flip "false" to "true" on every line containing "useHostNetwork" in all
# values.yaml files found recursively under a directory (edited in place).
# Arguments:
#   $1 - directory to search
# Returns:
#   exit status of find (non-zero if any sed invocation failed)
#######################################
enable_host_network() {
  # -exec ... {} + copes with whitespace in paths and, unlike piping into
  # xargs, does not invoke sed with no file operands when nothing matches
  # (which would abort the script under errexit).
  find "$1" -name values.yaml -exec sed -i "/useHostNetwork/s/false/true/g" {} +
}

if [[ "${useHostNetwork:-}" == "true" ]]; then
  enable_host_network /charts/
fi
# When a /host directory is present, copy the arena binary and the charts
# into it. Anything already at the destination is first renamed with a
# timestamp suffix rather than overwritten.
if [[ -d /host ]]; then
  stamp=$(date "+%Y%m%d%H%M%S")
  host_arena=/host/usr/local/bin/arena
  host_charts=/host/charts

  # Binary: keep the previous copy (if any) as arena-<stamp>.
  if [[ -f "$host_arena" ]]; then
    mv "$host_arena" "${host_arena}-${stamp}"
  fi
  cp /usr/local/bin/arena "$host_arena"

  # Charts: keep the previous directory (if any) as charts-<stamp>.
  if [[ -d "$host_charts" ]]; then
    mv "$host_charts" "${host_charts}-${stamp}"
  fi
  cp -r /charts /host
fi
# Block forever once setup is done. NOTE(review): presumably this keeps the
# installer pod running instead of exiting — confirm against the pod spec.
tail -f /dev/null