-
Notifications
You must be signed in to change notification settings - Fork 2
/
3-custom-gpu-resources-daemonset.yml
129 lines (128 loc) · 3.09 KB
/
3-custom-gpu-resources-daemonset.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
---
#
# ConfigMap holding the script that the add-gpu-memory DaemonSet runs on each
# GPU node. The script advertises GPU memory as the custom resource
# veriff.com/gpu-memory by patching the node's status.capacity.
#
apiVersion: v1
kind: ConfigMap
metadata:
  name: add-gpu-memory
  namespace: kube-system
data:
  app.sh: |
    #!/bin/bash
    # Abort early if the node name was not injected; otherwise the PATCH
    # below would be sent to a malformed URL.
    : "${K8S_NODE_NAME:?K8S_NODE_NAME must be set}"

    # Amount of GPU memory to advertise. GPU_MEMORY_VALUE is an optional
    # override; when it is not set the original fixed value is used.
    gpu_memory_value="${GPU_MEMORY_VALUE:-15079Mi}"

    # Local gateway to the API server, reached below on 127.0.0.1:8001.
    # It is only needed while the patch is being sent, hence the timeout.
    timeout 240 kubectl proxy &
    sleep 3

    # "~1" is the JSON-Pointer escape for the "/" in veriff.com/gpu-memory.
    # --fail turns HTTP errors (for example 403) into a non-zero exit code;
    # --retry-connrefused keeps retrying while the proxy is still starting.
    if ! curl --fail --silent --show-error \
      --header "Content-Type: application/json-patch+json" \
      --request PATCH \
      --max-time 10 --retry 10 --retry-delay 2 --retry-connrefused \
      --data "[{\"op\": \"add\", \"path\": \"/status/capacity/veriff.com~1gpu-memory\", \"value\": \"${gpu_memory_value}\"}]" \
      "http://127.0.0.1:8001/api/v1/nodes/${K8S_NODE_NAME}/status"
    then
      echo " * failed to add gpu memory to ${K8S_NODE_NAME} (veriff.com/gpu-memory)" >&2
      # Exit non-zero so the container is restarted and the patch retried,
      # instead of sleeping forever while reporting a healthy pod.
      exit 1
    fi

    echo " * ${gpu_memory_value} of gpu memory added to ${K8S_NODE_NAME} (veriff.com/gpu-memory)"

    # Keep the container running so the DaemonSet pod stays in Running state.
    sleep infinity
---
#
# DaemonSet that places one pod on every g4dn.xlarge node. Each pod runs
# app.sh from the add-gpu-memory ConfigMap, which patches the node it runs on
# to add the GPU memory custom resource.
#
apiVersion: apps/v1
kind: DaemonSet
metadata:
  namespace: kube-system
  name: add-gpu-memory
spec:
  selector:
    matchLabels:
      name: add-gpu-memory
  template:
    metadata:
      labels:
        name: add-gpu-memory
    spec:
      serviceAccountName: add-gpu-memory
      priorityClassName: system-node-critical
      # Restrict scheduling to nodes whose instance type is g4dn.xlarge.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: node.kubernetes.io/instance-type
                    operator: In
                    values:
                      - g4dn.xlarge
      # Tolerate any taint with the key "special", whatever its value.
      tolerations:
        - key: special
          operator: Exists
      volumes:
        # The ConfigMap that carries app.sh.
        - name: code
          configMap:
            name: add-gpu-memory
      containers:
        - name: add-gpu-memory
          # NOTE(review): no tag is given, so this resolves to "latest";
          # consider pinning a version.
          image: bitnami/kubectl
          command:
            - bash
            - /app.sh
          env:
            # Name of the node this pod is running on; the script uses it to
            # address the node it has to patch.
            - name: K8S_NODE_NAME
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: spec.nodeName
          resources:
            requests:
              cpu: 1m
              memory: 1M
            limits:
              cpu: 40m
              memory: 50M
          volumeMounts:
            # subPath mounts only the app.sh key, as a single file.
            - name: code
              mountPath: /app.sh
              subPath: app.sh
              readOnly: true
#
# RBAC permissions used by the DaemonSet to patch node status
#
---
# Service account that the add-gpu-memory DaemonSet pods run as
# (referenced by their serviceAccountName).
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: kube-system
  name: add-gpu-memory
---
# Permissions needed to patch node status: read access to nodes and
# patch access to the nodes/status subresource.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  # A ClusterRole is cluster-scoped, so it carries no namespace.
  name: add-gpu-memory
rules:
  # Nodes belong to the core API group (""); a wildcard group would grant
  # the same verbs on any "nodes" resource of every other API group too.
  - apiGroups:
      - ""
    resources:
      - nodes
    verbs:
      - get
      - list
  # The capacity patch is sent to the status subresource of the node.
  - apiGroups:
      - ""
    resources:
      - nodes/status
    verbs:
      - patch
---
# Grants the add-gpu-memory ClusterRole to the add-gpu-memory service account
# in kube-system.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  # A ClusterRoleBinding is cluster-scoped, so it carries no namespace;
  # only the subject below is namespaced.
  name: add-gpu-memory
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: add-gpu-memory
subjects:
  - kind: ServiceAccount
    name: add-gpu-memory
    namespace: kube-system