diff --git a/swan-cern/Chart.lock b/swan-cern/Chart.lock index 2309a17..10ca103 100644 --- a/swan-cern/Chart.lock +++ b/swan-cern/Chart.lock @@ -7,6 +7,6 @@ dependencies: version: 0.4.3 - name: gpu-operator repository: oci://registry.cern.ch/kubernetes/charts - version: v22.9.1 -digest: sha256:4600d0fa64afa906aaa7570a138dd796a87b20edd18cb96829f0d8e08372c4f3 -generated: "2024-10-01T09:39:24.176258854Z" + version: v24.6.2 +digest: sha256:02b128aa07fd796252255ac4f09708fef9b99aabd0d01d4f2c29d28a2e1aca55 +generated: "2024-10-16T10:39:56.250436142+02:00" diff --git a/swan-cern/Chart.yaml b/swan-cern/Chart.yaml index 5a7cb1f..87af8c2 100644 --- a/swan-cern/Chart.yaml +++ b/swan-cern/Chart.yaml @@ -16,6 +16,6 @@ dependencies: version: 0.4.3 repository: https://fluent.github.io/helm-charts - name: gpu-operator - version: v22.9.1 + version: v24.6.2 repository: oci://registry.cern.ch/kubernetes/charts condition: gpu-operator.enabled diff --git a/swan-cern/templates/gpu/nvidia-time-slicing-config.yaml b/swan-cern/templates/gpu/nvidia-device-plugin-config.yaml similarity index 87% rename from swan-cern/templates/gpu/nvidia-time-slicing-config.yaml rename to swan-cern/templates/gpu/nvidia-device-plugin-config.yaml index 858fa3c..77305d4 100644 --- a/swan-cern/templates/gpu/nvidia-time-slicing-config.yaml +++ b/swan-cern/templates/gpu/nvidia-device-plugin-config.yaml @@ -2,9 +2,12 @@ apiVersion: v1 kind: ConfigMap metadata: - name: nvidia-time-slicing-config + name: nvidia-device-plugin-config namespace: {{ .Release.Namespace }} data: + default: |- + version: v1 + flags: {} slice-4: |- version: v1 sharing: @@ -24,4 +27,3 @@ data: - name: nvidia.com/gpu replicas: 10 {{- end }} - diff --git a/swan-cern/values.yaml b/swan-cern/values.yaml index 56bdaf1..d6253ab 100644 --- a/swan-cern/values.yaml +++ b/swan-cern/values.yaml @@ -274,10 +274,10 @@ gpu-operator: enabled: true validator: repository: registry.cern.ch/kubernetes - version: v22.9.1 + version: v24.6.2 operator: repository: registry.cern.ch/kubernetes - version: v22.9.1 + version: v24.6.2 defaultRuntime: containerd initContainer: repository: registry.cern.ch/kubernetes @@ -285,15 +285,16 @@ gpu-operator: driver: repository: registry.cern.ch/kubernetes image: nvidia-gpu-driver - version: "v550.54.15-6.6.13-200.fc39.x86_64" + version: "v550.54.15-6.8.4-200.fc39.x86_64" imagePullPolicy: Always manager: repository: registry.cern.ch/kubernetes version: v0.6.0 licensingConfig: configMapName: "nvidia-grid-license" - kernelModuleConfig: - name: kernel-module-params + nlsEnabled: false + nfd: + enabled: false vgpuManager: repository: registry.cern.ch driverManager: @@ -305,12 +306,15 @@ gpu-operator: driverManager: repository: registry.cern.ch version: v0.5.1 + gdrcopy: + repository: registry.cern.ch/kubernetes + version: v2.4.1-1 vgpuDeviceManager: repository: registry.cern.ch version: v0.2.0 toolkit: repository: registry.cern.ch/kubernetes - version: v1.11.0 + version: v1.16.2-ubuntu20.04 devicePlugin: repository: registry.cern.ch/kubernetes version: v0.13.0 @@ -324,7 +328,7 @@ gpu-operator: name: nvidia-dcgm-exporter-metrics gfd: repository: registry.cern.ch/kubernetes - version: v0.7.0 + version: v0.16.2-ubi8 mig: strategy: mixed migManager: @@ -334,17 +338,19 @@ gpu-operator: name: nvidia-mig-config nodeStatusExporter: repository: registry.cern.ch - version: v22.9.1 + version: v24.6.2 + kataManager: + repository: registry.cern.ch/kubernetes + version: v0.2.1 sandboxDevicePlugin: repository: registry.cern.ch/kubernetes version: v1.2.1 + ccManager: + repository: registry.cern.ch/kubernetes + version: v0.1.1 devicePlugin: repository: registry.cern.ch/kubernetes version: v0.12.2-ubi8 config: - name: nvidia-time-slicing-config - node-feature-discovery: - image: - repository: registry.cern.ch/kubernetes/node-feature-discovery - tag: v0.10.1 - + name: nvidia-device-plugin-config + default: "default"