From adb5814dbf769992badf429f136a24696fb5c033 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Thu, 16 Mar 2023 09:15:03 +0200 Subject: [PATCH 1/4] Bump version to v0.14.0-rc.1 Signed-off-by: Evan Lezar --- versions.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/versions.mk b/versions.mk index f6dd286a3..3666da401 100644 --- a/versions.mk +++ b/versions.mk @@ -14,7 +14,7 @@ MODULE := github.com/NVIDIA/k8s-device-plugin -VERSION ?= v0.13.0 +VERSION ?= v0.14.0-rc.1 # vVERSION represents the version with a guaranteed v-prefix vVERSION := v$(VERSION:v%=%) From 1eb5edc284140a3606f4848be2ac84a9c4b13b41 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Thu, 16 Mar 2023 09:15:36 +0200 Subject: [PATCH 2/4] Bump version to v0.14.0-rc.1 in deployments Signed-off-by: Evan Lezar --- deployments/helm/nvidia-device-plugin/Chart.yaml | 4 ++-- .../static/extensions-v1beta1-nvidia-device-plugin.yml | 2 +- .../static/nvidia-device-plugin-compat-with-cpumanager.yml | 2 +- .../nvidia-device-plugin-privileged-with-service-account.yml | 2 +- nvidia-device-plugin.yml | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/deployments/helm/nvidia-device-plugin/Chart.yaml b/deployments/helm/nvidia-device-plugin/Chart.yaml index c83f87d0b..df26e0309 100644 --- a/deployments/helm/nvidia-device-plugin/Chart.yaml +++ b/deployments/helm/nvidia-device-plugin/Chart.yaml @@ -2,8 +2,8 @@ apiVersion: v2 name: nvidia-device-plugin type: application description: A Helm chart for the nvidia-device-plugin on Kubernetes -version: "0.13.0" -appVersion: "0.13.0" +version: "0.14.0-rc.1" +appVersion: "0.14.0-rc.1" kubeVersion: ">= 1.10.0-0" home: https://github.com/NVIDIA/k8s-device-plugin diff --git a/deployments/static/extensions-v1beta1-nvidia-device-plugin.yml b/deployments/static/extensions-v1beta1-nvidia-device-plugin.yml index 50cbfe118..fcfc6ea81 100644 --- a/deployments/static/extensions-v1beta1-nvidia-device-plugin.yml +++ 
b/deployments/static/extensions-v1beta1-nvidia-device-plugin.yml @@ -35,7 +35,7 @@ spec: # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/ priorityClassName: "system-node-critical" containers: - - image: nvcr.io/nvidia/k8s-device-plugin:v0.13.0 + - image: nvcr.io/nvidia/k8s-device-plugin:v0.14.0-rc.1 name: nvidia-device-plugin-ctr env: - name: FAIL_ON_INIT_ERROR diff --git a/deployments/static/nvidia-device-plugin-compat-with-cpumanager.yml b/deployments/static/nvidia-device-plugin-compat-with-cpumanager.yml index a629fa843..5593506d8 100644 --- a/deployments/static/nvidia-device-plugin-compat-with-cpumanager.yml +++ b/deployments/static/nvidia-device-plugin-compat-with-cpumanager.yml @@ -38,7 +38,7 @@ spec: # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/ priorityClassName: "system-node-critical" containers: - - image: nvcr.io/nvidia/k8s-device-plugin:v0.13.0 + - image: nvcr.io/nvidia/k8s-device-plugin:v0.14.0-rc.1 name: nvidia-device-plugin-ctr env: - name: FAIL_ON_INIT_ERROR diff --git a/deployments/static/nvidia-device-plugin-privileged-with-service-account.yml b/deployments/static/nvidia-device-plugin-privileged-with-service-account.yml index 7b1913588..08235f5ce 100644 --- a/deployments/static/nvidia-device-plugin-privileged-with-service-account.yml +++ b/deployments/static/nvidia-device-plugin-privileged-with-service-account.yml @@ -124,7 +124,7 @@ spec: - env: - name: PASS_DEVICE_SPECS value: "true" - image: nvcr.io/nvidia/k8s-device-plugin:v0.13.0 + image: nvcr.io/nvidia/k8s-device-plugin:v0.14.0-rc.1 name: nvidia-device-plugin-ctr securityContext: privileged: true diff --git a/nvidia-device-plugin.yml b/nvidia-device-plugin.yml index 09bb44c60..6e5e54454 100644 --- a/nvidia-device-plugin.yml +++ b/nvidia-device-plugin.yml @@ -38,7 +38,7 @@ spec: # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/ 
priorityClassName: "system-node-critical" containers: - - image: nvcr.io/nvidia/k8s-device-plugin:v0.13.0 + - image: nvcr.io/nvidia/k8s-device-plugin:v0.14.0-rc.1 name: nvidia-device-plugin-ctr env: - name: FAIL_ON_INIT_ERROR From 244efeb1af624ba74cd66b219ef01f359d7f70d5 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Thu, 16 Mar 2023 09:16:44 +0200 Subject: [PATCH 3/4] Bump gpu-feature-discovery version to v0.8.0-rc.1 Signed-off-by: Evan Lezar --- deployments/helm/nvidia-device-plugin/Chart.lock | 6 +++--- deployments/helm/nvidia-device-plugin/Chart.yaml | 2 +- .../charts/gpu-feature-discovery/Chart.yaml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/deployments/helm/nvidia-device-plugin/Chart.lock b/deployments/helm/nvidia-device-plugin/Chart.lock index 44951ec6a..df8fe0c24 100644 --- a/deployments/helm/nvidia-device-plugin/Chart.lock +++ b/deployments/helm/nvidia-device-plugin/Chart.lock @@ -4,6 +4,6 @@ dependencies: version: 0.12.1 - name: gpu-feature-discovery repository: "" - version: 0.7.0 -digest: sha256:97fa7c7c0d07692c3c3f8ca924826b97b7a7cc270934af9af03f7374a327cdfd -generated: "2023-01-31T16:33:13.333048878+01:00" + version: 0.8.0-rc.1 +digest: sha256:b7b5d7fa86224d90de245d1b927d966ff9a0bd9e9d5ba6055706838c086dd394 +generated: "2023-03-16T09:17:42.839961+02:00" diff --git a/deployments/helm/nvidia-device-plugin/Chart.yaml b/deployments/helm/nvidia-device-plugin/Chart.yaml index df26e0309..dc12263d2 100644 --- a/deployments/helm/nvidia-device-plugin/Chart.yaml +++ b/deployments/helm/nvidia-device-plugin/Chart.yaml @@ -15,5 +15,5 @@ dependencies: repository: https://kubernetes-sigs.github.io/node-feature-discovery/charts - name: gpu-feature-discovery alias: gfd - version: "0.7.0" + version: "0.8.0-rc.1" condition: gfd.enabled diff --git a/deployments/helm/nvidia-device-plugin/charts/gpu-feature-discovery/Chart.yaml b/deployments/helm/nvidia-device-plugin/charts/gpu-feature-discovery/Chart.yaml index b0dbb6ef4..33677178b 100644 --- 
a/deployments/helm/nvidia-device-plugin/charts/gpu-feature-discovery/Chart.yaml +++ b/deployments/helm/nvidia-device-plugin/charts/gpu-feature-discovery/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: gpu-feature-discovery type: application description: A Helm chart for gpu-feature-discovery on Kubernetes -version: "0.7.0" -appVersion: "0.7.0" +version: "0.8.0-rc.1" +appVersion: "0.8.0-rc.1" kubeVersion: ">= 1.10.0-0" home: https://github.com/NVIDIA/gpu-feature-discovery From 8f76973222f04058244fb9bf791facd5cad8b958 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Thu, 16 Mar 2023 14:13:24 +0200 Subject: [PATCH 4/4] Bump version to v0.14.0-rc.1 in README and RELEASE Signed-off-by: Evan Lezar --- README.md | 49 +++++++++++++++++++++++++++++-------------------- RELEASE.md | 2 +- 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index c6ecc05a1..c46a9cf88 100644 --- a/README.md +++ b/README.md @@ -124,7 +124,7 @@ Once you have configured the options above on all the GPU nodes in your cluster, you can enable GPU support by deploying the following Daemonset: ```shell -$ kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.13.0/nvidia-device-plugin.yml +$ kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.14.0-rc.1/nvidia-device-plugin.yml ``` **Note:** This is a simple static daemonset meant to demonstrate the basic @@ -462,11 +462,11 @@ $ helm repo add nvdp https://nvidia.github.io/k8s-device-plugin $ helm repo update ``` -Then verify that the latest release (`v0.13.0`) of the plugin is available: +Then verify that the latest release (`v0.14.0-rc.1`) of the plugin is available: ``` $ helm search repo nvdp --devel NAME CHART VERSION APP VERSION DESCRIPTION -nvdp/nvidia-device-plugin 0.13.0 0.13.0 A Helm chart for ... +nvdp/nvidia-device-plugin 0.14.0-rc.1 0.14.0-rc.1 A Helm chart for ... 
``` Once this repo is updated, you can begin installing packages from it to deploy @@ -477,7 +477,7 @@ The most basic installation command without any options is then: helm upgrade -i nvdp nvdp/nvidia-device-plugin \ --namespace nvidia-device-plugin \ --create-namespace \ - --version 0.13.0 + --version 0.14.0-rc.1 ``` **Note:** You only need the to pass the `--devel` flag to `helm search repo` @@ -486,7 +486,7 @@ version (e.g. `-rc.1`). Full releases will be listed without this. ### Configuring the device plugin's `helm` chart -The `helm` chart for the latest release of the plugin (`v0.13.0`) includes +The `helm` chart for the latest release of the plugin (`v0.14.0-rc.1`) includes a number of customizable values. Prior to `v0.12.0` the most commonly used values were those that had direct @@ -496,7 +496,7 @@ case of the original values is then to override an option from the `ConfigMap` if desired. Both methods are discussed in more detail below. The full set of values that can be set are found here: -[here](https://github.com/NVIDIA/k8s-device-plugin/blob/v0.13.0/deployments/helm/nvidia-device-plugin/values.yaml). +[here](https://github.com/NVIDIA/k8s-device-plugin/blob/v0.14.0-rc.1/deployments/helm/nvidia-device-plugin/values.yaml). #### Passing configuration to the plugin via a `ConfigMap`. 
@@ -535,7 +535,7 @@ EOF And deploy the device plugin via helm (pointing it at this config file and giving it a name): ``` $ helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version=0.13.0 \ + --version=0.14.0-rc.1 \ --namespace nvidia-device-plugin \ --create-namespace \ --set-file config.map.config=/tmp/dp-example-config0.yaml @@ -557,7 +557,7 @@ $ kubectl create cm -n nvidia-device-plugin nvidia-plugin-configs \ ``` ``` $ helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version=0.13.0 \ + --version=0.14.0-rc.1 \ --namespace nvidia-device-plugin \ --create-namespace \ --set config.name=nvidia-plugin-configs @@ -585,7 +585,7 @@ EOF And redeploy the device plugin via helm (pointing it at both configs with a specified default). ``` $ helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version=0.13.0 \ + --version=0.14.0-rc.1 \ --namespace nvidia-device-plugin \ --create-namespace \ --set config.default=config0 \ @@ -604,7 +604,7 @@ $ kubectl create cm -n nvidia-device-plugin nvidia-plugin-configs \ ``` ``` $ helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version=0.13.0 \ + --version=0.14.0-rc.1 \ --namespace nvidia-device-plugin \ --create-namespace \ --set config.default=config0 \ @@ -690,7 +690,7 @@ chart values that are commonly overridden are: ``` Please take a look in the -[`values.yaml`](https://github.com/NVIDIA/k8s-device-plugin/blob/v0.13.0/deployments/helm/nvidia-device-plugin/values.yaml) +[`values.yaml`](https://github.com/NVIDIA/k8s-device-plugin/blob/v0.14.0-rc.1/deployments/helm/nvidia-device-plugin/values.yaml) file to see the full set of overridable parameters for the device plugin. Examples of setting these options include: @@ -699,7 +699,7 @@ Enabling compatibility with the `CPUManager` and running with a request for 100ms of CPU time and a limit of 512MB of memory. 
```shell $ helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version=0.13.0 \ + --version=0.14.0-rc.1 \ --namespace nvidia-device-plugin \ --create-namespace \ --set compatWithCPUManager=true \ @@ -710,7 +710,7 @@ $ helm upgrade -i nvdp nvdp/nvidia-device-plugin \ Using the legacy Daemonset API (only available on Kubernetes < `v1.16`): ```shell $ helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version=0.13.0 \ + --version=0.14.0-rc.1 \ --namespace nvidia-device-plugin \ --create-namespace \ --set legacyDaemonsetAPI=true @@ -719,7 +719,7 @@ $ helm upgrade -i nvdp nvdp/nvidia-device-plugin \ Enabling compatibility with the `CPUManager` and the `mixed` `migStrategy` ```shell $ helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version=0.13.0 \ + --version=0.14.0-rc.1 \ --namespace nvidia-device-plugin \ --create-namespace \ --set compatWithCPUManager=true \ @@ -738,7 +738,7 @@ Discovery to perform this labeling. To enable it, simply set `gfd.enabled=true` during helm install. ``` helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version=0.13.0 \ + --version=0.14.0-rc.1 \ --namespace nvidia-device-plugin \ --create-namespace \ --set gfd.enabled=true @@ -793,14 +793,14 @@ Using the default values for the flags: $ helm upgrade -i nvdp \ --namespace nvidia-device-plugin \ --create-namespace \ - https://nvidia.github.io/k8s-device-plugin/stable/nvidia-device-plugin-0.13.0.tgz + https://nvidia.github.io/k8s-device-plugin/stable/nvidia-device-plugin-0.14.0-rc.1.tgz ``` ## Building and Running Locally The next sections are focused on building the device plugin locally and running it. It is intended purely for development and testing, and not required by most users. -It assumes you are pinning to the latest release tag (i.e. `v0.13.0`), but can +It assumes you are pinning to the latest release tag (i.e. `v0.14.0-rc.1`), but can easily be modified to work with any available tag or branch. 
### With Docker @@ -808,8 +808,8 @@ easily be modified to work with any available tag or branch. #### Build Option 1, pull the prebuilt image from [Docker Hub](https://hub.docker.com/r/nvidia/k8s-device-plugin): ```shell -$ docker pull nvcr.io/nvidia/k8s-device-plugin:v0.13.0 -$ docker tag nvcr.io/nvidia/k8s-device-plugin:v0.13.0 nvcr.io/nvidia/k8s-device-plugin:devel +$ docker pull nvcr.io/nvidia/k8s-device-plugin:v0.14.0-rc.1 +$ docker tag nvcr.io/nvidia/k8s-device-plugin:v0.14.0-rc.1 nvcr.io/nvidia/k8s-device-plugin:devel ``` Option 2, build without cloning the repository: @@ -817,7 +817,7 @@ Option 2, build without cloning the repository: $ docker build \ -t nvcr.io/nvidia/k8s-device-plugin:devel \ -f deployments/container/Dockerfile.ubuntu \ - https://github.com/NVIDIA/k8s-device-plugin.git#v0.13.0 + https://github.com/NVIDIA/k8s-device-plugin.git#v0.14.0-rc.1 ``` Option 3, if you want to modify the code: @@ -871,6 +871,15 @@ $ ./k8s-device-plugin --pass-device-specs ## Changelog +### Version v0.14.0-rc.1 + +- Added --cdi-enabled flag to GPU Device Plugin. With this enabled, the device plugin will generate CDI specifications for available NVIDIA devices. Allocation will add CDI annotations (`cdi.k8s.io/*`) to the response. These are read by a CDI-enabled runtime to make the required modifications to a container being created. 
+- Updated GFD subchart to version 0.8.0-rc.1 +- Bumped Golang version to 1.20.1 +- Bumped CUDA base images version to 12.1.0 +- Switched to klog for logging +- Added a static deployment file for Microshift + ### Version v0.13.0 - Promote v0.13.0-rc.3 to v0.13.0 diff --git a/RELEASE.md b/RELEASE.md index 3728a8d88..60263bcda 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -9,7 +9,7 @@ Publishing the helm chart is currently manual, and we should move to an automate # Release Process Checklist - [ ] Update the README changelog -- [ ] Update the README to change occurances of the old version (e.g: `v0.13.0`) with the new version +- [ ] Update the README to change occurrences of the old version (e.g: `v0.14.0-rc.1`) with the new version - [ ] Commit, Tag and Push to Gitlab - [ ] Build a new helm package with `helm package ./deployments/helm/nvidia-device-plugin` - [ ] Switch to the `gh-pages` branch and move the newly generated package to the `stable` helm repo