From 3424978d4b8813c96a5095c413d64ec9e12ac5a0 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 16 Jul 2024 14:13:09 +0200 Subject: [PATCH 1/6] Bump version for v0.16.0 release Signed-off-by: Evan Lezar --- versions.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/versions.mk b/versions.mk index d8ab3e7fa..576559375 100644 --- a/versions.mk +++ b/versions.mk @@ -17,7 +17,7 @@ MODULE := github.com/NVIDIA/$(DRIVER_NAME) REGISTRY ?= nvcr.io/nvidia -VERSION ?= v0.16.0-rc.1 +VERSION ?= v0.16.0 # vVERSION represents the version with a guaranteed v-prefix vVERSION := v$(VERSION:v%=%) From 94905300e9c2a9238a154fa49d7134c490be8f60 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 16 Jul 2024 14:13:11 +0200 Subject: [PATCH 2/6] Bump version to 0.16.0 in deployments Signed-off-by: Evan Lezar --- deployments/helm/nvidia-device-plugin/Chart.yaml | 4 ++-- .../gpu-feature-discovery-daemonset-with-mig-mixed.yaml | 6 +++--- .../gpu-feature-discovery-daemonset-with-mig-single.yaml | 6 +++--- deployments/static/gpu-feature-discovery-daemonset.yaml | 6 +++--- deployments/static/gpu-feature-discovery-job.yaml.template | 6 +++--- .../static/nvidia-device-plugin-compat-with-cpumanager.yml | 2 +- deployments/static/nvidia-device-plugin.yml | 2 +- 7 files changed, 16 insertions(+), 16 deletions(-) diff --git a/deployments/helm/nvidia-device-plugin/Chart.yaml b/deployments/helm/nvidia-device-plugin/Chart.yaml index db8663e1c..a7817bd62 100644 --- a/deployments/helm/nvidia-device-plugin/Chart.yaml +++ b/deployments/helm/nvidia-device-plugin/Chart.yaml @@ -2,8 +2,8 @@ apiVersion: v2 name: nvidia-device-plugin type: application description: A Helm chart for the nvidia-device-plugin on Kubernetes -version: "0.16.0-rc.1" -appVersion: "0.16.0-rc.1" +version: "0.16.0" +appVersion: "0.16.0" kubeVersion: ">= 1.10.0-0" home: https://github.com/NVIDIA/k8s-device-plugin diff --git a/deployments/static/gpu-feature-discovery-daemonset-with-mig-mixed.yaml 
b/deployments/static/gpu-feature-discovery-daemonset-with-mig-mixed.yaml index fceedd36c..9b164f3ca 100644 --- a/deployments/static/gpu-feature-discovery-daemonset-with-mig-mixed.yaml +++ b/deployments/static/gpu-feature-discovery-daemonset-with-mig-mixed.yaml @@ -4,7 +4,7 @@ metadata: name: gpu-feature-discovery labels: app.kubernetes.io/name: gpu-feature-discovery - app.kubernetes.io/version: 0.16.0-rc.1 + app.kubernetes.io/version: 0.16.0 app.kubernetes.io/part-of: nvidia-gpu spec: selector: @@ -15,11 +15,11 @@ spec: metadata: labels: app.kubernetes.io/name: gpu-feature-discovery - app.kubernetes.io/version: 0.16.0-rc.1 + app.kubernetes.io/version: 0.16.0 app.kubernetes.io/part-of: nvidia-gpu spec: containers: - - image: nvcr.io/nvidia/k8s-device-plugin:v0.16.0-rc.1 + - image: nvcr.io/nvidia/k8s-device-plugin:v0.16.0 name: gpu-feature-discovery command: ["/usr/bin/gpu-feature-discovery"] volumeMounts: diff --git a/deployments/static/gpu-feature-discovery-daemonset-with-mig-single.yaml b/deployments/static/gpu-feature-discovery-daemonset-with-mig-single.yaml index 6d51a2212..d13f846c3 100644 --- a/deployments/static/gpu-feature-discovery-daemonset-with-mig-single.yaml +++ b/deployments/static/gpu-feature-discovery-daemonset-with-mig-single.yaml @@ -4,7 +4,7 @@ metadata: name: gpu-feature-discovery labels: app.kubernetes.io/name: gpu-feature-discovery - app.kubernetes.io/version: 0.16.0-rc.1 + app.kubernetes.io/version: 0.16.0 app.kubernetes.io/part-of: nvidia-gpu spec: selector: @@ -15,11 +15,11 @@ spec: metadata: labels: app.kubernetes.io/name: gpu-feature-discovery - app.kubernetes.io/version: 0.16.0-rc.1 + app.kubernetes.io/version: 0.16.0 app.kubernetes.io/part-of: nvidia-gpu spec: containers: - - image: nvcr.io/nvidia/k8s-device-plugin:v0.16.0-rc.1 + - image: nvcr.io/nvidia/k8s-device-plugin:v0.16.0 name: gpu-feature-discovery command: ["/usr/bin/gpu-feature-discovery"] volumeMounts: diff --git a/deployments/static/gpu-feature-discovery-daemonset.yaml 
b/deployments/static/gpu-feature-discovery-daemonset.yaml index d83c9cad9..73d70fb32 100644 --- a/deployments/static/gpu-feature-discovery-daemonset.yaml +++ b/deployments/static/gpu-feature-discovery-daemonset.yaml @@ -4,7 +4,7 @@ metadata: name: gpu-feature-discovery labels: app.kubernetes.io/name: gpu-feature-discovery - app.kubernetes.io/version: 0.16.0-rc.1 + app.kubernetes.io/version: 0.16.0 app.kubernetes.io/part-of: nvidia-gpu spec: selector: @@ -15,11 +15,11 @@ spec: metadata: labels: app.kubernetes.io/name: gpu-feature-discovery - app.kubernetes.io/version: 0.16.0-rc.1 + app.kubernetes.io/version: 0.16.0 app.kubernetes.io/part-of: nvidia-gpu spec: containers: - - image: nvcr.io/nvidia/k8s-device-plugin:v0.16.0-rc.1 + - image: nvcr.io/nvidia/k8s-device-plugin:v0.16.0 name: gpu-feature-discovery command: ["/usr/bin/gpu-feature-discovery"] volumeMounts: diff --git a/deployments/static/gpu-feature-discovery-job.yaml.template b/deployments/static/gpu-feature-discovery-job.yaml.template index 996fdb5e2..c5ff741fc 100644 --- a/deployments/static/gpu-feature-discovery-job.yaml.template +++ b/deployments/static/gpu-feature-discovery-job.yaml.template @@ -4,19 +4,19 @@ metadata: name: gpu-feature-discovery labels: app.kubernetes.io/name: gpu-feature-discovery - app.kubernetes.io/version: 0.16.0-rc.1 + app.kubernetes.io/version: 0.16.0 app.kubernetes.io/part-of: nvidia-gpu spec: template: metadata: labels: app.kubernetes.io/name: gpu-feature-discovery - app.kubernetes.io/version: 0.16.0-rc.1 + app.kubernetes.io/version: 0.16.0 app.kubernetes.io/part-of: nvidia-gpu spec: nodeName: NODE_NAME containers: - - image: nvcr.io/nvidia/k8s-device-plugin:v0.16.0-rc.1 + - image: nvcr.io/nvidia/k8s-device-plugin:v0.16.0 name: gpu-feature-discovery command: ["/usr/bin/gpu-feature-discovery"] args: diff --git a/deployments/static/nvidia-device-plugin-compat-with-cpumanager.yml b/deployments/static/nvidia-device-plugin-compat-with-cpumanager.yml index c0a241714..5f7047515 100644 
--- a/deployments/static/nvidia-device-plugin-compat-with-cpumanager.yml +++ b/deployments/static/nvidia-device-plugin-compat-with-cpumanager.yml @@ -38,7 +38,7 @@ spec: # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/ priorityClassName: "system-node-critical" containers: - - image: nvcr.io/nvidia/k8s-device-plugin:v0.16.0-rc.1 + - image: nvcr.io/nvidia/k8s-device-plugin:v0.16.0 name: nvidia-device-plugin-ctr env: - name: FAIL_ON_INIT_ERROR diff --git a/deployments/static/nvidia-device-plugin.yml b/deployments/static/nvidia-device-plugin.yml index abd1e0d86..681c23de2 100644 --- a/deployments/static/nvidia-device-plugin.yml +++ b/deployments/static/nvidia-device-plugin.yml @@ -38,7 +38,7 @@ spec: # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/ priorityClassName: "system-node-critical" containers: - - image: nvcr.io/nvidia/k8s-device-plugin:v0.16.0-rc.1 + - image: nvcr.io/nvidia/k8s-device-plugin:v0.16.0 name: nvidia-device-plugin-ctr env: - name: FAIL_ON_INIT_ERROR From 1bfde0366ec95b3869f65b8f741dbcd8dddc144d Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 16 Jul 2024 14:27:24 +0200 Subject: [PATCH 3/6] Bump version to v0.16.0 in README Signed-off-by: Evan Lezar --- README.md | 50 +++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 73bcd5f37..7ca9a0542 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ The NVIDIA device plugin for Kubernetes is a Daemonset that allows you to automa - Run GPU enabled containers in your Kubernetes cluster. This repository contains NVIDIA's official implementation of the [Kubernetes device plugin](https://kubernetes.io/docs/concepts/extend-kubernetes/compute-storage-net/device-plugins/). 
-As of v0.15.0 this repository also holds the implementation for GPU Feature Discovery labels, +As of v0.16.0 this repository also holds the implementation for GPU Feature Discovery labels, for further information on GPU Feature Discovery see [here](docs/gpu-feature-discovery/README.md). Please note that: @@ -123,7 +123,7 @@ Once you have configured the options above on all the GPU nodes in your cluster, you can enable GPU support by deploying the following Daemonset: ```shell -$ kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.15.0/deployments/static/nvidia-device-plugin.yml +$ kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.16.0/deployments/static/nvidia-device-plugin.yml ``` **Note:** This is a simple static daemonset meant to demonstrate the basic @@ -558,11 +558,11 @@ $ helm repo add nvdp https://nvidia.github.io/k8s-device-plugin $ helm repo update ``` -Then verify that the latest release (`v0.15.0`) of the plugin is available: +Then verify that the latest release (`v0.16.0`) of the plugin is available: ``` $ helm search repo nvdp --devel NAME CHART VERSION APP VERSION DESCRIPTION -nvdp/nvidia-device-plugin 0.15.0 0.15.0 A Helm chart for ... +nvdp/nvidia-device-plugin 0.16.0 0.16.0 A Helm chart for ... ``` Once this repo is updated, you can begin installing packages from it to deploy @@ -573,7 +573,7 @@ The most basic installation command without any options is then: helm upgrade -i nvdp nvdp/nvidia-device-plugin \ --namespace nvidia-device-plugin \ --create-namespace \ - --version 0.15.0 + --version 0.16.0 ``` **Note:** You only need the to pass the `--devel` flag to `helm search repo` @@ -582,7 +582,7 @@ version (e.g. `-rc.1`). Full releases will be listed without this. 
### Configuring the device plugin's `helm` chart -The `helm` chart for the latest release of the plugin (`v0.15.0`) includes +The `helm` chart for the latest release of the plugin (`v0.16.0`) includes a number of customizable values. Prior to `v0.12.0` the most commonly used values were those that had direct @@ -592,7 +592,7 @@ case of the original values is then to override an option from the `ConfigMap` if desired. Both methods are discussed in more detail below. The full set of values that can be set are found here: -[here](https://github.com/NVIDIA/k8s-device-plugin/blob/v0.15.0/deployments/helm/nvidia-device-plugin/values.yaml). +[here](https://github.com/NVIDIA/k8s-device-plugin/blob/v0.16.0/deployments/helm/nvidia-device-plugin/values.yaml). #### Passing configuration to the plugin via a `ConfigMap`. @@ -631,7 +631,7 @@ EOF And deploy the device plugin via helm (pointing it at this config file and giving it a name): ``` $ helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version=0.15.0 \ + --version=0.16.0 \ --namespace nvidia-device-plugin \ --create-namespace \ --set-file config.map.config=/tmp/dp-example-config0.yaml @@ -653,7 +653,7 @@ $ kubectl create cm -n nvidia-device-plugin nvidia-plugin-configs \ ``` ``` $ helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version=0.15.0 \ + --version=0.16.0 \ --namespace nvidia-device-plugin \ --create-namespace \ --set config.name=nvidia-plugin-configs @@ -681,7 +681,7 @@ EOF And redeploy the device plugin via helm (pointing it at both configs with a specified default). 
``` $ helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version=0.15.0 \ + --version=0.16.0 \ --namespace nvidia-device-plugin \ --create-namespace \ --set config.default=config0 \ @@ -700,7 +700,7 @@ $ kubectl create cm -n nvidia-device-plugin nvidia-plugin-configs \ ``` ``` $ helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version=0.15.0 \ + --version=0.16.0 \ --namespace nvidia-device-plugin \ --create-namespace \ --set config.default=config0 \ @@ -783,7 +783,7 @@ chart values that are commonly overridden are: ``` Please take a look in the -[`values.yaml`](https://github.com/NVIDIA/k8s-device-plugin/blob/v0.15.0/deployments/helm/nvidia-device-plugin/values.yaml) +[`values.yaml`](https://github.com/NVIDIA/k8s-device-plugin/blob/v0.16.0/deployments/helm/nvidia-device-plugin/values.yaml) file to see the full set of overridable parameters for the device plugin. Examples of setting these options include: @@ -792,7 +792,7 @@ Enabling compatibility with the `CPUManager` and running with a request for 100ms of CPU time and a limit of 512MB of memory. ```shell $ helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version=0.15.0 \ + --version=0.16.0 \ --namespace nvidia-device-plugin \ --create-namespace \ --set compatWithCPUManager=true \ @@ -803,7 +803,7 @@ $ helm upgrade -i nvdp nvdp/nvidia-device-plugin \ Enabling compatibility with the `CPUManager` and the `mixed` `migStrategy` ```shell $ helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version=0.15.0 \ + --version=0.16.0 \ --namespace nvidia-device-plugin \ --create-namespace \ --set compatWithCPUManager=true \ @@ -822,7 +822,7 @@ Discovery to perform this labeling. To enable it, simply set `gfd.enabled=true` during helm install. 
``` helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version=0.15.0 \ + --version=0.16.0 \ --namespace nvidia-device-plugin \ --create-namespace \ --set gfd.enabled=true @@ -867,7 +867,7 @@ nvidia.com/gpu.product = A100-SXM4-40GB-MIG-1g.5gb-SHARED #### Deploying gpu-feature-discovery in standalone mode -As of v0.15.0, the device plugin's helm chart has integrated support to deploy +As of v0.16.0, the device plugin's helm chart has integrated support to deploy [`gpu-feature-discovery`](https://gitlab.com/nvidia/kubernetes/gpu-feature-discovery/-/tree/main) When gpu-feature-discovery in deploying standalone, begin by setting up the @@ -878,13 +878,13 @@ $ helm repo add nvdp https://nvidia.github.io/k8s-device-plugin $ helm repo update ``` -Then verify that the latest release (`v0.15.0`) of the plugin is available +Then verify that the latest release (`v0.16.0`) of the plugin is available (Note that this includes the GFD chart): ```shell $ helm search repo nvdp --devel NAME CHART VERSION APP VERSION DESCRIPTION -nvdp/nvidia-device-plugin 0.15.0 0.15.0 A Helm chart for ... +nvdp/nvidia-device-plugin 0.16.0 0.16.0 A Helm chart for ... ``` Once this repo is updated, you can begin installing packages from it to deploy @@ -894,7 +894,7 @@ The most basic installation command without any options is then: ``` $ helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version 0.15.0 \ + --version 0.16.0 \ --namespace gpu-feature-discovery \ --create-namespace \ --set devicePlugin.enabled=false @@ -905,7 +905,7 @@ the default namespace. 
```shell $ helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version=0.15.0 \ + --version=0.16.0 \ --set allowDefaultNamespace=true \ --set nfd.enabled=false \ --set migStrategy=mixed \ @@ -928,14 +928,14 @@ Using the default values for the flags: $ helm upgrade -i nvdp \ --namespace nvidia-device-plugin \ --create-namespace \ - https://nvidia.github.io/k8s-device-plugin/stable/nvidia-device-plugin-0.15.0.tgz + https://nvidia.github.io/k8s-device-plugin/stable/nvidia-device-plugin-0.16.0.tgz ``` ## Building and Running Locally The next sections are focused on building the device plugin locally and running it. It is intended purely for development and testing, and not required by most users. -It assumes you are pinning to the latest release tag (i.e. `v0.15.0`), but can +It assumes you are pinning to the latest release tag (i.e. `v0.16.0`), but can easily be modified to work with any available tag or branch. ### With Docker @@ -943,8 +943,8 @@ easily be modified to work with any available tag or branch. 
#### Build Option 1, pull the prebuilt image from [Docker Hub](https://hub.docker.com/r/nvidia/k8s-device-plugin): ```shell -$ docker pull nvcr.io/nvidia/k8s-device-plugin:v0.15.0 -$ docker tag nvcr.io/nvidia/k8s-device-plugin:v0.15.0 nvcr.io/nvidia/k8s-device-plugin:devel +$ docker pull nvcr.io/nvidia/k8s-device-plugin:v0.16.0 +$ docker tag nvcr.io/nvidia/k8s-device-plugin:v0.16.0 nvcr.io/nvidia/k8s-device-plugin:devel ``` Option 2, build without cloning the repository: @@ -952,7 +952,7 @@ Option 2, build without cloning the repository: $ docker build \ -t nvcr.io/nvidia/k8s-device-plugin:devel \ -f deployments/container/Dockerfile.ubuntu \ - https://github.com/NVIDIA/k8s-device-plugin.git#v0.15.0 + https://github.com/NVIDIA/k8s-device-plugin.git#v0.16.0 ``` Option 3, if you want to modify the code: From 2be357b531590bd330dcc6eb4991013ff43cca1e Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 16 Jul 2024 14:21:31 +0200 Subject: [PATCH 4/6] Bump CHANGELOG.md for v0.16.0 release Signed-off-by: Evan Lezar --- CHANGELOG.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a2d85dee5..f318d8b8b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,14 @@ ## Changelog -### Version v1.16.0-rc.1 - +### v0.16.0 +- Fixed logic of atomic writing of the feature file +- Replaced `WithDialer` with `WithContextDialer` +- Fixed SELinux context of MPS pipe directory. +- Changed behavior for empty MIG devices to issue a warning instead of an error when the mixed strategy is selected +- Added a GFD node label for the GPU mode. 
+- Update CUDA base image version to 12.5.1 + +### v0.16.0-rc.1 - Skip container updates if only CDI is selected - Allow cdi hook path to be set - Add nvidiaDevRoot config option From cc1bc9b9d002bbf837ef1981c9fa549ca7540e95 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 16 Jul 2024 14:21:53 +0200 Subject: [PATCH 5/6] [no-relnote] Fix generate changelog script Signed-off-by: Evan Lezar --- hack/generate-changelog.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/hack/generate-changelog.sh b/hack/generate-changelog.sh index ccd9322ab..f4f95f213 100755 --- a/hack/generate-changelog.sh +++ b/hack/generate-changelog.sh @@ -20,7 +20,7 @@ this=`basename $0` usage () { cat << EOF Generate a changelog for the specified tag -Usage: $this --reference [--remote ] +Usage: $this --since [--remote ] Options: --since specify the tag to start the changelog from (default: latest tag) @@ -70,10 +70,15 @@ if [ -z "$REFERENCE" ]; then fi fi +SHA=$(git rev-parse ${VERSION}) +if [[ $? 
-ne 0 ]]; then + SHA="HEAD" +fi + # Print the changelog echo "## Changelog" echo "" echo "### Version $VERSION" # Iterate over the commit messages and ignore the ones that start with "Merge" or "Bump" -git log --pretty=format:"%s" $REFERENCE..@ | grep -Ev "(^Merge )|(^Bump)|(no-rel-?note)|(^---)" | sed 's/^\(.*\)/- \1/g' +git log --pretty=format:"%s" $REFERENCE..$SHA | grep -Ev "(^Merge )|(^Bump)|(no-rel-?note)|(^---)" | sed 's/^\(.*\)/- \1/g' From f61727c669f048489ad49b6a91ef2dacd56bf392 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 16 Jul 2024 14:31:52 +0200 Subject: [PATCH 6/6] Update v0.16.0-rc.2 changelog Signed-off-by: Evan Lezar --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f318d8b8b..29e750504 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ - Allow cdi hook path to be set - Add nvidiaDevRoot config option - Detect devRoot for driver installation -- Set /dev/shm size from /proc/meminfo +- Changed the automatically created MPS /dev/shm to half of the total memory as obtained from /proc/meminfo - Remove redundant version log - Remove provenance information from image manifests - add ngc image signing job for auto signing