diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 00000000..1725d57f --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,63 @@ +# Copyright 2024 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Run this workflow on pull requests and on pushes to main and release-* branches +name: build + +on: + pull_request: + types: + - opened + - synchronize + branches: + - main + - release-* + push: + branches: + - main + - release-* + +jobs: + packages: + runs-on: ubuntu-latest + strategy: + matrix: + package: + - deb + - rpm + - tarball + ispr: + - ${{github.event_name == 'pull_request'}} + fail-fast: false + steps: + - uses: actions/checkout@v4 + name: Check out code + with: + fetch-tags: true + fetch-depth: 1024 + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: build ${{ matrix.package }} packages + run: | + sudo apt-get update && sudo apt-get install -y coreutils build-essential sed git bash make + echo "Building packages" + make -f deployments/systemd/packages/Makefile ${{ matrix.package }} + - name: 'Upload Artifacts' + uses: actions/upload-artifact@v4 + with: + compression-level: 0 + name: mig-parted-${{ matrix.package }}-${{ github.run_id }} + path: ${{ github.workspace }}/dist/* diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..94867ea5 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,107 @@ +# NVIDIA MIG Manager Changelog + +## v0.6.0 +- Update to 
latest CUDA base image 12.3.2 +- Migrate to using github.com/NVIDIA/go-nvlib +- Bump Golang version to 1.20.5 +- Bump nvidia-ctk version used by k8s-mig-manager to 1.14.6 +- Update vendored go dependencies +- Minor code improvements and refactoring + +## v0.5.5 +- Update to latest CUDA base image 12.2.2 + +## v0.5.4 +- Update MIG config for Hopper with device ID of H100 80GB HBM3 SKU + +## v0.5.3 +- Update to latest CUDA image 12.2.0 +- Update example config for Hopper with H100 NVL and H800 NVL + +## v0.5.2 +- Update to latest CUDA image 12.1.0 +- Update k8s-mig-manager to support CDI +- Add two new example configs for the newly supported profiles on A100 +- Update MIG profile code to rely on go-nvlib +- Update vendored go-nvlib to latest +- Update NVML wrapper to include MIG profiles from NVML v12.0 + +## v0.5.1 +- Update to latest CUDA image 12.0.1 +- Add newer MIG profiles supported with NVML 12.0 to default config.yaml files +- Add profiles with media extensions for A30-24GB to default config.yaml files +- Add H100 and H800 profiles to default config.yaml files +- Add A800 profiles to default config.yaml files +- Update all calls to enumerate GPUs to use NVML or PCI as appropriate +- Bump vendored go-nvml to v12.0 +- Bump Golang version to 1.20.1 + +## v0.5.0 +- Bump CUDA base image to 11.7.1 +- Remove CUDA compat libs from mig-manager in favor of libs installed by the Driver +- Use symlink for config.yaml instead of static config file +- Add k8s-mig-manager-example for Hopper +- Update k8s-mig-manager-example with standalone RBAC objects +- Explicitly delete pods launched by operator validator before reconfig +- Allow missing GPUClients file in k8s-mig-manager +- Add hooks-minimal.yaml that gets linked if on Hopper or above +- Use symlink for hooks.yaml instead of static config file +- Update install script to use go 1.16.4 +- Update hooks.sh to split out start/stop of k8s services from k8s pods +- Explicitly clear all MIG configurations before disabling MIG 
mode + +## v0.4.3 +- Update calculation for GB in MIG profile name +- Make the systemd-mig-manager a dependency of systemd-resolved.service + +## v0.4.2 +- Update CUDA image to 11.7.0 +- Add extra assert in k8s-mig-manager to double check mig-mode change applied +- Update mig-manager image to use NGC DL license + +## v0.4.1 +- Keep NVML alive across all mig-parted commands (except GPU reset) +- Remove unnecessary services from hooks.sh + +## v0.4.0 +- Update nvidia-mig-parted.sh to include MIG_PARTED_CHECKPOINT_FILE +- Add checkpoint / restore commands to mig-parted CLI +- Update golang version to 1.16.4 +- Support instantiation of *_PROFILE_6_SLICE GIs and CIs +- Update cyrus-sasl-lib to address CVE-2022-24407 +- Add support for MIG profiles with +me as an attribute extension +- Support Compute Instances in mig-parted config such that CI != GI +- Update go-nvml to v0.11.6 +- Change semantics of 'all' to mean 'all-mig-capable' in mig-parted config + +## v0.3.0 +- k8s-mig-manager: Add support for multi-arch images +- k8s-mig-manager: Handle eviction of NVSM pod when applying MIG changes + +## v0.2.0 +- nvidia-mig-parted: Support passing newer GI and CI profile enums on older drivers +- k8s-mig-manager: Rename nvcr.io/nvidia to nvcr.io/nvidia/cloud-native +- k8s-mig-manager: Add support for pre-installed drivers +- systemd-mig-manager: Update logic to remove 'containerd' containers in utils.sh +- systemd-mig-manager: Update logic to shutdown only active systemd services in list +- ci-infrastructure: Rework build and CI to align with other projects +- ci-infrastructure: Use pulse instead of contamer for scans + +## v0.1.3 +- Add default configs for the PG506-96GB card +- Remove CombinedMigManager and add wrappers for Mode/Config Managers +- Add a function to check the minimum NVML version required +- Add SystemGetNVMLVersion() to the NVML interface +- Fix small bug in assert logic for non MIG-capable GPUs + +## v0.1.2 +- Do not start nvidia-mig-manager.service when 
installing the .deb +- Restore lost assert_gpu_reset_available() function +- Add nvidia-dcgm.service to driver_services array +- Split dcgm, and dcgm-exporter in k8s-mig-manager + +## v0.1.1 +- Update packaged config.yaml to include more supported devices + +## v0.1.0 +- Initial release of rpm package for v0.1.0 \ No newline at end of file diff --git a/deployments/systemd/packages/Dockerfile.ubuntu b/deployments/systemd/packages/Dockerfile.deb similarity index 51% rename from deployments/systemd/packages/Dockerfile.ubuntu rename to deployments/systemd/packages/Dockerfile.deb index 7203e718..07d417fe 100644 --- a/deployments/systemd/packages/Dockerfile.ubuntu +++ b/deployments/systemd/packages/Dockerfile.deb @@ -15,11 +15,43 @@ # build go binary ARG BASE_IMAGE=undefined ARG GOLANG_VERSION=undefined -FROM golang:${GOLANG_VERSION} AS go-build +FROM ${BASE_IMAGE} AS go-build + +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + git \ + build-essential \ + dh-make \ + fakeroot \ + devscripts \ + lsb-release && \ + rm -rf /var/lib/apt/lists/* + +ARG GOLANG_VERSION=0.0.0 +RUN set -eux; \ + \ + arch="$(uname -m)"; \ + case "${arch##*-}" in \ + x86_64 | amd64) ARCH='amd64' ;; \ + ppc64el | ppc64le) ARCH='ppc64le' ;; \ + aarch64 | arm64) ARCH='arm64' ;; \ + *) echo "unsupported architecture" ; exit 1 ;; \ + esac; \ + wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \ + | tar -C /usr/local -xz + +ENV GOPATH /go +ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH WORKDIR /build COPY . . 
-RUN go build -o /artifacts/nvidia-mig-parted ./cmd/nvidia-mig-parted + +RUN mkdir /artifacts +ARG VERSION="N/A" +ARG GIT_COMMIT="unknown" +RUN make PREFIX=/artifacts cmds # build package FROM ${BASE_IMAGE} @@ -27,17 +59,21 @@ ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y devscripts debhelper # envs for packaging +ENV DEBFULLNAME "NVIDIA CORPORATION" +ENV DEBEMAIL "cudatools@nvidia.com" ARG PACKAGE_NAME=undefined ARG PACKAGE_VERSION=undefined ARG PACKAGE_REVISION=undefined +# Re-declare GIT_COMMIT here: build args do not carry over from the go-build stage +ARG GIT_COMMIT="unknown" ENV PACKAGE_NAME ${PACKAGE_NAME} ENV PACKAGE_VERSION ${PACKAGE_VERSION} ENV PACKAGE_REVISION ${PACKAGE_REVISION} +ENV PACKAGE_VERSION_STRING "${PACKAGE_VERSION}-${PACKAGE_REVISION}" ENV SECTION "" # working directory -ENV PWD=/tmp/${PACKAGE_NAME}-${PACKAGE_VERSION} -WORKDIR ${PWD} +WORKDIR /tmp/${PACKAGE_NAME}-${PACKAGE_VERSION_STRING} # sources COPY ./LICENSE . @@ -49,8 +85,11 @@ COPY ./deployments/systemd/packages/debian/Makefile . 
# output directory RUN mkdir -p /dist -# Check that the latest changelog entry matches the current version info -RUN if [ "${PACKAGE_VERSION}-${PACKAGE_REVISION}" != "$(dpkg-parsechangelog --show-field=Version)" ]; then exit 1; fi +RUN dch --create --package="${PACKAGE_NAME}" \ + --newversion "${PACKAGE_VERSION_STRING##v}" \ + "See https://github.com/NVIDIA/mig-parted/blob/${GIT_COMMIT}/CHANGELOG.md for the changelog" && \ + dch -r "" && \ + if [ "${PACKAGE_VERSION_STRING##v}" != "$(dpkg-parsechangelog --show-field=Version)" ]; then exit 1; fi # build command CMD export DISTRIB=$(lsb_release -c -s) && \ diff --git a/deployments/systemd/packages/Dockerfile.ubi8 b/deployments/systemd/packages/Dockerfile.rpm similarity index 62% rename from deployments/systemd/packages/Dockerfile.ubi8 rename to deployments/systemd/packages/Dockerfile.rpm index 33326e35..20d4729e 100644 --- a/deployments/systemd/packages/Dockerfile.ubi8 +++ b/deployments/systemd/packages/Dockerfile.rpm @@ -15,11 +15,40 @@ # build go binary ARG BASE_IMAGE=undefined ARG 
GOLANG_VERSION=undefined -FROM golang:${GOLANG_VERSION} AS go-build +FROM ${BASE_IMAGE} AS go-build + +RUN yum install -y \ + ca-certificates \ + gcc \ + wget \ + git \ + make \ + rpm-build && \ + rm -rf /var/cache/yum/* + +ARG GOLANG_VERSION=0.0.0 +RUN set -eux; \ + \ + arch="$(uname -m)"; \ + case "${arch##*-}" in \ + x86_64 | amd64) ARCH='amd64' ;; \ + ppc64el | ppc64le) ARCH='ppc64le' ;; \ + aarch64 | arm64) ARCH='arm64' ;; \ + *) echo "unsupported architecture"; exit 1 ;; \ + esac; \ + wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \ + | tar -C /usr/local -xz + +ENV GOPATH /go +ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH WORKDIR /build COPY . . -RUN go build -o /artifacts/nvidia-mig-parted ./cmd/nvidia-mig-parted + +RUN mkdir /artifacts +ARG VERSION="N/A" +ARG GIT_COMMIT="unknown" +RUN make PREFIX=/artifacts cmds # build package FROM ${BASE_IMAGE} @@ -32,10 +61,13 @@ ARG PACKAGE_REVISION=undefined ENV PACKAGE_NAME ${PACKAGE_NAME} ENV PACKAGE_VERSION ${PACKAGE_VERSION} ENV PACKAGE_REVISION ${PACKAGE_REVISION} +ENV PACKAGE_VERSION_STRING "${PACKAGE_VERSION}-${PACKAGE_REVISION}" +# Re-declare GIT_COMMIT (build args are per-stage) and export it so the CMD below can read it at run time +ARG GIT_COMMIT="unknown" +ENV GIT_COMMIT ${GIT_COMMIT} # working directory -ENV PWD=/tmp/${PACKAGE_NAME}-${PACKAGE_VERSION} -WORKDIR ${PWD} +WORKDIR /tmp/${PACKAGE_NAME}-${PACKAGE_VERSION_STRING} # specs RUN mkdir -p ./SPECS @@ -59,5 +91,7 @@ CMD arch=$(uname -m) && \ -D "_topdir ${PWD}" \ -D "version ${PACKAGE_VERSION}" \ -D "revision ${PACKAGE_REVISION}" \ + -D "git_commit ${GIT_COMMIT}" \ + -D "release_date $(date +'%a %b %d %Y')" \ SPECS/${PACKAGE_NAME}.spec && \ mv RPMS/$arch/*.rpm /dist diff --git a/deployments/systemd/packages/Dockerfile.tarball b/deployments/systemd/packages/Dockerfile.tarball index 64e20ee0..1db284a7 100644 --- a/deployments/systemd/packages/Dockerfile.tarball +++ b/deployments/systemd/packages/Dockerfile.tarball @@ -13,28 +13,71 @@ # limitations under the License. 
# build go binary +ARG BASE_IMAGE=undefined ARG GOLANG_VERSION=undefined -FROM golang:${GOLANG_VERSION} AS go-build +FROM ${BASE_IMAGE} AS go-build + +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + git \ + build-essential \ + dh-make \ + fakeroot \ + devscripts \ + lsb-release && \ + rm -rf /var/lib/apt/lists/* + +ARG GOLANG_VERSION=0.0.0 +RUN set -eux; \ + \ + arch="$(uname -m)"; \ + case "${arch##*-}" in \ + x86_64 | amd64) ARCH='amd64' ;; \ + ppc64el | ppc64le) ARCH='ppc64le' ;; \ + aarch64 | arm64) ARCH='arm64' ;; \ + *) echo "unsupported architecture" ; exit 1 ;; \ + esac; \ + wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \ + | tar -C /usr/local -xz + +ENV GOPATH /go +ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH + +WORKDIR /build +COPY . . + +RUN mkdir /artifacts +ARG VERSION="N/A" +ARG GIT_COMMIT="unknown" +RUN make PREFIX=/artifacts cmds + +# build package +FROM ${BASE_IMAGE} +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y devscripts debhelper # envs for packaging +ENV DEBFULLNAME "NVIDIA CORPORATION" +ENV DEBEMAIL "cudatools@nvidia.com" ARG PACKAGE_NAME=undefined ARG PACKAGE_VERSION=undefined ARG PACKAGE_REVISION=undefined ENV PACKAGE_NAME ${PACKAGE_NAME} ENV PACKAGE_VERSION ${PACKAGE_VERSION} ENV PACKAGE_REVISION ${PACKAGE_REVISION} +ENV PACKAGE_VERSION_STRING "${PACKAGE_VERSION}-${PACKAGE_REVISION}" +ENV SECTION "" -# destination to put tarball files -ENV DESTDIR=/${PACKAGE_NAME}-${PACKAGE_VERSION}-${PACKAGE_REVISION} -# working directory -WORKDIR /build -COPY . . +# destination to put tarball files +WORKDIR /${PACKAGE_NAME}-${PACKAGE_VERSION_STRING} +ENV DESTDIR=/${PACKAGE_NAME}-${PACKAGE_VERSION_STRING} # collect tarball files -RUN mkdir -p ${DESTDIR} -RUN go build -o ${DESTDIR}/nvidia-mig-parted ./cmd/nvidia-mig-parted -COPY ./LICENSE ${DESTDIR} +COPY ./LICENSE . 
+COPY --from=go-build /artifacts/nvidia-mig-parted . COPY ./deployments/systemd/packages/tarball/install.sh ${DESTDIR} COPY ./deployments/systemd/config-default.yaml ${DESTDIR} COPY ./deployments/systemd/hooks.sh ${DESTDIR} @@ -47,7 +90,7 @@ COPY ./deployments/systemd/service.sh ${DESTDIR} COPY ./deployments/systemd/uninstall.sh ${DESTDIR} COPY ./deployments/systemd/utils.sh ${DESTDIR} -# output directory for final tarball +# output directory RUN mkdir -p /dist # build command diff --git a/deployments/systemd/packages/Makefile b/deployments/systemd/packages/Makefile index 483ad149..f11acc6a 100644 --- a/deployments/systemd/packages/Makefile +++ b/deployments/systemd/packages/Makefile @@ -16,9 +16,10 @@ NAME = nvidia-mig-manager REVISION ?= 1 DOCKER ?= docker -BUILD_DIR ?= ../../.. +DIST_DIR ?= $(CURDIR)/dist -include $(BUILD_DIR)/versions.mk +##### Global variables ##### +include $(CURDIR)/versions.mk ifeq ($(IMAGE),) REGISTRY ?= nvcr.io/nvidia @@ -28,51 +29,37 @@ endif # strip 'v' from version string PACKAGE_VERSION := $(VERSION:v%=%) +deb: BASE_IMAGE = ubuntu:18.04 +deb: ARTIFACTS_DIR = $(DIST_DIR)/deb +deb: DOCKERFILE = $(CURDIR)/deployments/systemd/packages/Dockerfile.deb + +rpm: BASE_IMAGE = centos:7 +rpm: ARTIFACTS_DIR = $(DIST_DIR)/rpm +rpm: DOCKERFILE = $(CURDIR)/deployments/systemd/packages/Dockerfile.rpm + +tarball: BASE_IMAGE = ubuntu:18.04 +tarball: ARTIFACTS_DIR = $(DIST_DIR)/tarball +tarball: DOCKERFILE = $(CURDIR)/deployments/systemd/packages/Dockerfile.tarball + ##### Public rules ##### -VALID_TARGETS = tarball ubuntu20.04 ubi8 +VALID_TARGETS = tarball deb rpm all: $(VALID_TARGETS) .PHONY: all $(VALID_TARGETS) -tarball: - $(DOCKER) build --pull \ - --build-arg PACKAGE_NAME=$(NAME) \ - --build-arg PACKAGE_VERSION=$(PACKAGE_VERSION) \ - --build-arg PACKAGE_REVISION=$(REVISION) \ - --build-arg GOLANG_VERSION=$(GOLANG_VERSION) \ - --tag $(IMAGE):$(vVERSION)-tarball \ - --file Dockerfile.tarball \ - $(BUILD_DIR) - $(DOCKER) run \ - -v 
$$(pwd)/dist/$(@):/dist \ - $(IMAGE):$(vVERSION)-$(@) - -ubuntu20.04: +$(VALID_TARGETS): %: $(DOCKER) build --pull \ - --build-arg PACKAGE_NAME=$(NAME) \ - --build-arg PACKAGE_VERSION=$(PACKAGE_VERSION) \ - --build-arg PACKAGE_REVISION=$(REVISION) \ + --build-arg BASE_IMAGE=$(BASE_IMAGE) \ --build-arg GOLANG_VERSION=$(GOLANG_VERSION) \ - --build-arg BASE_IMAGE=nvcr.io/nvidia/cuda:${CUDA_VERSION}-base-ubuntu20.04 \ - --tag $(IMAGE):$(vVERSION)-ubuntu20.04 \ - --file Dockerfile.ubuntu \ - $(BUILD_DIR) - $(DOCKER) run \ - -v $$(pwd)/dist/$(@):/dist \ - $(IMAGE):$(vVERSION)-$(@) - -ubi8: - $(DOCKER) build --pull \ + --build-arg VERSION=$(VERSION) \ + --build-arg GIT_COMMIT=$(GIT_COMMIT) \ --build-arg PACKAGE_NAME=$(NAME) \ --build-arg PACKAGE_VERSION=$(PACKAGE_VERSION) \ --build-arg PACKAGE_REVISION=$(REVISION) \ - --build-arg GOLANG_VERSION=$(GOLANG_VERSION) \ - --build-arg BASE_IMAGE=nvcr.io/nvidia/cuda:${CUDA_VERSION}-base-ubi8 \ - --tag $(IMAGE):$(vVERSION)-ubi8 \ - --file Dockerfile.ubi8 \ - $(BUILD_DIR) + --tag $(IMAGE):$(VERSION)-$(*) \ + --file $(DOCKERFILE) \ + $(CURDIR) - $(DOCKER) run \ - -v $$(pwd)/dist/$(@):/dist \ - $(IMAGE):$(vVERSION)-$(@) - + $(DOCKER) run --rm \ + -v $(ARTIFACTS_DIR):/dist \ + $(IMAGE):$(VERSION)-$(*) diff --git a/deployments/systemd/packages/debian/changelog b/deployments/systemd/packages/debian/changelog.old similarity index 97% rename from deployments/systemd/packages/debian/changelog rename to deployments/systemd/packages/debian/changelog.old index 1a016a7b..e3325345 100644 --- a/deployments/systemd/packages/debian/changelog +++ b/deployments/systemd/packages/debian/changelog.old @@ -1,3 +1,6 @@ +# Note: as of 0.6.1 this file is no longer updated, with a changelog +# entry for a given release generated as part of the packaging step. 
+ nvidia-mig-manager (0.6.0-1) UNRELEASED; urgency=medium * Update to latest CUDA base image 12.3.2 diff --git a/deployments/systemd/packages/rpm/SPECS/changelog.old b/deployments/systemd/packages/rpm/SPECS/changelog.old new file mode 100644 index 00000000..1da64787 --- /dev/null +++ b/deployments/systemd/packages/rpm/SPECS/changelog.old @@ -0,0 +1,108 @@ +# Note: as of 0.6.0-1 this file is no longer updated, with a changelog +# entry for a given release generated as part of the packaging step. + +* Wed Feb 28 2024 NVIDIA CORPORATION 0.6.0-1 +- Update to latest CUDA base image 12.3.2 +- Migrate to using github.com/NVIDIA/go-nvlib +- Bump Golang version to 1.20.5 +- Bump nvidia-ctk version used by k8s-mig-manager to 1.14.6 +- Update vendored go dependencies +- Minor code improvements and refactoring + +* Thu Oct 18 2023 NVIDIA CORPORATION 0.5.5-1 +- Update to latest CUDA base image 12.2.2 + +* Thu Sep 7 2023 NVIDIA CORPORATION 0.5.4-1 +- Update MIG config for Hopper with device ID of H100 80GB HBM3 SKU + +* Wed Jul 12 2023 NVIDIA CORPORATION 0.5.3-1 +- Update to latest CUDA image 12.2.0 +- Update example config for Hopper with H100 NVL and H800 NVL + +* Tue Mar 28 2023 NVIDIA CORPORATION 0.5.2-1 +- Update to latest CUDA image 12.1.0 +- Update k8s-mig-manager to support CDI +- Add two new example configs for the newly supported profiles on A100 +- Update MIG profile code to rely on go-nvlib +- Update vendored go-nvlib to latest +- Update NVML wrapper to include MIG profiles from NVML v12.0 + +* Thu Mar 09 2023 NVIDIA CORPORATION 0.5.1-1 +- Update to latest CUDA image 12.0.1 +- Add newer MIG profiles supported with NVML 12.0 to default config.yaml files +- Add profiles with media extensions for A30-24GB to default config.yaml files +- Add H100 and H800 profiles to default config.yaml files +- Add A800 profiles to default config.yaml files +- Update all calls to enumerate GPUs to use NVML or PCI as appropriate +- Bump vendored go-nvml to v12.0 +- Bump Golang version to 
1.20.1 + +* Thu Sep 08 2022 NVIDIA CORPORATION 0.5.0-1 +- Bump CUDA base image to 11.7.1 +- Remove CUDA compat libs from mig-manager in favor of libs installed by the Driver +- Use symlink for config.yaml instead of static config file +- Add k8s-mig-manager-example for Hopper +- Update k8s-mig-manager-example with standalone RBAC objects +- Explicitly delete pods launched by operator validator before reconfig +- Allow missing GPUClients file in k8s-mig-manager +- Add hooks-minimal.yaml that gets linked if on Hopper or above +- Use symlink for hooks.yaml instead of static config file +- Update install script to use go 1.16.4 +- Update hooks.sh to split out start/stop of k8s services from k8s pods +- Explicitly clear all MIG configurations before disabling MIG mode + +* Wed Aug 10 2022 NVIDIA CORPORATION 0.4.3-1 +- Update calculation for GB in MIG profile name +- Make the systemd-mig-manager a dependency of systemd-resolved.service + +* Thu Jun 16 2022 NVIDIA CORPORATION 0.4.2-1 +- Update CUDA image to 11.7.0 +- Add extra assert in k8s-mig-manager to double check mig-mode change applied +- Update mig-manager image to use NGC DL license + +* Mon May 30 2022 NVIDIA CORPORATION 0.4.1-1 +- Keep NVML alive across all mig-parted commands (except GPU reset) +- Remove unnecessary services from hooks.sh + +* Tue Apr 05 2022 NVIDIA CORPORATION 0.4.0-1 +- Update nvidia-mig-parted.sh to include MIG_PARTED_CHECKPOINT_FILE +- Add checkpoint / restore commands to mig-parted CLI +- Update golang version to 1.16.4 +- Support instantiation of *_PROFILE_6_SLICE GIs and CIs +- Update cyrus-sasl-lib to address CVE-2022-24407 +- Add support for MIG profiles with +me as an attribute extension +- Support Compute Instances in mig-parted config such that CI != GI +- Update go-nvml to v0.11.6 +- Change semantics of 'all' to mean 'all-mig-capable' in mig-parted config + +* Fri Mar 18 2022 NVIDIA CORPORATION 0.3.0-1 +- k8s-mig-manager: Add support for multi-arch images +- k8s-mig-manager: Handle 
eviction of NVSM pod when applying MIG changes + +* Wed Nov 17 2021 NVIDIA CORPORATION 0.2.0-1 +- nvidia-mig-parted: Support passing newer GI and CI profile enums on older drivers +- k8s-mig-manager: Rename nvcr.io/nvidia to nvcr.io/nvidia/cloud-native +- k8s-mig-manager: Add support for pre-installed drivers +- systemd-mig-manager: Update logic to remove 'containerd' containers in utils.sh +- systemd-mig-manager: Update logic to shutdown only active systemd services in list +- ci-infrastructure: Rework build and CI to align with other projects +- ci-infrastructure: Use pulse instead of contamer for scans + +* Mon Sep 20 2021 NVIDIA CORPORATION 0.1.3-1 +- Add default configs for the PG506-96GB card +- Remove CombinedMigManager and add wrappers for Mode/Config Managers +- Add a function to check the minimum NVML version required +- Add SystemGetNVMLVersion() to the NVML interface +- Fix small bug in assert logic for non MIG-capable GPUs + +* Thu Aug 05 2021 NVIDIA CORPORATION 0.1.2-1 +- Do not start nvidia-mig-manager.service when installing the .deb +- Restore lost assert_gpu_reset_available() function +- Add nvidia-dcgm.service to driver_services array +- Split dcgm, and dcgm-exporter in k8s-mig-manager + +* Wed May 19 2021 NVIDIA CORPORATION 0.1.1-1 +- Update packaged config.yaml to include more supported devices + +* Fri May 07 2021 NVIDIA CORPORATION 0.1.0-1 +- Initial release of rpm package for v0.1.0 \ No newline at end of file diff --git a/deployments/systemd/packages/rpm/SPECS/nvidia-mig-manager.spec b/deployments/systemd/packages/rpm/SPECS/nvidia-mig-manager.spec index a299b07d..cdffc8e2 100644 --- a/deployments/systemd/packages/rpm/SPECS/nvidia-mig-manager.spec +++ b/deployments/systemd/packages/rpm/SPECS/nvidia-mig-manager.spec @@ -133,108 +133,6 @@ maybe_remove_hooks_symlink maybe_remove_config_symlink %changelog -* Wed Feb 28 2024 NVIDIA CORPORATION 0.6.0-1 -- Update to latest CUDA base image 12.3.2 -- Migrate to using github.com/NVIDIA/go-nvlib -- 
Bump Golang version to 1.20.5 -- Bump nvidia-ctk version used by k8s-mig-manager to 1.14.6 -- Update vendored go dependencies -- Minor code improvements and refactoring - -* Thu Oct 18 2023 NVIDIA CORPORATION 0.5.5-1 -- Update to latest CUDA base image 12.2.2 - -* Thu Sep 7 2023 NVIDIA CORPORATION 0.5.4-1 -- Update MIG config for Hopper with device ID of H100 80GB HBM3 SKU - -* Wed Jul 12 2023 NVIDIA CORPORATION 0.5.3-1 -- Update to latest CUDA image 12.2.0 -- Update example config for Hopper with H100 NVL and H800 NVL - -* Tue Mar 28 2023 NVIDIA CORPORATION 0.5.2-1 -- Update to latest CUDA image 12.1.0 -- Update k8s-mig-manager to support CDI -- Add two new example configs for the newly supported profiles on A100 -- Update MIG profile code to rely on go-nvlib -- Update vendored go-nvlib to latest -- Update NVML wrapper to include MIG profiles from NVML v12.0 - -* Thu Mar 09 2023 NVIDIA CORPORATION 0.5.1-1 -- Update to latest CUDA image 12.0.1 -- Add newer MIG profiles supported with NVML 12.0 to default config.yaml files -- Add profiles with media extensions for A30-24GB to default config.yaml files -- Add H100 and H800 profiles to default config.yaml files -- Add A800 profiles to default config.yaml files -- Update all calls to enumerate GPUs to use NVML or PCI as appropriate -- Bump vendored go-nvml to v12.0 -- Bump Golang version to 1.20.1 - -* Thu Sep 08 2022 NVIDIA CORPORATION 0.5.0-1 -- Bump CUDA base image to 11.7.1 -- Remove CUDA compat libs from mig-manager in favor of libs installed by the Driver -- Use symlink for config.yaml instead of static config file -- Add k8s-mig-manager-example for Hopper -- Update k8s-mig-manager-example with standalone RBAC objects -- Explicitly delete pods launched by operator validator before reconfig -- Allow missing GPUClients file in k8s-mig-manager -- Add hooks-minimal.yaml that gets linked if on Hopper or above -- Use symlink for hooks.yaml instead of static config file -- Update install script to use go 1.16.4 -- 
Update hooks.sh to split out start/stop of k8s services from k8s pods -- Explicitly clear all MIG configurations before disabling MIG mode - -* Wed Aug 10 2022 NVIDIA CORPORATION 0.4.3-1 -- Update calculation for GB in MIG profile name -- Make the systemd-mig-manager a dependency of systemd-resolved.service - -* Thu Jun 16 2022 NVIDIA CORPORATION 0.4.2-1 -- Update CUDA image to 11.7.0 -- Add extra assert in k8s-mig-manager to double check mig-mode change applied -- Update mig-manager image to use NGC DL license - -* Mon May 30 2022 NVIDIA CORPORATION 0.4.1-1 -- Keep NVML alive across all mig-parted commands (except GPU reset) -- Remove unnecessary services from hooks.sh - -* Tue Apr 05 2022 NVIDIA CORPORATION 0.4.0-1 -- Update nvidia-mig-parted.sh to include MIG_PARTED_CHECKPOINT_FILE -- Add checkpoint / restore commands to mig-parted CLI -- Update golang version to 1.16.4 -- Support instantiation of *_PROFILE_6_SLICE GIs and CIs -- Update cyrus-sasl-lib to address CVE-2022-24407 -- Add support for MIG profiles with +me as an attribute extension -- Support Compute Instances in mig-parted config such that CI != GI -- Update go-nvml to v0.11.6 -- Change semantics of 'all' to mean 'all-mig-capable' in mig-parted config - -* Fri Mar 18 2022 NVIDIA CORPORATION 0.3.0-1 -- k8s-mig-manager: Add support for multi-arch images -- k8s-mig-manager: Handle eviction of NVSM pod when applying MIG changes - -* Wed Nov 17 2021 NVIDIA CORPORATION 0.2.0-1 -- nvidia-mig-parted: Support passing newer GI and CI profile enums on older drivers -- k8s-mig-manager: Rename nvcr.io/nvidia to nvcr.io/nvidia/cloud-native -- k8s-mig-manager: Add support for pre-installed drivers -- systemd-mig-manager: Update logic to remove 'containerd' containers in utils.sh -- systemd-mig-manager: Update logic to shutdown only active systemd services in list -- ci-infrastructure: Rework build and CI to align with other projects -- ci-infrastructure: Use pulse instead of contamer for scans - -* Mon Sep 20 2021 
NVIDIA CORPORATION 0.1.3-1 -- Add default configs for the PG506-96GB card -- Remove CombinedMigManager and add wrappers for Mode/Config Managers -- Add a function to check the minimum NVML version required -- Add SystemGetNVMLVersion() to the NVML interface -- Fix small bug in assert logic for non MIG-capable GPUs - -* Thu Aug 05 2021 NVIDIA CORPORATION 0.1.2-1 -- Do not start nvidia-mig-manager.service when installing the .deb -- Restore lost assert_gpu_reset_available() function -- Add nvidia-dcgm.service to driver_services array -- Split dcgm, and dcgm-exporter in k8s-mig-manager - -* Wed May 19 2021 NVIDIA CORPORATION 0.1.1-1 -- Update packaged config.yaml to include more supported devices - -* Fri May 07 2021 NVIDIA CORPORATION 0.1.0-1 -- Initial release of rpm package for v0.1.0 +# As of 0.6.0-1 we generate the release information automatically +* %{release_date} NVIDIA CORPORATION %{version}-%{release} +- See https://github.com/NVIDIA/mig-parted/blob/%{git_commit}/CHANGELOG.md