Skip to content

Commit

Permalink
amazon linux 2023 support
Browse files Browse the repository at this point in the history
Signed-off-by: shiva kumar <[email protected]>
  • Loading branch information
shivakunv committed Oct 30, 2024
1 parent 2014a3a commit 74b2554
Show file tree
Hide file tree
Showing 11 changed files with 968 additions and 2 deletions.
14 changes: 14 additions & 0 deletions .github/workflows/image.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,11 @@ jobs:
driver:
- 535.216.01
- 550.127.05
- 560.35.03
dist:
- ubuntu20.04
- ubuntu22.04
- amzn2023
- rhel8
ispr:
- ${{github.event_name == 'pull_request'}}
Expand All @@ -49,6 +51,18 @@ jobs:
- ispr: true
dist: ubuntu20.04
driver: 550.127.05
- ispr: true
dist: ubuntu20.04
driver: 560.35.03
- ispr: true
dist: ubuntu22.04
driver: 560.35.03
- ispr: true
dist: amzn2023
driver: 535.216.01
- ispr: true
dist: amzn2023
driver: 535.216.01
fail-fast: false
steps:
- uses: actions/checkout@v4
Expand Down
19 changes: 19 additions & 0 deletions .nvidia-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ variables:
.image-pull-ubuntu22.04:
# Perform for each DRIVER_VERSION
extends:
- .driver-versions
- .driver-versions
- .image-pull-generic
rules:
Expand Down Expand Up @@ -184,6 +185,18 @@ image-rhel8:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
- !reference [.pipeline-trigger-rules, rules]

.scan-amzn2023:
# Repeat for each DRIVER_VERSION
extends:
- .driver-versions-amzn2023
- .scan-generic
rules:
- !reference [.scan-rules-common, rules]
- if: $CI_PIPELINE_SOURCE == "schedule"
when: never
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
- !reference [.pipeline-trigger-rules, rules]

.scan-precompiled-ubuntu22.04:
variables:
DIST: signed_ubuntu22.04
Expand Down Expand Up @@ -278,6 +291,12 @@ release:ngc-ubuntu22.04:
- .dist-ubuntu22.04
- .driver-versions

release:ngc-amzn2023:
extends:
- .release:ngc
- .dist-amzn2023
- .driver-versions-amzn2023

release:ngc-precompiled-ubuntu22.04:
variables:
DIST: signed_ubuntu22.04
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ OUT_IMAGE_TAG = $(OUT_IMAGE_VERSION)-$(OUT_DIST)
OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)

##### Public rules #####
DISTRIBUTIONS := ubuntu18.04 ubuntu20.04 ubuntu22.04 signed_ubuntu20.04 signed_ubuntu22.04 rhel8 rhel9 flatcar fedora36 sles15.3 precompiled_rhcos
DISTRIBUTIONS := ubuntu18.04 ubuntu20.04 ubuntu22.04 amzn2023 signed_ubuntu20.04 signed_ubuntu22.04 rhel8 rhel9 flatcar fedora36 sles15.3 precompiled_rhcos
PUSH_TARGETS := $(patsubst %, push-%, $(DISTRIBUTIONS))
BASE_FROM := jammy focal
PUSH_TARGETS := $(patsubst %, push-%, $(DISTRIBUTIONS))
Expand Down
102 changes: 102 additions & 0 deletions amzn2023/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
FROM nvcr.io/nvidia/cuda:12.6.2-base-amzn2023 AS build

ARG TARGETARCH

SHELL ["/bin/bash", "-c"]

# Remove cuda repository to avoid GPG errors
RUN rm -f /etc/yum.repos.d/cuda*

RUN dnf update -y && dnf makecache && \
dnf install -y \
gcc \
gcc-c++ \
make \
ca-certificates \
git \
tar && \
dnf clean all && rm -rf /var/cache/yum/*

ENV GOLANG_VERSION=1.23.2

# download appropriate binary based on the target architecture for multi-arch builds
RUN curl https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${TARGETARCH}.tar.gz \
| tar -C /usr/local -xz

ENV PATH /usr/local/bin:$PATH
ENV PATH /usr/local/go/bin:$PATH

WORKDIR /work

RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \
cd driver/vgpu/src && \
go build -o vgpu-util && \
mv vgpu-util /work

FROM nvcr.io/nvidia/cuda:12.6.2-base-amzn2023

SHELL ["/bin/bash", "-c"]

ARG BASE_URL=https://us.download.nvidia.com/tesla
ARG TARGETARCH
ENV TARGETARCH=$TARGETARCH
ENV DRIVER_ARCH=${TARGETARCH/amd64/x86_64}
ARG DRIVER_VERSION
ENV DRIVER_VERSION=$DRIVER_VERSION

# Arg to indicate if driver type is either of passthrough(baremetal) or vgpu
ARG DRIVER_TYPE=passthrough
ENV DRIVER_TYPE=$DRIVER_TYPE
ARG DRIVER_BRANCH=560
ENV DRIVER_BRANCH=$DRIVER_BRANCH
ARG VGPU_LICENSE_SERVER_TYPE=NLS
ENV VGPU_LICENSE_SERVER_TYPE=$VGPU_LICENSE_SERVER_TYPE
# Enable vGPU version compability check by default
ARG DISABLE_VGPU_VERSION_CHECK=true
ENV DISABLE_VGPU_VERSION_CHECK=$DISABLE_VGPU_VERSION_CHECK
ENV NVIDIA_VISIBLE_DEVICES=void

RUN echo "TARGETARCH=$TARGETARCH"

ADD install.sh /tmp

RUN NVIDIA_GPGKEY_SUM=d0664fbbdb8c32356d45de36c5984617217b2d0bef41b93ccecd326ba3b80c87 && \
curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/amzn2023/${DRIVER_ARCH}/D42D0685.pub | sed '/^Version/d' > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
echo "$NVIDIA_GPGKEY_SUM /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA" | sha256sum -c --strict - && \
curl -fsSL -o /etc/yum.repos.d/cuda.repo https://developer.download.nvidia.com/compute/cuda/repos/amzn2023/${DRIVER_ARCH}/cuda-amzn2023.repo

RUN dnf clean all && dnf makecache && dnf update -y && dnf install -y shadow-utils
RUN /tmp/install.sh reposetup && /tmp/install.sh depinstall && \
curl -fsSL -o /usr/local/bin/donkey https://github.com/3XX0/donkey/releases/download/v1.1.0/donkey && \
chmod +x /usr/local/bin/donkey

COPY nvidia-driver /usr/local/bin
COPY --from=build /work/vgpu-util /usr/local/bin

RUN curl -fsSL -o /usr/local/bin/extract-vmlinux https://raw.githubusercontent.com/torvalds/linux/master/scripts/extract-vmlinux && \
chmod +x /usr/local/bin/extract-vmlinux

ADD drivers drivers/

# Fetch the installer automatically for passthrough/baremetal types
RUN if [ "$DRIVER_TYPE" != "vgpu" ]; then \
cd drivers && \
/tmp/install.sh download_installer; fi

# install fabric-manager and nvidia-nscq
RUN if [ "$DRIVER_TYPE" != "vgpu" ] && [ "$TARGETARCH" != "arm64" ]; then \
dnf install -y nvidia-fabric-manager-${DRIVER_VERSION}-1 libnvidia-nscq-${DRIVER_BRANCH}-${DRIVER_VERSION}-1; fi

WORKDIR /drivers

ARG PUBLIC_KEY=empty
COPY ${PUBLIC_KEY} kernel/pubkey.x509

# Remove cuda repository to avoid GPG errors
RUN rm -f /etc/yum.repos.d/cuda*
RUN dnf clean all

# Add NGC DL license from the CUDA image
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE

ENTRYPOINT ["nvidia-driver", "init"]
3 changes: 3 additions & 0 deletions amzn2023/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# AmazonLinux2 [![build status](https://gitlab.com/nvidia/driver/badges/master/build.svg)](https://gitlab.com/nvidia/driver/commits/master)

See https://github.com/NVIDIA/nvidia-docker/wiki/Driver-containers-(Beta)
1 change: 1 addition & 0 deletions amzn2023/drivers/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Folder for downloading vGPU drivers and dependent metadata files
Empty file added amzn2023/empty
Empty file.
58 changes: 58 additions & 0 deletions amzn2023/install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/usr/bin/env bash

set -eux

download_installer () {
DRIVER_ARCH=${TARGETARCH/amd64/x86_64} && curl -fSsl -O $BASE_URL/$DRIVER_VERSION/NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run && \
chmod +x NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run;
}

dep_install () {
if [ "$TARGETARCH" = "amd64" ]; then
DRIVER_ARCH=${TARGETARCH/amd64/x86_64}
dnf update -y && dnf install -y \
gcc \
make \
glibc-devel \
ca-certificates \
kmod \
file \
elfutils-libelf-devel \
libglvnd-devel \
shadow-utils \
util-linux \
tar \
rpm-build \
dnf-utils \
pkgconfig && \
dnf clean all && \
rm -rf /var/cache/yum/*
fi
}

repo_setup () {
if [ "$TARGETARCH" = "amd64" ]; then
echo "[cuda-amzn2023-x86_64]
name=cuda-amzn2023-x86_64
baseurl=https://developer.download.nvidia.com/compute/cuda/repos/amzn2023/$DRIVER_ARCH
enabled=1
gpgcheck=1
gpgkey=https://developer.download.nvidia.com/compute/cuda/repos/amzn2023/$DRIVER_ARCH/D42D0685.pub" > /etc/yum.repos.d/cuda.repo && \
usermod -o -u 0 -g 0 nobody
else
echo "TARGETARCH doesn't match a known arch target"
exit 1
fi
}

if [ "$1" = "reposetup" ]; then
repo_setup
elif [ "$1" = "depinstall" ]; then
dep_install
elif [ "$1" = "download_installer" ]; then
download_installer
else
echo "Unknown function: $1"
exit 1
fi

Loading

0 comments on commit 74b2554

Please sign in to comment.