Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Amazon linux support #127

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions .common-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ trigger-pipeline:
DIST: ubuntu22.04
CVE_UPDATES: "openssl"

# Distribution selector for Amazon Linux 2023: sets DIST to amzn2023
.dist-amzn2023:
variables:
DIST: amzn2023

.dist-rhel8:
variables:
DIST: rhel8
Expand Down Expand Up @@ -162,6 +166,14 @@ trigger-pipeline:
rules:
- if: $CI_PIPELINE_SOURCE != "schedule"

# Release template for Amazon Linux 2023, composed from .release-generic and .driver-versions
.release-amzn2023:
# Perform for each DRIVER_VERSION
extends:
- .release-generic
- .driver-versions
rules:
# Matches only when the pipeline source is not "schedule"
- if: $CI_PIPELINE_SOURCE != "schedule"

.release-rhel9:
# Perform for each DRIVER_VERSION
extends:
Expand Down Expand Up @@ -199,6 +211,15 @@ trigger-pipeline:
OUT_REGISTRY: "${CI_REGISTRY}"
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/staging/driver"

# Staging release for Amazon Linux 2023: extends .release-amzn2023 and directs the
# output to the CI registry (CI_REGISTRY* variables) under staging/driver
.release:staging-amzn2023:
extends:
- .release-amzn2023
variables:
OUT_REGISTRY_USER: "${CI_REGISTRY_USER}"
OUT_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
OUT_REGISTRY: "${CI_REGISTRY}"
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/staging/driver"

.release:staging-rhel9:
extends:
- .release-rhel9
Expand Down
16 changes: 16 additions & 0 deletions .github/workflows/image.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ jobs:
dist:
- ubuntu20.04
- ubuntu22.04
- amzn2023
- rhel8
ispr:
- ${{github.event_name == 'pull_request'}}
Expand All @@ -50,6 +51,21 @@ jobs:
- ispr: true
dist: ubuntu20.04
driver: 550.127.05
- ispr: true
dist: ubuntu20.04
driver: 560.35.03
- ispr: true
dist: ubuntu22.04
driver: 560.35.03
- ispr: true
dist: amzn2023
driver: 535.216.01
- ispr: true
dist: amzn2023
driver: 550.127.05
- ispr: true
dist: amzn2023
driver: 560.35.03
fail-fast: false
steps:
- uses: actions/checkout@v4
Expand Down
14 changes: 14 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,15 @@ include:
rules:
- if: $CI_PIPELINE_SOURCE != "schedule"

# Define the image build targets
# Amazon Linux 2023 image build template, composed from .driver-versions and .image-build-generic
.image-build-amzn2023:
# Perform for each DRIVER_VERSION
extends:
- .driver-versions
- .image-build-generic
rules:
# Matches only when the pipeline source is not "schedule"
- if: $CI_PIPELINE_SOURCE != "schedule"

# Define the image build targets
.image-build-rhel9:
# Perform for each DRIVER_VERSION
Expand All @@ -69,6 +78,11 @@ image-ubuntu22.04:
- .image-build-ubuntu22.04
- .dist-ubuntu22.04

# Image job for Amazon Linux 2023: the amzn2023 build template combined with the amzn2023 DIST selector
image-amzn2023:
extends:
- .image-build-amzn2023
- .dist-amzn2023

image-rhel8:
extends:
- .image-build
Expand Down
21 changes: 21 additions & 0 deletions .nvidia-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,18 @@ image-rhel8:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
- !reference [.pipeline-trigger-rules, rules]

# Scan template for Amazon Linux 2023, composed from .driver-versions and .scan-generic
.scan-amzn2023:
# Repeat for each DRIVER_VERSION
extends:
- .driver-versions
- .scan-generic
rules:
# Rules are listed in order: the shared scan rules come first
- !reference [.scan-rules-common, rules]
# Never run when the pipeline source is "schedule"
- if: $CI_PIPELINE_SOURCE == "schedule"
when: never
# Match merge request pipelines
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
# Finally, reuse the rules defined on .pipeline-trigger-rules
- !reference [.pipeline-trigger-rules, rules]

.scan-precompiled-ubuntu22.04:
variables:
DIST: signed_ubuntu22.04
Expand Down Expand Up @@ -278,6 +290,12 @@ release:ngc-ubuntu22.04:
- .dist-ubuntu22.04
- .driver-versions

# NGC release job for Amazon Linux 2023: combines .release:ngc, the amzn2023 DIST selector and .driver-versions
release:ngc-amzn2023:
extends:
- .release:ngc
- .dist-amzn2023
- .driver-versions

release:ngc-precompiled-ubuntu22.04:
variables:
DIST: signed_ubuntu22.04
Expand Down Expand Up @@ -439,3 +457,6 @@ sign:ngc-ubuntu-rhel-rhcos:
- SIGN_JOB_NAME: ["rhcos"]
VERSION: ["4.12","4.13","4.14","4.15", "4.16", "4.17"]
DRIVER_VERSION: ["535.216.01", "550.127.05", "565.57.01"]
- SIGN_JOB_NAME: ["amzn"]
VERSION: ["2023"]
DRIVER_VERSION: ["565.57.01"]
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ OUT_IMAGE_TAG = $(OUT_IMAGE_VERSION)-$(OUT_DIST)
OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)

##### Public rules #####
DISTRIBUTIONS := ubuntu18.04 ubuntu20.04 ubuntu22.04 signed_ubuntu20.04 signed_ubuntu22.04 rhel8 rhel9 flatcar fedora36 sles15.3 precompiled_rhcos
DISTRIBUTIONS := amzn2023 flatcar fedora36 precompiled_rhcos rhel8 rhel9 signed_ubuntu20.04 signed_ubuntu22.04 sles15.3 ubuntu18.04 ubuntu20.04 ubuntu22.04
PUSH_TARGETS := $(patsubst %, push-%, $(DISTRIBUTIONS))
BASE_FROM := jammy focal
PUSH_TARGETS := $(patsubst %, push-%, $(DISTRIBUTIONS))
Expand Down
101 changes: 101 additions & 0 deletions amzn2023/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Build stage ("build"): produces the vgpu-util binary that the final stage copies in
FROM nvcr.io/nvidia/cuda:12.6.2-base-amzn2023 AS build

# Target architecture of the build; selects the Go tarball downloaded later in this stage
ARG TARGETARCH

# Run the RUN instructions of this stage through bash
SHELL ["/bin/bash", "-c"]

# Remove cuda repository to avoid GPG errors
RUN rm -f /etc/yum.repos.d/cuda*
shivakunv marked this conversation as resolved.
Show resolved Hide resolved

# Install the compiler toolchain plus git/tar needed to fetch and build vgpu-util
RUN dnf update -y && dnf makecache && \
    dnf install -y \
        gcc \
        gcc-c++ \
        make \
        ca-certificates \
        git \
        tar && \
    dnf clean all && rm -rf /var/cache/yum/*

ENV GOLANG_VERSION=1.23.2

# download appropriate binary based on the target architecture for multi-arch builds
# -f makes curl exit non-zero on an HTTP error instead of piping the error page
# into tar; -L follows redirects; -sS stays quiet but still reports errors.
RUN curl -fsSL https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${TARGETARCH}.tar.gz \
    | tar -C /usr/local -xz

# key=value form (the space-separated "ENV key value" syntax is deprecated).
# The resulting search order is unchanged: Go toolchain, then /usr/local/bin,
# then the inherited PATH.
ENV PATH=/usr/local/go/bin:/usr/local/bin:$PATH

WORKDIR /work

# Clone the driver container sources, build vgpu-util and leave the binary in /work
RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \
    cd driver/vgpu/src && \
    go build -o vgpu-util && \
    mv vgpu-util /work

# Final stage: the driver container image itself
FROM nvcr.io/nvidia/cuda:12.6.2-base-amzn2023

# Run the RUN instructions of this stage through bash
SHELL ["/bin/bash", "-c"]

# Base URL used by "install.sh download_installer" to fetch the .run installer
ARG BASE_URL=https://us.download.nvidia.com/tesla
# Build arguments are re-exported as ENV so they remain set in the running container
ARG TARGETARCH
ENV TARGETARCH=$TARGETARCH
# Driver architecture name: TARGETARCH with "amd64" rewritten to "x86_64"
ENV DRIVER_ARCH=${TARGETARCH/amd64/x86_64}
ARG DRIVER_VERSION
ENV DRIVER_VERSION=$DRIVER_VERSION

# Arg to indicate if driver type is either of passthrough(baremetal) or vgpu
ARG DRIVER_TYPE=passthrough
ENV DRIVER_TYPE=$DRIVER_TYPE
ARG DRIVER_BRANCH=560
ENV DRIVER_BRANCH=$DRIVER_BRANCH
ARG VGPU_LICENSE_SERVER_TYPE=NLS
ENV VGPU_LICENSE_SERVER_TYPE=$VGPU_LICENSE_SERVER_TYPE
# vGPU version compatibility check: DISABLE_VGPU_VERSION_CHECK defaults to true.
# NOTE(review): the previous comment claimed the check is enabled by default,
# which contradicts this default - confirm the intended behavior.
ARG DISABLE_VGPU_VERSION_CHECK=true
ENV DISABLE_VGPU_VERSION_CHECK=$DISABLE_VGPU_VERSION_CHECK
ENV NVIDIA_VISIBLE_DEVICES=void

# Print the target architecture into the build log
RUN echo "TARGETARCH=$TARGETARCH"

# install.sh is a plain local file, so COPY is used; ADD's URL-fetch and
# archive-extraction behavior is not needed here.
COPY install.sh /tmp/

# Download the NVIDIA repository public key (dropping lines that start with
# "Version"), verify it against the pinned SHA-256 sum, then download the CUDA
# repository definition for this architecture
RUN NVIDIA_GPGKEY_SUM=d0664fbbdb8c32356d45de36c5984617217b2d0bef41b93ccecd326ba3b80c87 && \
curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/amzn2023/${DRIVER_ARCH}/D42D0685.pub | sed '/^Version/d' > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
echo "$NVIDIA_GPGKEY_SUM /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA" | sha256sum -c --strict - && \
curl -fsSL -o /etc/yum.repos.d/cuda.repo https://developer.download.nvidia.com/compute/cuda/repos/amzn2023/${DRIVER_ARCH}/cuda-amzn2023.repo

# Refresh package metadata, apply updates and install shadow-utils
# (presumably for the usermod call made by "install.sh reposetup" - confirm)
RUN dnf clean all && dnf makecache && dnf update -y && dnf install -y shadow-utils
# Run the install.sh repository-setup and dependency-install steps, then
# download the donkey v1.1.0 binary and make it executable
RUN /tmp/install.sh reposetup && /tmp/install.sh depinstall && \
curl -fsSL -o /usr/local/bin/donkey https://github.com/3XX0/donkey/releases/download/v1.1.0/donkey && \
chmod +x /usr/local/bin/donkey

# Add nvidia-driver (the image entrypoint) and the vgpu-util binary from the build stage
COPY nvidia-driver /usr/local/bin
COPY --from=build /work/vgpu-util /usr/local/bin

# Download the extract-vmlinux script from the master branch of the Linux kernel
# tree (not pinned to a commit) and make it executable
RUN curl -fsSL -o /usr/local/bin/extract-vmlinux https://raw.githubusercontent.com/torvalds/linux/master/scripts/extract-vmlinux && \
shivakunv marked this conversation as resolved.
Show resolved Hide resolved
chmod +x /usr/local/bin/extract-vmlinux

# Add the local drivers directory to the image as drivers/
ADD drivers drivers/

# Fetch the installer automatically for passthrough/baremetal types
RUN if [ "$DRIVER_TYPE" != "vgpu" ]; then \
cd drivers && \
/tmp/install.sh download_installer; fi

# Install nvidia-fabric-manager and libnvidia-nscq pinned to DRIVER_VERSION
# (skipped when DRIVER_TYPE is vgpu or TARGETARCH is arm64)
RUN if [ "$DRIVER_TYPE" != "vgpu" ] && [ "$TARGETARCH" != "arm64" ]; then \
dnf install -y nvidia-fabric-manager-${DRIVER_VERSION}-1 libnvidia-nscq-${DRIVER_BRANCH}-${DRIVER_VERSION}-1; fi

WORKDIR /drivers

# PUBLIC_KEY names the file copied to kernel/pubkey.x509; the default is the
# file called "empty"
ARG PUBLIC_KEY=empty
COPY ${PUBLIC_KEY} kernel/pubkey.x509

# Remove cuda repository to avoid GPG errors
# clean cache
# Add NGC DL license from the CUDA image
RUN rm -f /etc/yum.repos.d/cuda* && dnf clean all && \
mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE

# The container starts by running "nvidia-driver init"
ENTRYPOINT ["nvidia-driver", "init"]
3 changes: 3 additions & 0 deletions amzn2023/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Amazon Linux 2023 [![build status](https://gitlab.com/nvidia/driver/badges/master/build.svg)](https://gitlab.com/nvidia/driver/commits/master)

See https://github.com/NVIDIA/nvidia-docker/wiki/Driver-containers-(Beta)
1 change: 1 addition & 0 deletions amzn2023/drivers/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Folder for downloading vGPU drivers and dependent metadata files
Empty file added amzn2023/empty
Empty file.
58 changes: 58 additions & 0 deletions amzn2023/install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/usr/bin/env bash

set -eu

# Download the NVIDIA .run installer for DRIVER_VERSION into the current
# directory and mark it executable.
#
# Reads: TARGETARCH, BASE_URL, DRIVER_VERSION
# Sets:  DRIVER_ARCH (TARGETARCH with "amd64" rewritten to "x86_64")
# Returns non-zero if the download or the chmod fails.
download_installer () {
    DRIVER_ARCH=${TARGETARCH/amd64/x86_64}
    local installer="NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}.run"

    # -L (follow redirects) replaces the former lowercase -l, which is curl's
    # --list-only option and has no effect on an HTTPS download.
    # Expansions are quoted so unusual values cannot be word-split.
    curl -fsSL -O "${BASE_URL}/${DRIVER_VERSION}/${installer}" && \
        chmod +x "${installer}"
}

# Update the system and install the dependency packages with dnf, then clean
# the package caches. Only acts when TARGETARCH is "amd64"; for any other
# value the function does nothing and returns success.
#
# Reads: TARGETARCH
# Sets:  DRIVER_ARCH (amd64 path only)
dep_install () {
    # Guard clause: nothing to do on other architectures
    [ "$TARGETARCH" = "amd64" ] || return 0

    DRIVER_ARCH=${TARGETARCH/amd64/x86_64}

    local packages=(
        gcc
        make
        glibc-devel
        ca-certificates
        kmod
        file
        elfutils-libelf-devel
        libglvnd-devel
        shadow-utils
        util-linux
        tar
        rpm-build
        dnf-utils
        pkgconfig
    )

    # Each step runs only if the previous one succeeded; the status of the
    # chain is the function's return status.
    dnf update -y \
        && dnf install -y "${packages[@]}" \
        && dnf clean all \
        && rm -rf /var/cache/yum/*
}

# Write the CUDA repository definition to /etc/yum.repos.d/cuda.repo and
# change the "nobody" account to uid 0 / gid 0.
#
# Reads: TARGETARCH
# Sets:  DRIVER_ARCH (TARGETARCH with "amd64" rewritten to "x86_64")
# Exits the script with status 1 when TARGETARCH is not "amd64".
repo_setup () {
    if [ "$TARGETARCH" != "amd64" ]; then
        echo "TARGETARCH doesn't match a known arch target"
        exit 1
    fi

    # Derive the architecture here, as the sibling functions do, instead of
    # relying on the caller to export DRIVER_ARCH: under "set -u" an unset
    # DRIVER_ARCH would abort the script with an unbound-variable error.
    DRIVER_ARCH=${TARGETARCH/amd64/x86_64}

    # usermod: -o permits the non-unique uid 0 being assigned to "nobody"
    echo "[cuda-amzn2023-x86_64]
name=cuda-amzn2023-x86_64
baseurl=https://developer.download.nvidia.com/compute/cuda/repos/amzn2023/$DRIVER_ARCH
enabled=1
gpgcheck=1
gpgkey=https://developer.download.nvidia.com/compute/cuda/repos/amzn2023/$DRIVER_ARCH/D42D0685.pub" > /etc/yum.repos.d/cuda.repo && \
        usermod -o -u 0 -g 0 nobody
}

# Entry point: run the step named by the first argument.
# Any other value prints an error and exits with status 1.
case "$1" in
    reposetup)
        repo_setup
        ;;
    depinstall)
        dep_install
        ;;
    download_installer)
        download_installer
        ;;
    *)
        echo "Unknown function: $1"
        exit 1
        ;;
esac

Loading