From 83b3b05544c90159cd043d6edb70b0b9a51de73f Mon Sep 17 00:00:00 2001
From: Nicholas Sielicki
Date: Thu, 15 Aug 2024 09:23:47 -0700
Subject: [PATCH] feat(ci): add package generation

stack-info: PR: https://github.com/aws/aws-ofi-nccl/pull/592, branch: aws-nslick/stack/33
---
 .docker/Dockerfile.dnf          |  46 ++++++++
 .docker/Dockerfile.dpkg         |  77 +++++++++++++
 .docker/Dockerfile.efa          |  15 +++
 .docker/Dockerfile.makedist     |  24 ++++
 .docker/Dockerfile.srpm         |  18 ++++
 .docker/Dockerfile.yum          |  42 ++++++++
 .github/workflows/packages.yaml |  93 ++++++++++++++++
 .gitignore                      |   2 +
 .packit.yml                     |  20 ++++
 .packit/libnccl-net-ofi.spec    | 105 ++++++++++++++++++
 .version                        |   1 +
 Makefile.am                     |   1 +
 configure.ac                    |   2 +-
 docker-bake.hcl                 | 187 ++++++++++++++++++++++++++++++++
 14 files changed, 632 insertions(+), 1 deletion(-)
 create mode 100644 .docker/Dockerfile.dnf
 create mode 100644 .docker/Dockerfile.dpkg
 create mode 100644 .docker/Dockerfile.efa
 create mode 100644 .docker/Dockerfile.makedist
 create mode 100644 .docker/Dockerfile.srpm
 create mode 100644 .docker/Dockerfile.yum
 create mode 100644 .github/workflows/packages.yaml
 create mode 100644 .packit.yml
 create mode 100644 .packit/libnccl-net-ofi.spec
 create mode 100644 .version
 create mode 100644 docker-bake.hcl

diff --git a/.docker/Dockerfile.dnf b/.docker/Dockerfile.dnf
new file mode 100644
index 000000000..67bd052a4
--- /dev/null
+++ b/.docker/Dockerfile.dnf
@@ -0,0 +1,46 @@
+#
+# Copyright (c) 2024, Amazon.com, Inc. or its affiliates. All rights reserved.
+#
+# See LICENSE.txt for license information
+#
+
+ARG FAMILY=fedora
+ARG VERSION=rawhide
+ARG VARIANT=cuda
+ARG CUDA_DISTRO
+ARG AWS_BUILD
+ARG ENABLE_POWERTOOLS
+
+# Install EFA-installer deps.
+FROM ${FAMILY}:${VERSION} AS builder
+ARG CUDA_DISTRO
+ARG ENABLE_POWERTOOLS
+ENV CUDA_DISTRO=${CUDA_DISTRO}
+ENV ENABLE_POWERTOOLS=${ENABLE_POWERTOOLS}
+# Add NVIDIA repo for CUDA builds.
+COPY --from=efainstaller / /
+RUN --mount=type=cache,target=/var/cache/yum,sharing=locked \
+    --mount=type=cache,target=/var/cache/dnf,sharing=locked \
+    bash -c "cd /aws-efa-installer && dnf install -y gcc rpmdevtools rpmlint dnf-plugins-core util-linux && ./efa_installer.sh -n -l -k -d -y && rm -rf /aws-efa-installer" && \
+    dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/${CUDA_DISTRO}/$(uname -m)/cuda-${CUDA_DISTRO}.repo && \
+    ( test "${ENABLE_POWERTOOLS}" = "1" && sed -i 's/enabled=0/enabled=1/' /etc/yum.repos.d/Rocky-PowerTools.repo || /bin/true ) && \
+    dnf -y update && dnf -y upgrade
+RUN rpmdev-setuptree
+
+FROM builder AS environment
+ARG VARIANT
+ARG AWS_BUILD
+ARG TOOLKIT_VERSION=12-6
+ENV VARIANT=${VARIANT}
+ENV AWS_BUILD=${AWS_BUILD}
+ENV TOOLKIT_VERSION=${TOOLKIT_VERSION}
+COPY --from=srpm . .
+RUN echo "%with_${VARIANT} 1" >> ~/.rpmmacros
+RUN echo "%with_platform_aws ${AWS_BUILD}" >> ~/.rpmmacros
+RUN echo "%_cuda_toolkit_version ${TOOLKIT_VERSION}" >> ~/.rpmmacros
+RUN --mount=type=cache,target=/var/cache/yum,sharing=locked \
+    --mount=type=cache,target=/var/cache/dnf,sharing=locked \
+    dnf -y install cuda-cudart-devel-${TOOLKIT_VERSION} && dnf -y builddep *.src.rpm && rpmbuild --rebuild *.src.rpm
+
+FROM scratch
+COPY --from=environment /root/rpmbuild/RPMS/**/* /
diff --git a/.docker/Dockerfile.dpkg b/.docker/Dockerfile.dpkg
new file mode 100644
index 000000000..475cad99e
--- /dev/null
+++ b/.docker/Dockerfile.dpkg
@@ -0,0 +1,77 @@
+#
+# Copyright (c) 2024, Amazon.com, Inc. or its affiliates. All rights reserved.
+#
+# See LICENSE.txt for license information
+#
+
+ARG FAMILY=ubuntu
+ARG VERSION=latest
+ARG CUDA_DISTRO
+ARG DEBIAN_FRONTEND=noninteractive
+ARG AWS_BUILD
+
+FROM ${FAMILY}:${VERSION} AS build
+ARG CUDA_DISTRO
+ENV CUDA_DISTRO=${CUDA_DISTRO}
+ARG AWS_BUILD=0
+ENV AWS_BUILD=${AWS_BUILD}
+# Accelerator to configure for ("cuda" or "neuron"). It has to be declared in
+# this stage, otherwise the ./configure test below always sees an empty value.
+ARG ACCELERATOR=cuda
+ARG TOOLKIT_VERSION=12-6
+# Global ARGs are only visible to FROM lines unless redeclared in the stage.
+ARG DEBIAN_FRONTEND
+
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt,sharing=locked \
+    apt-get update -y && apt-get install wget -y
+
+RUN wget https://developer.download.nvidia.com/compute/cuda/repos/${CUDA_DISTRO}/$(uname -m)/cuda-keyring_1.1-1_all.deb
+
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt,sharing=locked \
+    dpkg -i cuda-keyring_1.1-1_all.deb
+
+COPY --from=efainstaller / .
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt,sharing=locked \
+    bash -c "apt-get update -y && cd /aws-efa-installer && ./efa_installer.sh -n -l -k -d -y && apt-get install -y autoconf automake libtool gcc g++ git libhwloc-dev make && rm -rf /aws-efa-installer"
+# CUDA builds need the runtime headers/libraries from the NVIDIA repository
+# that the keyring package above enables.
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt,sharing=locked \
+    if [ "$ACCELERATOR" = "cuda" ]; then apt-get update -y && apt-get install -y cuda-cudart-dev-${TOOLKIT_VERSION}; fi
+
+COPY --from=makedist / .
+RUN tar xvf ./aws-ofi-nccl*.tar.gz -C .
+RUN cd aws-ofi-nccl* && \
+    ./configure --$(test "$ACCELERATOR" = "cuda" && echo "with-cuda=/usr/local/cuda" || echo "enable-neuron=yes") \
+        --prefix=/opt/amazon/libnccl-net-ofi$(test "$AWS_BUILD" -eq 0 || echo -n "-aws") \
+        --with-libfabric=/opt/amazon/efa \
+        --disable-tests \
+        --$(test "$AWS_BUILD" -eq 0 && echo -n "disable" || echo -n "enable")-platform-aws \
+        --with-mpi=no && make -j && make install
+
+FROM ubuntu:22.04 AS packager
+ARG FAMILY
+ARG VERSION
+ARG AWS_BUILD=0
+ARG DEBIAN_FRONTEND
+ENV AWS_BUILD=${AWS_BUILD}
+ENV FAMILY=${FAMILY}
+ENV VERSION=${VERSION}
+COPY --from=build /opt/amazon/ /opt/amazon/
+RUN find /opt/amazon/ -name '*.la' -delete
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt,sharing=locked \
+    apt-get update -y && apt-get install -y ruby tar squashfs-tools binutils && gem install fpm
+RUN fpm \
+    -s dir -t deb \
+    --license Apache2.0 \
+    -p /libnccl-net-ofi$(test "$AWS_BUILD" -eq 0 || echo -n "-aws")-${FAMILY}-${VERSION}.deb \
+    --name nccl-net-ofi$(test "$AWS_BUILD" -eq 0 || echo -n "-aws") \
+    /opt/amazon/libnccl-net-ofi$(test "$AWS_BUILD" -eq 0 || echo -n "-aws")/=/opt/amazon/libnccl-net-ofi$(test "$AWS_BUILD" -eq 0 || echo -n "-aws")
+
+FROM scratch
+COPY --from=packager /libnccl-net-ofi* /
+
diff --git a/.docker/Dockerfile.efa b/.docker/Dockerfile.efa
new file mode 100644
index 000000000..92c1e1161
--- /dev/null
+++ b/.docker/Dockerfile.efa
@@ -0,0 +1,15 @@
+#
+# Copyright (c) 2024, Amazon.com, Inc. or its affiliates. All rights reserved.
+#
+# See LICENSE.txt for license information
+#
+
+FROM alpine:3.20 AS efa_installer_extracted
+ARG EFA_INSTALLER_VERSION=latest
+ENV EFA_INSTALLER_VERSION=${EFA_INSTALLER_VERSION}
+RUN apk add --no-cache tar curl
+RUN mkdir /libfabric
+RUN set -o pipefail && curl -fsSL https://efa-installer.amazonaws.com/aws-efa-installer-${EFA_INSTALLER_VERSION}.tar.gz | tar -xvzf - -C /
+
+FROM scratch
+COPY --from=efa_installer_extracted /aws-efa-installer /aws-efa-installer
diff --git a/.docker/Dockerfile.makedist b/.docker/Dockerfile.makedist
new file mode 100644
index 000000000..10d3a4426
--- /dev/null
+++ b/.docker/Dockerfile.makedist
@@ -0,0 +1,24 @@
+#
+# Copyright (c) 2024, Amazon.com, Inc. or its affiliates. All rights reserved.
+#
+# See LICENSE.txt for license information
+#
+
+ARG ACCELERATOR
+ARG BASE_IMAGE=ubuntu:22.04
+FROM ${BASE_IMAGE} AS distbuilder
+ARG ACCELERATOR
+ENV ACCELERATOR=${ACCELERATOR}
+RUN mkdir /aws-efa-installer
+COPY --from=efainstaller /aws-efa-installer /aws-efa-installer
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt,sharing=locked \
+    bash -c "apt-get update -y && cd /aws-efa-installer && ./efa_installer.sh -n -l -k -d -y && apt-get install -y autoconf automake libtool gcc git libhwloc-dev make && rm -rf /aws-efa-installer"
+COPY . /proj
+WORKDIR /proj
+RUN autoreconf -ivf
+RUN ./configure --with-libfabric=/opt/amazon/efa --$(test "$ACCELERATOR" = "cuda" && echo "with-cuda=/usr/local/cuda" || echo "enable-neuron=yes")
+RUN make dist
+
+FROM scratch
+COPY --from=distbuilder /proj/aws-ofi-nccl*.tar.gz /
diff --git a/.docker/Dockerfile.srpm b/.docker/Dockerfile.srpm
new file mode 100644
index 000000000..09e1138b5
--- /dev/null
+++ b/.docker/Dockerfile.srpm
@@ -0,0 +1,18 @@
+#
+# Copyright (c) 2024, Amazon.com, Inc. or its affiliates. All rights reserved.
+#
+# See LICENSE.txt for license information
+#
+
+FROM fedora:rawhide AS packitimg
+RUN dnf install -y packit mock
+
+FROM packitimg AS srpm
+RUN mkdir /proj
+WORKDIR /proj
+COPY --from=src . .
+COPY --from=makedist . .
+RUN packit srpm
+
+FROM scratch
+COPY --from=srpm /proj/*.src.rpm /
diff --git a/.docker/Dockerfile.yum b/.docker/Dockerfile.yum
new file mode 100644
index 000000000..4d02d7580
--- /dev/null
+++ b/.docker/Dockerfile.yum
@@ -0,0 +1,42 @@
+#
+# Copyright (c) 2024, Amazon.com, Inc. or its affiliates. All rights reserved.
+#
+# See LICENSE.txt for license information
+#
+
+ARG FAMILY=amazonlinux
+ARG VERSION=2
+ARG VARIANT=cuda
+ARG CUDA_DISTRO
+ARG AWS_BUILD
+
+# Install EFA-installer deps.
+FROM ${FAMILY}:${VERSION} AS builder
+ARG CUDA_DISTRO
+ENV CUDA_DISTRO=${CUDA_DISTRO}
+# Add NVIDIA repo for CUDA builds.
+COPY --from=efainstaller / /
+RUN --mount=type=cache,target=/var/cache/yum,sharing=locked \
+    --mount=type=cache,target=/var/cache/dnf,sharing=locked \
+    bash -c "cd /aws-efa-installer && yum install -y gcc rpmdevtools rpmlint yum-utils util-linux && ./efa_installer.sh -n -l -k -d -y && rm -rf /aws-efa-installer" && \
+    yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/${CUDA_DISTRO}/$(uname -m)/cuda-${CUDA_DISTRO}.repo && \
+    yum update -y
+RUN rpmdev-setuptree
+
+FROM builder AS environment
+ARG VARIANT
+ARG AWS_BUILD
+ARG TOOLKIT_VERSION=12-6
+ENV VARIANT=${VARIANT}
+ENV AWS_BUILD=${AWS_BUILD}
+ENV TOOLKIT_VERSION=${TOOLKIT_VERSION}
+COPY --from=srpm . .
+RUN echo "%with_${VARIANT} 1" >> ~/.rpmmacros
+RUN echo "%with_platform_aws ${AWS_BUILD}" >> ~/.rpmmacros
+RUN echo "%_cuda_toolkit_version ${TOOLKIT_VERSION}" >> ~/.rpmmacros
+RUN --mount=type=cache,target=/var/cache/yum,sharing=locked \
+    --mount=type=cache,target=/var/cache/dnf,sharing=locked \
+    yum install -y cuda-cudart-devel-${TOOLKIT_VERSION} && yum-builddep -y *.src.rpm && rpmbuild --rebuild *.src.rpm
+
+FROM scratch
+COPY --from=environment /root/rpmbuild/RPMS/**/* /
diff --git a/.github/workflows/packages.yaml b/.github/workflows/packages.yaml
new file mode 100644
index 000000000..f3c08599e
--- /dev/null
+++ b/.github/workflows/packages.yaml
@@ -0,0 +1,93 @@
+name: Package Generation
+on:
+  push:
+    branches:
+      - master
+      - main
+      - v*
+  pull_request:
+
+jobs:
+  dist:
+    name: Call make dist
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+        with:
+          platforms: linux/amd64,linux/arm64
+      - name: docker buildx bake makedist
+        uses: docker/bake-action@v5
+        with:
+          set: |
+            *.cache-from=type=gha
+            *.cache-to=type=gha,mode=max
+          push: true
+          targets: makedist
+  srpm:
+    name: Generate a universal SRPM
+    needs: [ dist ]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+        with:
+          platforms: linux/amd64,linux/arm64
+      - name: docker buildx bake srpm
+        uses: docker/bake-action@v5
+        with:
+          set: |
+            *.cache-from=type=gha
+            *.cache-to=type=gha,mode=max
+          push: true
+          targets: srpm
+  debs:
+    name: Generate Debian-like Packages
+    needs: [ dist ]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+        with:
+          platforms: linux/amd64,linux/arm64
+      - name: docker buildx bake debs
+        uses: docker/bake-action@v5
+        with:
+          set: |
+            *.cache-from=type=gha
+            *.cache-to=type=gha,mode=max
+          push: ${{ github.event_name != 'pull_request' }}
+          targets: debs
+  rpms:
+    name: Generate RPM-like Packages
+    needs: [ srpm ]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+        with:
+          platforms: linux/amd64,linux/arm64
+      - name: docker buildx bake rpms
+        uses: docker/bake-action@v5
+        with:
+          set: |
+            *.cache-from=type=gha
+            *.cache-to=type=gha,mode=max
+          push: ${{ github.event_name != 'pull_request' }}
+          targets: rpms
diff --git a/.gitignore b/.gitignore
index 7f8ecadfc..b82acce0c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -75,3 +75,5 @@ m4/lt~obsolete.m4
 .idea/
 .devenv/
 .direnv
+*.src.rpm
+dockerbld
diff --git a/.packit.yml b/.packit.yml
new file mode 100644
index 000000000..823daacb1
--- /dev/null
+++ b/.packit.yml
@@ -0,0 +1,20 @@
+---
+# vi:ts=2 sw=2 et:
+#
+# Docs: https://packit.dev/docs/
+
+
+srpm_build_deps:
+  - git
+
+actions:
+  get-current-version:
+    - bash -c "cat .version"
+  create-archive:
+    - bash -c "echo ./aws-ofi-nccl-${PACKIT_PROJECT_VERSION}.tar.gz"
+
+specfile_path: .packit/libnccl-net-ofi.spec
+upstream_package_name: libnccl-net-ofi
+downstream_package_name: libnccl-net-ofi
+release_suffix: "{PACKIT_PROJECT_BRANCH}"
+update_release: false
diff --git a/.packit/libnccl-net-ofi.spec b/.packit/libnccl-net-ofi.spec
new file mode 100644
index 000000000..de412133a
--- /dev/null
+++ b/.packit/libnccl-net-ofi.spec
@@ -0,0 +1,105 @@
+# Accelerator selection: with_cuda and with_neuron must not both be 1, nor both be 0.
+%if "%{with_cuda}" == "1" && "%{with_neuron}" == "1"
+%{error:Neuron and CUDA must not be enabled together}
+%endif
+
+%if "%{with_cuda}" == "0" && "%{with_neuron}" == "0"
+%{error:One of Neuron or CUDA must be enabled}
+%endif
+
+%if "%{with_cuda}" == "1"
+%{!?target: %global target nccl}
+%endif
+%if "%{with_neuron}" == "1"
+%{!?target: %global target nccom}
+%endif
+
+%global pname_base lib%{!?with_neuron:nccl}%{?with_neuron:nccom}-net-ofi
+# Compare against "1": the build containers always define with_platform_aws
+# (as 0 or 1), so testing only for definedness would select the AWS layout
+# even when the value is 0.
+%if "%{with_platform_aws}" == "1"
+%global pname %{pname_base}-aws
+%global _prefix /opt/amazon/%{pname_base}
+%else
+%global pname %{pname_base}
+%endif
+
+# (CUDA only) what toolkit package to declare a build dependency on. Default: 12-6
+%{!?_cuda_toolkit_version: %global _cuda_toolkit_version 12-6}
+
+Name: %{pname}
+Version: null
+Release: 0%{dist}
+Summary: NCCL + libfabric compatibility layer
+License: Apache-2.0
+URL: https://github.com/aws/aws-ofi-nccl
+Source0: null
+%if "%{_vendor}" == "debbuild"
+Group: devel
+%else
+Group: Development/Tools%{?suse_version:/Building}
+BuildRequires: hwloc-devel
+BuildRequires: make
+BuildRequires: gcc
+BuildRequires: gcc-c++
+# NOTE(review): unlike the other with_platform_aws tests in this file, this one
+# has no comparison, so any non-empty expansion selects the first branch.
+# Dependency resolution from the SRPM may rely on that -- confirm before changing.
+%if "%{with_platform_aws}"
+BuildRequires: libfabric-aws-devel
+Requires: libfabric-aws
+%else
+BuildRequires: libfabric1-devel
+Requires: libfabric
+%endif
+%if "%{with_cuda}" == "1"
+BuildRequires: cuda-cudart-devel-%{_cuda_toolkit_version}
+%endif
+%endif
+Requires: hwloc
+
+%description
+This is a plugin which lets EC2 developers use libfabric as network provider
+while running NCCL applications.
+
+
+%prep
+%setup
+%build
+%configure \
+    --prefix="%{_prefix}" \
+    --disable-tests \
+    --with-mpi=no \
+%if "%{with_cuda}" == "1"
+    --with-cuda=/usr/local/cuda-12 \
+    --enable-neuron=no \
+%else
+    --with-cuda=no \
+    --enable-neuron=yes \
+%endif
+%if "%{with_platform_aws}" == "1"
+    --enable-platform-aws \
+    --with-libfabric=/opt/amazon/efa
+%else
+    --disable-platform-aws
+%endif
+%make_build
+
+
+%install
+%make_install
+find %{buildroot} -name '*.la' -exec rm -f {} ';'
+%ldconfig_scriptlets
+
+
+%files
+%{_libdir}/*.so
+%{_datadir}/aws-ofi-nccl/xml/*.xml
+%license LICENSE NOTICE
+%doc
+
+
+%changelog
+* Thu Aug 08 2024 Nicholas Sielicki
+Initial Package
diff --git a/.version b/.version
new file mode 100644
index 000000000..f32954fbd
--- /dev/null
+++ b/.version
@@ -0,0 +1 @@
+1.12.0pre
diff --git a/Makefile.am b/Makefile.am
index f2fdfa8d2..8b9c600d1 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -7,6 +7,7 @@ ACLOCAL_AMFLAGS = -I m4
 SUBDIRS = include src topology tests
 
 EXTRA_DIST = \
+	.version \
 	autogen.sh \
 	CODE_OF_CONDUCT.md \
 	CONTRIBUTING.md \
diff --git a/configure.ac b/configure.ac
index 5084e267d..7793d7cb0 100644
--- a/configure.ac
+++ b/configure.ac
@@ -6,7 +6,7 @@
 #
 
 # Initialization
-AC_INIT([aws-ofi-nccl], [GitHub-dev], [al-ofi-nccl-team@amazon.com], , [http://github.com/aws/aws-ofi-nccl])
+AC_INIT([aws-ofi-nccl], m4_normalize(m4_include([.version])), [al-ofi-nccl-team@amazon.com], , [http://github.com/aws/aws-ofi-nccl])
 AC_PREREQ([2.69])
 AC_CONFIG_SRCDIR([src/nccl_ofi_net.c])
 AC_CONFIG_AUX_DIR([build-aux])
diff --git a/docker-bake.hcl b/docker-bake.hcl
new file mode 100644
index 000000000..baf4a1f00
--- /dev/null
+++ b/docker-bake.hcl
@@ -0,0 +1,187 @@
+#
+# Copyright (c) 2024, Amazon.com, Inc. or its affiliates. All rights reserved.
+#
+# See LICENSE.txt for license information
+#
+#
+# Usage: https://docs.docker.com/reference/cli/docker/buildx/bake/
+
+# Notes:
+# * arm64 builds will use qemu by default, but requires containerd snapshotting
+#   to be enabled in docker's daemon.json, or explicit creation of an arm64
+#   capable context.
+#
+# * developers should strongly consider standing up an eks cluster and
+#   configuring a k8s builder for native arm64 builds:
+#   https://docs.docker.com/build/builders/drivers/kubernetes/
+
+group "default" { targets = [ "rpms", "debs" ] }
+
+# Cache efa installer packages.
+target "efainstaller" {
+  platforms = [
+    "linux/amd64",
+    # "linux/arm64"
+  ]
+  context = "."
+  dockerfile = ".docker/Dockerfile.efa"
+  output = ["type=cacheonly"]
+}
+
+# Generate a `make dist` tarball. Note that this requires ./configure to be
+# called, and that the contents of this "dist tarball" may (read: do) differ
+# depending on the configuration options passed. Requires dependencies to be
+# installed as ./configure aborts if they cannot resolve.
+target "makedist" {
+  platforms = [
+    "linux/amd64",
+    # "linux/arm64"
+  ]
+  name = "makedist-${item.accelerator}"
+  matrix = {
+    item = [
+      { accelerator = "neuron", base_image = "ubuntu:22.04" },
+      { accelerator = "cuda", base_image = "nvidia/cuda:12.6.0-devel-ubuntu22.04" },
+    ]
+  }
+  context = "."
+  contexts = { src = ".", efainstaller = "target:efainstaller" }
+  args = { ACCELERATOR = item.accelerator, BASE_IMAGE = item.base_image }
+  dockerfile = ".docker/Dockerfile.makedist"
+  output = ["type=local,dest=dockerbld/tarball"]
+}
+
+# Generate a universal srpm using packit.
+target "srpm" {
+  platforms = [
+    "linux/amd64",
+    # "linux/arm64"
+  ]
+  context = "."
+  contexts = { src = ".", makedist = "target:makedist-neuron" }
+  dockerfile = ".docker/Dockerfile.srpm"
+  output = ["type=local,dest=dockerbld/srpm"]
+}
+
+# Generate RPMs from the srpm above.
+target "rpms" {
+  name = "pkg${item.aws == "1" ? "-aws" : ""}-${replace(item.family, "/", "_")}-${replace(item.version, ".", "_")}-${replace(item.platform, "/", "_")}"
+  matrix = {
+    item = [
+      {
+        platform = "amd64",
+        family = "amazonlinux",
+        package_frontend = "dnf",
+        version = "2023",
+        efa = "latest",
+        cuda_distro = "amzn2023",
+        toolkit_version = "12-6",
+        accelerator = "cuda",
+        enable_powertools = "0",
+        aws = "1"
+      },
+      {
+        platform = "amd64",
+        family = "amazonlinux",
+        package_frontend = "yum",
+        version = "2",
+        efa = "latest",
+        cuda_distro = "rhel7",
+        toolkit_version = "12-3",
+        accelerator = "cuda",
+        enable_powertools = "0",
+        aws = "1"
+      },
+      {
+        platform = "amd64",
+        family = "rockylinux",
+        package_frontend = "dnf",
+        version = "8",
+        efa = "latest",
+        cuda_distro = "rhel8",
+        toolkit_version = "12-6",
+        accelerator = "cuda",
+        enable_powertools = "1",
+        aws = "1"
+      },
+      {
+        platform = "amd64",
+        family = "rockylinux",
+        package_frontend = "dnf",
+        version = "9",
+        efa = "latest",
+        cuda_distro = "rhel9",
+        toolkit_version = "12-6",
+        accelerator = "cuda",
+        enable_powertools = "0",
+        aws = "1"
+      },
+    ]
+  }
+  platforms = [ "linux/${item.platform}" ]
+  context = "."
+  contexts = {
+    efainstaller = "target:efainstaller"
+    srpm = "target:srpm"
+  }
+  dockerfile = ".docker/Dockerfile.${item.package_frontend}"
+  output = ["type=local,dest=dockerbld/pkgs"]
+  args = {
+    FAMILY = item.family,
+    VERSION = item.version
+    EFA_INSTALLER_VERSION = item.efa
+    CUDA_DISTRO = item.cuda_distro
+    VARIANT = item.accelerator
+    AWS_BUILD = item.aws
+    TOOLKIT_VERSION = item.toolkit_version
+    ENABLE_POWERTOOLS = item.enable_powertools
+  }
+}
+
+# Build and package for debian-like distributions by building and invoking fpm.
+target "debs" {
+  name = "pkg-${item.accelerator}${item.aws == "1" ? "-aws" : ""}-${replace(item.family, "/", "_")}-${replace(item.version, ".", "_")}-${replace(item.platform, "/", "_")}"
+  matrix = {
+    item = [
+      { accelerator = "cuda", aws = "1", platform = "amd64", family = "debian", version = "oldstable", cuda_distro = "debian11" },
+      # XXX: EFA Installer lacks support.
+      #{ accelerator = "cuda", aws = "1", platform = "amd64", family = "debian", version = "stable", cuda_distro = "debian11" },
+      { accelerator = "cuda", aws = "1", platform = "amd64", family = "ubuntu", version = "20.04", cuda_distro = "ubuntu2004" },
+      { accelerator = "cuda", aws = "1", platform = "amd64", family = "ubuntu", version = "22.04", cuda_distro = "ubuntu2204" },
+      { accelerator = "cuda", aws = "1", platform = "amd64", family = "ubuntu", version = "24.04", cuda_distro = "ubuntu2404" },
+      { accelerator = "cuda", aws = "0", platform = "amd64", family = "debian", version = "oldstable", cuda_distro = "debian11" },
+      # XXX: EFA Installer lacks support.
+      #{ accelerator = "cuda", aws = "0", platform = "amd64", family = "debian", version = "stable", cuda_distro = "debian11" },
+      { accelerator = "cuda", aws = "0", platform = "amd64", family = "ubuntu", version = "20.04", cuda_distro = "ubuntu2004" },
+      { accelerator = "cuda", aws = "0", platform = "amd64", family = "ubuntu", version = "22.04", cuda_distro = "ubuntu2204" },
+      { accelerator = "cuda", aws = "0", platform = "amd64", family = "ubuntu", version = "24.04", cuda_distro = "ubuntu2404" },
+      # XXX: todo
+      # { accelerator = "neuron", aws = "1", platform = "amd64", family = "debian", version = "oldstable", cuda_distro = "debian11" },
+      # #{ accelerator = "neuron", aws = "1", platform = "amd64", family = "debian", version = "stable", cuda_distro = "debian11" },
+      # { accelerator = "neuron", aws = "1", platform = "amd64", family = "ubuntu", version = "20.04", cuda_distro = "ubuntu2004" },
+      # { accelerator = "neuron", aws = "1", platform = "amd64", family = "ubuntu", version = "22.04", cuda_distro = "ubuntu2204" },
+      # { accelerator = "neuron", aws = "1", platform = "amd64", family = "ubuntu", version = "24.04", cuda_distro = "ubuntu2404" },
+
+      # { accelerator = "neuron", aws = "0", platform = "amd64", family = "debian", version = "oldstable", cuda_distro = "debian11" },
+      # #{ accelerator = "neuron", aws = "0", platform = "amd64", family = "debian", version = "stable", cuda_distro = "debian11" },
+      # { accelerator = "neuron", aws = "0", platform = "amd64", family = "ubuntu", version = "20.04", cuda_distro = "ubuntu2004" },
+      # { accelerator = "neuron", aws = "0", platform = "amd64", family = "ubuntu", version = "22.04", cuda_distro = "ubuntu2204" },
+      # { accelerator = "neuron", aws = "0", platform = "amd64", family = "ubuntu", version = "24.04", cuda_distro = "ubuntu2404" },
+    ]
+  }
+  platforms = [ "linux/${item.platform}" ]
+  context = "."
+  contexts = {
+    efainstaller = "target:efainstaller"
+    makedist = "target:makedist-${item.accelerator}"
+  }
+  dockerfile = ".docker/Dockerfile.dpkg"
+  output = ["type=local,dest=dockerbld/pkgs"]
+  args = {
+    FAMILY = item.family,
+    VERSION = item.version
+    CUDA_DISTRO = item.cuda_distro
+    AWS_BUILD = item.aws
+    ACCELERATOR = item.accelerator
+  }
+}