Skip to content

Commit

Permalink
feat(ci): add package generation
Browse files Browse the repository at this point in the history
stack-info: PR: aws#592, branch: aws-nslick/stack/33
Signed-off-by: Nicholas Sielicki <[email protected]>
  • Loading branch information
aws-nslick committed Oct 2, 2024
1 parent 867a848 commit 77aee9c
Show file tree
Hide file tree
Showing 18 changed files with 879 additions and 1 deletion.
22 changes: 22 additions & 0 deletions .docker/cfg/use-local.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#
# Copyright (c) 2024, Amazon.com, Inc. or its affiliates. All rights reserved.
#
# See LICENSE.txt for license information
#
# What is this?
# This overrides defaults for caching, targets, tags, platforms, etc. This
# file is expected to be symlinked into the root of the tree to select the
# builder used by `docker buildx bake`.
#
# This file configures local builds.
#
# See https://docs.docker.com/build/bake/reference/#file-format

# Bake variable; defaults to "master". It is not referenced by the target
# defined in this file.
variable "VERSION" {
  default = "master"
}

# Local build of the EFA installer image for both amd64 and arm64. The
# `cacheonly` output keeps the result in the build cache without exporting it.
target "efainstaller" {
  context    = "."
  dockerfile = ".docker/containers/Dockerfile.efa"
  platforms  = ["linux/amd64", "linux/arm64"]
  output     = ["type=cacheonly"]
}
49 changes: 49 additions & 0 deletions .docker/containers/Dockerfile.cache_efa
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#
# Copyright (c) 2024, Amazon.com, Inc. or its affiliates. All rights reserved.
#
# See LICENSE.txt for license information
#

# Helper image whose only job is to provide the `tar` package from apk
# (presumably GNU tar, for the --wildcards option used below).
FROM alpine:latest AS extractor
RUN apk add --no-cache tar

# Unpack only the selected parts of the EFA installer tarball: the top-level
# *.txt and *.sh files plus the package directory matching INSTALLER_PREFIX
# and the machine architecture. The tarball comes from the
# `efa_installer_tarball` source, which is not defined in this file.
FROM extractor AS extracted
ARG INSTALLER_PREFIX
ENV INSTALLER_PREFIX=${INSTALLER_PREFIX}
COPY --from=efa_installer_tarball aws-efa-installer*.tar.gz .
# --wildcards is position-sensitive and applies to every pattern after it.
RUN tar --extract --verbose --file *.tar.gz \
        --wildcards \
        "aws-efa-installer/*.txt" \
        "aws-efa-installer/*.sh" \
        "aws-efa-installer/**/${INSTALLER_PREFIX}/$(uname -m)" \
    && rm -rf *.tar.gz

# Runs the EFA installer on top of `distro_image` (not defined in this file;
# presumably supplied as a named build context) using the tree unpacked by
# the `extracted` stage.
FROM distro_image AS installed
# Installer toggles: a value of 1 adds the corresponding flag below.
ARG ENABLE_EFA_INSTALLER_DEBUG_INFO=0
ARG ENABLE_MPI4=0
ARG ENABLE_MPI5=0

ENV ENABLE_EFA_INSTALLER_DEBUG_INFO=${ENABLE_EFA_INSTALLER_DEBUG_INFO}
ENV ENABLE_MPI4=${ENABLE_MPI4}
ENV ENABLE_MPI5=${ENABLE_MPI5}

# COPY creates the destination directory, so no separate mkdir is needed.
COPY --from=extracted /aws-efa-installer /aws-efa-installer

# XXX: the EFA installer script should refresh the package caches itself.
# XXX: the EFA installer depends on util-linux, which many containers don't have.
#
# The package metadata is refreshed BEFORE attempting to install util-linux:
# container images ship with empty package lists, so an install attempted
# first cannot resolve the package on apt-based images.
#
# NOTE(review): when both ENABLE_MPI4 and ENABLE_MPI5 are 1 this passes
# `--mpi` twice, whereas Dockerfile.install_efa uses the comma-separated form
# `--mpi openmpi4,openmpi5`. Confirm the installer honours repeated flags.
RUN ((command -v apt-get && apt-get update -y) || /bin/true ) && \
    ((command -v yum && yum update -y ) || /bin/true ) && \
    ((command -v dnf && dnf -y update ) || /bin/true ) && \
    ( command -v getopt || \
      apt-get install -y util-linux || \
      dnf -y install util-linux || \
      yum install -y util-linux || \
      /bin/true ) && \
    cd /aws-efa-installer && \
    ./efa_installer.sh -y -n -l -k -g \
        $(test "$ENABLE_EFA_INSTALLER_DEBUG_INFO" -eq "1" && echo "-d") \
        $(test "$ENABLE_MPI4" -eq "1" && echo "--mpi openmpi4") \
        $(test "$ENABLE_MPI5" -eq "1" && echo "--mpi openmpi5") && \
    cd && rm -rf /aws-efa-installer && \
    ((command -v apt-get && apt-get purge -y && apt-get clean -y) || /bin/true ) && \
    ((command -v dnf && dnf clean all -y) || /bin/true ) && \
    ((command -v yum && yum clean all -y) || /bin/true ) && \
    ((command -v zypper && zypper clean ) || /bin/true )
44 changes: 44 additions & 0 deletions .docker/containers/Dockerfile.dnf
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#
# Copyright (c) 2024, Amazon.com, Inc. or its affiliates. All rights reserved.
#
# See LICENSE.txt for license information
#

# Global build arguments. Only FAMILY and VERSION are consumed by FROM; the
# others must be redeclared inside a stage before they can be used there.
ARG FAMILY=fedora
ARG VERSION=rawhide
ARG VARIANT=cuda
ARG CUDA_DISTRO
ARG AWS_BUILD
ARG ENABLE_POWERTOOLS

# Builder stage: installs the RPM tooling, runs the EFA installer, registers
# the NVIDIA CUDA repository and upgrades the image.
FROM ${FAMILY}:${VERSION} AS builder
ARG CUDA_DISTRO
ARG ENABLE_POWERTOOLS
ENV CUDA_DISTRO=${CUDA_DISTRO}
ENV ENABLE_POWERTOOLS=${ENABLE_POWERTOOLS}
# Overlay the contents of the `efainstaller` source (not defined in this
# file) onto the root filesystem; it provides /aws-efa-installer.
COPY --from=efainstaller / /
# Install EFA-installer deps and run the installer, then add the NVIDIA repo
# for CUDA builds. `config-manager --add-repo` is the dnf4 spelling; the
# dnf5 spelling is tried as a fallback so that newer Fedora bases also work.
# When ENABLE_POWERTOOLS=1 the Rocky PowerTools repo file is switched on.
# `dnf update` is an alias of `dnf upgrade`, so a single upgrade suffices.
RUN --mount=type=cache,target=/var/cache/yum,sharing=locked \
    --mount=type=cache,target=/var/cache/dnf,sharing=locked \
    bash -c "cd /aws-efa-installer && dnf install -y gcc rpmdevtools rpmlint dnf-plugins-core util-linux && ./efa_installer.sh -n -l -k -d -y && rm -rf /aws-efa-installer" && \
    cuda_repo="https://developer.download.nvidia.com/compute/cuda/repos/${CUDA_DISTRO}/$(uname -m)/cuda-${CUDA_DISTRO}.repo" && \
    ( dnf config-manager --add-repo "${cuda_repo}" || \
      dnf config-manager addrepo --from-repofile="${cuda_repo}" ) && \
    ( test "${ENABLE_POWERTOOLS}" = "1" && sed -i 's/enabled=0/enabled=1/' /etc/yum.repos.d/Rocky-PowerTools.repo || /bin/true ) && \
    dnf -y upgrade
RUN rpmdev-setuptree

# Rebuilds the source RPM taken from the `srpm` source (not defined in this
# file) into binary RPMs.
FROM builder AS environment
ARG VARIANT
ARG AWS_BUILD
# CUDA toolkit package suffix; the default matches the previously hard-coded
# value. NOTE(review): Dockerfile.yum additionally writes a
# %_cuda_toolkit_version rpm macro -- confirm whether the spec needs it here.
ARG TOOLKIT_VERSION=12-6
ENV VARIANT=${VARIANT}
ENV AWS_BUILD=${AWS_BUILD}
COPY --from=srpm . .
# rpm macros consumed by the spec file during the rebuild.
RUN echo "%with_${VARIANT} 1" >> ~/.rpmmacros && \
    echo "%with_platform_aws ${AWS_BUILD}" >> ~/.rpmmacros
RUN --mount=type=cache,target=/var/cache/yum,sharing=locked \
    --mount=type=cache,target=/var/cache/dnf,sharing=locked \
    dnf -y install cuda-cudart-devel-${TOOLKIT_VERSION} && dnf -y builddep *.src.rpm && rpmbuild --rebuild *.src.rpm

# Export only the built RPMs.
FROM scratch
COPY --from=environment /root/rpmbuild/RPMS/**/* /
65 changes: 65 additions & 0 deletions .docker/containers/Dockerfile.dpkg
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#
# Copyright (c) 2024, Amazon.com, Inc. or its affiliates. All rights reserved.
#
# See LICENSE.txt for license information
#

# Global build arguments. Only FAMILY and VERSION are consumed by FROM; the
# others must be redeclared inside a stage before they can be used there.
ARG FAMILY=ubuntu
ARG VERSION=latest
ARG CUDA_DISTRO
ARG DEBIAN_FRONTEND=noninteractive
ARG AWS_BUILD

# Build stage: installs the CUDA keyring, runs the EFA installer, then
# configures, builds and installs the plugin from the `makedist` tarball.
FROM ${FAMILY}:${VERSION} AS build
ARG CUDA_DISTRO
ENV CUDA_DISTRO=${CUDA_DISTRO}
ARG AWS_BUILD=0
ENV AWS_BUILD=${AWS_BUILD}
# Redeclared without a value so the pre-FROM default ("noninteractive") is
# actually in scope for the apt-get calls in this stage. As an ARG it does
# not persist into the image environment.
ARG DEBIAN_FRONTEND

RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update -y && apt-get install wget -y

RUN wget https://developer.download.nvidia.com/compute/cuda/repos/${CUDA_DISTRO}/$(uname -m)/cuda-keyring_1.1-1_all.deb

RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    dpkg -i cuda-keyring_1.1-1_all.deb

# The installer is invoked with options only, matching the other Dockerfiles
# in this change; the stray positional "/efa_installer.sh" argument is gone.
COPY --from=efainstaller / .
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    bash -c "apt-get update -y && cd /aws-efa-installer && ./efa_installer.sh -n -l -k -d -y && apt-get install -y autoconf automake libtool gcc g++ git libhwloc-dev make && rm -rf /aws-efa-installer"

COPY --from=makedist / .
RUN tar xvf ./aws-ofi-nccl*.tar.gz -C .
# NOTE(review): ACCELERATOR is never declared as ARG/ENV in this stage, so
# unless the base image exports it the test below is always false and the
# build is configured with --enable-neuron=yes. Confirm the intent before
# declaring it: this stage does not itself install a CUDA toolkit.
#
# AWS_BUILD=0 selects the /opt/amazon/libnccl-net-ofi prefix with
# --disable-platform-aws; any other value selects the "-aws" prefix with
# --enable-platform-aws. Parallelism is bounded to the CPU count.
RUN cd aws-ofi-nccl* && \
    ./configure --$(test "$ACCELERATOR" = "cuda" && echo "with-cuda=/usr/local/cuda" || echo "enable-neuron=yes") \
        --prefix=/opt/amazon/libnccl-net-ofi$(test "$AWS_BUILD" -eq 0 || echo -n "-aws") \
        --with-libfabric=/opt/amazon/efa \
        --disable-tests \
        --$(test "$AWS_BUILD" -eq 0 && echo -n "disable" || echo -n "enable")-platform-aws \
        --with-mpi=no && make -j"$(nproc)" && make install

# Packaging stage: turns the installed /opt/amazon tree into a .deb with fpm.
FROM ubuntu:latest AS packager
ARG FAMILY
ARG VERSION
ARG AWS_BUILD=0
# Redeclared without a value so the pre-FROM default ("noninteractive") is
# in scope for apt-get in this stage.
ARG DEBIAN_FRONTEND
ENV AWS_BUILD=${AWS_BUILD}
ENV FAMILY=${FAMILY}
ENV VERSION=${VERSION}
COPY --from=build /opt/amazon/ /opt/amazon/
# Remove libtool archives. `find -delete` matches only regular files named
# *.la and succeeds when there are none; the previous unquoted grep pattern
# lost its backslash to the shell (matching any path ending in "<char>la"),
# and `xargs rm` failed the build on empty input.
RUN find /opt/amazon/ -type f -name '*.la' -delete
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update -y && apt-get install -y ruby tar squashfs-tools binutils && gem install fpm
# The package/prefix suffix is empty when AWS_BUILD is 0 and "-aws" otherwise.
RUN suffix="$(test "$AWS_BUILD" -eq 0 || echo -n "-aws")" && \
    fpm \
        -s dir -t deb \
        --license Apache2.0 \
        -p "/libnccl-net-ofi${suffix}-${FAMILY}-${VERSION}.deb" \
        --name "nccl-net-ofi${suffix}" \
        "/opt/amazon/libnccl-net-ofi${suffix}/=/opt/amazon/libnccl-net-ofi${suffix}"

# Export only the generated package.
FROM scratch
COPY --from=packager /libnccl-net-ofi* /

11 changes: 11 additions & 0 deletions .docker/containers/Dockerfile.dpkg_add_cuda_repo
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Stage 1: fetch the NVIDIA CUDA keyring package for the requested distro
# and the build machine's architecture.
FROM alpine:latest AS downloader
ARG CUDA_DISTRO
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/${CUDA_DISTRO}/$(uname -m)/cuda-keyring_1.1-1_all.deb

# Stage 2: register the CUDA apt repository on `base_image` (not defined in
# this file) and install the CUDA runtime development package.
FROM base_image
ARG CUDA_TOOLKIT_VERSION_SUFFIX
ENV CUDA_TOOLKIT_VERSION_SUFFIX=${CUDA_TOOLKIT_VERSION_SUFFIX}
COPY --from=downloader cuda-keyring*.deb .
# The second `apt-get update` picks up the repository added by the keyring.
RUN apt-get update -y \
    && apt-get install -y ca-certificates \
    && dpkg -i cuda-keyring_1.1-1_all.deb \
    && rm cuda-keyring*.deb \
    && apt-get update -y \
    && apt-get install -y cuda-cudart-dev-${CUDA_TOOLKIT_VERSION_SUFFIX} \
    && apt-get purge -y \
    && apt-get clean -y
17 changes: 17 additions & 0 deletions .docker/containers/Dockerfile.install_efa
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Installs EFA (with both Open MPI 4 and 5) onto `distro_image` from the
# installer tree provided by `efa_installer_contents`. Neither source is
# defined in this file; presumably both are named build contexts.
FROM distro_image
# COPY creates the destination directory, so no separate mkdir is needed.
COPY --from=efa_installer_contents /aws-efa-installer /aws-efa-installer

# XXX: EFA installer doesn't refresh the package caches if they're unpopulated,
# as they always are in container images.
#
# XXX: EFA installer depends on util-linux, which many containers don't have.
#
# Order matters: the package metadata is refreshed first so that util-linux
# can actually be resolved on apt-based images. Every step is joined with
# `&&`; the cleanup subshells were previously placed back to back with no
# operator between them, which is a shell syntax error. `clean` is given the
# `all` argument that dnf and yum require.
RUN ((command -v apt-get && apt-get update -y) || /bin/true ) && \
    (! command -v yum || yum update -y ) && \
    (command -v getopt || apt-get install -y util-linux 2>/dev/null || \
     dnf -y install util-linux 2>/dev/null || yum -y install util-linux 2>/dev/null) && \
    cd /aws-efa-installer && \
    ./efa_installer.sh -d -y -n -l -k -g --mpi openmpi4,openmpi5 && \
    cd && rm -rf /aws-efa-installer && \
    ((command -v apt-get && apt-get purge -y && apt-get clean -y) || /bin/true ) && \
    ((command -v dnf && dnf clean all -y) || /bin/true ) && \
    ((command -v yum && yum clean all -y) || /bin/true )
23 changes: 23 additions & 0 deletions .docker/containers/Dockerfile.makedist
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#
# Copyright (c) 2024, Amazon.com, Inc. or its affiliates. All rights reserved.
#
# See LICENSE.txt for license information
#

# Autotools environment on top of `base_image` (not defined in this file).
FROM base_image AS buildenv
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update -y && apt-get install -y automake autoconf libtool libhwloc-dev

# Generates the release tarball with `make dist`.
FROM buildenv AS distbuilder
ARG ACCELERATOR
ENV ACCELERATOR=${ACCELERATOR}
# Copy the whole build context. Docker strips leading "../" from COPY
# sources, so the previous "../" already resolved to the context root; "."
# states that explicitly.
COPY . /proj
WORKDIR /proj
# ACCELERATOR=cuda configures against /usr/local/cuda; any other value
# configures with --enable-neuron=yes.
RUN autoreconf -ivf && \
    ./configure --with-libfabric=/opt/amazon/efa \
        --$(test "$ACCELERATOR" = "cuda" && echo "with-cuda=/usr/local/cuda" || echo "enable-neuron=yes") && \
    make -j dist

# Export only the generated tarball.
FROM scratch
COPY --from=distbuilder /proj/aws-ofi-nccl*.tar.gz /
18 changes: 18 additions & 0 deletions .docker/containers/Dockerfile.srpm
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#
# Copyright (c) 2024, Amazon.com, Inc. or its affiliates. All rights reserved.
#
# See LICENSE.txt for license information
#

# Tooling image: Fedora rawhide with packit and mock installed.
FROM fedora:rawhide AS packitimg
RUN dnf install -y packit mock

# Assemble the sources from `src` and the files from `makedist` (neither is
# defined in this file) in /proj, then let packit produce the source RPM.
FROM packitimg AS srpm
# WORKDIR creates /proj if it does not exist.
WORKDIR /proj
COPY --from=src . /proj/
COPY --from=makedist . /proj/
RUN packit srpm

# Export only the resulting .src.rpm.
FROM scratch
COPY --from=srpm /proj/*.src.rpm /
42 changes: 42 additions & 0 deletions .docker/containers/Dockerfile.yum
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#
# Copyright (c) 2024, Amazon.com, Inc. or its affiliates. All rights reserved.
#
# See LICENSE.txt for license information
#

# Global build arguments. Only FAMILY and VERSION are consumed by FROM; the
# others are redeclared in the stages that use them.
ARG FAMILY=amazonlinux
ARG VERSION=2
ARG VARIANT=cuda
ARG CUDA_DISTRO
ARG AWS_BUILD

# Builder stage: installs the RPM tooling, runs the EFA installer, registers
# the NVIDIA CUDA repository and updates the image.
FROM ${FAMILY}:${VERSION} AS builder
ARG CUDA_DISTRO
ENV CUDA_DISTRO=${CUDA_DISTRO}
# Overlay the contents of the `efainstaller` source (not defined in this
# file) onto the root filesystem; it provides /aws-efa-installer.
COPY --from=efainstaller / /
RUN --mount=type=cache,target=/var/cache/yum,sharing=locked \
    --mount=type=cache,target=/var/cache/dnf,sharing=locked \
    bash -c "cd /aws-efa-installer && \
             yum install -y gcc rpmdevtools rpmlint yum-utils util-linux && \
             ./efa_installer.sh -n -l -k -d -y && \
             rm -rf /aws-efa-installer" \
    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/${CUDA_DISTRO}/$(uname -m)/cuda-${CUDA_DISTRO}.repo \
    && yum update -y
RUN rpmdev-setuptree

# Rebuilds the source RPM taken from the `srpm` source (not defined in this
# file) into binary RPMs.
FROM builder AS environment
ARG VARIANT
ARG AWS_BUILD
ARG TOOLKIT_VERSION=12-6
ENV VARIANT=${VARIANT} \
    AWS_BUILD=${AWS_BUILD} \
    TOOLKIT_VERSION=${TOOLKIT_VERSION}
COPY --from=srpm . .
# rpm macros consumed by the spec file during the rebuild.
RUN { \
      echo "%with_${VARIANT} 1"; \
      echo "%with_platform_aws ${AWS_BUILD}"; \
      echo "%_cuda_toolkit_version ${TOOLKIT_VERSION}"; \
    } >> ~/.rpmmacros
RUN --mount=type=cache,target=/var/cache/yum,sharing=locked \
    --mount=type=cache,target=/var/cache/dnf,sharing=locked \
    yum install -y cuda-cudart-devel-${TOOLKIT_VERSION} \
    && yum-builddep -y *.src.rpm \
    && rpmbuild --rebuild *.src.rpm

# Export only the built RPMs.
FROM scratch
COPY --from=environment /root/rpmbuild/RPMS/**/* /
81 changes: 81 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Paths excluded from the Docker build context.

# Editor backup files.
*~

# Build outputs under src/ (object files, libtool objects/archives), the
# built plugin library and the tags index.
src/*.o
src/tuner/*.o
src/*.lo
src/*.la
libnccl-net.so
tags

src/tuner/.dirstamp
src/tuner/*.lo

# Test objects, test-run logs/results and test binaries under tests/.
tests/functional/*.o
tests/unit/*.o
tests/unit/*.log
tests/unit/*.trs
tests/functional/nccl_connection
tests/functional/nccl_message_transfer
tests/functional/ring
tests/functional/cuda_check
tests/unit/msgbuff
tests/unit/freelist
tests/unit/deque
tests/unit/scheduler
tests/unit/idpool
tests/unit/show_tuner_decisions
tests/unit/show_tuner_costs
tests/unit/ep_addr_list
tests/unit/mr

# http://www.gnu.org/software/automake
.deps/
Makefile.in
Makefile
/ar-lib
/mdate-sh
/py-compile
/test-driver
/ylwrap

# http://www.gnu.org/software/autoconf
build-aux/
autom4te.cache
/autoscan.log
/autoscan-*.log
/aclocal.m4
/compile
/config.guess
/config.log
/config.status
/config.sub
/config.cache
/configure
/configure.scan
/depcomp
/install-sh
/missing
/stamp-h1
/include/stamp-h1
/include/config.h
/include/config.h.in

# https://www.gnu.org/software/libtool/
/ltmain.sh
.libs/
libtool

# http://www.gnu.org/software/m4/
m4/libtool.m4
m4/ltoptions.m4
m4/ltsugar.m4
m4/ltversion.m4
m4/lt~obsolete.m4

# other
# (IDE and developer-environment directories, generated source RPMs, and
# what appear to be local build output directories)
.idea/
.devenv/
.direnv
*.src.rpm
dockerbld
result
Loading

0 comments on commit 77aee9c

Please sign in to comment.