Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a workflow to build torchtitan-ubuntu-20.04-clang12 Docker image for CI #338

Merged
merged 6 commits into from
May 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions .ci/docker/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Docker images for TorchTitan CI

This directory contains everything needed to build the Docker images
that are used in TorchTitan CI. The content of this directory are copied
from PyTorch CI https://github.com/pytorch/pytorch/tree/main/.ci/docker.
It also uses the same directory structure as PyTorch.

## Contents

* `build.sh` -- dispatch script to launch all builds
* `common` -- scripts used to execute individual Docker build stages
* `ubuntu` -- Dockerfile for Ubuntu image for CPU build and test jobs

## Usage

```bash
# Generic usage
./build.sh "${IMAGE_NAME}" "${DOCKER_BUILD_PARAMETERS}"

# Build a specific image
./build.sh torchtitan-ubuntu-20.04-clang12 -t myimage:latest
```
39 changes: 39 additions & 0 deletions .ci/docker/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu

IMAGE_NAME="$1"
shift

echo "Building ${IMAGE_NAME} Docker image"

OS=ubuntu
OS_VERSION=20.04
CLANG_VERSION=""
PYTHON_VERSION=3.11
MINICONDA_VERSION=24.3.0-0

case "${IMAGE_NAME}" in
torchtitan-ubuntu-20.04-clang12)
CLANG_VERSION=12
;;
*)
echo "Invalid image name ${IMAGE_NAME}"
exit 1
esac

docker build \
--no-cache \
--progress=plain \
--build-arg "OS_VERSION=${OS_VERSION}" \
--build-arg "CLANG_VERSION=${CLANG_VERSION}" \
--build-arg "PYTHON_VERSION=${PYTHON_VERSION}" \
--build-arg "MINICONDA_VERSION=${MINICONDA_VERSION}" \
-f "${OS}"/Dockerfile \
"$@" \
.
44 changes: 44 additions & 0 deletions .ci/docker/common/install_base.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -ex

install_ubuntu() {
apt-get update

apt-get install -y --no-install-recommends \
build-essential \
ca-certificates \
curl \
git \
wget \
sudo \
vim \
jq \
vim \
unzip \
gdb \
rsync \
libssl-dev \
zip

# Cleanup package manager
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
}

# Install base packages depending on the base OS
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
ubuntu)
install_ubuntu
;;
*)
echo "Unable to determine OS..."
exit 1
;;
esac
42 changes: 42 additions & 0 deletions .ci/docker/common/install_clang.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -ex

install_ubuntu() {
apt-get update

apt-get install -y --no-install-recommends clang-"$CLANG_VERSION"
apt-get install -y --no-install-recommends llvm-"$CLANG_VERSION"
# Also require LLD linker from llvm and libomp to build PyTorch from source
apt-get install -y lld "libomp-${CLANG_VERSION}-dev"

# Use update-alternatives to make this version the default
update-alternatives --install /usr/bin/clang clang /usr/bin/clang-"$CLANG_VERSION" 50
update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-"$CLANG_VERSION" 50
# Override cc/c++ to clang as well
update-alternatives --install /usr/bin/cc cc /usr/bin/clang 50
update-alternatives --install /usr/bin/c++ c++ /usr/bin/clang++ 50

# Cleanup package manager
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
}

if [ -n "$CLANG_VERSION" ]; then
# Install base packages depending on the base OS
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
ubuntu)
install_ubuntu
;;
*)
echo "Unable to determine OS..."
exit 1
;;
esac
fi
64 changes: 64 additions & 0 deletions .ci/docker/common/install_conda.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -ex

# shellcheck source=/dev/null
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

install_miniconda() {
BASE_URL="https://repo.anaconda.com/miniconda"
CONDA_FILE="Miniconda3-py${PYTHON_VERSION//./}_${MINICONDA_VERSION}-Linux-x86_64.sh"

mkdir -p /opt/conda
chown ci-user:ci-user /opt/conda

pushd /tmp
wget -q "${BASE_URL}/${CONDA_FILE}"
# Install miniconda
as_ci_user bash "${CONDA_FILE}" -b -f -p "/opt/conda"
# Clean up the download file
rm "${CONDA_FILE}"
popd

sed -e 's|PATH="\(.*\)"|PATH="/opt/conda/bin:\1"|g' -i /etc/environment
export PATH="/opt/conda/bin:$PATH"
}

install_python() {
pushd /opt/conda
# Install the correct Python version
as_ci_user conda create -n "py_${PYTHON_VERSION}" -y --file /opt/conda/conda-env-ci.txt python="${PYTHON_VERSION}"
popd
}

install_pip_dependencies() {
pushd /opt/conda
# Install all Python dependencies
pip_install -r /opt/conda/dev-requirements.txt
pip_install -r /opt/conda/requirements.txt
popd
}

fix_conda_ubuntu_libstdcxx() {
cat /etc/issue
# WARNING: This is a HACK from PyTorch core to be able to build PyTorch on 22.04.
# Specifically, ubuntu-20+ all comes lib libstdc++ newer than 3.30+, but anaconda
# is stuck with 3.29. So, remove libstdc++6.so.3.29 as installed by
# https://anaconda.org/anaconda/libstdcxx-ng/files?version=11.2.0
#
# PyTorch sev: https://github.com/pytorch/pytorch/issues/105248
# Ref: https://github.com/pytorch/pytorch/blob/main/.ci/docker/common/install_conda.sh
if grep -e "2[02].04." /etc/issue >/dev/null; then
rm "/opt/conda/envs/py_${PYTHON_VERSION}/lib/libstdc++.so.6"
fi
}

install_miniconda
install_python
install_pip_dependencies
fix_conda_ubuntu_libstdcxx
22 changes: 22 additions & 0 deletions .ci/docker/common/install_gcc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -ex

if [ -n "$GCC_VERSION" ]; then

apt-get update
apt-get install -y g++-"$GCC_VERSION"
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-"$GCC_VERSION" 50
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-"$GCC_VERSION" 50
update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-"$GCC_VERSION" 50

# Cleanup package manager
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

fi
24 changes: 24 additions & 0 deletions .ci/docker/common/install_user.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -ex

# Same as ec2-user
echo "ci-user:x:1000:1000::/var/lib/ci-user:" >> /etc/passwd
echo "ci-user:x:1000:" >> /etc/group
# Needed on Focal or newer
echo "ci-user:*:19110:0:99999:7:::" >> /etc/shadow

# Create $HOME
mkdir -p /var/lib/ci-user
chown ci-user:ci-user /var/lib/ci-user

# Allow sudo
echo 'ci-user ALL=(ALL) NOPASSWD:ALL' > /etc/sudoers.d/ci-user

# Test that sudo works
sudo -u ci-user sudo -v
29 changes: 29 additions & 0 deletions .ci/docker/common/utils.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

as_ci_user() {
# NB: unsetting the environment variables works around a conda bug
# https://github.com/conda/conda/issues/6576
# NB: Pass on PATH and LD_LIBRARY_PATH to sudo invocation
# NB: This must be run from a directory that the user has access to
sudo -E -H -u ci-user env -u SUDO_UID -u SUDO_GID -u SUDO_COMMAND -u SUDO_USER env "PATH=${PATH}" "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}" "$@"
}

conda_install() {
# Ensure that the install command don't upgrade/downgrade Python
# This should be called as
# conda_install pkg1 pkg2 ... [-c channel]
as_ci_user conda install -q -n "py_${PYTHON_VERSION}" -y python="${PYTHON_VERSION}" "$@"
}

conda_run() {
as_ci_user conda run -n "py_${PYTHON_VERSION}" --no-capture-output "$@"
}

pip_install() {
as_ci_user conda run -n "py_${PYTHON_VERSION}" pip install --progress-bar off "$@"
}
2 changes: 2 additions & 0 deletions .ci/docker/conda-env-ci.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
cmake=3.22.1
ninja=1.10.2
3 changes: 3 additions & 0 deletions .ci/docker/dev-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pytest
pytest-cov
pre-commit
7 changes: 7 additions & 0 deletions .ci/docker/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
torch >= 2.2.0.dev
datasets
tomli >= 1.1.0 ; python_version < "3.11"
tensorboard
sentencepiece
tiktoken
blobfile
40 changes: 40 additions & 0 deletions .ci/docker/ubuntu/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
ARG OS_VERSION

FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu${OS_VERSION}

ARG OS_VERSION

ENV DEBIAN_FRONTEND noninteractive

# Install common dependencies
COPY ./common/install_base.sh install_base.sh
RUN bash ./install_base.sh && rm install_base.sh

# Install clang
ARG CLANG_VERSION
COPY ./common/install_clang.sh install_clang.sh
RUN bash ./install_clang.sh && rm install_clang.sh

# Install gcc
ARG GCC_VERSION
COPY ./common/install_gcc.sh install_gcc.sh
RUN bash ./install_gcc.sh && rm install_gcc.sh

# Setup user
COPY ./common/install_user.sh install_user.sh
RUN bash ./install_user.sh && rm install_user.sh

# Install conda and other dependencies
ARG MINICONDA_VERSION
ARG PYTHON_VERSION
ENV PYTHON_VERSION=$PYTHON_VERSION
ENV PATH /opt/conda/envs/py_$PYTHON_VERSION/bin:/opt/conda/bin:$PATH
COPY dev-requirements.txt /opt/conda/
COPY requirements.txt /opt/conda/
COPY conda-env-ci.txt /opt/conda/
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/utils.sh utils.sh
RUN bash ./install_conda.sh && rm install_conda.sh utils.sh /opt/conda/dev-requirements.txt /opt/conda/requirements.txt /opt/conda/conda-env-ci.txt

USER ci-user
CMD ["bash"]
Loading
Loading