# Patch overview (free-text preamble placed before the first "diff --git" header).
#
# Files touched by this patch:
#   .github/workflows/docker_image.yml  (new)  manually triggered (workflow_dispatch)
#       job that logs in to ghcr.io, derives tags/labels with docker/metadata-action,
#       and builds + pushes the image with build-arg MAX_JOBS=64.
#   Dockerfile                                 MAX_JOBS becomes a build ARG (default 32)
#       that feeds NINJA_MAX_JOBS / MAX_JOBS; adds TORCH_CXX_FLAGS,
#       _GLIBCXX_USE_CXX11_ABI and PYTORCH_VERSION env vars; installs
#       build-essential, gcc, g++ and openssh-server; adds a build_torchvision.sh
#       step and a pillow install.
#   patches/build_pytorch_w_patch.sh           shebang changed from "#! usr/bin/bash" to
#   patches/build_torchdistX_w_patch.sh        "#!/usr/bin/bash"; $PATCH_PATH is now quoted.
#   patches/build_torchvision.sh        (new)  clones pytorch/vision at v0.17.0, runs
#       setup.py install, removes the checkout, then imports torchvision as a check.
#
# NOTE(review): this copy has its newlines collapsed onto a few physical lines, while
#   the hunk headers (e.g. "+1,47") describe multi-line hunks; presumably the original
#   line breaks must be restored before the patch can be applied -- confirm.
# NOTE(review): actions/checkout is referenced by tag (@v4) while the other three
#   actions are pinned to full commit SHAs; consider pinning it the same way.
# NOTE(review): the unchanged cmake context line passes -DCAKE_CXX_COMPILER_LAUNCHER,
#   which looks like a typo for CMAKE_CXX_COMPILER_LAUNCHER. The apt install lines
#   visible here do not install ccache, so confirm ccache is available before
#   correcting the spelling.
# NOTE(review): PYTORCH_VERSION is set to "2.2.0a0+git6c8c5ad" while
#   build_pytorch_w_patch.sh clones tag v2.2.1-rc3 -- confirm these are meant to agree.
# NOTE(review): build_torchvision.sh clones without "--depth 1", unlike the other two
#   build scripts; a shallow clone may be sufficient -- verify.
diff --git a/.github/workflows/docker_image.yml b/.github/workflows/docker_image.yml new file mode 100644 index 0000000..1afa50d --- /dev/null +++ b/.github/workflows/docker_image.yml @@ -0,0 +1,47 @@ +name: Docker Image CI + +on: + workflow_dispatch: + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +jobs: + build-and-push-image: + runs-on: [self-hosted, Linux, X64, Docker] + + permissions: + contents: read + packages: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Log in to the Container registry + uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + flavor: | + latest=auto + tags: | + type=sha,format=long + type=raw,value=latest,enable={{is_default_branch}} + + - name: Build and push Docker image + uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 + with: + context: . 
+ push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + build-args: MAX_JOBS=64 diff --git a/Dockerfile b/Dockerfile index d376f49..59d777f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,29 +1,37 @@ FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04 +ARG MAX_JOBS=32 + WORKDIR /app COPY ./patches /app ENV TZ=Etc/UTC \ DEBIAN_FRONTEND=noninteractive \ - NINJA_MAX_JOBS=32 \ - MAX_JOBS=32 \ - TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0" + NINJA_MAX_JOBS=$MAX_JOBS \ + MAX_JOBS=$MAX_JOBS \ + TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0"\ + TORCH_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1"\ + _GLIBCXX_USE_CXX11_ABI=1\ + PYTORCH_VERSION="2.2.0a0+git6c8c5ad" RUN apt-get update \ && apt-get install -y software-properties-common \ - && apt-get install -y python3.10 python3-pip git git-lfs cmake ninja-build \ + && apt-get install -y python3.10 python3-pip git git-lfs cmake ninja-build build-essential gcc g++ \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* \ && ln -sf /usr/bin/python3.10 /usr/bin/python3 \ && ln -sf /usr/bin/pip3.10 /usr/bin/pip3 \ && pip3 install --upgrade pip setuptools wheel \ && apt update \ - && apt install -y python-is-python3 + && apt install -y python-is-python3 openssh-server \ + && apt clean RUN bash build_pytorch_w_patch.sh +RUN bash build_torchvision.sh + RUN bash build_torchdistX_w_patch.sh @@ -31,6 +39,7 @@ RUN pip3 install --no-cache-dir packaging \ && pip3 install --no-cache-dir mpmath==1.3.0 \ && pip3 install --no-cache-dir "setuptools>=69.0.0" \ && pip3 install --no-cache-dir regex \ + && pip3 install --no-cache-dir pillow \ && pip3 install --no-cache-dir pybind11 \ && pip3 install --no-cache-dir einops \ && pip3 install --no-cache-dir expecttest \ diff --git a/patches/build_pytorch_w_patch.sh b/patches/build_pytorch_w_patch.sh index f89aaee..3b9eb0c 100644 --- a/patches/build_pytorch_w_patch.sh +++ b/patches/build_pytorch_w_patch.sh @@ -1,4 +1,4 @@ -#! 
usr/bin/bash +#!/usr/bin/bash set -e SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" @@ -13,7 +13,7 @@ fi git clone --branch v2.2.1-rc3 --depth 1 https://github.com/pytorch/pytorch.git pushd pytorch -git apply $PATCH_PATH +git apply "$PATCH_PATH" git submodule sync git submodule update --init --recursive --depth 1 pip3 install -r requirements.txt diff --git a/patches/build_torchdistX_w_patch.sh b/patches/build_torchdistX_w_patch.sh index 29ff3e5..553e527 100644 --- a/patches/build_torchdistX_w_patch.sh +++ b/patches/build_torchdistX_w_patch.sh @@ -1,4 +1,4 @@ -#! usr/bin/bash +#!/usr/bin/bash set -e SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" @@ -15,7 +15,7 @@ git clone --depth 1 https://github.com/pytorch/torchdistx.git pushd torchdistx git pull git checkout 9c1b9f5cb2fa36bfb8b70ec07c40ed42a33cc87a -git apply $PATCH_PATH +git apply "$PATCH_PATH" git submodule sync git submodule update --init --recursive --depth 1 cmake -DTORCHDIST_INSTALL_STANDALONE=ON -GNinja -DCAKE_CXX_COMPILER_LAUNCHER=ccache -B build diff --git a/patches/build_torchvision.sh b/patches/build_torchvision.sh new file mode 100644 index 0000000..d7b14a9 --- /dev/null +++ b/patches/build_torchvision.sh @@ -0,0 +1,11 @@ +#!/usr/bin/bash +set -e + +# build torchvision +git clone -b v0.17.0 https://github.com/pytorch/vision.git +pushd vision +python3 setup.py install +popd +rm -rf vision +python3 -c "import torchvision" +pip3 list | grep torch