From cbdfe255371c63696dd10cabbd619397079f2f9d Mon Sep 17 00:00:00 2001
From: jakki
Date: Thu, 28 Nov 2024 13:40:37 +0200
Subject: [PATCH] Add Dockerfile.rocm

docker/Dockerfile: lock the apt cache mounts (sharing=locked) and make
`apt remove` non-interactive (-y).

docker/Dockerfile.rocm: new multi-stage Dockerfile (compile-image,
production-image, ci-image, dev-image) that installs ROCm in every stage
when the USE_ROCM_VERSION build argument is set and builds a CPU image
otherwise.
---
Note: the blob id on the "index" line of docker/Dockerfile.rocm is the one
recorded for the original revision of this patch.

 docker/Dockerfile      |  12 +-
 docker/Dockerfile.rocm | 324 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 330 insertions(+), 6 deletions(-)
 create mode 100644 docker/Dockerfile.rocm

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 94f4a1ba99..3a2ba23a98 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -37,7 +37,7 @@ ARG BRANCH_NAME
 ARG REPO_URL=https://github.com/pytorch/serve.git
 ENV PYTHONUNBUFFERED TRUE
 
-RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
+RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \
     apt-get update && \
     apt-get upgrade -y && \
     apt-get install software-properties-common -y && \
@@ -112,12 +112,12 @@ FROM ${BASE_IMAGE} AS production-image
 ARG PYTHON_VERSION
 ENV PYTHONUNBUFFERED TRUE
 
-RUN --mount=type=cache,target=/var/cache/apt \
+RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \
     apt-get update && \
     apt-get upgrade -y && \
     apt-get install software-properties-common -y && \
     add-apt-repository ppa:deadsnakes/ppa -y && \
-    apt remove python-pip python3-pip && \
+    apt remove -y python-pip python3-pip && \
     DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
         python$PYTHON_VERSION \
         python3-distutils \
@@ -158,12 +158,12 @@ ARG PYTHON_VERSION
 ARG BRANCH_NAME
 ENV PYTHONUNBUFFERED TRUE
 
-RUN --mount=type=cache,target=/var/cache/apt \
+RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \
     apt-get update && \
     apt-get upgrade -y && \
     apt-get install software-properties-common -y && \
     add-apt-repository -y ppa:deadsnakes/ppa && \
-    apt remove python-pip python3-pip && \
+    apt remove -y python-pip python3-pip && \
     DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
         python$PYTHON_VERSION \
         python3-distutils \
@@ -207,7 +207,7 @@ ARG BUILD_WITH_IPEX
 ARG IPEX_VERSION=1.11.0
 ARG IPEX_URL=https://software.intel.com/ipex-whl-stable
 ENV PYTHONUNBUFFERED TRUE
-RUN --mount=type=cache,target=/var/cache/apt \
+RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \
     apt-get update && \
     apt-get upgrade -y && \
     apt-get install software-properties-common -y && \
diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm
new file mode 100644
index 0000000000..d8c7b842f7
--- /dev/null
+++ b/docker/Dockerfile.rocm
@@ -0,0 +1,324 @@
+# syntax = docker/dockerfile:experimental
+#
+# This file can build images for CPU and AMD ROCm GPU environments. By default it builds a CPU image.
+# Use the following option to build a ROCm image: --build-arg USE_ROCM_VERSION=<rocm-version>
+# Here is the complete command for ROCm, run from the repository root -
+# $ DOCKER_BUILDKIT=1 docker build --file docker/Dockerfile.rocm --build-arg USE_ROCM_VERSION=<rocm-version> -t torchserve:latest .
+#
+# Following comments have been shamelessly copied from https://github.com/pytorch/pytorch/blob/master/Dockerfile
+#
+# NOTE: To build this you will need a docker version > 18.06 with
+#       experimental enabled and DOCKER_BUILDKIT=1
+#
+#       If you do not use buildkit you are not going to have a good time
+#
+#       For reference:
+#           https://docs.docker.com/develop/develop-images/build_enhancements/
+
+ARG BASE_IMAGE=ubuntu:24.04
+ARG BRANCH_NAME=master
+# Note:
+# Define here the default python version to be used in all later build-stages as default.
+# ARG and ENV variables do not persist across stages (they're build-stage scoped).
+# That is crucial for ARG PYTHON_VERSION, which otherwise becomes "" leading to nasty bugs,
+# that don't let the build fail, but break current version handling logic and result
+# in images with wrong python version. To fix that, we will restate the ARG PYTHON_VERSION
+# on each build-stage.
+ARG PYTHON_VERSION=3.11
+
+FROM ${BASE_IMAGE} AS compile-image
+ARG BASE_IMAGE=ubuntu:24.04
+ARG PYTHON_VERSION
+ARG BUILD_NIGHTLY
+ARG BUILD_FROM_SRC
+ARG LOCAL_CHANGES
+ARG BRANCH_NAME
+ARG REPO_URL=https://github.com/pytorch/serve.git
+ENV PYTHONUNBUFFERED TRUE
+
+RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \
+    apt-get update && \
+    apt-get upgrade -y && \
+    apt-get install software-properties-common -y && \
+    add-apt-repository -y ppa:deadsnakes/ppa && \
+    apt remove -y python-pip python3-pip && \
+    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
+        ca-certificates \
+        g++ \
+        python3-setuptools \
+        python$PYTHON_VERSION \
+        python$PYTHON_VERSION-dev \
+        python$PYTHON_VERSION-venv \
+        openjdk-17-jdk \
+        curl \
+        git \
+    && rm -rf /var/lib/apt/lists/*
+
+# Make the virtual environment and "activating" it by adding it first to the path.
+# From here on the python$PYTHON_VERSION interpreter is used and the packages
+# are installed in /home/venv which is what we need for the "runtime-image"
+RUN python$PYTHON_VERSION -m venv /home/venv
+ENV PATH="/home/venv/bin:$PATH"
+
+ARG USE_ROCM_VERSION=""
+
+RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \
+    if [ -n "$USE_ROCM_VERSION" ]; then \
+        apt-get update \
+        && curl -O https://repo.radeon.com/amdgpu-install/6.2.2/ubuntu/noble/amdgpu-install_6.2.60202-1_all.deb \
+        && DEBIAN_FRONTEND=noninteractive apt-get install -y ./amdgpu-install_6.2.60202-1_all.deb \
+        && apt-get update \
+        && apt-get install --no-install-recommends -y amdgpu-dkms rocm; \
+    else \
+        echo "Skip ROCm installation"; \
+    fi
+
+COPY ./ serve
+
+RUN \
+    if echo "$LOCAL_CHANGES" | grep -q "false"; then \
+        rm -rf /serve;\
+        git clone --recursive $REPO_URL -b $BRANCH_NAME /serve; \
+    fi
+
+
+WORKDIR "/serve"
+
+RUN cp docker/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh
+
+RUN \
+    # Install ROCm version specific binary when ROCm version is specified as a build arg
+    if [ "$USE_ROCM_VERSION" ]; then \
+        python ./ts_scripts/install_dependencies.py --rocm $USE_ROCM_VERSION \
+        && python -m pip install /opt/rocm/share/amd_smi; \
+    # Install the binary with the latest CPU image on a ROCm base image
+    else \
+        python ./ts_scripts/install_dependencies.py;\
+    fi;
+
+# Make sure latest version of torchserve is uploaded before running this
+RUN \
+    if echo "$BUILD_FROM_SRC" | grep -q "true"; then \
+        python -m pip install -r requirements/developer.txt \
+        && python ts_scripts/install_from_src.py;\
+    elif echo "$BUILD_NIGHTLY" | grep -q "false"; then \
+        python -m pip install --no-cache-dir torchserve torch-model-archiver torch-workflow-archiver;\
+    else \
+        python -m pip install --no-cache-dir torchserve-nightly torch-model-archiver-nightly torch-workflow-archiver-nightly;\
+    fi
+
+# Final image for production
+FROM ${BASE_IMAGE} AS production-image
+# Re-state ARG PYTHON_VERSION to make it active in this build-stage (uses default define at the top)
+ARG PYTHON_VERSION
+ARG USE_ROCM_VERSION
+ENV PYTHONUNBUFFERED TRUE
+
+RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \
+    apt-get update && \
+    apt-get upgrade -y && \
+    apt-get install software-properties-common -y && \
+    add-apt-repository ppa:deadsnakes/ppa -y && \
+    apt remove -y python-pip python3-pip && \
+    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
+        python$PYTHON_VERSION \
+        python3-setuptools \
+        python$PYTHON_VERSION-dev \
+        python$PYTHON_VERSION-venv \
+        # using openjdk-17-jdk due to circular dependency(ca-certificates) bug in openjdk-17-jre-headless debian package
+        # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1009905
+        openjdk-17-jdk \
+        build-essential \
+    && rm -rf /var/lib/apt/lists/* \
+    && cd /tmp
+
+# Reuse the amdgpu-install package downloaded in compile-image through a read-only bind mount of that stage at /mnt
+RUN --mount=type=bind,from=compile-image,target=/mnt \
+    if [ "$USE_ROCM_VERSION" ]; then \
+        apt-get update \
+        && DEBIAN_FRONTEND=noninteractive apt-get install -y /mnt/amdgpu-install_6.2.60202-1_all.deb \
+        && apt-get update \
+        && apt-get install --no-install-recommends -y amdgpu-dkms rocm; \
+    else \
+        echo "Skip ROCm installation"; \
+    fi
+
+RUN useradd -m model-server \
+    && mkdir -p /home/model-server/tmp
+
+COPY --chown=model-server --from=compile-image /home/venv /home/venv
+COPY --from=compile-image /usr/local/bin/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh
+ENV PATH="/home/venv/bin:$PATH"
+
+# amd_smi is expected to come from the ROCm packages installed in this stage, so nothing is copied from compile-image
+RUN \
+    if [ "$USE_ROCM_VERSION" ]; then \
+        python -m pip install /opt/rocm/share/amd_smi; \
+    else \
+        echo "Skip ROCm installation"; \
+    fi
+
+RUN chmod +x /usr/local/bin/dockerd-entrypoint.sh \
+    && chown -R model-server /home/model-server
+
+COPY docker/config.properties /home/model-server/config.properties
+RUN mkdir /home/model-server/model-store && chown -R model-server /home/model-server/model-store
+
+EXPOSE 8080 8081 8082 7070 7071
+
+USER model-server
+WORKDIR /home/model-server
+ENV TEMP=/home/model-server/tmp
+ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"]
+CMD ["serve"]
+
+# Final image for docker regression
+FROM ${BASE_IMAGE} AS ci-image
+# Re-state ARG PYTHON_VERSION to make it active in this build-stage (uses default define at the top)
+ARG PYTHON_VERSION
+ARG BRANCH_NAME
+ARG USE_ROCM_VERSION
+ENV PYTHONUNBUFFERED TRUE
+
+RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \
+    apt-get update && \
+    apt-get upgrade -y && \
+    apt-get install software-properties-common -y && \
+    add-apt-repository -y ppa:deadsnakes/ppa && \
+    apt remove -y python-pip python3-pip && \
+    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
+        python$PYTHON_VERSION \
+        python3-setuptools \
+        python$PYTHON_VERSION-dev \
+        python$PYTHON_VERSION-venv \
+        # using openjdk-17-jdk due to circular dependency(ca-certificates) bug in openjdk-17-jre-headless debian package
+        # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1009905
+        openjdk-17-jdk \
+        build-essential \
+        wget \
+        numactl \
+        nodejs \
+        npm \
+        zip \
+        unzip \
+    && npm install -g newman@5.3.2 newman-reporter-htmlextra markdown-link-check \
+    && rm -rf /var/lib/apt/lists/* \
+    && cd /tmp
+
+# Reuse the amdgpu-install package downloaded in compile-image through a read-only bind mount of that stage at /mnt
+RUN --mount=type=bind,from=compile-image,target=/mnt \
+    if [ "$USE_ROCM_VERSION" ]; then \
+        apt-get update \
+        && DEBIAN_FRONTEND=noninteractive apt-get install -y /mnt/amdgpu-install_6.2.60202-1_all.deb \
+        && apt-get update \
+        && apt-get install --no-install-recommends -y amdgpu-dkms rocm; \
+    else \
+        echo "Skip ROCm installation"; \
+    fi
+
+COPY --from=compile-image /home/venv /home/venv
+ENV PATH="/home/venv/bin:$PATH"
+
+RUN python -m pip install --no-cache-dir -r https://raw.githubusercontent.com/pytorch/serve/$BRANCH_NAME/requirements/developer.txt
+
+# amd_smi is expected to come from the ROCm packages installed in this stage, so nothing is copied from compile-image
+RUN \
+    if [ "$USE_ROCM_VERSION" ]; then \
+        python -m pip install /opt/rocm/share/amd_smi; \
+    else \
+        echo "Skip ROCm installation"; \
+    fi
+
+RUN mkdir /serve
+ENV TS_RUN_IN_DOCKER True
+
+WORKDIR /serve
+CMD ["python", "test/regression_tests.py"]
+
+#Final image for developer Docker image
+FROM ${BASE_IMAGE} AS dev-image
+# Re-state ARG PYTHON_VERSION to make it active in this build-stage (uses default define at the top)
+ARG PYTHON_VERSION
+ARG BRANCH_NAME
+ARG REPO_URL=https://github.com/pytorch/serve.git
+ARG USE_ROCM_VERSION
+ARG BUILD_FROM_SRC
+ARG LOCAL_CHANGES
+ARG BUILD_WITH_IPEX
+ARG IPEX_VERSION=1.11.0
+ARG IPEX_URL=https://software.intel.com/ipex-whl-stable
+ENV PYTHONUNBUFFERED TRUE
+RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \
+    apt-get update && \
+    apt-get upgrade -y && \
+    apt-get install software-properties-common -y && \
+    add-apt-repository -y ppa:deadsnakes/ppa && \
+    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
+        fakeroot \
+        ca-certificates \
+        dpkg-dev \
+        sudo \
+        g++ \
+        git \
+        python$PYTHON_VERSION \
+        python$PYTHON_VERSION-dev \
+        python3-setuptools \
+        python$PYTHON_VERSION-venv \
+        # using openjdk-17-jdk due to circular dependency(ca-certificates) bug in openjdk-17-jre-headless debian package
+        # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1009905
+        openjdk-17-jdk \
+        build-essential \
+        curl \
+        vim \
+        numactl \
+    && rm -rf /var/lib/apt/lists/*
+
+# Reuse the amdgpu-install package downloaded in compile-image through a read-only bind mount of that stage at /mnt
+RUN --mount=type=bind,from=compile-image,target=/mnt \
+    if [ "$USE_ROCM_VERSION" ]; then \
+        apt-get update \
+        && DEBIAN_FRONTEND=noninteractive apt-get install -y /mnt/amdgpu-install_6.2.60202-1_all.deb \
+        && apt-get update \
+        && apt-get install --no-install-recommends -y amdgpu-dkms rocm; \
+    else \
+        echo "Skip ROCm installation"; \
+    fi
+
+COPY ./ /serve
+
+RUN \
+    if echo "$LOCAL_CHANGES" | grep -q "false"; then \
+        rm -rf /serve;\
+        git clone --recursive $REPO_URL -b $BRANCH_NAME /serve; \
+    fi
+
+COPY --from=compile-image /home/venv /home/venv
+ENV PATH="/home/venv/bin:$PATH"
+
+WORKDIR "/serve"
+
+RUN \
+    if [ "$USE_ROCM_VERSION" ]; then \
+        python ts_scripts/install_dependencies.py --environment=dev --rocm $USE_ROCM_VERSION \
+        && python -m pip install /opt/rocm/share/amd_smi; \
+    # Install the binary with the latest CPU image on a ROCm base image
+    else \
+        python ts_scripts/install_dependencies.py --environment=dev;\
+    fi;
+
+RUN python -m pip install -U pip setuptools \
+    && python -m pip install --no-cache-dir -r requirements/developer.txt \
+    && python ts_scripts/install_from_src.py --environment=dev \
+    && useradd -m model-server \
+    && mkdir -p /home/model-server/tmp \
+    && cp docker/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh \
+    && chmod +x /usr/local/bin/dockerd-entrypoint.sh \
+    && chown -R model-server /home/model-server \
+    && cp docker/config.properties /home/model-server/config.properties \
+    && mkdir /home/model-server/model-store && chown -R model-server /home/model-server/model-store \
+    && chown -R model-server /home/venv
+EXPOSE 8080 8081 8082 7070 7071
+WORKDIR /home/model-server
+ENV TEMP=/home/model-server/tmp
+ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"]
+CMD ["serve"]