Skip to content

Commit

Permalink
Add Dockerfile.rocm
Browse files Browse the repository at this point in the history
  • Loading branch information
jakki-amd committed Nov 28, 2024
1 parent d330494 commit cbdfe25
Show file tree
Hide file tree
Showing 2 changed files with 327 additions and 6 deletions.
12 changes: 6 additions & 6 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ ARG BRANCH_NAME
ARG REPO_URL=https://github.com/pytorch/serve.git
ENV PYTHONUNBUFFERED TRUE

RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \
apt-get update && \
apt-get upgrade -y && \
apt-get install software-properties-common -y && \
Expand Down Expand Up @@ -112,12 +112,12 @@ FROM ${BASE_IMAGE} AS production-image
ARG PYTHON_VERSION
ENV PYTHONUNBUFFERED TRUE

RUN --mount=type=cache,target=/var/cache/apt \
RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \
apt-get update && \
apt-get upgrade -y && \
apt-get install software-properties-common -y && \
add-apt-repository ppa:deadsnakes/ppa -y && \
apt remove python-pip python3-pip && \
apt remove -y python-pip python3-pip && \
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
python$PYTHON_VERSION \
python3-distutils \
Expand Down Expand Up @@ -158,12 +158,12 @@ ARG PYTHON_VERSION
ARG BRANCH_NAME
ENV PYTHONUNBUFFERED TRUE

RUN --mount=type=cache,target=/var/cache/apt \
RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \
apt-get update && \
apt-get upgrade -y && \
apt-get install software-properties-common -y && \
add-apt-repository -y ppa:deadsnakes/ppa && \
apt remove python-pip python3-pip && \
apt remove -y python-pip python3-pip && \
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
python$PYTHON_VERSION \
python3-distutils \
Expand Down Expand Up @@ -207,7 +207,7 @@ ARG BUILD_WITH_IPEX
ARG IPEX_VERSION=1.11.0
ARG IPEX_URL=https://software.intel.com/ipex-whl-stable
ENV PYTHONUNBUFFERED TRUE
RUN --mount=type=cache,target=/var/cache/apt \
RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \
apt-get update && \
apt-get upgrade -y && \
apt-get install software-properties-common -y && \
Expand Down
321 changes: 321 additions & 0 deletions docker/Dockerfile.rocm
Original file line number Diff line number Diff line change
@@ -0,0 +1,321 @@
# syntax = docker/dockerfile:experimental
#
# This file can build images for CPU and AMD ROCm GPU environments. By default it builds an image for CPU.
# Use the following option to install ROCm and the ROCm builds of the dependencies: --build-arg USE_ROCM_VERSION=<version>
# (the value is passed to ts_scripts/install_dependencies.py via its --rocm option; see that script for accepted values).
# Here is the complete command for ROCm, run from the repository root -
# $ DOCKER_BUILDKIT=1 docker build --file docker/Dockerfile.rocm --build-arg USE_ROCM_VERSION=<version> -t torchserve:latest .
#
# Following comments have been shamelessly copied from https://github.com/pytorch/pytorch/blob/master/Dockerfile
#
# NOTE: To build this you will need a docker version > 18.06 with
# experimental enabled and DOCKER_BUILDKIT=1
#
# If you do not use buildkit you are not going to have a good time
#
# For reference:
# https://docs.docker.com/develop/develop-images/build_enhancements/

# Global build arguments.
#
# An ARG declared before the first FROM is only visible to FROM lines. A stage
# that needs one of these values must re-declare the ARG (without a value) to
# pick up the default given here. This matters most for PYTHON_VERSION: if a
# stage forgets to re-declare it, the variable expands to "" there, the build
# does not fail, and the image silently ends up with the wrong Python version.
ARG PYTHON_VERSION=3.11
# Base image used by every stage's FROM line.
ARG BASE_IMAGE=ubuntu:24.04
# Branch of the serve repository used when sources/requirements are fetched remotely.
ARG BRANCH_NAME=master

# Build stage: creates the virtual environment in /home/venv and installs
# TorchServe and its dependencies into it. Later stages copy the venv from here.
FROM ${BASE_IMAGE} AS compile-image
ARG BASE_IMAGE=ubuntu:24.04
# Re-declared without values so the global defaults (or --build-arg overrides) apply in this stage.
ARG PYTHON_VERSION
ARG BUILD_NIGHTLY
ARG BUILD_FROM_SRC
ARG LOCAL_CHANGES
ARG BRANCH_NAME
# Repository cloned when LOCAL_CHANGES is "false".
ARG REPO_URL=https://github.com/pytorch/serve.git
# key=value form; the space-separated "ENV key value" form is deprecated.
ENV PYTHONUNBUFFERED=TRUE

# Upgrade the base system, add the deadsnakes PPA, remove any distro pip, then
# install the compiler, JDK, git/curl and the requested Python interpreter.
# apt-get is used for every step: the `apt` front end is meant for interactive
# use and does not promise a stable command-line interface for scripts.
# The package index is removed at the end so it does not end up in the layer.
RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \
    apt-get update && \
    apt-get upgrade -y && \
    apt-get install software-properties-common -y && \
    add-apt-repository -y ppa:deadsnakes/ppa && \
    apt-get remove -y python-pip python3-pip && \
    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
        ca-certificates \
        curl \
        g++ \
        git \
        openjdk-17-jdk \
        python3-setuptools \
        python$PYTHON_VERSION \
        python$PYTHON_VERSION-dev \
        python$PYTHON_VERSION-venv \
    && rm -rf /var/lib/apt/lists/*

# Create the virtual environment in /home/venv and "activate" it by putting its
# bin directory first on PATH. From this point on, `python` resolves to the
# python$PYTHON_VERSION interpreter of the venv, and packages are installed
# into /home/venv, which is the directory the later stages copy.
RUN "python${PYTHON_VERSION}" -m venv /home/venv
ENV PATH="/home/venv/bin:${PATH}"

# Empty by default; the ROCm-specific steps only run when this is non-empty.
ARG USE_ROCM_VERSION=""

# Optionally install the ROCm stack (only when USE_ROCM_VERSION is non-empty).
#
# The amdgpu-install package is downloaded into the current directory and is
# intentionally left in place: the later stages bind-mount this stage and
# install the same .deb from it.
# curl runs with --fail so that an HTTP error aborts the build here instead of
# saving the server's error page under the .deb file name.
# NOTE: the installer release (6.2.2) and Ubuntu codename (noble) in the URL are
# fixed; they do not follow the value of USE_ROCM_VERSION or BASE_IMAGE.
RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \
    if [ -n "$USE_ROCM_VERSION" ]; then \
        apt-get update \
        && curl --fail -O https://repo.radeon.com/amdgpu-install/6.2.2/ubuntu/noble/amdgpu-install_6.2.60202-1_all.deb \
        && DEBIAN_FRONTEND=noninteractive apt-get install -y ./amdgpu-install_6.2.60202-1_all.deb \
        && apt-get update \
        && apt-get install --no-install-recommends -y amdgpu-dkms rocm; \
    else \
        echo "Skip ROCm installation"; \
    fi

# Copy the build context to /serve. The destination is absolute so it does not
# depend on the working directory inherited from the base image; every later
# step (rm -rf /serve, git clone ... /serve, WORKDIR /serve) assumes this path.
COPY ./ /serve

# When LOCAL_CHANGES contains "false", discard the copied context and use a
# fresh clone of REPO_URL at BRANCH_NAME instead.
RUN \
    if echo "$LOCAL_CHANGES" | grep -q "false"; then \
        rm -rf /serve; \
        git clone --recursive "$REPO_URL" -b "$BRANCH_NAME" /serve; \
    fi


# All remaining steps of this stage run from the source tree.
WORKDIR /serve

# Place the entrypoint script under /usr/local/bin; the production image copies it from this path.
RUN cp ./docker/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh

# Install the project dependencies into the venv.
# - ROCm version given: pass it to the installer script via --rocm and also
#   install the amd_smi Python package shipped in the ROCm tree.
# - Otherwise: run the installer script with its defaults.
RUN if [ -n "$USE_ROCM_VERSION" ]; then \
        python ./ts_scripts/install_dependencies.py --rocm $USE_ROCM_VERSION \
        && python -m pip install /opt/rocm/share/amd_smi; \
    else \
        python ./ts_scripts/install_dependencies.py; \
    fi

# Install TorchServe itself. Make sure the latest version of torchserve has been
# uploaded before running this.
#   BUILD_FROM_SRC contains "true"  -> developer requirements + install from the source tree
#   BUILD_NIGHTLY contains "false"  -> released packages from the package index
#   anything else                   -> nightly packages
RUN case "$BUILD_FROM_SRC" in \
        *true*) \
            python -m pip install -r requirements/developer.txt \
            && python ts_scripts/install_from_src.py \
            ;; \
        *) \
            case "$BUILD_NIGHTLY" in \
                *false*) \
                    python -m pip install --no-cache-dir torchserve torch-model-archiver torch-workflow-archiver \
                    ;; \
                *) \
                    python -m pip install --no-cache-dir torchserve-nightly torch-model-archiver-nightly torch-workflow-archiver-nightly \
                    ;; \
            esac \
            ;; \
    esac

# Final image for production
FROM ${BASE_IMAGE} AS production-image
# Re-state ARG PYTHON_VERSION to make it active in this build-stage (uses the default defined at the top)
ARG PYTHON_VERSION
# Re-declared so the ROCm conditionals in this stage see the value passed with --build-arg.
ARG USE_ROCM_VERSION
# key=value form; the space-separated "ENV key value" form is deprecated.
ENV PYTHONUNBUFFERED=TRUE

# Upgrade the base system, add the deadsnakes PPA, remove any distro pip, then
# install the requested Python interpreter, the JDK and build-essential.
# apt-get is used for every step because the `apt` front end does not promise a
# stable command-line interface for scripts. The former trailing `cd /tmp` was
# dropped: a directory change at the end of a RUN has no effect on later steps.
RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \
    apt-get update && \
    apt-get upgrade -y && \
    apt-get install software-properties-common -y && \
    add-apt-repository ppa:deadsnakes/ppa -y && \
    apt-get remove -y python-pip python3-pip && \
    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
        python$PYTHON_VERSION \
        python3-setuptools \
        python$PYTHON_VERSION-dev \
        python$PYTHON_VERSION-venv \
        # using openjdk-17-jdk due to circular dependency(ca-certificates) bug in openjdk-17-jre-headless debian package
        # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1009905
        openjdk-17-jdk \
        build-essential \
    && rm -rf /var/lib/apt/lists/*

# Optionally install the ROCm stack in the runtime image.
# The compile-image filesystem is bind-mounted at /mnt for this step only, so the
# amdgpu-install .deb downloaded there can be installed without copying it into
# this image. `sharing` is an option of cache mounts, not bind mounts, so it is
# not set here. The .deb is addressed by absolute path so the step does not
# depend on the current working directory.
RUN --mount=type=bind,from=compile-image,target=/mnt \
    if [ "$USE_ROCM_VERSION" ]; then \
        apt-get update \
        && DEBIAN_FRONTEND=noninteractive apt-get install -y /mnt/amdgpu-install_6.2.60202-1_all.deb \
        && apt-get update \
        && apt-get install --no-install-recommends -y amdgpu-dkms rocm; \
    else \
        echo "Skip ROCm installation"; \
    fi

# Create the model-server account (with a home directory) and its tmp directory.
RUN useradd --create-home model-server \
    && mkdir -p /home/model-server/tmp

# Bring over the entrypoint script and the populated virtual environment from
# the build stage; the venv is owned by model-server.
COPY --from=compile-image /usr/local/bin/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh
COPY --chown=model-server --from=compile-image /home/venv /home/venv
# Put the venv first on PATH so `python` resolves to it.
ENV PATH="/home/venv/bin:${PATH}"

# Install the amd_smi Python package from the ROCm tree when ROCm was requested.
# The sources come from this stage's own /opt/rocm: the ROCm install step earlier
# in this stage runs under the same USE_ROCM_VERSION condition. They are
# deliberately not copied from compile-image, because COPY aborts the whole build
# when the source path does not exist there (e.g. a build without
# USE_ROCM_VERSION on a base image that ships no ROCm).
RUN \
    if [ "$USE_ROCM_VERSION" ]; then \
        python -m pip install /opt/rocm/share/amd_smi; \
    else \
        echo "Skip ROCm installation"; \
    fi

# Hand the home directory to model-server and make the entrypoint executable.
RUN chown -R model-server /home/model-server \
    && chmod +x /usr/local/bin/dockerd-entrypoint.sh

# Configuration file from the build context, plus a model-store directory owned by model-server.
COPY docker/config.properties /home/model-server/config.properties
RUN mkdir /home/model-server/model-store \
    && chown -R model-server /home/model-server/model-store

# Ports documented for this image.
EXPOSE 8080 8081 8082
EXPOSE 7070 7071

# Run as the unprivileged account, from its home directory, with TEMP pointing
# at the tmp directory created for it.
USER model-server
ENV TEMP=/home/model-server/tmp
WORKDIR /home/model-server
ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"]
CMD ["serve"]

# Final image for docker regression
FROM ${BASE_IMAGE} AS ci-image
# Re-state ARG PYTHON_VERSION to make it active in this build-stage (uses the default defined at the top)
ARG PYTHON_VERSION
ARG BRANCH_NAME
# Must be declared in this stage: the ROCm conditionals below test
# $USE_ROCM_VERSION, which is always empty in a stage that does not declare it.
ARG USE_ROCM_VERSION
# key=value form; the space-separated "ENV key value" form is deprecated.
ENV PYTHONUNBUFFERED=TRUE

# Upgrade the base system, add the deadsnakes PPA, remove any distro pip, then
# install Python, the JDK, build tools and the utilities used by the regression
# run (wget, numactl, nodejs/npm, zip/unzip), followed by the global npm tools.
# apt-get is used for every step because the `apt` front end does not promise a
# stable command-line interface for scripts. The former trailing `cd /tmp` was
# dropped: a directory change at the end of a RUN has no effect on later steps.
# NOTE(review): the newman version pin was restored from a garbled
# "[email protected]" token in the reviewed text — confirm 5.3.2 is the intended version.
RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \
    apt-get update && \
    apt-get upgrade -y && \
    apt-get install software-properties-common -y && \
    add-apt-repository -y ppa:deadsnakes/ppa && \
    apt-get remove -y python-pip python3-pip && \
    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
        python$PYTHON_VERSION \
        python3-setuptools \
        python$PYTHON_VERSION-dev \
        python$PYTHON_VERSION-venv \
        # using openjdk-17-jdk due to circular dependency(ca-certificates) bug in openjdk-17-jre-headless debian package
        # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1009905
        openjdk-17-jdk \
        build-essential \
        wget \
        numactl \
        nodejs \
        npm \
        zip \
        unzip \
    && npm install -g newman@5.3.2 newman-reporter-htmlextra markdown-link-check \
    && rm -rf /var/lib/apt/lists/*

# Optionally install the ROCm stack in the CI image.
# The compile-image filesystem is bind-mounted at /mnt for this step only, so the
# amdgpu-install .deb downloaded there can be installed without copying it into
# this image. `sharing` is an option of cache mounts, not bind mounts, so it is
# not set here. The .deb is addressed by absolute path so the step does not
# depend on the current working directory.
# NOTE(review): this branch only runs if the stage declares ARG USE_ROCM_VERSION — confirm.
RUN --mount=type=bind,from=compile-image,target=/mnt \
    if [ "$USE_ROCM_VERSION" ]; then \
        apt-get update \
        && DEBIAN_FRONTEND=noninteractive apt-get install -y /mnt/amdgpu-install_6.2.60202-1_all.deb \
        && apt-get update \
        && apt-get install --no-install-recommends -y amdgpu-dkms rocm; \
    else \
        echo "Skip ROCm installation"; \
    fi

# Reuse the virtual environment built in compile-image and put it first on PATH.
COPY --from=compile-image /home/venv /home/venv
ENV PATH="/home/venv/bin:${PATH}"

# Add the developer requirements, fetched from the BRANCH_NAME branch of the serve repository on GitHub.
RUN python -m pip install --no-cache-dir \
        --requirement https://raw.githubusercontent.com/pytorch/serve/$BRANCH_NAME/requirements/developer.txt

# Install the amd_smi Python package from the ROCm tree when ROCm was requested.
# The sources come from this stage's own /opt/rocm: the ROCm install step earlier
# in this stage is guarded by the same $USE_ROCM_VERSION test. They are
# deliberately not copied from compile-image, because COPY aborts the whole build
# when the source path does not exist there (e.g. a build without
# USE_ROCM_VERSION on a base image that ships no ROCm).
RUN \
    if [ "$USE_ROCM_VERSION" ]; then \
        python -m pip install /opt/rocm/share/amd_smi; \
    else \
        echo "Skip ROCm installation"; \
    fi

# key=value form; the space-separated "ENV key value" form is deprecated.
ENV TS_RUN_IN_DOCKER=True

# WORKDIR creates /serve when it does not exist, so no separate mkdir is needed.
WORKDIR /serve
# Default command: run the regression test script, resolved relative to /serve.
CMD ["python", "test/regression_tests.py"]

#Final image for developer Docker image
# Keyword casing matches the other stages (FROM ... AS ...).
FROM ${BASE_IMAGE} AS dev-image
# Re-state ARG PYTHON_VERSION to make it active in this build-stage (uses the default defined at the top)
ARG PYTHON_VERSION
ARG BRANCH_NAME
ARG USE_ROCM_VERSION
ARG BUILD_FROM_SRC
ARG LOCAL_CHANGES
# Needed by the `git clone $REPO_URL` step of this stage; without this
# declaration the variable expands to nothing here and the clone cannot work.
ARG REPO_URL=https://github.com/pytorch/serve.git
ARG BUILD_WITH_IPEX
ARG IPEX_VERSION=1.11.0
ARG IPEX_URL=https://software.intel.com/ipex-whl-stable
# key=value form; the space-separated "ENV key value" form is deprecated.
ENV PYTHONUNBUFFERED=TRUE
# Upgrade the base system, add the deadsnakes PPA, then install the developer
# toolchain: compilers and packaging tools, git, the requested Python, the JDK
# and a few utilities. The package index is removed at the end of the step.
RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \
    apt-get update \
    && apt-get upgrade -y \
    && apt-get install software-properties-common -y \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
        build-essential \
        ca-certificates \
        curl \
        dpkg-dev \
        fakeroot \
        g++ \
        git \
        numactl \
        # using openjdk-17-jdk due to circular dependency(ca-certificates) bug in openjdk-17-jre-headless debian package
        # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1009905
        openjdk-17-jdk \
        python3-setuptools \
        python$PYTHON_VERSION \
        python$PYTHON_VERSION-dev \
        python$PYTHON_VERSION-venv \
        sudo \
        vim \
    && rm -rf /var/lib/apt/lists/*

# Optionally install the ROCm stack in the developer image.
# The compile-image filesystem is bind-mounted at /mnt for this step only, so the
# amdgpu-install .deb downloaded there can be installed without copying it into
# this image. `sharing` is an option of cache mounts, not bind mounts, so it is
# not set here. The .deb is addressed by absolute path so the step does not
# depend on the current working directory.
RUN --mount=type=bind,from=compile-image,target=/mnt \
    if [ "$USE_ROCM_VERSION" ]; then \
        apt-get update \
        && DEBIAN_FRONTEND=noninteractive apt-get install -y /mnt/amdgpu-install_6.2.60202-1_all.deb \
        && apt-get update \
        && apt-get install --no-install-recommends -y amdgpu-dkms rocm; \
    else \
        echo "Skip ROCm installation"; \
    fi

# Start from the build context copied to /serve.
COPY . /serve

# If LOCAL_CHANGES contains "false", replace the copied context with a fresh
# clone of REPO_URL at BRANCH_NAME.
# NOTE(review): $REPO_URL is only non-empty here if this stage declares ARG REPO_URL — confirm.
RUN if echo "$LOCAL_CHANGES" | grep -q false; then \
        rm -rf /serve; \
        git clone --recursive $REPO_URL -b $BRANCH_NAME /serve; \
    fi

# Reuse the virtual environment built in compile-image and put it first on PATH.
COPY --from=compile-image /home/venv /home/venv
ENV PATH="/home/venv/bin:${PATH}"

# The install steps that follow use paths relative to the source tree.
WORKDIR /serve

# Install the development dependencies into the venv.
# - ROCm version given: pass it to the installer script via --rocm and also
#   install the amd_smi Python package shipped in the ROCm tree.
# - Otherwise: run the installer script for the dev environment with its defaults.
RUN if [ -n "$USE_ROCM_VERSION" ]; then \
        python ts_scripts/install_dependencies.py --environment=dev --rocm $USE_ROCM_VERSION \
        && python -m pip install /opt/rocm/share/amd_smi; \
    else \
        python ts_scripts/install_dependencies.py --environment=dev; \
    fi

# One layer that finishes the developer image, in this order:
#   1. upgrade pip/setuptools, install the developer requirements and install
#      TorchServe from the source tree in dev mode;
#   2. create the model-server account and its tmp directory;
#   3. install the entrypoint script and make it executable;
#   4. hand the home directory to model-server, then add config.properties and
#      a model-store directory;
#   5. hand the virtual environment to model-server.
RUN python -m pip install --upgrade pip setuptools \
    && python -m pip install --no-cache-dir --requirement requirements/developer.txt \
    && python ts_scripts/install_from_src.py --environment=dev \
    && useradd --create-home model-server \
    && mkdir -p /home/model-server/tmp \
    && cp docker/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh \
    && chmod +x /usr/local/bin/dockerd-entrypoint.sh \
    && chown -R model-server /home/model-server \
    && cp docker/config.properties /home/model-server/config.properties \
    && mkdir /home/model-server/model-store \
    && chown -R model-server /home/model-server/model-store \
    && chown -R model-server /home/venv

# Ports documented for this image.
EXPOSE 8080 8081 8082
EXPOSE 7070 7071

# Start in the model-server home directory with TEMP pointing at its tmp directory.
WORKDIR /home/model-server
ENV TEMP=/home/model-server/tmp
ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"]
CMD ["serve"]

0 comments on commit cbdfe25

Please sign in to comment.