Skip to content

Commit

Permalink
Add Dockerfile and build_image.sh modification
Browse files Browse the repository at this point in the history
  • Loading branch information
jakki-amd committed Dec 20, 2024
1 parent 9bcbd22 commit 251a6df
Show file tree
Hide file tree
Showing 6 changed files with 164 additions and 37 deletions.
73 changes: 64 additions & 9 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,12 @@ ARG BRANCH_NAME
# Repository that is cloned when the sources are fetched with git instead of copied.
ARG REPO_URL=https://github.com/pytorch/serve.git
# key=value form; the space-separated "ENV key value" syntax is legacy.
ENV PYTHONUNBUFFERED=TRUE

RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \
apt-get update && \
apt-get upgrade -y && \
apt-get install software-properties-common -y && \
add-apt-repository -y ppa:deadsnakes/ppa && \
apt remove python-pip python3-pip && \
apt remove -y python-pip python3-pip && \
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
ca-certificates \
g++ \
Expand All @@ -55,6 +55,13 @@ RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
git \
&& rm -rf /var/lib/apt/lists/*

# Install the ROCm development packages only when a ROCm version was requested.
# An ARG is visible only to instructions that follow its declaration in the same
# stage, so it is declared here, ahead of the RUN that tests it; otherwise the
# condition would always see an empty value and the install would be skipped.
# NOTE(review): assumes USE_ROCM_VERSION is not already declared earlier in this stage - confirm.
ARG USE_ROCM_VERSION=""
RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \
    if [ "$USE_ROCM_VERSION" ]; then \
        apt-get update && \
        DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y rocm-dev amd-smi-lib \
        && rm -rf /var/lib/apt/lists/* ; \
    fi

# Make the virtual environment and "activating" it by adding it first to the path.
# From here on the python$PYTHON_VERSION interpreter is used and the packages
# are installed in /home/venv which is what we need for the "runtime-image"
Expand All @@ -67,6 +74,7 @@ RUN python -m pip install -U pip setuptools
RUN export USE_CUDA=1

ARG USE_CUDA_VERSION=""
ARG USE_ROCM_VERSION=""

COPY ./ serve

Expand All @@ -76,7 +84,6 @@ RUN \
git clone --recursive $REPO_URL -b $BRANCH_NAME serve; \
fi


WORKDIR "serve"

RUN cp docker/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh
Expand All @@ -90,6 +97,14 @@ RUN \
else \
python ./ts_scripts/install_dependencies.py;\
fi; \
elif echo "${BASE_IMAGE}" | grep -q "rocm/"; then \
# Install ROCm version specific binary when ROCm version is specified as a build arg
if [ "$USE_ROCM_VERSION" ]; then \
python ./ts_scripts/install_dependencies.py --rocm $USE_ROCM_VERSION;\
# Install the binary with the latest CPU image on a ROCm base image
else \
python ./ts_scripts/install_dependencies.py; \
fi; \
# Install the CPU binary
else \
python ./ts_scripts/install_dependencies.py; \
Expand All @@ -111,13 +126,14 @@ FROM ${BASE_IMAGE} AS production-image
# Re-state ARG PYTHON_VERSION to make it active in this build-stage (uses default define at the top)
ARG PYTHON_VERSION
# key=value form; the space-separated "ENV key value" syntax is legacy.
ENV PYTHONUNBUFFERED=TRUE
# Optional ROCm selector: the ROCm-specific RUN steps of this stage only act when it is non-empty.
ARG USE_ROCM_VERSION

RUN --mount=type=cache,target=/var/cache/apt \
RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \
apt-get update && \
apt-get upgrade -y && \
apt-get install software-properties-common -y && \
add-apt-repository ppa:deadsnakes/ppa -y && \
apt remove python-pip python3-pip && \
apt remove -y python-pip python3-pip && \
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
python$PYTHON_VERSION \
python3-distutils \
Expand All @@ -130,13 +146,25 @@ RUN --mount=type=cache,target=/var/cache/apt \
&& rm -rf /var/lib/apt/lists/* \
&& cd /tmp

# ROCm builds only: add the ROCm development packages and the amd-smi library.
# Nothing is installed when USE_ROCM_VERSION is empty.
RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \
    test -z "$USE_ROCM_VERSION" || { \
        apt-get update \
        && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
            rocm-dev \
            amd-smi-lib \
        && rm -rf /var/lib/apt/lists/* ; \
    }

RUN useradd -m model-server \
&& mkdir -p /home/model-server/tmp

COPY --chown=model-server --from=compile-image /home/venv /home/venv
COPY --from=compile-image /usr/local/bin/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh
ENV PATH="/home/venv/bin:$PATH"

# ROCm builds only: install the amd_smi package from /opt/rocm/share/amd_smi with the
# `python` found on PATH. --no-cache-dir keeps pip's cache out of the image layer.
RUN \
    if [ "$USE_ROCM_VERSION" ]; then \
        python -m pip install --no-cache-dir /opt/rocm/share/amd_smi; \
    fi

RUN chmod +x /usr/local/bin/dockerd-entrypoint.sh \
&& chown -R model-server /home/model-server

Expand All @@ -157,13 +185,14 @@ FROM ${BASE_IMAGE} AS ci-image
ARG PYTHON_VERSION
ARG BRANCH_NAME
# key=value form; the space-separated "ENV key value" syntax is legacy.
ENV PYTHONUNBUFFERED=TRUE
# Optional ROCm selector: the ROCm-specific RUN steps of this stage only act when it is non-empty.
ARG USE_ROCM_VERSION

RUN --mount=type=cache,target=/var/cache/apt \
RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \
apt-get update && \
apt-get upgrade -y && \
apt-get install software-properties-common -y && \
add-apt-repository -y ppa:deadsnakes/ppa && \
apt remove python-pip python3-pip && \
apt remove -y python-pip python3-pip && \
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
python$PYTHON_VERSION \
python3-distutils \
Expand All @@ -183,13 +212,24 @@ RUN --mount=type=cache,target=/var/cache/apt \
&& rm -rf /var/lib/apt/lists/* \
&& cd /tmp

# ROCm builds only: add the ROCm development packages and the amd-smi library.
# Nothing is installed when USE_ROCM_VERSION is empty.
RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \
    test -z "$USE_ROCM_VERSION" || { \
        apt-get update \
        && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
            rocm-dev \
            amd-smi-lib \
        && rm -rf /var/lib/apt/lists/* ; \
    }

COPY --from=compile-image /home/venv /home/venv

ENV PATH="/home/venv/bin:$PATH"

RUN python -m pip install --no-cache-dir -r https://raw.githubusercontent.com/pytorch/serve/$BRANCH_NAME/requirements/developer.txt

# ROCm builds only: install the amd_smi package from /opt/rocm/share/amd_smi with the
# `python` found on PATH. --no-cache-dir keeps pip's cache out of the image layer.
RUN \
    if [ "$USE_ROCM_VERSION" ]; then \
        python -m pip install --no-cache-dir /opt/rocm/share/amd_smi; \
    fi

RUN mkdir /home/serve
ENV TS_RUN_IN_DOCKER True

Expand All @@ -203,11 +243,13 @@ ARG PYTHON_VERSION
ARG BRANCH_NAME
ARG BUILD_FROM_SRC
ARG LOCAL_CHANGES
# Optional ROCm selector: the amd_smi install step of this stage only runs when it is non-empty.
ARG USE_ROCM_VERSION
ARG BUILD_WITH_IPEX
ARG IPEX_VERSION=1.11.0
ARG IPEX_URL=https://software.intel.com/ipex-whl-stable
# key=value form; the space-separated "ENV key value" syntax is legacy.
ENV PYTHONUNBUFFERED=TRUE
RUN --mount=type=cache,target=/var/cache/apt \

RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \
apt-get update && \
apt-get upgrade -y && \
apt-get install software-properties-common -y && \
Expand All @@ -227,9 +269,15 @@ RUN --mount=type=cache,target=/var/cache/apt \
# https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1009905
openjdk-17-jdk \
build-essential \
wget \
curl \
vim \
numactl \
nodejs \
npm \
zip \
unzip \
&& npm install -g [email protected] newman-reporter-htmlextra markdown-link-check \
&& if [ "$BUILD_WITH_IPEX" = "true" ]; then apt-get update && apt-get install -y libjemalloc-dev libgoogle-perftools-dev libomp-dev && ln -s /usr/lib/x86_64-linux-gnu/libjemalloc.so /usr/lib/libjemalloc.so && ln -s /usr/lib/x86_64-linux-gnu/libtcmalloc.so /usr/lib/libtcmalloc.so && ln -s /usr/lib/x86_64-linux-gnu/libiomp5.so /usr/lib/libiomp5.so; fi \
&& rm -rf /var/lib/apt/lists/*

Expand All @@ -243,10 +291,17 @@ RUN \

COPY --from=compile-image /home/venv /home/venv
ENV PATH="/home/venv/bin:$PATH"

# ROCm builds only: install the amd_smi package from /opt/rocm/share/amd_smi with the
# `python` found on PATH. --no-cache-dir keeps pip's cache out of the image layer.
RUN \
    if [ "$USE_ROCM_VERSION" ]; then \
        python -m pip install --no-cache-dir /opt/rocm/share/amd_smi; \
    fi

WORKDIR "serve"

RUN python -m pip install -U pip setuptools \
&& python -m pip install --no-cache-dir -r requirements/developer.txt \
&& python ts_scripts/install_from_src.py \
&& python ts_scripts/install_from_src.py --environment=dev\
&& useradd -m model-server \
&& mkdir -p /home/model-server/tmp \
&& cp docker/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh \
Expand Down
30 changes: 27 additions & 3 deletions docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ Use `build_image.sh` script to build the docker images. The script builds the `p
|-bt, --buildtype|Which type of docker image to build. Can be one of : production, dev, ci|
|-t, --tag|Tag name for image. If not specified, script uses torchserve default tag names.|
|-cv, --cudaversion| Specify the CUDA version to use. Supported values `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`, `cu118`, `cu121`. Default `cu121`|
|-rv, --rocmversion| Specify the ROCm version to use. Supported values `rocm60`, `rocm61`, `rocm62` |
|-ipex, --build-with-ipex| Specify to build with intel_extension_for_pytorch. If not specified, script builds without intel_extension_for_pytorch.|
|-cpp, --build-cpp| Specify to build TorchServe CPP|
|-n, --nightly| Specify to build with TorchServe nightly.|
Expand All @@ -62,9 +63,9 @@ Creates a docker image with publicly available `torchserve` and `torch-model-arc
./build_image.sh
```

- To create a GPU based image with cuda 10.2. Options are `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`, `cu118`
- To create a GPU based image with cuda 10.2. Options are `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`, `cu118` for CUDA and `rocm60`, `rocm61`, `rocm62` for ROCm.

- GPU images are built with NVIDIA CUDA base image. If you want to use ONNX, please specify the base image as shown in the next section.
- GPU images are built with either an NVIDIA CUDA base image or an AMD ROCm base image. If you want to use ONNX, please specify the base image as shown in the next section.

```bash
./build_image.sh -g -cv cu117
Expand Down Expand Up @@ -132,6 +133,24 @@ Creates a docker image with `torchserve` and `torch-model-archiver` installed fr
./build_image.sh -bt dev -g -cv cu92
```

- For creating GPU based image with rocm version 6.0:

```bash
./build_image.sh -bt dev -g -rv rocm60
```

- For creating GPU based image with rocm version 6.1:

```bash
./build_image.sh -bt dev -g -rv rocm61
```

- For creating GPU based image with rocm version 6.2:

```bash
./build_image.sh -bt dev -g -rv rocm62
```

- For creating GPU based image with a different branch:

```bash
Expand Down Expand Up @@ -164,7 +183,6 @@ Creates a docker image with `torchserve` and `torch-model-archiver` installed fr
./build_image.sh -bt dev -g [-cv cu121|cu118] -cpp
```

- For ROCm support (*experimental*), refer to [this documentation](../docs/hardware_support/amd_support.md).

## Start a container with a TorchServe image

Expand Down Expand Up @@ -204,6 +222,12 @@ For GPU latest image with gpu devices 1 and 2:
docker run --rm -it --gpus '"device=1,2"' -p 127.0.0.1:8080:8080 -p 127.0.0.1:8081:8081 -p 127.0.0.1:8082:8082 -p 127.0.0.1:7070:7070 -p 127.0.0.1:7071:7071 pytorch/torchserve:latest-gpu
```

For GPU with ROCm support with gpu devices 1 and 2:

```bash
docker run --rm -it --device=/dev/kfd --device=/dev/dri -e HIP_VISIBLE_DEVICES=1,2 -p 127.0.0.1:8080:8080 -p 127.0.0.1:8081:8081 -p 127.0.0.1:8082:8082 -p 127.0.0.1:7070:7070 -p 127.0.0.1:7071:7071 pytorch/torchserve:latest-gpu
```

For specific versions you can pass in the specific tag to use (ex: `0.1.1-cuda10.1-cudnn7-runtime`):

```bash
Expand Down
43 changes: 40 additions & 3 deletions docker/build_image.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ BASE_IMAGE="ubuntu:20.04"
UPDATE_BASE_IMAGE=false
USE_CUSTOM_TAG=false
CUDA_VERSION=""
ROCM_VERSION=""
USE_LOCAL_SERVE_FOLDER=false
BUILD_WITH_IPEX=false
BUILD_CPP=false
Expand All @@ -33,6 +34,7 @@ do
echo "-bi, --baseimage specify base docker image. Example: nvidia/cuda:11.7.0-cudnn8-runtime-ubuntu20.04 "
echo "-bt, --buildtype specify for type of created image. Possible values: production, dev, ci."
echo "-cv, --cudaversion specify to cuda version to use"
echo "-rv, --rocmversion specify to rocm version to use"
echo "-t, --tag specify tag name for docker image"
echo "-lf, --use-local-serve-folder specify this option for the benchmark image if the current 'serve' folder should be used during automated benchmarks"
echo "-ipex, --build-with-ipex specify to build with intel_extension_for_pytorch"
Expand Down Expand Up @@ -167,6 +169,24 @@ do
shift
shift
;;
-rv|--rocmversion)
  # Remember the requested ROCm release and pick the matching rocm/dev-ubuntu
  # base image; any other value aborts the script.
  ROCM_VERSION="$2"
  case "${ROCM_VERSION}" in
    rocm60)
      BASE_IMAGE="rocm/dev-ubuntu-22.04:6.0.2"
      ;;
    rocm61)
      BASE_IMAGE="rocm/dev-ubuntu-22.04:6.1.2"
      ;;
    rocm62)
      BASE_IMAGE="rocm/dev-ubuntu-22.04:6.2.4"
      ;;
    *)
      echo "ROCM version not supported"
      exit 1
      ;;
  esac
  # Consume the option and its value.
  shift
  shift
  ;;
esac
done

Expand Down Expand Up @@ -218,6 +238,23 @@ then
exit 1
fi
fi

# Pick the ROCm "-complete" base image for this build when a ROCm version was
# requested. The guard checks ROCM_VERSION only: also entering on MACHINE=gpu
# would send a GPU build with an empty ROCM_VERSION into the "not supported"
# branch and abort the script.
if [[ -n "${ROCM_VERSION}" ]];
then
  case "${ROCM_VERSION}" in
    rocm60)
      BASE_IMAGE="rocm/dev-ubuntu-22.04:6.0.2-complete"
      ;;
    rocm61)
      BASE_IMAGE="rocm/dev-ubuntu-22.04:6.1.2-complete"
      ;;
    rocm62)
      BASE_IMAGE="rocm/dev-ubuntu-22.04:6.2.4-complete"
      ;;
    *)
      echo "ROCm version $ROCM_VERSION is not supported for CPP"
      exit 1
      ;;
  esac
fi
fi

if [ "${BUILD_TYPE}" == "production" ]; then
Expand All @@ -232,16 +269,16 @@ if [ "${BUILD_TYPE}" == "production" ]; then
fi
elif [ "${BUILD_TYPE}" == "ci" ];
then
DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\
DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg USE_ROCM_VERSION="${ROCM_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\
--build-arg BUILD_NIGHTLY="${BUILD_NIGHTLY}" --build-arg BRANCH_NAME="${BRANCH_NAME}" --build-arg REPO_URL="${REPO_URL}" --build-arg BUILD_FROM_SRC="${BUILD_FROM_SRC}"\
--build-arg LOCAL_CHANGES="${LOCAL_CHANGES}" -t "${DOCKER_TAG}" --target ci-image ../
else
if [ "${BUILD_CPP}" == "true" ]
then
DOCKER_BUILDKIT=1 docker build --file Dockerfile.cpp --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\
DOCKER_BUILDKIT=1 docker build --file Dockerfile.cpp --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg USE_ROCM_VERSION="${ROCM_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\
--build-arg BRANCH_NAME="${BRANCH_NAME}" --build-arg REPO_URL="${REPO_URL}" -t "${DOCKER_TAG}" --target cpp-dev-image .
else
DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\
DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg USE_ROCM_VERSION="${ROCM_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\
--build-arg BUILD_NIGHTLY="${BUILD_NIGHTLY}" --build-arg BRANCH_NAME="${BRANCH_NAME}" --build-arg REPO_URL="${REPO_URL}" --build-arg BUILD_FROM_SRC="${BUILD_FROM_SRC}" --build-arg LOCAL_CHANGES="${LOCAL_CHANGES}"\
--build-arg BUILD_WITH_IPEX="${BUILD_WITH_IPEX}" -t "${DOCKER_TAG}" --target dev-image ../
fi
Expand Down
18 changes: 2 additions & 16 deletions docs/hardware_support/amd_support.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ TorchServe can be run on any combination of operating system and device that is

## Supported Versions of ROCm

The current stable `major.patch` version of ROCm and the previous path version will be supported. For example version `N.2` and `N.1` where `N` is the current major version.
The current stable `major.patch` version of ROCm and the previous patch version will be supported. For example version `N.2` and `N.1` where `N` is the current major version.

## Installation

Expand Down Expand Up @@ -35,7 +35,7 @@ The current stable `major.patch` version of ROCm and the previous path version w
- install the dependencies needed for ROCm support.

```bash
python ./ts_scripts/install_dependencies.py --rocm=rocm61
python ./ts_scripts/install_dependencies.py --rocm=rocm62
python ./ts_scripts/install_from_src.py
```
- enable amd-smi in the python virtual environment
Expand All @@ -60,20 +60,6 @@ If you have 8 accelerators but only want TorchServe to see the last four of them
> ⚠️ Setting both `CUDA_VISIBLE_DEVICES` and `HIP_VISIBLE_DEVICES` may cause unintended behaviour and should be avoided.
> Doing so may cause an exception in the future.

## Docker

**In Development**

`Dockerfile.rocm` provides preliminary ROCm support for TorchServe.

Building and running `dev-image`:

```bash
docker build --file docker/Dockerfile.rocm --target dev-image -t torch-serve-dev-image-rocm --build-arg USE_ROCM_VERSION=rocm62 --build-arg BUILD_FROM_SRC=true .
docker run -it --rm --device=/dev/kfd --device=/dev/dri torch-serve-dev-image-rocm bash
```

## Example Usage

After installing TorchServe with the required dependencies for ROCm you should be ready to serve your model.
Expand Down
5 changes: 2 additions & 3 deletions docs/sphinx/Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@

# Minimal makefile for Sphinx documentation
#

Expand Down Expand Up @@ -26,6 +26,5 @@ docset: html
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
cp ../../SECURITY.md ../security.md
cp ../../examples/usecases/RAG_based_LLM_serving/README.md ../enhancing_llm_serving_compile_rag.md
cp ../../examples/usecases/llm_diffusion_serving_app/README.md ../llm_diffusion_serving_app.md
cp ../../examples//usecases/RAG_based_LLM_serving/README.md ../enhancing_llm_serving_compile_rag.md
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
Loading

0 comments on commit 251a6df

Please sign in to comment.