diff --git a/docker/Dockerfile b/docker/Dockerfile index 330f89c4e..8f5eb7cf1 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,52 +1,66 @@ FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 -ENV HOME=/app/aphrodite-engine +# Adjust UID/GID when mounting volumes need to match the host UID/GID +ARG UID=1000 +ARG GID=1000 -WORKDIR $HOME +ARG APHRODITE_BRANCH=main + +# Setting MAX_JOBS allows build server to limit ninja build jobs. For reference +# see https://github.com/PygmalionAI/aphrodite-engine/wiki/1.-Installation#build-from-source +ARG MAX_JOBS + +# Setting TORCH_CUDA_ARCH_LIST specifies the CUDA architectures to compile for. +# a list of possible values can be found at: +# https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#gpu-feature-list +ARG TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX" + +ARG FLASH_ATTN_VERSION="2.5.8" # Upgrade OS Packages + Prepare Python Environment RUN set -eux; \ - export DEBIAN_FRONTEND=noninteractive \ - && apt-get update \ - && apt-get upgrade -y \ - && apt-get install -y bzip2 g++ git make python3-pip tzdata \ - && rm -fr /var/lib/apt/lists/* - -# Alias python3 to python -RUN ln -s /usr/bin/python3 /usr/bin/python + export DEBIAN_FRONTEND=noninteractive; \ + apt-get update && \ + apt-get upgrade -y && \ + apt-get install -y \ + bzip2 \ + g++ \ + git \ + make \ + python3-pip \ + tzdata && \ + python3 -m pip install --no-cache-dir --upgrade pip && \ + ln -s /usr/bin/python3 /usr/bin/python && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* -RUN python3 -m pip install --no-cache-dir --upgrade pip +# Add image service account +ENV USER=aphrodite-engine +ENV APP_HOME=/app +RUN groupadd -g ${GID} ${USER} && \ + useradd -u ${UID} -g ${GID} -d /home/${USER} -m -s /bin/bash ${USER} -RUN git clone https://github.com/PygmalionAI/aphrodite-engine.git /tmp/aphrodite-engine \ - && mv /tmp/aphrodite-engine/* . 
\ - && rm -fr /tmp/aphrodite-engine \ - && chmod +x docker/entrypoint.sh +# Install aphrodite-engine, creates APP_HOME +RUN git clone -b ${APHRODITE_BRANCH} https://github.com/PygmalionAI/aphrodite-engine.git ${APP_HOME} && \ + chmod +x ${APP_HOME}/docker/entrypoint.sh -# Allow build servers to limit ninja build jobs. For reference -# see https://github.com/PygmalionAI/aphrodite-engine/wiki/1.-Installation#build-from-source -ARG MAX_JOBS +# Install aphrodite-engine dependencies ENV MAX_JOBS=${MAX_JOBS} - -# Export the CUDA_HOME variable correctly ENV CUDA_HOME=/usr/local/cuda - ENV HF_HOME=/tmp ENV NUMBA_CACHE_DIR=$HF_HOME/numba_cache -ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX" -RUN python3 -m pip install --no-cache-dir -e . +ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST} +RUN python3 -m pip install --no-cache-dir -e ${APP_HOME} -# Workaround to properly install flash-attn. For reference -# see: https://github.com/Dao-AILab/flash-attention/issues/453 -RUN python3 -m pip install 'flash-attn>=2.5.8' --no-build-isolation -# Entrypoint exec form doesn't do variable substitution automatically ($HOME) -ENTRYPOINT ["/app/aphrodite-engine/docker/entrypoint.sh"] +# Workaround to properly install flash-attn. This needs to be executed after installing aphrodite-engine +# build dependencies. 
For reference see: https://github.com/Dao-AILab/flash-attention/issues/453 +RUN python3 -m pip install "flash-attn>=${FLASH_ATTN_VERSION}" --no-build-isolation --no-cache-dir EXPOSE 7860 -# Service UID needs write access to $HOME to create temporary folders, see #458 -RUN chown 1000:1000 ${HOME} - -USER 1000:0 - VOLUME ["/tmp"] + +USER ${USER} +WORKDIR ${APP_HOME} +ENTRYPOINT ["/bin/sh", "-c", "exec \"${APP_HOME}/docker/entrypoint.sh\" \"$@\"", "--"] diff --git a/requirements-common.txt b/requirements-common.txt index 6132cab1d..1215b61b7 100644 --- a/requirements-common.txt +++ b/requirements-common.txt @@ -13,7 +13,6 @@ fastapi colorlog einops # for phi prometheus_client # for prometheus metrics -triton >= 2.2.0 lark == 1.1.8 # for grammars scipy # for quip rich