-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #17 from maysunfaisal/vllm-0.6.4-1
Upload vllm 0.6.4 model server
- Loading branch information
Showing
29 changed files
with
3,014 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
# Stage "cuda-runtime": UBI 9 / Python 3.11 base with the CUDA 12.1 runtime
# libraries installed from NVIDIA's RPM repository.
#
# NOTE(review): the base image has no tag or digest, so it resolves to
# `latest`. Pin it to a specific tag/digest for reproducible builds.
FROM registry.access.redhat.com/ubi9/python-311 AS cuda-runtime

###################################################################################################
# CUDA 12.1 Layer, from https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/12.1.1 #
###################################################################################################

# Base
# Switch to UID 0 (root) for the repository setup and package installs below.
USER 0

# Each ENV is kept as its own instruction: later values reference earlier ones
# via ${...}, and substitution inside a single ENV instruction only sees values
# set by previous instructions.
ENV NVARCH=x86_64
ENV NVIDIA_REQUIRE_CUDA="cuda>=12.1 brand=tesla,driver>=470,driver<471 brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 brand=quadrortx,driver>=470,driver<471 brand=titan,driver>=470,driver<471 brand=titanrtx,driver>=470,driver<471 brand=tesla,driver>=525,driver<526 brand=unknown,driver>=525,driver<526 brand=nvidia,driver>=525,driver<526 brand=nvidiartx,driver>=525,driver<526 brand=geforce,driver>=525,driver<526 brand=geforcertx,driver>=525,driver<526 brand=quadro,driver>=525,driver<526 brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526"
ENV NV_CUDA_CUDART_VERSION=12.1.105-1

COPY cuda.repo-x86_64 /etc/yum.repos.d/cuda.repo

# Download NVIDIA's repository signing key, drop any "Version" header line, and
# verify the result against a pinned SHA-256. A failed or truncated download
# produces a different hash, so the checksum step aborts the build.
RUN NVIDIA_GPGKEY_SUM=d0664fbbdb8c32356d45de36c5984617217b2d0bef41b93ccecd326ba3b80c87 && \
    curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel9/${NVARCH}/D42D0685.pub | sed '/^Version/d' > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
    echo "$NVIDIA_GPGKEY_SUM /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA" | sha256sum -c --strict -

ENV CUDA_VERSION=12.1.1

# For libraries in the cuda-compat-* package: https://docs.nvidia.com/cuda/eula/index.html#attachment-a
# Install the CUDA runtime, expose it at /usr/local/cuda through a relative
# symlink, and purge package caches and logs in the same layer.
RUN yum upgrade -y && yum install -y \
        cuda-cudart-12-1-${NV_CUDA_CUDART_VERSION} \
        cuda-compat-12-1 \
    && ln -s cuda-12.1 /usr/local/cuda \
    && yum -y clean all --enablerepo='*' \
    && rm -rf /var/cache/dnf \
    && find /var/log -type f -name "*.log" -exec rm -f {} \;

# nvidia-docker 1.0
RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
    echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf

ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64

COPY NGC-DL-CONTAINER-LICENSE /

# nvidia-container-runtime
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility

# Runtime
ENV NV_CUDA_LIB_VERSION=12.1.1-1

ENV NV_NVTX_VERSION=12.1.105-1
ENV NV_LIBNPP_VERSION=12.1.0.40-1
ENV NV_LIBNPP_PACKAGE=libnpp-12-1-${NV_LIBNPP_VERSION}
ENV NV_LIBCUBLAS_VERSION=12.1.3.1-1
ENV NV_LIBNCCL_PACKAGE_NAME=libnccl
ENV NV_LIBNCCL_PACKAGE_VERSION=2.17.1-1
ENV NV_LIBNCCL_VERSION=2.17.1
ENV NCCL_VERSION=2.17.1
ENV NV_LIBNCCL_PACKAGE=${NV_LIBNCCL_PACKAGE_NAME}-${NV_LIBNCCL_PACKAGE_VERSION}+cuda12.1

# Runtime libraries, pinned through the versions declared above. The cleanup
# removes /var/cache/dnf as well as /var/cache/yum so it matches the first
# install layer in this stage.
RUN yum install -y \
        cuda-libraries-12-1-${NV_CUDA_LIB_VERSION} \
        cuda-nvtx-12-1-${NV_NVTX_VERSION} \
        ${NV_LIBNPP_PACKAGE} \
        libcublas-12-1-${NV_LIBCUBLAS_VERSION} \
        ${NV_LIBNCCL_PACKAGE} \
    && yum clean all \
    && rm -rf /var/cache/dnf /var/cache/yum

# Set this flag so that libraries can find the location of CUDA
ENV XLA_FLAGS=--xla_gpu_cuda_data_dir=/usr/local/cuda
|
||
# CUDA Devel image
# Stage "cuda-devel": extends cuda-runtime with the CUDA 12.1 development
# packages (headers, compiler toolchain, profiling tools) plus make/findutils.
FROM cuda-runtime AS cuda-devel

# These four values repeat what cuda-runtime already exports; they are
# re-declared here with identical values so the stage reads on its own.
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NV_CUDA_LIB_VERSION=12.1.1-1
ENV NCCL_VERSION=2.17.1

# Package pins. Each ENV stays a separate instruction because the *_PACKAGE
# values expand ${...} references to variables set by earlier instructions.
ENV NV_NVPROF_VERSION=12.1.105-1
ENV NV_NVPROF_DEV_PACKAGE=cuda-nvprof-12-1-${NV_NVPROF_VERSION}
ENV NV_CUDA_CUDART_DEV_VERSION=12.1.105-1
ENV NV_NVML_DEV_VERSION=12.1.105-1
ENV NV_LIBCUBLAS_DEV_VERSION=12.1.3.1-1
ENV NV_LIBNPP_DEV_VERSION=12.1.0.40-1
ENV NV_LIBNPP_DEV_PACKAGE=libnpp-devel-12-1-${NV_LIBNPP_DEV_VERSION}
ENV NV_LIBNCCL_DEV_PACKAGE_NAME=libnccl-devel
ENV NV_LIBNCCL_DEV_PACKAGE_VERSION=2.17.1-1
ENV NV_LIBNCCL_DEV_PACKAGE=${NV_LIBNCCL_DEV_PACKAGE_NAME}-${NV_LIBNCCL_DEV_PACKAGE_VERSION}+cuda12.1
ENV NV_CUDA_NSIGHT_COMPUTE_VERSION=12.1.1-1
ENV NV_CUDA_NSIGHT_COMPUTE_DEV_PACKAGE=cuda-nsight-compute-12-1-${NV_CUDA_NSIGHT_COMPUTE_VERSION}

# Install the development toolchain and clean the package caches in the same
# layer. /var/cache/dnf is removed alongside /var/cache/yum, matching the
# cleanup path used by the first install layer of the cuda-runtime stage.
RUN yum install -y \
        make \
        findutils \
        cuda-command-line-tools-12-1-${NV_CUDA_LIB_VERSION} \
        cuda-libraries-devel-12-1-${NV_CUDA_LIB_VERSION} \
        cuda-minimal-build-12-1-${NV_CUDA_LIB_VERSION} \
        cuda-cudart-devel-12-1-${NV_CUDA_CUDART_DEV_VERSION} \
        ${NV_NVPROF_DEV_PACKAGE} \
        cuda-nvml-devel-12-1-${NV_NVML_DEV_VERSION} \
        libcublas-devel-12-1-${NV_LIBCUBLAS_DEV_VERSION} \
        ${NV_LIBNPP_DEV_PACKAGE} \
        ${NV_LIBNCCL_DEV_PACKAGE} \
        ${NV_CUDA_NSIGHT_COMPUTE_DEV_PACKAGE} \
    && yum clean all \
    && rm -rf /var/cache/dnf /var/cache/yum

# Link-time search path pointing at the CUDA stub libraries.
ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs

#############################################
# End of CUDA 12.1 Layer                    #
#############################################
|
||
###################################
# vLLM install in build container #
###################################

# Stage "vllm-install": installs the Python dependencies on top of the CUDA
# devel image. Only selected paths from this stage are copied into the final
# image, so the build toolchain itself is left behind.
FROM cuda-devel AS vllm-install

WORKDIR /opt/app-root/src

# Install as the unprivileged UID 1001 so the resulting files are not root-owned.
USER 1001

COPY --chown=1001:0 requirements.txt ./

# Steps performed in this single layer:
#   1. Install the pinned requirements, then delete the manifest.
#   2. Install flash-attn from PyPI. --no-build-isolation makes pip build it
#      against the packages already present in this environment instead of a
#      fresh isolated one.
#   3. Correction for FIPS mode: replace every "md5" with "sha1" in triton's
#      jit.py.
#   4. Fix permissions to support pip in OpenShift environments: make
#      site-packages group-writable, then run fix-permissions on /opt/app-root.
#      NOTE(review): fix-permissions is not defined in this file; presumably it
#      ships with the base image. The sed uses the lib64 path while the chmod
#      uses lib; confirm both resolve to the same directory.
RUN pip install --no-cache-dir -r requirements.txt && \
    rm -f requirements.txt && \
    pip install --no-cache-dir flash-attn==2.5.8 --no-build-isolation && \
    sed -i 's/md5/sha1/g' /opt/app-root/lib64/python3.11/site-packages/triton/runtime/jit.py && \
    chmod -R g+w /opt/app-root/lib/python3.11/site-packages && \
    fix-permissions /opt/app-root -P
|
||
##################
# vLLM container #
##################

# Final image: starts from the CUDA runtime stage (no devel packages) and pulls
# in only the artifacts produced by the vllm-install stage.
FROM cuda-runtime AS vllm-container

WORKDIR /opt/app-root/src

# Python packages installed in the build stage, owned by UID 1001 / GID 0.
COPY --from=vllm-install --chown=1001:0 /opt/app-root/lib64/python3.11/site-packages /opt/app-root/lib64/python3.11/site-packages
# NCCL shared library taken from the build stage's vLLM config directory.
# NOTE(review): the destination is a file path, so the wildcard is expected to
# match exactly one file. Confirm that the vLLM version in requirements.txt
# still places libnccl under .config/vllm/nccl/cu12.
COPY --from=vllm-install --chown=1001:0 /opt/app-root/src/.config/vllm/nccl/cu12/libnccl.so.2* /usr/local/lib/libnccl.so.2

# Fix VLLM_NCCL_SO_PATH
# Set it to the library location copied in by the instruction above.
ENV VLLM_NCCL_SO_PATH=/usr/local/lib/libnccl.so.2

# Drop from root (inherited from cuda-runtime) to the unprivileged user.
USER 1001

# Documents the port used by the API server; it does not publish it.
EXPOSE 8000

# Exec form: the Python process is PID 1, and arguments given at `docker run`
# are appended to this command.
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
|
Oops, something went wrong.