Skip to content

Commit

Permalink
Merge pull request #17 from maysunfaisal/vllm-0.6.4-1
Browse files Browse the repository at this point in the history
Upload vllm 0.6.4 model server
  • Loading branch information
maysunfaisal authored Dec 20, 2024
2 parents 3993403 + 67c9be1 commit 0987b6a
Show file tree
Hide file tree
Showing 29 changed files with 3,014 additions and 0 deletions.
153 changes: 153 additions & 0 deletions model-servers/vllm/0.6.4/Containerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
# NOTE(review): the base image is referenced without a tag or digest, so the
# build floats with whatever the registry currently serves. Consider pinning.
FROM registry.access.redhat.com/ubi9/python-311 AS cuda-runtime

###################################################################################################
# CUDA 12.1 Layer, from https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/12.1.1 #
###################################################################################################

# Base
# Run as root: the following steps write under /etc and install RPM packages.
USER 0

# NVARCH selects the architecture directory of the NVIDIA RHEL 9 repository
# used in the key download below. NV_CUDA_CUDART_VERSION pins the cudart RPM
# installed later in this stage.
# NVIDIA_REQUIRE_CUDA is a CUDA/driver constraint string (presumably evaluated
# by the NVIDIA container runtime at start-up; confirm against NVIDIA docs).
ENV NVARCH=x86_64 \
    NVIDIA_REQUIRE_CUDA="cuda>=12.1 brand=tesla,driver>=470,driver<471 brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 brand=quadrortx,driver>=470,driver<471 brand=titan,driver>=470,driver<471 brand=titanrtx,driver>=470,driver<471 brand=tesla,driver>=525,driver<526 brand=unknown,driver>=525,driver<526 brand=nvidia,driver>=525,driver<526 brand=nvidiartx,driver>=525,driver<526 brand=geforce,driver>=525,driver<526 brand=geforcertx,driver>=525,driver<526 brand=quadro,driver>=525,driver<526 brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526" \
    NV_CUDA_CUDART_VERSION=12.1.105-1

# Register the NVIDIA CUDA yum repository definition shipped in the build context.
COPY cuda.repo-x86_64 /etc/yum.repos.d/cuda.repo

# Download the NVIDIA repository GPG key, drop its "Version" header line, and
# verify the stored result against a pinned SHA-256 sum. A failed or truncated
# download produces a different digest, so the checksum step fails the build.
RUN NVIDIA_GPGKEY_SUM=d0664fbbdb8c32356d45de36c5984617217b2d0bef41b93ccecd326ba3b80c87 \
    && curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel9/${NVARCH}/D42D0685.pub \
        | sed '/^Version/d' > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA \
    && echo "$NVIDIA_GPGKEY_SUM /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA" | sha256sum -c --strict -

ENV CUDA_VERSION=12.1.1

# For libraries in the cuda-compat-* package: https://docs.nvidia.com/cuda/eula/index.html#attachment-a
# One layer does all of the following so that package caches never persist:
#   1. upgrade the packages already present in the base image,
#   2. install the pinned CUDA runtime (cudart) plus the cuda-compat package,
#   3. create the /usr/local/cuda symlink pointing at the sibling cuda-12.1 directory,
#   4. clear yum/dnf metadata and caches and delete *.log files under /var/log.
RUN yum upgrade -y \
    && yum install -y \
        cuda-cudart-12-1-${NV_CUDA_CUDART_VERSION} \
        cuda-compat-12-1 \
    && ln -s cuda-12.1 /usr/local/cuda \
    && yum -y clean all --enablerepo='*' \
    && rm -rf /var/cache/dnf \
    && find /var/log -type f -name "*.log" -exec rm -f {} \;

# nvidia-docker 1.0
# Append the two NVIDIA library directories to the dynamic-linker configuration.
RUN { echo "/usr/local/nvidia/lib"; echo "/usr/local/nvidia/lib64"; } >> /etc/ld.so.conf.d/nvidia.conf

# Put the NVIDIA and CUDA binary directories ahead of the inherited PATH, and
# point the loader at the NVIDIA library directories.
ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} \
    LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64

# Ship the NGC container license at the image root.
COPY NGC-DL-CONTAINER-LICENSE /

# nvidia-container-runtime
ENV NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility

# Runtime
# Version pins for the CUDA runtime libraries installed below.
ENV NV_CUDA_LIB_VERSION=12.1.1-1 \
    NV_NVTX_VERSION=12.1.105-1 \
    NV_LIBNPP_VERSION=12.1.0.40-1 \
    NV_LIBCUBLAS_VERSION=12.1.3.1-1 \
    NV_LIBNCCL_PACKAGE_NAME=libnccl \
    NV_LIBNCCL_PACKAGE_VERSION=2.17.1-1 \
    NV_LIBNCCL_VERSION=2.17.1 \
    NCCL_VERSION=2.17.1

# Derived package names. These live in a separate ENV instruction because
# variable references are expanded using the values that were set before the
# instruction started, not values assigned earlier on the same instruction.
ENV NV_LIBNPP_PACKAGE=libnpp-12-1-${NV_LIBNPP_VERSION} \
    NV_LIBNCCL_PACKAGE=${NV_LIBNCCL_PACKAGE_NAME}-${NV_LIBNCCL_PACKAGE_VERSION}+cuda12.1

# Install the pinned runtime libraries and clean package caches in the same
# layer. /var/cache/dnf is removed as well, matching the cleanup performed by
# the cudart install step earlier in this stage.
RUN yum install -y \
        cuda-libraries-12-1-${NV_CUDA_LIB_VERSION} \
        cuda-nvtx-12-1-${NV_NVTX_VERSION} \
        ${NV_LIBNPP_PACKAGE} \
        libcublas-12-1-${NV_LIBCUBLAS_VERSION} \
        ${NV_LIBNCCL_PACKAGE} \
    && yum clean all \
    && rm -rf /var/cache/dnf /var/cache/yum/*

# Set this flag so that libraries can find the location of CUDA
ENV XLA_FLAGS=--xla_gpu_cuda_data_dir=/usr/local/cuda

# CUDA Devel image
# Extends the runtime stage with the CUDA compiler toolchain, headers and
# development libraries. Used only as a build environment.
FROM cuda-runtime AS cuda-devel

# Same values as already set in the cuda-runtime stage; restated here so the
# stage mirrors the upstream devel image definition.
ENV NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility

# Version pins for the development packages installed below.
ENV NV_CUDA_LIB_VERSION=12.1.1-1 \
    NV_NVPROF_VERSION=12.1.105-1 \
    NV_CUDA_CUDART_DEV_VERSION=12.1.105-1 \
    NV_NVML_DEV_VERSION=12.1.105-1 \
    NV_LIBCUBLAS_DEV_VERSION=12.1.3.1-1 \
    NV_LIBNPP_DEV_VERSION=12.1.0.40-1 \
    NV_LIBNCCL_DEV_PACKAGE_NAME=libnccl-devel \
    NV_LIBNCCL_DEV_PACKAGE_VERSION=2.17.1-1 \
    NCCL_VERSION=2.17.1 \
    NV_CUDA_NSIGHT_COMPUTE_VERSION=12.1.1-1

# Derived package names. Kept in their own ENV instruction because references
# are expanded from values set by earlier instructions, not from assignments
# made earlier on the same instruction.
ENV NV_NVPROF_DEV_PACKAGE=cuda-nvprof-12-1-${NV_NVPROF_VERSION} \
    NV_LIBNPP_DEV_PACKAGE=libnpp-devel-12-1-${NV_LIBNPP_DEV_VERSION} \
    NV_LIBNCCL_DEV_PACKAGE=${NV_LIBNCCL_DEV_PACKAGE_NAME}-${NV_LIBNCCL_DEV_PACKAGE_VERSION}+cuda12.1 \
    NV_CUDA_NSIGHT_COMPUTE_DEV_PACKAGE=cuda-nsight-compute-12-1-${NV_CUDA_NSIGHT_COMPUTE_VERSION}

# Install build tools and the pinned CUDA development packages, then clean the
# package caches in the same layer (both the dnf and the legacy yum location).
RUN yum install -y \
        make \
        findutils \
        cuda-command-line-tools-12-1-${NV_CUDA_LIB_VERSION} \
        cuda-libraries-devel-12-1-${NV_CUDA_LIB_VERSION} \
        cuda-minimal-build-12-1-${NV_CUDA_LIB_VERSION} \
        cuda-cudart-devel-12-1-${NV_CUDA_CUDART_DEV_VERSION} \
        ${NV_NVPROF_DEV_PACKAGE} \
        cuda-nvml-devel-12-1-${NV_NVML_DEV_VERSION} \
        libcublas-devel-12-1-${NV_LIBCUBLAS_DEV_VERSION} \
        ${NV_LIBNPP_DEV_PACKAGE} \
        ${NV_LIBNCCL_DEV_PACKAGE} \
        ${NV_CUDA_NSIGHT_COMPUTE_DEV_PACKAGE} \
    && yum clean all \
    && rm -rf /var/cache/dnf /var/cache/yum/*

# Let the linker find the CUDA stub libraries at build time.
ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs

#############################################
# End of CUDA 12.1 Layer #
#############################################

###################################
# vLLM install in build container #
###################################

FROM cuda-devel AS vllm-install

WORKDIR /opt/app-root/src

# Install Python packages as the unprivileged application user.
USER 1001

COPY --chown=1001:0 requirements.txt ./

# Steps performed in this single layer:
#   1. Install the pinned requirements, then remove the manifest.
#   2. Install flash-attn from PyPI without build isolation, so its build uses
#      the packages already installed in this environment.
#   3. Correction for FIPS mode: replace md5 with sha1 in triton's jit.py.
#   4. Fix permissions to support pip in OpenShift environments.
# Both pip invocations pass --no-cache-dir so no wheel cache is kept in the layer.
# NOTE(review): the chmod targets lib/ while the sed (and the final-stage COPY)
# use lib64/; presumably lib64 is a symlink to lib in this base image. Confirm.
RUN pip install --no-cache-dir -r requirements.txt \
    && rm -f requirements.txt \
    && pip install --no-cache-dir --no-build-isolation flash-attn==2.5.8 \
    && sed -i 's/md5/sha1/g' /opt/app-root/lib64/python3.11/site-packages/triton/runtime/jit.py \
    && chmod -R g+w /opt/app-root/lib/python3.11/site-packages \
    && fix-permissions /opt/app-root -P

##################
# vLLM container #
##################

# Final image: built on the runtime-only CUDA stage and populated with the
# Python packages produced by the vllm-install stage.
FROM cuda-runtime AS vllm-container

WORKDIR /opt/app-root/src

# Bring over the installed Python packages, owned by the application user.
COPY --from=vllm-install --chown=1001:0 \
    /opt/app-root/lib64/python3.11/site-packages \
    /opt/app-root/lib64/python3.11/site-packages

# Copy the NCCL shared library to a fixed location. The destination is a file
# path, so the wildcard is expected to match exactly one file.
COPY --from=vllm-install --chown=1001:0 \
    /opt/app-root/src/.config/vllm/nccl/cu12/libnccl.so.2* \
    /usr/local/lib/libnccl.so.2

# Fix VLLM_NCCL_SO_PATH
ENV VLLM_NCCL_SO_PATH=/usr/local/lib/libnccl.so.2

# Drop root privileges for runtime.
USER 1001

# Port documented for the API server.
EXPOSE 8000

# Launch vLLM's OpenAI-compatible API server. Arguments passed at
# `run` time are appended to this command.
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]

Loading

0 comments on commit 0987b6a

Please sign in to comment.