BLD: upgrade base image for dockerfile #3318

Status: Open · wants to merge 3 commits into base: main
43 changes: 28 additions & 15 deletions xinference/deploy/docker/Dockerfile
@@ -1,14 +1,14 @@
FROM vllm/vllm-openai:v0.6.0
FROM vllm/vllm-openai:latest

COPY . /opt/inference
WORKDIR /opt/inference

ENV NVM_DIR /usr/local/nvm
ENV NODE_VERSION 14.21.1
ENV NVM_DIR=/usr/local/nvm
ENV NODE_VERSION=14.21.1

# Install system dependencies and Node.js (libfst-dev should resolve the pynini build errors)
RUN apt-get -y update \
&& apt install -y wget curl procps git libgl1 \
# upgrade libstdc++ and libc for llama-cpp-python
&& apt install -y wget curl procps git libgl1 libfst-dev cmake libssl-dev \
&& printf "\ndeb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy main restricted universe multiverse" >> /etc/apt/sources.list \
&& apt-get -y update \
&& apt-get install -y --only-upgrade libstdc++6 && apt install -y libc6 \
@@ -20,20 +20,30 @@ RUN apt-get -y update \
&& nvm use default \
&& apt-get -yq clean

ENV PATH $NVM_DIR/versions/node/v$NODE_VERSION/bin:$PATH
ENV LD_LIBRARY_PATH $LD_LIBRARY_PATH:/usr/local/lib/python3.10/dist-packages/nvidia/cublas/lib
ENV PATH=$NVM_DIR/versions/node/v$NODE_VERSION/bin:$PATH
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/python3.10/dist-packages/nvidia/cublas/lib
ENV FLASH_ATTENTION_SKIP_CUDA_BUILD TRUE

# Install pip dependencies
ARG LLAMA_CPP_USE_CUDA=true
ARG PIP_INDEX=https://pypi.org/simple
RUN pip install --upgrade -i "$PIP_INDEX" pip setuptools wheel&& \
pip install -i "$PIP_INDEX" "diskcache>=5.6.1" "jinja2>=2.11.3" && \
RUN pip install --upgrade -i "$PIP_INDEX" pip setuptools wheel && \
# use the pre-built wheel for llama-cpp-python; building from source may core dump when initializing llama in some environments
pip install "llama-cpp-python>=0.2.82" -i https://abetlen.github.io/llama-cpp-python/whl/cu124 && \
pip install -i "$PIP_INDEX" "diskcache>=5.6.1" "jinja2>=2.11.3" && \
# Determine whether to use the CUDA build (false = CPU build, true = CUDA build with GPU support)
if [ "$LLAMA_CPP_USE_CUDA" = "true" ]; then \
echo "🔧 Using CUDA version llama-cpp-python..." && \
pip install "llama-cpp-python>=0.2.82" -i https://abetlen.github.io/llama-cpp-python/whl/cu124; \
else \
echo "⚙️ Using CPU version llama-cpp-python..." && \
pip install "llama-cpp-python>=0.2.82" -i "$PIP_INDEX"; \
fi && \
pip install flash-attn --no-build-isolation && \
pip install -i "$PIP_INDEX" --upgrade-strategy only-if-needed -r /opt/inference/xinference/deploy/docker/requirements-base.txt && \
pip install -i "$PIP_INDEX" --upgrade-strategy only-if-needed -r /opt/inference/xinference/deploy/docker/requirements-ml.txt && \
pip install -i "$PIP_INDEX" --upgrade-strategy only-if-needed -r /opt/inference/xinference/deploy/docker/requirements-models.txt && \
# pip install -i "$PIP_INDEX" --no-deps sglang && \
pip install -i "$PIP_INDEX" --upgrade-strategy only-if-needed -r /opt/inference/xinference/deploy/docker/requirements-ml.txt && \
pip install -i "$PIP_INDEX" --no-deps sglang && \
pip install torch==2.6.0 -i "$PIP_INDEX" && \
pip uninstall flashinfer -y && \
pip install flashinfer-python -i https://flashinfer.ai/whl/cu124/torch2.6 && \
cd /opt/inference && \
@@ -45,16 +55,19 @@ RUN pip install --upgrade -i "$PIP_INDEX" pip setuptools wheel&& \
# clean packages
pip cache purge

# Install Miniforge3 (only for FFmpeg, do not replace system Python)
RUN wget -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" && \
# Install Miniforge3 and FFmpeg
RUN wget -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/download/4.12.0-0/Miniforge3-4.12.0-0-Linux-x86_64.sh" && \
bash Miniforge3.sh -b -p /opt/conda && \
rm Miniforge3.sh

# Install only FFmpeg in the Conda environment to avoid modifying the system Python
RUN /opt/conda/bin/conda create -n ffmpeg-env -c conda-forge 'ffmpeg<7' -y && \
# Create soft links on the system path
ln -s /opt/conda/envs/ffmpeg-env/bin/ffmpeg /usr/local/bin/ffmpeg && \
ln -s /opt/conda/envs/ffmpeg-env/bin/ffprobe /usr/local/bin/ffprobe && \
# Clear the Conda cache
/opt/conda/bin/conda clean --all -y

# Overwrite the entrypoint of vllm's base image
# Override the default entrypoint of the vllm base image
ENTRYPOINT []
CMD ["/bin/bash"]
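
Usage note (not part of the diff): a minimal sketch of how the new `LLAMA_CPP_USE_CUDA` and `PIP_INDEX` build args can be exercised. It assumes the build runs from the repository root, so `COPY . /opt/inference` picks up the source tree; the image tags and the mirror URL are illustrative, not prescribed by this PR.

```bash
# Default build: CUDA wheel for llama-cpp-python from the cu124 index
docker build -f xinference/deploy/docker/Dockerfile -t xinference:dev .

# CPU-only llama-cpp-python, plus an alternative PyPI mirror
docker build -f xinference/deploy/docker/Dockerfile \
    --build-arg LLAMA_CPP_USE_CUDA=false \
    --build-arg PIP_INDEX=https://pypi.tuna.tsinghua.edu.cn/simple \
    -t xinference:dev-cpu .

# Spot-check the FFmpeg soft links created from the conda env
docker run --rm xinference:dev ffmpeg -version
```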
42 changes: 42 additions & 0 deletions xinference/deploy/docker/requirements-apps.txt
@@ -0,0 +1,42 @@
funasr<1.1.17
omegaconf~=2.3.0 # For ChatTTS
nemo_text_processing<1.1.0
WeTextProcessing<1.0.4
librosa # For ChatTTS
ChatTTS>=0.2.1
xxhash # For ChatTTS
pypinyin # For F5-TTS
tomli # For F5-TTS
vocos # For F5-TTS
librosa # For F5-TTS
jieba # For F5-TTS
soundfile # For F5-TTS & MeloTTS
cached_path # For MeloTTS
unidic-lite # For MeloTTS; unidic requires manual download
cn2an # For MeloTTS
mecab-python3 # For MeloTTS
num2words # For MeloTTS
pykakasi # For MeloTTS
fugashi # For MeloTTS
g2p_en # For MeloTTS
anyascii # For MeloTTS
gruut[de,es,fr] # For MeloTTS
kokoro>=0.7.15 # Kokoro
spacy>3.0.6
misaki[en,ja,zh]>=0.7.15 # Kokoro
en_core_web_trf@https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.8.0/en_core_web_trf-3.8.0-py3-none-any.whl # Kokoro misaki[en]
en_core_web_sm@https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl # Kokoro misaki[en]
qwen-vl-utils!=0.0.9 # For qwen2-vl
datamodel_code_generator # for minicpm-4B
jsonschema # for minicpm-4B
deepcache # for sd
verovio>=4.3.1 # For got_ocr2
langdetect # MegaTTS3
pyloudnorm # MegaTTS3
orjson
imageio-ffmpeg # For video
loguru # For Fish Speech
natsort # For Fish Speech
ormsgpack # For Fish Speech
cachetools # For Fish Speech
silero-vad # For Fish Speech
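
Note (assumption, not part of the diff): this new file is not referenced by the Dockerfile above, so presumably it is meant to be installed separately. A hedged sketch, run from the repository root:

```bash
# Install the audio/vision extras on top of an existing environment
pip install -r xinference/deploy/docker/requirements-apps.txt

# Spot-check two of the heavier TTS dependencies
python -c "import ChatTTS, librosa; print('TTS extras importable')"
```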
2 changes: 1 addition & 1 deletion xinference/deploy/docker/requirements-base.txt
@@ -1,4 +1,4 @@
xoscar>=0.6.1
xoscar>=0.4.4
gradio==5.22.0
pillow
click
39 changes: 20 additions & 19 deletions xinference/deploy/docker/requirements-ml.txt
@@ -9,7 +9,8 @@ einops
tiktoken>=0.6.0
sentence-transformers>=3.1.0
controlnet_aux
autoawq<0.2.6 # autoawq 0.2.6 pinned torch to 2.3
gptqmodel
autoawq>0.2.6 # autoawq 0.2.6 pinned torch to 2.3
optimum
attrdict # For deepseek VL
timm>=0.9.16 # For deepseek VL
@@ -25,7 +26,7 @@ diffusers>=0.32.0 # For CosyVoice, matcha
gdown # For CosyVoice, matcha
pyarrow # For CosyVoice, matcha
HyperPyYAML # For CosyVoice
onnxruntime-gpu==1.16.0; sys_platform == 'linux' # For CosyVoice
onnxruntime-gpu>1.16.0; sys_platform == 'linux' # For CosyVoice
onnxruntime==1.16.0; sys_platform == 'darwin' or sys_platform == 'win32' # For CosyVoice
boto3>=1.28.55,<1.28.65 # For tensorizer
tensorizer~=2.9.0
@@ -36,22 +37,22 @@ vector-quantize-pytorch<=1.17.3,>=1.14.24 # For Fish Speech
torchdiffeq # For F5-TTS
x_transformers>=1.31.14 # For F5-TTS
gguf
vllm==0.8.4

# sglang
#decord
#hf_transfer
#huggingface_hub
#interegular
#outlines>=0.0.44,<=0.1.11
#packaging
#prometheus-client>=0.20.0
#psutil
#python-multipart
#pyzmq>=25.1.2
#torchao>=0.7.0
#uvloop
#xgrammar>=0.1.10
#cuda-python
#sgl-kernel>=0.0.3.post3
#IPython
decord
hf_transfer
huggingface_hub
interegular
outlines>=0.0.44,<=0.1.11
packaging
prometheus-client>=0.20.0
psutil
python-multipart
pyzmq>=25.1.2
torchao>=0.7.0
uvloop
xgrammar>=0.1.10
vllm
cuda-python
sgl-kernel>=0.0.3.post3
IPython
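
Since this change uncomments the sglang dependency block and unpins vllm, a quick hedged sanity check inside the built image can confirm the torch 2.6 / vllm / sglang stack resolved together (output is illustrative):

```bash
python - <<'EOF'
import torch, vllm, sglang

# torch is pinned to 2.6.0 in the Dockerfile; vllm and sglang float
print("torch  ", torch.__version__)
print("vllm   ", vllm.__version__)
print("sglang ", sglang.__version__)
EOF
```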
25 changes: 13 additions & 12 deletions xinference/deploy/docker/requirements.txt
@@ -30,29 +30,29 @@ transformers>=4.46.0
accelerate>=0.28.0
sentencepiece
transformers_stream_generator
bitsandbytes
protobuf
einops
# bitsandbytes
# protobuf
# einops
tiktoken>=0.6.0
sentence-transformers>=3.1.0
# sentence-transformers>=3.1.0
controlnet_aux
orjson
gptqmodel
autoawq<0.2.6 # autoawq 0.2.6 pinned torch to 2.3
optimum
autoawq>=0.2.5,<0.3.0 # autoawq 0.2.6 pinned torch to 2.3
# optimum
attrdict # For deepseek VL
timm>=0.9.16 # For deepseek VL
torchvision # For deepseek VL
FlagEmbedding # For rerank
funasr<1.1.17
# timm>=0.9.16 # For deepseek VL
# torchvision # For deepseek VL
# FlagEmbedding # For rerank
# funasr>=1.1.0,<1.2.0
omegaconf~=2.3.0 # For ChatTTS
nemo_text_processing<1.1.0 # 1.1.0 requires pynini==2.1.6.post1
WeTextProcessing<1.0.4 # 1.0.4 requires pynini==2.1.6
librosa # For ChatTTS
torchaudio # For ChatTTS
ChatTTS>=0.2.1
xxhash # For ChatTTS
torch>=2.0.0 # For CosyVoice
# torch>=2.0.0 # For CosyVoice
lightning>=2.0.0 # For CosyVoice, matcha
hydra-core>=1.3.2 # For CosyVoice, matcha
inflect # For CosyVoice, matcha
@@ -61,7 +61,7 @@ diffusers>=0.32.0 # For CosyVoice, matcha
gdown # For CosyVoice, matcha
pyarrow # For CosyVoice, matcha
HyperPyYAML # For CosyVoice
onnxruntime-gpu==1.16.0; sys_platform == 'linux' # For CosyVoice
onnxruntime-gpu>=1.17.0,<1.18.0; sys_platform == 'linux' # For CosyVoice
onnxruntime==1.16.0; sys_platform == 'darwin' or sys_platform == 'win32' # For CosyVoice
boto3>=1.28.55,<1.28.65 # For tensorizer
tensorizer~=2.9.0
@@ -126,3 +126,4 @@ vllm==0.7.3
cuda-python
sgl-kernel>=0.0.3.post3
IPython
filelock
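
A note on the platform-conditional onnxruntime pins above: they rely on PEP 508 environment markers, which pip also accepts on the command line. A small sketch (the Linux marker selects the GPU wheel; elsewhere pip skips the requirement):

```bash
# Same marker syntax as in the requirements file; on darwin/win32 the
# marker evaluates to false and pip ignores the requirement entirely.
pip install "onnxruntime-gpu>=1.17.0,<1.18.0; sys_platform == 'linux'"
```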