BLD: upgrade base image for dockerfile #3318

Status: Open · wants to merge 3 commits into base: main
43 changes: 28 additions & 15 deletions xinference/deploy/docker/Dockerfile
@@ -1,14 +1,14 @@
FROM vllm/vllm-openai:v0.6.0
FROM vllm/vllm-openai:latest

COPY . /opt/inference
WORKDIR /opt/inference

ENV NVM_DIR /usr/local/nvm
ENV NODE_VERSION 14.21.1
ENV NVM_DIR=/usr/local/nvm
ENV NODE_VERSION=14.21.1

# Install system dependencies and Node.js (libfst-dev should resolve the pynini build errors)
RUN apt-get -y update \
&& apt install -y wget curl procps git libgl1 \
# upgrade libstdc++ and libc for llama-cpp-python
&& apt install -y wget curl procps git libgl1 libfst-dev cmake libssl-dev \
&& printf "\ndeb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy main restricted universe multiverse" >> /etc/apt/sources.list \
&& apt-get -y update \
&& apt-get install -y --only-upgrade libstdc++6 && apt install -y libc6 \
@@ -20,20 +20,30 @@ RUN apt-get -y update \
&& nvm use default \
&& apt-get -yq clean

ENV PATH $NVM_DIR/versions/node/v$NODE_VERSION/bin:$PATH
ENV LD_LIBRARY_PATH $LD_LIBRARY_PATH:/usr/local/lib/python3.10/dist-packages/nvidia/cublas/lib
ENV PATH=$NVM_DIR/versions/node/v$NODE_VERSION/bin:$PATH
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/python3.10/dist-packages/nvidia/cublas/lib
ENV FLASH_ATTENTION_SKIP_CUDA_BUILD TRUE

# Install pip dependencies
ARG LLAMA_CPP_USE_CUDA=true
ARG PIP_INDEX=https://pypi.org/simple
RUN pip install --upgrade -i "$PIP_INDEX" pip setuptools wheel&& \
pip install -i "$PIP_INDEX" "diskcache>=5.6.1" "jinja2>=2.11.3" && \
RUN pip install --upgrade -i "$PIP_INDEX" pip setuptools wheel && \
# use the pre-built wheel for llama-cpp-python; building from source may core dump when initializing llama in some environments
pip install "llama-cpp-python>=0.2.82" -i https://abetlen.github.io/llama-cpp-python/whl/cu124 && \
pip install -i "$PIP_INDEX" "diskcache>=5.6.1" "jinja2>=2.11.3" && \
# Determine whether to use the CUDA build (false = CPU build, true = CUDA build with GPU support)
if [ "$LLAMA_CPP_USE_CUDA" = "true" ]; then \
echo "🔧 Using CUDA version llama-cpp-python..." && \
pip install "llama-cpp-python>=0.2.82" -i https://abetlen.github.io/llama-cpp-python/whl/cu124; \
else \
echo "⚙️ Using CPU version llama-cpp-python..." && \
pip install "llama-cpp-python>=0.2.82" -i "$PIP_INDEX"; \
fi && \
pip install flash-attn --no-build-isolation && \
pip install -i "$PIP_INDEX" --upgrade-strategy only-if-needed -r /opt/inference/xinference/deploy/docker/requirements-base.txt && \
pip install -i "$PIP_INDEX" --upgrade-strategy only-if-needed -r /opt/inference/xinference/deploy/docker/requirements-ml.txt && \
pip install -i "$PIP_INDEX" --upgrade-strategy only-if-needed -r /opt/inference/xinference/deploy/docker/requirements-models.txt && \
# pip install -i "$PIP_INDEX" --no-deps sglang && \
pip install -i "$PIP_INDEX" --upgrade-strategy only-if-needed -r /opt/inference/xinference/deploy/docker/requirements-ml.txt && \
pip install -i "$PIP_INDEX" --no-deps sglang && \
pip install torch==2.6.0 -i "$PIP_INDEX" && \
pip uninstall flashinfer -y && \
pip install flashinfer-python -i https://flashinfer.ai/whl/cu124/torch2.6 && \
cd /opt/inference && \
@@ -45,16 +55,19 @@ RUN pip install --upgrade -i "$PIP_INDEX" pip setuptools wheel&& \
# clean packages
pip cache purge

# Install Miniforge3 (only for FFmpeg, do not replace system Python)
RUN wget -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" && \
# Install Miniforge3 and FFmpeg
RUN wget -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/download/4.12.0-0/Miniforge3-4.12.0-0-Linux-x86_64.sh" && \
bash Miniforge3.sh -b -p /opt/conda && \
rm Miniforge3.sh

# Install only FFmpeg in the Conda environment to avoid modifying the system Python
RUN /opt/conda/bin/conda create -n ffmpeg-env -c conda-forge 'ffmpeg<7' -y && \
# Create soft links on the system path
ln -s /opt/conda/envs/ffmpeg-env/bin/ffmpeg /usr/local/bin/ffmpeg && \
ln -s /opt/conda/envs/ffmpeg-env/bin/ffprobe /usr/local/bin/ffprobe && \
# Clear the Conda cache
/opt/conda/bin/conda clean --all -y

# Overwrite the entrypoint of vllm's base image
# Override the default entrypoint of the vllm base image
ENTRYPOINT []
CMD ["/bin/bash"]
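
Usage note (not part of the diff): a minimal sketch of how the new `LLAMA_CPP_USE_CUDA` and `PIP_INDEX` build args can be exercised. It assumes the build runs from the repository root, so `COPY . /opt/inference` picks up the source tree; the image tags and the mirror URL are illustrative, not prescribed by this PR.

```bash
# Default build: CUDA wheel for llama-cpp-python from the cu124 index
docker build -f xinference/deploy/docker/Dockerfile -t xinference:dev .

# CPU-only llama-cpp-python, plus an alternative PyPI mirror
docker build -f xinference/deploy/docker/Dockerfile \
    --build-arg LLAMA_CPP_USE_CUDA=false \
    --build-arg PIP_INDEX=https://pypi.tuna.tsinghua.edu.cn/simple \
    -t xinference:dev-cpu .

# Spot-check the FFmpeg soft links created from the conda env
docker run --rm xinference:dev ffmpeg -version
```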
42 changes: 42 additions & 0 deletions xinference/deploy/docker/requirements-apps.txt
@@ -0,0 +1,42 @@
funasr<1.1.17
omegaconf~=2.3.0 # For ChatTTS
nemo_text_processing<1.1.0
WeTextProcessing<1.0.4
librosa # For ChatTTS
ChatTTS>=0.2.1
xxhash # For ChatTTS
pypinyin # For F5-TTS
tomli # For F5-TTS
vocos # For F5-TTS
librosa # For F5-TTS
jieba # For F5-TTS
soundfile # For F5-TTS & MeloTTS
cached_path # For MeloTTS
unidic-lite # For MeloTTS; unidic requires manual download
cn2an # For MeloTTS
mecab-python3 # For MeloTTS
num2words # For MeloTTS
pykakasi # For MeloTTS
fugashi # For MeloTTS
g2p_en # For MeloTTS
anyascii # For MeloTTS
gruut[de,es,fr] # For MeloTTS
kokoro>=0.7.15 # Kokoro
spacy>3.0.6
misaki[en,ja,zh]>=0.7.15 # Kokoro
en_core_web_trf@https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.8.0/en_core_web_trf-3.8.0-py3-none-any.whl # Kokoro misaki[en]
en_core_web_sm@https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl # Kokoro misaki[en]
qwen-vl-utils!=0.0.9 # For qwen2-vl
datamodel_code_generator # for minicpm-4B
jsonschema # for minicpm-4B
deepcache # for sd
verovio>=4.3.1 # For got_ocr2
langdetect # MegaTTS3
pyloudnorm # MegaTTS3
orjson
imageio-ffmpeg # For video
loguru # For Fish Speech
natsort # For Fish Speech
ormsgpack # For Fish Speech
cachetools # For Fish Speech
silero-vad # For Fish Speech
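
Note (assumption, not part of the diff): this new file is not referenced by the Dockerfile above, so presumably it is meant to be installed separately. A hedged sketch, run from the repository root:

```bash
# Install the audio/vision extras on top of an existing environment
pip install -r xinference/deploy/docker/requirements-apps.txt

# Spot-check two of the heavier TTS dependencies
python -c "import ChatTTS, librosa; print('TTS extras importable')"
```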
2 changes: 1 addition & 1 deletion xinference/deploy/docker/requirements-base.txt
@@ -1,4 +1,4 @@
xoscar>=0.6.1
xoscar>=0.4.4
gradio==5.22.0
pillow
click
39 changes: 20 additions & 19 deletions xinference/deploy/docker/requirements-ml.txt
@@ -9,7 +9,8 @@ einops
tiktoken>=0.6.0
sentence-transformers>=3.1.0
controlnet_aux
autoawq<0.2.6 # autoawq 0.2.6 pinned torch to 2.3
gptqmodel
autoawq>0.2.6 # autoawq 0.2.6 pinned torch to 2.3
optimum
attrdict # For deepseek VL
timm>=0.9.16 # For deepseek VL
@@ -25,7 +26,7 @@ diffusers>=0.32.0 # For CosyVoice, matcha
gdown # For CosyVoice, matcha
pyarrow # For CosyVoice, matcha
HyperPyYAML # For CosyVoice
onnxruntime-gpu==1.16.0; sys_platform == 'linux' # For CosyVoice
onnxruntime-gpu>1.16.0; sys_platform == 'linux' # For CosyVoice
onnxruntime==1.16.0; sys_platform == 'darwin' or sys_platform == 'win32' # For CosyVoice
boto3>=1.28.55,<1.28.65 # For tensorizer
tensorizer~=2.9.0
@@ -36,22 +37,22 @@ vector-quantize-pytorch<=1.17.3,>=1.14.24 # For Fish Speech
torchdiffeq # For F5-TTS
x_transformers>=1.31.14 # For F5-TTS
gguf
vllm==0.8.4

# sglang
#decord
#hf_transfer
#huggingface_hub
#interegular
#outlines>=0.0.44,<=0.1.11
#packaging
#prometheus-client>=0.20.0
#psutil
#python-multipart
#pyzmq>=25.1.2
#torchao>=0.7.0
#uvloop
#xgrammar>=0.1.10
#cuda-python
#sgl-kernel>=0.0.3.post3
#IPython
decord
hf_transfer
huggingface_hub
interegular
outlines>=0.0.44,<=0.1.11
packaging
prometheus-client>=0.20.0
psutil
python-multipart
pyzmq>=25.1.2
torchao>=0.7.0
uvloop
xgrammar>=0.1.10
vllm
cuda-python
sgl-kernel>=0.0.3.post3
IPython
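
Since this change uncomments the sglang dependency block and unpins vllm, a quick hedged sanity check inside the built image can confirm the torch 2.6 / vllm / sglang stack resolved together (output is illustrative):

```bash
python - <<'EOF'
import torch, vllm, sglang

# torch is pinned to 2.6.0 in the Dockerfile; vllm and sglang float
print("torch  ", torch.__version__)
print("vllm   ", vllm.__version__)
print("sglang ", sglang.__version__)
EOF
```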
25 changes: 13 additions & 12 deletions xinference/deploy/docker/requirements.txt
@@ -30,29 +30,29 @@ transformers>=4.46.0
accelerate>=0.28.0
sentencepiece
transformers_stream_generator
bitsandbytes
protobuf
einops
# bitsandbytes
# protobuf
# einops
tiktoken>=0.6.0
sentence-transformers>=3.1.0
# sentence-transformers>=3.1.0
controlnet_aux
orjson
gptqmodel
autoawq<0.2.6 # autoawq 0.2.6 pinned torch to 2.3
optimum
autoawq>=0.2.5,<0.3.0 # autoawq 0.2.6 pinned torch to 2.3
# optimum
attrdict # For deepseek VL
timm>=0.9.16 # For deepseek VL
torchvision # For deepseek VL
FlagEmbedding # For rerank
funasr<1.1.17
# timm>=0.9.16 # For deepseek VL
# torchvision # For deepseek VL
# FlagEmbedding # For rerank
# funasr>=1.1.0,<1.2.0
omegaconf~=2.3.0 # For ChatTTS
nemo_text_processing<1.1.0 # 1.1.0 requires pynini==2.1.6.post1
WeTextProcessing<1.0.4 # 1.0.4 requires pynini==2.1.6
librosa # For ChatTTS
torchaudio # For ChatTTS
ChatTTS>=0.2.1
xxhash # For ChatTTS
torch>=2.0.0 # For CosyVoice
# torch>=2.0.0 # For CosyVoice
lightning>=2.0.0 # For CosyVoice, matcha
hydra-core>=1.3.2 # For CosyVoice, matcha
inflect # For CosyVoice, matcha
@@ -61,7 +61,7 @@ diffusers>=0.32.0 # For CosyVoice, matcha
gdown # For CosyVoice, matcha
pyarrow # For CosyVoice, matcha
HyperPyYAML # For CosyVoice
onnxruntime-gpu==1.16.0; sys_platform == 'linux' # For CosyVoice
onnxruntime-gpu>=1.17.0,<1.18.0; sys_platform == 'linux' # For CosyVoice
onnxruntime==1.16.0; sys_platform == 'darwin' or sys_platform == 'win32' # For CosyVoice
boto3>=1.28.55,<1.28.65 # For tensorizer
tensorizer~=2.9.0
@@ -126,3 +126,4 @@ vllm==0.7.3
cuda-python
sgl-kernel>=0.0.3.post3
IPython
filelock
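
A note on the platform-conditional onnxruntime pins above: they rely on PEP 508 environment markers, which pip also accepts on the command line. A small sketch (the Linux marker selects the GPU wheel; elsewhere pip skips the requirement):

```bash
# Same marker syntax as in the requirements file; on darwin/win32 the
# marker evaluates to false and pip ignores the requirement entirely.
pip install "onnxruntime-gpu>=1.17.0,<1.18.0; sys_platform == 'linux'"
```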