diff --git a/Dockerfile b/Dockerfile index 63011d2b3..11bd2754c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -208,7 +208,7 @@ COPY server/Makefile server/Makefile RUN cd server && \ make gen-server && \ - pip install ".[bnb, accelerate, quantize]" --no-cache-dir + pip install ".[bnb, accelerate, quantize, peft]" --no-cache-dir # Install router COPY --from=builder /usr/src/target/release/lorax-router /usr/local/bin/lorax-router diff --git a/server/Makefile b/server/Makefile index 333d85ef1..0a20ce55d 100644 --- a/server/Makefile +++ b/server/Makefile @@ -32,4 +32,4 @@ run-dev: # SAFETENSORS_FAST_GPU=1 python -m torch.distributed.run --nproc_per_node=1 lorax_server/cli.py serve flozi00/Mistral-7B-german-assistant-v5-4bit-autogptq --quantize gptq export-requirements: - poetry export -o requirements.txt -E bnb -E quantize --without-hashes + poetry export -o requirements.txt -E bnb --without-hashes diff --git a/server/poetry.lock b/server/poetry.lock index d586ffaa9..dd1c1af93 100644 --- a/server/poetry.lock +++ b/server/poetry.lock @@ -4,7 +4,7 @@ name = "accelerate" version = "0.24.1" description = "Accelerate" -optional = false +optional = true python-versions = ">=3.8.0" files = [ {file = "accelerate-0.24.1-py3-none-any.whl", hash = "sha256:866dec394da60e8da964be212379d8cf6cc0d0e5e28a7c0d7e09507715d21c61"}, @@ -915,7 +915,7 @@ files = [ name = "jinja2" version = "3.1.2" description = "A very fast and expressive template engine." -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"}, @@ -961,7 +961,7 @@ dev = ["Sphinx (>=4.1.1)", "black (>=19.10b0)", "colorama (>=0.3.4)", "docutils name = "markupsafe" version = "2.1.3" description = "Safely add untrusted strings to HTML/XML markup." -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa"}, @@ -1030,7 +1030,7 @@ files = [ name = "mpmath" version = "1.3.0" description = "Python library for arbitrary-precision floating-point arithmetic" -optional = false +optional = true python-versions = "*" files = [ {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, @@ -1158,7 +1158,7 @@ dill = ">=0.3.7" name = "networkx" version = "3.2.1" description = "Python package for creating and manipulating graphs and networks" -optional = false +optional = true python-versions = ">=3.9" files = [ {file = "networkx-3.2.1-py3-none-any.whl", hash = "sha256:f18c69adc97877c42332c170849c96cefa91881c99a7cb3e95b7c659ebdc1ec2"}, @@ -1221,7 +1221,7 @@ files = [ name = "nvidia-cublas-cu12" version = "12.1.3.1" description = "CUBLAS native runtime libraries" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:ee53ccca76a6fc08fb9701aa95b6ceb242cdaab118c3bb152af4e579af792728"}, @@ -1232,7 +1232,7 @@ files = [ name = "nvidia-cuda-cupti-cu12" version = "12.1.105" description = "CUDA profiling tools runtime libs." -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:e54fde3983165c624cb79254ae9818a456eb6e87a7fd4d56a2352c24ee542d7e"}, @@ -1243,7 +1243,7 @@ files = [ name = "nvidia-cuda-nvrtc-cu12" version = "12.1.105" description = "NVRTC native runtime libraries" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:339b385f50c309763ca65456ec75e17bbefcbbf2893f462cb8b90584cd27a1c2"}, @@ -1254,7 +1254,7 @@ files = [ name = "nvidia-cuda-runtime-cu12" version = "12.1.105" description = "CUDA Runtime native Libraries" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:6e258468ddf5796e25f1dc591a31029fa317d97a0a94ed93468fc86301d61e40"}, @@ -1265,7 +1265,7 @@ files = [ name = "nvidia-cudnn-cu12" version = "8.9.2.26" description = "cuDNN runtime libraries" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl", hash = "sha256:5ccb288774fdfb07a7e7025ffec286971c06d8d7b4fb162525334616d7629ff9"}, @@ -1278,7 +1278,7 @@ nvidia-cublas-cu12 = "*" name = "nvidia-cufft-cu12" version = "11.0.2.54" description = "CUFFT native runtime libraries" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl", hash = "sha256:794e3948a1aa71fd817c3775866943936774d1c14e7628c74f6f7417224cdf56"}, @@ -1289,7 +1289,7 @@ files = [ name = "nvidia-curand-cu12" version = "10.3.2.106" description = "CURAND native runtime libraries" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:9d264c5036dde4e64f1de8c50ae753237c12e0b1348738169cd0f8a536c0e1e0"}, @@ -1300,7 +1300,7 @@ files = [ name = "nvidia-cusolver-cu12" version = "11.4.5.107" description = "CUDA solver native runtime libraries" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl", hash = "sha256:8a7ec542f0412294b15072fa7dab71d31334014a69f953004ea7a118206fe0dd"}, @@ -1316,7 +1316,7 @@ nvidia-nvjitlink-cu12 = "*" name = "nvidia-cusparse-cu12" version = "12.1.0.106" description = "CUSPARSE native runtime libraries" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:f3b50f42cf363f86ab21f720998517a659a48131e8d538dc02f8768237bd884c"}, @@ -1330,7 +1330,7 @@ nvidia-nvjitlink-cu12 = "*" name = "nvidia-nccl-cu12" version = "2.18.1" description = "NVIDIA Collective Communication Library (NCCL) Runtime" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_nccl_cu12-2.18.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:1a6c4acefcbebfa6de320f412bf7866de856e786e0462326ba1bac40de0b5e71"}, @@ -1340,7 +1340,7 @@ files = [ name = "nvidia-nvjitlink-cu12" version = "12.3.101" description = "Nvidia JIT LTO Library" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-manylinux1_x86_64.whl", hash = "sha256:64335a8088e2b9d196ae8665430bc6a2b7e6ef2eb877a9c735c804bd4ff6467c"}, @@ -1351,7 +1351,7 @@ files = [ name = "nvidia-nvtx-cu12" version = "12.1.105" description = "NVIDIA Tools Extension" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:dc21cf308ca5691e7c04d962e213f8a4aa9bbfa23d95412f452254c2caeb09e5"}, @@ -1613,7 +1613,7 @@ xml = ["lxml (>=4.8.0)"] name = "peft" version = "0.4.0" description = "Parameter-Efficient Fine-Tuning (PEFT)" -optional = false +optional = true python-versions = ">=3.8.0" files = [ {file = "peft-0.4.0-py3-none-any.whl", hash = "sha256:2cf992772a6d703814477e0bdcdadd68cb8ea388111ce2d793dd2ff0e438f357"}, @@ -1675,7 +1675,7 @@ files = [ name = "psutil" version = "5.9.7" description = "Cross-platform lib for process and system monitoring in Python." -optional = false +optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ {file = "psutil-5.9.7-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:0bd41bf2d1463dfa535942b2a8f0e958acf6607ac0be52265ab31f7923bcd5e6"}, @@ -2203,7 +2203,7 @@ dev = ["absl-py"] name = "sympy" version = "1.12" description = "Computer algebra system (CAS) in Python" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "sympy-1.12-py3-none-any.whl", hash = "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"}, @@ -2406,7 +2406,7 @@ files = [ name = "torch" version = "2.1.1" description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" -optional = false +optional = true python-versions = ">=3.8.0" files = [ {file = "torch-2.1.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:5ebc43f5355a9b7be813392b3fb0133991f0380f6f0fcc8218d5468dc45d1071"}, @@ -2957,9 +2957,11 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [extras] accelerate = ["accelerate"] bnb = ["bitsandbytes"] +peft = ["peft"] quantize = ["accelerate", "datasets", "texttable"] +torch = ["torch"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "08d153c387b68b0acead81e7f221058be44df0fa72537dad62766f812bd260a4" +content-hash = "9ebdc761192d071363f54433cc7ec0a82dd05daef2ead21058b6aa3592c506df" diff --git a/server/pyproject.toml b/server/pyproject.toml index 5a5d4c052..4fb0e1c91 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -32,15 +32,17 @@ einops = "^0.6.1" tiktoken = "^0.5.2" texttable = { version = "^1.6.7", optional = true } datasets = { version = "^2.14.0", optional = true } -torch = {version = "2.1.1"} -peft = "0.4.0" +torch = {version = "2.1.1", optional = true } +peft = {version = "0.4.0", optional = true } boto3 = "^1.28.34" urllib3 = "<=1.26.18" stanford-stk = "^0.0.6" [tool.poetry.extras] +torch = ["torch"] accelerate = ["accelerate"] bnb = ["bitsandbytes"] +peft = ["peft"] quantize = ["texttable", "datasets", "accelerate"] [tool.poetry.group.dev.dependencies] diff --git a/server/requirements.txt b/server/requirements.txt index a47c11b0d..da0fdd3e1 100644 --- a/server/requirements.txt +++ b/server/requirements.txt @@ -1,8 +1,3 @@ -accelerate==0.24.1 ; python_version >= "3.9" and python_version < "4.0" -aiohttp==3.9.1 ; python_version >= "3.9" and python_version < "4.0" -aiosignal==1.3.1 ; python_version >= "3.9" and python_version < "4.0" -async-timeout==4.0.3 ; python_version >= "3.9" and python_version < "3.11" -attrs==23.1.0 ; python_version >= "3.9" and python_version < "4.0" backoff==2.2.1 ; python_version >= "3.9" and python_version < "4.0" bitsandbytes==0.41.3.post2 ; python_version >= "3.9" and python_version < "4.0" boto3==1.34.4 ; python_version >= "3.9" and python_version < "4.0" @@ -11,14 +6,10 @@ certifi==2023.11.17 ; python_version >= "3.9" and python_version < "4.0" charset-normalizer==3.3.2 ; python_version >= "3.9" and python_version < "4.0" click==8.1.7 ; python_version >= "3.9" and python_version < "4.0" colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0" and (sys_platform == "win32" or platform_system == "Windows") -datasets==2.15.0 ; python_version >= "3.9" and python_version < "4.0" deprecated==1.2.14 ; python_version >= "3.9" and python_version < "4.0" -dill==0.3.7 ; python_version >= "3.9" and python_version < "4.0" einops==0.6.1 ; python_version >= "3.9" and python_version < "4.0" filelock==3.13.1 ; python_version >= "3.9" and python_version < "4.0" -frozenlist==1.4.1 ; python_version >= "3.9" and python_version < "4.0" fsspec==2023.10.0 ; python_version >= "3.9" and python_version < "4.0" -fsspec[http]==2023.10.0 ; python_version >= "3.9" and python_version < "4.0" googleapis-common-protos==1.62.0 ; python_version >= "3.9" and python_version < "4.0" grpc-interceptor==0.15.4 ; python_version >= "3.9" and python_version < "4.0" grpcio-reflection==1.60.0 ; python_version >= "3.9" and python_version < "4.0" @@ -28,27 +19,9 @@ hf-transfer==0.1.4 ; python_version >= "3.9" and python_version < "4.0" huggingface-hub==0.19.4 ; python_version >= "3.9" and python_version < "4.0" idna==3.6 ; python_version >= "3.9" and python_version < "4.0" importlib-metadata==6.11.0 ; python_version >= "3.9" and python_version < "4.0" -jinja2==3.1.2 ; python_version >= "3.9" and python_version < "4.0" jmespath==1.0.1 ; python_version >= "3.9" and python_version < "4.0" loguru==0.6.0 ; python_version >= "3.9" and python_version < "4.0" -markupsafe==2.1.3 ; python_version >= "3.9" and python_version < "4.0" -mpmath==1.3.0 ; python_version >= "3.9" and python_version < "4.0" -multidict==6.0.4 ; python_version >= "3.9" and python_version < "4.0" -multiprocess==0.70.15 ; python_version >= "3.9" and python_version < "4.0" -networkx==3.2.1 ; python_version >= "3.9" and python_version < "4.0" numpy==1.26.2 ; python_version >= "3.9" and python_version < "4.0" -nvidia-cublas-cu12==12.1.3.1 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "4.0" -nvidia-cuda-cupti-cu12==12.1.105 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "4.0" -nvidia-cuda-nvrtc-cu12==12.1.105 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "4.0" -nvidia-cuda-runtime-cu12==12.1.105 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "4.0" -nvidia-cudnn-cu12==8.9.2.26 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "4.0" -nvidia-cufft-cu12==11.0.2.54 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "4.0" -nvidia-curand-cu12==10.3.2.106 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "4.0" -nvidia-cusolver-cu12==11.4.5.107 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "4.0" -nvidia-cusparse-cu12==12.1.0.106 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "4.0" -nvidia-nccl-cu12==2.18.1 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "4.0" -nvidia-nvjitlink-cu12==12.3.101 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "4.0" -nvidia-nvtx-cu12==12.1.105 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.9" and python_version < "4.0" opentelemetry-api==1.21.0 ; python_version >= "3.9" and python_version < "4.0" opentelemetry-exporter-otlp-proto-common==1.21.0 ; python_version >= "3.9" and python_version < "4.0" opentelemetry-exporter-otlp-proto-grpc==1.21.0 ; python_version >= "3.9" and python_version < "4.0" @@ -60,14 +33,8 @@ opentelemetry-proto==1.21.0 ; python_version >= "3.9" and python_version < "4.0" opentelemetry-sdk==1.21.0 ; python_version >= "3.9" and python_version < "4.0" opentelemetry-semantic-conventions==0.42b0 ; python_version >= "3.9" and python_version < "4.0" packaging==23.2 ; python_version >= "3.9" and python_version < "4.0" -pandas==2.1.4 ; python_version >= "3.9" and python_version < "4.0" -peft==0.4.0 ; python_version >= "3.9" and python_version < "4.0" protobuf==4.25.1 ; python_version >= "3.9" and python_version < "4.0" -psutil==5.9.7 ; python_version >= "3.9" and python_version < "4.0" -pyarrow-hotfix==0.6 ; python_version >= "3.9" and python_version < "4.0" -pyarrow==14.0.2 ; python_version >= "3.9" and python_version < "4.0" python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0" -pytz==2023.3.post1 ; python_version >= "3.9" and python_version < "4.0" pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "4.0" regex==2023.10.3 ; python_version >= "3.9" and python_version < "4.0" requests==2.31.0 ; python_version >= "3.9" and python_version < "4.0" @@ -77,20 +44,14 @@ sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "4.0" setuptools==69.0.2 ; python_version >= "3.9" and python_version < "4.0" six==1.16.0 ; python_version >= "3.9" and python_version < "4.0" stanford-stk==0.0.6 ; python_version >= "3.9" and python_version < "4.0" -sympy==1.12 ; python_version >= "3.9" and python_version < "4.0" -texttable==1.7.0 ; python_version >= "3.9" and python_version < "4.0" tiktoken==0.5.2 ; python_version >= "3.9" and python_version < "4.0" tokenizers==0.15.0 ; python_version >= "3.9" and python_version < "4.0" -torch==2.1.1 ; python_version >= "3.9" and python_version < "4.0" tqdm==4.66.1 ; python_version >= "3.9" and python_version < "4.0" transformers==4.36.0 ; python_version >= "3.9" and python_version < "4.0" triton==2.1.0 ; python_version >= "3.9" and python_version < "4.0" typer==0.6.1 ; python_version >= "3.9" and python_version < "4.0" typing-extensions==4.9.0 ; python_version >= "3.9" and python_version < "4.0" -tzdata==2023.3 ; python_version >= "3.9" and python_version < "4.0" urllib3==1.26.18 ; python_version >= "3.9" and python_version < "4.0" win32-setctime==1.1.0 ; python_version >= "3.9" and python_version < "4.0" and sys_platform == "win32" wrapt==1.16.0 ; python_version >= "3.9" and python_version < "4.0" -xxhash==3.4.1 ; python_version >= "3.9" and python_version < "4.0" -yarl==1.9.4 ; python_version >= "3.9" and python_version < "4.0" zipp==3.17.0 ; python_version >= "3.9" and python_version < "4.0"