
Commit 498ebb2
Merge branch 'main' into refactor-model
Conflicts:
	lmdeploy/cli/cli.py
AllentDan committed Mar 6, 2024
2 parents 0b291ea + 9149a49
Showing 97 changed files with 3,880 additions and 3,244 deletions.
9 changes: 9 additions & 0 deletions .github/md-link-config.json
@@ -17,6 +17,15 @@
     },
     {
       "pattern": "^http://localhost"
+    },
+    {
+      "pattern": "^https://twitter.com"
+    },
+    {
+      "pattern": "^https://platform.openai.com"
+    },
+    {
+      "pattern": "^http://0.0.0.0"
     }
   ],
   "httpHeaders": [
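The three new entries extend the link checker's ignore list with regular expressions for links that cannot be validated from CI: Twitter and platform.openai.com tend to reject automated requests, and `http://0.0.0.0` addresses only resolve against a locally running server. A minimal sketch of how such a config is typically consumed, assuming the `markdown-link-check` npm CLI is the consumer and `README.md` is an illustrative target:

```bash
# Hypothetical local invocation; the repository's actual CI wiring may differ.
npm install -g markdown-link-check
markdown-link-check --config .github/md-link-config.json README.md
```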
68 changes: 49 additions & 19 deletions .github/workflows/daily_ete_test.yml
@@ -3,7 +3,7 @@ name: daily_ete_test
 on:
   workflow_dispatch:
   schedule:
-    - cron: '00 23 * * *'
+    - cron: '00 18 * * *'
 
 env:
   HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
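GitHub Actions evaluates `schedule` crons in UTC, so the nightly trigger moves from 23:00 UTC to 18:00 UTC, i.e. from 07:00 to 02:00 the next morning in UTC+8. A quick sanity check of the new time, assuming GNU `date` is available:

```bash
# Convert the new 18:00 UTC trigger into UTC+8 (Asia/Shanghai); date is illustrative.
TZ=Asia/Shanghai date -d '2024-03-06 18:00 UTC' '+%Y-%m-%d %H:%M %Z'   # 2024-03-07 02:00 CST
```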
@@ -13,7 +13,7 @@ env:
 jobs:
   test_functions:
     runs-on: [self-hosted, linux-a100]
-    timeout-minutes: 240
+    timeout-minutes: 420
     env:
       REPORT_DIR: /nvme/qa_test_models/test-reports
     container:
@@ -68,36 +68,66 @@ jobs:
         run: |
           python3 -m pip list
           lmdeploy check_env
-      - name: Test lmdeploy - quantization
+      - name: Test lmdeploy - quantization w4a16
         continue-on-error: true
         run: |
-          pytest autotest -m '(quantization or quantization_w8a8) and not Baichuan2_7B_Chat and not Baichuan2_13B_Chat' -n 8 --alluredir=allure-results --clean-alluredir
+          pytest autotest/tools/quantization/test_quantization_w4a16.py -m 'not pr_test' -n 8 --alluredir=allure-results --clean-alluredir
+      - name: Test lmdeploy - quantization kv int8
+        continue-on-error: true
+        run: |
+          pytest autotest/tools/quantization/test_quantization_kvint8.py -n 8 --alluredir=allure-results
+      - name: Test lmdeploy - quantization w8a8
+        continue-on-error: true
+        run: |
+          pytest autotest/tools/quantization/test_quantization_w8a8.py -n 8 --alluredir=allure-results
+      - name: Test lmdeploy - quantization kv int8 and w4a16
+        continue-on-error: true
+        run: |
+          pytest autotest/tools/quantization/test_quantization_kvint8_w4a16.py -n 8 --alluredir=allure-results
       - name: Test lmdeploy - convert
         continue-on-error: true
         run: |
-          pytest autotest -m 'convert and not Baichuan2_7B_Chat and not Baichuan2_13B_Chat' -n 6 --alluredir=allure-results
-      - name: Test lmdeploy - pipeline
+          pytest autotest/tools/convert -m 'not pr_test' -n 6 --alluredir=allure-results --dist loadgroup
+      - name: Test lmdeploy - interface turbomind case
         continue-on-error: true
-        timeout-minutes: 60
-        run: pytest autotest -m '(pipeline_chat) and not Baichuan2_7B_Chat and not Baichuan2_13B_Chat' --alluredir=allure-results
-      - name: Test lmdeploy - restful
+        timeout-minutes: 20
+        run: |
+          pytest autotest/interface/pipeline/test_pipeline_turbomind_func.py -m 'not pr_test' --alluredir=allure-results
+      - name: Test lmdeploy - pipeline turbomind
         continue-on-error: true
-        run: pytest autotest -m restful_api --alluredir=allure-results
-      - name: Test lmdeploy - chat
+        timeout-minutes: 45
+        run: pytest autotest/tools/pipeline/test_pipeline_chat_turbomind.py -m 'not pr_test' --alluredir=allure-results
+      - name: Test lmdeploy - pipeline torch
         continue-on-error: true
+        timeout-minutes: 75
+        run: pytest autotest/tools/pipeline/test_pipeline_chat_pytorch.py -m 'not pr_test' --alluredir=allure-results
+      - name: Test lmdeploy - restful turbomind
+        continue-on-error: true
+        timeout-minutes: 60
+        run: pytest autotest/tools/restful/test_restful_chat_turbomind.py -m 'not pr_test' --alluredir=allure-results
+      - name: Test lmdeploy - restful torch
+        continue-on-error: true
+        timeout-minutes: 80
+        run: pytest autotest/tools/restful/test_restful_chat_pytorch.py -m 'not pr_test' --alluredir=allure-results
+      - name: Test lmdeploy - chat workspace
+        continue-on-error: true
+        timeout-minutes: 30
         run: |
-          pytest autotest -m '(command_chat or command_chat_hf or command_chat_pytorch) and not Baichuan2_7B_Chat and not Baichuan2_13B_Chat' -n 4 --alluredir=allure-results
-      - name: Downgrade transformers
-        run: python3 -m pip install transformers==4.33.0
-      - name: Test lmdeploy - run Baichuan
+          pytest autotest/tools/chat/test_command_chat_workspace.py -m 'not pr_test' -n 4 --alluredir=allure-results
+      - name: Test lmdeploy - chat hf turbomind
         continue-on-error: true
-        timeout-minutes: 50
+        timeout-minutes: 45
         run: |
-          pytest autotest -m '(Baichuan2_7B_Chat or Baichuan2_13B_Chat) and not pipeline_chat_pytorch' --alluredir=allure-results
-      - name: Test lmdeploy - rerun fail cases
+          pytest autotest/tools/chat/test_command_chat_hf_turbomind.py -m 'not pr_test' -n 4 --alluredir=allure-results
+      - name: Test lmdeploy - chat hf torch
         continue-on-error: true
+        timeout-minutes: 60
+        run: |
+          pytest autotest/tools/chat/test_command_chat_hf_pytorch.py -m 'not pr_test' -n 4 --alluredir=allure-results
+      - name: Test lmdeploy - rerun all fail cases
        timeout-minutes: 60
         run: |
-          pytest autotest --alluredir=allure-results --lf
+          pytest autotest --lf --alluredir=allure-results
       - name: Generate reports
         if: always()
         run: |
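The rewritten steps select suites by test path plus pytest markers instead of one long `-m` expression: `-m 'not pr_test'` excludes the PR-only subset, `-n` and `--dist loadgroup` parallelize via pytest-xdist, `--alluredir` stores results for the Allure report, and the final step's `--lf` retries tests recorded as failed in pytest's last-failed cache. A small illustration of these standard pytest flags, assuming `pr_test` is a marker registered in the repository's pytest config:

```bash
# List (without executing) the cases a nightly step would select.
pytest autotest -m 'not pr_test' --collect-only -q

# Rerun only the tests recorded as failed in pytest's last-failed cache.
pytest autotest --lf --alluredir=allure-results
```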
99 changes: 99 additions & 0 deletions .github/workflows/pr_ete_test.yml
@@ -0,0 +1,99 @@
name: pr_ete_test

on:
  pull_request:
    paths:
      - ".github/workflows/pr_ete_test.yml"
      - "cmake/**"
      - "src/**"
      - "autotest/**"
      - "3rdparty/**"
      - "lmdeploy/**"
      - "requirements/**"
      - "requirements.txt"
      - "CMakeLists.txt"
      - "setup.py"
  workflow_dispatch:


env:
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai


jobs:
  pr_functions_test:
    runs-on: [self-hosted, linux-a100-pr]
    timeout-minutes: 120
    env:
      REPORT_DIR: /nvme/qa_test_models/test-reports
    container:
      image: nvcr.io/nvidia/tritonserver:22.12-py3
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip"
      volumes:
        - /nvme/share_data/github-actions/pip-cache:/root/.cache/pip
        - /nvme/share_data/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Setup systems
        run: |
          rm /etc/apt/sources.list.d/cuda*.list
          apt-get update && apt-get install -y --no-install-recommends rapidjson-dev \
              libgoogle-glog-dev libgl1 openjdk-8-jre-headless
          dpkg -i /root/packages/allure_2.24.1-1_all.deb
          rm -rf /var/lib/apt/lists/*
      - name: Clone repository
        uses: actions/checkout@v2
      - name: Install pytorch
        run: |
          python3 -m pip cache dir
          python3 -m pip install torch==2.1.0 torchvision==0.16.0 --index-url https://download.pytorch.org/whl/cu118
      - name: Build lmdeploy
        run: |
          python3 -m pip install cmake
          python3 -m pip install -r requirements/build.txt
          mkdir build
          cd build
          cmake .. \
              -DCMAKE_BUILD_TYPE=RelWithDebInfo \
              -DCMAKE_EXPORT_COMPILE_COMMANDS=1 \
              -DCMAKE_INSTALL_PREFIX=/opt/tritonserver \
              -DBUILD_PY_FFI=ON \
              -DBUILD_MULTI_GPU=ON \
              -DCMAKE_CUDA_FLAGS="-lineinfo" \
              -DUSE_NVTX=ON \
              -DSM=80 \
              -DCMAKE_CUDA_ARCHITECTURES=80 \
              -DBUILD_TEST=OFF
          make -j$(nproc) && make install
      - name: Install lmdeploy
        run: |
          python3 -m pip install packaging protobuf transformers_stream_generator transformers datasets
          # manually install flash attn
          # the install package is from https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.6/flash_attn-2.3.6+cu118torch2.0cxx11abiFALSE-cp38-cp38-linux_x86_64.whl
          python3 -m pip install /root/packages/flash_attn-2.3.6+cu118torch2.1cxx11abiFALSE-cp38-cp38-linux_x86_64.whl
          python3 -m pip install -r requirements.txt -r requirements/test.txt
          python3 -m pip install .
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
      - name: Test lmdeploy
        timeout-minutes: 120
        run: CUDA_VISIBLE_DEVICES=5,6 pytest autotest -m pr_test --alluredir=allure-results --clean-alluredir
      - name: Generate reports
        if: always()
        run: |
          export date_today="$(date +'%Y%m%d-%H%M%S')"
          export report_dir="$REPORT_DIR/$date_today"
          echo "Save report to $ALLURE_DIR"
          allure generate -c -o $report_dir
      - name: Clear workfile
        if: always()
        run: |
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir
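The new PR workflow mirrors the nightly job but triggers only when build-relevant paths change, caps the whole job at 120 minutes, and runs just the `pr_test`-marked subset on two pinned GPUs. A rough sketch of approximating the CI environment locally, using the image and container options from the workflow (the host-specific volume mounts are omitted):

```bash
# Start the same Triton base image the job uses; requires the NVIDIA container toolkit.
docker run --gpus=all --ipc=host --rm -it nvcr.io/nvidia/tritonserver:22.12-py3 bash
# Inside, the workflow builds with -DSM=80 / -DCMAKE_CUDA_ARCHITECTURES=80,
# i.e. compute capability 8.0 (A100), then pip-installs the source tree.
```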
2 changes: 1 addition & 1 deletion .github/workflows/unit-test.yml
@@ -74,7 +74,7 @@ jobs:
         make -j$(nproc) && make install
       - name: Install lmdeploy
         run: |
-          python3 -m pip install pynvml packaging protobuf transformers_stream_generator transformers==4.33.0
+          python3 -m pip install pynvml packaging protobuf transformers_stream_generator
           # manually install flash attn
           python3 -m pip install /root/packages/flash_attn-2.3.6+cu118torch2.1cxx11abiFALSE-cp38-cp38-linux_x86_64.whl
           python3 -m pip install -r requirements.txt -r requirements/test.txt
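Dropping the `transformers==4.33.0` pin (and the explicit `transformers` entry) defers the version choice to the resolver via `requirements.txt`. A purely illustrative check of what ends up installed:

```bash
python3 -m pip show transformers | head -n 2   # prints the resolved package name and version
```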
16 changes: 10 additions & 6 deletions README.md
@@ -1,20 +1,23 @@
 <div align="center">
   <img src="docs/en/_static/image/lmdeploy-logo.svg" width="450"/>
 
 [![docs](https://img.shields.io/badge/docs-latest-blue)](https://lmdeploy.readthedocs.io/en/latest/)
 [![badge](https://github.com/InternLM/lmdeploy/workflows/lint/badge.svg)](https://github.com/InternLM/lmdeploy/actions)
 [![PyPI](https://img.shields.io/pypi/v/lmdeploy)](https://pypi.org/project/lmdeploy)
 ![PyPI - Downloads](https://img.shields.io/pypi/dm/lmdeploy)
 [![license](https://img.shields.io/github/license/InternLM/lmdeploy.svg)](https://github.com/InternLM/lmdeploy/tree/main/LICENSE)
 [![issue resolution](https://img.shields.io/github/issues-closed-raw/InternLM/lmdeploy)](https://github.com/InternLM/lmdeploy/issues)
 [![open issues](https://img.shields.io/github/issues-raw/InternLM/lmdeploy)](https://github.com/InternLM/lmdeploy/issues)
 
 [📘Documentation](https://lmdeploy.readthedocs.io/en/latest/) |
 [🛠️Quick Start](https://lmdeploy.readthedocs.io/en/latest/get_started.html) |
 [🤔Reporting Issues](https://github.com/InternLM/lmdeploy/issues/new/choose)
 
 English | [简体中文](README_zh-CN.md)
 
-</div>
-👋 join us on [![Static Badge](https://img.shields.io/badge/-grey?style=social&logo=wechat&label=WeChat)](https://r.vansin.top/?r=internwx)
-[![Static Badge](https://img.shields.io/badge/-grey?style=social&logo=twitter&label=Twitter)](https://twitter.com/intern_lm)
-[![Static Badge](https://img.shields.io/badge/-grey?style=social&logo=discord&label=Discord)](https://discord.gg/xa29JuW87d)
 
+<p align="center">
+    👋 join us on <a href="https://twitter.com/intern_lm" target="_blank">Twitter</a>, <a href="https://discord.gg/xa29JuW87d" target="_blank">Discord</a> and <a href="https://r.vansin.top/?r=internwx" target="_blank">WeChat</a>
+</p>
+</div>
 
 ______________________________________________________________________

@@ -23,6 +26,7 @@
 <details open>
 <summary><b>2024</b></summary>
 
+- \[2024/02\] Support Qwen 1.5, Gemma, Mistral, Mixtral, Deepseek-MOE and so on.
 - \[2024/01\] [OpenAOE](https://github.com/InternLM/OpenAOE) seamless integration with [LMDeploy Serving Service](./docs/en/serving/restful_api.md).
 - \[2024/01\] Support for multi-model, multi-machine, multi-card inference services. For usage instructions, please refer to [here](./docs/en/serving/proxy_server.md)
 - \[2024/01\] Support [PyTorch inference engine](./docs/en/inference/pytorch.md), developed entirely in Python, helping to lower the barriers for developers and enable rapid experimentation with new features and technologies.
16 changes: 10 additions & 6 deletions README_zh-CN.md
@@ -1,20 +1,23 @@
 <div align="center">
   <img src="docs/en/_static/image/lmdeploy-logo.svg" width="450"/>
 
 [![docs](https://img.shields.io/badge/docs-latest-blue)](https://lmdeploy.readthedocs.io/zh-cn/latest/)
 [![badge](https://github.com/InternLM/lmdeploy/workflows/lint/badge.svg)](https://github.com/InternLM/lmdeploy/actions)
 [![PyPI](https://img.shields.io/pypi/v/lmdeploy)](https://pypi.org/project/lmdeploy)
 ![PyPI - Downloads](https://img.shields.io/pypi/dm/lmdeploy)
 [![license](https://img.shields.io/github/license/InternLM/lmdeploy.svg)](https://github.com/InternLM/lmdeploy/tree/main/LICENSE)
 [![issue resolution](https://img.shields.io/github/issues-closed-raw/InternLM/lmdeploy)](https://github.com/InternLM/lmdeploy/issues)
 [![open issues](https://img.shields.io/github/issues-raw/InternLM/lmdeploy)](https://github.com/InternLM/lmdeploy/issues)
 
 [📘Documentation](https://lmdeploy.readthedocs.io/zh-cn/latest/) |
 [🛠️Quick Start](https://lmdeploy.readthedocs.io/zh-cn/latest/get_started.html) |
 [🤔Reporting Issues](https://github.com/InternLM/lmdeploy/issues/new/choose)
 
 [English](README.md) | 简体中文
 
-</div>
-👋 join us on [![Static Badge](https://img.shields.io/badge/-grey?style=social&logo=wechat&label=WeChat)](https://r.vansin.top/?r=internwx)
-[![Static Badge](https://img.shields.io/badge/-grey?style=social&logo=twitter&label=Twitter)](https://twitter.com/intern_lm)
-[![Static Badge](https://img.shields.io/badge/-grey?style=social&logo=discord&label=Discord)](https://discord.gg/xa29JuW87d)
 
+<p align="center">
+    👋 join us on <a href="https://twitter.com/intern_lm" target="_blank">Twitter</a>, <a href="https://discord.gg/xa29JuW87d" target="_blank">Discord</a> and <a href="https://r.vansin.top/?r=internwx" target="_blank">WeChat</a>
+</p>
+</div>
 
 ______________________________________________________________________

@@ -23,6 +26,7 @@
 <details open>
 <summary><b>2024</b></summary>
 
+- \[2024/02\] Support Qwen 1.5, Gemma, Mistral, Mixtral, Deepseek-MOE, and more
 - \[2024/01\] [OpenAOE](https://github.com/InternLM/OpenAOE) released, with seamless integration into the [LMDeploy Serving Service](./docs/zh_cn/serving/restful_api.md)
 - \[2024/01\] Support for multi-model, multi-machine, multi-GPU inference services; see [here](./docs/zh_cn/serving/proxy_server.md) for usage
 - \[2024/01\] Added the [PyTorch inference engine](./docs/zh_cn/inference/pytorch.md) as a complement to the TurboMind engine; it lowers the barrier for developers and enables rapid experimentation with new features and technologies
[Diff truncated: the remaining 91 changed files are not shown.]
