From 8996a0421a0197b680563cc32c055e520c797b9c Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Wed, 18 Oct 2023 14:31:01 +0800 Subject: [PATCH 01/11] update reqs --- requirements.txt | 20 +++----------------- requirements/build.txt | 2 ++ requirements/optional.txt | 5 +++++ requirements/runtime.txt | 9 +++++++++ requirements/test.txt | 2 ++ setup.py | 8 +++++++- 6 files changed, 28 insertions(+), 18 deletions(-) create mode 100644 requirements/build.txt create mode 100644 requirements/optional.txt create mode 100644 requirements/runtime.txt create mode 100644 requirements/test.txt diff --git a/requirements.txt b/requirements.txt index 861623c040..176d0edefa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,17 +1,3 @@ -accelerate -datasets -fastapi -fire -gradio -mmengine -numpy -pybind11 -safetensors -sentencepiece -setuptools -shortuuid -tiktoken -torch -transformers>=4.33.0 -tritonclient[all] -uvicorn +-r requirements/build.txt +-r requirements/runtime.txt +-r requirements/optional.txt diff --git a/requirements/build.txt b/requirements/build.txt new file mode 100644 index 0000000000..b4430ae374 --- /dev/null +++ b/requirements/build.txt @@ -0,0 +1,2 @@ +pybind11 +setuptools diff --git a/requirements/optional.txt b/requirements/optional.txt new file mode 100644 index 0000000000..e4d306d96d --- /dev/null +++ b/requirements/optional.txt @@ -0,0 +1,5 @@ +accelerate +datasets +fastapi +shortuuid +uvicorn diff --git a/requirements/runtime.txt b/requirements/runtime.txt new file mode 100644 index 0000000000..765cf9d7dc --- /dev/null +++ b/requirements/runtime.txt @@ -0,0 +1,9 @@ +fire +gradio +mmengine +numpy +safetensors +sentencepiece +tiktoken +torch +transformers>=4.33.0 diff --git a/requirements/test.txt b/requirements/test.txt new file mode 100644 index 0000000000..7093b61a35 --- /dev/null +++ b/requirements/test.txt @@ -0,0 +1,2 @@ +coverage +pytest diff --git a/setup.py b/setup.py index df36118c23..2f384548ca 100644 --- a/setup.py +++ b/setup.py @@ 
-134,7 +134,13 @@ def gen_packages_items(): 'lmdeploy': lmdeploy_package_data, }, include_package_data=True, - install_requires=parse_requirements('requirements.txt'), + setup_requires=parse_requirements('requirements/build.txt'), + tests_require=parse_requirements('requirements/test.txt'), + install_requires=parse_requirements('requirements/runtime.txt'), + extras_require={ + 'all': parse_requirements('requirements.txt'), + 'optional': parse_requirements('requirements/optional.txt'), + }, has_ext_modules=check_ext_modules, classifiers=[ 'Programming Language :: Python :: 3.8', From 8cd2c5c573f6d04e8e66c99c5976b053345d1c19 Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Wed, 18 Oct 2023 15:14:33 +0800 Subject: [PATCH 02/11] update docs --- README.md | 3 ++- README_zh-CN.md | 3 ++- docs/en/faq.md | 4 ++-- docs/en/supported_models/codellama.md | 2 +- docs/en/w4a16.md | 2 +- docs/zh_cn/faq.md | 4 ++-- docs/zh_cn/supported_models/codellama.md | 2 +- docs/zh_cn/w4a16.md | 2 +- requirements.txt | 3 ++- requirements/lite.txt | 2 ++ requirements/{optional.txt => serve.txt} | 2 -- 11 files changed, 16 insertions(+), 13 deletions(-) create mode 100644 requirements/lite.txt rename requirements/{optional.txt => serve.txt} (56%) diff --git a/README.md b/README.md index c65cff7e5f..24ef5fc0e3 100644 --- a/README.md +++ b/README.md @@ -100,7 +100,7 @@ And the request throughput of TurboMind is 30% higher than vLLM. 
Install lmdeploy with pip ( python 3.8+) or [from source](./docs/en/build.md) ```shell -pip install lmdeploy +pip install lmdeploy[all] ``` ### Deploy InternLM @@ -182,6 +182,7 @@ bash workspace/service_docker_up.sh Then, you can communicate with the inference server by command line, ```shell +python3 -m pip install tritonclient[grpc] lmdeploy serve triton_client {server_ip_addresss}:33337 ``` diff --git a/README_zh-CN.md b/README_zh-CN.md index 84f860ef3d..65cfb47767 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -101,7 +101,7 @@ TurboMind 的 output token throughput 超过 2000 token/s, 整体比 DeepSpeed 使用 pip ( python 3.8+) 安装 LMDeploy,或者[源码安装](./docs/zh_cn/build.md) ```shell -pip install lmdeploy +pip install lmdeploy[all] ``` ### 部署 InternLM @@ -182,6 +182,7 @@ bash workspace/service_docker_up.sh 你可以通过命令行方式与推理服务进行对话: ```shell +python3 -m pip install tritonclient[grpc] lmdeploy serve triton_client {server_ip_addresss}:33337 ``` diff --git a/docs/en/faq.md b/docs/en/faq.md index 636da5c947..6f109997cc 100644 --- a/docs/en/faq.md +++ b/docs/en/faq.md @@ -17,7 +17,7 @@ It may have been caused by the following reasons. 1. You haven't installed lmdeploy's precompiled package. `_turbomind` is the pybind package of c++ turbomind, which involves compilation. It is recommended that you install the precompiled one. ```shell -pip install lmdeploy +pip install lmdeploy[all] ``` 2. If you have installed it and still encounter this issue, it is probably because you are executing turbomind-related command in the root directory of lmdeploy source code. Switching to another directory will fix it @@ -26,7 +26,7 @@ pip install lmdeploy ### libnccl.so.2 not found -Make sure you have install lmdeploy (>=v0.0.5) through `pip install lmdeploy`. +Make sure you have installed lmdeploy (>=v0.0.5) through `pip install lmdeploy[all]`. If the issue still exists after lmdeploy installation, add the path of `libnccl.so.2` to environment variable LD_LIBRARY_PATH. 
diff --git a/docs/en/supported_models/codellama.md b/docs/en/supported_models/codellama.md index 886dc5922c..72a1644eba 100644 --- a/docs/en/supported_models/codellama.md +++ b/docs/en/supported_models/codellama.md @@ -26,7 +26,7 @@ Based on the above table, download the model that meets your requirements. Execu ```shell # install lmdeploy -python3 -m pip install lmdeploy +python3 -m pip install lmdeploy[all] # convert weight layout lmdeploy convert codellama /the/path/of/codellama/model diff --git a/docs/en/w4a16.md b/docs/en/w4a16.md index dc70c23c2c..7b44ece9e1 100644 --- a/docs/en/w4a16.md +++ b/docs/en/w4a16.md @@ -5,7 +5,7 @@ LMDeploy supports LLM model inference of 4-bit weight, with the minimum requirem Before proceeding with the inference, please ensure that lmdeploy is installed. ```shell -pip install lmdeploy +pip install lmdeploy[all] ``` ## 4-bit LLM model Inference diff --git a/docs/zh_cn/faq.md b/docs/zh_cn/faq.md index 5f3bf0b117..c86bfc1841 100644 --- a/docs/zh_cn/faq.md +++ b/docs/zh_cn/faq.md @@ -17,7 +17,7 @@ pip install --upgrade mmengine 1. 您没有安装 lmdeploy 的预编译包。`_turbomind`是 turbomind c++ 的 pybind部分,涉及到编译。推荐您直接安装预编译包。 ``` -pip install lmdeploy +pip install lmdeploy[all] ``` 2. 
如果已经安装了,还是出现这个问题,请检查下执行目录。不要在 lmdeploy 的源码根目录下执行 python -m lmdeploy.turbomind.\*下的package,换到其他目录下执行。 @@ -26,7 +26,7 @@ pip install lmdeploy ### libnccl.so.2 not found -确保通过 `pip install lmdeploy` 安装了 lmdeploy (>=v0.0.5)。 +确保通过 `pip install lmdeploy[all]` 安装了 lmdeploy (>=v0.0.5)。 如果安装之后,问题还存在,那么就把`libnccl.so.2`的路径加入到环境变量 LD_LIBRARY_PATH 中。 diff --git a/docs/zh_cn/supported_models/codellama.md b/docs/zh_cn/supported_models/codellama.md index a2abd2f4a9..47db94b62c 100644 --- a/docs/zh_cn/supported_models/codellama.md +++ b/docs/zh_cn/supported_models/codellama.md @@ -26,7 +26,7 @@ ```shell # 安装 lmdeploy -python3 -m pip install lmdeploy +python3 -m pip install lmdeploy[all] # 转模型格式 lmdeploy convert codellama /path/of/codellama/model diff --git a/docs/zh_cn/w4a16.md b/docs/zh_cn/w4a16.md index 46f5c58a91..259847da16 100644 --- a/docs/zh_cn/w4a16.md +++ b/docs/zh_cn/w4a16.md @@ -5,7 +5,7 @@ LMDeploy 支持 4bit 权重模型的推理,**对 NVIDIA 显卡的最低要求 在推理之前,请确保安装了 lmdeploy ```shell -pip install lmdeploy +pip install lmdeploy[all] ``` ## 4bit 权重模型推理 diff --git a/requirements.txt b/requirements.txt index 176d0edefa..91d38808f1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ -r requirements/build.txt -r requirements/runtime.txt --r requirements/optional.txt +-r requirements/lite.txt +-r requirements/serve.txt diff --git a/requirements/lite.txt b/requirements/lite.txt new file mode 100644 index 0000000000..1c2319c73d --- /dev/null +++ b/requirements/lite.txt @@ -0,0 +1,2 @@ +accelerate +datasets diff --git a/requirements/optional.txt b/requirements/serve.txt similarity index 56% rename from requirements/optional.txt rename to requirements/serve.txt index e4d306d96d..6a58dfaba5 100644 --- a/requirements/optional.txt +++ b/requirements/serve.txt @@ -1,5 +1,3 @@ -accelerate -datasets fastapi shortuuid uvicorn From 0e35d07de611aac1dbe466b653f89727b366a1b6 Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Wed, 25 Oct 2023 14:19:56 +0800 Subject: [PATCH 03/11] resolve comments 
--- requirements/runtime.txt | 1 - requirements/serve.txt | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/runtime.txt b/requirements/runtime.txt index 765cf9d7dc..c65dceae70 100644 --- a/requirements/runtime.txt +++ b/requirements/runtime.txt @@ -1,5 +1,4 @@ fire -gradio mmengine numpy safetensors diff --git a/requirements/serve.txt b/requirements/serve.txt index 6a58dfaba5..71bf5f4469 100644 --- a/requirements/serve.txt +++ b/requirements/serve.txt @@ -1,3 +1,4 @@ fastapi +gradio shortuuid uvicorn From b8267bbbbce7bb360211f5b572091922a7f914ac Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Wed, 25 Oct 2023 14:46:41 +0800 Subject: [PATCH 04/11] upgrade pydantic --- requirements/serve.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements/serve.txt b/requirements/serve.txt index 71bf5f4469..6f1ce9fb42 100644 --- a/requirements/serve.txt +++ b/requirements/serve.txt @@ -1,4 +1,5 @@ fastapi gradio +pydantic>2.0.0 shortuuid uvicorn From 080d7ad1d5e0f33fd0bc7f45d2e3a8b322ede9e2 Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Wed, 25 Oct 2023 16:25:31 +0800 Subject: [PATCH 05/11] fix rebase --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2f384548ca..ff009b735f 100644 --- a/setup.py +++ b/setup.py @@ -139,7 +139,8 @@ def gen_packages_items(): install_requires=parse_requirements('requirements/runtime.txt'), extras_require={ 'all': parse_requirements('requirements.txt'), - 'optional': parse_requirements('requirements/optional.txt'), + 'lite': parse_requirements('requirements/lite.txt'), + 'serve': parse_requirements('requirements/serve.txt') }, has_ext_modules=check_ext_modules, classifiers=[ From f615bbc4bd0ba144fa7501eea082c85591cad8e5 Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Thu, 26 Oct 2023 14:25:24 +0800 Subject: [PATCH 06/11] update doc --- README.md | 8 ++++++++ README_zh-CN.md | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/README.md 
b/README.md index 24ef5fc0e3..2978724064 100644 --- a/README.md +++ b/README.md @@ -103,6 +103,14 @@ Install lmdeploy with pip ( python 3.8+) or [from source](./docs/en/build.md) pip install lmdeploy[all] ``` +```{note} +Users can install lmdeploy with extra required packages, eg: pip install lmdeploy[all] + +- all: Install lmdeploy with dependencies in requirements.txt +- lite: Install lmdeploy with dependencies in requirements/runtime.txt and requirements/lite.txt +- serve: Install lmdeploy with dependencies in requirements/runtime.txt and requirements/serve.txt +``` + ### Deploy InternLM #### Get InternLM model diff --git a/README_zh-CN.md b/README_zh-CN.md index 65cfb47767..e163cf9793 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -104,6 +104,14 @@ TurboMind 的 output token throughput 超过 2000 token/s, 整体比 DeepSpeed pip install lmdeploy[all] ``` +```{note} +用户在安装lmdeplou时可指定需要安装的额外依赖包,例如: pip install lmdeploy[all]会安装所有依赖包。 + +- all: 安装lmdeploy时,安装在 requirements.txt 中的所有依赖包。 +- lite: 安装lmdeploy时,安装在 requirements/runtime.txt 和 requirements/lite.txt 中的依赖包。 +- serve: 安装lmdeploy时,安装在 requirements/runtime.txt 和 requirements/lite.txt 中的依赖包。 +``` + ### 部署 InternLM #### 获取 InternLM 模型 From 94309e6dc44879a0df5087e1aa6405589d4a5900 Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Thu, 26 Oct 2023 14:36:03 +0800 Subject: [PATCH 07/11] update --- README.md | 13 ++++++------- README_zh-CN.md | 13 ++++++------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 2978724064..76cfbfe7ff 100644 --- a/README.md +++ b/README.md @@ -103,13 +103,12 @@ Install lmdeploy with pip ( python 3.8+) or [from source](./docs/en/build.md) pip install lmdeploy[all] ``` -```{note} -Users can install lmdeploy with extra required packages, eg: pip install lmdeploy[all] - -- all: Install lmdeploy with dependencies in requirements.txt -- lite: Install lmdeploy with dependencies in requirements/runtime.txt and requirements/lite.txt -- serve: Install 
lmdeploy with dependencies in requirements/runtime.txt and requirements/serve.txt -``` +> **Note**
+> Users can install lmdeploy with extra required packages, eg: `pip install lmdeploy[all]` +> +> - `all`: Install lmdeploy with dependencies in `requirements.txt` +> - `lite`: Install lmdeploy with dependencies in `requirements/runtime.txt` and `requirements/lite.txt` +> - `serve`: Install lmdeploy with dependencies in `requirements/runtime.txt` and `requirements/serve.txt` ### Deploy InternLM diff --git a/README_zh-CN.md b/README_zh-CN.md index e163cf9793..33eaf7e01f 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -104,13 +104,12 @@ TurboMind 的 output token throughput 超过 2000 token/s, 整体比 DeepSpeed pip install lmdeploy[all] ``` -```{note} -用户在安装lmdeplou时可指定需要安装的额外依赖包,例如: pip install lmdeploy[all]会安装所有依赖包。 - -- all: 安装lmdeploy时,安装在 requirements.txt 中的所有依赖包。 -- lite: 安装lmdeploy时,安装在 requirements/runtime.txt 和 requirements/lite.txt 中的依赖包。 -- serve: 安装lmdeploy时,安装在 requirements/runtime.txt 和 requirements/lite.txt 中的依赖包。 -``` +> **Note**
+> 用户在安装lmdeplou时可指定需要安装的额外依赖包,例如: `pip install lmdeploy[all]` 会安装所有依赖包。 +> +> - `all`: 安装lmdeploy时,安装在 `requirements.txt` 中的所有依赖包。 +> - `lite`: 安装lmdeploy时,安装在 `requirements/runtime.txt` 和 `requirements/lite.txt` 中的依赖包。 +> - `serve`: 安装lmdeploy时,安装在 `requirements/runtime.txt` 和 `requirements/lite.txt` 中的依赖包。 ### 部署 InternLM From 1d0ce881296ca7cbba547e129fa69d7bf7940982 Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Thu, 26 Oct 2023 14:36:55 +0800 Subject: [PATCH 08/11] update --- README_zh-CN.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README_zh-CN.md b/README_zh-CN.md index 33eaf7e01f..b0a0b6602b 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -105,11 +105,11 @@ pip install lmdeploy[all] ``` > **Note**
-> 用户在安装lmdeplou时可指定需要安装的额外依赖包,例如: `pip install lmdeploy[all]` 会安装所有依赖包。 +> 用户在安装lmdeplou时可指定需要安装的额外依赖包,例如: `pip install lmdeploy[all]` 会安装所有依赖包 > -> - `all`: 安装lmdeploy时,安装在 `requirements.txt` 中的所有依赖包。 -> - `lite`: 安装lmdeploy时,安装在 `requirements/runtime.txt` 和 `requirements/lite.txt` 中的依赖包。 -> - `serve`: 安装lmdeploy时,安装在 `requirements/runtime.txt` 和 `requirements/lite.txt` 中的依赖包。 +> - `all`: 安装lmdeploy时,安装在 `requirements.txt` 中的所有依赖包 +> - `lite`: 安装lmdeploy时,安装在 `requirements/runtime.txt` 和 `requirements/lite.txt` 中的依赖包 +> - `serve`: 安装lmdeploy时,安装在 `requirements/runtime.txt` 和 `requirements/lite.txt` 中的依赖包 ### 部署 InternLM From 25c467bd226591a084436d0b4c89e71e2a0efc20 Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Thu, 26 Oct 2023 14:58:53 +0800 Subject: [PATCH 09/11] update readme --- README.md | 9 +++++---- README_zh-CN.md | 8 ++++---- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 76cfbfe7ff..034f5754bd 100644 --- a/README.md +++ b/README.md @@ -104,11 +104,12 @@ pip install lmdeploy[all] ``` > **Note**
-> Users can install lmdeploy with extra required packages, eg: `pip install lmdeploy[all]` +> `pip install lmdeploy` can only install the runtime required packages. If users want to run code from modules like `lmdeploy.lite` and `lmdeploy.serve`, they need to install the extra required packages. > For instance, running `pip install lmdeploy[lite]` would install extra dependencies for the `lmdeploy.lite` module. > -> - `all`: Install lmdeploy with dependencies in `requirements.txt` -> - `lite`: Install lmdeploy with dependencies in `requirements/runtime.txt` and `requirements/lite.txt` -> - `serve`: Install lmdeploy with dependencies in `requirements/runtime.txt` and `requirements/serve.txt` +> - `all`: Install lmdeploy with all dependencies in `requirements.txt` +> - `lite`: Install lmdeploy with extra dependencies in `requirements/lite.txt` +> - `serve`: Install lmdeploy with dependencies in `requirements/serve.txt` ### Deploy InternLM diff --git a/README_zh-CN.md b/README_zh-CN.md index b0a0b6602b..0217da795d 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -105,11 +105,11 @@ pip install lmdeploy[all] ``` > **Note**
-> 用户在安装lmdeplou时可指定需要安装的额外依赖包,例如: `pip install lmdeploy[all]` 会安装所有依赖包 +> `pip install lmdeploy`默认安装runtime依赖包,使用lmdeploy的lite和serve功能时,用户需要安装额外依赖包。例如: `pip install lmdeploy[lite]` 会额外安装`lmdeploy.lite`模块的依赖包 > -> - `all`: 安装lmdeploy时,安装在 `requirements.txt` 中的所有依赖包 -> - `lite`: 安装lmdeploy时,安装在 `requirements/runtime.txt` 和 `requirements/lite.txt` 中的依赖包 -> - `serve`: 安装lmdeploy时,安装在 `requirements/runtime.txt` 和 `requirements/lite.txt` 中的依赖包 +> - `all`: 安装`lmdeploy`所有依赖包,具体可查看`requirements.txt` +> - `lite`: 额外安装`lmdeploy.lite`模块的依赖包,具体可查看`requirements/lite.txt` +> - `serve`: 额外安装`lmdeploy.serve`模块的依赖包,具体可查看`requirements/serve.txt` ### 部署 InternLM From 88e648c759084756db742b9ff2510456a78c8b76 Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Tue, 7 Nov 2023 08:58:40 +0800 Subject: [PATCH 10/11] update --- README.md | 8 +++++++- README_zh-CN.md | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 189c5a9d55..b3ff5231bb 100644 --- a/README.md +++ b/README.md @@ -100,7 +100,7 @@ And the request throughput of TurboMind is 30% higher than vLLM. Install lmdeploy with pip ( python 3.8+) or [from source](./docs/en/build.md) ```shell -pip install lmdeploy[all] +pip install lmdeploy ``` > **Note**
@@ -148,6 +148,9 @@ lmdeploy chat turbomind ./workspace #### Serving with gradio ```shell +# install lmdeploy with extra dependencies +pip install lmdeploy[serve] + lmdeploy serve gradio ./workspace ``` @@ -158,6 +161,9 @@ lmdeploy serve gradio ./workspace Launch inference server by: ```shell +# install lmdeploy with extra dependencies +pip install lmdeploy[serve] + lmdeploy serve api_server ./workspace --instance_num 32 --tp 1 ``` diff --git a/README_zh-CN.md b/README_zh-CN.md index d3f0177fef..7ea119aadd 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -101,7 +101,7 @@ TurboMind 的 output token throughput 超过 2000 token/s, 整体比 DeepSpeed 使用 pip ( python 3.8+) 安装 LMDeploy,或者[源码安装](./docs/zh_cn/build.md) ```shell -pip install lmdeploy[all] +pip install lmdeploy ``` > **Note**
@@ -147,6 +147,9 @@ lmdeploy chat turbomind ./workspace #### 启动 gradio server ```shell +# 安装lmdeploy额外依赖 +pip install lmdeploy[serve] + lmdeploy serve gradio ./workspace ``` @@ -157,6 +160,9 @@ lmdeploy serve gradio ./workspace 使用下面的命令启动推理服务: ```shell +# 安装lmdeploy额外依赖 +pip install lmdeploy[serve] + lmdeploy serve api_server ./workspace --server_name 0.0.0.0 --server_port ${server_port} --instance_num 32 --tp 1 ``` From c4a701201e17789a93c439c6a8528d5c2a4d9ee4 Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Thu, 9 Nov 2023 09:30:49 +0800 Subject: [PATCH 11/11] add flash-attn --- requirements/lite.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements/lite.txt b/requirements/lite.txt index 1c2319c73d..bd10933103 100644 --- a/requirements/lite.txt +++ b/requirements/lite.txt @@ -1,2 +1,3 @@ accelerate datasets +flash-attn