InternLM · AllentDan · Aug 30, 2024 · Dec 11, 2024 · Dec 11, 2024 · Dec 18, 2024
diff --git a/docs/en/llm/api_server.md b/docs/en/llm/api_server.md
@@ -249,6 +249,33 @@ curl http://{server_ip}:{server_port}/v1/chat/interactive \
 lmdeploy serve gradio api_server_url --server-name ${gradio_ui_ip} --server-port ${gradio_ui_port}
 ```
 
+## Launch multiple api servers
+
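+The following is one way to launch multiple api servers through torchrun: create a Python script with the code below. Each worker is pinned to the GPU that matches its local rank and, unless a list of ports is given, listens on `port + local_rank * 10`.
+Launch the script with `torchrun --nproc_per_node 2 script.py InternLM/internlm2-chat-1_8b`.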
+
+```python
+from typing import List
+import fire
+
+import os
+
+def main(model_path: str,
+         port: int = 23333):
+    local_rank = int(os.environ.get('LOCAL_RANK', -1))
+    if isinstance(port, List):
+        assert len(port) == int(os.environ.get('WORLD_SIZE', -1))
+        port = port[local_rank]
+    else:
+        port += local_rank*10
+    command = f'CUDA_VISIBLE_DEVICES={local_rank} lmdeploy serve api_server {model_path} --server-port {port}'
+    os.system(command)
+
+
+if __name__ == '__main__':
+    fire.Fire(main)
+```
+
 ## FAQ
 
 1. When user got `"finish_reason":"length"`, it means the session is too long to be continued. The session length can be

diff --git a/docs/zh_cn/llm/api_server.md b/docs/zh_cn/llm/api_server.md
@@ -258,6 +258,32 @@ curl http://{server_ip}:{server_port}/v1/chat/interactive \
   }'
 ```
 
+## 同时启动多个 api_server
+
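+下面介绍一种通过 torchrun 同时启动多个 api_server 的方法。将下面的代码保存为 python 脚本（例如 `script.py`），然后执行 `torchrun --nproc_per_node 2 script.py InternLM/internlm2-chat-1_8b` 即可。每个进程会绑定到与其 local rank 对应的 GPU，端口默认为 `port + local_rank * 10`。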
+
+```python
+from typing import List
+import fire
+
+import os
+
+def main(model_path: str,
+         port: int = 23333):
+    local_rank = int(os.environ.get('LOCAL_RANK', -1))
+    if isinstance(port, List):
+        assert len(port) == int(os.environ.get('WORLD_SIZE', -1))
+        port = port[local_rank]
+    else:
+        port += local_rank*10
+    command = f'CUDA_VISIBLE_DEVICES={local_rank} lmdeploy serve api_server {model_path} --server-port {port}'
+    os.system(command)
+
+
+if __name__ == '__main__':
+    fire.Fire(main)
+```
+
 ## 接入 WebUI
 
 LMDeploy 提供 gradio 和 [OpenAOE](https://github.com/InternLM/OpenAOE) 两种方式，为 api_server 接入 WebUI。

diff --git a/lmdeploy/serve/openai/api_server.py b/lmdeploy/serve/openai/api_server.py
@@ -910,6 +910,20 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
         return JSONResponse(ret)
 
 
+def handle_torchrun():
+    """To disable mmengine logging logic when using torchrun."""
+
+    def dummy_get_device_id():
+        return 0
+
+    if int(os.environ.get('LOCAL_RANK', -1)) > 0:
+        from lmdeploy.vl.model.utils import _set_func
+
+        # the replacement can't be recovered
+        _set_func('mmengine.logging.logger._get_device_id',
+                  dummy_get_device_id)
+
+
 def serve(model_path: str,
           model_name: Optional[str] = None,
           backend: Literal['turbomind', 'pytorch'] = 'turbomind',
@@ -986,8 +1000,8 @@ def serve(model_path: str,
         ssl_certfile = os.environ['SSL_CERTFILE']
         http_or_https = 'https'
 
+    handle_torchrun()
     _, pipeline_class = get_task(model_path)
-
     VariableInterface.async_engine = pipeline_class(
         model_path=model_path,
         model_name=model_name,