From 08ae7fdd29c8f70e92c5ffe0b50ade3d5189e192 Mon Sep 17 00:00:00 2001
From: AllentDan
Date: Wed, 11 Dec 2024 16:52:06 +0800
Subject: [PATCH] update with proxy

---
 docs/en/llm/api_server.md    | 26 +++++++++++++++++++++-----
 docs/zh_cn/llm/api_server.md | 25 +++++++++++++++++++++----
 2 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/docs/en/llm/api_server.md b/docs/en/llm/api_server.md
index 52a7007b9..2c281da5a 100644
--- a/docs/en/llm/api_server.md
+++ b/docs/en/llm/api_server.md
@@ -251,24 +251,40 @@ lmdeploy serve gradio api_server_url --server-name ${gradio_ui_ip} --server-port
 
 ## Launch multiple api servers
 
-Following is a possible way to launch multiple api servers through torchrun. Just create a python script with the following codes.
-Launch the script through `torchrun --nproc_per_node 2 script.py InternLM/internlm2-chat-1_8b`.
+The following two steps launch multiple api servers through torchrun. First, create a python script with the code below, then:
+
+1. Launch the proxy server through `lmdeploy serve proxy` and note the proxy server url it serves on.
+2. Launch the script through `torchrun --nproc_per_node 2 script.py InternLM/internlm2-chat-1_8b http://{proxy_node_name}:{proxy_node_port}`. **Note**: Do not use `0.0.0.0:8000` here; pass the real IP address of the proxy node instead, e.g. `11.25.34.55:8000`.
 
 ```python
+import os
+import socket
 from typing import List
+
 import fire
-import os
+
+
+def get_host_ip():
+    # Pick the local address that would be used to reach the public
+    # network. A UDP connect() sends no packets; it only selects the
+    # outgoing interface.
+    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+    try:
+        s.connect(('8.8.8.8', 80))
+        ip = s.getsockname()[0]
+    finally:
+        s.close()
+    return ip
+
 
 def main(model_path: str,
+         proxy_url: str = 'http://0.0.0.0:8000',
          port: int = 23333):
     local_rank = int(os.environ.get('LOCAL_RANK', -1))
+    local_ip = get_host_ip()
     if isinstance(port, List):
         assert len(port) == int(os.environ.get('WORLD_SIZE', -1))
         port = port[local_rank]
     else:
-        port += local_rank*10
-    command = f'CUDA_VISIBLE_DEVICES={local_rank} lmdeploy serve api_server {model_path} --server-port {port}'
+        # Give each local rank its own server port.
+        port += local_rank * 10
+    command = f'CUDA_VISIBLE_DEVICES={local_rank} lmdeploy serve api_server {model_path} --server-name {local_ip} --server-port {port} --proxy-url {proxy_url}'
     os.system(command)
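+
+
+# The helper below is not in the original script; it is a minimal
+# sketch for verifying the deployment: once the api servers have
+# registered with the proxy, one chat request sent to the proxy
+# should round-trip. Assumptions: the proxy forwards the
+# OpenAI-compatible `/v1/chat/completions` route, the `requests`
+# package is installed, and the model name matches one reported by
+# your deployment.
+def check_proxy(proxy_url: str, model: str = 'internlm2'):
+    import requests
+    resp = requests.post(
+        f'{proxy_url}/v1/chat/completions',
+        json={
+            'model': model,
+            'messages': [{'role': 'user', 'content': 'Hello!'}],
+        },
+        timeout=60)
+    print(resp.json())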
diff --git a/docs/zh_cn/llm/api_server.md b/docs/zh_cn/llm/api_server.md
index ee4ceec5b..2a415b576 100644
--- a/docs/zh_cn/llm/api_server.md
+++ b/docs/zh_cn/llm/api_server.md
@@ -260,23 +260,40 @@ curl http://{server_ip}:{server_port}/v1/chat/interactive \
 
 ## Launch multiple api servers
 
-Below is a script that can be launched through torchrun. Run it with: `torchrun --nproc_per_node 2 script.py InternLM/internlm2-chat-1_8b`.
+Two steps launch a multi-node, multi-GPU service. First create a launch script with the code below, then:
+
+1. Launch the proxy server with `lmdeploy serve proxy`.
+2. Launch the script with torchrun: `torchrun --nproc_per_node 2 script.py InternLM/internlm2-chat-1_8b http://{proxy_node_name}:{proxy_node_port}`.
+   **Note**: For multi-node, multi-GPU serving, do not use the default url `0.0.0.0:8000`; pass the address of the real IP instead, e.g. `11.25.34.55:8000`.
 
 ```python
+import os
+import socket
 from typing import List
+
 import fire
-import os
+
+
+def get_host_ip():
+    # Pick the local address that would be used to reach the public
+    # network. A UDP connect() sends no packets; it only selects the
+    # outgoing interface.
+    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+    try:
+        s.connect(('8.8.8.8', 80))
+        ip = s.getsockname()[0]
+    finally:
+        s.close()
+    return ip
+
 
 def main(model_path: str,
+         proxy_url: str = 'http://0.0.0.0:8000',
          port: int = 23333):
     local_rank = int(os.environ.get('LOCAL_RANK', -1))
+    local_ip = get_host_ip()
     if isinstance(port, List):
         assert len(port) == int(os.environ.get('WORLD_SIZE', -1))
         port = port[local_rank]
     else:
-        port += local_rank*10
-    command = f'CUDA_VISIBLE_DEVICES={local_rank} lmdeploy serve api_server {model_path} --server-port {port}'
+        # Give each local rank its own server port.
+        port += local_rank * 10
+    command = f'CUDA_VISIBLE_DEVICES={local_rank} lmdeploy serve api_server {model_path} --server-name {local_ip} --server-port {port} --proxy-url {proxy_url}'
     os.system(command)
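+
+
+# Optional verification sketch, not in the original script, under one
+# assumption: the proxy is expected to expose the OpenAI-compatible
+# `/v1/models` route, so listing models through it shows which api
+# servers have registered. Requires the `requests` package.
+def list_registered_models(proxy_url: str):
+    import requests
+    resp = requests.get(f'{proxy_url}/v1/models', timeout=10)
+    for item in resp.json().get('data', []):
+        print(item.get('id'))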