Skip to content

Commit

Permalink
custom tp and backend
Browse files Browse the repository at this point in the history
  • Loading branch information
AllentDan committed Dec 18, 2024
1 parent 08ae7fd commit b1e30ef
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 24 deletions.
25 changes: 13 additions & 12 deletions docs/en/llm/api_server.md
Original file line number Diff line number Diff line change
Expand Up @@ -259,11 +259,8 @@ Following are two steps to launch multiple api servers through torchrun. Just cr
```python
import os
import socket
from typing import List

from typing import List, Literal
import fire


def get_host_ip():
try:
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
Expand All @@ -272,22 +269,26 @@ def get_host_ip():
finally:
s.close()
return ip


def main(model_path: str,
         tp: int = 1,
         proxy_url: str = 'http://0.0.0.0:8000',
         port: int = 23333,
         backend: Literal['turbomind', 'pytorch'] = 'turbomind'):
    """Launch one `lmdeploy serve api_server` per tp-group under torchrun.

    Args:
        model_path: path of the model to serve.
        tp: tensor-parallel degree; each server occupies `tp` consecutive GPUs.
        proxy_url: url of the proxy server every api_server registers with.
        port: either a single base port (offset per rank) or a list with one
            port per rank.
        backend: inference backend passed through to `lmdeploy serve`.
    """
    # Rank/world info is injected by torchrun; -1 means "not set".
    local_rank = int(os.environ.get('LOCAL_RANK', -1))
    world_size = int(os.environ.get('WORLD_SIZE', -1))
    local_ip = get_host_ip()
    # NOTE: use the builtin `list` here — `isinstance` against `typing.List`
    # is deprecated and fails for parameterized forms.
    if isinstance(port, list):
        assert len(port) == world_size
        port = port[local_rank]
    else:
        # Spread ranks onto distinct ports derived from the base port.
        port += local_rank * 10
    # Only the first rank of each tp-group launches a server, using the
    # group's `tp` consecutive GPUs.
    if (world_size - local_rank) % tp == 0:
        rank_list = ','.join(str(local_rank + i) for i in range(tp))
        command = f'CUDA_VISIBLE_DEVICES={rank_list} lmdeploy serve api_server {model_path} '\
                  f'--server-name {local_ip} --server-port {port} --tp {tp} '\
                  f'--proxy-url {proxy_url} --backend {backend}'
        print(f'running command: {command}')
        os.system(command)
if __name__ == '__main__':
fire.Fire(main)
```
Expand Down
25 changes: 13 additions & 12 deletions docs/zh_cn/llm/api_server.md
Original file line number Diff line number Diff line change
Expand Up @@ -268,11 +268,8 @@ curl http://{server_ip}:{server_port}/v1/chat/interactive \
```python
import os
import socket
from typing import List

from typing import List, Literal
import fire


def get_host_ip():
try:
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
Expand All @@ -281,22 +278,26 @@ def get_host_ip():
finally:
s.close()
return ip


def main(model_path: str,
         tp: int = 1,
         proxy_url: str = 'http://0.0.0.0:8000',
         port: int = 23333,
         backend: Literal['turbomind', 'pytorch'] = 'turbomind'):
    """Launch one `lmdeploy serve api_server` per tp-group under torchrun.

    Args:
        model_path: path of the model to serve.
        tp: tensor-parallel degree; each server occupies `tp` consecutive GPUs.
        proxy_url: url of the proxy server every api_server registers with.
        port: either a single base port (offset per rank) or a list with one
            port per rank.
        backend: inference backend passed through to `lmdeploy serve`.
    """
    # Rank/world info is injected by torchrun; -1 means "not set".
    local_rank = int(os.environ.get('LOCAL_RANK', -1))
    world_size = int(os.environ.get('WORLD_SIZE', -1))
    local_ip = get_host_ip()
    # NOTE: use the builtin `list` here — `isinstance` against `typing.List`
    # is deprecated and fails for parameterized forms.
    if isinstance(port, list):
        assert len(port) == world_size
        port = port[local_rank]
    else:
        # Spread ranks onto distinct ports derived from the base port.
        port += local_rank * 10
    # Only the first rank of each tp-group launches a server, using the
    # group's `tp` consecutive GPUs.
    if (world_size - local_rank) % tp == 0:
        rank_list = ','.join(str(local_rank + i) for i in range(tp))
        command = f'CUDA_VISIBLE_DEVICES={rank_list} lmdeploy serve api_server {model_path} '\
                  f'--server-name {local_ip} --server-port {port} --tp {tp} '\
                  f'--proxy-url {proxy_url} --backend {backend}'
        print(f'running command: {command}')
        os.system(command)
if __name__ == '__main__':
fire.Fire(main)
```
Expand Down

0 comments on commit b1e30ef

Please sign in to comment.