Skip to content

Commit

Permalink
Add tp hint for deployment (#555)
Browse files Browse the repository at this point in the history
* add tp hint for deploy

* fix lint

* assert tp in turbomind

* fix lint
  • Loading branch information
irexyc authored Oct 13, 2023
1 parent 6904053 commit 77a2681
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 1 deletion.
4 changes: 3 additions & 1 deletion lmdeploy/serve/turbomind/deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -972,7 +972,7 @@ def main(model_name: str,
META's llama format, and 'hf' means huggingface format
tokenizer_path (str): the path of tokenizer model
dst_path (str): the destination path that saves outputs
tp (int): the number of GPUs used for tensor parallelism
tp (int): the number of GPUs used for tensor parallelism; it should be a power of two (2^n)
quant_path (str): path of the quantized model, which can be None
group_size (int): a parameter used in AWQ to quantize fp16 weights
to 4 bits
Expand All @@ -981,6 +981,8 @@ def main(model_name: str,
f"'{model_name}' is not supported. " \
f'The supported models are: {MODELS.module_dict.keys()}'

assert ((tp & (tp - 1) == 0) and tp != 0), 'tp should be 2^n'

if model_format is None:
model_format = 'qwen' if model_name == 'qwen-7b' else 'hf'

Expand Down
1 change: 1 addition & 0 deletions lmdeploy/turbomind/turbomind.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def __init__(self, model_path: str, eos_id: int = 2, tp: int = 1):
node_num = 1

# read meta from model path
assert ((tp & (tp - 1) == 0) and tp != 0), 'tp should be 2^n'
self.gpu_count = tp
self.session_len = 2048
data_type = 'fp16'
Expand Down

0 comments on commit 77a2681

Please sign in to comment.