Skip to content

Commit

Permalink
Add tp hint for deployment (#555)
Browse files Browse the repository at this point in the history
* add tp hint for deploy

* fix lint

* assert tp in turbomind

* fix lint
  • Loading branch information
irexyc authored Oct 13, 2023
1 parent 6904053 commit 77a2681
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 1 deletion.
4 changes: 3 additions & 1 deletion lmdeploy/serve/turbomind/deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -972,7 +972,7 @@ def main(model_name: str,
META's llama format, and 'hf' means huggingface format
tokenizer_path (str): the path of tokenizer model
dst_path (str): the destination path that saves outputs
tp (int): the number of GPUs used for tensor parallelism
tp (int): the number of GPUs used for tensor parallelism; it should be a power of two (2^n)
quant_path (str): path of the quantized model, which can be None
group_size (int): a parameter used in AWQ to quantize fp16 weights
to 4 bits
Expand All @@ -981,6 +981,8 @@ def main(model_name: str,
f"'{model_name}' is not supported. " \
f'The supported models are: {MODELS.module_dict.keys()}'

assert ((tp & (tp - 1) == 0) and tp != 0), 'tp should be 2^n'

if model_format is None:
model_format = 'qwen' if model_name == 'qwen-7b' else 'hf'

Expand Down
1 change: 1 addition & 0 deletions lmdeploy/turbomind/turbomind.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def __init__(self, model_path: str, eos_id: int = 2, tp: int = 1):
node_num = 1

# read meta from model path
assert ((tp & (tp - 1) == 0) and tp != 0), 'tp should be 2^n'
self.gpu_count = tp
self.session_len = 2048
data_type = 'fp16'
Expand Down

0 comments on commit 77a2681

Please sign in to comment.