From 93e1082a888b6d469defd3bbcfe00328cff4126d Mon Sep 17 00:00:00 2001
From: RunningLeon
Date: Mon, 15 Jul 2024 15:02:46 +0800
Subject: [PATCH] add chat template for codegeex4 (#2013)

---
 README.md                                    |  1 +
 README_zh-CN.md                              |  1 +
 docs/en/supported_models/supported_models.md |  2 +
 .../supported_models/supported_models.md     |  2 +
 lmdeploy/model.py                            | 39 +++++++++++++++++++
 tests/test_lmdeploy/test_model.py            | 26 +++++++++++++
 6 files changed, 71 insertions(+)

diff --git a/README.md b/README.md
index dfc5718ec..70655c6a0 100644
--- a/README.md
+++ b/README.md
@@ -121,6 +121,7 @@ For detailed inference benchmarks in more devices and more settings, please refe
 <li>Code Llama (7B - 34B)</li>
 <li>ChatGLM2 (6B)</li>
 <li>GLM4 (9B)</li>
+<li>CodeGeeX4 (9B)</li>
 <li>Falcon (7B - 180B)</li>
 <li>YI (6B-34B)</li>
 <li>Mistral (7B)</li>
diff --git a/README_zh-CN.md b/README_zh-CN.md
index ef1f806a0..966506b14 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -122,6 +122,7 @@ LMDeploy TurboMind 引擎拥有卓越的推理能力,在各种规模的模型
 <li>Code Llama (7B - 34B)</li>
 <li>ChatGLM2 (6B)</li>
 <li>GLM4 (9B)</li>
+<li>CodeGeeX4 (9B)</li>
 <li>Falcon (7B - 180B)</li>
 <li>YI (6B-34B)</li>
 <li>Mistral (7B)</li>
diff --git a/docs/en/supported_models/supported_models.md b/docs/en/supported_models/supported_models.md
index b30776339..cdda35a7c 100644
--- a/docs/en/supported_models/supported_models.md
+++ b/docs/en/supported_models/supported_models.md
@@ -27,6 +27,7 @@
 | MiniCPM | Llama3-V-2_5 | Yes | Yes | Yes | Yes |
 | MiniGeminiLlama | 7B | Yes | No | No | Yes |
 | GLM4 | 9B | Yes | Yes | Yes | No |
+| CodeGeeX4 | 9B | Yes | Yes | Yes | No |

 "-" means not verified yet.

@@ -67,3 +68,4 @@ The TurboMind engine doesn't support window attention. Therefore, for models tha
 | InternVL-Chat(v1.5) | 2B-26B | Yes | No | No |
 | Gemma2 | 9B-27B | Yes | No | No |
 | GLM4 | 9B | Yes | No | No |
+| CodeGeeX4 | 9B | Yes | No | No |
diff --git a/docs/zh_cn/supported_models/supported_models.md b/docs/zh_cn/supported_models/supported_models.md
index cca1ce8f3..840e8b66f 100644
--- a/docs/zh_cn/supported_models/supported_models.md
+++ b/docs/zh_cn/supported_models/supported_models.md
@@ -27,6 +27,7 @@
 | MiniCPM | Llama3-V-2_5 | Yes | Yes | Yes | Yes |
 | MiniGeminiLlama | 7B | Yes | No | No | Yes |
 | GLM4 | 9B | Yes | Yes | Yes | No |
+| CodeGeeX4 | 9B | Yes | Yes | Yes | No |

 “-” 表示还没有验证。

@@ -67,3 +68,4 @@ turbomind 引擎不支持 window attention。所以,对于应用了 window att
 | InternVL-Chat(v1.5) | 2B-26B | Yes | No | No |
 | Gemma2 | 9B-27B | Yes | No | No |
 | GLM4 | 9B | Yes | No | No |
+| CodeGeeX4 | 9B | Yes | No | No |
diff --git a/lmdeploy/model.py b/lmdeploy/model.py
index 397092de0..626f3f5b1 100644
--- a/lmdeploy/model.py
+++ b/lmdeploy/model.py
@@ -1602,6 +1602,45 @@ def match(cls, model_path: str) -> Optional[str]:
         return 'glm4'


+@MODELS.register_module(name='codegeex4')
+class CodeGeeX4Chat(BaseChatTemplate):
+    """Chat template of THUDM/codegeex4-all-9b model."""
+
+    def __init__(
+            self,
+            system='<|system|>\n',
+            meta_instruction='你是一位智能编程助手,你叫CodeGeeX。你会为用户回答关于编程、代码、计算机方面的任何问题,并提供格式规范、可以执行、准确安全的代码,并在必要时提供详细的解释。',
+            eosys='',
+            user='<|user|>\n',
+            eoh='',
+            assistant='<|assistant|>\n',
+            eoa='',
+            separator='',
+            stop_words=['<|endoftext|>', '<|user|>', '<|observation|>'],
+            **kwargs):
+        super().__init__(system=system,
+                         meta_instruction=meta_instruction,
+                         eosys=eosys,
+                         user=user,
+                         eoh=eoh,
+                         assistant=assistant,
+                         eoa=eoa,
+                         separator=separator,
+                         stop_words=stop_words,
+                         **kwargs)
+
+    @classmethod
+    def match(cls, model_path: str) -> Optional[str]:
+        """Return the model_name that was registered to MODELS.
+
+        Args:
+            model_path (str): the model path used for matching.
+        """
+        path = model_path.lower()
+        if 'codegeex4' in path:
+            return 'codegeex4'
+
+
 @MODELS.register_module(name='internvl-phi3')
 class InternVLPhi3(Phi3Instruct):
     """Chat template of InternVL Chat 4B model."""
diff --git a/tests/test_lmdeploy/test_model.py b/tests/test_lmdeploy/test_model.py
index e178c3926..f38b952e3 100644
--- a/tests/test_lmdeploy/test_model.py
+++ b/tests/test_lmdeploy/test_model.py
@@ -405,3 +405,29 @@ def test_internvl2():
         'assistant\nI am an AI<|im_end|>\n<|im_start|>assistant\n'
     res = model.messages2prompt(messages)
     assert res == expected
+
+
+def test_codegeex4():
+    model_path_and_name = 'THUDM/codegeex4-all-9b'
+    deduced_name = best_match_model(model_path_and_name)
+    assert deduced_name == 'codegeex4'
+    model = MODELS.get(deduced_name)()
+    messages = [{
+        'role': 'system',
+        'content': 'you are a helpful assistant'
+    }, {
+        'role': 'user',
+        'content': 'who are you'
+    }, {
+        'role': 'assistant',
+        'content': 'I am an AI'
+    }, {
+        'role': 'user',
+        'content': 'AGI is?'
+    }]
+    from transformers import AutoTokenizer
+    tokenizer = AutoTokenizer.from_pretrained(model_path_and_name,
+                                              trust_remote_code=True)
+    ref = tokenizer.apply_chat_template(messages, tokenize=False)
+    res = model.messages2prompt(messages)
+    assert res.startswith(ref)
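
Reviewer note, not part of the patch: below is a minimal sketch of exercising
the new template end to end. It assumes lmdeploy built from this revision and
local access to the THUDM/codegeex4-all-9b weights; pipeline and
ChatTemplateConfig are lmdeploy's public entry points, and the explicit
model_name override is optional because CodeGeeX4Chat.match() already deduces
'codegeex4' from the model path.

    # Minimal usage sketch, assuming lmdeploy from this revision and access
    # to the THUDM/codegeex4-all-9b weights.
    from lmdeploy import ChatTemplateConfig, pipeline

    # The explicit override is optional: match() deduces 'codegeex4' here.
    pipe = pipeline('THUDM/codegeex4-all-9b',
                    chat_template_config=ChatTemplateConfig(
                        model_name='codegeex4'))
    responses = pipe(['write a quicksort in Python'])
    print(responses[0].text)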
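Template-rendering note, illustration only: with eosys, eoh, eoa and separator
all set to '', the template is expected to reduce to "role marker + content"
per message plus a trailing assistant marker, which the test above then checks
against the HuggingFace tokenizer's rendering via the startswith assertion.
A standalone sketch of that composition (not the library code, whose real
logic is BaseChatTemplate.messages2prompt in lmdeploy/model.py):

    # Standalone sketch of the expected prompt shape for codegeex4.
    ROLE_MARKERS = {
        'system': '<|system|>\n',
        'user': '<|user|>\n',
        'assistant': '<|assistant|>\n',
    }

    def render(messages):
        # Role marker + content for each message, then an open assistant
        # turn for the model to complete.
        body = ''.join(ROLE_MARKERS[m['role']] + m['content']
                       for m in messages)
        return body + ROLE_MARKERS['assistant']

    messages = [{'role': 'system', 'content': 'you are a helpful assistant'},
                {'role': 'user', 'content': 'who are you'}]
    print(render(messages))
    # <|system|>\nyou are a helpful assistant<|user|>\nwho are you<|assistant|>\n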