From 6eefb4d7d25879db42cefae8332ca9db88bff851 Mon Sep 17 00:00:00 2001 From: steven Date: Mon, 4 Nov 2024 10:35:03 +0800 Subject: [PATCH] support granite3 models --- README.md | 1 + README_zh.md | 1 + src/llamafactory/data/template.py | 12 +++++++++ src/llamafactory/extras/constants.py | 39 ++++++++++++++++++++++++++++ 4 files changed, 53 insertions(+) diff --git a/README.md b/README.md index 8724520ba9..768242bfe0 100644 --- a/README.md +++ b/README.md @@ -181,6 +181,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma | | [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 | +| [Granite 3.0](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 | | [Index](https://huggingface.co/IndexTeam) | 1.9B | index | | [InternLM2/InternLM2.5](https://huggingface.co/internlm) | 7B/20B | intern2 | | [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | diff --git a/README_zh.md b/README_zh.md index 88c3abb4ea..8bc59e17ae 100644 --- a/README_zh.md +++ b/README_zh.md @@ -182,6 +182,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272 | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma | | [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 | +| [Granite 3.0](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 | | [Index](https://huggingface.co/IndexTeam) | 1.9B | index | | [InternLM2/InternLM2.5](https://huggingface.co/internlm) | 7B/20B | intern2 | | [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 89827f695b..ae4bad0b86 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -691,6 +691,18 @@ def get_template_and_fix_tokenizer(tokenizer: "PreTrainedTokenizer", data_args: ) +_register_template( + name="granite3", + format_user=StringFormatter(slots=["<|start_of_role|>user<|end_of_role|>{{content}}<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>"]), + format_system=StringFormatter(slots=["<|start_of_role|>system<|end_of_role|>{{content}}<|end_of_text|>\n"]), + format_assistant=StringFormatter(slots=["{{content}}<|end_of_text|>\n"]), + format_separator=EmptyFormatter(slots=["\n"]), + stop_words=["<|end_of_text|>"], + replace_eos=True, + replace_jinja_template=True, +) + + _register_template( name="index", format_user=StringFormatter(slots=["reserved_0{{content}}reserved_1"]), diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 8a3ed036ad..efc3ee4b5e 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -596,6 +596,45 @@ def register_model_group( ) +register_model_group( + models={ + "Granite-3.0-8B-Instruct": { + DownloadSource.DEFAULT: "ibm-granite/granite-3.0-8b-instruct", + DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-8b-instruct", + }, + "Granite-3.0-8B-Base": { + DownloadSource.DEFAULT: "ibm-granite/granite-3.0-8b-base", + DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-8b-base", + }, + "Granite-3.0-2B-Instruct": { + DownloadSource.DEFAULT: "ibm-granite/granite-3.0-2b-instruct", + DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-2b-instruct", + }, + "Granite-3.0-2B-Base": { + DownloadSource.DEFAULT: "ibm-granite/granite-3.0-2b-base", + DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-2b-base", + }, + "Granite-3.0-3B-A800M-Instruct": { + DownloadSource.DEFAULT: "ibm-granite/granite-3.0-3b-a800m-instruct", + DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-3b-a800m-instruct", + }, + "Granite-3.0-3B-A800M-Base": { + DownloadSource.DEFAULT: "ibm-granite/granite-3.0-3b-a800m-base", + DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-3b-a800m-base", + }, + "Granite-3.0-1B-A400M-Instruct": { + DownloadSource.DEFAULT: "ibm-granite/granite-3.0-1b-a400m-instruct", + DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-1b-a400m-instruct", + }, + "Granite-3.0-1B-A400M-Base": { + DownloadSource.DEFAULT: "ibm-granite/granite-3.0-1b-a400m-base", + DownloadSource.MODELSCOPE: "AI-ModelScope/granite-3.0-1b-a400m-base", + }, + }, + template="granite3", +) + + register_model_group( models={ "Index-1.9B-Chat": {