Add CodeLlama-70b-Instruct-hf (#752)
* Add CodeLlama-70b-Instruct-hf

* add comment to reduce

* Added missing newlines to prompt format for codellama 70b

* remove extra space

* stop tokens

* Remove source newline

* fix preprompt

* fix prompt one last time

* add news

* shorter text

* fix link & remove old tokens

---------

Co-authored-by: Mishig Davaadorj <[email protected]>
nsarrazin and mishig25 authored Jan 30, 2024
1 parent f730778 commit a8ca669
Showing 4 changed files with 29 additions and 16 deletions.
4 changes: 2 additions & 2 deletions .env
@@ -99,9 +99,9 @@ PUBLIC_SHARE_PREFIX=#https://hf.co/chat
 PUBLIC_GOOGLE_ANALYTICS_ID=#G-XXXXXXXX / Leave empty to disable
 PUBLIC_ANNOUNCEMENT_BANNERS=`[
   {
-    "title": "Llama v2 is live on HuggingChat! 🦙",
+    "title": "Code Llama 70B is live! 🦙",
     "linkTitle": "Announcement",
-    "linkHref": "https://huggingface.co/blog/llama2"
+    "linkHref": "https://ai.meta.com/blog/code-llama-large-language-model-coding/"
   }
 ]`

21 changes: 9 additions & 12 deletions .env.template
@@ -89,16 +89,12 @@ MODELS=`[
     }
   },
   {
-    "name": "codellama/CodeLlama-34b-Instruct-hf",
-    "displayName": "codellama/CodeLlama-34b-Instruct-hf",
-    "description": "Code Llama, a state of the art code model from Meta.",
-    "websiteUrl": "https://about.fb.com/news/2023/08/code-llama-ai-for-coding/",
-    "userMessageToken": "",
-    "userMessageEndToken": " [/INST] ",
-    "assistantMessageToken": "",
-    "assistantMessageEndToken": " </s><s>[INST] ",
-    "preprompt": " ",
-    "chatPromptTemplate" : "<s>[INST] <<SYS>>\n{{preprompt}}\n<</SYS>>\n\n{{#each messages}}{{#ifUser}}{{content}} [/INST] {{/ifUser}}{{#ifAssistant}}{{content}} </s><s>[INST] {{/ifAssistant}}{{/each}}",
+    "name": "codellama/CodeLlama-70b-Instruct-hf",
+    "displayName": "codellama/CodeLlama-70b-Instruct-hf",
+    "description": "Code Llama, a state of the art code model from Meta. Now in 70B!",
+    "websiteUrl": "https://ai.meta.com/blog/code-llama-large-language-model-coding/",
+    "preprompt": "",
+    "chatPromptTemplate" : "<s>{{#if @root.preprompt}}Source: system\n\n {{@root.preprompt}} <step> {{/if}}{{#each messages}}{{#ifUser}}Source: user\n\n {{content}} <step> {{/ifUser}}{{#ifAssistant}}Source: assistant\n\n {{content}} <step> {{/ifAssistant}}{{/each}}Source: assistant\nDestination: user\n\n ",
     "promptExamples": [
       {
         "title": "Fibonacci in Python",
@@ -118,7 +114,7 @@
       "top_k": 50,
       "truncate": 4096,
       "max_new_tokens": 4096,
-      "stop": [" </s><s>[INST] "]
+      "stop": ["<step>", " <step>", " <step> "]
     }
   },
   {
@@ -217,7 +213,8 @@ OLD_MODELS=`[
   {"name":"HuggingFaceH4/zephyr-7b-alpha"},
   {"name":"openchat/openchat_3.5"},
   {"name":"openchat/openchat-3.5-1210"},
-  {"name": "tiiuae/falcon-180B-chat"}
+  {"name": "tiiuae/falcon-180B-chat"},
+  {"name": "codellama/CodeLlama-34b-Instruct-hf"}
 ]`
 
 TASK_MODEL='mistralai/Mistral-7B-Instruct-v0.1'
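
For context: chat-ui renders `chatPromptTemplate` with Handlebars-style block helpers named `ifUser` and `ifAssistant`, as the template syntax above implies. A minimal sketch of that rendering in TypeScript; the helper bodies and the `from` field name are illustrative assumptions, not chat-ui's exact implementation:

```ts
import Handlebars from "handlebars";

// Sketch only: block helpers matching the names used in chatPromptTemplate.
Handlebars.registerHelper("ifUser", function (this: { from: string }, options: Handlebars.HelperOptions) {
	return this.from === "user" ? options.fn(this) : "";
});
Handlebars.registerHelper("ifAssistant", function (this: { from: string }, options: Handlebars.HelperOptions) {
	return this.from === "assistant" ? options.fn(this) : "";
});

// The CodeLlama 70B template from the config above ("\n" are real newlines here).
const codellama70bTemplate =
	"<s>{{#if @root.preprompt}}Source: system\n\n {{@root.preprompt}} <step> {{/if}}" +
	"{{#each messages}}{{#ifUser}}Source: user\n\n {{content}} <step> {{/ifUser}}" +
	"{{#ifAssistant}}Source: assistant\n\n {{content}} <step> {{/ifAssistant}}{{/each}}" +
	"Source: assistant\nDestination: user\n\n ";

// noEscape keeps Handlebars from HTML-escaping message contents.
const render = Handlebars.compile(codellama70bTemplate, { noEscape: true });

const prompt = render({
	preprompt: "",
	messages: [{ from: "user", content: "Write a Fibonacci function in Python." }],
});
// An empty preprompt skips the "Source: system" block, and the prompt ends with
// "Source: assistant\nDestination: user\n\n " so the model continues as the assistant.
```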
6 changes: 6 additions & 0 deletions PROMPTS.md
@@ -55,3 +55,9 @@ System: {{preprompt}}\nUser:{{#each messages}}{{#ifUser}}{{content}}\nFalcon:{{/
 ```env
 {{#if @root.preprompt}}<|im_start|>system\n{{@root.preprompt}}<|im_end|>\n{{/if}}{{#each messages}}{{#ifUser}}<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n{{/ifUser}}{{#ifAssistant}}{{content}}<|im_end|>\n{{/ifAssistant}}{{/each}}
 ```
+
+## CodeLlama 70B
+
+```env
+<s>{{#if @root.preprompt}}Source: system\n\n {{@root.preprompt}} <step> {{/if}}{{#each messages}}{{#ifUser}}Source: user\n\n {{content}} <step> {{/ifUser}}{{#ifAssistant}}Source: assistant\n\n {{content}} <step> {{/ifAssistant}}{{/each}}Source: assistant\nDestination: user\n\n 
+```
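
For illustration (not part of the commit): with an empty preprompt and a single user message "Hello", the CodeLlama 70B template above expands to the string below, where each `\n` is a literal newline and the trailing space is intentional: the model's reply begins right after it.

```env
<s>Source: user\n\n Hello <step> Source: assistant\nDestination: user\n\n 
```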
14 changes: 12 additions & 2 deletions src/routes/conversation/[id]/+server.ts
@@ -310,13 +310,23 @@ export async function POST({ request, locals, params, getClientAddress }) {
 					}
 				}
 			} else {
+				let interrupted = !output.token.special;
 				// add output.generated text to the last message
+				// strip end tokens from the output.generated_text
+				const text = (model.parameters.stop ?? []).reduce((acc: string, curr: string) => {
+					if (acc.endsWith(curr)) {
+						interrupted = false;
+						return acc.slice(0, acc.length - curr.length);
+					}
+					return acc;
+				}, output.generated_text.trimEnd());
+
 				messages = [
 					...messages.slice(0, -1),
 					{
 						...messages[messages.length - 1],
-						content: previousContent + output.generated_text,
-						interrupted: !output.token.special, // if its a special token it finished on its own, else it was interrupted
+						content: previousContent + text,
+						interrupted, // if its a special token it finished on its own, else it was interrupted
 						updates,
 						updatedAt: new Date(),
 					},
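
The server change above strips any configured stop sequence from the end of the generated text and flips `interrupted` off when one is found, since an emitted stop sequence means the model finished on its own. A standalone sketch of that logic, using the CodeLlama 70B stop list from the config above:

```ts
// Standalone version of the stop-sequence stripping added in +server.ts above.
function stripStopTokens(
	generated: string,
	stopSequences: string[],
	endedOnSpecialToken: boolean
): { text: string; interrupted: boolean } {
	// If the final token was not a special token, assume the generation was cut off...
	let interrupted = !endedOnSpecialToken;
	const text = stopSequences.reduce((acc, curr) => {
		if (acc.endsWith(curr)) {
			interrupted = false; // ...unless a trailing stop sequence shows it stopped itself
			return acc.slice(0, acc.length - curr.length);
		}
		return acc;
	}, generated.trimEnd());
	return { text, interrupted };
}

// Example with the stop list from the CodeLlama 70B config:
const result = stripStopTokens("def fib(n): ...<step>", ["<step>", " <step>", " <step> "], false);
// result.text === "def fib(n): ...", result.interrupted === false
```

Note that because the accumulator starts from `generated_text.trimEnd()`, the `" <step> "` variant (with trailing space) can never match here; the whitespace variants mainly matter as stop parameters sent to the generation backend.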
