diff --git a/.env b/.env index c14a1ae41b9..a3036589674 100644 --- a/.env +++ b/.env @@ -99,9 +99,9 @@ PUBLIC_SHARE_PREFIX=#https://hf.co/chat PUBLIC_GOOGLE_ANALYTICS_ID=#G-XXXXXXXX / Leave empty to disable PUBLIC_ANNOUNCEMENT_BANNERS=`[ { - "title": "Llama v2 is live on HuggingChat! 🦙", + "title": "Code Llama 70B is live! 🦙", "linkTitle": "Announcement", - "linkHref": "https://huggingface.co/blog/llama2" + "linkHref": "https://ai.meta.com/blog/code-llama-large-language-model-coding/" } ]` diff --git a/.env.template b/.env.template index b46a99eb809..3df2cc0982f 100644 --- a/.env.template +++ b/.env.template @@ -89,16 +89,12 @@ MODELS=`[ } }, { - "name": "codellama/CodeLlama-34b-Instruct-hf", - "displayName": "codellama/CodeLlama-34b-Instruct-hf", - "description": "Code Llama, a state of the art code model from Meta.", - "websiteUrl": "https://about.fb.com/news/2023/08/code-llama-ai-for-coding/", - "userMessageToken": "", - "userMessageEndToken": " [/INST] ", - "assistantMessageToken": "", - "assistantMessageEndToken": " [INST] ", - "preprompt": " ", - "chatPromptTemplate" : "[INST] <<SYS>>\n{{preprompt}}\n<</SYS>>\n\n{{#each messages}}{{#ifUser}}{{content}} [/INST] {{/ifUser}}{{#ifAssistant}}{{content}} [INST] {{/ifAssistant}}{{/each}}", + "name": "codellama/CodeLlama-70b-Instruct-hf", + "displayName": "codellama/CodeLlama-70b-Instruct-hf", + "description": "Code Llama, a state of the art code model from Meta. Now in 70B!",
+ "websiteUrl": "https://ai.meta.com/blog/code-llama-large-language-model-coding/", + "preprompt": "", + "chatPromptTemplate" : "<s>{{#if @root.preprompt}}Source: system\n\n {{@root.preprompt}} <step> {{/if}}{{#each messages}}{{#ifUser}}Source: user\n\n {{content}} <step> {{/ifUser}}{{#ifAssistant}}Source: assistant\n\n {{content}} <step> {{/ifAssistant}}{{/each}}Source: assistant\nDestination: user\n\n ", "promptExamples": [ { "title": "Fibonacci in Python", @@ -118,7 +114,7 @@ MODELS=`[ "top_k": 50, "truncate": 4096, "max_new_tokens": 4096, - "stop": [" [INST] "] + "stop": ["<step>", " <step>", " <step> "], } }, { @@ -217,7 +213,8 @@ OLD_MODELS=`[ {"name":"HuggingFaceH4/zephyr-7b-alpha"}, {"name":"openchat/openchat_3.5"}, {"name":"openchat/openchat-3.5-1210"}, - {"name": "tiiuae/falcon-180B-chat"} + {"name": "tiiuae/falcon-180B-chat"}, + {"name": "codellama/CodeLlama-34b-Instruct-hf"} ]` TASK_MODEL='mistralai/Mistral-7B-Instruct-v0.1' diff --git a/PROMPTS.md b/PROMPTS.md index aa83045dbd9..4f921a6d5e9 100644 --- a/PROMPTS.md +++ b/PROMPTS.md @@ -55,3 +55,9 @@ System: {{preprompt}}\nUser:{{#each messages}}{{#ifUser}}{{content}}\nFalcon:{{/ifUser}}{{#ifAssistant}}{{content}}\n{{/ifAssistant}}{{/each}} ```env {{#if @root.preprompt}}<|im_start|>system\n{{@root.preprompt}}<|im_end|>\n{{/if}}{{#each messages}}{{#ifUser}}<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n{{/ifUser}}{{#ifAssistant}}{{content}}<|im_end|>\n{{/ifAssistant}}{{/each}} ``` + +## CodeLlama 70B + +```env +<s>{{#if @root.preprompt}}Source: system\n\n {{@root.preprompt}} <step> {{/if}}{{#each messages}}{{#ifUser}}Source: user\n\n {{content}} <step> {{/ifUser}}{{#ifAssistant}}Source: assistant\n\n {{content}} <step> {{/ifAssistant}}{{/each}}Source: assistant\nDestination: user\n\n +``` diff --git a/src/routes/conversation/[id]/+server.ts index 0fb75c9ffdf..888a38268ac 100644 --- a/src/routes/conversation/[id]/+server.ts +++ b/src/routes/conversation/[id]/+server.ts @@ -310,13 +310,23 @@ export async function POST({ request, locals, params, 
getClientAddress }) { } } } else { + let interrupted = !output.token.special; // add output.generated text to the last message + // strip end tokens from the output.generated_text + const text = (model.parameters.stop ?? []).reduce((acc: string, curr: string) => { + if (acc.endsWith(curr)) { + interrupted = false; + return acc.slice(0, acc.length - curr.length); + } + return acc; + }, output.generated_text.trimEnd()); + messages = [ ...messages.slice(0, -1), { ...messages[messages.length - 1], - content: previousContent + output.generated_text, - interrupted: !output.token.special, // if its a special token it finished on its own, else it was interrupted + content: previousContent + text, + interrupted, // if its a special token it finished on its own, else it was interrupted updates, updatedAt: new Date(), },