Spaces:
Running
Running
Add CodeLlama-70b-Instruct-hf (#752)
Browse files
* Add CodeLlama-70b-Instruct-hf
* add comment to reduce
* Added missing newlines to prompt format for codellama 70b
* remove extra space
* stop tokens
* Remove source newline
* fix preprompt
* fix prompt one last time
* add news
* shorter text
* fix link & remove old tokens
---------
Co-authored-by: Mishig Davaadorj <[email protected]>
- .env +2 -2
- .env.template +9 -12
- PROMPTS.md +6 -0
- src/routes/conversation/[id]/+server.ts +12 -2
.env
CHANGED
@@ -99,9 +99,9 @@ PUBLIC_SHARE_PREFIX=#https://hf.co/chat
|
|
99 |
PUBLIC_GOOGLE_ANALYTICS_ID=#G-XXXXXXXX / Leave empty to disable
|
100 |
PUBLIC_ANNOUNCEMENT_BANNERS=`[
|
101 |
{
|
102 |
-
"title": "Llama
|
103 |
"linkTitle": "Announcement",
|
104 |
-
"linkHref": "https://
|
105 |
}
|
106 |
]`
|
107 |
|
|
|
99 |
PUBLIC_GOOGLE_ANALYTICS_ID=#G-XXXXXXXX / Leave empty to disable
|
100 |
PUBLIC_ANNOUNCEMENT_BANNERS=`[
|
101 |
{
|
102 |
+
"title": "Code Llama 70B is live! 🦙",
|
103 |
"linkTitle": "Announcement",
|
104 |
+
"linkHref": "https://ai.meta.com/blog/code-llama-large-language-model-coding/"
|
105 |
}
|
106 |
]`
|
107 |
|
.env.template
CHANGED
@@ -89,16 +89,12 @@ MODELS=`[
|
|
89 |
}
|
90 |
},
|
91 |
{
|
92 |
-
"name": "codellama/CodeLlama-
|
93 |
-
"displayName": "codellama/CodeLlama-
|
94 |
-
"description": "Code Llama, a state of the art code model from Meta.",
|
95 |
-
"websiteUrl": "https://
|
96 |
-
"
|
97 |
-
"
|
98 |
-
"assistantMessageToken": "",
|
99 |
-
"assistantMessageEndToken": " </s><s>[INST] ",
|
100 |
-
"preprompt": " ",
|
101 |
-
"chatPromptTemplate" : "<s>[INST] <<SYS>>\n{{preprompt}}\n<</SYS>>\n\n{{#each messages}}{{#ifUser}}{{content}} [/INST] {{/ifUser}}{{#ifAssistant}}{{content}} </s><s>[INST] {{/ifAssistant}}{{/each}}",
|
102 |
"promptExamples": [
|
103 |
{
|
104 |
"title": "Fibonacci in Python",
|
@@ -118,7 +114,7 @@ MODELS=`[
|
|
118 |
"top_k": 50,
|
119 |
"truncate": 4096,
|
120 |
"max_new_tokens": 4096,
|
121 |
-
"stop": ["
|
122 |
}
|
123 |
},
|
124 |
{
|
@@ -217,7 +213,8 @@ OLD_MODELS=`[
|
|
217 |
{"name":"HuggingFaceH4/zephyr-7b-alpha"},
|
218 |
{"name":"openchat/openchat_3.5"},
|
219 |
{"name":"openchat/openchat-3.5-1210"},
|
220 |
-
{"name": "tiiuae/falcon-180B-chat"}
|
|
|
221 |
]`
|
222 |
|
223 |
TASK_MODEL='mistralai/Mistral-7B-Instruct-v0.1'
|
|
|
89 |
}
|
90 |
},
|
91 |
{
|
92 |
+
"name": "codellama/CodeLlama-70b-Instruct-hf",
|
93 |
+
"displayName": "codellama/CodeLlama-70b-Instruct-hf",
|
94 |
+
"description": "Code Llama, a state of the art code model from Meta. Now in 70B!",
|
95 |
+
"websiteUrl": "https://ai.meta.com/blog/code-llama-large-language-model-coding/",
|
96 |
+
"preprompt": "",
|
97 |
+
"chatPromptTemplate" : "<s>{{#if @root.preprompt}}Source: system\n\n {{@root.preprompt}} <step> {{/if}}{{#each messages}}{{#ifUser}}Source: user\n\n {{content}} <step> {{/ifUser}}{{#ifAssistant}}Source: assistant\n\n {{content}} <step> {{/ifAssistant}}{{/each}}Source: assistant\nDestination: user\n\n ",
|
|
|
|
|
|
|
|
|
98 |
"promptExamples": [
|
99 |
{
|
100 |
"title": "Fibonacci in Python",
|
|
|
114 |
"top_k": 50,
|
115 |
"truncate": 4096,
|
116 |
"max_new_tokens": 4096,
|
117 |
+
"stop": ["<step>", " <step>", " <step> "],
|
118 |
}
|
119 |
},
|
120 |
{
|
|
|
213 |
{"name":"HuggingFaceH4/zephyr-7b-alpha"},
|
214 |
{"name":"openchat/openchat_3.5"},
|
215 |
{"name":"openchat/openchat-3.5-1210"},
|
216 |
+
{"name": "tiiuae/falcon-180B-chat"},
|
217 |
+
{"name": "codellama/CodeLlama-34b-Instruct-hf"}
|
218 |
]`
|
219 |
|
220 |
TASK_MODEL='mistralai/Mistral-7B-Instruct-v0.1'
|
PROMPTS.md
CHANGED
@@ -55,3 +55,9 @@ System: {{preprompt}}\nUser:{{#each messages}}{{#ifUser}}{{content}}\nFalcon:{{/
|
|
55 |
```env
|
56 |
{{#if @root.preprompt}}<|im_start|>system\n{{@root.preprompt}}<|im_end|>\n{{/if}}{{#each messages}}{{#ifUser}}<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n{{/ifUser}}{{#ifAssistant}}{{content}}<|im_end|>\n{{/ifAssistant}}{{/each}}
|
57 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
```env
|
56 |
{{#if @root.preprompt}}<|im_start|>system\n{{@root.preprompt}}<|im_end|>\n{{/if}}{{#each messages}}{{#ifUser}}<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n{{/ifUser}}{{#ifAssistant}}{{content}}<|im_end|>\n{{/ifAssistant}}{{/each}}
|
57 |
```
|
58 |
+
|
59 |
+
## CodeLlama 70B
|
60 |
+
|
61 |
+
```env
|
62 |
+
<s>{{#if @root.preprompt}}Source: system\n\n {{@root.preprompt}} <step> {{/if}}{{#each messages}}{{#ifUser}}Source: user\n\n {{content}} <step> {{/ifUser}}{{#ifAssistant}}Source: assistant\n\n {{content}} <step> {{/ifAssistant}}{{/each}}Source: assistant\nDestination: user\n\n 
|
63 |
+
```
|
src/routes/conversation/[id]/+server.ts
CHANGED
@@ -310,13 +310,23 @@ export async function POST({ request, locals, params, getClientAddress }) {
|
|
310 |
}
|
311 |
}
|
312 |
} else {
|
|
|
313 |
// add output.generated text to the last message
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
314 |
messages = [
|
315 |
...messages.slice(0, -1),
|
316 |
{
|
317 |
...messages[messages.length - 1],
|
318 |
-
content: previousContent +
|
319 |
-
interrupted
|
320 |
updates,
|
321 |
updatedAt: new Date(),
|
322 |
},
|
|
|
310 |
}
|
311 |
}
|
312 |
} else {
|
313 |
+
let interrupted = !output.token.special;
|
314 |
// add output.generated text to the last message
|
315 |
+
// strip end tokens from the output.generated_text
|
316 |
+
const text = (model.parameters.stop ?? []).reduce((acc: string, curr: string) => {
|
317 |
+
if (acc.endsWith(curr)) {
|
318 |
+
interrupted = false;
|
319 |
+
return acc.slice(0, acc.length - curr.length);
|
320 |
+
}
|
321 |
+
return acc;
|
322 |
+
}, output.generated_text.trimEnd());
|
323 |
+
|
324 |
messages = [
|
325 |
...messages.slice(0, -1),
|
326 |
{
|
327 |
...messages[messages.length - 1],
|
328 |
+
content: previousContent + text,
|
329 |
+
interrupted, // if its a special token it finished on its own, else it was interrupted
|
330 |
updates,
|
331 |
updatedAt: new Date(),
|
332 |
},
|