Upload folder using huggingface_hub
This view is limited to 50 files because it contains too many changes. See raw diff.
- .gitattributes +12 -0
- checkpoint/.gitattributes +36 -0
- checkpoint/README.md +246 -0
- checkpoint/config.json +29 -0
- checkpoint/generation_config.json +6 -0
- checkpoint/model-00001-of-00059.safetensors +3 -0
- checkpoint/model-00002-of-00059.safetensors +3 -0
- checkpoint/model-00003-of-00059.safetensors +3 -0
- checkpoint/model-00004-of-00059.safetensors +3 -0
- checkpoint/model-00005-of-00059.safetensors +3 -0
- checkpoint/model-00006-of-00059.safetensors +3 -0
- checkpoint/model-00007-of-00059.safetensors +3 -0
- checkpoint/model-00008-of-00059.safetensors +3 -0
- checkpoint/model-00009-of-00059.safetensors +3 -0
- checkpoint/model-00010-of-00059.safetensors +3 -0
- checkpoint/model-00011-of-00059.safetensors +3 -0
- checkpoint/model-00012-of-00059.safetensors +3 -0
- checkpoint/model-00013-of-00059.safetensors +3 -0
- checkpoint/model-00014-of-00059.safetensors +3 -0
- checkpoint/model-00015-of-00059.safetensors +3 -0
- checkpoint/model-00016-of-00059.safetensors +3 -0
- checkpoint/model-00017-of-00059.safetensors +3 -0
- checkpoint/model-00018-of-00059.safetensors +3 -0
- checkpoint/model-00019-of-00059.safetensors +3 -0
- checkpoint/model-00020-of-00059.safetensors +3 -0
- checkpoint/model-00021-of-00059.safetensors +3 -0
- checkpoint/model-00022-of-00059.safetensors +3 -0
- checkpoint/model-00023-of-00059.safetensors +3 -0
- checkpoint/model-00024-of-00059.safetensors +3 -0
- checkpoint/model-00025-of-00059.safetensors +3 -0
- checkpoint/model-00026-of-00059.safetensors +3 -0
- checkpoint/model-00027-of-00059.safetensors +3 -0
- checkpoint/model-00028-of-00059.safetensors +3 -0
- checkpoint/model-00029-of-00059.safetensors +3 -0
- checkpoint/model-00030-of-00059.safetensors +3 -0
- checkpoint/model-00031-of-00059.safetensors +3 -0
- checkpoint/model-00032-of-00059.safetensors +3 -0
- checkpoint/model-00033-of-00059.safetensors +3 -0
- checkpoint/model-00034-of-00059.safetensors +3 -0
- checkpoint/model-00035-of-00059.safetensors +3 -0
- checkpoint/model-00036-of-00059.safetensors +3 -0
- checkpoint/model-00037-of-00059.safetensors +3 -0
- checkpoint/model-00038-of-00059.safetensors +3 -0
- checkpoint/model-00039-of-00059.safetensors +3 -0
- checkpoint/model-00040-of-00059.safetensors +3 -0
- checkpoint/model-00041-of-00059.safetensors +3 -0
- checkpoint/model-00042-of-00059.safetensors +3 -0
- checkpoint/model-00043-of-00059.safetensors +3 -0
- checkpoint/model-00044-of-00059.safetensors +3 -0
- checkpoint/model-00045-of-00059.safetensors +3 -0
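For reference, a commit like this one is typically produced with the `huggingface_hub` Python client; the sketch below uses placeholder values (the repository ID and local path are illustrative, not taken from this commit):

```python
# Hypothetical reproduction of an "Upload folder using huggingface_hub" commit.
# repo_id and folder_path are placeholders, not values from this diff.
from huggingface_hub import HfApi

api = HfApi()  # picks up the token from HF_TOKEN or the local login cache
api.upload_folder(
    folder_path="./checkpoint",          # local folder to push
    path_in_repo="checkpoint",           # where it lands in the repository
    repo_id="your-username/your-repo",   # placeholder target repository
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```

Files matched by the LFS rules in `.gitattributes` (including the `*.safetensors` shards below) are stored as Git LFS pointers, which is why the diff for each shard shows only an `oid` and a `size`.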
.gitattributes
CHANGED
@@ -33,3 +33,15 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+compiled/1490848b523a12b5041b.neff filter=lfs diff=lfs merge=lfs -text
+compiled/25fbc38e110240e4c644.neff filter=lfs diff=lfs merge=lfs -text
+compiled/6989e8293a1c2f7216d4.neff filter=lfs diff=lfs merge=lfs -text
+compiled/75b4177e54d3da05957e.neff filter=lfs diff=lfs merge=lfs -text
+compiled/7b9943b66d65f23ee419.neff filter=lfs diff=lfs merge=lfs -text
+compiled/7ec5a7d199f27379925d.neff filter=lfs diff=lfs merge=lfs -text
+compiled/b63b81656983d63aa86d.neff filter=lfs diff=lfs merge=lfs -text
+compiled/cff52905f4da4034b124.neff filter=lfs diff=lfs merge=lfs -text
+compiled/d03e92620d47e92267a4.neff filter=lfs diff=lfs merge=lfs -text
+compiled/d6407b57c2e615c6f238.neff filter=lfs diff=lfs merge=lfs -text
+compiled/f85290f055dde229197d.neff filter=lfs diff=lfs merge=lfs -text
+compiled/fc80db7af59e26d7c940.neff filter=lfs diff=lfs merge=lfs -text
checkpoint/.gitattributes
ADDED
@@ -0,0 +1,36 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
tokenizer.model.v3 filter=lfs diff=lfs merge=lfs -text
checkpoint/README.md
ADDED
@@ -0,0 +1,246 @@
---
language:
- en
- es
- it
- de
- fr
license: apache-2.0
base_model: mistralai/Mixtral-8x22B-v0.1

extra_gated_description: If you want to learn more about how we process your personal data, please read our <a href="https://mistral.ai/terms/">Privacy Policy</a>.
---

# Model Card for Mixtral-8x22B-Instruct-v0.1

## Encode and Decode with `mistral_common`

```py
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.protocol.instruct.messages import UserMessage
from mistral_common.protocol.instruct.request import ChatCompletionRequest

mistral_models_path = "MISTRAL_MODELS_PATH"

tokenizer = MistralTokenizer.v3()

completion_request = ChatCompletionRequest(messages=[UserMessage(content="Explain Machine Learning to me in a nutshell.")])

tokens = tokenizer.encode_chat_completion(completion_request).tokens
```

## Inference with `mistral_inference`

```py
from mistral_inference.transformer import Transformer
from mistral_inference.generate import generate

model = Transformer.from_folder(mistral_models_path)
out_tokens, _ = generate([tokens], model, max_tokens=64, temperature=0.0, eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id)

result = tokenizer.decode(out_tokens[0])

print(result)
```

## Preparing inputs with Hugging Face `transformers`

```py
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x22B-Instruct-v0.1")

chat = [{"role": "user", "content": "Explain Machine Learning to me in a nutshell."}]

tokens = tokenizer.apply_chat_template(chat, return_dict=True, return_tensors="pt", add_generation_prompt=True)
```

## Inference with Hugging Face `transformers`

```py
from transformers import AutoModelForCausalLM
import torch

# You can also use 8-bit or 4-bit quantization here
model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x22B-Instruct-v0.1", torch_dtype=torch.bfloat16, device_map="auto")

# device_map="auto" already places the weights, so move the tokenized prompt
# to the model's device instead of calling model.to("cuda")
tokens = tokens.to(model.device)

generated_ids = model.generate(**tokens, max_new_tokens=1000, do_sample=True)

# decode with HF tokenizer
result = tokenizer.decode(generated_ids[0])
print(result)
```

> [!TIP]
> PRs to correct the `transformers` tokenizer so that it gives 1-to-1 the same results as the `mistral_common` reference implementation are very welcome!

---
The Mixtral-8x22B-Instruct-v0.1 Large Language Model (LLM) is an instruct fine-tuned version of the [Mixtral-8x22B-v0.1](https://huggingface.co/mistralai/Mixtral-8x22B-v0.1).

## Function calling example
```python
import torch
from transformers import AutoModelForCausalLM
from mistral_common.protocol.instruct.messages import (
    AssistantMessage,
    UserMessage,
)
from mistral_common.protocol.instruct.tool_calls import (
    Tool,
    Function,
)
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.protocol.instruct.request import ChatCompletionRequest

device = "cuda"  # the device to load the model onto

tokenizer_v3 = MistralTokenizer.v3()

mistral_query = ChatCompletionRequest(
    tools=[
        Tool(
            function=Function(
                name="get_current_weather",
                description="Get the current weather",
                parameters={
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "format": {
                            "type": "string",
                            "enum": ["celsius", "fahrenheit"],
                            "description": "The temperature unit to use. Infer this from the users location.",
                        },
                    },
                    "required": ["location", "format"],
                },
            )
        )
    ],
    messages=[
        UserMessage(content="What's the weather like today in Paris"),
    ],
    model="test",
)

encodeds = tokenizer_v3.encode_chat_completion(mistral_query).tokens
model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x22B-Instruct-v0.1")
# .tokens is a plain list of ints; wrap it in a batch dimension before moving it to the device
model_inputs = torch.tensor([encodeds]).to(device)
model.to(device)

generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
sp_tokenizer = tokenizer_v3.instruct_tokenizer.tokenizer
decoded = sp_tokenizer.decode(generated_ids[0].tolist())
print(decoded)
```

## Function calling with `transformers`

To use this example, you'll need `transformers` version 4.42.0 or higher. Please see the
[function calling guide](https://huggingface.co/docs/transformers/main/chat_templating#advanced-tool-use--function-calling)
in the `transformers` docs for more information.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_id = "mistralai/Mixtral-8x22B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_id)

def get_current_weather(location: str, format: str):
    """
    Get the current weather

    Args:
        location: The city and state, e.g. San Francisco, CA
        format: The temperature unit to use. Infer this from the users location. (choices: ["celsius", "fahrenheit"])
    """
    pass

conversation = [{"role": "user", "content": "What's the weather like in Paris?"}]
tools = [get_current_weather]

# format and tokenize the tool use prompt
inputs = tokenizer.apply_chat_template(
    conversation,
    tools=tools,
    add_generation_prompt=True,
    return_dict=True,
    return_tensors="pt",
)

model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")

inputs.to(model.device)
outputs = model.generate(**inputs, max_new_tokens=1000)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

Note that, for reasons of space, this example does not show a complete cycle of calling a tool and adding the tool call and tool
results to the chat history so that the model can use them in its next generation. For a full tool calling example, please
see the [function calling guide](https://huggingface.co/docs/transformers/main/chat_templating#advanced-tool-use--function-calling),
and note that Mixtral **does** use tool call IDs, so these must be included in your tool calls and tool results. They should be
exactly 9 alphanumeric characters.

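The cycle the note elides can be sketched as follows; this is an illustrative continuation (not part of the original card) following the `transformers` function calling guide, with a made-up tool result and an arbitrary placeholder ID:

```python
# Illustrative continuation of the example above (not from the original card).
# The tool output ("22.0") is made up, and "abcdef123" is an arbitrary
# 9-character alphanumeric placeholder for the required tool call ID.
conversation.append(
    {
        "role": "assistant",
        "tool_calls": [
            {
                "type": "function",
                "id": "abcdef123",
                "function": {
                    "name": "get_current_weather",
                    "arguments": {"location": "Paris, France", "format": "celsius"},
                },
            }
        ],
    }
)
conversation.append(
    {"role": "tool", "tool_call_id": "abcdef123", "name": "get_current_weather", "content": "22.0"}
)

# Re-apply the chat template so the model can produce a final answer that uses the tool result.
inputs = tokenizer.apply_chat_template(
    conversation,
    tools=tools,
    add_generation_prompt=True,
    return_dict=True,
    return_tensors="pt",
)
inputs = inputs.to(model.device)
outputs = model.generate(**inputs, max_new_tokens=1000)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```
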
# Instruct tokenizer
The HuggingFace tokenizer included in this release should match our own. To compare:
`pip install mistral-common`

```py
from mistral_common.protocol.instruct.messages import (
    AssistantMessage,
    UserMessage,
)
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.protocol.instruct.request import ChatCompletionRequest

from transformers import AutoTokenizer

tokenizer_v3 = MistralTokenizer.v3()

mistral_query = ChatCompletionRequest(
    messages=[
        UserMessage(content="How many experts ?"),
        AssistantMessage(content="8"),
        UserMessage(content="How big ?"),
        AssistantMessage(content="22B"),
        UserMessage(content="Noice 🎉 !"),
    ],
    model="test",
)
hf_messages = mistral_query.model_dump()['messages']

tokenized_mistral = tokenizer_v3.encode_chat_completion(mistral_query).tokens

tokenizer_hf = AutoTokenizer.from_pretrained('mistralai/Mixtral-8x22B-Instruct-v0.1')
tokenized_hf = tokenizer_hf.apply_chat_template(hf_messages, tokenize=True)

assert tokenized_hf == tokenized_mistral
```

# Function calling and special tokens
This tokenizer includes more special tokens, related to function calling:
- [TOOL_CALLS]
- [AVAILABLE_TOOLS]
- [/AVAILABLE_TOOLS]
- [TOOL_RESULTS]
- [/TOOL_RESULTS]

If you want to use this model with function calling, please be sure to apply it similarly to what is done in our [SentencePieceTokenizerV3](https://github.com/mistralai/mistral-common/blob/main/src/mistral_common/tokens/tokenizers/sentencepiece.py#L299).

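A quick, illustrative check (not from the original card) that these markers are first-class vocabulary entries in the bundled Hugging Face tokenizer, reusing `tokenizer_hf` from the comparison snippet above:

```py
# Illustrative check: each function calling marker should resolve to its own
# token id in the bundled Hugging Face tokenizer (reuses tokenizer_hf from above).
special_tokens = [
    "[TOOL_CALLS]",
    "[AVAILABLE_TOOLS]",
    "[/AVAILABLE_TOOLS]",
    "[TOOL_RESULTS]",
    "[/TOOL_RESULTS]",
]

for token in special_tokens:
    print(token, tokenizer_hf.convert_tokens_to_ids(token))
```
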
# The Mistral AI Team
Albert Jiang, Alexandre Sablayrolles, Alexis Tacnet, Antoine Roux,
Arthur Mensch, Audrey Herblin-Stoop, Baptiste Bout, Baudouin de Monicault,
Blanche Savary, Bam4d, Caroline Feldman, Devendra Singh Chaplot,
Diego de las Casas, Eleonore Arcelin, Emma Bou Hanna, Etienne Metzger,
Gianna Lengyel, Guillaume Bour, Guillaume Lample, Harizo Rajaona,
Jean-Malo Delignon, Jia Li, Justus Murke, Louis Martin, Louis Ternon,
Lucile Saulnier, Lélio Renard Lavaud, Margaret Jennings, Marie Pellat,
Marie Torelli, Marie-Anne Lachaux, Nicolas Schuhl, Patrick von Platen,
Pierre Stock, Sandeep Subramanian, Sophia Yang, Szymon Antoniak, Teven Le Scao,
Thibaut Lavril, Timothée Lacroix, Théophile Gervet, Thomas Wang,
Valera Nemychnikova, William El Sayed, William Marshall
checkpoint/config.json
ADDED
@@ -0,0 +1,29 @@
{
  "architectures": [
    "MixtralForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 6144,
  "initializer_range": 0.02,
  "intermediate_size": 16384,
  "max_position_embeddings": 65536,
  "model_type": "mixtral",
  "num_attention_heads": 48,
  "num_experts_per_tok": 2,
  "num_hidden_layers": 56,
  "num_key_value_heads": 8,
  "num_local_experts": 8,
  "output_router_logits": false,
  "rms_norm_eps": 1e-05,
  "rope_theta": 1000000.0,
  "router_aux_loss_coef": 0.001,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.38.0",
  "use_cache": true,
  "vocab_size": 32768
}
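For orientation (not part of the uploaded files), the config above can be inspected with `transformers` once the `checkpoint/` folder has been downloaded locally; a minimal sketch:

```python
# Hypothetical local inspection of the uploaded config; "checkpoint" is the
# local path to the folder added in this commit.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("checkpoint")

print(config.model_type)                                  # "mixtral"
print(config.num_local_experts, config.num_experts_per_tok)  # 8 experts, 2 routed per token
print(config.hidden_size // config.num_attention_heads)   # 128-dimensional attention heads
```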
checkpoint/generation_config.json
ADDED
@@ -0,0 +1,6 @@
{
  "_from_model_config": true,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "transformers_version": "4.34.0.dev0"
}
checkpoint/model-00001-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ae67519f5c2e29175b011b75cda7f0dd263b487c9fe06978bd4d83c2106c4627
size 4806774160
checkpoint/model-00002-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9bc60e5fe602eb111d02ac362867f443daf604f1153dc26b9b24e7f650657311
size 4806799120
checkpoint/model-00003-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:84ad12eb4ad8e31c9f3c4113ce80603c3a8a9ccad19b2211e056fc984d62c627
size 4806799120
checkpoint/model-00004-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:605b71552c4b5b4385c0dedb09482afec44cec9ba6382d15ea7fa2179c5066b0
size 4806799120
checkpoint/model-00005-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:88288a00084c9959175250443f1d78f6d4a635a350a28f14a299c3bfbef969b8
size 4806799120
checkpoint/model-00006-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:628709b4590d32c5e40b912ef1caad07a5654a724751c8095902f2a2c50f4adb
size 4806799120
checkpoint/model-00007-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:99c70ead16cb04ea2c413da7bcc223142cb3c31f90b2a9efd80c75c1d2a37f81
size 4806799120
checkpoint/model-00008-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e12f54a5fd2ee56f65b2753bdfd09fdc9037a55c578147ea470f0088d003924b
size 4806799120
checkpoint/model-00009-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cc0c97ce0446e6e3962a51fca994071cc78fd0f16c73f3d4926e116612fd0137
size 4806799120
checkpoint/model-00010-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:964064ccf170614c58fc88dce5b5fba03230c0705203ce40fe8fb67fc585241d
size 4806799120
checkpoint/model-00011-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cbe49101cb1e8bc28dc12901596b4160d19bffc576f83727c973339323cb3712
size 4806799136
checkpoint/model-00012-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3ed4cadd0f45b9ce8e0d73f13a33e2a34116879fb3c1df852ee388d265436d11
size 4806799152
checkpoint/model-00013-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:659331ab675bf0b62f41558080a4cd14f073c1514dd404cd489b8cf25ed1e5e5
size 4806799152
checkpoint/model-00014-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:335825d72bd8e23a6c79517e76a9c0645f999179c2eb3fe2c9fb5ddaa5ec093b
size 4806799152
checkpoint/model-00015-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:dc55868948885b7edb6a82ace117b5e1d468b9a104b11c4ebb1c012a93edd2d6
size 4806799152
checkpoint/model-00016-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1b6b2b857278ad7a6cde4486558c2775a61c97a6d437bb065f467470e1421f6a
size 4806799152
checkpoint/model-00017-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:03221dfaf66dde1e04f771057b5b5b6ee85987f0405cf9ce3330256655b927b5
size 4806799152
checkpoint/model-00018-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bce770897dd57cf4956c28dfd376145b60f7b0d70cfcbc8947df4111e9d65404
size 4806799152
checkpoint/model-00019-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:753f9aefeb0610632bf6ece53a893c882869d905712acf3d33d0f77a0a8e4601
size 4806799152
checkpoint/model-00020-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:95746be030dc48450affd3b3591b4ac930611fb503fe81971ef0f5c46a827dbb
size 4806799152
checkpoint/model-00021-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:19afd104d6fc376fd55c862407d4a82d03a5a81a294e587a4b892e849c70d847
size 4806799152
checkpoint/model-00022-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:66de10fb62470e7fe4022034201bd9b36d28002fdfbeac434d9742a52e165d4a
size 4806799152
checkpoint/model-00023-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1aebca805f3d0e0e87bb0dd7dcf2bb05d01a27aca7157480d6b82e6e73b3e9a8
size 4932529864
checkpoint/model-00024-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:74e5f843e88c39be9ec79f153d6f42535aa87505a30181fc5876352b068e58d7
size 4995542848
checkpoint/model-00025-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:60f8c05884be46785b2251fb0b5299f845c7e8a7baf9dba07a5e8f6a6cade695
size 4995542848
checkpoint/model-00026-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:142d379a009e8f624a2a38450ceb750b2e2f8d3e63a9907afa0c48839aa75893
size 4932628288
checkpoint/model-00027-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fc7e26d6b742ea34d55eb57125f4f66dc5a1bdb00547e2a63f8c8e950b2facbe
size 4806774344
checkpoint/model-00028-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:df49d875e37fb2835bcc723b792b7c8b6be750e957a5add45398634098aded89
size 4806799144
checkpoint/model-00029-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:264f130b64ba42846e4b2da8e6ae33ff6f6c4e88beb382b1ca5e7d25eeec1671
size 4806799144
checkpoint/model-00030-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:949c75266c1dffe8a5df987d2060b51a15fb1e95f26ef5ff0dfed0e210ff96b7
size 4806799144
checkpoint/model-00031-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:343f01143dc6a4f28b00031365a84ff1e1105068e904a7fe98a7cdbaca94e51c
size 4806799144
checkpoint/model-00032-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f0dafb3fccf3571f3800e0248188e9ff0e66341deee204499c56e9409a47cdae
size 4806799152
checkpoint/model-00033-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3ba3c9e3b4288cecb67fbfcf5c851293c981c7f9b95be1bc9fd61be78379e7e5
size 4806799152
checkpoint/model-00034-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7e0438c9a72b10be0ba1d2fe3d50e142f3efff04c42e7ddb09d13abe536242cf
size 4806799152
checkpoint/model-00035-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5e4ecee32f7c180a0d7c707b7e908681f045191f74a01d9ed598b36a867a234e
size 4806799152
checkpoint/model-00036-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5eed503a212655cc1e9b83d7c55f61202c30b2c8a426be816da3f028107f1d61
size 4806799152
checkpoint/model-00037-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:39d15148395e7e1f1d1543373bbc678bbef8e0475c7cbd21c384bfb27fc925dd
size 4806799152
checkpoint/model-00038-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f54613a2af1bc62d0e2f34bfd4e18ba333f2e1050bfe9288a14d6f0a717a15f7
size 4806799152
checkpoint/model-00039-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:840bcc5ec0aa48de06d9fd61d035c920c47046b32ddd53114ce2cb72eba969f3
size 4806799152
checkpoint/model-00040-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8ee6e52a68ad639a70fa3d53934c477b408fa1f324f74317aad6d2f75965db93
size 4806799152
checkpoint/model-00041-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7540fe808da2d66e31da717455e105c1b965fbd1573ba96b7b97a152f8042b5c
size 4806799152
checkpoint/model-00042-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7365d08feb99691e6a03820e623460ef38f4c46ac827fb18f187ee37fd951a79
size 4806799152
checkpoint/model-00043-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:535278440166af2f53c2cfc07e06c5f6ce2f0bcae53dc023faa3948f19ddc01a
size 4806799152
checkpoint/model-00044-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:63930f8b116415312d6f7bc762605b733e5b6836061bbef9ed518aacaf8ef695
size 4806799152
checkpoint/model-00045-of-00059.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:47a90e17552c4e92e2012e9a0cd1dc1d0c90fac78376e0a17102698213afe7b2
size 4806799152