Felladrin commited on
Commit
8019b40
·
verified ·
1 Parent(s): 9ccf666

Upload folder using huggingface_hub

Browse files
mlc-chat-config.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.1.0",
3
+ "model_type": "llama",
4
+ "quantization": "q0f16",
5
+ "model_config": {
6
+ "hidden_size": 1536,
7
+ "intermediate_size": 4096,
8
+ "num_attention_heads": 16,
9
+ "num_hidden_layers": 16,
10
+ "rms_norm_eps": 1e-05,
11
+ "vocab_size": 32000,
12
+ "tie_word_embeddings": false,
13
+ "position_embedding_base": 100000,
14
+ "rope_scaling": null,
15
+ "context_window_size": 8192,
16
+ "prefill_chunk_size": 2048,
17
+ "num_key_value_heads": 8,
18
+ "head_dim": 96,
19
+ "tensor_parallel_shards": 1,
20
+ "max_batch_size": 80
21
+ },
22
+ "vocab_size": 32000,
23
+ "context_window_size": 8192,
24
+ "sliding_window_size": -1,
25
+ "prefill_chunk_size": 2048,
26
+ "attention_sink_size": -1,
27
+ "tensor_parallel_shards": 1,
28
+ "temperature": 1.0,
29
+ "presence_penalty": 0.0,
30
+ "frequency_penalty": 0.0,
31
+ "repetition_penalty": 1.0,
32
+ "top_p": 1.0,
33
+ "tokenizer_files": [
34
+ "tokenizer.model",
35
+ "tokenizer.json",
36
+ "tokenizer_config.json"
37
+ ],
38
+ "tokenizer_info": {
39
+ "token_postproc_method": "byte_fallback",
40
+ "prepend_space_in_encode": false,
41
+ "strip_space_in_decode": false
42
+ },
43
+ "conv_template": {
44
+ "name": "chatml",
45
+ "system_template": "<|im_start|>system\n{system_message}<|im_end|>\n",
46
+ "system_message": "A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.",
47
+ "system_prefix_token_ids": null,
48
+ "add_role_after_system_message": true,
49
+ "roles": {
50
+ "user": "<|im_start|>user",
51
+ "assistant": "<|im_start|>assistant"
52
+ },
53
+ "role_templates": {
54
+ "user": "{user_message}",
55
+ "assistant": "{assistant_message}",
56
+ "tool": "{tool_message}"
57
+ },
58
+ "messages": [],
59
+ "seps": [
60
+ "<|im_end|>\n"
61
+ ],
62
+ "role_content_sep": "\n",
63
+ "role_empty_sep": "\n",
64
+ "stop_str": [
65
+ "<|im_end|>"
66
+ ],
67
+ "stop_token_ids": [
68
+ 2
69
+ ],
70
+ "function_string": "",
71
+ "use_function_calling": false
72
+ },
73
+ "pad_token_id": 0,
74
+ "bos_token_id": 1,
75
+ "eos_token_id": 2
76
+ }
model.wasm ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aee35fd100fd34b620386f251a47e87b0a8ce91c876a351c68495e2bee8ec6fd
3
+ size 4131676
ndarray-cache.json ADDED
@@ -0,0 +1,1337 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 99,
4
+ "ParamBytes": 1027181568.0,
5
+ "BitsPerParam": 16.0
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 98304000,
12
+ "records": [
13
+ {
14
+ "name": "lm_head.weight",
15
+ "shape": [
16
+ 32000,
17
+ 1536
18
+ ],
19
+ "dtype": "float16",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 98304000,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "4cacaade4fa9480fcb128b01802010bc"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 98304000,
31
+ "records": [
32
+ {
33
+ "name": "model.embed_tokens.weight",
34
+ "shape": [
35
+ 32000,
36
+ 1536
37
+ ],
38
+ "dtype": "float16",
39
+ "format": "f32-to-bf16",
40
+ "nbytes": 98304000,
41
+ "byteOffset": 0
42
+ }
43
+ ],
44
+ "md5sum": "128c3e824ae1c77e76b7bebec282b68c"
45
+ },
46
+ {
47
+ "dataPath": "params_shard_2.bin",
48
+ "format": "raw-shard",
49
+ "nbytes": 25165824,
50
+ "records": [
51
+ {
52
+ "name": "model.layers.0.mlp.gate_up_proj.weight",
53
+ "shape": [
54
+ 8192,
55
+ 1536
56
+ ],
57
+ "dtype": "float16",
58
+ "format": "f32-to-bf16",
59
+ "nbytes": 25165824,
60
+ "byteOffset": 0
61
+ }
62
+ ],
63
+ "md5sum": "73308bcc241ddfe9a276e1365343a9a5"
64
+ },
65
+ {
66
+ "dataPath": "params_shard_3.bin",
67
+ "format": "raw-shard",
68
+ "nbytes": 26747904,
69
+ "records": [
70
+ {
71
+ "name": "model.layers.0.input_layernorm.weight",
72
+ "shape": [
73
+ 1536
74
+ ],
75
+ "dtype": "float16",
76
+ "format": "f32-to-bf16",
77
+ "nbytes": 3072,
78
+ "byteOffset": 0
79
+ },
80
+ {
81
+ "name": "model.layers.0.mlp.down_proj.weight",
82
+ "shape": [
83
+ 1536,
84
+ 4096
85
+ ],
86
+ "dtype": "float16",
87
+ "format": "f32-to-bf16",
88
+ "nbytes": 12582912,
89
+ "byteOffset": 3072
90
+ },
91
+ {
92
+ "name": "model.layers.0.post_attention_layernorm.weight",
93
+ "shape": [
94
+ 1536
95
+ ],
96
+ "dtype": "float16",
97
+ "format": "f32-to-bf16",
98
+ "nbytes": 3072,
99
+ "byteOffset": 12585984
100
+ },
101
+ {
102
+ "name": "model.layers.0.self_attn.qkv_proj.weight",
103
+ "shape": [
104
+ 3072,
105
+ 1536
106
+ ],
107
+ "dtype": "float16",
108
+ "format": "f32-to-bf16",
109
+ "nbytes": 9437184,
110
+ "byteOffset": 12589056
111
+ },
112
+ {
113
+ "name": "model.layers.0.self_attn.o_proj.weight",
114
+ "shape": [
115
+ 1536,
116
+ 1536
117
+ ],
118
+ "dtype": "float16",
119
+ "format": "f32-to-bf16",
120
+ "nbytes": 4718592,
121
+ "byteOffset": 22026240
122
+ },
123
+ {
124
+ "name": "model.layers.1.input_layernorm.weight",
125
+ "shape": [
126
+ 1536
127
+ ],
128
+ "dtype": "float16",
129
+ "format": "f32-to-bf16",
130
+ "nbytes": 3072,
131
+ "byteOffset": 26744832
132
+ }
133
+ ],
134
+ "md5sum": "61ffd05d3b598a4bf392b2883678c6c8"
135
+ },
136
+ {
137
+ "dataPath": "params_shard_4.bin",
138
+ "format": "raw-shard",
139
+ "nbytes": 25165824,
140
+ "records": [
141
+ {
142
+ "name": "model.layers.1.mlp.gate_up_proj.weight",
143
+ "shape": [
144
+ 8192,
145
+ 1536
146
+ ],
147
+ "dtype": "float16",
148
+ "format": "f32-to-bf16",
149
+ "nbytes": 25165824,
150
+ "byteOffset": 0
151
+ }
152
+ ],
153
+ "md5sum": "3e08d4f83d0234b8f04ee5c08686ec81"
154
+ },
155
+ {
156
+ "dataPath": "params_shard_5.bin",
157
+ "format": "raw-shard",
158
+ "nbytes": 26744832,
159
+ "records": [
160
+ {
161
+ "name": "model.layers.1.mlp.down_proj.weight",
162
+ "shape": [
163
+ 1536,
164
+ 4096
165
+ ],
166
+ "dtype": "float16",
167
+ "format": "f32-to-bf16",
168
+ "nbytes": 12582912,
169
+ "byteOffset": 0
170
+ },
171
+ {
172
+ "name": "model.layers.1.post_attention_layernorm.weight",
173
+ "shape": [
174
+ 1536
175
+ ],
176
+ "dtype": "float16",
177
+ "format": "f32-to-bf16",
178
+ "nbytes": 3072,
179
+ "byteOffset": 12582912
180
+ },
181
+ {
182
+ "name": "model.layers.1.self_attn.qkv_proj.weight",
183
+ "shape": [
184
+ 3072,
185
+ 1536
186
+ ],
187
+ "dtype": "float16",
188
+ "format": "f32-to-bf16",
189
+ "nbytes": 9437184,
190
+ "byteOffset": 12585984
191
+ },
192
+ {
193
+ "name": "model.layers.1.self_attn.o_proj.weight",
194
+ "shape": [
195
+ 1536,
196
+ 1536
197
+ ],
198
+ "dtype": "float16",
199
+ "format": "f32-to-bf16",
200
+ "nbytes": 4718592,
201
+ "byteOffset": 22023168
202
+ },
203
+ {
204
+ "name": "model.layers.10.input_layernorm.weight",
205
+ "shape": [
206
+ 1536
207
+ ],
208
+ "dtype": "float16",
209
+ "format": "f32-to-bf16",
210
+ "nbytes": 3072,
211
+ "byteOffset": 26741760
212
+ }
213
+ ],
214
+ "md5sum": "ac190343ae5018ae4ff4b3c72668e64c"
215
+ },
216
+ {
217
+ "dataPath": "params_shard_6.bin",
218
+ "format": "raw-shard",
219
+ "nbytes": 25165824,
220
+ "records": [
221
+ {
222
+ "name": "model.layers.10.mlp.gate_up_proj.weight",
223
+ "shape": [
224
+ 8192,
225
+ 1536
226
+ ],
227
+ "dtype": "float16",
228
+ "format": "f32-to-bf16",
229
+ "nbytes": 25165824,
230
+ "byteOffset": 0
231
+ }
232
+ ],
233
+ "md5sum": "d150ae0a002e81aa75d3a9869255318f"
234
+ },
235
+ {
236
+ "dataPath": "params_shard_7.bin",
237
+ "format": "raw-shard",
238
+ "nbytes": 26744832,
239
+ "records": [
240
+ {
241
+ "name": "model.layers.10.mlp.down_proj.weight",
242
+ "shape": [
243
+ 1536,
244
+ 4096
245
+ ],
246
+ "dtype": "float16",
247
+ "format": "f32-to-bf16",
248
+ "nbytes": 12582912,
249
+ "byteOffset": 0
250
+ },
251
+ {
252
+ "name": "model.layers.10.post_attention_layernorm.weight",
253
+ "shape": [
254
+ 1536
255
+ ],
256
+ "dtype": "float16",
257
+ "format": "f32-to-bf16",
258
+ "nbytes": 3072,
259
+ "byteOffset": 12582912
260
+ },
261
+ {
262
+ "name": "model.layers.10.self_attn.qkv_proj.weight",
263
+ "shape": [
264
+ 3072,
265
+ 1536
266
+ ],
267
+ "dtype": "float16",
268
+ "format": "f32-to-bf16",
269
+ "nbytes": 9437184,
270
+ "byteOffset": 12585984
271
+ },
272
+ {
273
+ "name": "model.layers.10.self_attn.o_proj.weight",
274
+ "shape": [
275
+ 1536,
276
+ 1536
277
+ ],
278
+ "dtype": "float16",
279
+ "format": "f32-to-bf16",
280
+ "nbytes": 4718592,
281
+ "byteOffset": 22023168
282
+ },
283
+ {
284
+ "name": "model.layers.11.input_layernorm.weight",
285
+ "shape": [
286
+ 1536
287
+ ],
288
+ "dtype": "float16",
289
+ "format": "f32-to-bf16",
290
+ "nbytes": 3072,
291
+ "byteOffset": 26741760
292
+ }
293
+ ],
294
+ "md5sum": "729fdb8f0eda6fb8be72c549a265b99d"
295
+ },
296
+ {
297
+ "dataPath": "params_shard_8.bin",
298
+ "format": "raw-shard",
299
+ "nbytes": 25165824,
300
+ "records": [
301
+ {
302
+ "name": "model.layers.11.mlp.gate_up_proj.weight",
303
+ "shape": [
304
+ 8192,
305
+ 1536
306
+ ],
307
+ "dtype": "float16",
308
+ "format": "f32-to-bf16",
309
+ "nbytes": 25165824,
310
+ "byteOffset": 0
311
+ }
312
+ ],
313
+ "md5sum": "f113f6c464d7323085f32cefad26d060"
314
+ },
315
+ {
316
+ "dataPath": "params_shard_9.bin",
317
+ "format": "raw-shard",
318
+ "nbytes": 26744832,
319
+ "records": [
320
+ {
321
+ "name": "model.layers.11.mlp.down_proj.weight",
322
+ "shape": [
323
+ 1536,
324
+ 4096
325
+ ],
326
+ "dtype": "float16",
327
+ "format": "f32-to-bf16",
328
+ "nbytes": 12582912,
329
+ "byteOffset": 0
330
+ },
331
+ {
332
+ "name": "model.layers.11.post_attention_layernorm.weight",
333
+ "shape": [
334
+ 1536
335
+ ],
336
+ "dtype": "float16",
337
+ "format": "f32-to-bf16",
338
+ "nbytes": 3072,
339
+ "byteOffset": 12582912
340
+ },
341
+ {
342
+ "name": "model.layers.11.self_attn.qkv_proj.weight",
343
+ "shape": [
344
+ 3072,
345
+ 1536
346
+ ],
347
+ "dtype": "float16",
348
+ "format": "f32-to-bf16",
349
+ "nbytes": 9437184,
350
+ "byteOffset": 12585984
351
+ },
352
+ {
353
+ "name": "model.layers.11.self_attn.o_proj.weight",
354
+ "shape": [
355
+ 1536,
356
+ 1536
357
+ ],
358
+ "dtype": "float16",
359
+ "format": "f32-to-bf16",
360
+ "nbytes": 4718592,
361
+ "byteOffset": 22023168
362
+ },
363
+ {
364
+ "name": "model.layers.12.input_layernorm.weight",
365
+ "shape": [
366
+ 1536
367
+ ],
368
+ "dtype": "float16",
369
+ "format": "f32-to-bf16",
370
+ "nbytes": 3072,
371
+ "byteOffset": 26741760
372
+ }
373
+ ],
374
+ "md5sum": "a94367f453649a530637805bb686bed1"
375
+ },
376
+ {
377
+ "dataPath": "params_shard_10.bin",
378
+ "format": "raw-shard",
379
+ "nbytes": 25165824,
380
+ "records": [
381
+ {
382
+ "name": "model.layers.12.mlp.gate_up_proj.weight",
383
+ "shape": [
384
+ 8192,
385
+ 1536
386
+ ],
387
+ "dtype": "float16",
388
+ "format": "f32-to-bf16",
389
+ "nbytes": 25165824,
390
+ "byteOffset": 0
391
+ }
392
+ ],
393
+ "md5sum": "4c7e4e11421a7e78191585cf85e6f492"
394
+ },
395
+ {
396
+ "dataPath": "params_shard_11.bin",
397
+ "format": "raw-shard",
398
+ "nbytes": 26744832,
399
+ "records": [
400
+ {
401
+ "name": "model.layers.12.mlp.down_proj.weight",
402
+ "shape": [
403
+ 1536,
404
+ 4096
405
+ ],
406
+ "dtype": "float16",
407
+ "format": "f32-to-bf16",
408
+ "nbytes": 12582912,
409
+ "byteOffset": 0
410
+ },
411
+ {
412
+ "name": "model.layers.12.post_attention_layernorm.weight",
413
+ "shape": [
414
+ 1536
415
+ ],
416
+ "dtype": "float16",
417
+ "format": "f32-to-bf16",
418
+ "nbytes": 3072,
419
+ "byteOffset": 12582912
420
+ },
421
+ {
422
+ "name": "model.layers.12.self_attn.qkv_proj.weight",
423
+ "shape": [
424
+ 3072,
425
+ 1536
426
+ ],
427
+ "dtype": "float16",
428
+ "format": "f32-to-bf16",
429
+ "nbytes": 9437184,
430
+ "byteOffset": 12585984
431
+ },
432
+ {
433
+ "name": "model.layers.12.self_attn.o_proj.weight",
434
+ "shape": [
435
+ 1536,
436
+ 1536
437
+ ],
438
+ "dtype": "float16",
439
+ "format": "f32-to-bf16",
440
+ "nbytes": 4718592,
441
+ "byteOffset": 22023168
442
+ },
443
+ {
444
+ "name": "model.layers.13.input_layernorm.weight",
445
+ "shape": [
446
+ 1536
447
+ ],
448
+ "dtype": "float16",
449
+ "format": "f32-to-bf16",
450
+ "nbytes": 3072,
451
+ "byteOffset": 26741760
452
+ }
453
+ ],
454
+ "md5sum": "f7aff300446f28f284dfba1c059f1df6"
455
+ },
456
+ {
457
+ "dataPath": "params_shard_12.bin",
458
+ "format": "raw-shard",
459
+ "nbytes": 25165824,
460
+ "records": [
461
+ {
462
+ "name": "model.layers.13.mlp.gate_up_proj.weight",
463
+ "shape": [
464
+ 8192,
465
+ 1536
466
+ ],
467
+ "dtype": "float16",
468
+ "format": "f32-to-bf16",
469
+ "nbytes": 25165824,
470
+ "byteOffset": 0
471
+ }
472
+ ],
473
+ "md5sum": "a1c8805615170e587880431364849a04"
474
+ },
475
+ {
476
+ "dataPath": "params_shard_13.bin",
477
+ "format": "raw-shard",
478
+ "nbytes": 26744832,
479
+ "records": [
480
+ {
481
+ "name": "model.layers.13.mlp.down_proj.weight",
482
+ "shape": [
483
+ 1536,
484
+ 4096
485
+ ],
486
+ "dtype": "float16",
487
+ "format": "f32-to-bf16",
488
+ "nbytes": 12582912,
489
+ "byteOffset": 0
490
+ },
491
+ {
492
+ "name": "model.layers.13.post_attention_layernorm.weight",
493
+ "shape": [
494
+ 1536
495
+ ],
496
+ "dtype": "float16",
497
+ "format": "f32-to-bf16",
498
+ "nbytes": 3072,
499
+ "byteOffset": 12582912
500
+ },
501
+ {
502
+ "name": "model.layers.13.self_attn.qkv_proj.weight",
503
+ "shape": [
504
+ 3072,
505
+ 1536
506
+ ],
507
+ "dtype": "float16",
508
+ "format": "f32-to-bf16",
509
+ "nbytes": 9437184,
510
+ "byteOffset": 12585984
511
+ },
512
+ {
513
+ "name": "model.layers.13.self_attn.o_proj.weight",
514
+ "shape": [
515
+ 1536,
516
+ 1536
517
+ ],
518
+ "dtype": "float16",
519
+ "format": "f32-to-bf16",
520
+ "nbytes": 4718592,
521
+ "byteOffset": 22023168
522
+ },
523
+ {
524
+ "name": "model.layers.14.input_layernorm.weight",
525
+ "shape": [
526
+ 1536
527
+ ],
528
+ "dtype": "float16",
529
+ "format": "f32-to-bf16",
530
+ "nbytes": 3072,
531
+ "byteOffset": 26741760
532
+ }
533
+ ],
534
+ "md5sum": "a2759da9f1b7863921b4196085a09084"
535
+ },
536
+ {
537
+ "dataPath": "params_shard_14.bin",
538
+ "format": "raw-shard",
539
+ "nbytes": 25165824,
540
+ "records": [
541
+ {
542
+ "name": "model.layers.14.mlp.gate_up_proj.weight",
543
+ "shape": [
544
+ 8192,
545
+ 1536
546
+ ],
547
+ "dtype": "float16",
548
+ "format": "f32-to-bf16",
549
+ "nbytes": 25165824,
550
+ "byteOffset": 0
551
+ }
552
+ ],
553
+ "md5sum": "2da23d083df683c1dcae490831b95460"
554
+ },
555
+ {
556
+ "dataPath": "params_shard_15.bin",
557
+ "format": "raw-shard",
558
+ "nbytes": 26744832,
559
+ "records": [
560
+ {
561
+ "name": "model.layers.14.mlp.down_proj.weight",
562
+ "shape": [
563
+ 1536,
564
+ 4096
565
+ ],
566
+ "dtype": "float16",
567
+ "format": "f32-to-bf16",
568
+ "nbytes": 12582912,
569
+ "byteOffset": 0
570
+ },
571
+ {
572
+ "name": "model.layers.14.post_attention_layernorm.weight",
573
+ "shape": [
574
+ 1536
575
+ ],
576
+ "dtype": "float16",
577
+ "format": "f32-to-bf16",
578
+ "nbytes": 3072,
579
+ "byteOffset": 12582912
580
+ },
581
+ {
582
+ "name": "model.layers.14.self_attn.qkv_proj.weight",
583
+ "shape": [
584
+ 3072,
585
+ 1536
586
+ ],
587
+ "dtype": "float16",
588
+ "format": "f32-to-bf16",
589
+ "nbytes": 9437184,
590
+ "byteOffset": 12585984
591
+ },
592
+ {
593
+ "name": "model.layers.14.self_attn.o_proj.weight",
594
+ "shape": [
595
+ 1536,
596
+ 1536
597
+ ],
598
+ "dtype": "float16",
599
+ "format": "f32-to-bf16",
600
+ "nbytes": 4718592,
601
+ "byteOffset": 22023168
602
+ },
603
+ {
604
+ "name": "model.layers.15.input_layernorm.weight",
605
+ "shape": [
606
+ 1536
607
+ ],
608
+ "dtype": "float16",
609
+ "format": "f32-to-bf16",
610
+ "nbytes": 3072,
611
+ "byteOffset": 26741760
612
+ }
613
+ ],
614
+ "md5sum": "0d74164cfbfa518ba337ccbfd5100d28"
615
+ },
616
+ {
617
+ "dataPath": "params_shard_16.bin",
618
+ "format": "raw-shard",
619
+ "nbytes": 25165824,
620
+ "records": [
621
+ {
622
+ "name": "model.layers.15.mlp.gate_up_proj.weight",
623
+ "shape": [
624
+ 8192,
625
+ 1536
626
+ ],
627
+ "dtype": "float16",
628
+ "format": "f32-to-bf16",
629
+ "nbytes": 25165824,
630
+ "byteOffset": 0
631
+ }
632
+ ],
633
+ "md5sum": "33d9a7a41dd0f541307ee90b4ad70328"
634
+ },
635
+ {
636
+ "dataPath": "params_shard_17.bin",
637
+ "format": "raw-shard",
638
+ "nbytes": 26744832,
639
+ "records": [
640
+ {
641
+ "name": "model.layers.15.mlp.down_proj.weight",
642
+ "shape": [
643
+ 1536,
644
+ 4096
645
+ ],
646
+ "dtype": "float16",
647
+ "format": "f32-to-bf16",
648
+ "nbytes": 12582912,
649
+ "byteOffset": 0
650
+ },
651
+ {
652
+ "name": "model.layers.15.post_attention_layernorm.weight",
653
+ "shape": [
654
+ 1536
655
+ ],
656
+ "dtype": "float16",
657
+ "format": "f32-to-bf16",
658
+ "nbytes": 3072,
659
+ "byteOffset": 12582912
660
+ },
661
+ {
662
+ "name": "model.layers.15.self_attn.qkv_proj.weight",
663
+ "shape": [
664
+ 3072,
665
+ 1536
666
+ ],
667
+ "dtype": "float16",
668
+ "format": "f32-to-bf16",
669
+ "nbytes": 9437184,
670
+ "byteOffset": 12585984
671
+ },
672
+ {
673
+ "name": "model.layers.15.self_attn.o_proj.weight",
674
+ "shape": [
675
+ 1536,
676
+ 1536
677
+ ],
678
+ "dtype": "float16",
679
+ "format": "f32-to-bf16",
680
+ "nbytes": 4718592,
681
+ "byteOffset": 22023168
682
+ },
683
+ {
684
+ "name": "model.layers.2.input_layernorm.weight",
685
+ "shape": [
686
+ 1536
687
+ ],
688
+ "dtype": "float16",
689
+ "format": "f32-to-bf16",
690
+ "nbytes": 3072,
691
+ "byteOffset": 26741760
692
+ }
693
+ ],
694
+ "md5sum": "2f7d100b62508b23d809954b86c1ee2e"
695
+ },
696
+ {
697
+ "dataPath": "params_shard_18.bin",
698
+ "format": "raw-shard",
699
+ "nbytes": 25165824,
700
+ "records": [
701
+ {
702
+ "name": "model.layers.2.mlp.gate_up_proj.weight",
703
+ "shape": [
704
+ 8192,
705
+ 1536
706
+ ],
707
+ "dtype": "float16",
708
+ "format": "f32-to-bf16",
709
+ "nbytes": 25165824,
710
+ "byteOffset": 0
711
+ }
712
+ ],
713
+ "md5sum": "30c70e6edd714d2c85f9426157e8c80f"
714
+ },
715
+ {
716
+ "dataPath": "params_shard_19.bin",
717
+ "format": "raw-shard",
718
+ "nbytes": 26744832,
719
+ "records": [
720
+ {
721
+ "name": "model.layers.2.mlp.down_proj.weight",
722
+ "shape": [
723
+ 1536,
724
+ 4096
725
+ ],
726
+ "dtype": "float16",
727
+ "format": "f32-to-bf16",
728
+ "nbytes": 12582912,
729
+ "byteOffset": 0
730
+ },
731
+ {
732
+ "name": "model.layers.2.post_attention_layernorm.weight",
733
+ "shape": [
734
+ 1536
735
+ ],
736
+ "dtype": "float16",
737
+ "format": "f32-to-bf16",
738
+ "nbytes": 3072,
739
+ "byteOffset": 12582912
740
+ },
741
+ {
742
+ "name": "model.layers.2.self_attn.qkv_proj.weight",
743
+ "shape": [
744
+ 3072,
745
+ 1536
746
+ ],
747
+ "dtype": "float16",
748
+ "format": "f32-to-bf16",
749
+ "nbytes": 9437184,
750
+ "byteOffset": 12585984
751
+ },
752
+ {
753
+ "name": "model.layers.2.self_attn.o_proj.weight",
754
+ "shape": [
755
+ 1536,
756
+ 1536
757
+ ],
758
+ "dtype": "float16",
759
+ "format": "f32-to-bf16",
760
+ "nbytes": 4718592,
761
+ "byteOffset": 22023168
762
+ },
763
+ {
764
+ "name": "model.layers.3.input_layernorm.weight",
765
+ "shape": [
766
+ 1536
767
+ ],
768
+ "dtype": "float16",
769
+ "format": "f32-to-bf16",
770
+ "nbytes": 3072,
771
+ "byteOffset": 26741760
772
+ }
773
+ ],
774
+ "md5sum": "3fb2640e234b84defda32dfc1a42104e"
775
+ },
776
+ {
777
+ "dataPath": "params_shard_20.bin",
778
+ "format": "raw-shard",
779
+ "nbytes": 25165824,
780
+ "records": [
781
+ {
782
+ "name": "model.layers.3.mlp.gate_up_proj.weight",
783
+ "shape": [
784
+ 8192,
785
+ 1536
786
+ ],
787
+ "dtype": "float16",
788
+ "format": "f32-to-bf16",
789
+ "nbytes": 25165824,
790
+ "byteOffset": 0
791
+ }
792
+ ],
793
+ "md5sum": "53150dde82ef99fd9e1d6348d946f2fe"
794
+ },
795
+ {
796
+ "dataPath": "params_shard_21.bin",
797
+ "format": "raw-shard",
798
+ "nbytes": 26744832,
799
+ "records": [
800
+ {
801
+ "name": "model.layers.3.mlp.down_proj.weight",
802
+ "shape": [
803
+ 1536,
804
+ 4096
805
+ ],
806
+ "dtype": "float16",
807
+ "format": "f32-to-bf16",
808
+ "nbytes": 12582912,
809
+ "byteOffset": 0
810
+ },
811
+ {
812
+ "name": "model.layers.3.post_attention_layernorm.weight",
813
+ "shape": [
814
+ 1536
815
+ ],
816
+ "dtype": "float16",
817
+ "format": "f32-to-bf16",
818
+ "nbytes": 3072,
819
+ "byteOffset": 12582912
820
+ },
821
+ {
822
+ "name": "model.layers.3.self_attn.qkv_proj.weight",
823
+ "shape": [
824
+ 3072,
825
+ 1536
826
+ ],
827
+ "dtype": "float16",
828
+ "format": "f32-to-bf16",
829
+ "nbytes": 9437184,
830
+ "byteOffset": 12585984
831
+ },
832
+ {
833
+ "name": "model.layers.3.self_attn.o_proj.weight",
834
+ "shape": [
835
+ 1536,
836
+ 1536
837
+ ],
838
+ "dtype": "float16",
839
+ "format": "f32-to-bf16",
840
+ "nbytes": 4718592,
841
+ "byteOffset": 22023168
842
+ },
843
+ {
844
+ "name": "model.layers.4.input_layernorm.weight",
845
+ "shape": [
846
+ 1536
847
+ ],
848
+ "dtype": "float16",
849
+ "format": "f32-to-bf16",
850
+ "nbytes": 3072,
851
+ "byteOffset": 26741760
852
+ }
853
+ ],
854
+ "md5sum": "8d367d8f6d1b43767339847d87c0540d"
855
+ },
856
+ {
857
+ "dataPath": "params_shard_22.bin",
858
+ "format": "raw-shard",
859
+ "nbytes": 25165824,
860
+ "records": [
861
+ {
862
+ "name": "model.layers.4.mlp.gate_up_proj.weight",
863
+ "shape": [
864
+ 8192,
865
+ 1536
866
+ ],
867
+ "dtype": "float16",
868
+ "format": "f32-to-bf16",
869
+ "nbytes": 25165824,
870
+ "byteOffset": 0
871
+ }
872
+ ],
873
+ "md5sum": "cfaf8058ee5a8a20f44eb27a0b7b573f"
874
+ },
875
+ {
876
+ "dataPath": "params_shard_23.bin",
877
+ "format": "raw-shard",
878
+ "nbytes": 26744832,
879
+ "records": [
880
+ {
881
+ "name": "model.layers.4.mlp.down_proj.weight",
882
+ "shape": [
883
+ 1536,
884
+ 4096
885
+ ],
886
+ "dtype": "float16",
887
+ "format": "f32-to-bf16",
888
+ "nbytes": 12582912,
889
+ "byteOffset": 0
890
+ },
891
+ {
892
+ "name": "model.layers.4.post_attention_layernorm.weight",
893
+ "shape": [
894
+ 1536
895
+ ],
896
+ "dtype": "float16",
897
+ "format": "f32-to-bf16",
898
+ "nbytes": 3072,
899
+ "byteOffset": 12582912
900
+ },
901
+ {
902
+ "name": "model.layers.4.self_attn.qkv_proj.weight",
903
+ "shape": [
904
+ 3072,
905
+ 1536
906
+ ],
907
+ "dtype": "float16",
908
+ "format": "f32-to-bf16",
909
+ "nbytes": 9437184,
910
+ "byteOffset": 12585984
911
+ },
912
+ {
913
+ "name": "model.layers.4.self_attn.o_proj.weight",
914
+ "shape": [
915
+ 1536,
916
+ 1536
917
+ ],
918
+ "dtype": "float16",
919
+ "format": "f32-to-bf16",
920
+ "nbytes": 4718592,
921
+ "byteOffset": 22023168
922
+ },
923
+ {
924
+ "name": "model.layers.5.input_layernorm.weight",
925
+ "shape": [
926
+ 1536
927
+ ],
928
+ "dtype": "float16",
929
+ "format": "f32-to-bf16",
930
+ "nbytes": 3072,
931
+ "byteOffset": 26741760
932
+ }
933
+ ],
934
+ "md5sum": "a165373f294ca4281380a26274ac9664"
935
+ },
936
+ {
937
+ "dataPath": "params_shard_24.bin",
938
+ "format": "raw-shard",
939
+ "nbytes": 25165824,
940
+ "records": [
941
+ {
942
+ "name": "model.layers.5.mlp.gate_up_proj.weight",
943
+ "shape": [
944
+ 8192,
945
+ 1536
946
+ ],
947
+ "dtype": "float16",
948
+ "format": "f32-to-bf16",
949
+ "nbytes": 25165824,
950
+ "byteOffset": 0
951
+ }
952
+ ],
953
+ "md5sum": "31865399476b425d4ccea3d19c707990"
954
+ },
955
+ {
956
+ "dataPath": "params_shard_25.bin",
957
+ "format": "raw-shard",
958
+ "nbytes": 26744832,
959
+ "records": [
960
+ {
961
+ "name": "model.layers.5.mlp.down_proj.weight",
962
+ "shape": [
963
+ 1536,
964
+ 4096
965
+ ],
966
+ "dtype": "float16",
967
+ "format": "f32-to-bf16",
968
+ "nbytes": 12582912,
969
+ "byteOffset": 0
970
+ },
971
+ {
972
+ "name": "model.layers.5.post_attention_layernorm.weight",
973
+ "shape": [
974
+ 1536
975
+ ],
976
+ "dtype": "float16",
977
+ "format": "f32-to-bf16",
978
+ "nbytes": 3072,
979
+ "byteOffset": 12582912
980
+ },
981
+ {
982
+ "name": "model.layers.5.self_attn.qkv_proj.weight",
983
+ "shape": [
984
+ 3072,
985
+ 1536
986
+ ],
987
+ "dtype": "float16",
988
+ "format": "f32-to-bf16",
989
+ "nbytes": 9437184,
990
+ "byteOffset": 12585984
991
+ },
992
+ {
993
+ "name": "model.layers.5.self_attn.o_proj.weight",
994
+ "shape": [
995
+ 1536,
996
+ 1536
997
+ ],
998
+ "dtype": "float16",
999
+ "format": "f32-to-bf16",
1000
+ "nbytes": 4718592,
1001
+ "byteOffset": 22023168
1002
+ },
1003
+ {
1004
+ "name": "model.layers.6.input_layernorm.weight",
1005
+ "shape": [
1006
+ 1536
1007
+ ],
1008
+ "dtype": "float16",
1009
+ "format": "f32-to-bf16",
1010
+ "nbytes": 3072,
1011
+ "byteOffset": 26741760
1012
+ }
1013
+ ],
1014
+ "md5sum": "7cff33dad5234670929d6ce9d8f823ea"
1015
+ },
1016
+ {
1017
+ "dataPath": "params_shard_26.bin",
1018
+ "format": "raw-shard",
1019
+ "nbytes": 25165824,
1020
+ "records": [
1021
+ {
1022
+ "name": "model.layers.6.mlp.gate_up_proj.weight",
1023
+ "shape": [
1024
+ 8192,
1025
+ 1536
1026
+ ],
1027
+ "dtype": "float16",
1028
+ "format": "f32-to-bf16",
1029
+ "nbytes": 25165824,
1030
+ "byteOffset": 0
1031
+ }
1032
+ ],
1033
+ "md5sum": "567b76989079f6b8afb9883c30f21dd6"
1034
+ },
1035
+ {
1036
+ "dataPath": "params_shard_27.bin",
1037
+ "format": "raw-shard",
1038
+ "nbytes": 26744832,
1039
+ "records": [
1040
+ {
1041
+ "name": "model.layers.6.mlp.down_proj.weight",
1042
+ "shape": [
1043
+ 1536,
1044
+ 4096
1045
+ ],
1046
+ "dtype": "float16",
1047
+ "format": "f32-to-bf16",
1048
+ "nbytes": 12582912,
1049
+ "byteOffset": 0
1050
+ },
1051
+ {
1052
+ "name": "model.layers.6.post_attention_layernorm.weight",
1053
+ "shape": [
1054
+ 1536
1055
+ ],
1056
+ "dtype": "float16",
1057
+ "format": "f32-to-bf16",
1058
+ "nbytes": 3072,
1059
+ "byteOffset": 12582912
1060
+ },
1061
+ {
1062
+ "name": "model.layers.6.self_attn.qkv_proj.weight",
1063
+ "shape": [
1064
+ 3072,
1065
+ 1536
1066
+ ],
1067
+ "dtype": "float16",
1068
+ "format": "f32-to-bf16",
1069
+ "nbytes": 9437184,
1070
+ "byteOffset": 12585984
1071
+ },
1072
+ {
1073
+ "name": "model.layers.6.self_attn.o_proj.weight",
1074
+ "shape": [
1075
+ 1536,
1076
+ 1536
1077
+ ],
1078
+ "dtype": "float16",
1079
+ "format": "f32-to-bf16",
1080
+ "nbytes": 4718592,
1081
+ "byteOffset": 22023168
1082
+ },
1083
+ {
1084
+ "name": "model.layers.7.input_layernorm.weight",
1085
+ "shape": [
1086
+ 1536
1087
+ ],
1088
+ "dtype": "float16",
1089
+ "format": "f32-to-bf16",
1090
+ "nbytes": 3072,
1091
+ "byteOffset": 26741760
1092
+ }
1093
+ ],
1094
+ "md5sum": "3bd295d16385959c396de51b03ce56e1"
1095
+ },
1096
+ {
1097
+ "dataPath": "params_shard_28.bin",
1098
+ "format": "raw-shard",
1099
+ "nbytes": 25165824,
1100
+ "records": [
1101
+ {
1102
+ "name": "model.layers.7.mlp.gate_up_proj.weight",
1103
+ "shape": [
1104
+ 8192,
1105
+ 1536
1106
+ ],
1107
+ "dtype": "float16",
1108
+ "format": "f32-to-bf16",
1109
+ "nbytes": 25165824,
1110
+ "byteOffset": 0
1111
+ }
1112
+ ],
1113
+ "md5sum": "bee870077fa362451ffe490753151967"
1114
+ },
1115
+ {
1116
+ "dataPath": "params_shard_29.bin",
1117
+ "format": "raw-shard",
1118
+ "nbytes": 26744832,
1119
+ "records": [
1120
+ {
1121
+ "name": "model.layers.7.mlp.down_proj.weight",
1122
+ "shape": [
1123
+ 1536,
1124
+ 4096
1125
+ ],
1126
+ "dtype": "float16",
1127
+ "format": "f32-to-bf16",
1128
+ "nbytes": 12582912,
1129
+ "byteOffset": 0
1130
+ },
1131
+ {
1132
+ "name": "model.layers.7.post_attention_layernorm.weight",
1133
+ "shape": [
1134
+ 1536
1135
+ ],
1136
+ "dtype": "float16",
1137
+ "format": "f32-to-bf16",
1138
+ "nbytes": 3072,
1139
+ "byteOffset": 12582912
1140
+ },
1141
+ {
1142
+ "name": "model.layers.7.self_attn.qkv_proj.weight",
1143
+ "shape": [
1144
+ 3072,
1145
+ 1536
1146
+ ],
1147
+ "dtype": "float16",
1148
+ "format": "f32-to-bf16",
1149
+ "nbytes": 9437184,
1150
+ "byteOffset": 12585984
1151
+ },
1152
+ {
1153
+ "name": "model.layers.7.self_attn.o_proj.weight",
1154
+ "shape": [
1155
+ 1536,
1156
+ 1536
1157
+ ],
1158
+ "dtype": "float16",
1159
+ "format": "f32-to-bf16",
1160
+ "nbytes": 4718592,
1161
+ "byteOffset": 22023168
1162
+ },
1163
+ {
1164
+ "name": "model.layers.8.input_layernorm.weight",
1165
+ "shape": [
1166
+ 1536
1167
+ ],
1168
+ "dtype": "float16",
1169
+ "format": "f32-to-bf16",
1170
+ "nbytes": 3072,
1171
+ "byteOffset": 26741760
1172
+ }
1173
+ ],
1174
+ "md5sum": "9ef1e19c27ba9772c11e5876693706c7"
1175
+ },
1176
+ {
1177
+ "dataPath": "params_shard_30.bin",
1178
+ "format": "raw-shard",
1179
+ "nbytes": 25165824,
1180
+ "records": [
1181
+ {
1182
+ "name": "model.layers.8.mlp.gate_up_proj.weight",
1183
+ "shape": [
1184
+ 8192,
1185
+ 1536
1186
+ ],
1187
+ "dtype": "float16",
1188
+ "format": "f32-to-bf16",
1189
+ "nbytes": 25165824,
1190
+ "byteOffset": 0
1191
+ }
1192
+ ],
1193
+ "md5sum": "01d39adc4287100619cd94e0c84e4c61"
1194
+ },
1195
+ {
1196
+ "dataPath": "params_shard_31.bin",
1197
+ "format": "raw-shard",
1198
+ "nbytes": 26744832,
1199
+ "records": [
1200
+ {
1201
+ "name": "model.layers.8.mlp.down_proj.weight",
1202
+ "shape": [
1203
+ 1536,
1204
+ 4096
1205
+ ],
1206
+ "dtype": "float16",
1207
+ "format": "f32-to-bf16",
1208
+ "nbytes": 12582912,
1209
+ "byteOffset": 0
1210
+ },
1211
+ {
1212
+ "name": "model.layers.8.post_attention_layernorm.weight",
1213
+ "shape": [
1214
+ 1536
1215
+ ],
1216
+ "dtype": "float16",
1217
+ "format": "f32-to-bf16",
1218
+ "nbytes": 3072,
1219
+ "byteOffset": 12582912
1220
+ },
1221
+ {
1222
+ "name": "model.layers.8.self_attn.qkv_proj.weight",
1223
+ "shape": [
1224
+ 3072,
1225
+ 1536
1226
+ ],
1227
+ "dtype": "float16",
1228
+ "format": "f32-to-bf16",
1229
+ "nbytes": 9437184,
1230
+ "byteOffset": 12585984
1231
+ },
1232
+ {
1233
+ "name": "model.layers.8.self_attn.o_proj.weight",
1234
+ "shape": [
1235
+ 1536,
1236
+ 1536
1237
+ ],
1238
+ "dtype": "float16",
1239
+ "format": "f32-to-bf16",
1240
+ "nbytes": 4718592,
1241
+ "byteOffset": 22023168
1242
+ },
1243
+ {
1244
+ "name": "model.layers.9.input_layernorm.weight",
1245
+ "shape": [
1246
+ 1536
1247
+ ],
1248
+ "dtype": "float16",
1249
+ "format": "f32-to-bf16",
1250
+ "nbytes": 3072,
1251
+ "byteOffset": 26741760
1252
+ }
1253
+ ],
1254
+ "md5sum": "3806098b62919ec3c4deb773c46d177d"
1255
+ },
1256
+ {
1257
+ "dataPath": "params_shard_32.bin",
1258
+ "format": "raw-shard",
1259
+ "nbytes": 25165824,
1260
+ "records": [
1261
+ {
1262
+ "name": "model.layers.9.mlp.gate_up_proj.weight",
1263
+ "shape": [
1264
+ 8192,
1265
+ 1536
1266
+ ],
1267
+ "dtype": "float16",
1268
+ "format": "f32-to-bf16",
1269
+ "nbytes": 25165824,
1270
+ "byteOffset": 0
1271
+ }
1272
+ ],
1273
+ "md5sum": "6613b12dd755ddbe02dfd8e7640d1b6b"
1274
+ },
1275
+ {
1276
+ "dataPath": "params_shard_33.bin",
1277
+ "format": "raw-shard",
1278
+ "nbytes": 26744832,
1279
+ "records": [
1280
+ {
1281
+ "name": "model.layers.9.mlp.down_proj.weight",
1282
+ "shape": [
1283
+ 1536,
1284
+ 4096
1285
+ ],
1286
+ "dtype": "float16",
1287
+ "format": "f32-to-bf16",
1288
+ "nbytes": 12582912,
1289
+ "byteOffset": 0
1290
+ },
1291
+ {
1292
+ "name": "model.layers.9.post_attention_layernorm.weight",
1293
+ "shape": [
1294
+ 1536
1295
+ ],
1296
+ "dtype": "float16",
1297
+ "format": "f32-to-bf16",
1298
+ "nbytes": 3072,
1299
+ "byteOffset": 12582912
1300
+ },
1301
+ {
1302
+ "name": "model.layers.9.self_attn.qkv_proj.weight",
1303
+ "shape": [
1304
+ 3072,
1305
+ 1536
1306
+ ],
1307
+ "dtype": "float16",
1308
+ "format": "f32-to-bf16",
1309
+ "nbytes": 9437184,
1310
+ "byteOffset": 12585984
1311
+ },
1312
+ {
1313
+ "name": "model.layers.9.self_attn.o_proj.weight",
1314
+ "shape": [
1315
+ 1536,
1316
+ 1536
1317
+ ],
1318
+ "dtype": "float16",
1319
+ "format": "f32-to-bf16",
1320
+ "nbytes": 4718592,
1321
+ "byteOffset": 22023168
1322
+ },
1323
+ {
1324
+ "name": "model.norm.weight",
1325
+ "shape": [
1326
+ 1536
1327
+ ],
1328
+ "dtype": "float16",
1329
+ "format": "f32-to-bf16",
1330
+ "nbytes": 3072,
1331
+ "byteOffset": 26741760
1332
+ }
1333
+ ],
1334
+ "md5sum": "70dda49a243e5c0a7bf48b85b8a426a1"
1335
+ }
1336
+ ]
1337
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61c6b32b48b3cf22102f290ccfda71e394a325b48271b05e12c544e40b01b123
3
+ size 98304000
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:929d9abffe966e656194542030debc8d76cff9997924a57b6b5d83659f4ac482
3
+ size 98304000
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8054b5f0e37b17161e567cfdd4e7b7b6cbb98b6378fcd751afe035ae8c950d0e
3
+ size 25165824
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97fb918a706e3faf028357680f4c6dff887d085c670f2cd1022cb91759375548
3
+ size 26744832
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e12a6af03c560a58984ae9f5d8e898256a867c7f0974b5b05bbe7b501ded0e4
3
+ size 25165824
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:139ccc9a94d20934aebfdf45bff5d85646d1d60b53d8b0b92a31a2193ba4e877
3
+ size 26744832
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac518fa57925381e2beecc8cc437638748aa34aa6ac91f02a9bf5fe34c2c7ab4
3
+ size 25165824
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13411db044d3520756e0267db7261bfd9198bda59c0f4db02a5b63182e5750f8
3
+ size 26744832
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53eedf84f76e1f3972704f6f8ffc95e5b341ced7a12cd94f20367b55deebbc8b
3
+ size 25165824
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7d747c5868363b31a3a1c5c83359b4f381de177fba281b569fc3adbe32a614b
3
+ size 26744832
params_shard_18.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56cee65008c37204df81e02d5822f25f93eb8888a050a7b5e3e9a09e582e1934
3
+ size 25165824
params_shard_19.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb88fb14fa3e7fe93b5dfcac91b1bc422166a582156d9a22a4af762c14bd0f27
3
+ size 26744832
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a43006c3e5777b6e820157b07484c419a8aac45c151ca8ab363c400ee1199ae
3
+ size 25165824
params_shard_20.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01fffc47bb50fffe1312eb67510867c85d075a69aabe4a9cca8f064d19b76394
3
+ size 25165824
params_shard_21.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:708291d767c52d8eb4a0234fdbe5f33eb45614231270350d42bd4adbe11a7c72
3
+ size 26744832
params_shard_22.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b42cfbe9cc84dc35178bf83c2753a01c908f8b2082fcf3ff615e05bebddd133
3
+ size 25165824
params_shard_23.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffb3ab693589f7022e7b11f37e313a88a73a81c2ae4443d889385714eac6411e
3
+ size 26744832
params_shard_24.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88e5007e6051700998962a3d28c61fdbedc86260ce3af0e3c423b8e009dfcb2f
3
+ size 25165824
params_shard_25.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33c49f6100e4299b35b691ac5bc2a3c20f45554f599e97ec1387a60ce552e883
3
+ size 26744832
params_shard_26.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c178dd7721092d0009eaeb7b7bed8ee5f93b1d9e29dc17ebde2d8fde88cb425
3
+ size 25165824
params_shard_27.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afe1d372ad11b18f0f1eec5ec72ff37aed14c5a77044932bb33bac3a1b860c91
3
+ size 26744832
params_shard_28.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cf485579430acb26b7a2c19464bc249375a9d1b63bea00fda0112c028fdba16
3
+ size 25165824
params_shard_29.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:382af8b0e35d0a168c04bea54fd123b9cab7873357a09780ffb90fd9ad215b45
3
+ size 26744832
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78a482d5f69ba7f8c4f0ac7002d3c853a34db8b115c88e098854681d34651b3d
3
+ size 26747904
params_shard_30.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f383df6544963322bc52ed958b6e4dce40cf37613db7dc16ed5f6c56c2be360e
3
+ size 25165824
params_shard_31.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0966770be290afcc4357e637f82237cf83400c44c53cafac915aa77adff59f1b
3
+ size 26744832
params_shard_32.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bcc68b7845a5677c32737d9754fb94512ddd574c9c8e7a6fca8830cf16bf0d3
3
+ size 25165824
params_shard_33.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38e772beae0aa5e89409921d952f105e970b5e474294eef67d2828829b4fe262
3
+ size 26744832
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cb7b33a6b3611c11a9950998d8642ae22e29545d4b33d27fc7152b2fb85e1bf
3
+ size 25165824
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a3659fcbcd0caf42e2fc3fd25f8425cc9d476be34e2a8d10aad5f8273754f2a
3
+ size 26744832
params_shard_6.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b4b9c6fdea6bc48aaf92bed823d17671744152fde58dedb65d76f0668b90568
3
+ size 25165824
params_shard_7.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24c6b850e9afdbf385911d78a2e18f7a581ec228ea1b2a131a0f6e06c900fd2a
3
+ size 26744832
params_shard_8.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f20be83b6bd2ad03d861304a8af504341b0b068dd82d54b2ba1f785f9cbef4b
3
+ size 25165824
params_shard_9.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77e4788c02882f1d747cdcd56e9885bbf7e792acb6de9a11549a95ad7e172225
3
+ size 26744832
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
3
+ size 493443
tokenizer_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": false,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "additional_special_tokens": [],
32
+ "bos_token": "<s>",
33
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% if ((message['role'] == 'user') != (loop.index0 % 2 == 0)) or ((message['role'] == 'assistant') != (loop.index0 % 2 == 1)) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '<|prompt|>' + message['content'].strip() + eos_token }}{% elif message['role'] == 'assistant' %}{{ '<|answer|>' + message['content'].strip() + eos_token }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|answer|>' }}{% endif %}",
34
+ "clean_up_tokenization_spaces": false,
35
+ "cls_token": "</s>",
36
+ "eos_token": "</s>",
37
+ "legacy": true,
38
+ "model_max_length": 1000000000000000019884624838656,
39
+ "pad_token": "<unk>",
40
+ "sep_token": "</s>",
41
+ "sp_model_kwargs": {},
42
+ "spaces_between_special_tokens": false,
43
+ "tokenizer_class": "LlamaTokenizer",
44
+ "unk_token": "<unk>",
45
+ "use_default_system_prompt": false
46
+ }