imi2 commited on
Commit
0c105e0
1 Parent(s): 3528072

Delete ndarray-cache.json

Browse files
Files changed (1) hide show
  1. ndarray-cache.json +0 -1807
ndarray-cache.json DELETED
@@ -1,1807 +0,0 @@
1
- {
2
- "metadata": {
3
- "ParamSize": 165,
4
- "ParamBytes": 45547008.0,
5
- "BitsPerParam": 3.652428290625048
6
- },
7
- "records": [
8
- {
9
- "dataPath": "params_shard_0.bin",
10
- "format": "raw-shard",
11
- "nbytes": 33464832,
12
- "records": [
13
- {
14
- "name": "lm_head.q_weight",
15
- "shape": [
16
- 52,
17
- 50304
18
- ],
19
- "dtype": "uint32",
20
- "format": "f32-to-bf16",
21
- "nbytes": 10463232,
22
- "byteOffset": 0
23
- },
24
- {
25
- "name": "lm_head.q_scale",
26
- "shape": [
27
- 13,
28
- 50304
29
- ],
30
- "dtype": "float16",
31
- "format": "f32-to-bf16",
32
- "nbytes": 1307904,
33
- "byteOffset": 10463232
34
- },
35
- {
36
- "name": "model.embed_tokens.q_weight",
37
- "shape": [
38
- 50304,
39
- 52
40
- ],
41
- "dtype": "uint32",
42
- "format": "f32-to-bf16",
43
- "nbytes": 10463232,
44
- "byteOffset": 11771136
45
- },
46
- {
47
- "name": "model.embed_tokens.q_scale",
48
- "shape": [
49
- 50304,
50
- 13
51
- ],
52
- "dtype": "float16",
53
- "format": "f32-to-bf16",
54
- "nbytes": 1307904,
55
- "byteOffset": 22234368
56
- },
57
- {
58
- "name": "model.layers.0.input_layernorm.weight",
59
- "shape": [
60
- 512
61
- ],
62
- "dtype": "float16",
63
- "format": "f32-to-bf16",
64
- "nbytes": 1024,
65
- "byteOffset": 23542272
66
- },
67
- {
68
- "name": "model.layers.0.mlp.down_proj.q_weight",
69
- "shape": [
70
- 128,
71
- 512
72
- ],
73
- "dtype": "uint32",
74
- "format": "f32-to-bf16",
75
- "nbytes": 262144,
76
- "byteOffset": 23543296
77
- },
78
- {
79
- "name": "model.layers.0.mlp.down_proj.q_scale",
80
- "shape": [
81
- 32,
82
- 512
83
- ],
84
- "dtype": "float16",
85
- "format": "f32-to-bf16",
86
- "nbytes": 32768,
87
- "byteOffset": 23805440
88
- },
89
- {
90
- "name": "model.layers.0.mlp.gate_up_proj.q_weight",
91
- "shape": [
92
- 52,
93
- 2560
94
- ],
95
- "dtype": "uint32",
96
- "format": "f32-to-bf16",
97
- "nbytes": 532480,
98
- "byteOffset": 23838208
99
- },
100
- {
101
- "name": "model.layers.0.mlp.gate_up_proj.q_scale",
102
- "shape": [
103
- 13,
104
- 2560
105
- ],
106
- "dtype": "float16",
107
- "format": "f32-to-bf16",
108
- "nbytes": 66560,
109
- "byteOffset": 24370688
110
- },
111
- {
112
- "name": "model.layers.0.post_attention_layernorm.weight",
113
- "shape": [
114
- 512
115
- ],
116
- "dtype": "float16",
117
- "format": "f32-to-bf16",
118
- "nbytes": 1024,
119
- "byteOffset": 24437248
120
- },
121
- {
122
- "name": "model.layers.0.self_attn.qkv_proj.q_weight",
123
- "shape": [
124
- 52,
125
- 1536
126
- ],
127
- "dtype": "uint32",
128
- "format": "f32-to-bf16",
129
- "nbytes": 319488,
130
- "byteOffset": 24438272
131
- },
132
- {
133
- "name": "model.layers.0.self_attn.qkv_proj.q_scale",
134
- "shape": [
135
- 13,
136
- 1536
137
- ],
138
- "dtype": "float16",
139
- "format": "f32-to-bf16",
140
- "nbytes": 39936,
141
- "byteOffset": 24757760
142
- },
143
- {
144
- "name": "model.layers.0.self_attn.o_proj.q_weight",
145
- "shape": [
146
- 52,
147
- 512
148
- ],
149
- "dtype": "uint32",
150
- "format": "f32-to-bf16",
151
- "nbytes": 106496,
152
- "byteOffset": 24797696
153
- },
154
- {
155
- "name": "model.layers.0.self_attn.o_proj.q_scale",
156
- "shape": [
157
- 13,
158
- 512
159
- ],
160
- "dtype": "float16",
161
- "format": "f32-to-bf16",
162
- "nbytes": 13312,
163
- "byteOffset": 24904192
164
- },
165
- {
166
- "name": "model.layers.1.input_layernorm.weight",
167
- "shape": [
168
- 512
169
- ],
170
- "dtype": "float16",
171
- "format": "f32-to-bf16",
172
- "nbytes": 1024,
173
- "byteOffset": 24917504
174
- },
175
- {
176
- "name": "model.layers.1.mlp.down_proj.q_weight",
177
- "shape": [
178
- 128,
179
- 512
180
- ],
181
- "dtype": "uint32",
182
- "format": "f32-to-bf16",
183
- "nbytes": 262144,
184
- "byteOffset": 24918528
185
- },
186
- {
187
- "name": "model.layers.1.mlp.down_proj.q_scale",
188
- "shape": [
189
- 32,
190
- 512
191
- ],
192
- "dtype": "float16",
193
- "format": "f32-to-bf16",
194
- "nbytes": 32768,
195
- "byteOffset": 25180672
196
- },
197
- {
198
- "name": "model.layers.1.mlp.gate_up_proj.q_weight",
199
- "shape": [
200
- 52,
201
- 2560
202
- ],
203
- "dtype": "uint32",
204
- "format": "f32-to-bf16",
205
- "nbytes": 532480,
206
- "byteOffset": 25213440
207
- },
208
- {
209
- "name": "model.layers.1.mlp.gate_up_proj.q_scale",
210
- "shape": [
211
- 13,
212
- 2560
213
- ],
214
- "dtype": "float16",
215
- "format": "f32-to-bf16",
216
- "nbytes": 66560,
217
- "byteOffset": 25745920
218
- },
219
- {
220
- "name": "model.layers.1.post_attention_layernorm.weight",
221
- "shape": [
222
- 512
223
- ],
224
- "dtype": "float16",
225
- "format": "f32-to-bf16",
226
- "nbytes": 1024,
227
- "byteOffset": 25812480
228
- },
229
- {
230
- "name": "model.layers.1.self_attn.qkv_proj.q_weight",
231
- "shape": [
232
- 52,
233
- 1536
234
- ],
235
- "dtype": "uint32",
236
- "format": "f32-to-bf16",
237
- "nbytes": 319488,
238
- "byteOffset": 25813504
239
- },
240
- {
241
- "name": "model.layers.1.self_attn.qkv_proj.q_scale",
242
- "shape": [
243
- 13,
244
- 1536
245
- ],
246
- "dtype": "float16",
247
- "format": "f32-to-bf16",
248
- "nbytes": 39936,
249
- "byteOffset": 26132992
250
- },
251
- {
252
- "name": "model.layers.1.self_attn.o_proj.q_weight",
253
- "shape": [
254
- 52,
255
- 512
256
- ],
257
- "dtype": "uint32",
258
- "format": "f32-to-bf16",
259
- "nbytes": 106496,
260
- "byteOffset": 26172928
261
- },
262
- {
263
- "name": "model.layers.1.self_attn.o_proj.q_scale",
264
- "shape": [
265
- 13,
266
- 512
267
- ],
268
- "dtype": "float16",
269
- "format": "f32-to-bf16",
270
- "nbytes": 13312,
271
- "byteOffset": 26279424
272
- },
273
- {
274
- "name": "model.layers.10.input_layernorm.weight",
275
- "shape": [
276
- 512
277
- ],
278
- "dtype": "float16",
279
- "format": "f32-to-bf16",
280
- "nbytes": 1024,
281
- "byteOffset": 26292736
282
- },
283
- {
284
- "name": "model.layers.10.mlp.down_proj.q_weight",
285
- "shape": [
286
- 128,
287
- 512
288
- ],
289
- "dtype": "uint32",
290
- "format": "f32-to-bf16",
291
- "nbytes": 262144,
292
- "byteOffset": 26293760
293
- },
294
- {
295
- "name": "model.layers.10.mlp.down_proj.q_scale",
296
- "shape": [
297
- 32,
298
- 512
299
- ],
300
- "dtype": "float16",
301
- "format": "f32-to-bf16",
302
- "nbytes": 32768,
303
- "byteOffset": 26555904
304
- },
305
- {
306
- "name": "model.layers.10.mlp.gate_up_proj.q_weight",
307
- "shape": [
308
- 52,
309
- 2560
310
- ],
311
- "dtype": "uint32",
312
- "format": "f32-to-bf16",
313
- "nbytes": 532480,
314
- "byteOffset": 26588672
315
- },
316
- {
317
- "name": "model.layers.10.mlp.gate_up_proj.q_scale",
318
- "shape": [
319
- 13,
320
- 2560
321
- ],
322
- "dtype": "float16",
323
- "format": "f32-to-bf16",
324
- "nbytes": 66560,
325
- "byteOffset": 27121152
326
- },
327
- {
328
- "name": "model.layers.10.post_attention_layernorm.weight",
329
- "shape": [
330
- 512
331
- ],
332
- "dtype": "float16",
333
- "format": "f32-to-bf16",
334
- "nbytes": 1024,
335
- "byteOffset": 27187712
336
- },
337
- {
338
- "name": "model.layers.10.self_attn.qkv_proj.q_weight",
339
- "shape": [
340
- 52,
341
- 1536
342
- ],
343
- "dtype": "uint32",
344
- "format": "f32-to-bf16",
345
- "nbytes": 319488,
346
- "byteOffset": 27188736
347
- },
348
- {
349
- "name": "model.layers.10.self_attn.qkv_proj.q_scale",
350
- "shape": [
351
- 13,
352
- 1536
353
- ],
354
- "dtype": "float16",
355
- "format": "f32-to-bf16",
356
- "nbytes": 39936,
357
- "byteOffset": 27508224
358
- },
359
- {
360
- "name": "model.layers.10.self_attn.o_proj.q_weight",
361
- "shape": [
362
- 52,
363
- 512
364
- ],
365
- "dtype": "uint32",
366
- "format": "f32-to-bf16",
367
- "nbytes": 106496,
368
- "byteOffset": 27548160
369
- },
370
- {
371
- "name": "model.layers.10.self_attn.o_proj.q_scale",
372
- "shape": [
373
- 13,
374
- 512
375
- ],
376
- "dtype": "float16",
377
- "format": "f32-to-bf16",
378
- "nbytes": 13312,
379
- "byteOffset": 27654656
380
- },
381
- {
382
- "name": "model.layers.11.input_layernorm.weight",
383
- "shape": [
384
- 512
385
- ],
386
- "dtype": "float16",
387
- "format": "f32-to-bf16",
388
- "nbytes": 1024,
389
- "byteOffset": 27667968
390
- },
391
- {
392
- "name": "model.layers.11.mlp.down_proj.q_weight",
393
- "shape": [
394
- 128,
395
- 512
396
- ],
397
- "dtype": "uint32",
398
- "format": "f32-to-bf16",
399
- "nbytes": 262144,
400
- "byteOffset": 27668992
401
- },
402
- {
403
- "name": "model.layers.11.mlp.down_proj.q_scale",
404
- "shape": [
405
- 32,
406
- 512
407
- ],
408
- "dtype": "float16",
409
- "format": "f32-to-bf16",
410
- "nbytes": 32768,
411
- "byteOffset": 27931136
412
- },
413
- {
414
- "name": "model.layers.11.mlp.gate_up_proj.q_weight",
415
- "shape": [
416
- 52,
417
- 2560
418
- ],
419
- "dtype": "uint32",
420
- "format": "f32-to-bf16",
421
- "nbytes": 532480,
422
- "byteOffset": 27963904
423
- },
424
- {
425
- "name": "model.layers.11.mlp.gate_up_proj.q_scale",
426
- "shape": [
427
- 13,
428
- 2560
429
- ],
430
- "dtype": "float16",
431
- "format": "f32-to-bf16",
432
- "nbytes": 66560,
433
- "byteOffset": 28496384
434
- },
435
- {
436
- "name": "model.layers.11.post_attention_layernorm.weight",
437
- "shape": [
438
- 512
439
- ],
440
- "dtype": "float16",
441
- "format": "f32-to-bf16",
442
- "nbytes": 1024,
443
- "byteOffset": 28562944
444
- },
445
- {
446
- "name": "model.layers.11.self_attn.qkv_proj.q_weight",
447
- "shape": [
448
- 52,
449
- 1536
450
- ],
451
- "dtype": "uint32",
452
- "format": "f32-to-bf16",
453
- "nbytes": 319488,
454
- "byteOffset": 28563968
455
- },
456
- {
457
- "name": "model.layers.11.self_attn.qkv_proj.q_scale",
458
- "shape": [
459
- 13,
460
- 1536
461
- ],
462
- "dtype": "float16",
463
- "format": "f32-to-bf16",
464
- "nbytes": 39936,
465
- "byteOffset": 28883456
466
- },
467
- {
468
- "name": "model.layers.11.self_attn.o_proj.q_weight",
469
- "shape": [
470
- 52,
471
- 512
472
- ],
473
- "dtype": "uint32",
474
- "format": "f32-to-bf16",
475
- "nbytes": 106496,
476
- "byteOffset": 28923392
477
- },
478
- {
479
- "name": "model.layers.11.self_attn.o_proj.q_scale",
480
- "shape": [
481
- 13,
482
- 512
483
- ],
484
- "dtype": "float16",
485
- "format": "f32-to-bf16",
486
- "nbytes": 13312,
487
- "byteOffset": 29029888
488
- },
489
- {
490
- "name": "model.layers.12.input_layernorm.weight",
491
- "shape": [
492
- 512
493
- ],
494
- "dtype": "float16",
495
- "format": "f32-to-bf16",
496
- "nbytes": 1024,
497
- "byteOffset": 29043200
498
- },
499
- {
500
- "name": "model.layers.12.mlp.down_proj.q_weight",
501
- "shape": [
502
- 128,
503
- 512
504
- ],
505
- "dtype": "uint32",
506
- "format": "f32-to-bf16",
507
- "nbytes": 262144,
508
- "byteOffset": 29044224
509
- },
510
- {
511
- "name": "model.layers.12.mlp.down_proj.q_scale",
512
- "shape": [
513
- 32,
514
- 512
515
- ],
516
- "dtype": "float16",
517
- "format": "f32-to-bf16",
518
- "nbytes": 32768,
519
- "byteOffset": 29306368
520
- },
521
- {
522
- "name": "model.layers.12.mlp.gate_up_proj.q_weight",
523
- "shape": [
524
- 52,
525
- 2560
526
- ],
527
- "dtype": "uint32",
528
- "format": "f32-to-bf16",
529
- "nbytes": 532480,
530
- "byteOffset": 29339136
531
- },
532
- {
533
- "name": "model.layers.12.mlp.gate_up_proj.q_scale",
534
- "shape": [
535
- 13,
536
- 2560
537
- ],
538
- "dtype": "float16",
539
- "format": "f32-to-bf16",
540
- "nbytes": 66560,
541
- "byteOffset": 29871616
542
- },
543
- {
544
- "name": "model.layers.12.post_attention_layernorm.weight",
545
- "shape": [
546
- 512
547
- ],
548
- "dtype": "float16",
549
- "format": "f32-to-bf16",
550
- "nbytes": 1024,
551
- "byteOffset": 29938176
552
- },
553
- {
554
- "name": "model.layers.12.self_attn.qkv_proj.q_weight",
555
- "shape": [
556
- 52,
557
- 1536
558
- ],
559
- "dtype": "uint32",
560
- "format": "f32-to-bf16",
561
- "nbytes": 319488,
562
- "byteOffset": 29939200
563
- },
564
- {
565
- "name": "model.layers.12.self_attn.qkv_proj.q_scale",
566
- "shape": [
567
- 13,
568
- 1536
569
- ],
570
- "dtype": "float16",
571
- "format": "f32-to-bf16",
572
- "nbytes": 39936,
573
- "byteOffset": 30258688
574
- },
575
- {
576
- "name": "model.layers.12.self_attn.o_proj.q_weight",
577
- "shape": [
578
- 52,
579
- 512
580
- ],
581
- "dtype": "uint32",
582
- "format": "f32-to-bf16",
583
- "nbytes": 106496,
584
- "byteOffset": 30298624
585
- },
586
- {
587
- "name": "model.layers.12.self_attn.o_proj.q_scale",
588
- "shape": [
589
- 13,
590
- 512
591
- ],
592
- "dtype": "float16",
593
- "format": "f32-to-bf16",
594
- "nbytes": 13312,
595
- "byteOffset": 30405120
596
- },
597
- {
598
- "name": "model.layers.13.input_layernorm.weight",
599
- "shape": [
600
- 512
601
- ],
602
- "dtype": "float16",
603
- "format": "f32-to-bf16",
604
- "nbytes": 1024,
605
- "byteOffset": 30418432
606
- },
607
- {
608
- "name": "model.layers.13.mlp.down_proj.q_weight",
609
- "shape": [
610
- 128,
611
- 512
612
- ],
613
- "dtype": "uint32",
614
- "format": "f32-to-bf16",
615
- "nbytes": 262144,
616
- "byteOffset": 30419456
617
- },
618
- {
619
- "name": "model.layers.13.mlp.down_proj.q_scale",
620
- "shape": [
621
- 32,
622
- 512
623
- ],
624
- "dtype": "float16",
625
- "format": "f32-to-bf16",
626
- "nbytes": 32768,
627
- "byteOffset": 30681600
628
- },
629
- {
630
- "name": "model.layers.13.mlp.gate_up_proj.q_weight",
631
- "shape": [
632
- 52,
633
- 2560
634
- ],
635
- "dtype": "uint32",
636
- "format": "f32-to-bf16",
637
- "nbytes": 532480,
638
- "byteOffset": 30714368
639
- },
640
- {
641
- "name": "model.layers.13.mlp.gate_up_proj.q_scale",
642
- "shape": [
643
- 13,
644
- 2560
645
- ],
646
- "dtype": "float16",
647
- "format": "f32-to-bf16",
648
- "nbytes": 66560,
649
- "byteOffset": 31246848
650
- },
651
- {
652
- "name": "model.layers.13.post_attention_layernorm.weight",
653
- "shape": [
654
- 512
655
- ],
656
- "dtype": "float16",
657
- "format": "f32-to-bf16",
658
- "nbytes": 1024,
659
- "byteOffset": 31313408
660
- },
661
- {
662
- "name": "model.layers.13.self_attn.qkv_proj.q_weight",
663
- "shape": [
664
- 52,
665
- 1536
666
- ],
667
- "dtype": "uint32",
668
- "format": "f32-to-bf16",
669
- "nbytes": 319488,
670
- "byteOffset": 31314432
671
- },
672
- {
673
- "name": "model.layers.13.self_attn.qkv_proj.q_scale",
674
- "shape": [
675
- 13,
676
- 1536
677
- ],
678
- "dtype": "float16",
679
- "format": "f32-to-bf16",
680
- "nbytes": 39936,
681
- "byteOffset": 31633920
682
- },
683
- {
684
- "name": "model.layers.13.self_attn.o_proj.q_weight",
685
- "shape": [
686
- 52,
687
- 512
688
- ],
689
- "dtype": "uint32",
690
- "format": "f32-to-bf16",
691
- "nbytes": 106496,
692
- "byteOffset": 31673856
693
- },
694
- {
695
- "name": "model.layers.13.self_attn.o_proj.q_scale",
696
- "shape": [
697
- 13,
698
- 512
699
- ],
700
- "dtype": "float16",
701
- "format": "f32-to-bf16",
702
- "nbytes": 13312,
703
- "byteOffset": 31780352
704
- },
705
- {
706
- "name": "model.layers.14.input_layernorm.weight",
707
- "shape": [
708
- 512
709
- ],
710
- "dtype": "float16",
711
- "format": "f32-to-bf16",
712
- "nbytes": 1024,
713
- "byteOffset": 31793664
714
- },
715
- {
716
- "name": "model.layers.14.mlp.down_proj.q_weight",
717
- "shape": [
718
- 128,
719
- 512
720
- ],
721
- "dtype": "uint32",
722
- "format": "f32-to-bf16",
723
- "nbytes": 262144,
724
- "byteOffset": 31794688
725
- },
726
- {
727
- "name": "model.layers.14.mlp.down_proj.q_scale",
728
- "shape": [
729
- 32,
730
- 512
731
- ],
732
- "dtype": "float16",
733
- "format": "f32-to-bf16",
734
- "nbytes": 32768,
735
- "byteOffset": 32056832
736
- },
737
- {
738
- "name": "model.layers.14.mlp.gate_up_proj.q_weight",
739
- "shape": [
740
- 52,
741
- 2560
742
- ],
743
- "dtype": "uint32",
744
- "format": "f32-to-bf16",
745
- "nbytes": 532480,
746
- "byteOffset": 32089600
747
- },
748
- {
749
- "name": "model.layers.14.mlp.gate_up_proj.q_scale",
750
- "shape": [
751
- 13,
752
- 2560
753
- ],
754
- "dtype": "float16",
755
- "format": "f32-to-bf16",
756
- "nbytes": 66560,
757
- "byteOffset": 32622080
758
- },
759
- {
760
- "name": "model.layers.14.post_attention_layernorm.weight",
761
- "shape": [
762
- 512
763
- ],
764
- "dtype": "float16",
765
- "format": "f32-to-bf16",
766
- "nbytes": 1024,
767
- "byteOffset": 32688640
768
- },
769
- {
770
- "name": "model.layers.14.self_attn.qkv_proj.q_weight",
771
- "shape": [
772
- 52,
773
- 1536
774
- ],
775
- "dtype": "uint32",
776
- "format": "f32-to-bf16",
777
- "nbytes": 319488,
778
- "byteOffset": 32689664
779
- },
780
- {
781
- "name": "model.layers.14.self_attn.qkv_proj.q_scale",
782
- "shape": [
783
- 13,
784
- 1536
785
- ],
786
- "dtype": "float16",
787
- "format": "f32-to-bf16",
788
- "nbytes": 39936,
789
- "byteOffset": 33009152
790
- },
791
- {
792
- "name": "model.layers.14.self_attn.o_proj.q_weight",
793
- "shape": [
794
- 52,
795
- 512
796
- ],
797
- "dtype": "uint32",
798
- "format": "f32-to-bf16",
799
- "nbytes": 106496,
800
- "byteOffset": 33049088
801
- },
802
- {
803
- "name": "model.layers.14.self_attn.o_proj.q_scale",
804
- "shape": [
805
- 13,
806
- 512
807
- ],
808
- "dtype": "float16",
809
- "format": "f32-to-bf16",
810
- "nbytes": 13312,
811
- "byteOffset": 33155584
812
- },
813
- {
814
- "name": "model.layers.15.input_layernorm.weight",
815
- "shape": [
816
- 512
817
- ],
818
- "dtype": "float16",
819
- "format": "f32-to-bf16",
820
- "nbytes": 1024,
821
- "byteOffset": 33168896
822
- },
823
- {
824
- "name": "model.layers.15.mlp.down_proj.q_weight",
825
- "shape": [
826
- 128,
827
- 512
828
- ],
829
- "dtype": "uint32",
830
- "format": "f32-to-bf16",
831
- "nbytes": 262144,
832
- "byteOffset": 33169920
833
- },
834
- {
835
- "name": "model.layers.15.mlp.down_proj.q_scale",
836
- "shape": [
837
- 32,
838
- 512
839
- ],
840
- "dtype": "float16",
841
- "format": "f32-to-bf16",
842
- "nbytes": 32768,
843
- "byteOffset": 33432064
844
- }
845
- ],
846
- "md5sum": "938eddcccf9773ab32a164863d26a907"
847
- },
848
- {
849
- "dataPath": "params_shard_1.bin",
850
- "format": "raw-shard",
851
- "nbytes": 12082176,
852
- "records": [
853
- {
854
- "name": "model.layers.15.mlp.gate_up_proj.q_weight",
855
- "shape": [
856
- 52,
857
- 2560
858
- ],
859
- "dtype": "uint32",
860
- "format": "f32-to-bf16",
861
- "nbytes": 532480,
862
- "byteOffset": 0
863
- },
864
- {
865
- "name": "model.layers.15.mlp.gate_up_proj.q_scale",
866
- "shape": [
867
- 13,
868
- 2560
869
- ],
870
- "dtype": "float16",
871
- "format": "f32-to-bf16",
872
- "nbytes": 66560,
873
- "byteOffset": 532480
874
- },
875
- {
876
- "name": "model.layers.15.post_attention_layernorm.weight",
877
- "shape": [
878
- 512
879
- ],
880
- "dtype": "float16",
881
- "format": "f32-to-bf16",
882
- "nbytes": 1024,
883
- "byteOffset": 599040
884
- },
885
- {
886
- "name": "model.layers.15.self_attn.qkv_proj.q_weight",
887
- "shape": [
888
- 52,
889
- 1536
890
- ],
891
- "dtype": "uint32",
892
- "format": "f32-to-bf16",
893
- "nbytes": 319488,
894
- "byteOffset": 600064
895
- },
896
- {
897
- "name": "model.layers.15.self_attn.qkv_proj.q_scale",
898
- "shape": [
899
- 13,
900
- 1536
901
- ],
902
- "dtype": "float16",
903
- "format": "f32-to-bf16",
904
- "nbytes": 39936,
905
- "byteOffset": 919552
906
- },
907
- {
908
- "name": "model.layers.15.self_attn.o_proj.q_weight",
909
- "shape": [
910
- 52,
911
- 512
912
- ],
913
- "dtype": "uint32",
914
- "format": "f32-to-bf16",
915
- "nbytes": 106496,
916
- "byteOffset": 959488
917
- },
918
- {
919
- "name": "model.layers.15.self_attn.o_proj.q_scale",
920
- "shape": [
921
- 13,
922
- 512
923
- ],
924
- "dtype": "float16",
925
- "format": "f32-to-bf16",
926
- "nbytes": 13312,
927
- "byteOffset": 1065984
928
- },
929
- {
930
- "name": "model.layers.2.input_layernorm.weight",
931
- "shape": [
932
- 512
933
- ],
934
- "dtype": "float16",
935
- "format": "f32-to-bf16",
936
- "nbytes": 1024,
937
- "byteOffset": 1079296
938
- },
939
- {
940
- "name": "model.layers.2.mlp.down_proj.q_weight",
941
- "shape": [
942
- 128,
943
- 512
944
- ],
945
- "dtype": "uint32",
946
- "format": "f32-to-bf16",
947
- "nbytes": 262144,
948
- "byteOffset": 1080320
949
- },
950
- {
951
- "name": "model.layers.2.mlp.down_proj.q_scale",
952
- "shape": [
953
- 32,
954
- 512
955
- ],
956
- "dtype": "float16",
957
- "format": "f32-to-bf16",
958
- "nbytes": 32768,
959
- "byteOffset": 1342464
960
- },
961
- {
962
- "name": "model.layers.2.mlp.gate_up_proj.q_weight",
963
- "shape": [
964
- 52,
965
- 2560
966
- ],
967
- "dtype": "uint32",
968
- "format": "f32-to-bf16",
969
- "nbytes": 532480,
970
- "byteOffset": 1375232
971
- },
972
- {
973
- "name": "model.layers.2.mlp.gate_up_proj.q_scale",
974
- "shape": [
975
- 13,
976
- 2560
977
- ],
978
- "dtype": "float16",
979
- "format": "f32-to-bf16",
980
- "nbytes": 66560,
981
- "byteOffset": 1907712
982
- },
983
- {
984
- "name": "model.layers.2.post_attention_layernorm.weight",
985
- "shape": [
986
- 512
987
- ],
988
- "dtype": "float16",
989
- "format": "f32-to-bf16",
990
- "nbytes": 1024,
991
- "byteOffset": 1974272
992
- },
993
- {
994
- "name": "model.layers.2.self_attn.qkv_proj.q_weight",
995
- "shape": [
996
- 52,
997
- 1536
998
- ],
999
- "dtype": "uint32",
1000
- "format": "f32-to-bf16",
1001
- "nbytes": 319488,
1002
- "byteOffset": 1975296
1003
- },
1004
- {
1005
- "name": "model.layers.2.self_attn.qkv_proj.q_scale",
1006
- "shape": [
1007
- 13,
1008
- 1536
1009
- ],
1010
- "dtype": "float16",
1011
- "format": "f32-to-bf16",
1012
- "nbytes": 39936,
1013
- "byteOffset": 2294784
1014
- },
1015
- {
1016
- "name": "model.layers.2.self_attn.o_proj.q_weight",
1017
- "shape": [
1018
- 52,
1019
- 512
1020
- ],
1021
- "dtype": "uint32",
1022
- "format": "f32-to-bf16",
1023
- "nbytes": 106496,
1024
- "byteOffset": 2334720
1025
- },
1026
- {
1027
- "name": "model.layers.2.self_attn.o_proj.q_scale",
1028
- "shape": [
1029
- 13,
1030
- 512
1031
- ],
1032
- "dtype": "float16",
1033
- "format": "f32-to-bf16",
1034
- "nbytes": 13312,
1035
- "byteOffset": 2441216
1036
- },
1037
- {
1038
- "name": "model.layers.3.input_layernorm.weight",
1039
- "shape": [
1040
- 512
1041
- ],
1042
- "dtype": "float16",
1043
- "format": "f32-to-bf16",
1044
- "nbytes": 1024,
1045
- "byteOffset": 2454528
1046
- },
1047
- {
1048
- "name": "model.layers.3.mlp.down_proj.q_weight",
1049
- "shape": [
1050
- 128,
1051
- 512
1052
- ],
1053
- "dtype": "uint32",
1054
- "format": "f32-to-bf16",
1055
- "nbytes": 262144,
1056
- "byteOffset": 2455552
1057
- },
1058
- {
1059
- "name": "model.layers.3.mlp.down_proj.q_scale",
1060
- "shape": [
1061
- 32,
1062
- 512
1063
- ],
1064
- "dtype": "float16",
1065
- "format": "f32-to-bf16",
1066
- "nbytes": 32768,
1067
- "byteOffset": 2717696
1068
- },
1069
- {
1070
- "name": "model.layers.3.mlp.gate_up_proj.q_weight",
1071
- "shape": [
1072
- 52,
1073
- 2560
1074
- ],
1075
- "dtype": "uint32",
1076
- "format": "f32-to-bf16",
1077
- "nbytes": 532480,
1078
- "byteOffset": 2750464
1079
- },
1080
- {
1081
- "name": "model.layers.3.mlp.gate_up_proj.q_scale",
1082
- "shape": [
1083
- 13,
1084
- 2560
1085
- ],
1086
- "dtype": "float16",
1087
- "format": "f32-to-bf16",
1088
- "nbytes": 66560,
1089
- "byteOffset": 3282944
1090
- },
1091
- {
1092
- "name": "model.layers.3.post_attention_layernorm.weight",
1093
- "shape": [
1094
- 512
1095
- ],
1096
- "dtype": "float16",
1097
- "format": "f32-to-bf16",
1098
- "nbytes": 1024,
1099
- "byteOffset": 3349504
1100
- },
1101
- {
1102
- "name": "model.layers.3.self_attn.qkv_proj.q_weight",
1103
- "shape": [
1104
- 52,
1105
- 1536
1106
- ],
1107
- "dtype": "uint32",
1108
- "format": "f32-to-bf16",
1109
- "nbytes": 319488,
1110
- "byteOffset": 3350528
1111
- },
1112
- {
1113
- "name": "model.layers.3.self_attn.qkv_proj.q_scale",
1114
- "shape": [
1115
- 13,
1116
- 1536
1117
- ],
1118
- "dtype": "float16",
1119
- "format": "f32-to-bf16",
1120
- "nbytes": 39936,
1121
- "byteOffset": 3670016
1122
- },
1123
- {
1124
- "name": "model.layers.3.self_attn.o_proj.q_weight",
1125
- "shape": [
1126
- 52,
1127
- 512
1128
- ],
1129
- "dtype": "uint32",
1130
- "format": "f32-to-bf16",
1131
- "nbytes": 106496,
1132
- "byteOffset": 3709952
1133
- },
1134
- {
1135
- "name": "model.layers.3.self_attn.o_proj.q_scale",
1136
- "shape": [
1137
- 13,
1138
- 512
1139
- ],
1140
- "dtype": "float16",
1141
- "format": "f32-to-bf16",
1142
- "nbytes": 13312,
1143
- "byteOffset": 3816448
1144
- },
1145
- {
1146
- "name": "model.layers.4.input_layernorm.weight",
1147
- "shape": [
1148
- 512
1149
- ],
1150
- "dtype": "float16",
1151
- "format": "f32-to-bf16",
1152
- "nbytes": 1024,
1153
- "byteOffset": 3829760
1154
- },
1155
- {
1156
- "name": "model.layers.4.mlp.down_proj.q_weight",
1157
- "shape": [
1158
- 128,
1159
- 512
1160
- ],
1161
- "dtype": "uint32",
1162
- "format": "f32-to-bf16",
1163
- "nbytes": 262144,
1164
- "byteOffset": 3830784
1165
- },
1166
- {
1167
- "name": "model.layers.4.mlp.down_proj.q_scale",
1168
- "shape": [
1169
- 32,
1170
- 512
1171
- ],
1172
- "dtype": "float16",
1173
- "format": "f32-to-bf16",
1174
- "nbytes": 32768,
1175
- "byteOffset": 4092928
1176
- },
1177
- {
1178
- "name": "model.layers.4.mlp.gate_up_proj.q_weight",
1179
- "shape": [
1180
- 52,
1181
- 2560
1182
- ],
1183
- "dtype": "uint32",
1184
- "format": "f32-to-bf16",
1185
- "nbytes": 532480,
1186
- "byteOffset": 4125696
1187
- },
1188
- {
1189
- "name": "model.layers.4.mlp.gate_up_proj.q_scale",
1190
- "shape": [
1191
- 13,
1192
- 2560
1193
- ],
1194
- "dtype": "float16",
1195
- "format": "f32-to-bf16",
1196
- "nbytes": 66560,
1197
- "byteOffset": 4658176
1198
- },
1199
- {
1200
- "name": "model.layers.4.post_attention_layernorm.weight",
1201
- "shape": [
1202
- 512
1203
- ],
1204
- "dtype": "float16",
1205
- "format": "f32-to-bf16",
1206
- "nbytes": 1024,
1207
- "byteOffset": 4724736
1208
- },
1209
- {
1210
- "name": "model.layers.4.self_attn.qkv_proj.q_weight",
1211
- "shape": [
1212
- 52,
1213
- 1536
1214
- ],
1215
- "dtype": "uint32",
1216
- "format": "f32-to-bf16",
1217
- "nbytes": 319488,
1218
- "byteOffset": 4725760
1219
- },
1220
- {
1221
- "name": "model.layers.4.self_attn.qkv_proj.q_scale",
1222
- "shape": [
1223
- 13,
1224
- 1536
1225
- ],
1226
- "dtype": "float16",
1227
- "format": "f32-to-bf16",
1228
- "nbytes": 39936,
1229
- "byteOffset": 5045248
1230
- },
1231
- {
1232
- "name": "model.layers.4.self_attn.o_proj.q_weight",
1233
- "shape": [
1234
- 52,
1235
- 512
1236
- ],
1237
- "dtype": "uint32",
1238
- "format": "f32-to-bf16",
1239
- "nbytes": 106496,
1240
- "byteOffset": 5085184
1241
- },
1242
- {
1243
- "name": "model.layers.4.self_attn.o_proj.q_scale",
1244
- "shape": [
1245
- 13,
1246
- 512
1247
- ],
1248
- "dtype": "float16",
1249
- "format": "f32-to-bf16",
1250
- "nbytes": 13312,
1251
- "byteOffset": 5191680
1252
- },
1253
- {
1254
- "name": "model.layers.5.input_layernorm.weight",
1255
- "shape": [
1256
- 512
1257
- ],
1258
- "dtype": "float16",
1259
- "format": "f32-to-bf16",
1260
- "nbytes": 1024,
1261
- "byteOffset": 5204992
1262
- },
1263
- {
1264
- "name": "model.layers.5.mlp.down_proj.q_weight",
1265
- "shape": [
1266
- 128,
1267
- 512
1268
- ],
1269
- "dtype": "uint32",
1270
- "format": "f32-to-bf16",
1271
- "nbytes": 262144,
1272
- "byteOffset": 5206016
1273
- },
1274
- {
1275
- "name": "model.layers.5.mlp.down_proj.q_scale",
1276
- "shape": [
1277
- 32,
1278
- 512
1279
- ],
1280
- "dtype": "float16",
1281
- "format": "f32-to-bf16",
1282
- "nbytes": 32768,
1283
- "byteOffset": 5468160
1284
- },
1285
- {
1286
- "name": "model.layers.5.mlp.gate_up_proj.q_weight",
1287
- "shape": [
1288
- 52,
1289
- 2560
1290
- ],
1291
- "dtype": "uint32",
1292
- "format": "f32-to-bf16",
1293
- "nbytes": 532480,
1294
- "byteOffset": 5500928
1295
- },
1296
- {
1297
- "name": "model.layers.5.mlp.gate_up_proj.q_scale",
1298
- "shape": [
1299
- 13,
1300
- 2560
1301
- ],
1302
- "dtype": "float16",
1303
- "format": "f32-to-bf16",
1304
- "nbytes": 66560,
1305
- "byteOffset": 6033408
1306
- },
1307
- {
1308
- "name": "model.layers.5.post_attention_layernorm.weight",
1309
- "shape": [
1310
- 512
1311
- ],
1312
- "dtype": "float16",
1313
- "format": "f32-to-bf16",
1314
- "nbytes": 1024,
1315
- "byteOffset": 6099968
1316
- },
1317
- {
1318
- "name": "model.layers.5.self_attn.qkv_proj.q_weight",
1319
- "shape": [
1320
- 52,
1321
- 1536
1322
- ],
1323
- "dtype": "uint32",
1324
- "format": "f32-to-bf16",
1325
- "nbytes": 319488,
1326
- "byteOffset": 6100992
1327
- },
1328
- {
1329
- "name": "model.layers.5.self_attn.qkv_proj.q_scale",
1330
- "shape": [
1331
- 13,
1332
- 1536
1333
- ],
1334
- "dtype": "float16",
1335
- "format": "f32-to-bf16",
1336
- "nbytes": 39936,
1337
- "byteOffset": 6420480
1338
- },
1339
- {
1340
- "name": "model.layers.5.self_attn.o_proj.q_weight",
1341
- "shape": [
1342
- 52,
1343
- 512
1344
- ],
1345
- "dtype": "uint32",
1346
- "format": "f32-to-bf16",
1347
- "nbytes": 106496,
1348
- "byteOffset": 6460416
1349
- },
1350
- {
1351
- "name": "model.layers.5.self_attn.o_proj.q_scale",
1352
- "shape": [
1353
- 13,
1354
- 512
1355
- ],
1356
- "dtype": "float16",
1357
- "format": "f32-to-bf16",
1358
- "nbytes": 13312,
1359
- "byteOffset": 6566912
1360
- },
1361
- {
1362
- "name": "model.layers.6.input_layernorm.weight",
1363
- "shape": [
1364
- 512
1365
- ],
1366
- "dtype": "float16",
1367
- "format": "f32-to-bf16",
1368
- "nbytes": 1024,
1369
- "byteOffset": 6580224
1370
- },
1371
- {
1372
- "name": "model.layers.6.mlp.down_proj.q_weight",
1373
- "shape": [
1374
- 128,
1375
- 512
1376
- ],
1377
- "dtype": "uint32",
1378
- "format": "f32-to-bf16",
1379
- "nbytes": 262144,
1380
- "byteOffset": 6581248
1381
- },
1382
- {
1383
- "name": "model.layers.6.mlp.down_proj.q_scale",
1384
- "shape": [
1385
- 32,
1386
- 512
1387
- ],
1388
- "dtype": "float16",
1389
- "format": "f32-to-bf16",
1390
- "nbytes": 32768,
1391
- "byteOffset": 6843392
1392
- },
1393
- {
1394
- "name": "model.layers.6.mlp.gate_up_proj.q_weight",
1395
- "shape": [
1396
- 52,
1397
- 2560
1398
- ],
1399
- "dtype": "uint32",
1400
- "format": "f32-to-bf16",
1401
- "nbytes": 532480,
1402
- "byteOffset": 6876160
1403
- },
1404
- {
1405
- "name": "model.layers.6.mlp.gate_up_proj.q_scale",
1406
- "shape": [
1407
- 13,
1408
- 2560
1409
- ],
1410
- "dtype": "float16",
1411
- "format": "f32-to-bf16",
1412
- "nbytes": 66560,
1413
- "byteOffset": 7408640
1414
- },
1415
- {
1416
- "name": "model.layers.6.post_attention_layernorm.weight",
1417
- "shape": [
1418
- 512
1419
- ],
1420
- "dtype": "float16",
1421
- "format": "f32-to-bf16",
1422
- "nbytes": 1024,
1423
- "byteOffset": 7475200
1424
- },
1425
- {
1426
- "name": "model.layers.6.self_attn.qkv_proj.q_weight",
1427
- "shape": [
1428
- 52,
1429
- 1536
1430
- ],
1431
- "dtype": "uint32",
1432
- "format": "f32-to-bf16",
1433
- "nbytes": 319488,
1434
- "byteOffset": 7476224
1435
- },
1436
- {
1437
- "name": "model.layers.6.self_attn.qkv_proj.q_scale",
1438
- "shape": [
1439
- 13,
1440
- 1536
1441
- ],
1442
- "dtype": "float16",
1443
- "format": "f32-to-bf16",
1444
- "nbytes": 39936,
1445
- "byteOffset": 7795712
1446
- },
1447
- {
1448
- "name": "model.layers.6.self_attn.o_proj.q_weight",
1449
- "shape": [
1450
- 52,
1451
- 512
1452
- ],
1453
- "dtype": "uint32",
1454
- "format": "f32-to-bf16",
1455
- "nbytes": 106496,
1456
- "byteOffset": 7835648
1457
- },
1458
- {
1459
- "name": "model.layers.6.self_attn.o_proj.q_scale",
1460
- "shape": [
1461
- 13,
1462
- 512
1463
- ],
1464
- "dtype": "float16",
1465
- "format": "f32-to-bf16",
1466
- "nbytes": 13312,
1467
- "byteOffset": 7942144
1468
- },
1469
- {
1470
- "name": "model.layers.7.input_layernorm.weight",
1471
- "shape": [
1472
- 512
1473
- ],
1474
- "dtype": "float16",
1475
- "format": "f32-to-bf16",
1476
- "nbytes": 1024,
1477
- "byteOffset": 7955456
1478
- },
1479
- {
1480
- "name": "model.layers.7.mlp.down_proj.q_weight",
1481
- "shape": [
1482
- 128,
1483
- 512
1484
- ],
1485
- "dtype": "uint32",
1486
- "format": "f32-to-bf16",
1487
- "nbytes": 262144,
1488
- "byteOffset": 7956480
1489
- },
1490
- {
1491
- "name": "model.layers.7.mlp.down_proj.q_scale",
1492
- "shape": [
1493
- 32,
1494
- 512
1495
- ],
1496
- "dtype": "float16",
1497
- "format": "f32-to-bf16",
1498
- "nbytes": 32768,
1499
- "byteOffset": 8218624
1500
- },
1501
- {
1502
- "name": "model.layers.7.mlp.gate_up_proj.q_weight",
1503
- "shape": [
1504
- 52,
1505
- 2560
1506
- ],
1507
- "dtype": "uint32",
1508
- "format": "f32-to-bf16",
1509
- "nbytes": 532480,
1510
- "byteOffset": 8251392
1511
- },
1512
- {
1513
- "name": "model.layers.7.mlp.gate_up_proj.q_scale",
1514
- "shape": [
1515
- 13,
1516
- 2560
1517
- ],
1518
- "dtype": "float16",
1519
- "format": "f32-to-bf16",
1520
- "nbytes": 66560,
1521
- "byteOffset": 8783872
1522
- },
1523
- {
1524
- "name": "model.layers.7.post_attention_layernorm.weight",
1525
- "shape": [
1526
- 512
1527
- ],
1528
- "dtype": "float16",
1529
- "format": "f32-to-bf16",
1530
- "nbytes": 1024,
1531
- "byteOffset": 8850432
1532
- },
1533
- {
1534
- "name": "model.layers.7.self_attn.qkv_proj.q_weight",
1535
- "shape": [
1536
- 52,
1537
- 1536
1538
- ],
1539
- "dtype": "uint32",
1540
- "format": "f32-to-bf16",
1541
- "nbytes": 319488,
1542
- "byteOffset": 8851456
1543
- },
1544
- {
1545
- "name": "model.layers.7.self_attn.qkv_proj.q_scale",
1546
- "shape": [
1547
- 13,
1548
- 1536
1549
- ],
1550
- "dtype": "float16",
1551
- "format": "f32-to-bf16",
1552
- "nbytes": 39936,
1553
- "byteOffset": 9170944
1554
- },
1555
- {
1556
- "name": "model.layers.7.self_attn.o_proj.q_weight",
1557
- "shape": [
1558
- 52,
1559
- 512
1560
- ],
1561
- "dtype": "uint32",
1562
- "format": "f32-to-bf16",
1563
- "nbytes": 106496,
1564
- "byteOffset": 9210880
1565
- },
1566
- {
1567
- "name": "model.layers.7.self_attn.o_proj.q_scale",
1568
- "shape": [
1569
- 13,
1570
- 512
1571
- ],
1572
- "dtype": "float16",
1573
- "format": "f32-to-bf16",
1574
- "nbytes": 13312,
1575
- "byteOffset": 9317376
1576
- },
1577
- {
1578
- "name": "model.layers.8.input_layernorm.weight",
1579
- "shape": [
1580
- 512
1581
- ],
1582
- "dtype": "float16",
1583
- "format": "f32-to-bf16",
1584
- "nbytes": 1024,
1585
- "byteOffset": 9330688
1586
- },
1587
- {
1588
- "name": "model.layers.8.mlp.down_proj.q_weight",
1589
- "shape": [
1590
- 128,
1591
- 512
1592
- ],
1593
- "dtype": "uint32",
1594
- "format": "f32-to-bf16",
1595
- "nbytes": 262144,
1596
- "byteOffset": 9331712
1597
- },
1598
- {
1599
- "name": "model.layers.8.mlp.down_proj.q_scale",
1600
- "shape": [
1601
- 32,
1602
- 512
1603
- ],
1604
- "dtype": "float16",
1605
- "format": "f32-to-bf16",
1606
- "nbytes": 32768,
1607
- "byteOffset": 9593856
1608
- },
1609
- {
1610
- "name": "model.layers.8.mlp.gate_up_proj.q_weight",
1611
- "shape": [
1612
- 52,
1613
- 2560
1614
- ],
1615
- "dtype": "uint32",
1616
- "format": "f32-to-bf16",
1617
- "nbytes": 532480,
1618
- "byteOffset": 9626624
1619
- },
1620
- {
1621
- "name": "model.layers.8.mlp.gate_up_proj.q_scale",
1622
- "shape": [
1623
- 13,
1624
- 2560
1625
- ],
1626
- "dtype": "float16",
1627
- "format": "f32-to-bf16",
1628
- "nbytes": 66560,
1629
- "byteOffset": 10159104
1630
- },
1631
- {
1632
- "name": "model.layers.8.post_attention_layernorm.weight",
1633
- "shape": [
1634
- 512
1635
- ],
1636
- "dtype": "float16",
1637
- "format": "f32-to-bf16",
1638
- "nbytes": 1024,
1639
- "byteOffset": 10225664
1640
- },
1641
- {
1642
- "name": "model.layers.8.self_attn.qkv_proj.q_weight",
1643
- "shape": [
1644
- 52,
1645
- 1536
1646
- ],
1647
- "dtype": "uint32",
1648
- "format": "f32-to-bf16",
1649
- "nbytes": 319488,
1650
- "byteOffset": 10226688
1651
- },
1652
- {
1653
- "name": "model.layers.8.self_attn.qkv_proj.q_scale",
1654
- "shape": [
1655
- 13,
1656
- 1536
1657
- ],
1658
- "dtype": "float16",
1659
- "format": "f32-to-bf16",
1660
- "nbytes": 39936,
1661
- "byteOffset": 10546176
1662
- },
1663
- {
1664
- "name": "model.layers.8.self_attn.o_proj.q_weight",
1665
- "shape": [
1666
- 52,
1667
- 512
1668
- ],
1669
- "dtype": "uint32",
1670
- "format": "f32-to-bf16",
1671
- "nbytes": 106496,
1672
- "byteOffset": 10586112
1673
- },
1674
- {
1675
- "name": "model.layers.8.self_attn.o_proj.q_scale",
1676
- "shape": [
1677
- 13,
1678
- 512
1679
- ],
1680
- "dtype": "float16",
1681
- "format": "f32-to-bf16",
1682
- "nbytes": 13312,
1683
- "byteOffset": 10692608
1684
- },
1685
- {
1686
- "name": "model.layers.9.input_layernorm.weight",
1687
- "shape": [
1688
- 512
1689
- ],
1690
- "dtype": "float16",
1691
- "format": "f32-to-bf16",
1692
- "nbytes": 1024,
1693
- "byteOffset": 10705920
1694
- },
1695
- {
1696
- "name": "model.layers.9.mlp.down_proj.q_weight",
1697
- "shape": [
1698
- 128,
1699
- 512
1700
- ],
1701
- "dtype": "uint32",
1702
- "format": "f32-to-bf16",
1703
- "nbytes": 262144,
1704
- "byteOffset": 10706944
1705
- },
1706
- {
1707
- "name": "model.layers.9.mlp.down_proj.q_scale",
1708
- "shape": [
1709
- 32,
1710
- 512
1711
- ],
1712
- "dtype": "float16",
1713
- "format": "f32-to-bf16",
1714
- "nbytes": 32768,
1715
- "byteOffset": 10969088
1716
- },
1717
- {
1718
- "name": "model.layers.9.mlp.gate_up_proj.q_weight",
1719
- "shape": [
1720
- 52,
1721
- 2560
1722
- ],
1723
- "dtype": "uint32",
1724
- "format": "f32-to-bf16",
1725
- "nbytes": 532480,
1726
- "byteOffset": 11001856
1727
- },
1728
- {
1729
- "name": "model.layers.9.mlp.gate_up_proj.q_scale",
1730
- "shape": [
1731
- 13,
1732
- 2560
1733
- ],
1734
- "dtype": "float16",
1735
- "format": "f32-to-bf16",
1736
- "nbytes": 66560,
1737
- "byteOffset": 11534336
1738
- },
1739
- {
1740
- "name": "model.layers.9.post_attention_layernorm.weight",
1741
- "shape": [
1742
- 512
1743
- ],
1744
- "dtype": "float16",
1745
- "format": "f32-to-bf16",
1746
- "nbytes": 1024,
1747
- "byteOffset": 11600896
1748
- },
1749
- {
1750
- "name": "model.layers.9.self_attn.qkv_proj.q_weight",
1751
- "shape": [
1752
- 52,
1753
- 1536
1754
- ],
1755
- "dtype": "uint32",
1756
- "format": "f32-to-bf16",
1757
- "nbytes": 319488,
1758
- "byteOffset": 11601920
1759
- },
1760
- {
1761
- "name": "model.layers.9.self_attn.qkv_proj.q_scale",
1762
- "shape": [
1763
- 13,
1764
- 1536
1765
- ],
1766
- "dtype": "float16",
1767
- "format": "f32-to-bf16",
1768
- "nbytes": 39936,
1769
- "byteOffset": 11921408
1770
- },
1771
- {
1772
- "name": "model.layers.9.self_attn.o_proj.q_weight",
1773
- "shape": [
1774
- 52,
1775
- 512
1776
- ],
1777
- "dtype": "uint32",
1778
- "format": "f32-to-bf16",
1779
- "nbytes": 106496,
1780
- "byteOffset": 11961344
1781
- },
1782
- {
1783
- "name": "model.layers.9.self_attn.o_proj.q_scale",
1784
- "shape": [
1785
- 13,
1786
- 512
1787
- ],
1788
- "dtype": "float16",
1789
- "format": "f32-to-bf16",
1790
- "nbytes": 13312,
1791
- "byteOffset": 12067840
1792
- },
1793
- {
1794
- "name": "model.norm.weight",
1795
- "shape": [
1796
- 512
1797
- ],
1798
- "dtype": "float16",
1799
- "format": "f32-to-bf16",
1800
- "nbytes": 1024,
1801
- "byteOffset": 12081152
1802
- }
1803
- ],
1804
- "md5sum": "0a36288a5e64434e1a495fc10ceb53db"
1805
- }
1806
- ]
1807
- }