riczhou commited on
Commit
3b0ced1
1 Parent(s): f448c13

Initial commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
logs.txt ADDED
@@ -0,0 +1,334 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/198 [00:00<?, ?it/s]
1
 
 
2
  0%| | 0/198 [00:00<?, ?it/s]
3
 
 
4
  0%| | 0/198 [00:02<?, ?it/s]
5
  1%| | 1/198 [00:03<12:17, 3.74s/it]
6
 
 
7
  1%| | 1/198 [00:03<12:17, 3.74s/it]
8
 
 
9
  1%| | 1/198 [00:03<12:17, 3.74s/it]
10
 
 
11
  1%| | 1/198 [00:03<12:17, 3.74s/it]
12
  2%|▏ | 4/198 [00:04<02:35, 1.25it/s]
13
 
 
14
  2%|▏ | 4/198 [00:04<02:35, 1.25it/s]
15
 
 
16
  2%|▏ | 4/198 [00:04<02:35, 1.25it/s]
17
 
 
18
  2%|▏ | 4/198 [00:04<02:35, 1.25it/s]
19
 
 
20
  2%|▏ | 4/198 [00:04<02:35, 1.25it/s]
21
 
 
22
  2%|▏ | 4/198 [00:04<02:35, 1.25it/s]
23
 
 
24
  2%|▏ | 4/198 [00:04<02:35, 1.25it/s]
25
  5%|▌ | 10/198 [00:04<00:49, 3.78it/s]
26
 
 
27
  5%|▌ | 10/198 [00:04<00:49, 3.78it/s]
28
 
 
29
  5%|▌ | 10/198 [00:04<00:49, 3.78it/s]
30
  6%|▌ | 12/198 [00:04<00:42, 4.36it/s]
31
 
 
32
  6%|▌ | 12/198 [00:04<00:42, 4.36it/s]
33
 
 
34
  6%|▌ | 12/198 [00:04<00:42, 4.36it/s]
35
 
 
36
  6%|▌ | 12/198 [00:04<00:42, 4.36it/s]
37
 
 
38
  6%|▌ | 12/198 [00:04<00:42, 4.36it/s]
39
 
 
40
  6%|▌ | 12/198 [00:04<00:42, 4.36it/s]
41
  9%|▊ | 17/198 [00:04<00:24, 7.42it/s]
42
 
 
43
  9%|▊ | 17/198 [00:04<00:24, 7.42it/s]
44
 
 
45
  9%|▊ | 17/198 [00:04<00:24, 7.42it/s]
46
 
 
47
  9%|▊ | 17/198 [00:04<00:24, 7.42it/s]
48
  10%|█ | 20/198 [00:04<00:21, 8.33it/s]
49
 
 
50
  10%|█ | 20/198 [00:04<00:21, 8.33it/s]
51
 
 
52
  10%|█ | 20/198 [00:04<00:21, 8.33it/s]
53
 
 
54
  10%|█ | 20/198 [00:04<00:21, 8.33it/s]
55
 
 
56
  10%|█ | 20/198 [00:05<00:21, 8.33it/s]
57
  12%|█▏ | 24/198 [00:05<00:15, 11.06it/s]
58
 
 
59
  12%|█▏ | 24/198 [00:05<00:15, 11.06it/s]
60
 
 
61
  12%|█▏ | 24/198 [00:05<00:15, 11.06it/s]
62
 
 
63
  12%|█▏ | 24/198 [00:05<00:15, 11.06it/s]
64
  14%|█▎ | 27/198 [00:05<00:15, 11.30it/s]
65
 
 
66
  14%|█▎ | 27/198 [00:05<00:15, 11.30it/s]
67
 
 
68
  14%|█▎ | 27/198 [00:05<00:15, 11.30it/s]
69
 
 
70
  14%|█▎ | 27/198 [00:05<00:15, 11.30it/s]
71
 
 
72
  14%|█▎ | 27/198 [00:05<00:15, 11.30it/s]
73
  16%|█▌ | 31/198 [00:05<00:12, 13.34it/s]
74
 
 
75
  16%|█▌ | 31/198 [00:05<00:12, 13.34it/s]
76
 
 
77
  16%|█▌ | 31/198 [00:05<00:12, 13.34it/s]
78
  17%|█▋ | 33/198 [00:05<00:13, 11.79it/s]
79
 
 
80
  17%|█▋ | 33/198 [00:05<00:13, 11.79it/s]
81
 
 
82
  17%|█▋ | 33/198 [00:05<00:13, 11.79it/s]
83
 
 
84
  17%|█▋ | 33/198 [00:05<00:13, 11.79it/s]
85
 
 
86
  17%|█▋ | 33/198 [00:05<00:13, 11.79it/s]
87
 
 
88
  17%|█▋ | 33/198 [00:05<00:13, 11.79it/s]
89
  19%|█▉ | 38/198 [00:05<00:09, 16.36it/s]
90
 
 
91
  19%|█▉ | 38/198 [00:06<00:09, 16.36it/s]
92
 
 
93
  19%|█▉ | 38/198 [00:06<00:09, 16.36it/s]
94
 
 
95
  19%|█▉ | 38/198 [00:06<00:09, 16.36it/s]
96
  21%|██ | 41/198 [00:06<00:10, 14.94it/s]
97
 
 
98
  21%|██ | 41/198 [00:06<00:10, 14.94it/s]
99
 
 
100
  21%|██ | 41/198 [00:06<00:10, 14.94it/s]
101
 
 
102
  21%|██ | 41/198 [00:06<00:10, 14.94it/s]
103
 
 
104
  21%|██ | 41/198 [00:06<00:10, 14.94it/s]
105
  23%|██▎ | 45/198 [00:06<00:08, 17.52it/s]
106
 
 
107
  23%|██▎ | 45/198 [00:06<00:08, 17.52it/s]
108
 
 
109
  23%|██▎ | 45/198 [00:06<00:08, 17.52it/s]
110
 
 
111
  23%|██▎ | 45/198 [00:06<00:08, 17.52it/s]
112
  24%|██▍ | 48/198 [00:06<00:09, 15.28it/s]
113
 
 
114
  24%|██▍ | 48/198 [00:06<00:09, 15.28it/s]
115
 
 
116
  24%|██▍ | 48/198 [00:06<00:09, 15.28it/s]
117
 
 
118
  24%|██▍ | 48/198 [00:06<00:09, 15.28it/s]
119
 
 
120
  24%|██▍ | 48/198 [00:06<00:09, 15.28it/s]
121
  26%|██▋ | 52/198 [00:06<00:08, 16.47it/s]
122
 
 
123
  26%|██▋ | 52/198 [00:06<00:08, 16.47it/s]
124
 
 
125
  26%|██▋ | 52/198 [00:07<00:08, 16.47it/s]
126
  27%|██▋ | 54/198 [00:07<00:10, 13.51it/s]
127
 
 
128
  27%|██▋ | 54/198 [00:07<00:10, 13.51it/s]
129
 
 
130
  27%|██▋ | 54/198 [00:07<00:10, 13.51it/s]
131
 
 
132
  27%|██▋ | 54/198 [00:07<00:10, 13.51it/s]
133
 
 
134
  27%|██▋ | 54/198 [00:07<00:10, 13.51it/s]
135
 
 
136
  27%|██▋ | 54/198 [00:07<00:10, 13.51it/s]
137
  30%|██▉ | 59/198 [00:07<00:07, 18.05it/s]
138
 
 
139
  30%|██▉ | 59/198 [00:07<00:07, 18.05it/s]
140
 
 
141
  30%|██▉ | 59/198 [00:07<00:07, 18.05it/s]
142
 
 
143
  30%|██▉ | 59/198 [00:07<00:07, 18.05it/s]
144
  31%|███▏ | 62/198 [00:07<00:08, 15.72it/s]
145
 
 
146
  31%|███▏ | 62/198 [00:07<00:08, 15.72it/s]
147
 
 
148
  31%|███▏ | 62/198 [00:07<00:08, 15.72it/s]
149
 
 
150
  31%|███▏ | 62/198 [00:07<00:08, 15.72it/s]
151
 
 
152
  31%|███▏ | 62/198 [00:07<00:08, 15.72it/s]
153
  33%|███▎ | 66/198 [00:07<00:07, 18.21it/s]
154
 
 
155
  33%|███▎ | 66/198 [00:07<00:07, 18.21it/s]
156
 
 
157
  33%|███▎ | 66/198 [00:07<00:07, 18.21it/s]
158
 
 
159
  33%|███▎ | 66/198 [00:07<00:07, 18.21it/s]
160
  35%|███▍ | 69/198 [00:07<00:08, 15.96it/s]
161
 
 
162
  35%|███▍ | 69/198 [00:07<00:08, 15.96it/s]
163
 
 
164
  35%|███▍ | 69/198 [00:07<00:08, 15.96it/s]
165
 
 
166
  35%|███▍ | 69/198 [00:07<00:08, 15.96it/s]
167
 
 
168
  35%|███▍ | 69/198 [00:07<00:08, 15.96it/s]
169
  37%|███▋ | 73/198 [00:08<00:07, 17.19it/s]
170
 
 
171
  37%|███▋ | 73/198 [00:08<00:07, 17.19it/s]
172
 
 
173
  37%|███▋ | 73/198 [00:08<00:07, 17.19it/s]
174
  38%|███▊ | 75/198 [00:08<00:08, 14.09it/s]
175
 
 
176
  38%|███▊ | 75/198 [00:08<00:08, 14.09it/s]
177
 
 
178
  38%|███▊ | 75/198 [00:08<00:08, 14.09it/s]
179
 
 
180
  38%|███▊ | 75/198 [00:08<00:08, 14.09it/s]
181
 
 
182
  38%|███▊ | 75/198 [00:08<00:08, 14.09it/s]
183
 
 
184
  38%|███▊ | 75/198 [00:08<00:08, 14.09it/s]
185
  40%|████ | 80/198 [00:08<00:06, 18.76it/s]
186
 
 
187
  40%|████ | 80/198 [00:08<00:06, 18.76it/s]
188
 
 
189
  40%|████ | 80/198 [00:08<00:06, 18.76it/s]
190
 
 
191
  40%|████ | 80/198 [00:08<00:06, 18.76it/s]
192
  42%|████▏ | 83/198 [00:08<00:07, 16.28it/s]
193
 
 
194
  42%|████▏ | 83/198 [00:08<00:07, 16.28it/s]
195
 
 
196
  42%|████▏ | 83/198 [00:08<00:07, 16.28it/s]
197
 
 
198
  42%|████▏ | 83/198 [00:08<00:07, 16.28it/s]
199
 
 
200
  42%|████▏ | 83/198 [00:08<00:07, 16.28it/s]
201
  44%|████▍ | 87/198 [00:08<00:05, 18.73it/s]
202
 
 
203
  44%|████▍ | 87/198 [00:08<00:05, 18.73it/s]
204
 
 
205
  44%|████▍ | 87/198 [00:09<00:05, 18.73it/s]
206
 
 
207
  44%|████▍ | 87/198 [00:09<00:05, 18.73it/s]
208
  45%|████▌ | 90/198 [00:09<00:06, 16.24it/s]
209
 
 
210
  45%|████▌ | 90/198 [00:09<00:06, 16.24it/s]
211
 
 
212
  45%|████▌ | 90/198 [00:09<00:06, 16.24it/s]
213
 
 
214
  45%|████▌ | 90/198 [00:09<00:06, 16.24it/s]
215
 
 
216
  45%|████▌ | 90/198 [00:09<00:06, 16.24it/s]
217
  47%|████▋ | 94/198 [00:09<00:05, 17.41it/s]
218
 
 
219
  47%|████▋ | 94/198 [00:09<00:05, 17.41it/s]
220
 
 
221
  47%|████▋ | 94/198 [00:09<00:05, 17.41it/s]
222
  48%|████▊ | 96/198 [00:09<00:07, 14.21it/s]
223
 
 
224
  48%|████▊ | 96/198 [00:09<00:07, 14.21it/s]
225
 
 
226
  48%|████▊ | 96/198 [00:09<00:07, 14.21it/s]
227
 
 
228
  48%|████▊ | 96/198 [00:09<00:07, 14.21it/s]
229
 
 
230
  48%|████▊ | 96/198 [00:09<00:07, 14.21it/s]
231
 
 
232
  48%|████▊ | 96/198 [00:09<00:07, 14.21it/s]
233
  51%|█████ | 101/198 [00:09<00:05, 18.90it/s]
234
 
 
235
  51%|█████ | 101/198 [00:09<00:05, 18.90it/s]
236
 
 
237
  51%|█████ | 101/198 [00:09<00:05, 18.90it/s]
238
 
 
239
  51%|█████ | 101/198 [00:09<00:05, 18.90it/s]
240
  53%|█████▎ | 104/198 [00:09<00:05, 16.42it/s]
241
 
 
242
  53%|█████▎ | 104/198 [00:09<00:05, 16.42it/s]
243
 
 
244
  53%|█████▎ | 104/198 [00:09<00:05, 16.42it/s]
245
 
 
246
  53%|█████▎ | 104/198 [00:09<00:05, 16.42it/s]
247
 
 
248
  53%|█████▎ | 104/198 [00:09<00:05, 16.42it/s]
249
  55%|█████▍ | 108/198 [00:10<00:04, 18.92it/s]
250
 
 
251
  55%|█████▍ | 108/198 [00:10<00:04, 18.92it/s]
252
 
 
253
  55%|█████▍ | 108/198 [00:10<00:04, 18.92it/s]
254
 
 
255
  55%|█████▍ | 108/198 [00:10<00:04, 18.92it/s]
256
  56%|█████▌ | 111/198 [00:10<00:05, 16.19it/s]
257
 
 
258
  56%|█████▌ | 111/198 [00:10<00:05, 16.19it/s]
259
 
 
260
  56%|█████▌ | 111/198 [00:10<00:05, 16.19it/s]
261
 
 
262
  56%|█████▌ | 111/198 [00:10<00:05, 16.19it/s]
263
 
 
264
  56%|█████▌ | 111/198 [00:10<00:05, 16.19it/s]
265
  58%|█████▊ | 115/198 [00:10<00:04, 17.36it/s]
266
 
 
267
  58%|█████▊ | 115/198 [00:10<00:04, 17.36it/s]
268
 
 
269
  58%|█████▊ | 115/198 [00:10<00:04, 17.36it/s]
270
  59%|█████▉ | 117/198 [00:10<00:05, 14.20it/s]
271
 
 
272
  59%|█████▉ | 117/198 [00:10<00:05, 14.20it/s]
273
 
 
274
  59%|█████▉ | 117/198 [00:10<00:05, 14.20it/s]
275
 
 
276
  59%|█████▉ | 117/198 [00:10<00:05, 14.20it/s]
277
 
 
278
  59%|█████▉ | 117/198 [00:10<00:05, 14.20it/s]
279
 
 
280
  59%|█████▉ | 117/198 [00:10<00:05, 14.20it/s]
281
  62%|██████▏ | 122/198 [00:10<00:04, 18.89it/s]
282
 
 
283
  62%|██████▏ | 122/198 [00:11<00:04, 18.89it/s]
284
 
 
285
  62%|██████▏ | 122/198 [00:11<00:04, 18.89it/s]
286
 
 
287
  62%|██████▏ | 122/198 [00:11<00:04, 18.89it/s]
288
  63%|██████▎ | 125/198 [00:11<00:04, 16.47it/s]
289
 
 
290
  63%|██████▎ | 125/198 [00:11<00:04, 16.47it/s]
291
 
 
292
  63%|██████▎ | 125/198 [00:11<00:04, 16.47it/s]
293
 
 
294
  63%|██████▎ | 125/198 [00:11<00:04, 16.47it/s]
295
 
 
296
  63%|██████▎ | 125/198 [00:11<00:04, 16.47it/s]
297
  65%|██████▌ | 129/198 [00:11<00:03, 19.03it/s]
298
 
 
299
  65%|██████▌ | 129/198 [00:11<00:03, 19.03it/s]
300
 
 
301
  65%|██████▌ | 129/198 [00:11<00:03, 19.03it/s]
302
 
 
303
  65%|██████▌ | 129/198 [00:11<00:03, 19.03it/s]
304
  67%|██████▋ | 132/198 [00:11<00:04, 16.50it/s]
305
 
 
306
  67%|██████▋ | 132/198 [00:11<00:04, 16.50it/s]
307
 
 
308
  67%|██████▋ | 132/198 [00:11<00:04, 16.50it/s]
309
 
 
310
  67%|██████▋ | 132/198 [00:11<00:04, 16.50it/s]
311
 
 
312
  67%|██████▋ | 132/198 [00:11<00:04, 16.50it/s]
313
  69%|██████▊ | 136/198 [00:11<00:03, 17.60it/s]
314
 
 
315
  69%|██████▊ | 136/198 [00:11<00:03, 17.60it/s]
316
 
 
317
  69%|██████▊ | 136/198 [00:11<00:03, 17.60it/s]
318
  70%|██████▉ | 138/198 [00:11<00:04, 14.32it/s]
319
 
 
320
  70%|██████▉ | 138/198 [00:11<00:04, 14.32it/s]
321
 
 
322
  70%|██████▉ | 138/198 [00:12<00:04, 14.32it/s]
323
 
 
324
  70%|██████▉ | 138/198 [00:12<00:04, 14.32it/s]
325
 
 
326
  70%|██████▉ | 138/198 [00:12<00:04, 14.32it/s]
327
 
 
328
  70%|██████▉ | 138/198 [00:12<00:04, 14.32it/s]
329
  72%|███████▏ | 143/198 [00:12<00:02, 19.01it/s]
330
 
 
331
  72%|███████▏ | 143/198 [00:12<00:02, 19.01it/s]
332
 
 
333
  72%|███████▏ | 143/198 [00:12<00:02, 19.01it/s]
334
 
 
335
  72%|███████▏ | 143/198 [00:12<00:02, 19.01it/s]
336
  74%|███████▎ | 146/198 [00:12<00:03, 16.53it/s]
337
 
 
338
  74%|███████▎ | 146/198 [00:12<00:03, 16.53it/s]
339
 
 
340
  74%|███████▎ | 146/198 [00:12<00:03, 16.53it/s]
341
 
 
342
  74%|███████▎ | 146/198 [00:12<00:03, 16.53it/s]
343
 
 
344
  74%|███████▎ | 146/198 [00:12<00:03, 16.53it/s]
345
  76%|███████▌ | 150/198 [00:12<00:02, 19.07it/s]
346
 
 
347
  76%|███████▌ | 150/198 [00:12<00:02, 19.07it/s]
348
 
 
349
  76%|███████▌ | 150/198 [00:12<00:02, 19.07it/s]
350
 
 
351
  76%|███████▌ | 150/198 [00:12<00:02, 19.07it/s]
352
  77%|███████▋ | 153/198 [00:12<00:02, 16.50it/s]
353
 
 
354
  77%|███████▋ | 153/198 [00:12<00:02, 16.50it/s]
355
 
 
356
  77%|███████▋ | 153/198 [00:12<00:02, 16.50it/s]
357
 
 
358
  77%|███████▋ | 153/198 [00:12<00:02, 16.50it/s]
359
 
 
360
  77%|███████▋ | 153/198 [00:12<00:02, 16.50it/s]
361
  79%|███████▉ | 157/198 [00:12<00:02, 17.62it/s]
362
 
 
363
  79%|███████▉ | 157/198 [00:13<00:02, 17.62it/s]
364
 
 
365
  79%|███████▉ | 157/198 [00:13<00:02, 17.62it/s]
366
  80%|████████ | 159/198 [00:13<00:02, 14.34it/s]
367
 
 
368
  80%|████████ | 159/198 [00:13<00:02, 14.34it/s]
369
 
 
370
  80%|████████ | 159/198 [00:13<00:02, 14.34it/s]
371
 
 
372
  80%|████████ | 159/198 [00:13<00:02, 14.34it/s]
373
 
 
374
  80%|████████ | 159/198 [00:13<00:02, 14.34it/s]
375
 
 
376
  80%|████████ | 159/198 [00:13<00:02, 14.34it/s]
377
  83%|████████▎ | 164/198 [00:13<00:01, 19.03it/s]
378
 
 
379
  83%|████████▎ | 164/198 [00:13<00:01, 19.03it/s]
380
 
 
381
  83%|████████▎ | 164/198 [00:13<00:01, 19.03it/s]
382
 
 
383
  83%|████████▎ | 164/198 [00:13<00:01, 19.03it/s]
384
  84%|████████▍ | 167/198 [00:13<00:01, 16.56it/s]
385
 
 
386
  84%|████████▍ | 167/198 [00:13<00:01, 16.56it/s]
387
 
 
388
  84%|████████▍ | 167/198 [00:13<00:01, 16.56it/s]
389
 
 
390
  84%|████████▍ | 167/198 [00:13<00:01, 16.56it/s]
391
 
 
392
  84%|████████▍ | 167/198 [00:13<00:01, 16.56it/s]
393
  86%|████████▋ | 171/198 [00:13<00:01, 19.12it/s]
394
 
 
395
  86%|████████▋ | 171/198 [00:13<00:01, 19.12it/s]
396
 
 
397
  86%|████████▋ | 171/198 [00:13<00:01, 19.12it/s]
398
 
 
399
  86%|████████▋ | 171/198 [00:14<00:01, 19.12it/s]
400
  88%|████████▊ | 174/198 [00:14<00:01, 16.58it/s]
401
 
 
402
  88%|████████▊ | 174/198 [00:14<00:01, 16.58it/s]
403
 
 
404
  88%|████████▊ | 174/198 [00:14<00:01, 16.58it/s]
405
 
 
406
  88%|████████▊ | 174/198 [00:14<00:01, 16.58it/s]
407
 
 
408
  88%|████████▊ | 174/198 [00:14<00:01, 16.58it/s]
409
  90%|████████▉ | 178/198 [00:14<00:01, 17.67it/s]
410
 
 
411
  90%|████████▉ | 178/198 [00:14<00:01, 17.67it/s]
412
 
 
413
  90%|████████▉ | 178/198 [00:14<00:01, 17.67it/s]
414
  91%|█████████ | 180/198 [00:14<00:01, 14.28it/s]
415
 
 
416
  91%|█████████ | 180/198 [00:14<00:01, 14.28it/s]
417
 
 
418
  91%|█████████ | 180/198 [00:14<00:01, 14.28it/s]
419
 
 
420
  91%|█████████ | 180/198 [00:14<00:01, 14.28it/s]
421
 
 
422
  91%|█████████ | 180/198 [00:14<00:01, 14.28it/s]
423
 
 
424
  91%|█████████ | 180/198 [00:14<00:01, 14.28it/s]
425
  93%|█████████▎| 185/198 [00:14<00:00, 18.87it/s]
426
 
 
427
  93%|█████████▎| 185/198 [00:14<00:00, 18.87it/s]
428
 
 
429
  93%|█████████▎| 185/198 [00:14<00:00, 18.87it/s]
430
 
 
431
  93%|█████████▎| 185/198 [00:14<00:00, 18.87it/s]
432
  95%|█████████▍| 188/198 [00:14<00:00, 16.23it/s]
433
 
 
434
  95%|█████████▍| 188/198 [00:14<00:00, 16.23it/s]
435
 
 
436
  95%|█████████▍| 188/198 [00:14<00:00, 16.23it/s]
437
 
 
438
  95%|█████████▍| 188/198 [00:14<00:00, 16.23it/s]
439
 
 
440
  95%|█████████▍| 188/198 [00:14<00:00, 16.23it/s]
441
  97%|█████████▋| 192/198 [00:14<00:00, 18.80it/s]
442
 
 
443
  97%|█████████▋| 192/198 [00:15<00:00, 18.80it/s]
444
 
 
445
  97%|█████████▋| 192/198 [00:15<00:00, 18.80it/s]
446
 
 
447
  97%|█████████▋| 192/198 [00:15<00:00, 18.80it/s]
448
  98%|█████████▊| 195/198 [00:15<00:00, 16.37it/s]
449
 
 
450
  98%|█████████▊| 195/198 [00:15<00:00, 16.37it/s]
451
 
 
452
  98%|█████████▊| 195/198 [00:15<00:00, 16.37it/s]
453
 
 
454
  98%|█████████▊| 195/198 [00:15<00:00, 16.37it/s]
 
 
 
 
 
 
 
 
 
 
1
+ /opt/conda/envs/py310/bin/python -m mlc_llm gen_config /models/Qwen2-1.5B-Instruct --quantization q0f16 --conv-template chatml --output /models/mlc-delivery/hf/mlc-ai/Qwen2-1.5B-Instruct-q0f16-MLC
2
+ [2024-06-06 23:39:30] INFO auto_config.py:116: Found model configuration: /models/Qwen2-1.5B-Instruct/config.json
3
+ [2024-06-06 23:39:30] INFO auto_config.py:154: Found model type: qwen2. Use `--model-type` to override.
4
+ [2024-06-06 23:39:30] INFO qwen2_model.py:49: context_window_size not found in config.json. Falling back to max_position_embeddings (32768)
5
+ [2024-06-06 23:39:30] INFO qwen2_model.py:66: prefill_chunk_size defaults to 2048
6
+ [2024-06-06 23:39:30] INFO config.py:107: Overriding max_batch_size from 1 to 80
7
+ [2024-06-06 23:39:30] INFO gen_config.py:143: [generation_config.json] Setting bos_token_id: 151643
8
+ [2024-06-06 23:39:30] INFO gen_config.py:143: [generation_config.json] Setting pad_token_id: 151643
9
+ [2024-06-06 23:39:30] INFO gen_config.py:143: [generation_config.json] Setting eos_token_id: [151645, 151643]
10
+ [2024-06-06 23:39:30] INFO gen_config.py:143: [generation_config.json] Setting repetition_penalty: 1.1
11
+ [2024-06-06 23:39:30] INFO gen_config.py:143: [generation_config.json] Setting temperature: 0.7
12
+ [2024-06-06 23:39:30] INFO gen_config.py:143: [generation_config.json] Setting top_p: 0.8
13
+ [2024-06-06 23:39:30] INFO gen_config.py:157: Not found tokenizer config: /models/Qwen2-1.5B-Instruct/tokenizer.model
14
+ [2024-06-06 23:39:30] INFO gen_config.py:155: Found tokenizer config: /models/Qwen2-1.5B-Instruct/tokenizer.json. Copying to /models/mlc-delivery/hf/mlc-ai/Qwen2-1.5B-Instruct-q0f16-MLC/tokenizer.json
15
+ [2024-06-06 23:39:30] INFO gen_config.py:155: Found tokenizer config: /models/Qwen2-1.5B-Instruct/vocab.json. Copying to /models/mlc-delivery/hf/mlc-ai/Qwen2-1.5B-Instruct-q0f16-MLC/vocab.json
16
+ [2024-06-06 23:39:30] INFO gen_config.py:155: Found tokenizer config: /models/Qwen2-1.5B-Instruct/merges.txt. Copying to /models/mlc-delivery/hf/mlc-ai/Qwen2-1.5B-Instruct-q0f16-MLC/merges.txt
17
+ [2024-06-06 23:39:30] INFO gen_config.py:157: Not found tokenizer config: /models/Qwen2-1.5B-Instruct/added_tokens.json
18
+ [2024-06-06 23:39:30] INFO gen_config.py:155: Found tokenizer config: /models/Qwen2-1.5B-Instruct/tokenizer_config.json. Copying to /models/mlc-delivery/hf/mlc-ai/Qwen2-1.5B-Instruct-q0f16-MLC/tokenizer_config.json
19
+ [2024-06-06 23:39:30] INFO gen_config.py:216: Detected tokenizer info: {'token_postproc_method': 'byte_level', 'prepend_space_in_encode': False, 'strip_space_in_decode': False}
20
+ [2024-06-06 23:39:30] INFO gen_config.py:32: [System default] Setting presence_penalty: 0.0
21
+ [2024-06-06 23:39:30] INFO gen_config.py:32: [System default] Setting frequency_penalty: 0.0
22
+ [2024-06-06 23:39:30] INFO gen_config.py:223: Dumping configuration file to: /models/mlc-delivery/hf/mlc-ai/Qwen2-1.5B-Instruct-q0f16-MLC/mlc-chat-config.json
23
+ /opt/conda/envs/py310/bin/python -m mlc_llm convert_weight /models/Qwen2-1.5B-Instruct --quantization q0f16 --output /models/mlc-delivery/hf/mlc-ai/Qwen2-1.5B-Instruct-q0f16-MLC
24
+ [2024-06-06 23:39:32] INFO auto_config.py:116: Found model configuration: /models/Qwen2-1.5B-Instruct/config.json
25
+ [2024-06-06 23:39:33] INFO auto_device.py:79: Found device: cuda:0
26
+ [2024-06-06 23:39:35] INFO auto_device.py:88: Not found device: rocm:0
27
+ [2024-06-06 23:39:36] INFO auto_device.py:88: Not found device: metal:0
28
+ [2024-06-06 23:39:38] INFO auto_device.py:79: Found device: vulkan:0
29
+ [2024-06-06 23:39:38] INFO auto_device.py:79: Found device: vulkan:1
30
+ [2024-06-06 23:39:38] INFO auto_device.py:79: Found device: vulkan:2
31
+ [2024-06-06 23:39:38] INFO auto_device.py:79: Found device: vulkan:3
32
+ [2024-06-06 23:39:40] INFO auto_device.py:88: Not found device: opencl:0
33
+ [2024-06-06 23:39:40] INFO auto_device.py:35: Using device: cuda:0
34
+ [2024-06-06 23:39:40] INFO auto_weight.py:71: Finding weights in: /models/Qwen2-1.5B-Instruct
35
+ [2024-06-06 23:39:40] INFO auto_weight.py:137: Not found Huggingface PyTorch
36
+ [2024-06-06 23:39:40] INFO auto_weight.py:144: Found source weight format: huggingface-safetensor. Source configuration: /models/Qwen2-1.5B-Instruct/model.safetensors.index.json
37
+ [2024-06-06 23:39:40] INFO auto_weight.py:107: Using source weight configuration: /models/Qwen2-1.5B-Instruct/model.safetensors.index.json. Use `--source` to override.
38
+ [2024-06-06 23:39:40] INFO auto_weight.py:111: Using source weight format: huggingface-safetensor. Use `--source-format` to override.
39
+ [2024-06-06 23:39:40] INFO auto_config.py:154: Found model type: qwen2. Use `--model-type` to override.
40
+ [2024-06-06 23:39:40] INFO qwen2_model.py:49: context_window_size not found in config.json. Falling back to max_position_embeddings (32768)
41
+ [2024-06-06 23:39:40] INFO qwen2_model.py:66: prefill_chunk_size defaults to 2048
42
+ Traceback (most recent call last):
43
+ File "/opt/conda/envs/py310/lib/python3.10/runpy.py", line 196, in _run_module_as_main
44
+ return _run_code(code, main_globals, None,
45
+ File "/opt/conda/envs/py310/lib/python3.10/runpy.py", line 86, in _run_code
46
+ exec(code, run_globals)
47
+ File "/opt/conda/envs/py310/lib/python3.10/site-packages/mlc_llm/__main__.py", line 64, in <module>
48
+ main()
49
+ File "/opt/conda/envs/py310/lib/python3.10/site-packages/mlc_llm/__main__.py", line 37, in main
50
+ cli.main(sys.argv[2:])
51
+ File "/opt/conda/envs/py310/lib/python3.10/site-packages/mlc_llm/cli/convert_weight.py", line 88, in main
52
+ convert_weight(
53
+ File "/opt/conda/envs/py310/lib/python3.10/site-packages/mlc_llm/interface/convert_weight.py", line 181, in convert_weight
54
+ _convert_args(args)
55
+ File "/opt/conda/envs/py310/lib/python3.10/site-packages/mlc_llm/interface/convert_weight.py", line 145, in _convert_args
56
+ tvmjs.dump_ndarray_cache(
57
+ File "/opt/conda/envs/py310/lib/python3.10/site-packages/tvm/contrib/tvmjs.py", line 272, in dump_ndarray_cache
58
+ for k, origin_v in param_generator:
59
+ File "/opt/conda/envs/py310/lib/python3.10/site-packages/mlc_llm/interface/convert_weight.py", line 122, in _param_generator
60
+ loader = LOADER[args.source_format](
61
+ File "/opt/conda/envs/py310/lib/python3.10/site-packages/mlc_llm/loader/huggingface_loader.py", line 99, in __init__
62
+ check_parameter_usage(extern_param_map, set(self.torch_to_path.keys()))
63
+ File "/opt/conda/envs/py310/lib/python3.10/site-packages/mlc_llm/loader/utils.py", line 33, in check_parameter_usage
64
+ raise ValueError(
65
+ ValueError: The following extern parameters do not exist in the weight files:
66
+ lm_head.weight
67
+ Weight conversion with arguments:
68
+ --config /models/Qwen2-1.5B-Instruct/config.json
69
+ --quantization NoQuantize(name='q0f16', kind='no-quant', model_dtype='float16')
70
+ --model-type qwen2
71
+ --device cuda:0
72
+ --source /models/Qwen2-1.5B-Instruct/model.safetensors.index.json
73
+ --source-format huggingface-safetensor
74
+ --output /models/mlc-delivery/hf/mlc-ai/Qwen2-1.5B-Instruct-q0f16-MLC
75
+ Start storing to cache /models/mlc-delivery/hf/mlc-ai/Qwen2-1.5B-Instruct-q0f16-MLC
76
+ /home/rickzhou/miniconda3/envs/mlc/bin/python -m mlc_llm gen_config /ssd2/models/Qwen2-1.5B-Instruct --quantization q0f16 --conv-template chatml --output /models/mlc-delivery/hf/mlc-ai/Qwen2-1.5B-Instruct-q0f16-MLC
77
+ [2024-06-06 22:37:49] INFO auto_config.py:116: Found model configuration: /ssd2/models/Qwen2-1.5B-Instruct/config.json
78
+ [2024-06-06 22:37:49] INFO auto_config.py:154: Found model type: qwen2. Use `--model-type` to override.
79
+ [2024-06-06 22:37:49] INFO qwen2_model.py:50: context_window_size not found in config.json. Falling back to max_position_embeddings (32768)
80
+ [2024-06-06 22:37:49] INFO qwen2_model.py:67: prefill_chunk_size defaults to 2048
81
+ [2024-06-06 22:37:49] INFO config.py:107: Overriding max_batch_size from 1 to 80
82
+ [2024-06-06 22:37:49] INFO gen_config.py:143: [generation_config.json] Setting bos_token_id: 151643
83
+ [2024-06-06 22:37:49] INFO gen_config.py:143: [generation_config.json] Setting pad_token_id: 151643
84
+ [2024-06-06 22:37:49] INFO gen_config.py:143: [generation_config.json] Setting eos_token_id: [151645, 151643]
85
+ [2024-06-06 22:37:49] INFO gen_config.py:143: [generation_config.json] Setting repetition_penalty: 1.1
86
+ [2024-06-06 22:37:49] INFO gen_config.py:143: [generation_config.json] Setting temperature: 0.7
87
+ [2024-06-06 22:37:49] INFO gen_config.py:143: [generation_config.json] Setting top_p: 0.8
88
+ [2024-06-06 22:37:49] INFO gen_config.py:157: Not found tokenizer config: /ssd2/models/Qwen2-1.5B-Instruct/tokenizer.model
89
+ [2024-06-06 22:37:49] INFO gen_config.py:155: Found tokenizer config: /ssd2/models/Qwen2-1.5B-Instruct/tokenizer.json. Copying to /models/mlc-delivery/hf/mlc-ai/Qwen2-1.5B-Instruct-q0f16-MLC/tokenizer.json
90
+ [2024-06-06 22:37:49] INFO gen_config.py:155: Found tokenizer config: /ssd2/models/Qwen2-1.5B-Instruct/vocab.json. Copying to /models/mlc-delivery/hf/mlc-ai/Qwen2-1.5B-Instruct-q0f16-MLC/vocab.json
91
+ [2024-06-06 22:37:49] INFO gen_config.py:155: Found tokenizer config: /ssd2/models/Qwen2-1.5B-Instruct/merges.txt. Copying to /models/mlc-delivery/hf/mlc-ai/Qwen2-1.5B-Instruct-q0f16-MLC/merges.txt
92
+ [2024-06-06 22:37:49] INFO gen_config.py:157: Not found tokenizer config: /ssd2/models/Qwen2-1.5B-Instruct/added_tokens.json
93
+ [2024-06-06 22:37:49] INFO gen_config.py:155: Found tokenizer config: /ssd2/models/Qwen2-1.5B-Instruct/tokenizer_config.json. Copying to /models/mlc-delivery/hf/mlc-ai/Qwen2-1.5B-Instruct-q0f16-MLC/tokenizer_config.json
94
+ [2024-06-06 22:37:49] INFO gen_config.py:216: Detected tokenizer info: {'token_postproc_method': 'byte_level', 'prepend_space_in_encode': False, 'strip_space_in_decode': False}
95
+ [2024-06-06 22:37:49] INFO gen_config.py:32: [System default] Setting presence_penalty: 0.0
96
+ [2024-06-06 22:37:49] INFO gen_config.py:32: [System default] Setting frequency_penalty: 0.0
97
+ [2024-06-06 22:37:49] INFO gen_config.py:223: Dumping configuration file to: /models/mlc-delivery/hf/mlc-ai/Qwen2-1.5B-Instruct-q0f16-MLC/mlc-chat-config.json
98
+ /home/rickzhou/miniconda3/envs/mlc/bin/python -m mlc_llm convert_weight /ssd2/models/Qwen2-1.5B-Instruct --quantization q0f16 --output /models/mlc-delivery/hf/mlc-ai/Qwen2-1.5B-Instruct-q0f16-MLC
99
+ [2024-06-06 22:37:50] INFO auto_config.py:116: Found model configuration: /ssd2/models/Qwen2-1.5B-Instruct/config.json
100
+ [2024-06-06 22:37:52] INFO auto_device.py:79: Found device: cuda:0
101
+ [2024-06-06 22:37:52] INFO auto_device.py:79: Found device: cuda:1
102
+ [2024-06-06 22:37:53] INFO auto_device.py:88: Not found device: rocm:0
103
+ [2024-06-06 22:37:54] INFO auto_device.py:88: Not found device: metal:0
104
+ [2024-06-06 22:37:55] INFO auto_device.py:79: Found device: vulkan:0
105
+ [2024-06-06 22:37:55] INFO auto_device.py:79: Found device: vulkan:1
106
+ [2024-06-06 22:37:55] INFO auto_device.py:79: Found device: vulkan:2
107
+ [2024-06-06 22:37:56] INFO auto_device.py:88: Not found device: opencl:0
108
+ [2024-06-06 22:37:56] INFO auto_device.py:35: Using device: cuda:0
109
+ [2024-06-06 22:37:56] INFO auto_weight.py:71: Finding weights in: /ssd2/models/Qwen2-1.5B-Instruct
110
+ [2024-06-06 22:37:56] INFO auto_weight.py:137: Not found Huggingface PyTorch
111
+ [2024-06-06 22:37:56] INFO auto_weight.py:144: Found source weight format: huggingface-safetensor. Source configuration: /ssd2/models/Qwen2-1.5B-Instruct/model.safetensors.index.json
112
+ [2024-06-06 22:37:56] INFO auto_weight.py:107: Using source weight configuration: /ssd2/models/Qwen2-1.5B-Instruct/model.safetensors.index.json. Use `--source` to override.
113
+ [2024-06-06 22:37:56] INFO auto_weight.py:111: Using source weight format: huggingface-safetensor. Use `--source-format` to override.
114
+ [2024-06-06 22:37:56] INFO auto_config.py:154: Found model type: qwen2. Use `--model-type` to override.
115
+ [2024-06-06 22:37:56] INFO qwen2_model.py:50: context_window_size not found in config.json. Falling back to max_position_embeddings (32768)
116
+ [2024-06-06 22:37:56] INFO qwen2_model.py:67: prefill_chunk_size defaults to 2048
117
+ Weight conversion with arguments:
118
+ --config /ssd2/models/Qwen2-1.5B-Instruct/config.json
119
+ --quantization NoQuantize(name='q0f16', kind='no-quant', model_dtype='float16')
120
+ --model-type qwen2
121
+ --device cuda:0
122
+ --source /ssd2/models/Qwen2-1.5B-Instruct/model.safetensors.index.json
123
+ --source-format huggingface-safetensor
124
+ --output /models/mlc-delivery/hf/mlc-ai/Qwen2-1.5B-Instruct-q0f16-MLC
125
+ Start storing to cache /models/mlc-delivery/hf/mlc-ai/Qwen2-1.5B-Instruct-q0f16-MLC
126
+
127
  0%| | 0/198 [00:00<?, ?it/s]
128
 
129
+
130
  0%| | 0/198 [00:00<?, ?it/s]
131
 
132
+
133
  0%| | 0/198 [00:02<?, ?it/s]
134
  1%| | 1/198 [00:03<12:17, 3.74s/it]
135
 
136
+
137
  1%| | 1/198 [00:03<12:17, 3.74s/it]
138
 
139
+
140
  1%| | 1/198 [00:03<12:17, 3.74s/it]
141
 
142
+
143
  1%| | 1/198 [00:03<12:17, 3.74s/it]
144
  2%|▏ | 4/198 [00:04<02:35, 1.25it/s]
145
 
146
+
147
  2%|▏ | 4/198 [00:04<02:35, 1.25it/s]
148
 
149
+
150
  2%|▏ | 4/198 [00:04<02:35, 1.25it/s]
151
 
152
+
153
  2%|▏ | 4/198 [00:04<02:35, 1.25it/s]
154
 
155
+
156
  2%|▏ | 4/198 [00:04<02:35, 1.25it/s]
157
 
158
+
159
  2%|▏ | 4/198 [00:04<02:35, 1.25it/s]
160
 
161
+
162
  2%|▏ | 4/198 [00:04<02:35, 1.25it/s]
163
  5%|▌ | 10/198 [00:04<00:49, 3.78it/s]
164
 
165
+
166
  5%|▌ | 10/198 [00:04<00:49, 3.78it/s]
167
 
168
+
169
  5%|▌ | 10/198 [00:04<00:49, 3.78it/s]
170
  6%|▌ | 12/198 [00:04<00:42, 4.36it/s]
171
 
172
+
173
  6%|▌ | 12/198 [00:04<00:42, 4.36it/s]
174
 
175
+
176
  6%|▌ | 12/198 [00:04<00:42, 4.36it/s]
177
 
178
+
179
  6%|▌ | 12/198 [00:04<00:42, 4.36it/s]
180
 
181
+
182
  6%|▌ | 12/198 [00:04<00:42, 4.36it/s]
183
 
184
+
185
  6%|▌ | 12/198 [00:04<00:42, 4.36it/s]
186
  9%|▊ | 17/198 [00:04<00:24, 7.42it/s]
187
 
188
+
189
  9%|▊ | 17/198 [00:04<00:24, 7.42it/s]
190
 
191
+
192
  9%|▊ | 17/198 [00:04<00:24, 7.42it/s]
193
 
194
+
195
  9%|▊ | 17/198 [00:04<00:24, 7.42it/s]
196
  10%|█ | 20/198 [00:04<00:21, 8.33it/s]
197
 
198
+
199
  10%|█ | 20/198 [00:04<00:21, 8.33it/s]
200
 
201
+
202
  10%|█ | 20/198 [00:04<00:21, 8.33it/s]
203
 
204
+
205
  10%|█ | 20/198 [00:04<00:21, 8.33it/s]
206
 
207
+
208
  10%|█ | 20/198 [00:05<00:21, 8.33it/s]
209
  12%|█▏ | 24/198 [00:05<00:15, 11.06it/s]
210
 
211
+
212
  12%|█▏ | 24/198 [00:05<00:15, 11.06it/s]
213
 
214
+
215
  12%|█▏ | 24/198 [00:05<00:15, 11.06it/s]
216
 
217
+
218
  12%|█▏ | 24/198 [00:05<00:15, 11.06it/s]
219
  14%|█▎ | 27/198 [00:05<00:15, 11.30it/s]
220
 
221
+
222
  14%|█▎ | 27/198 [00:05<00:15, 11.30it/s]
223
 
224
+
225
  14%|█▎ | 27/198 [00:05<00:15, 11.30it/s]
226
 
227
+
228
  14%|█▎ | 27/198 [00:05<00:15, 11.30it/s]
229
 
230
+
231
  14%|█▎ | 27/198 [00:05<00:15, 11.30it/s]
232
  16%|█▌ | 31/198 [00:05<00:12, 13.34it/s]
233
 
234
+
235
  16%|█▌ | 31/198 [00:05<00:12, 13.34it/s]
236
 
237
+
238
  16%|█▌ | 31/198 [00:05<00:12, 13.34it/s]
239
  17%|█▋ | 33/198 [00:05<00:13, 11.79it/s]
240
 
241
+
242
  17%|█▋ | 33/198 [00:05<00:13, 11.79it/s]
243
 
244
+
245
  17%|█▋ | 33/198 [00:05<00:13, 11.79it/s]
246
 
247
+
248
  17%|█▋ | 33/198 [00:05<00:13, 11.79it/s]
249
 
250
+
251
  17%|█▋ | 33/198 [00:05<00:13, 11.79it/s]
252
 
253
+
254
  17%|█▋ | 33/198 [00:05<00:13, 11.79it/s]
255
  19%|█▉ | 38/198 [00:05<00:09, 16.36it/s]
256
 
257
+
258
  19%|█▉ | 38/198 [00:06<00:09, 16.36it/s]
259
 
260
+
261
  19%|█▉ | 38/198 [00:06<00:09, 16.36it/s]
262
 
263
+
264
  19%|█▉ | 38/198 [00:06<00:09, 16.36it/s]
265
  21%|██ | 41/198 [00:06<00:10, 14.94it/s]
266
 
267
+
268
  21%|██ | 41/198 [00:06<00:10, 14.94it/s]
269
 
270
+
271
  21%|██ | 41/198 [00:06<00:10, 14.94it/s]
272
 
273
+
274
  21%|██ | 41/198 [00:06<00:10, 14.94it/s]
275
 
276
+
277
  21%|██ | 41/198 [00:06<00:10, 14.94it/s]
278
  23%|██▎ | 45/198 [00:06<00:08, 17.52it/s]
279
 
280
+
281
  23%|██▎ | 45/198 [00:06<00:08, 17.52it/s]
282
 
283
+
284
  23%|██▎ | 45/198 [00:06<00:08, 17.52it/s]
285
 
286
+
287
  23%|██▎ | 45/198 [00:06<00:08, 17.52it/s]
288
  24%|██▍ | 48/198 [00:06<00:09, 15.28it/s]
289
 
290
+
291
  24%|██▍ | 48/198 [00:06<00:09, 15.28it/s]
292
 
293
+
294
  24%|██▍ | 48/198 [00:06<00:09, 15.28it/s]
295
 
296
+
297
  24%|██▍ | 48/198 [00:06<00:09, 15.28it/s]
298
 
299
+
300
  24%|██▍ | 48/198 [00:06<00:09, 15.28it/s]
301
  26%|██▋ | 52/198 [00:06<00:08, 16.47it/s]
302
 
303
+
304
  26%|██▋ | 52/198 [00:06<00:08, 16.47it/s]
305
 
306
+
307
  26%|██▋ | 52/198 [00:07<00:08, 16.47it/s]
308
  27%|██▋ | 54/198 [00:07<00:10, 13.51it/s]
309
 
310
+
311
  27%|██▋ | 54/198 [00:07<00:10, 13.51it/s]
312
 
313
+
314
  27%|██▋ | 54/198 [00:07<00:10, 13.51it/s]
315
 
316
+
317
  27%|██▋ | 54/198 [00:07<00:10, 13.51it/s]
318
 
319
+
320
  27%|██▋ | 54/198 [00:07<00:10, 13.51it/s]
321
 
322
+
323
  27%|██▋ | 54/198 [00:07<00:10, 13.51it/s]
324
  30%|██▉ | 59/198 [00:07<00:07, 18.05it/s]
325
 
326
+
327
  30%|██▉ | 59/198 [00:07<00:07, 18.05it/s]
328
 
329
+
330
  30%|██▉ | 59/198 [00:07<00:07, 18.05it/s]
331
 
332
+
333
  30%|██▉ | 59/198 [00:07<00:07, 18.05it/s]
334
  31%|███▏ | 62/198 [00:07<00:08, 15.72it/s]
335
 
336
+
337
  31%|███▏ | 62/198 [00:07<00:08, 15.72it/s]
338
 
339
+
340
  31%|███▏ | 62/198 [00:07<00:08, 15.72it/s]
341
 
342
+
343
  31%|███▏ | 62/198 [00:07<00:08, 15.72it/s]
344
 
345
+
346
  31%|███▏ | 62/198 [00:07<00:08, 15.72it/s]
347
  33%|███▎ | 66/198 [00:07<00:07, 18.21it/s]
348
 
349
+
350
  33%|███▎ | 66/198 [00:07<00:07, 18.21it/s]
351
 
352
+
353
  33%|███▎ | 66/198 [00:07<00:07, 18.21it/s]
354
 
355
+
356
  33%|███▎ | 66/198 [00:07<00:07, 18.21it/s]
357
  35%|███▍ | 69/198 [00:07<00:08, 15.96it/s]
358
 
359
+
360
  35%|███▍ | 69/198 [00:07<00:08, 15.96it/s]
361
 
362
+
363
  35%|███▍ | 69/198 [00:07<00:08, 15.96it/s]
364
 
365
+
366
  35%|███▍ | 69/198 [00:07<00:08, 15.96it/s]
367
 
368
+
369
  35%|███▍ | 69/198 [00:07<00:08, 15.96it/s]
370
  37%|███▋ | 73/198 [00:08<00:07, 17.19it/s]
371
 
372
+
373
  37%|███▋ | 73/198 [00:08<00:07, 17.19it/s]
374
 
375
+
376
  37%|███▋ | 73/198 [00:08<00:07, 17.19it/s]
377
  38%|███▊ | 75/198 [00:08<00:08, 14.09it/s]
378
 
379
+
380
  38%|███▊ | 75/198 [00:08<00:08, 14.09it/s]
381
 
382
+
383
  38%|███▊ | 75/198 [00:08<00:08, 14.09it/s]
384
 
385
+
386
  38%|███▊ | 75/198 [00:08<00:08, 14.09it/s]
387
 
388
+
389
  38%|███▊ | 75/198 [00:08<00:08, 14.09it/s]
390
 
391
+
392
  38%|███▊ | 75/198 [00:08<00:08, 14.09it/s]
393
  40%|████ | 80/198 [00:08<00:06, 18.76it/s]
394
 
395
+
396
  40%|████ | 80/198 [00:08<00:06, 18.76it/s]
397
 
398
+
399
  40%|████ | 80/198 [00:08<00:06, 18.76it/s]
400
 
401
+
402
  40%|████ | 80/198 [00:08<00:06, 18.76it/s]
403
  42%|████▏ | 83/198 [00:08<00:07, 16.28it/s]
404
 
405
+
406
  42%|████▏ | 83/198 [00:08<00:07, 16.28it/s]
407
 
408
+
409
  42%|████▏ | 83/198 [00:08<00:07, 16.28it/s]
410
 
411
+
412
  42%|████▏ | 83/198 [00:08<00:07, 16.28it/s]
413
 
414
+
415
  42%|████▏ | 83/198 [00:08<00:07, 16.28it/s]
416
  44%|████▍ | 87/198 [00:08<00:05, 18.73it/s]
417
 
418
+
419
  44%|████▍ | 87/198 [00:08<00:05, 18.73it/s]
420
 
421
+
422
  44%|████▍ | 87/198 [00:09<00:05, 18.73it/s]
423
 
424
+
425
  44%|████▍ | 87/198 [00:09<00:05, 18.73it/s]
426
  45%|████▌ | 90/198 [00:09<00:06, 16.24it/s]
427
 
428
+
429
  45%|████▌ | 90/198 [00:09<00:06, 16.24it/s]
430
 
431
+
432
  45%|████▌ | 90/198 [00:09<00:06, 16.24it/s]
433
 
434
+
435
  45%|████▌ | 90/198 [00:09<00:06, 16.24it/s]
436
 
437
+
438
  45%|████▌ | 90/198 [00:09<00:06, 16.24it/s]
439
  47%|████▋ | 94/198 [00:09<00:05, 17.41it/s]
440
 
441
+
442
  47%|████▋ | 94/198 [00:09<00:05, 17.41it/s]
443
 
444
+
445
  47%|████▋ | 94/198 [00:09<00:05, 17.41it/s]
446
  48%|████▊ | 96/198 [00:09<00:07, 14.21it/s]
447
 
448
+
449
  48%|████▊ | 96/198 [00:09<00:07, 14.21it/s]
450
 
451
+
452
  48%|████▊ | 96/198 [00:09<00:07, 14.21it/s]
453
 
454
+
455
  48%|████▊ | 96/198 [00:09<00:07, 14.21it/s]
456
 
457
+
458
  48%|████▊ | 96/198 [00:09<00:07, 14.21it/s]
459
 
460
+
461
  48%|████▊ | 96/198 [00:09<00:07, 14.21it/s]
462
  51%|█████ | 101/198 [00:09<00:05, 18.90it/s]
463
 
464
+
465
  51%|█████ | 101/198 [00:09<00:05, 18.90it/s]
466
 
467
+
468
  51%|█████ | 101/198 [00:09<00:05, 18.90it/s]
469
 
470
+
471
  51%|█████ | 101/198 [00:09<00:05, 18.90it/s]
472
  53%|█████▎ | 104/198 [00:09<00:05, 16.42it/s]
473
 
474
+
475
  53%|█████▎ | 104/198 [00:09<00:05, 16.42it/s]
476
 
477
+
478
  53%|█████▎ | 104/198 [00:09<00:05, 16.42it/s]
479
 
480
+
481
  53%|█████▎ | 104/198 [00:09<00:05, 16.42it/s]
482
 
483
+
484
  53%|█████▎ | 104/198 [00:09<00:05, 16.42it/s]
485
  55%|█████▍ | 108/198 [00:10<00:04, 18.92it/s]
486
 
487
+
488
  55%|█████▍ | 108/198 [00:10<00:04, 18.92it/s]
489
 
490
+
491
  55%|█████▍ | 108/198 [00:10<00:04, 18.92it/s]
492
 
493
+
494
  55%|█████▍ | 108/198 [00:10<00:04, 18.92it/s]
495
  56%|█████▌ | 111/198 [00:10<00:05, 16.19it/s]
496
 
497
+
498
  56%|█████▌ | 111/198 [00:10<00:05, 16.19it/s]
499
 
500
+
501
  56%|█████▌ | 111/198 [00:10<00:05, 16.19it/s]
502
 
503
+
504
  56%|█████▌ | 111/198 [00:10<00:05, 16.19it/s]
505
 
506
+
507
  56%|█████▌ | 111/198 [00:10<00:05, 16.19it/s]
508
  58%|█████▊ | 115/198 [00:10<00:04, 17.36it/s]
509
 
510
+
511
  58%|█████▊ | 115/198 [00:10<00:04, 17.36it/s]
512
 
513
+
514
  58%|█████▊ | 115/198 [00:10<00:04, 17.36it/s]
515
  59%|█████▉ | 117/198 [00:10<00:05, 14.20it/s]
516
 
517
+
518
  59%|█████▉ | 117/198 [00:10<00:05, 14.20it/s]
519
 
520
+
521
  59%|█████▉ | 117/198 [00:10<00:05, 14.20it/s]
522
 
523
+
524
  59%|█████▉ | 117/198 [00:10<00:05, 14.20it/s]
525
 
526
+
527
  59%|█████▉ | 117/198 [00:10<00:05, 14.20it/s]
528
 
529
+
530
  59%|█████▉ | 117/198 [00:10<00:05, 14.20it/s]
531
  62%|██████▏ | 122/198 [00:10<00:04, 18.89it/s]
532
 
533
+
534
  62%|██████▏ | 122/198 [00:11<00:04, 18.89it/s]
535
 
536
+
537
  62%|██████▏ | 122/198 [00:11<00:04, 18.89it/s]
538
 
539
+
540
  62%|██████▏ | 122/198 [00:11<00:04, 18.89it/s]
541
  63%|██████▎ | 125/198 [00:11<00:04, 16.47it/s]
542
 
543
+
544
  63%|██████▎ | 125/198 [00:11<00:04, 16.47it/s]
545
 
546
+
547
  63%|██████▎ | 125/198 [00:11<00:04, 16.47it/s]
548
 
549
+
550
  63%|██████▎ | 125/198 [00:11<00:04, 16.47it/s]
551
 
552
+
553
  63%|██████▎ | 125/198 [00:11<00:04, 16.47it/s]
554
  65%|██████▌ | 129/198 [00:11<00:03, 19.03it/s]
555
 
556
+
557
  65%|██████▌ | 129/198 [00:11<00:03, 19.03it/s]
558
 
559
+
560
  65%|██████▌ | 129/198 [00:11<00:03, 19.03it/s]
561
 
562
+
563
  65%|██████▌ | 129/198 [00:11<00:03, 19.03it/s]
564
  67%|██████▋ | 132/198 [00:11<00:04, 16.50it/s]
565
 
566
+
567
  67%|██████▋ | 132/198 [00:11<00:04, 16.50it/s]
568
 
569
+
570
  67%|██████▋ | 132/198 [00:11<00:04, 16.50it/s]
571
 
572
+
573
  67%|██████▋ | 132/198 [00:11<00:04, 16.50it/s]
574
 
575
+
576
  67%|██████▋ | 132/198 [00:11<00:04, 16.50it/s]
577
  69%|██████▊ | 136/198 [00:11<00:03, 17.60it/s]
578
 
579
+
580
  69%|██████▊ | 136/198 [00:11<00:03, 17.60it/s]
581
 
582
+
583
  69%|██████▊ | 136/198 [00:11<00:03, 17.60it/s]
584
  70%|██████▉ | 138/198 [00:11<00:04, 14.32it/s]
585
 
586
+
587
  70%|██████▉ | 138/198 [00:11<00:04, 14.32it/s]
588
 
589
+
590
  70%|██████▉ | 138/198 [00:12<00:04, 14.32it/s]
591
 
592
+
593
  70%|██████▉ | 138/198 [00:12<00:04, 14.32it/s]
594
 
595
+
596
  70%|██████▉ | 138/198 [00:12<00:04, 14.32it/s]
597
 
598
+
599
  70%|██████▉ | 138/198 [00:12<00:04, 14.32it/s]
600
  72%|███████▏ | 143/198 [00:12<00:02, 19.01it/s]
601
 
602
+
603
  72%|███████▏ | 143/198 [00:12<00:02, 19.01it/s]
604
 
605
+
606
  72%|███████▏ | 143/198 [00:12<00:02, 19.01it/s]
607
 
608
+
609
  72%|███████▏ | 143/198 [00:12<00:02, 19.01it/s]
610
  74%|███████▎ | 146/198 [00:12<00:03, 16.53it/s]
611
 
612
+
613
  74%|███████▎ | 146/198 [00:12<00:03, 16.53it/s]
614
 
615
+
616
  74%|███████▎ | 146/198 [00:12<00:03, 16.53it/s]
617
 
618
+
619
  74%|███████▎ | 146/198 [00:12<00:03, 16.53it/s]
620
 
621
+
622
  74%|███████▎ | 146/198 [00:12<00:03, 16.53it/s]
623
  76%|███████▌ | 150/198 [00:12<00:02, 19.07it/s]
624
 
625
+
626
  76%|███████▌ | 150/198 [00:12<00:02, 19.07it/s]
627
 
628
+
629
  76%|███████▌ | 150/198 [00:12<00:02, 19.07it/s]
630
 
631
+
632
  76%|███████▌ | 150/198 [00:12<00:02, 19.07it/s]
633
  77%|███████▋ | 153/198 [00:12<00:02, 16.50it/s]
634
 
635
+
636
  77%|███████▋ | 153/198 [00:12<00:02, 16.50it/s]
637
 
638
+
639
  77%|███████▋ | 153/198 [00:12<00:02, 16.50it/s]
640
 
641
+
642
  77%|███████▋ | 153/198 [00:12<00:02, 16.50it/s]
643
 
644
+
645
  77%|███████▋ | 153/198 [00:12<00:02, 16.50it/s]
646
  79%|███████▉ | 157/198 [00:12<00:02, 17.62it/s]
647
 
648
+
649
  79%|███████▉ | 157/198 [00:13<00:02, 17.62it/s]
650
 
651
+
652
  79%|███████▉ | 157/198 [00:13<00:02, 17.62it/s]
653
  80%|████████ | 159/198 [00:13<00:02, 14.34it/s]
654
 
655
+
656
  80%|████████ | 159/198 [00:13<00:02, 14.34it/s]
657
 
658
+
659
  80%|████████ | 159/198 [00:13<00:02, 14.34it/s]
660
 
661
+
662
  80%|████████ | 159/198 [00:13<00:02, 14.34it/s]
663
 
664
+
665
  80%|████████ | 159/198 [00:13<00:02, 14.34it/s]
666
 
667
+
668
  80%|████████ | 159/198 [00:13<00:02, 14.34it/s]
669
  83%|████████▎ | 164/198 [00:13<00:01, 19.03it/s]
670
 
671
+
672
  83%|████████▎ | 164/198 [00:13<00:01, 19.03it/s]
673
 
674
+
675
  83%|████████▎ | 164/198 [00:13<00:01, 19.03it/s]
676
 
677
+
678
  83%|████████▎ | 164/198 [00:13<00:01, 19.03it/s]
679
  84%|████████▍ | 167/198 [00:13<00:01, 16.56it/s]
680
 
681
+
682
  84%|████████▍ | 167/198 [00:13<00:01, 16.56it/s]
683
 
684
+
685
  84%|████████▍ | 167/198 [00:13<00:01, 16.56it/s]
686
 
687
+
688
  84%|████████▍ | 167/198 [00:13<00:01, 16.56it/s]
689
 
690
+
691
  84%|████████▍ | 167/198 [00:13<00:01, 16.56it/s]
692
  86%|████████▋ | 171/198 [00:13<00:01, 19.12it/s]
693
 
694
+
695
  86%|████████▋ | 171/198 [00:13<00:01, 19.12it/s]
696
 
697
+
698
  86%|████████▋ | 171/198 [00:13<00:01, 19.12it/s]
699
 
700
+
701
  86%|████████▋ | 171/198 [00:14<00:01, 19.12it/s]
702
  88%|████████▊ | 174/198 [00:14<00:01, 16.58it/s]
703
 
704
+
705
  88%|████████▊ | 174/198 [00:14<00:01, 16.58it/s]
706
 
707
+
708
  88%|████████▊ | 174/198 [00:14<00:01, 16.58it/s]
709
 
710
+
711
  88%|████████▊ | 174/198 [00:14<00:01, 16.58it/s]
712
 
713
+
714
  88%|████████▊ | 174/198 [00:14<00:01, 16.58it/s]
715
  90%|████████▉ | 178/198 [00:14<00:01, 17.67it/s]
716
 
717
+
718
  90%|████████▉ | 178/198 [00:14<00:01, 17.67it/s]
719
 
720
+
721
  90%|████████▉ | 178/198 [00:14<00:01, 17.67it/s]
722
  91%|█████████ | 180/198 [00:14<00:01, 14.28it/s]
723
 
724
+
725
  91%|█████████ | 180/198 [00:14<00:01, 14.28it/s]
726
 
727
+
728
  91%|█████████ | 180/198 [00:14<00:01, 14.28it/s]
729
 
730
+
731
  91%|█████████ | 180/198 [00:14<00:01, 14.28it/s]
732
 
733
+
734
  91%|█████████ | 180/198 [00:14<00:01, 14.28it/s]
735
 
736
+
737
  91%|█████████ | 180/198 [00:14<00:01, 14.28it/s]
738
  93%|█████████▎| 185/198 [00:14<00:00, 18.87it/s]
739
 
740
+
741
  93%|█████████▎| 185/198 [00:14<00:00, 18.87it/s]
742
 
743
+
744
  93%|█████████▎| 185/198 [00:14<00:00, 18.87it/s]
745
 
746
+
747
  93%|█████████▎| 185/198 [00:14<00:00, 18.87it/s]
748
  95%|█████████▍| 188/198 [00:14<00:00, 16.23it/s]
749
 
750
+
751
  95%|█████████▍| 188/198 [00:14<00:00, 16.23it/s]
752
 
753
+
754
  95%|█████████▍| 188/198 [00:14<00:00, 16.23it/s]
755
 
756
+
757
  95%|█████████▍| 188/198 [00:14<00:00, 16.23it/s]
758
 
759
+
760
  95%|█████████▍| 188/198 [00:14<00:00, 16.23it/s]
761
  97%|█████████▋| 192/198 [00:14<00:00, 18.80it/s]
762
 
763
+
764
  97%|█████████▋| 192/198 [00:15<00:00, 18.80it/s]
765
 
766
+
767
  97%|█████████▋| 192/198 [00:15<00:00, 18.80it/s]
768
 
769
+
770
  97%|█████████▋| 192/198 [00:15<00:00, 18.80it/s]
771
  98%|█████████▊| 195/198 [00:15<00:00, 16.37it/s]
772
 
773
+
774
  98%|█████████▊| 195/198 [00:15<00:00, 16.37it/s]
775
 
776
+
777
  98%|█████████▊| 195/198 [00:15<00:00, 16.37it/s]
778
 
779
+
780
  98%|█████████▊| 195/198 [00:15<00:00, 16.37it/s]
781
+ [2024-06-06 22:38:14] INFO huggingface_loader.py:197: Unloading HF weight file: /ssd2/models/Qwen2-1.5B-Instruct/model.safetensors
782
+ [2024-06-06 22:38:14] INFO stats.py:77: Time usage: HF loading: 1.873 sec; Pre-quantization mapping: 5.689 sec; Quantization: 0.000 sec
783
+ [2024-06-06 22:38:14] INFO stats.py:91: RAM usage: Peak RAM: 5.751 GB. Total bytes loaded from disk: 5.751 GB
784
+ [2024-06-06 22:38:14] INFO convert_weight.py:155: Parameter size after quantization: 2.875 GB
785
+ [2024-06-06 22:38:14] INFO convert_weight.py:160: Total parameters: 1,543,714,304
786
+ [2024-06-06 22:38:14] INFO convert_weight.py:161: Bits per parameter: 16.000
787
+ [2024-06-06 22:38:14] INFO convert_weight.py:166: Saved to directory: /models/mlc-delivery/hf/mlc-ai/Qwen2-1.5B-Instruct-q0f16-MLC
788
+
789
+ All finished, 67 total shards committed, record saved to /models/mlc-delivery/hf/mlc-ai/Qwen2-1.5B-Instruct-q0f16-MLC/ndarray-cache.json
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
mlc-chat-config.json ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.1.0",
3
+ "model_type": "qwen2",
4
+ "quantization": "q0f16",
5
+ "model_config": {
6
+ "hidden_act": "silu",
7
+ "hidden_size": 1536,
8
+ "intermediate_size": 8960,
9
+ "num_attention_heads": 12,
10
+ "num_hidden_layers": 28,
11
+ "num_key_value_heads": 2,
12
+ "rms_norm_eps": 1e-06,
13
+ "rope_theta": 1000000.0,
14
+ "vocab_size": 151936,
15
+ "tie_word_embeddings": true,
16
+ "context_window_size": 32768,
17
+ "prefill_chunk_size": 2048,
18
+ "tensor_parallel_shards": 1,
19
+ "head_dim": 128,
20
+ "dtype": "float32",
21
+ "max_batch_size": 80
22
+ },
23
+ "vocab_size": 151936,
24
+ "context_window_size": 32768,
25
+ "sliding_window_size": -1,
26
+ "prefill_chunk_size": 2048,
27
+ "attention_sink_size": -1,
28
+ "tensor_parallel_shards": 1,
29
+ "temperature": 0.7,
30
+ "presence_penalty": 0.0,
31
+ "frequency_penalty": 0.0,
32
+ "repetition_penalty": 1.1,
33
+ "top_p": 0.8,
34
+ "tokenizer_files": [
35
+ "tokenizer.json",
36
+ "vocab.json",
37
+ "merges.txt",
38
+ "tokenizer_config.json"
39
+ ],
40
+ "tokenizer_info": {
41
+ "token_postproc_method": "byte_level",
42
+ "prepend_space_in_encode": false,
43
+ "strip_space_in_decode": false
44
+ },
45
+ "conv_template": {
46
+ "name": "chatml",
47
+ "system_template": "<|im_start|>system\n{system_message}",
48
+ "system_message": "A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.",
49
+ "system_prefix_token_ids": null,
50
+ "add_role_after_system_message": true,
51
+ "roles": {
52
+ "user": "<|im_start|>user",
53
+ "assistant": "<|im_start|>assistant"
54
+ },
55
+ "role_templates": {
56
+ "user": "{user_message}",
57
+ "assistant": "{assistant_message}",
58
+ "tool": "{tool_message}"
59
+ },
60
+ "messages": [],
61
+ "seps": [
62
+ "<|im_end|>\n"
63
+ ],
64
+ "role_content_sep": "\n",
65
+ "role_empty_sep": "\n",
66
+ "stop_str": [
67
+ "<|im_end|>"
68
+ ],
69
+ "stop_token_ids": [
70
+ 2
71
+ ],
72
+ "function_string": "",
73
+ "use_function_calling": false
74
+ },
75
+ "pad_token_id": 151643,
76
+ "bos_token_id": 151643,
77
+ "eos_token_id": [
78
+ 151645,
79
+ 151643
80
+ ]
81
+ }
ndarray-cache.json ADDED
@@ -0,0 +1,2638 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 198,
4
+ "ParamBytes": 3087428608.0,
5
+ "BitsPerParam": 16.0
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 466747392,
12
+ "records": [
13
+ {
14
+ "name": "model.embed_tokens.weight",
15
+ "shape": [
16
+ 151936,
17
+ 1536
18
+ ],
19
+ "dtype": "float16",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 466747392,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "4004155c68c07080f39231f289b9c9d3"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 55050240,
31
+ "records": [
32
+ {
33
+ "name": "model.layers.0.mlp.gate_up_proj.weight",
34
+ "shape": [
35
+ 17920,
36
+ 1536
37
+ ],
38
+ "dtype": "float16",
39
+ "format": "f32-to-bf16",
40
+ "nbytes": 55050240,
41
+ "byteOffset": 0
42
+ }
43
+ ],
44
+ "md5sum": "8d9c6137ab1639c5864a3e4f0d47a945"
45
+ },
46
+ {
47
+ "dataPath": "params_shard_2.bin",
48
+ "format": "raw-shard",
49
+ "nbytes": 27535360,
50
+ "records": [
51
+ {
52
+ "name": "model.layers.0.input_layernorm.weight",
53
+ "shape": [
54
+ 1536
55
+ ],
56
+ "dtype": "float16",
57
+ "format": "f32-to-bf16",
58
+ "nbytes": 3072,
59
+ "byteOffset": 0
60
+ },
61
+ {
62
+ "name": "model.layers.0.mlp.down_proj.weight",
63
+ "shape": [
64
+ 1536,
65
+ 8960
66
+ ],
67
+ "dtype": "float16",
68
+ "format": "f32-to-bf16",
69
+ "nbytes": 27525120,
70
+ "byteOffset": 3072
71
+ },
72
+ {
73
+ "name": "model.layers.0.post_attention_layernorm.weight",
74
+ "shape": [
75
+ 1536
76
+ ],
77
+ "dtype": "float16",
78
+ "format": "f32-to-bf16",
79
+ "nbytes": 3072,
80
+ "byteOffset": 27528192
81
+ },
82
+ {
83
+ "name": "model.layers.0.self_attn.c_attn.bias",
84
+ "shape": [
85
+ 2048
86
+ ],
87
+ "dtype": "float16",
88
+ "format": "f32-to-bf16",
89
+ "nbytes": 4096,
90
+ "byteOffset": 27531264
91
+ }
92
+ ],
93
+ "md5sum": "9fd8f6531c7b68276a0e21513cf4d24c"
94
+ },
95
+ {
96
+ "dataPath": "params_shard_3.bin",
97
+ "format": "raw-shard",
98
+ "nbytes": 27525120,
99
+ "records": [
100
+ {
101
+ "name": "model.layers.1.mlp.down_proj.weight",
102
+ "shape": [
103
+ 1536,
104
+ 8960
105
+ ],
106
+ "dtype": "float16",
107
+ "format": "f32-to-bf16",
108
+ "nbytes": 27525120,
109
+ "byteOffset": 0
110
+ }
111
+ ],
112
+ "md5sum": "db7e25bc89a29b9b7bbf6714baa3dc24"
113
+ },
114
+ {
115
+ "dataPath": "params_shard_4.bin",
116
+ "format": "raw-shard",
117
+ "nbytes": 55050240,
118
+ "records": [
119
+ {
120
+ "name": "model.layers.1.mlp.gate_up_proj.weight",
121
+ "shape": [
122
+ 17920,
123
+ 1536
124
+ ],
125
+ "dtype": "float16",
126
+ "format": "f32-to-bf16",
127
+ "nbytes": 55050240,
128
+ "byteOffset": 0
129
+ }
130
+ ],
131
+ "md5sum": "13cf30b3f20ad035712c136f18f7d38d"
132
+ },
133
+ {
134
+ "dataPath": "params_shard_5.bin",
135
+ "format": "raw-shard",
136
+ "nbytes": 27525120,
137
+ "records": [
138
+ {
139
+ "name": "model.layers.10.mlp.down_proj.weight",
140
+ "shape": [
141
+ 1536,
142
+ 8960
143
+ ],
144
+ "dtype": "float16",
145
+ "format": "f32-to-bf16",
146
+ "nbytes": 27525120,
147
+ "byteOffset": 0
148
+ }
149
+ ],
150
+ "md5sum": "dfd4e0fba5875fd1d7db311377b87fe5"
151
+ },
152
+ {
153
+ "dataPath": "params_shard_6.bin",
154
+ "format": "raw-shard",
155
+ "nbytes": 55050240,
156
+ "records": [
157
+ {
158
+ "name": "model.layers.10.mlp.gate_up_proj.weight",
159
+ "shape": [
160
+ 17920,
161
+ 1536
162
+ ],
163
+ "dtype": "float16",
164
+ "format": "f32-to-bf16",
165
+ "nbytes": 55050240,
166
+ "byteOffset": 0
167
+ }
168
+ ],
169
+ "md5sum": "d3c8c121be697e30d9da502d459908dc"
170
+ },
171
+ {
172
+ "dataPath": "params_shard_7.bin",
173
+ "format": "raw-shard",
174
+ "nbytes": 27525120,
175
+ "records": [
176
+ {
177
+ "name": "model.layers.11.mlp.down_proj.weight",
178
+ "shape": [
179
+ 1536,
180
+ 8960
181
+ ],
182
+ "dtype": "float16",
183
+ "format": "f32-to-bf16",
184
+ "nbytes": 27525120,
185
+ "byteOffset": 0
186
+ }
187
+ ],
188
+ "md5sum": "c303778173a91bfdc754b414416c5160"
189
+ },
190
+ {
191
+ "dataPath": "params_shard_8.bin",
192
+ "format": "raw-shard",
193
+ "nbytes": 55050240,
194
+ "records": [
195
+ {
196
+ "name": "model.layers.11.mlp.gate_up_proj.weight",
197
+ "shape": [
198
+ 17920,
199
+ 1536
200
+ ],
201
+ "dtype": "float16",
202
+ "format": "f32-to-bf16",
203
+ "nbytes": 55050240,
204
+ "byteOffset": 0
205
+ }
206
+ ],
207
+ "md5sum": "ab163c3a0afabe864dc1dfaeae1a9908"
208
+ },
209
+ {
210
+ "dataPath": "params_shard_9.bin",
211
+ "format": "raw-shard",
212
+ "nbytes": 33060864,
213
+ "records": [
214
+ {
215
+ "name": "model.layers.0.self_attn.c_attn.weight",
216
+ "shape": [
217
+ 2048,
218
+ 1536
219
+ ],
220
+ "dtype": "float16",
221
+ "format": "f32-to-bf16",
222
+ "nbytes": 6291456,
223
+ "byteOffset": 0
224
+ },
225
+ {
226
+ "name": "model.layers.0.self_attn.o_proj.weight",
227
+ "shape": [
228
+ 1536,
229
+ 1536
230
+ ],
231
+ "dtype": "float16",
232
+ "format": "f32-to-bf16",
233
+ "nbytes": 4718592,
234
+ "byteOffset": 6291456
235
+ },
236
+ {
237
+ "name": "model.layers.1.input_layernorm.weight",
238
+ "shape": [
239
+ 1536
240
+ ],
241
+ "dtype": "float16",
242
+ "format": "f32-to-bf16",
243
+ "nbytes": 3072,
244
+ "byteOffset": 11010048
245
+ },
246
+ {
247
+ "name": "model.layers.1.post_attention_layernorm.weight",
248
+ "shape": [
249
+ 1536
250
+ ],
251
+ "dtype": "float16",
252
+ "format": "f32-to-bf16",
253
+ "nbytes": 3072,
254
+ "byteOffset": 11013120
255
+ },
256
+ {
257
+ "name": "model.layers.1.self_attn.c_attn.bias",
258
+ "shape": [
259
+ 2048
260
+ ],
261
+ "dtype": "float16",
262
+ "format": "f32-to-bf16",
263
+ "nbytes": 4096,
264
+ "byteOffset": 11016192
265
+ },
266
+ {
267
+ "name": "model.layers.1.self_attn.c_attn.weight",
268
+ "shape": [
269
+ 2048,
270
+ 1536
271
+ ],
272
+ "dtype": "float16",
273
+ "format": "f32-to-bf16",
274
+ "nbytes": 6291456,
275
+ "byteOffset": 11020288
276
+ },
277
+ {
278
+ "name": "model.layers.1.self_attn.o_proj.weight",
279
+ "shape": [
280
+ 1536,
281
+ 1536
282
+ ],
283
+ "dtype": "float16",
284
+ "format": "f32-to-bf16",
285
+ "nbytes": 4718592,
286
+ "byteOffset": 17311744
287
+ },
288
+ {
289
+ "name": "model.layers.10.input_layernorm.weight",
290
+ "shape": [
291
+ 1536
292
+ ],
293
+ "dtype": "float16",
294
+ "format": "f32-to-bf16",
295
+ "nbytes": 3072,
296
+ "byteOffset": 22030336
297
+ },
298
+ {
299
+ "name": "model.layers.10.post_attention_layernorm.weight",
300
+ "shape": [
301
+ 1536
302
+ ],
303
+ "dtype": "float16",
304
+ "format": "f32-to-bf16",
305
+ "nbytes": 3072,
306
+ "byteOffset": 22033408
307
+ },
308
+ {
309
+ "name": "model.layers.10.self_attn.c_attn.bias",
310
+ "shape": [
311
+ 2048
312
+ ],
313
+ "dtype": "float16",
314
+ "format": "f32-to-bf16",
315
+ "nbytes": 4096,
316
+ "byteOffset": 22036480
317
+ },
318
+ {
319
+ "name": "model.layers.10.self_attn.c_attn.weight",
320
+ "shape": [
321
+ 2048,
322
+ 1536
323
+ ],
324
+ "dtype": "float16",
325
+ "format": "f32-to-bf16",
326
+ "nbytes": 6291456,
327
+ "byteOffset": 22040576
328
+ },
329
+ {
330
+ "name": "model.layers.10.self_attn.o_proj.weight",
331
+ "shape": [
332
+ 1536,
333
+ 1536
334
+ ],
335
+ "dtype": "float16",
336
+ "format": "f32-to-bf16",
337
+ "nbytes": 4718592,
338
+ "byteOffset": 28332032
339
+ },
340
+ {
341
+ "name": "model.layers.11.input_layernorm.weight",
342
+ "shape": [
343
+ 1536
344
+ ],
345
+ "dtype": "float16",
346
+ "format": "f32-to-bf16",
347
+ "nbytes": 3072,
348
+ "byteOffset": 33050624
349
+ },
350
+ {
351
+ "name": "model.layers.11.post_attention_layernorm.weight",
352
+ "shape": [
353
+ 1536
354
+ ],
355
+ "dtype": "float16",
356
+ "format": "f32-to-bf16",
357
+ "nbytes": 3072,
358
+ "byteOffset": 33053696
359
+ },
360
+ {
361
+ "name": "model.layers.11.self_attn.c_attn.bias",
362
+ "shape": [
363
+ 2048
364
+ ],
365
+ "dtype": "float16",
366
+ "format": "f32-to-bf16",
367
+ "nbytes": 4096,
368
+ "byteOffset": 33056768
369
+ }
370
+ ],
371
+ "md5sum": "2e9b51ed2ac487ad15028dc401beeb7d"
372
+ },
373
+ {
374
+ "dataPath": "params_shard_10.bin",
375
+ "format": "raw-shard",
376
+ "nbytes": 27525120,
377
+ "records": [
378
+ {
379
+ "name": "model.layers.12.mlp.down_proj.weight",
380
+ "shape": [
381
+ 1536,
382
+ 8960
383
+ ],
384
+ "dtype": "float16",
385
+ "format": "f32-to-bf16",
386
+ "nbytes": 27525120,
387
+ "byteOffset": 0
388
+ }
389
+ ],
390
+ "md5sum": "cc304778043000c0f442d99b072b593c"
391
+ },
392
+ {
393
+ "dataPath": "params_shard_11.bin",
394
+ "format": "raw-shard",
395
+ "nbytes": 55050240,
396
+ "records": [
397
+ {
398
+ "name": "model.layers.12.mlp.gate_up_proj.weight",
399
+ "shape": [
400
+ 17920,
401
+ 1536
402
+ ],
403
+ "dtype": "float16",
404
+ "format": "f32-to-bf16",
405
+ "nbytes": 55050240,
406
+ "byteOffset": 0
407
+ }
408
+ ],
409
+ "md5sum": "26700b8899f4d41d92f8439f3e0f1c28"
410
+ },
411
+ {
412
+ "dataPath": "params_shard_12.bin",
413
+ "format": "raw-shard",
414
+ "nbytes": 27525120,
415
+ "records": [
416
+ {
417
+ "name": "model.layers.13.mlp.down_proj.weight",
418
+ "shape": [
419
+ 1536,
420
+ 8960
421
+ ],
422
+ "dtype": "float16",
423
+ "format": "f32-to-bf16",
424
+ "nbytes": 27525120,
425
+ "byteOffset": 0
426
+ }
427
+ ],
428
+ "md5sum": "2ddac3acda693d7656dc82d1cb44a5fa"
429
+ },
430
+ {
431
+ "dataPath": "params_shard_13.bin",
432
+ "format": "raw-shard",
433
+ "nbytes": 55050240,
434
+ "records": [
435
+ {
436
+ "name": "model.layers.13.mlp.gate_up_proj.weight",
437
+ "shape": [
438
+ 17920,
439
+ 1536
440
+ ],
441
+ "dtype": "float16",
442
+ "format": "f32-to-bf16",
443
+ "nbytes": 55050240,
444
+ "byteOffset": 0
445
+ }
446
+ ],
447
+ "md5sum": "00477d2aa802e741a31ae46a2af61519"
448
+ },
449
+ {
450
+ "dataPath": "params_shard_14.bin",
451
+ "format": "raw-shard",
452
+ "nbytes": 27525120,
453
+ "records": [
454
+ {
455
+ "name": "model.layers.14.mlp.down_proj.weight",
456
+ "shape": [
457
+ 1536,
458
+ 8960
459
+ ],
460
+ "dtype": "float16",
461
+ "format": "f32-to-bf16",
462
+ "nbytes": 27525120,
463
+ "byteOffset": 0
464
+ }
465
+ ],
466
+ "md5sum": "a0eb921170b36a016d3b8656caa53b8d"
467
+ },
468
+ {
469
+ "dataPath": "params_shard_15.bin",
470
+ "format": "raw-shard",
471
+ "nbytes": 55050240,
472
+ "records": [
473
+ {
474
+ "name": "model.layers.14.mlp.gate_up_proj.weight",
475
+ "shape": [
476
+ 17920,
477
+ 1536
478
+ ],
479
+ "dtype": "float16",
480
+ "format": "f32-to-bf16",
481
+ "nbytes": 55050240,
482
+ "byteOffset": 0
483
+ }
484
+ ],
485
+ "md5sum": "d80214adb8ccfa0343cf8adb0caec5a4"
486
+ },
487
+ {
488
+ "dataPath": "params_shard_16.bin",
489
+ "format": "raw-shard",
490
+ "nbytes": 33060864,
491
+ "records": [
492
+ {
493
+ "name": "model.layers.11.self_attn.c_attn.weight",
494
+ "shape": [
495
+ 2048,
496
+ 1536
497
+ ],
498
+ "dtype": "float16",
499
+ "format": "f32-to-bf16",
500
+ "nbytes": 6291456,
501
+ "byteOffset": 0
502
+ },
503
+ {
504
+ "name": "model.layers.11.self_attn.o_proj.weight",
505
+ "shape": [
506
+ 1536,
507
+ 1536
508
+ ],
509
+ "dtype": "float16",
510
+ "format": "f32-to-bf16",
511
+ "nbytes": 4718592,
512
+ "byteOffset": 6291456
513
+ },
514
+ {
515
+ "name": "model.layers.12.input_layernorm.weight",
516
+ "shape": [
517
+ 1536
518
+ ],
519
+ "dtype": "float16",
520
+ "format": "f32-to-bf16",
521
+ "nbytes": 3072,
522
+ "byteOffset": 11010048
523
+ },
524
+ {
525
+ "name": "model.layers.12.post_attention_layernorm.weight",
526
+ "shape": [
527
+ 1536
528
+ ],
529
+ "dtype": "float16",
530
+ "format": "f32-to-bf16",
531
+ "nbytes": 3072,
532
+ "byteOffset": 11013120
533
+ },
534
+ {
535
+ "name": "model.layers.12.self_attn.c_attn.bias",
536
+ "shape": [
537
+ 2048
538
+ ],
539
+ "dtype": "float16",
540
+ "format": "f32-to-bf16",
541
+ "nbytes": 4096,
542
+ "byteOffset": 11016192
543
+ },
544
+ {
545
+ "name": "model.layers.12.self_attn.c_attn.weight",
546
+ "shape": [
547
+ 2048,
548
+ 1536
549
+ ],
550
+ "dtype": "float16",
551
+ "format": "f32-to-bf16",
552
+ "nbytes": 6291456,
553
+ "byteOffset": 11020288
554
+ },
555
+ {
556
+ "name": "model.layers.12.self_attn.o_proj.weight",
557
+ "shape": [
558
+ 1536,
559
+ 1536
560
+ ],
561
+ "dtype": "float16",
562
+ "format": "f32-to-bf16",
563
+ "nbytes": 4718592,
564
+ "byteOffset": 17311744
565
+ },
566
+ {
567
+ "name": "model.layers.13.input_layernorm.weight",
568
+ "shape": [
569
+ 1536
570
+ ],
571
+ "dtype": "float16",
572
+ "format": "f32-to-bf16",
573
+ "nbytes": 3072,
574
+ "byteOffset": 22030336
575
+ },
576
+ {
577
+ "name": "model.layers.13.post_attention_layernorm.weight",
578
+ "shape": [
579
+ 1536
580
+ ],
581
+ "dtype": "float16",
582
+ "format": "f32-to-bf16",
583
+ "nbytes": 3072,
584
+ "byteOffset": 22033408
585
+ },
586
+ {
587
+ "name": "model.layers.13.self_attn.c_attn.bias",
588
+ "shape": [
589
+ 2048
590
+ ],
591
+ "dtype": "float16",
592
+ "format": "f32-to-bf16",
593
+ "nbytes": 4096,
594
+ "byteOffset": 22036480
595
+ },
596
+ {
597
+ "name": "model.layers.13.self_attn.c_attn.weight",
598
+ "shape": [
599
+ 2048,
600
+ 1536
601
+ ],
602
+ "dtype": "float16",
603
+ "format": "f32-to-bf16",
604
+ "nbytes": 6291456,
605
+ "byteOffset": 22040576
606
+ },
607
+ {
608
+ "name": "model.layers.13.self_attn.o_proj.weight",
609
+ "shape": [
610
+ 1536,
611
+ 1536
612
+ ],
613
+ "dtype": "float16",
614
+ "format": "f32-to-bf16",
615
+ "nbytes": 4718592,
616
+ "byteOffset": 28332032
617
+ },
618
+ {
619
+ "name": "model.layers.14.input_layernorm.weight",
620
+ "shape": [
621
+ 1536
622
+ ],
623
+ "dtype": "float16",
624
+ "format": "f32-to-bf16",
625
+ "nbytes": 3072,
626
+ "byteOffset": 33050624
627
+ },
628
+ {
629
+ "name": "model.layers.14.post_attention_layernorm.weight",
630
+ "shape": [
631
+ 1536
632
+ ],
633
+ "dtype": "float16",
634
+ "format": "f32-to-bf16",
635
+ "nbytes": 3072,
636
+ "byteOffset": 33053696
637
+ },
638
+ {
639
+ "name": "model.layers.14.self_attn.c_attn.bias",
640
+ "shape": [
641
+ 2048
642
+ ],
643
+ "dtype": "float16",
644
+ "format": "f32-to-bf16",
645
+ "nbytes": 4096,
646
+ "byteOffset": 33056768
647
+ }
648
+ ],
649
+ "md5sum": "836ad76f4bf6b3ea6142a57d73863eab"
650
+ },
651
+ {
652
+ "dataPath": "params_shard_17.bin",
653
+ "format": "raw-shard",
654
+ "nbytes": 27525120,
655
+ "records": [
656
+ {
657
+ "name": "model.layers.15.mlp.down_proj.weight",
658
+ "shape": [
659
+ 1536,
660
+ 8960
661
+ ],
662
+ "dtype": "float16",
663
+ "format": "f32-to-bf16",
664
+ "nbytes": 27525120,
665
+ "byteOffset": 0
666
+ }
667
+ ],
668
+ "md5sum": "69c8ebf98b69e0a7bed86b1b095d4e58"
669
+ },
670
+ {
671
+ "dataPath": "params_shard_18.bin",
672
+ "format": "raw-shard",
673
+ "nbytes": 55050240,
674
+ "records": [
675
+ {
676
+ "name": "model.layers.15.mlp.gate_up_proj.weight",
677
+ "shape": [
678
+ 17920,
679
+ 1536
680
+ ],
681
+ "dtype": "float16",
682
+ "format": "f32-to-bf16",
683
+ "nbytes": 55050240,
684
+ "byteOffset": 0
685
+ }
686
+ ],
687
+ "md5sum": "bc5b5e85e1e1ca4a340dc0ae73d0cf70"
688
+ },
689
+ {
690
+ "dataPath": "params_shard_19.bin",
691
+ "format": "raw-shard",
692
+ "nbytes": 27525120,
693
+ "records": [
694
+ {
695
+ "name": "model.layers.16.mlp.down_proj.weight",
696
+ "shape": [
697
+ 1536,
698
+ 8960
699
+ ],
700
+ "dtype": "float16",
701
+ "format": "f32-to-bf16",
702
+ "nbytes": 27525120,
703
+ "byteOffset": 0
704
+ }
705
+ ],
706
+ "md5sum": "1f0d7a92b72e8c084a47173269fabe37"
707
+ },
708
+ {
709
+ "dataPath": "params_shard_20.bin",
710
+ "format": "raw-shard",
711
+ "nbytes": 55050240,
712
+ "records": [
713
+ {
714
+ "name": "model.layers.16.mlp.gate_up_proj.weight",
715
+ "shape": [
716
+ 17920,
717
+ 1536
718
+ ],
719
+ "dtype": "float16",
720
+ "format": "f32-to-bf16",
721
+ "nbytes": 55050240,
722
+ "byteOffset": 0
723
+ }
724
+ ],
725
+ "md5sum": "7868afb9efef145589b5d006eaa1d2cb"
726
+ },
727
+ {
728
+ "dataPath": "params_shard_21.bin",
729
+ "format": "raw-shard",
730
+ "nbytes": 27525120,
731
+ "records": [
732
+ {
733
+ "name": "model.layers.17.mlp.down_proj.weight",
734
+ "shape": [
735
+ 1536,
736
+ 8960
737
+ ],
738
+ "dtype": "float16",
739
+ "format": "f32-to-bf16",
740
+ "nbytes": 27525120,
741
+ "byteOffset": 0
742
+ }
743
+ ],
744
+ "md5sum": "64dd36a133277b89ecb7b753fa45a053"
745
+ },
746
+ {
747
+ "dataPath": "params_shard_22.bin",
748
+ "format": "raw-shard",
749
+ "nbytes": 55050240,
750
+ "records": [
751
+ {
752
+ "name": "model.layers.17.mlp.gate_up_proj.weight",
753
+ "shape": [
754
+ 17920,
755
+ 1536
756
+ ],
757
+ "dtype": "float16",
758
+ "format": "f32-to-bf16",
759
+ "nbytes": 55050240,
760
+ "byteOffset": 0
761
+ }
762
+ ],
763
+ "md5sum": "9ed20f54aebad5411cce28791db1bd4f"
764
+ },
765
+ {
766
+ "dataPath": "params_shard_23.bin",
767
+ "format": "raw-shard",
768
+ "nbytes": 33060864,
769
+ "records": [
770
+ {
771
+ "name": "model.layers.14.self_attn.c_attn.weight",
772
+ "shape": [
773
+ 2048,
774
+ 1536
775
+ ],
776
+ "dtype": "float16",
777
+ "format": "f32-to-bf16",
778
+ "nbytes": 6291456,
779
+ "byteOffset": 0
780
+ },
781
+ {
782
+ "name": "model.layers.14.self_attn.o_proj.weight",
783
+ "shape": [
784
+ 1536,
785
+ 1536
786
+ ],
787
+ "dtype": "float16",
788
+ "format": "f32-to-bf16",
789
+ "nbytes": 4718592,
790
+ "byteOffset": 6291456
791
+ },
792
+ {
793
+ "name": "model.layers.15.input_layernorm.weight",
794
+ "shape": [
795
+ 1536
796
+ ],
797
+ "dtype": "float16",
798
+ "format": "f32-to-bf16",
799
+ "nbytes": 3072,
800
+ "byteOffset": 11010048
801
+ },
802
+ {
803
+ "name": "model.layers.15.post_attention_layernorm.weight",
804
+ "shape": [
805
+ 1536
806
+ ],
807
+ "dtype": "float16",
808
+ "format": "f32-to-bf16",
809
+ "nbytes": 3072,
810
+ "byteOffset": 11013120
811
+ },
812
+ {
813
+ "name": "model.layers.15.self_attn.c_attn.bias",
814
+ "shape": [
815
+ 2048
816
+ ],
817
+ "dtype": "float16",
818
+ "format": "f32-to-bf16",
819
+ "nbytes": 4096,
820
+ "byteOffset": 11016192
821
+ },
822
+ {
823
+ "name": "model.layers.15.self_attn.c_attn.weight",
824
+ "shape": [
825
+ 2048,
826
+ 1536
827
+ ],
828
+ "dtype": "float16",
829
+ "format": "f32-to-bf16",
830
+ "nbytes": 6291456,
831
+ "byteOffset": 11020288
832
+ },
833
+ {
834
+ "name": "model.layers.15.self_attn.o_proj.weight",
835
+ "shape": [
836
+ 1536,
837
+ 1536
838
+ ],
839
+ "dtype": "float16",
840
+ "format": "f32-to-bf16",
841
+ "nbytes": 4718592,
842
+ "byteOffset": 17311744
843
+ },
844
+ {
845
+ "name": "model.layers.16.input_layernorm.weight",
846
+ "shape": [
847
+ 1536
848
+ ],
849
+ "dtype": "float16",
850
+ "format": "f32-to-bf16",
851
+ "nbytes": 3072,
852
+ "byteOffset": 22030336
853
+ },
854
+ {
855
+ "name": "model.layers.16.post_attention_layernorm.weight",
856
+ "shape": [
857
+ 1536
858
+ ],
859
+ "dtype": "float16",
860
+ "format": "f32-to-bf16",
861
+ "nbytes": 3072,
862
+ "byteOffset": 22033408
863
+ },
864
+ {
865
+ "name": "model.layers.16.self_attn.c_attn.bias",
866
+ "shape": [
867
+ 2048
868
+ ],
869
+ "dtype": "float16",
870
+ "format": "f32-to-bf16",
871
+ "nbytes": 4096,
872
+ "byteOffset": 22036480
873
+ },
874
+ {
875
+ "name": "model.layers.16.self_attn.c_attn.weight",
876
+ "shape": [
877
+ 2048,
878
+ 1536
879
+ ],
880
+ "dtype": "float16",
881
+ "format": "f32-to-bf16",
882
+ "nbytes": 6291456,
883
+ "byteOffset": 22040576
884
+ },
885
+ {
886
+ "name": "model.layers.16.self_attn.o_proj.weight",
887
+ "shape": [
888
+ 1536,
889
+ 1536
890
+ ],
891
+ "dtype": "float16",
892
+ "format": "f32-to-bf16",
893
+ "nbytes": 4718592,
894
+ "byteOffset": 28332032
895
+ },
896
+ {
897
+ "name": "model.layers.17.input_layernorm.weight",
898
+ "shape": [
899
+ 1536
900
+ ],
901
+ "dtype": "float16",
902
+ "format": "f32-to-bf16",
903
+ "nbytes": 3072,
904
+ "byteOffset": 33050624
905
+ },
906
+ {
907
+ "name": "model.layers.17.post_attention_layernorm.weight",
908
+ "shape": [
909
+ 1536
910
+ ],
911
+ "dtype": "float16",
912
+ "format": "f32-to-bf16",
913
+ "nbytes": 3072,
914
+ "byteOffset": 33053696
915
+ },
916
+ {
917
+ "name": "model.layers.17.self_attn.c_attn.bias",
918
+ "shape": [
919
+ 2048
920
+ ],
921
+ "dtype": "float16",
922
+ "format": "f32-to-bf16",
923
+ "nbytes": 4096,
924
+ "byteOffset": 33056768
925
+ }
926
+ ],
927
+ "md5sum": "d3ca8d87d2da5c44ba5209a2791736da"
928
+ },
929
+ {
930
+ "dataPath": "params_shard_24.bin",
931
+ "format": "raw-shard",
932
+ "nbytes": 27525120,
933
+ "records": [
934
+ {
935
+ "name": "model.layers.18.mlp.down_proj.weight",
936
+ "shape": [
937
+ 1536,
938
+ 8960
939
+ ],
940
+ "dtype": "float16",
941
+ "format": "f32-to-bf16",
942
+ "nbytes": 27525120,
943
+ "byteOffset": 0
944
+ }
945
+ ],
946
+ "md5sum": "3dd39a6d6ad6ed3ea7b684810336118a"
947
+ },
948
+ {
949
+ "dataPath": "params_shard_25.bin",
950
+ "format": "raw-shard",
951
+ "nbytes": 55050240,
952
+ "records": [
953
+ {
954
+ "name": "model.layers.18.mlp.gate_up_proj.weight",
955
+ "shape": [
956
+ 17920,
957
+ 1536
958
+ ],
959
+ "dtype": "float16",
960
+ "format": "f32-to-bf16",
961
+ "nbytes": 55050240,
962
+ "byteOffset": 0
963
+ }
964
+ ],
965
+ "md5sum": "3a8f050e1c81d9c21bd78dc7df0d4bc4"
966
+ },
967
+ {
968
+ "dataPath": "params_shard_26.bin",
969
+ "format": "raw-shard",
970
+ "nbytes": 27525120,
971
+ "records": [
972
+ {
973
+ "name": "model.layers.19.mlp.down_proj.weight",
974
+ "shape": [
975
+ 1536,
976
+ 8960
977
+ ],
978
+ "dtype": "float16",
979
+ "format": "f32-to-bf16",
980
+ "nbytes": 27525120,
981
+ "byteOffset": 0
982
+ }
983
+ ],
984
+ "md5sum": "36ca71a16ca2834d3164b92c97a170ab"
985
+ },
986
+ {
987
+ "dataPath": "params_shard_27.bin",
988
+ "format": "raw-shard",
989
+ "nbytes": 55050240,
990
+ "records": [
991
+ {
992
+ "name": "model.layers.19.mlp.gate_up_proj.weight",
993
+ "shape": [
994
+ 17920,
995
+ 1536
996
+ ],
997
+ "dtype": "float16",
998
+ "format": "f32-to-bf16",
999
+ "nbytes": 55050240,
1000
+ "byteOffset": 0
1001
+ }
1002
+ ],
1003
+ "md5sum": "9f22ca095f2021608fae9bc98136d252"
1004
+ },
1005
+ {
1006
+ "dataPath": "params_shard_28.bin",
1007
+ "format": "raw-shard",
1008
+ "nbytes": 27525120,
1009
+ "records": [
1010
+ {
1011
+ "name": "model.layers.2.mlp.down_proj.weight",
1012
+ "shape": [
1013
+ 1536,
1014
+ 8960
1015
+ ],
1016
+ "dtype": "float16",
1017
+ "format": "f32-to-bf16",
1018
+ "nbytes": 27525120,
1019
+ "byteOffset": 0
1020
+ }
1021
+ ],
1022
+ "md5sum": "fab4a0e7de9cca7058251ee57a4a9742"
1023
+ },
1024
+ {
1025
+ "dataPath": "params_shard_29.bin",
1026
+ "format": "raw-shard",
1027
+ "nbytes": 55050240,
1028
+ "records": [
1029
+ {
1030
+ "name": "model.layers.2.mlp.gate_up_proj.weight",
1031
+ "shape": [
1032
+ 17920,
1033
+ 1536
1034
+ ],
1035
+ "dtype": "float16",
1036
+ "format": "f32-to-bf16",
1037
+ "nbytes": 55050240,
1038
+ "byteOffset": 0
1039
+ }
1040
+ ],
1041
+ "md5sum": "e6b441ea46b5d4ba144d0ede689da1a2"
1042
+ },
1043
+ {
1044
+ "dataPath": "params_shard_30.bin",
1045
+ "format": "raw-shard",
1046
+ "nbytes": 33060864,
1047
+ "records": [
1048
+ {
1049
+ "name": "model.layers.17.self_attn.c_attn.weight",
1050
+ "shape": [
1051
+ 2048,
1052
+ 1536
1053
+ ],
1054
+ "dtype": "float16",
1055
+ "format": "f32-to-bf16",
1056
+ "nbytes": 6291456,
1057
+ "byteOffset": 0
1058
+ },
1059
+ {
1060
+ "name": "model.layers.17.self_attn.o_proj.weight",
1061
+ "shape": [
1062
+ 1536,
1063
+ 1536
1064
+ ],
1065
+ "dtype": "float16",
1066
+ "format": "f32-to-bf16",
1067
+ "nbytes": 4718592,
1068
+ "byteOffset": 6291456
1069
+ },
1070
+ {
1071
+ "name": "model.layers.18.input_layernorm.weight",
1072
+ "shape": [
1073
+ 1536
1074
+ ],
1075
+ "dtype": "float16",
1076
+ "format": "f32-to-bf16",
1077
+ "nbytes": 3072,
1078
+ "byteOffset": 11010048
1079
+ },
1080
+ {
1081
+ "name": "model.layers.18.post_attention_layernorm.weight",
1082
+ "shape": [
1083
+ 1536
1084
+ ],
1085
+ "dtype": "float16",
1086
+ "format": "f32-to-bf16",
1087
+ "nbytes": 3072,
1088
+ "byteOffset": 11013120
1089
+ },
1090
+ {
1091
+ "name": "model.layers.18.self_attn.c_attn.bias",
1092
+ "shape": [
1093
+ 2048
1094
+ ],
1095
+ "dtype": "float16",
1096
+ "format": "f32-to-bf16",
1097
+ "nbytes": 4096,
1098
+ "byteOffset": 11016192
1099
+ },
1100
+ {
1101
+ "name": "model.layers.18.self_attn.c_attn.weight",
1102
+ "shape": [
1103
+ 2048,
1104
+ 1536
1105
+ ],
1106
+ "dtype": "float16",
1107
+ "format": "f32-to-bf16",
1108
+ "nbytes": 6291456,
1109
+ "byteOffset": 11020288
1110
+ },
1111
+ {
1112
+ "name": "model.layers.18.self_attn.o_proj.weight",
1113
+ "shape": [
1114
+ 1536,
1115
+ 1536
1116
+ ],
1117
+ "dtype": "float16",
1118
+ "format": "f32-to-bf16",
1119
+ "nbytes": 4718592,
1120
+ "byteOffset": 17311744
1121
+ },
1122
+ {
1123
+ "name": "model.layers.19.input_layernorm.weight",
1124
+ "shape": [
1125
+ 1536
1126
+ ],
1127
+ "dtype": "float16",
1128
+ "format": "f32-to-bf16",
1129
+ "nbytes": 3072,
1130
+ "byteOffset": 22030336
1131
+ },
1132
+ {
1133
+ "name": "model.layers.19.post_attention_layernorm.weight",
1134
+ "shape": [
1135
+ 1536
1136
+ ],
1137
+ "dtype": "float16",
1138
+ "format": "f32-to-bf16",
1139
+ "nbytes": 3072,
1140
+ "byteOffset": 22033408
1141
+ },
1142
+ {
1143
+ "name": "model.layers.19.self_attn.c_attn.bias",
1144
+ "shape": [
1145
+ 2048
1146
+ ],
1147
+ "dtype": "float16",
1148
+ "format": "f32-to-bf16",
1149
+ "nbytes": 4096,
1150
+ "byteOffset": 22036480
1151
+ },
1152
+ {
1153
+ "name": "model.layers.19.self_attn.c_attn.weight",
1154
+ "shape": [
1155
+ 2048,
1156
+ 1536
1157
+ ],
1158
+ "dtype": "float16",
1159
+ "format": "f32-to-bf16",
1160
+ "nbytes": 6291456,
1161
+ "byteOffset": 22040576
1162
+ },
1163
+ {
1164
+ "name": "model.layers.19.self_attn.o_proj.weight",
1165
+ "shape": [
1166
+ 1536,
1167
+ 1536
1168
+ ],
1169
+ "dtype": "float16",
1170
+ "format": "f32-to-bf16",
1171
+ "nbytes": 4718592,
1172
+ "byteOffset": 28332032
1173
+ },
1174
+ {
1175
+ "name": "model.layers.2.input_layernorm.weight",
1176
+ "shape": [
1177
+ 1536
1178
+ ],
1179
+ "dtype": "float16",
1180
+ "format": "f32-to-bf16",
1181
+ "nbytes": 3072,
1182
+ "byteOffset": 33050624
1183
+ },
1184
+ {
1185
+ "name": "model.layers.2.post_attention_layernorm.weight",
1186
+ "shape": [
1187
+ 1536
1188
+ ],
1189
+ "dtype": "float16",
1190
+ "format": "f32-to-bf16",
1191
+ "nbytes": 3072,
1192
+ "byteOffset": 33053696
1193
+ },
1194
+ {
1195
+ "name": "model.layers.2.self_attn.c_attn.bias",
1196
+ "shape": [
1197
+ 2048
1198
+ ],
1199
+ "dtype": "float16",
1200
+ "format": "f32-to-bf16",
1201
+ "nbytes": 4096,
1202
+ "byteOffset": 33056768
1203
+ }
1204
+ ],
1205
+ "md5sum": "ec32c77b7b448670b9fbb3e4dadd2b9f"
1206
+ },
1207
+ {
1208
+ "dataPath": "params_shard_31.bin",
1209
+ "format": "raw-shard",
1210
+ "nbytes": 27525120,
1211
+ "records": [
1212
+ {
1213
+ "name": "model.layers.20.mlp.down_proj.weight",
1214
+ "shape": [
1215
+ 1536,
1216
+ 8960
1217
+ ],
1218
+ "dtype": "float16",
1219
+ "format": "f32-to-bf16",
1220
+ "nbytes": 27525120,
1221
+ "byteOffset": 0
1222
+ }
1223
+ ],
1224
+ "md5sum": "169d2af7bb908d1d1319b2327ffd685d"
1225
+ },
1226
+ {
1227
+ "dataPath": "params_shard_32.bin",
1228
+ "format": "raw-shard",
1229
+ "nbytes": 55050240,
1230
+ "records": [
1231
+ {
1232
+ "name": "model.layers.20.mlp.gate_up_proj.weight",
1233
+ "shape": [
1234
+ 17920,
1235
+ 1536
1236
+ ],
1237
+ "dtype": "float16",
1238
+ "format": "f32-to-bf16",
1239
+ "nbytes": 55050240,
1240
+ "byteOffset": 0
1241
+ }
1242
+ ],
1243
+ "md5sum": "549d4ada0599a1d35bf7dd3f87dfad84"
1244
+ },
1245
+ {
1246
+ "dataPath": "params_shard_33.bin",
1247
+ "format": "raw-shard",
1248
+ "nbytes": 27525120,
1249
+ "records": [
1250
+ {
1251
+ "name": "model.layers.21.mlp.down_proj.weight",
1252
+ "shape": [
1253
+ 1536,
1254
+ 8960
1255
+ ],
1256
+ "dtype": "float16",
1257
+ "format": "f32-to-bf16",
1258
+ "nbytes": 27525120,
1259
+ "byteOffset": 0
1260
+ }
1261
+ ],
1262
+ "md5sum": "6ea484043810c0eeff8409f4fffc7c14"
1263
+ },
1264
+ {
1265
+ "dataPath": "params_shard_34.bin",
1266
+ "format": "raw-shard",
1267
+ "nbytes": 55050240,
1268
+ "records": [
1269
+ {
1270
+ "name": "model.layers.21.mlp.gate_up_proj.weight",
1271
+ "shape": [
1272
+ 17920,
1273
+ 1536
1274
+ ],
1275
+ "dtype": "float16",
1276
+ "format": "f32-to-bf16",
1277
+ "nbytes": 55050240,
1278
+ "byteOffset": 0
1279
+ }
1280
+ ],
1281
+ "md5sum": "f09443ff1e071a170ee446e8b5c9e983"
1282
+ },
1283
+ {
1284
+ "dataPath": "params_shard_35.bin",
1285
+ "format": "raw-shard",
1286
+ "nbytes": 27525120,
1287
+ "records": [
1288
+ {
1289
+ "name": "model.layers.22.mlp.down_proj.weight",
1290
+ "shape": [
1291
+ 1536,
1292
+ 8960
1293
+ ],
1294
+ "dtype": "float16",
1295
+ "format": "f32-to-bf16",
1296
+ "nbytes": 27525120,
1297
+ "byteOffset": 0
1298
+ }
1299
+ ],
1300
+ "md5sum": "367fc7511fd53ad92f83eff81ee49dc2"
1301
+ },
1302
+ {
1303
+ "dataPath": "params_shard_36.bin",
1304
+ "format": "raw-shard",
1305
+ "nbytes": 55050240,
1306
+ "records": [
1307
+ {
1308
+ "name": "model.layers.22.mlp.gate_up_proj.weight",
1309
+ "shape": [
1310
+ 17920,
1311
+ 1536
1312
+ ],
1313
+ "dtype": "float16",
1314
+ "format": "f32-to-bf16",
1315
+ "nbytes": 55050240,
1316
+ "byteOffset": 0
1317
+ }
1318
+ ],
1319
+ "md5sum": "0a8e85d059205dd0b12cbfd488dbcc1f"
1320
+ },
1321
+ {
1322
+ "dataPath": "params_shard_37.bin",
1323
+ "format": "raw-shard",
1324
+ "nbytes": 33060864,
1325
+ "records": [
1326
+ {
1327
+ "name": "model.layers.2.self_attn.c_attn.weight",
1328
+ "shape": [
1329
+ 2048,
1330
+ 1536
1331
+ ],
1332
+ "dtype": "float16",
1333
+ "format": "f32-to-bf16",
1334
+ "nbytes": 6291456,
1335
+ "byteOffset": 0
1336
+ },
1337
+ {
1338
+ "name": "model.layers.2.self_attn.o_proj.weight",
1339
+ "shape": [
1340
+ 1536,
1341
+ 1536
1342
+ ],
1343
+ "dtype": "float16",
1344
+ "format": "f32-to-bf16",
1345
+ "nbytes": 4718592,
1346
+ "byteOffset": 6291456
1347
+ },
1348
+ {
1349
+ "name": "model.layers.20.input_layernorm.weight",
1350
+ "shape": [
1351
+ 1536
1352
+ ],
1353
+ "dtype": "float16",
1354
+ "format": "f32-to-bf16",
1355
+ "nbytes": 3072,
1356
+ "byteOffset": 11010048
1357
+ },
1358
+ {
1359
+ "name": "model.layers.20.post_attention_layernorm.weight",
1360
+ "shape": [
1361
+ 1536
1362
+ ],
1363
+ "dtype": "float16",
1364
+ "format": "f32-to-bf16",
1365
+ "nbytes": 3072,
1366
+ "byteOffset": 11013120
1367
+ },
1368
+ {
1369
+ "name": "model.layers.20.self_attn.c_attn.bias",
1370
+ "shape": [
1371
+ 2048
1372
+ ],
1373
+ "dtype": "float16",
1374
+ "format": "f32-to-bf16",
1375
+ "nbytes": 4096,
1376
+ "byteOffset": 11016192
1377
+ },
1378
+ {
1379
+ "name": "model.layers.20.self_attn.c_attn.weight",
1380
+ "shape": [
1381
+ 2048,
1382
+ 1536
1383
+ ],
1384
+ "dtype": "float16",
1385
+ "format": "f32-to-bf16",
1386
+ "nbytes": 6291456,
1387
+ "byteOffset": 11020288
1388
+ },
1389
+ {
1390
+ "name": "model.layers.20.self_attn.o_proj.weight",
1391
+ "shape": [
1392
+ 1536,
1393
+ 1536
1394
+ ],
1395
+ "dtype": "float16",
1396
+ "format": "f32-to-bf16",
1397
+ "nbytes": 4718592,
1398
+ "byteOffset": 17311744
1399
+ },
1400
+ {
1401
+ "name": "model.layers.21.input_layernorm.weight",
1402
+ "shape": [
1403
+ 1536
1404
+ ],
1405
+ "dtype": "float16",
1406
+ "format": "f32-to-bf16",
1407
+ "nbytes": 3072,
1408
+ "byteOffset": 22030336
1409
+ },
1410
+ {
1411
+ "name": "model.layers.21.post_attention_layernorm.weight",
1412
+ "shape": [
1413
+ 1536
1414
+ ],
1415
+ "dtype": "float16",
1416
+ "format": "f32-to-bf16",
1417
+ "nbytes": 3072,
1418
+ "byteOffset": 22033408
1419
+ },
1420
+ {
1421
+ "name": "model.layers.21.self_attn.c_attn.bias",
1422
+ "shape": [
1423
+ 2048
1424
+ ],
1425
+ "dtype": "float16",
1426
+ "format": "f32-to-bf16",
1427
+ "nbytes": 4096,
1428
+ "byteOffset": 22036480
1429
+ },
1430
+ {
1431
+ "name": "model.layers.21.self_attn.c_attn.weight",
1432
+ "shape": [
1433
+ 2048,
1434
+ 1536
1435
+ ],
1436
+ "dtype": "float16",
1437
+ "format": "f32-to-bf16",
1438
+ "nbytes": 6291456,
1439
+ "byteOffset": 22040576
1440
+ },
1441
+ {
1442
+ "name": "model.layers.21.self_attn.o_proj.weight",
1443
+ "shape": [
1444
+ 1536,
1445
+ 1536
1446
+ ],
1447
+ "dtype": "float16",
1448
+ "format": "f32-to-bf16",
1449
+ "nbytes": 4718592,
1450
+ "byteOffset": 28332032
1451
+ },
1452
+ {
1453
+ "name": "model.layers.22.input_layernorm.weight",
1454
+ "shape": [
1455
+ 1536
1456
+ ],
1457
+ "dtype": "float16",
1458
+ "format": "f32-to-bf16",
1459
+ "nbytes": 3072,
1460
+ "byteOffset": 33050624
1461
+ },
1462
+ {
1463
+ "name": "model.layers.22.post_attention_layernorm.weight",
1464
+ "shape": [
1465
+ 1536
1466
+ ],
1467
+ "dtype": "float16",
1468
+ "format": "f32-to-bf16",
1469
+ "nbytes": 3072,
1470
+ "byteOffset": 33053696
1471
+ },
1472
+ {
1473
+ "name": "model.layers.22.self_attn.c_attn.bias",
1474
+ "shape": [
1475
+ 2048
1476
+ ],
1477
+ "dtype": "float16",
1478
+ "format": "f32-to-bf16",
1479
+ "nbytes": 4096,
1480
+ "byteOffset": 33056768
1481
+ }
1482
+ ],
1483
+ "md5sum": "670d9f64575bb07241d48a8283edc7c3"
1484
+ },
1485
+ {
1486
+ "dataPath": "params_shard_38.bin",
1487
+ "format": "raw-shard",
1488
+ "nbytes": 27525120,
1489
+ "records": [
1490
+ {
1491
+ "name": "model.layers.23.mlp.down_proj.weight",
1492
+ "shape": [
1493
+ 1536,
1494
+ 8960
1495
+ ],
1496
+ "dtype": "float16",
1497
+ "format": "f32-to-bf16",
1498
+ "nbytes": 27525120,
1499
+ "byteOffset": 0
1500
+ }
1501
+ ],
1502
+ "md5sum": "55dc4417efdd8592cf2596108dc49e03"
1503
+ },
1504
+ {
1505
+ "dataPath": "params_shard_39.bin",
1506
+ "format": "raw-shard",
1507
+ "nbytes": 55050240,
1508
+ "records": [
1509
+ {
1510
+ "name": "model.layers.23.mlp.gate_up_proj.weight",
1511
+ "shape": [
1512
+ 17920,
1513
+ 1536
1514
+ ],
1515
+ "dtype": "float16",
1516
+ "format": "f32-to-bf16",
1517
+ "nbytes": 55050240,
1518
+ "byteOffset": 0
1519
+ }
1520
+ ],
1521
+ "md5sum": "59aae069c3cf2e5dfc148b92384dea3d"
1522
+ },
1523
+ {
1524
+ "dataPath": "params_shard_40.bin",
1525
+ "format": "raw-shard",
1526
+ "nbytes": 27525120,
1527
+ "records": [
1528
+ {
1529
+ "name": "model.layers.24.mlp.down_proj.weight",
1530
+ "shape": [
1531
+ 1536,
1532
+ 8960
1533
+ ],
1534
+ "dtype": "float16",
1535
+ "format": "f32-to-bf16",
1536
+ "nbytes": 27525120,
1537
+ "byteOffset": 0
1538
+ }
1539
+ ],
1540
+ "md5sum": "3ee6d1d9d6a0d40aa5c6cda617a5d1bb"
1541
+ },
1542
+ {
1543
+ "dataPath": "params_shard_41.bin",
1544
+ "format": "raw-shard",
1545
+ "nbytes": 55050240,
1546
+ "records": [
1547
+ {
1548
+ "name": "model.layers.24.mlp.gate_up_proj.weight",
1549
+ "shape": [
1550
+ 17920,
1551
+ 1536
1552
+ ],
1553
+ "dtype": "float16",
1554
+ "format": "f32-to-bf16",
1555
+ "nbytes": 55050240,
1556
+ "byteOffset": 0
1557
+ }
1558
+ ],
1559
+ "md5sum": "a1c9e0afa3d19157d9470ecbe8d02441"
1560
+ },
1561
+ {
1562
+ "dataPath": "params_shard_42.bin",
1563
+ "format": "raw-shard",
1564
+ "nbytes": 27525120,
1565
+ "records": [
1566
+ {
1567
+ "name": "model.layers.25.mlp.down_proj.weight",
1568
+ "shape": [
1569
+ 1536,
1570
+ 8960
1571
+ ],
1572
+ "dtype": "float16",
1573
+ "format": "f32-to-bf16",
1574
+ "nbytes": 27525120,
1575
+ "byteOffset": 0
1576
+ }
1577
+ ],
1578
+ "md5sum": "0c88c6fbbe93a15c6d1c34b64769e9ee"
1579
+ },
1580
+ {
1581
+ "dataPath": "params_shard_43.bin",
1582
+ "format": "raw-shard",
1583
+ "nbytes": 55050240,
1584
+ "records": [
1585
+ {
1586
+ "name": "model.layers.25.mlp.gate_up_proj.weight",
1587
+ "shape": [
1588
+ 17920,
1589
+ 1536
1590
+ ],
1591
+ "dtype": "float16",
1592
+ "format": "f32-to-bf16",
1593
+ "nbytes": 55050240,
1594
+ "byteOffset": 0
1595
+ }
1596
+ ],
1597
+ "md5sum": "bf742044703c2a29f0c83f872a30751a"
1598
+ },
1599
+ {
1600
+ "dataPath": "params_shard_44.bin",
1601
+ "format": "raw-shard",
1602
+ "nbytes": 33060864,
1603
+ "records": [
1604
+ {
1605
+ "name": "model.layers.22.self_attn.c_attn.weight",
1606
+ "shape": [
1607
+ 2048,
1608
+ 1536
1609
+ ],
1610
+ "dtype": "float16",
1611
+ "format": "f32-to-bf16",
1612
+ "nbytes": 6291456,
1613
+ "byteOffset": 0
1614
+ },
1615
+ {
1616
+ "name": "model.layers.22.self_attn.o_proj.weight",
1617
+ "shape": [
1618
+ 1536,
1619
+ 1536
1620
+ ],
1621
+ "dtype": "float16",
1622
+ "format": "f32-to-bf16",
1623
+ "nbytes": 4718592,
1624
+ "byteOffset": 6291456
1625
+ },
1626
+ {
1627
+ "name": "model.layers.23.input_layernorm.weight",
1628
+ "shape": [
1629
+ 1536
1630
+ ],
1631
+ "dtype": "float16",
1632
+ "format": "f32-to-bf16",
1633
+ "nbytes": 3072,
1634
+ "byteOffset": 11010048
1635
+ },
1636
+ {
1637
+ "name": "model.layers.23.post_attention_layernorm.weight",
1638
+ "shape": [
1639
+ 1536
1640
+ ],
1641
+ "dtype": "float16",
1642
+ "format": "f32-to-bf16",
1643
+ "nbytes": 3072,
1644
+ "byteOffset": 11013120
1645
+ },
1646
+ {
1647
+ "name": "model.layers.23.self_attn.c_attn.bias",
1648
+ "shape": [
1649
+ 2048
1650
+ ],
1651
+ "dtype": "float16",
1652
+ "format": "f32-to-bf16",
1653
+ "nbytes": 4096,
1654
+ "byteOffset": 11016192
1655
+ },
1656
+ {
1657
+ "name": "model.layers.23.self_attn.c_attn.weight",
1658
+ "shape": [
1659
+ 2048,
1660
+ 1536
1661
+ ],
1662
+ "dtype": "float16",
1663
+ "format": "f32-to-bf16",
1664
+ "nbytes": 6291456,
1665
+ "byteOffset": 11020288
1666
+ },
1667
+ {
1668
+ "name": "model.layers.23.self_attn.o_proj.weight",
1669
+ "shape": [
1670
+ 1536,
1671
+ 1536
1672
+ ],
1673
+ "dtype": "float16",
1674
+ "format": "f32-to-bf16",
1675
+ "nbytes": 4718592,
1676
+ "byteOffset": 17311744
1677
+ },
1678
+ {
1679
+ "name": "model.layers.24.input_layernorm.weight",
1680
+ "shape": [
1681
+ 1536
1682
+ ],
1683
+ "dtype": "float16",
1684
+ "format": "f32-to-bf16",
1685
+ "nbytes": 3072,
1686
+ "byteOffset": 22030336
1687
+ },
1688
+ {
1689
+ "name": "model.layers.24.post_attention_layernorm.weight",
1690
+ "shape": [
1691
+ 1536
1692
+ ],
1693
+ "dtype": "float16",
1694
+ "format": "f32-to-bf16",
1695
+ "nbytes": 3072,
1696
+ "byteOffset": 22033408
1697
+ },
1698
+ {
1699
+ "name": "model.layers.24.self_attn.c_attn.bias",
1700
+ "shape": [
1701
+ 2048
1702
+ ],
1703
+ "dtype": "float16",
1704
+ "format": "f32-to-bf16",
1705
+ "nbytes": 4096,
1706
+ "byteOffset": 22036480
1707
+ },
1708
+ {
1709
+ "name": "model.layers.24.self_attn.c_attn.weight",
1710
+ "shape": [
1711
+ 2048,
1712
+ 1536
1713
+ ],
1714
+ "dtype": "float16",
1715
+ "format": "f32-to-bf16",
1716
+ "nbytes": 6291456,
1717
+ "byteOffset": 22040576
1718
+ },
1719
+ {
1720
+ "name": "model.layers.24.self_attn.o_proj.weight",
1721
+ "shape": [
1722
+ 1536,
1723
+ 1536
1724
+ ],
1725
+ "dtype": "float16",
1726
+ "format": "f32-to-bf16",
1727
+ "nbytes": 4718592,
1728
+ "byteOffset": 28332032
1729
+ },
1730
+ {
1731
+ "name": "model.layers.25.input_layernorm.weight",
1732
+ "shape": [
1733
+ 1536
1734
+ ],
1735
+ "dtype": "float16",
1736
+ "format": "f32-to-bf16",
1737
+ "nbytes": 3072,
1738
+ "byteOffset": 33050624
1739
+ },
1740
+ {
1741
+ "name": "model.layers.25.post_attention_layernorm.weight",
1742
+ "shape": [
1743
+ 1536
1744
+ ],
1745
+ "dtype": "float16",
1746
+ "format": "f32-to-bf16",
1747
+ "nbytes": 3072,
1748
+ "byteOffset": 33053696
1749
+ },
1750
+ {
1751
+ "name": "model.layers.25.self_attn.c_attn.bias",
1752
+ "shape": [
1753
+ 2048
1754
+ ],
1755
+ "dtype": "float16",
1756
+ "format": "f32-to-bf16",
1757
+ "nbytes": 4096,
1758
+ "byteOffset": 33056768
1759
+ }
1760
+ ],
1761
+ "md5sum": "9bd20d328ef42e9cb0daeea34f8f1665"
1762
+ },
1763
+ {
1764
+ "dataPath": "params_shard_45.bin",
1765
+ "format": "raw-shard",
1766
+ "nbytes": 27525120,
1767
+ "records": [
1768
+ {
1769
+ "name": "model.layers.26.mlp.down_proj.weight",
1770
+ "shape": [
1771
+ 1536,
1772
+ 8960
1773
+ ],
1774
+ "dtype": "float16",
1775
+ "format": "f32-to-bf16",
1776
+ "nbytes": 27525120,
1777
+ "byteOffset": 0
1778
+ }
1779
+ ],
1780
+ "md5sum": "3bf660c4dd495b4cb010e090958b399f"
1781
+ },
1782
+ {
1783
+ "dataPath": "params_shard_46.bin",
1784
+ "format": "raw-shard",
1785
+ "nbytes": 55050240,
1786
+ "records": [
1787
+ {
1788
+ "name": "model.layers.26.mlp.gate_up_proj.weight",
1789
+ "shape": [
1790
+ 17920,
1791
+ 1536
1792
+ ],
1793
+ "dtype": "float16",
1794
+ "format": "f32-to-bf16",
1795
+ "nbytes": 55050240,
1796
+ "byteOffset": 0
1797
+ }
1798
+ ],
1799
+ "md5sum": "1d8d43c1f77c22de3ed30d08f84db8c1"
1800
+ },
1801
+ {
1802
+ "dataPath": "params_shard_47.bin",
1803
+ "format": "raw-shard",
1804
+ "nbytes": 27525120,
1805
+ "records": [
1806
+ {
1807
+ "name": "model.layers.27.mlp.down_proj.weight",
1808
+ "shape": [
1809
+ 1536,
1810
+ 8960
1811
+ ],
1812
+ "dtype": "float16",
1813
+ "format": "f32-to-bf16",
1814
+ "nbytes": 27525120,
1815
+ "byteOffset": 0
1816
+ }
1817
+ ],
1818
+ "md5sum": "5dba30ad09b4e0832d4b66cbab7cbb65"
1819
+ },
1820
+ {
1821
+ "dataPath": "params_shard_48.bin",
1822
+ "format": "raw-shard",
1823
+ "nbytes": 55050240,
1824
+ "records": [
1825
+ {
1826
+ "name": "model.layers.27.mlp.gate_up_proj.weight",
1827
+ "shape": [
1828
+ 17920,
1829
+ 1536
1830
+ ],
1831
+ "dtype": "float16",
1832
+ "format": "f32-to-bf16",
1833
+ "nbytes": 55050240,
1834
+ "byteOffset": 0
1835
+ }
1836
+ ],
1837
+ "md5sum": "3e3f1c5f902e1612e29d8d4bfce8c13f"
1838
+ },
1839
+ {
1840
+ "dataPath": "params_shard_49.bin",
1841
+ "format": "raw-shard",
1842
+ "nbytes": 27525120,
1843
+ "records": [
1844
+ {
1845
+ "name": "model.layers.3.mlp.down_proj.weight",
1846
+ "shape": [
1847
+ 1536,
1848
+ 8960
1849
+ ],
1850
+ "dtype": "float16",
1851
+ "format": "f32-to-bf16",
1852
+ "nbytes": 27525120,
1853
+ "byteOffset": 0
1854
+ }
1855
+ ],
1856
+ "md5sum": "dafb2ba9ed30881ead0f758444c710e9"
1857
+ },
1858
+ {
1859
+ "dataPath": "params_shard_50.bin",
1860
+ "format": "raw-shard",
1861
+ "nbytes": 55050240,
1862
+ "records": [
1863
+ {
1864
+ "name": "model.layers.3.mlp.gate_up_proj.weight",
1865
+ "shape": [
1866
+ 17920,
1867
+ 1536
1868
+ ],
1869
+ "dtype": "float16",
1870
+ "format": "f32-to-bf16",
1871
+ "nbytes": 55050240,
1872
+ "byteOffset": 0
1873
+ }
1874
+ ],
1875
+ "md5sum": "fae672c0483259b7c6f78ebe8c9bc007"
1876
+ },
1877
+ {
1878
+ "dataPath": "params_shard_51.bin",
1879
+ "format": "raw-shard",
1880
+ "nbytes": 33060864,
1881
+ "records": [
1882
+ {
1883
+ "name": "model.layers.25.self_attn.c_attn.weight",
1884
+ "shape": [
1885
+ 2048,
1886
+ 1536
1887
+ ],
1888
+ "dtype": "float16",
1889
+ "format": "f32-to-bf16",
1890
+ "nbytes": 6291456,
1891
+ "byteOffset": 0
1892
+ },
1893
+ {
1894
+ "name": "model.layers.25.self_attn.o_proj.weight",
1895
+ "shape": [
1896
+ 1536,
1897
+ 1536
1898
+ ],
1899
+ "dtype": "float16",
1900
+ "format": "f32-to-bf16",
1901
+ "nbytes": 4718592,
1902
+ "byteOffset": 6291456
1903
+ },
1904
+ {
1905
+ "name": "model.layers.26.input_layernorm.weight",
1906
+ "shape": [
1907
+ 1536
1908
+ ],
1909
+ "dtype": "float16",
1910
+ "format": "f32-to-bf16",
1911
+ "nbytes": 3072,
1912
+ "byteOffset": 11010048
1913
+ },
1914
+ {
1915
+ "name": "model.layers.26.post_attention_layernorm.weight",
1916
+ "shape": [
1917
+ 1536
1918
+ ],
1919
+ "dtype": "float16",
1920
+ "format": "f32-to-bf16",
1921
+ "nbytes": 3072,
1922
+ "byteOffset": 11013120
1923
+ },
1924
+ {
1925
+ "name": "model.layers.26.self_attn.c_attn.bias",
1926
+ "shape": [
1927
+ 2048
1928
+ ],
1929
+ "dtype": "float16",
1930
+ "format": "f32-to-bf16",
1931
+ "nbytes": 4096,
1932
+ "byteOffset": 11016192
1933
+ },
1934
+ {
1935
+ "name": "model.layers.26.self_attn.c_attn.weight",
1936
+ "shape": [
1937
+ 2048,
1938
+ 1536
1939
+ ],
1940
+ "dtype": "float16",
1941
+ "format": "f32-to-bf16",
1942
+ "nbytes": 6291456,
1943
+ "byteOffset": 11020288
1944
+ },
1945
+ {
1946
+ "name": "model.layers.26.self_attn.o_proj.weight",
1947
+ "shape": [
1948
+ 1536,
1949
+ 1536
1950
+ ],
1951
+ "dtype": "float16",
1952
+ "format": "f32-to-bf16",
1953
+ "nbytes": 4718592,
1954
+ "byteOffset": 17311744
1955
+ },
1956
+ {
1957
+ "name": "model.layers.27.input_layernorm.weight",
1958
+ "shape": [
1959
+ 1536
1960
+ ],
1961
+ "dtype": "float16",
1962
+ "format": "f32-to-bf16",
1963
+ "nbytes": 3072,
1964
+ "byteOffset": 22030336
1965
+ },
1966
+ {
1967
+ "name": "model.layers.27.post_attention_layernorm.weight",
1968
+ "shape": [
1969
+ 1536
1970
+ ],
1971
+ "dtype": "float16",
1972
+ "format": "f32-to-bf16",
1973
+ "nbytes": 3072,
1974
+ "byteOffset": 22033408
1975
+ },
1976
+ {
1977
+ "name": "model.layers.27.self_attn.c_attn.bias",
1978
+ "shape": [
1979
+ 2048
1980
+ ],
1981
+ "dtype": "float16",
1982
+ "format": "f32-to-bf16",
1983
+ "nbytes": 4096,
1984
+ "byteOffset": 22036480
1985
+ },
1986
+ {
1987
+ "name": "model.layers.27.self_attn.c_attn.weight",
1988
+ "shape": [
1989
+ 2048,
1990
+ 1536
1991
+ ],
1992
+ "dtype": "float16",
1993
+ "format": "f32-to-bf16",
1994
+ "nbytes": 6291456,
1995
+ "byteOffset": 22040576
1996
+ },
1997
+ {
1998
+ "name": "model.layers.27.self_attn.o_proj.weight",
1999
+ "shape": [
2000
+ 1536,
2001
+ 1536
2002
+ ],
2003
+ "dtype": "float16",
2004
+ "format": "f32-to-bf16",
2005
+ "nbytes": 4718592,
2006
+ "byteOffset": 28332032
2007
+ },
2008
+ {
2009
+ "name": "model.layers.3.input_layernorm.weight",
2010
+ "shape": [
2011
+ 1536
2012
+ ],
2013
+ "dtype": "float16",
2014
+ "format": "f32-to-bf16",
2015
+ "nbytes": 3072,
2016
+ "byteOffset": 33050624
2017
+ },
2018
+ {
2019
+ "name": "model.layers.3.post_attention_layernorm.weight",
2020
+ "shape": [
2021
+ 1536
2022
+ ],
2023
+ "dtype": "float16",
2024
+ "format": "f32-to-bf16",
2025
+ "nbytes": 3072,
2026
+ "byteOffset": 33053696
2027
+ },
2028
+ {
2029
+ "name": "model.layers.3.self_attn.c_attn.bias",
2030
+ "shape": [
2031
+ 2048
2032
+ ],
2033
+ "dtype": "float16",
2034
+ "format": "f32-to-bf16",
2035
+ "nbytes": 4096,
2036
+ "byteOffset": 33056768
2037
+ }
2038
+ ],
2039
+ "md5sum": "e15198cbf013718fbac52a1a8d94a4bd"
2040
+ },
2041
+ {
2042
+ "dataPath": "params_shard_52.bin",
2043
+ "format": "raw-shard",
2044
+ "nbytes": 27525120,
2045
+ "records": [
2046
+ {
2047
+ "name": "model.layers.4.mlp.down_proj.weight",
2048
+ "shape": [
2049
+ 1536,
2050
+ 8960
2051
+ ],
2052
+ "dtype": "float16",
2053
+ "format": "f32-to-bf16",
2054
+ "nbytes": 27525120,
2055
+ "byteOffset": 0
2056
+ }
2057
+ ],
2058
+ "md5sum": "8ef490e2d5fdd10b563790c6c8ddf751"
2059
+ },
2060
+ {
2061
+ "dataPath": "params_shard_53.bin",
2062
+ "format": "raw-shard",
2063
+ "nbytes": 55050240,
2064
+ "records": [
2065
+ {
2066
+ "name": "model.layers.4.mlp.gate_up_proj.weight",
2067
+ "shape": [
2068
+ 17920,
2069
+ 1536
2070
+ ],
2071
+ "dtype": "float16",
2072
+ "format": "f32-to-bf16",
2073
+ "nbytes": 55050240,
2074
+ "byteOffset": 0
2075
+ }
2076
+ ],
2077
+ "md5sum": "0d9f22936600b212ad7509684535ed16"
2078
+ },
2079
+ {
2080
+ "dataPath": "params_shard_54.bin",
2081
+ "format": "raw-shard",
2082
+ "nbytes": 27525120,
2083
+ "records": [
2084
+ {
2085
+ "name": "model.layers.5.mlp.down_proj.weight",
2086
+ "shape": [
2087
+ 1536,
2088
+ 8960
2089
+ ],
2090
+ "dtype": "float16",
2091
+ "format": "f32-to-bf16",
2092
+ "nbytes": 27525120,
2093
+ "byteOffset": 0
2094
+ }
2095
+ ],
2096
+ "md5sum": "b877bb9f1825407f4828cde2906e483c"
2097
+ },
2098
+ {
2099
+ "dataPath": "params_shard_55.bin",
2100
+ "format": "raw-shard",
2101
+ "nbytes": 55050240,
2102
+ "records": [
2103
+ {
2104
+ "name": "model.layers.5.mlp.gate_up_proj.weight",
2105
+ "shape": [
2106
+ 17920,
2107
+ 1536
2108
+ ],
2109
+ "dtype": "float16",
2110
+ "format": "f32-to-bf16",
2111
+ "nbytes": 55050240,
2112
+ "byteOffset": 0
2113
+ }
2114
+ ],
2115
+ "md5sum": "4aeb2df9e320197a7328f274ccc1e0c6"
2116
+ },
2117
+ {
2118
+ "dataPath": "params_shard_56.bin",
2119
+ "format": "raw-shard",
2120
+ "nbytes": 27525120,
2121
+ "records": [
2122
+ {
2123
+ "name": "model.layers.6.mlp.down_proj.weight",
2124
+ "shape": [
2125
+ 1536,
2126
+ 8960
2127
+ ],
2128
+ "dtype": "float16",
2129
+ "format": "f32-to-bf16",
2130
+ "nbytes": 27525120,
2131
+ "byteOffset": 0
2132
+ }
2133
+ ],
2134
+ "md5sum": "4159b8469797370880dec9cef854c8b1"
2135
+ },
2136
+ {
2137
+ "dataPath": "params_shard_57.bin",
2138
+ "format": "raw-shard",
2139
+ "nbytes": 55050240,
2140
+ "records": [
2141
+ {
2142
+ "name": "model.layers.6.mlp.gate_up_proj.weight",
2143
+ "shape": [
2144
+ 17920,
2145
+ 1536
2146
+ ],
2147
+ "dtype": "float16",
2148
+ "format": "f32-to-bf16",
2149
+ "nbytes": 55050240,
2150
+ "byteOffset": 0
2151
+ }
2152
+ ],
2153
+ "md5sum": "f6f34569657363a613bb38ffa4a8ce25"
2154
+ },
2155
+ {
2156
+ "dataPath": "params_shard_58.bin",
2157
+ "format": "raw-shard",
2158
+ "nbytes": 33060864,
2159
+ "records": [
2160
+ {
2161
+ "name": "model.layers.3.self_attn.c_attn.weight",
2162
+ "shape": [
2163
+ 2048,
2164
+ 1536
2165
+ ],
2166
+ "dtype": "float16",
2167
+ "format": "f32-to-bf16",
2168
+ "nbytes": 6291456,
2169
+ "byteOffset": 0
2170
+ },
2171
+ {
2172
+ "name": "model.layers.3.self_attn.o_proj.weight",
2173
+ "shape": [
2174
+ 1536,
2175
+ 1536
2176
+ ],
2177
+ "dtype": "float16",
2178
+ "format": "f32-to-bf16",
2179
+ "nbytes": 4718592,
2180
+ "byteOffset": 6291456
2181
+ },
2182
+ {
2183
+ "name": "model.layers.4.input_layernorm.weight",
2184
+ "shape": [
2185
+ 1536
2186
+ ],
2187
+ "dtype": "float16",
2188
+ "format": "f32-to-bf16",
2189
+ "nbytes": 3072,
2190
+ "byteOffset": 11010048
2191
+ },
2192
+ {
2193
+ "name": "model.layers.4.post_attention_layernorm.weight",
2194
+ "shape": [
2195
+ 1536
2196
+ ],
2197
+ "dtype": "float16",
2198
+ "format": "f32-to-bf16",
2199
+ "nbytes": 3072,
2200
+ "byteOffset": 11013120
2201
+ },
2202
+ {
2203
+ "name": "model.layers.4.self_attn.c_attn.bias",
2204
+ "shape": [
2205
+ 2048
2206
+ ],
2207
+ "dtype": "float16",
2208
+ "format": "f32-to-bf16",
2209
+ "nbytes": 4096,
2210
+ "byteOffset": 11016192
2211
+ },
2212
+ {
2213
+ "name": "model.layers.4.self_attn.c_attn.weight",
2214
+ "shape": [
2215
+ 2048,
2216
+ 1536
2217
+ ],
2218
+ "dtype": "float16",
2219
+ "format": "f32-to-bf16",
2220
+ "nbytes": 6291456,
2221
+ "byteOffset": 11020288
2222
+ },
2223
+ {
2224
+ "name": "model.layers.4.self_attn.o_proj.weight",
2225
+ "shape": [
2226
+ 1536,
2227
+ 1536
2228
+ ],
2229
+ "dtype": "float16",
2230
+ "format": "f32-to-bf16",
2231
+ "nbytes": 4718592,
2232
+ "byteOffset": 17311744
2233
+ },
2234
+ {
2235
+ "name": "model.layers.5.input_layernorm.weight",
2236
+ "shape": [
2237
+ 1536
2238
+ ],
2239
+ "dtype": "float16",
2240
+ "format": "f32-to-bf16",
2241
+ "nbytes": 3072,
2242
+ "byteOffset": 22030336
2243
+ },
2244
+ {
2245
+ "name": "model.layers.5.post_attention_layernorm.weight",
2246
+ "shape": [
2247
+ 1536
2248
+ ],
2249
+ "dtype": "float16",
2250
+ "format": "f32-to-bf16",
2251
+ "nbytes": 3072,
2252
+ "byteOffset": 22033408
2253
+ },
2254
+ {
2255
+ "name": "model.layers.5.self_attn.c_attn.bias",
2256
+ "shape": [
2257
+ 2048
2258
+ ],
2259
+ "dtype": "float16",
2260
+ "format": "f32-to-bf16",
2261
+ "nbytes": 4096,
2262
+ "byteOffset": 22036480
2263
+ },
2264
+ {
2265
+ "name": "model.layers.5.self_attn.c_attn.weight",
2266
+ "shape": [
2267
+ 2048,
2268
+ 1536
2269
+ ],
2270
+ "dtype": "float16",
2271
+ "format": "f32-to-bf16",
2272
+ "nbytes": 6291456,
2273
+ "byteOffset": 22040576
2274
+ },
2275
+ {
2276
+ "name": "model.layers.5.self_attn.o_proj.weight",
2277
+ "shape": [
2278
+ 1536,
2279
+ 1536
2280
+ ],
2281
+ "dtype": "float16",
2282
+ "format": "f32-to-bf16",
2283
+ "nbytes": 4718592,
2284
+ "byteOffset": 28332032
2285
+ },
2286
+ {
2287
+ "name": "model.layers.6.input_layernorm.weight",
2288
+ "shape": [
2289
+ 1536
2290
+ ],
2291
+ "dtype": "float16",
2292
+ "format": "f32-to-bf16",
2293
+ "nbytes": 3072,
2294
+ "byteOffset": 33050624
2295
+ },
2296
+ {
2297
+ "name": "model.layers.6.post_attention_layernorm.weight",
2298
+ "shape": [
2299
+ 1536
2300
+ ],
2301
+ "dtype": "float16",
2302
+ "format": "f32-to-bf16",
2303
+ "nbytes": 3072,
2304
+ "byteOffset": 33053696
2305
+ },
2306
+ {
2307
+ "name": "model.layers.6.self_attn.c_attn.bias",
2308
+ "shape": [
2309
+ 2048
2310
+ ],
2311
+ "dtype": "float16",
2312
+ "format": "f32-to-bf16",
2313
+ "nbytes": 4096,
2314
+ "byteOffset": 33056768
2315
+ }
2316
+ ],
2317
+ "md5sum": "4b9635bfbb9a0f8e379594d577e24e79"
2318
+ },
2319
+ {
2320
+ "dataPath": "params_shard_59.bin",
2321
+ "format": "raw-shard",
2322
+ "nbytes": 27525120,
2323
+ "records": [
2324
+ {
2325
+ "name": "model.layers.7.mlp.down_proj.weight",
2326
+ "shape": [
2327
+ 1536,
2328
+ 8960
2329
+ ],
2330
+ "dtype": "float16",
2331
+ "format": "f32-to-bf16",
2332
+ "nbytes": 27525120,
2333
+ "byteOffset": 0
2334
+ }
2335
+ ],
2336
+ "md5sum": "6f06f08ee4e830a7f35fad5489e2df96"
2337
+ },
2338
+ {
2339
+ "dataPath": "params_shard_60.bin",
2340
+ "format": "raw-shard",
2341
+ "nbytes": 55050240,
2342
+ "records": [
2343
+ {
2344
+ "name": "model.layers.7.mlp.gate_up_proj.weight",
2345
+ "shape": [
2346
+ 17920,
2347
+ 1536
2348
+ ],
2349
+ "dtype": "float16",
2350
+ "format": "f32-to-bf16",
2351
+ "nbytes": 55050240,
2352
+ "byteOffset": 0
2353
+ }
2354
+ ],
2355
+ "md5sum": "6a51b2acc4eb22511d78c73146e97c57"
2356
+ },
2357
+ {
2358
+ "dataPath": "params_shard_61.bin",
2359
+ "format": "raw-shard",
2360
+ "nbytes": 27525120,
2361
+ "records": [
2362
+ {
2363
+ "name": "model.layers.8.mlp.down_proj.weight",
2364
+ "shape": [
2365
+ 1536,
2366
+ 8960
2367
+ ],
2368
+ "dtype": "float16",
2369
+ "format": "f32-to-bf16",
2370
+ "nbytes": 27525120,
2371
+ "byteOffset": 0
2372
+ }
2373
+ ],
2374
+ "md5sum": "422c9f65efd3c517f5f597ff198b9b54"
2375
+ },
2376
+ {
2377
+ "dataPath": "params_shard_62.bin",
2378
+ "format": "raw-shard",
2379
+ "nbytes": 55050240,
2380
+ "records": [
2381
+ {
2382
+ "name": "model.layers.8.mlp.gate_up_proj.weight",
2383
+ "shape": [
2384
+ 17920,
2385
+ 1536
2386
+ ],
2387
+ "dtype": "float16",
2388
+ "format": "f32-to-bf16",
2389
+ "nbytes": 55050240,
2390
+ "byteOffset": 0
2391
+ }
2392
+ ],
2393
+ "md5sum": "194381860a8101e6639b37c321b03e53"
2394
+ },
2395
+ {
2396
+ "dataPath": "params_shard_63.bin",
2397
+ "format": "raw-shard",
2398
+ "nbytes": 27525120,
2399
+ "records": [
2400
+ {
2401
+ "name": "model.layers.9.mlp.down_proj.weight",
2402
+ "shape": [
2403
+ 1536,
2404
+ 8960
2405
+ ],
2406
+ "dtype": "float16",
2407
+ "format": "f32-to-bf16",
2408
+ "nbytes": 27525120,
2409
+ "byteOffset": 0
2410
+ }
2411
+ ],
2412
+ "md5sum": "dcc2852e43b9cca8d247abb596a6cfd3"
2413
+ },
2414
+ {
2415
+ "dataPath": "params_shard_64.bin",
2416
+ "format": "raw-shard",
2417
+ "nbytes": 55050240,
2418
+ "records": [
2419
+ {
2420
+ "name": "model.layers.9.mlp.gate_up_proj.weight",
2421
+ "shape": [
2422
+ 17920,
2423
+ 1536
2424
+ ],
2425
+ "dtype": "float16",
2426
+ "format": "f32-to-bf16",
2427
+ "nbytes": 55050240,
2428
+ "byteOffset": 0
2429
+ }
2430
+ ],
2431
+ "md5sum": "31b0adf54bda832e421327cc270f38de"
2432
+ },
2433
+ {
2434
+ "dataPath": "params_shard_65.bin",
2435
+ "format": "raw-shard",
2436
+ "nbytes": 33060864,
2437
+ "records": [
2438
+ {
2439
+ "name": "model.layers.6.self_attn.c_attn.weight",
2440
+ "shape": [
2441
+ 2048,
2442
+ 1536
2443
+ ],
2444
+ "dtype": "float16",
2445
+ "format": "f32-to-bf16",
2446
+ "nbytes": 6291456,
2447
+ "byteOffset": 0
2448
+ },
2449
+ {
2450
+ "name": "model.layers.6.self_attn.o_proj.weight",
2451
+ "shape": [
2452
+ 1536,
2453
+ 1536
2454
+ ],
2455
+ "dtype": "float16",
2456
+ "format": "f32-to-bf16",
2457
+ "nbytes": 4718592,
2458
+ "byteOffset": 6291456
2459
+ },
2460
+ {
2461
+ "name": "model.layers.7.input_layernorm.weight",
2462
+ "shape": [
2463
+ 1536
2464
+ ],
2465
+ "dtype": "float16",
2466
+ "format": "f32-to-bf16",
2467
+ "nbytes": 3072,
2468
+ "byteOffset": 11010048
2469
+ },
2470
+ {
2471
+ "name": "model.layers.7.post_attention_layernorm.weight",
2472
+ "shape": [
2473
+ 1536
2474
+ ],
2475
+ "dtype": "float16",
2476
+ "format": "f32-to-bf16",
2477
+ "nbytes": 3072,
2478
+ "byteOffset": 11013120
2479
+ },
2480
+ {
2481
+ "name": "model.layers.7.self_attn.c_attn.bias",
2482
+ "shape": [
2483
+ 2048
2484
+ ],
2485
+ "dtype": "float16",
2486
+ "format": "f32-to-bf16",
2487
+ "nbytes": 4096,
2488
+ "byteOffset": 11016192
2489
+ },
2490
+ {
2491
+ "name": "model.layers.7.self_attn.c_attn.weight",
2492
+ "shape": [
2493
+ 2048,
2494
+ 1536
2495
+ ],
2496
+ "dtype": "float16",
2497
+ "format": "f32-to-bf16",
2498
+ "nbytes": 6291456,
2499
+ "byteOffset": 11020288
2500
+ },
2501
+ {
2502
+ "name": "model.layers.7.self_attn.o_proj.weight",
2503
+ "shape": [
2504
+ 1536,
2505
+ 1536
2506
+ ],
2507
+ "dtype": "float16",
2508
+ "format": "f32-to-bf16",
2509
+ "nbytes": 4718592,
2510
+ "byteOffset": 17311744
2511
+ },
2512
+ {
2513
+ "name": "model.layers.8.input_layernorm.weight",
2514
+ "shape": [
2515
+ 1536
2516
+ ],
2517
+ "dtype": "float16",
2518
+ "format": "f32-to-bf16",
2519
+ "nbytes": 3072,
2520
+ "byteOffset": 22030336
2521
+ },
2522
+ {
2523
+ "name": "model.layers.8.post_attention_layernorm.weight",
2524
+ "shape": [
2525
+ 1536
2526
+ ],
2527
+ "dtype": "float16",
2528
+ "format": "f32-to-bf16",
2529
+ "nbytes": 3072,
2530
+ "byteOffset": 22033408
2531
+ },
2532
+ {
2533
+ "name": "model.layers.8.self_attn.c_attn.bias",
2534
+ "shape": [
2535
+ 2048
2536
+ ],
2537
+ "dtype": "float16",
2538
+ "format": "f32-to-bf16",
2539
+ "nbytes": 4096,
2540
+ "byteOffset": 22036480
2541
+ },
2542
+ {
2543
+ "name": "model.layers.8.self_attn.c_attn.weight",
2544
+ "shape": [
2545
+ 2048,
2546
+ 1536
2547
+ ],
2548
+ "dtype": "float16",
2549
+ "format": "f32-to-bf16",
2550
+ "nbytes": 6291456,
2551
+ "byteOffset": 22040576
2552
+ },
2553
+ {
2554
+ "name": "model.layers.8.self_attn.o_proj.weight",
2555
+ "shape": [
2556
+ 1536,
2557
+ 1536
2558
+ ],
2559
+ "dtype": "float16",
2560
+ "format": "f32-to-bf16",
2561
+ "nbytes": 4718592,
2562
+ "byteOffset": 28332032
2563
+ },
2564
+ {
2565
+ "name": "model.layers.9.input_layernorm.weight",
2566
+ "shape": [
2567
+ 1536
2568
+ ],
2569
+ "dtype": "float16",
2570
+ "format": "f32-to-bf16",
2571
+ "nbytes": 3072,
2572
+ "byteOffset": 33050624
2573
+ },
2574
+ {
2575
+ "name": "model.layers.9.post_attention_layernorm.weight",
2576
+ "shape": [
2577
+ 1536
2578
+ ],
2579
+ "dtype": "float16",
2580
+ "format": "f32-to-bf16",
2581
+ "nbytes": 3072,
2582
+ "byteOffset": 33053696
2583
+ },
2584
+ {
2585
+ "name": "model.layers.9.self_attn.c_attn.bias",
2586
+ "shape": [
2587
+ 2048
2588
+ ],
2589
+ "dtype": "float16",
2590
+ "format": "f32-to-bf16",
2591
+ "nbytes": 4096,
2592
+ "byteOffset": 33056768
2593
+ }
2594
+ ],
2595
+ "md5sum": "80e3191e706ecd10535e81ffa38fc3c2"
2596
+ },
2597
+ {
2598
+ "dataPath": "params_shard_66.bin",
2599
+ "format": "raw-shard",
2600
+ "nbytes": 11013120,
2601
+ "records": [
2602
+ {
2603
+ "name": "model.layers.9.self_attn.c_attn.weight",
2604
+ "shape": [
2605
+ 2048,
2606
+ 1536
2607
+ ],
2608
+ "dtype": "float16",
2609
+ "format": "f32-to-bf16",
2610
+ "nbytes": 6291456,
2611
+ "byteOffset": 0
2612
+ },
2613
+ {
2614
+ "name": "model.layers.9.self_attn.o_proj.weight",
2615
+ "shape": [
2616
+ 1536,
2617
+ 1536
2618
+ ],
2619
+ "dtype": "float16",
2620
+ "format": "f32-to-bf16",
2621
+ "nbytes": 4718592,
2622
+ "byteOffset": 6291456
2623
+ },
2624
+ {
2625
+ "name": "model.norm.weight",
2626
+ "shape": [
2627
+ 1536
2628
+ ],
2629
+ "dtype": "float16",
2630
+ "format": "f32-to-bf16",
2631
+ "nbytes": 3072,
2632
+ "byteOffset": 11010048
2633
+ }
2634
+ ],
2635
+ "md5sum": "554e351ae89de4c09af73e23fd71576f"
2636
+ }
2637
+ ]
2638
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43256d0b8f0c812f4ea9e707c40695df8e357fda9eb562d6728a237a534b7f7d
3
+ size 466747392
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:deb7fe1b6af71a7ed9f5143f6f05feef2de12625e98f072098b8c3ecf4664578
3
+ size 55050240
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b1a08a299b0cbcec7b00e7edd5ee7e1fadea6d40fd67302d09d8f690f65bcfc
3
+ size 27525120
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adef7a955ab6fb904a2a487995eee313a7355ed8fee992713318cd0ee06e3d50
3
+ size 55050240
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1818ed266119190b631d79b35f61c9b9ea1498ecb39fbc993f373bfe07e8a6c1
3
+ size 27525120
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a69ef695f83288a716f46ca0b436bab0aa3843ac3e844c795cb5c5c3bd00dc8c
3
+ size 55050240
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:449e6dba8fedfb24594ee2e2f7786f434735d7ef2cb1adfdee4ccb161fdb78a0
3
+ size 27525120
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d843c125f9b29289555a46ddb82eac8da82c2a58abd24425ed966a831f700c76
3
+ size 55050240
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6907647e513bb68e61bcd9ae28b260b42156ba31245d844af153cfaaa3846b7d
3
+ size 33060864
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc929db04646a836322e8bf7cdf27290ad9aa06ac578b42fcdd3a1deec0b969c
3
+ size 27525120
params_shard_18.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98fe8b376d94da887eb2239a6c8ababe4ffb43b4a844009df634b6eee810cb87
3
+ size 55050240
params_shard_19.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1efba48ad4e5b6350840e437e59d99f8259357098ee18f26523058e64288231
3
+ size 27525120
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfa31db2ceea48f0283cd2790cf3ebd78280772f9b93abb45213630b6d1cd542
3
+ size 27535360
params_shard_20.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a5dd1b569f309062db653b3ab82c5094d122a217fe31e728da04ea95ba10215
3
+ size 55050240
params_shard_21.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a384fdab3fed67beb6dcecc11e07a7a3abc2509b45d2435dec315dcfb5233599
3
+ size 27525120
params_shard_22.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4df39d8fee545fb91a042cb7baf9ff19c81c090abea017764d1a25859f717f43
3
+ size 55050240
params_shard_23.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f82e64e2554048138286ff10af732add567ecd58ecbaae1b6f137a14f94b6860
3
+ size 33060864
params_shard_24.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e84359eb974a34887946eaad63c9c67c225a19482f38f3d3fd0535dafaef4f01
3
+ size 27525120
params_shard_25.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:093fa2e1c175a290654de5ff3b0082189265bf89bf6ac9678e7076e8e4f1bd54
3
+ size 55050240
params_shard_26.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f77ba1447d90a3b17db86f2d8702243cdde48a48d7ffc86dc0280ce953a3757
3
+ size 27525120
params_shard_27.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10d5003fa57b73e7efbf709f523372e5e57a6eb3b5c70de4353d7c78db5f5a54
3
+ size 55050240
params_shard_28.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44dbc3426ce5c3a819ee8d7cc8f3423b69cac54b749633e46dc61e14f6652f61
3
+ size 27525120
params_shard_29.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:179573335b56c2d367c7222e16ea1a2ebed2e09d18face3a640d5ed55b31159b
3
+ size 55050240
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebf06be4638d2fc49381c37acd29778278510558ba16bf985c3cc7f6189c6baf
3
+ size 27525120
params_shard_30.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f48a91d7ffd46ea606b64a11f169c91c9e6d3949e2876f36b8b98325c135007e
3
+ size 33060864
params_shard_31.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd6a9b312e4c50c3707d7294e2da823b693c8d76b2edc297ad0b14aedfea467d
3
+ size 27525120
params_shard_32.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00a0a8bf883c445b2ca7832c1e4cf69d554730f7883e51b2053b75cf40c8ec08
3
+ size 55050240
params_shard_33.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0b388cbd7ec068d7e355ddace0f8084fa561ba2cfddf2bd83cb3f1c0474ac15
3
+ size 27525120
params_shard_34.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01ee2bf65adcd63fd5129e751e3c5c5d8118a4ba52d48639a9bb9429384d31d3
3
+ size 55050240
params_shard_35.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41270bd5669b5ecac3b60ad096b87be7119afe8ba9b65050e6d3fdbd43c3c3f2
3
+ size 27525120
params_shard_36.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62a1feb183eaf5137d15b0889e3163c00738bf24edc7dc0f92cfa3bc33ca6c75
3
+ size 55050240
params_shard_37.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03381d0fa4f59b3243a898af57d87935e28643a8e77e2ff452416def1b96c6e2
3
+ size 33060864
params_shard_38.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8fbfe7265ec75666e68673f1bc5eaeb410cac5217fe3093abf99d825ba5f891
3
+ size 27525120
params_shard_39.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf686aa6fa4bf3d5c4bdebb6efd8e2a20ed931bb42958328c9fc19cd77703df4
3
+ size 55050240
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6123143e5a495eebd4a824682aa78eae0df1c59f8167f9b9c1c158af8292b6bd
3
+ size 55050240
params_shard_40.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4698bdd0cce98f3f6c6d7748d8b21b34f3883d346f8ff954c5629e92df26c17d
3
+ size 27525120
params_shard_41.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9f0251fca3e6776fb7e0df0505d18464f838893e72ab2e130a6d6b637d63a39
3
+ size 55050240
params_shard_42.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d8f96cbf4f71e47c597f971b02b4cc0cc6bffe32ed3e3f735f194109fb6fd1d
3
+ size 27525120
params_shard_43.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b31b86c1bc0a55ccbd5c66d01af04f5cc5ddd575c255a5dafdea449a7865d8cc
3
+ size 55050240
params_shard_44.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5274d241190d48484674d6626862d4dacca47dcb1a309d15dd077c925c4d974
3
+ size 33060864
params_shard_45.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08fd0059a61104cc99b699cd7ba2ca0dbb49c70f33ad55e79ae1229cedff5bea
3
+ size 27525120
params_shard_46.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f126b5b6bace80292d0a0fc05e422d6155c65a92119b842901e929325131ce9
3
+ size 55050240
params_shard_47.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01a5c73cc359c686d657a7af534dee374b1f3efb13a8963b786bb4ee80ac93ce
3
+ size 27525120
params_shard_48.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc5ba3bd64509b2e3c8552e780564318710f41d0d9181d89ce5d025a40eaa236
3
+ size 55050240
params_shard_49.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbff323956b5163b6fb8a126ec32a4709e1c2e7738dcc5fd12a70b7a238adcb5
3
+ size 27525120
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dbb502b1851feac98cbd43401dc522a59f84328b7283f6ea589223306f540ed
3
+ size 27525120