Nadav commited on
Commit
a9bb0bb
·
1 Parent(s): 030aced

Model save

Browse files
last-checkpoint/config.json DELETED
@@ -1,31 +0,0 @@
1
- {
2
- "_name_or_path": "Nadav/PretrainedPHD",
3
- "architectures": [
4
- "PIXELForPreTraining"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "decoder_hidden_size": 512,
8
- "decoder_intermediate_size": 2048,
9
- "decoder_num_attention_heads": 16,
10
- "decoder_num_hidden_layers": 8,
11
- "hidden_act": "gelu",
12
- "hidden_dropout_prob": 0.1,
13
- "hidden_size": 768,
14
- "image_size": [
15
- 368,
16
- 368
17
- ],
18
- "initializer_range": 0.02,
19
- "intermediate_size": 3072,
20
- "layer_norm_eps": 1e-12,
21
- "mask_ratio": 0.29,
22
- "model_type": "pixel",
23
- "norm_pix_loss": true,
24
- "num_attention_heads": 12,
25
- "num_channels": 3,
26
- "num_hidden_layers": 12,
27
- "patch_size": 16,
28
- "qkv_bias": true,
29
- "torch_dtype": "float32",
30
- "transformers_version": "4.17.0"
31
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c076bfb7efb96d5024b8a4ddaf3dce5d10dacaac14c357e2e1f987048cc997c2
3
- size 893439185
 
 
 
 
last-checkpoint/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:339fd6d90694d0866ca99f79daa02d5d9cac878b08d046df675a5dc91c855959
3
- size 449471589
 
 
 
 
last-checkpoint/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3bdead3f6f5710faae22c375f543db349a175165befd270c4b65176d5d8e8d8
3
- size 15523
 
 
 
 
last-checkpoint/scaler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:512587e6e0a7f210ac71b21a96f20cc79ec6ca8b511a1b62e00ee74e107dedd8
3
- size 559
 
 
 
 
last-checkpoint/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8dacaf51330a60fa39e5519f592a0627bc39ca3df9fe19b7cf9decf6b5521a0
3
- size 623
 
 
 
 
last-checkpoint/trainer_state.json DELETED
@@ -1,2736 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.95,
5
- "global_step": 200000,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.0,
12
- "learning_rate": 0.00010077985004622052,
13
- "loss": 0.5551,
14
- "step": 500
15
- },
16
- {
17
- "epoch": 0.01,
18
- "learning_rate": 0.00010077565027123787,
19
- "loss": 0.5087,
20
- "step": 1000
21
- },
22
- {
23
- "epoch": 0.01,
24
- "learning_rate": 0.00010076865093411392,
25
- "loss": 0.5035,
26
- "step": 1500
27
- },
28
- {
29
- "epoch": 0.01,
30
- "learning_rate": 0.00010075885246660077,
31
- "loss": 0.5002,
32
- "step": 2000
33
- },
34
- {
35
- "epoch": 0.01,
36
- "learning_rate": 0.00010074625547311406,
37
- "loss": 0.4973,
38
- "step": 2500
39
- },
40
- {
41
- "epoch": 0.01,
42
- "learning_rate": 0.00010073086073069567,
43
- "loss": 0.4936,
44
- "step": 3000
45
- },
46
- {
47
- "epoch": 0.02,
48
- "learning_rate": 0.00010071266918896582,
49
- "loss": 0.4953,
50
- "step": 3500
51
- },
52
- {
53
- "epoch": 0.02,
54
- "learning_rate": 0.0001006916819700645,
55
- "loss": 0.4924,
56
- "step": 4000
57
- },
58
- {
59
- "epoch": 0.02,
60
- "learning_rate": 0.00010066790036858225,
61
- "loss": 0.4905,
62
- "step": 4500
63
- },
64
- {
65
- "epoch": 0.03,
66
- "learning_rate": 0.00010064132585148025,
67
- "loss": 0.4899,
68
- "step": 5000
69
- },
70
- {
71
- "epoch": 0.03,
72
- "eval_loss": 0.4648222029209137,
73
- "eval_runtime": 559.9664,
74
- "eval_samples_per_second": 114.293,
75
- "eval_steps_per_second": 1.786,
76
- "step": 5000
77
- },
78
- {
79
- "epoch": 0.03,
80
- "learning_rate": 0.0001006119600579999,
81
- "loss": 0.4887,
82
- "step": 5500
83
- },
84
- {
85
- "epoch": 0.03,
86
- "learning_rate": 0.00010057980479956167,
87
- "loss": 0.4881,
88
- "step": 6000
89
- },
90
- {
91
- "epoch": 0.03,
92
- "learning_rate": 0.00010054493472563566,
93
- "loss": 0.487,
94
- "step": 6500
95
- },
96
- {
97
- "epoch": 0.04,
98
- "learning_rate": 0.00010050721222807678,
99
- "loss": 0.4854,
100
- "step": 7000
101
- },
102
- {
103
- "epoch": 0.04,
104
- "learning_rate": 0.00010046670672689959,
105
- "loss": 0.4851,
106
- "step": 7500
107
- },
108
- {
109
- "epoch": 0.04,
110
- "learning_rate": 0.00010042342072067417,
111
- "loss": 0.4832,
112
- "step": 8000
113
- },
114
- {
115
- "epoch": 0.04,
116
- "learning_rate": 0.00010037735687948529,
117
- "loss": 0.4838,
118
- "step": 8500
119
- },
120
- {
121
- "epoch": 0.04,
122
- "learning_rate": 0.0001003287189239578,
123
- "loss": 0.4837,
124
- "step": 9000
125
- },
126
- {
127
- "epoch": 0.05,
128
- "learning_rate": 0.00010027711919001668,
129
- "loss": 0.4828,
130
- "step": 9500
131
- },
132
- {
133
- "epoch": 0.05,
134
- "learning_rate": 0.00010022275064567964,
135
- "loss": 0.4826,
136
- "step": 10000
137
- },
138
- {
139
- "epoch": 0.05,
140
- "eval_loss": 0.456764280796051,
141
- "eval_runtime": 528.1623,
142
- "eval_samples_per_second": 121.175,
143
- "eval_steps_per_second": 1.893,
144
- "step": 10000
145
- },
146
- {
147
- "epoch": 0.0,
148
- "learning_rate": 0.00010016561664465461,
149
- "loss": 0.4813,
150
- "step": 10500
151
- },
152
- {
153
- "epoch": 0.01,
154
- "learning_rate": 0.00010010572071123591,
155
- "loss": 0.481,
156
- "step": 11000
157
- },
158
- {
159
- "epoch": 0.01,
160
- "learning_rate": 0.00010004306654008681,
161
- "loss": 0.4797,
162
- "step": 11500
163
- },
164
- {
165
- "epoch": 0.01,
166
- "learning_rate": 9.997765799601176e-05,
167
- "loss": 0.4797,
168
- "step": 12000
169
- },
170
- {
171
- "epoch": 0.01,
172
- "learning_rate": 9.990949911371783e-05,
173
- "loss": 0.4777,
174
- "step": 12500
175
- },
176
- {
177
- "epoch": 0.01,
178
- "learning_rate": 9.983859409756594e-05,
179
- "loss": 0.478,
180
- "step": 13000
181
- },
182
- {
183
- "epoch": 0.02,
184
- "learning_rate": 9.976494732131149e-05,
185
- "loss": 0.476,
186
- "step": 13500
187
- },
188
- {
189
- "epoch": 0.02,
190
- "learning_rate": 9.968871882446063e-05,
191
- "loss": 0.4751,
192
- "step": 14000
193
- },
194
- {
195
- "epoch": 0.02,
196
- "learning_rate": 9.960960778568231e-05,
197
- "loss": 0.4756,
198
- "step": 14500
199
- },
200
- {
201
- "epoch": 0.03,
202
- "learning_rate": 9.95279355079561e-05,
203
- "loss": 0.475,
204
- "step": 15000
205
- },
206
- {
207
- "epoch": 0.03,
208
- "eval_loss": 0.45417556166648865,
209
- "eval_runtime": 68.2171,
210
- "eval_samples_per_second": 93.818,
211
- "eval_steps_per_second": 1.466,
212
- "step": 15000
213
- },
214
- {
215
- "epoch": 0.03,
216
- "learning_rate": 9.944337968710037e-05,
217
- "loss": 0.4773,
218
- "step": 15500
219
- },
220
- {
221
- "epoch": 0.03,
222
- "learning_rate": 9.935610648482853e-05,
223
- "loss": 0.4759,
224
- "step": 16000
225
- },
226
- {
227
- "epoch": 0.03,
228
- "learning_rate": 9.926612128456279e-05,
229
- "loss": 0.475,
230
- "step": 16500
231
- },
232
- {
233
- "epoch": 0.04,
234
- "learning_rate": 9.917342963701418e-05,
235
- "loss": 0.4762,
236
- "step": 17000
237
- },
238
- {
239
- "epoch": 0.04,
240
- "learning_rate": 9.907803725984013e-05,
241
- "loss": 0.4754,
242
- "step": 17500
243
- },
244
- {
245
- "epoch": 0.04,
246
- "learning_rate": 9.897995003729183e-05,
247
- "loss": 0.4761,
248
- "step": 18000
249
- },
250
- {
251
- "epoch": 0.04,
252
- "learning_rate": 9.887917401985114e-05,
253
- "loss": 0.4739,
254
- "step": 18500
255
- },
256
- {
257
- "epoch": 0.04,
258
- "learning_rate": 9.877571542385757e-05,
259
- "loss": 0.473,
260
- "step": 19000
261
- },
262
- {
263
- "epoch": 0.05,
264
- "learning_rate": 9.866979556723038e-05,
265
- "loss": 0.4733,
266
- "step": 19500
267
- },
268
- {
269
- "epoch": 0.05,
270
- "learning_rate": 9.856099645730841e-05,
271
- "loss": 0.4725,
272
- "step": 20000
273
- },
274
- {
275
- "epoch": 0.05,
276
- "eval_loss": 0.4468332529067993,
277
- "eval_runtime": 78.0195,
278
- "eval_samples_per_second": 82.031,
279
- "eval_steps_per_second": 1.282,
280
- "step": 20000
281
- },
282
- {
283
- "epoch": 0.05,
284
- "learning_rate": 9.844953439552432e-05,
285
- "loss": 0.4724,
286
- "step": 20500
287
- },
288
- {
289
- "epoch": 0.06,
290
- "learning_rate": 9.833564713977207e-05,
291
- "loss": 0.4725,
292
- "step": 21000
293
- },
294
- {
295
- "epoch": 0.06,
296
- "learning_rate": 9.82188852555582e-05,
297
- "loss": 0.4714,
298
- "step": 21500
299
- },
300
- {
301
- "epoch": 0.06,
302
- "learning_rate": 9.809948152251264e-05,
303
- "loss": 0.4708,
304
- "step": 22000
305
- },
306
- {
307
- "epoch": 0.06,
308
- "learning_rate": 9.797744330602011e-05,
309
- "loss": 0.4695,
310
- "step": 22500
311
- },
312
- {
313
- "epoch": 0.07,
314
- "learning_rate": 9.785277813397263e-05,
315
- "loss": 0.4696,
316
- "step": 23000
317
- },
318
- {
319
- "epoch": 0.07,
320
- "learning_rate": 9.772549369630525e-05,
321
- "loss": 0.471,
322
- "step": 23500
323
- },
324
- {
325
- "epoch": 0.07,
326
- "learning_rate": 9.759559784452165e-05,
327
- "loss": 0.4704,
328
- "step": 24000
329
- },
330
- {
331
- "epoch": 0.07,
332
- "learning_rate": 9.746309859120983e-05,
333
- "loss": 0.4697,
334
- "step": 24500
335
- },
336
- {
337
- "epoch": 0.07,
338
- "learning_rate": 9.732827688303682e-05,
339
- "loss": 0.4692,
340
- "step": 25000
341
- },
342
- {
343
- "epoch": 0.07,
344
- "eval_loss": 0.44451454281806946,
345
- "eval_runtime": 93.3275,
346
- "eval_samples_per_second": 68.576,
347
- "eval_steps_per_second": 1.071,
348
- "step": 25000
349
- },
350
- {
351
- "epoch": 0.08,
352
- "learning_rate": 9.71906006716561e-05,
353
- "loss": 0.4683,
354
- "step": 25500
355
- },
356
- {
357
- "epoch": 0.08,
358
- "learning_rate": 9.705034604088048e-05,
359
- "loss": 0.4671,
360
- "step": 26000
361
- },
362
- {
363
- "epoch": 0.08,
364
- "learning_rate": 9.690752164227625e-05,
365
- "loss": 0.4693,
366
- "step": 26500
367
- },
368
- {
369
- "epoch": 0.09,
370
- "learning_rate": 9.67624296065391e-05,
371
- "loss": 0.4681,
372
- "step": 27000
373
- },
374
- {
375
- "epoch": 0.09,
376
- "learning_rate": 9.661449735541914e-05,
377
- "loss": 0.468,
378
- "step": 27500
379
- },
380
- {
381
- "epoch": 0.09,
382
- "learning_rate": 9.646402222167052e-05,
383
- "loss": 0.4677,
384
- "step": 28000
385
- },
386
- {
387
- "epoch": 0.09,
388
- "learning_rate": 9.631132202706308e-05,
389
- "loss": 0.4673,
390
- "step": 28500
391
- },
392
- {
393
- "epoch": 0.1,
394
- "learning_rate": 9.615579416918834e-05,
395
- "loss": 0.4661,
396
- "step": 29000
397
- },
398
- {
399
- "epoch": 0.1,
400
- "learning_rate": 9.599775172365814e-05,
401
- "loss": 0.4657,
402
- "step": 29500
403
- },
404
- {
405
- "epoch": 0.1,
406
- "learning_rate": 9.583720443927501e-05,
407
- "loss": 0.4658,
408
- "step": 30000
409
- },
410
- {
411
- "epoch": 0.1,
412
- "eval_loss": 0.4412670135498047,
413
- "eval_runtime": 84.6587,
414
- "eval_samples_per_second": 75.598,
415
- "eval_steps_per_second": 1.181,
416
- "step": 30000
417
- },
418
- {
419
- "epoch": 0.1,
420
- "learning_rate": 9.567416221935163e-05,
421
- "loss": 0.465,
422
- "step": 30500
423
- },
424
- {
425
- "epoch": 0.1,
426
- "learning_rate": 9.550863512110018e-05,
427
- "loss": 0.4666,
428
- "step": 31000
429
- },
430
- {
431
- "epoch": 0.11,
432
- "learning_rate": 9.534097182139975e-05,
433
- "loss": 0.4638,
434
- "step": 31500
435
- },
436
- {
437
- "epoch": 0.11,
438
- "learning_rate": 9.517051066878048e-05,
439
- "loss": 0.4653,
440
- "step": 32000
441
- },
442
- {
443
- "epoch": 0.11,
444
- "learning_rate": 9.499794397719863e-05,
445
- "loss": 0.4627,
446
- "step": 32500
447
- },
448
- {
449
- "epoch": 0.12,
450
- "learning_rate": 9.48225907448766e-05,
451
- "loss": 0.4653,
452
- "step": 33000
453
- },
454
- {
455
- "epoch": 0.12,
456
- "learning_rate": 9.464480516316628e-05,
457
- "loss": 0.4641,
458
- "step": 33500
459
- },
460
- {
461
- "epoch": 0.12,
462
- "learning_rate": 9.44645981987198e-05,
463
- "loss": 0.4644,
464
- "step": 34000
465
- },
466
- {
467
- "epoch": 0.12,
468
- "learning_rate": 9.428198096755159e-05,
469
- "loss": 0.4623,
470
- "step": 34500
471
- },
472
- {
473
- "epoch": 0.12,
474
- "learning_rate": 9.409696473435264e-05,
475
- "loss": 0.4648,
476
- "step": 35000
477
- },
478
- {
479
- "epoch": 0.12,
480
- "eval_loss": 0.43688544631004333,
481
- "eval_runtime": 89.9906,
482
- "eval_samples_per_second": 71.119,
483
- "eval_steps_per_second": 1.111,
484
- "step": 35000
485
- },
486
- {
487
- "epoch": 0.13,
488
- "learning_rate": 9.390956091179574e-05,
489
- "loss": 0.4632,
490
- "step": 35500
491
- },
492
- {
493
- "epoch": 0.13,
494
- "learning_rate": 9.371978105983142e-05,
495
- "loss": 0.4616,
496
- "step": 36000
497
- },
498
- {
499
- "epoch": 0.13,
500
- "learning_rate": 9.352802352506516e-05,
501
- "loss": 0.462,
502
- "step": 36500
503
- },
504
- {
505
- "epoch": 0.14,
506
- "learning_rate": 9.333353157269004e-05,
507
- "loss": 0.4623,
508
- "step": 37000
509
- },
510
- {
511
- "epoch": 0.14,
512
- "learning_rate": 9.313669912311046e-05,
513
- "loss": 0.4615,
514
- "step": 37500
515
- },
516
- {
517
- "epoch": 0.14,
518
- "learning_rate": 9.293753831787908e-05,
519
- "loss": 0.4616,
520
- "step": 38000
521
- },
522
- {
523
- "epoch": 0.14,
524
- "learning_rate": 9.27360614421725e-05,
525
- "loss": 0.4603,
526
- "step": 38500
527
- },
528
- {
529
- "epoch": 0.14,
530
- "learning_rate": 9.253228092403345e-05,
531
- "loss": 0.4603,
532
- "step": 39000
533
- },
534
- {
535
- "epoch": 0.15,
536
- "learning_rate": 9.232662375484976e-05,
537
- "loss": 0.4602,
538
- "step": 39500
539
- },
540
- {
541
- "epoch": 0.15,
542
- "learning_rate": 9.211869730366415e-05,
543
- "loss": 0.4601,
544
- "step": 40000
545
- },
546
- {
547
- "epoch": 0.15,
548
- "eval_loss": 0.43458402156829834,
549
- "eval_runtime": 76.3497,
550
- "eval_samples_per_second": 83.825,
551
- "eval_steps_per_second": 1.31,
552
- "step": 40000
553
- },
554
- {
555
- "epoch": 0.15,
556
- "learning_rate": 9.190809087984221e-05,
557
- "loss": 0.46,
558
- "step": 40500
559
- },
560
- {
561
- "epoch": 0.15,
562
- "learning_rate": 9.16952318867122e-05,
563
- "loss": 0.4596,
564
- "step": 41000
565
- },
566
- {
567
- "epoch": 0.16,
568
- "learning_rate": 9.148013345441946e-05,
569
- "loss": 0.4599,
570
- "step": 41500
571
- },
572
- {
573
- "epoch": 0.16,
574
- "learning_rate": 9.126280885124848e-05,
575
- "loss": 0.4601,
576
- "step": 42000
577
- },
578
- {
579
- "epoch": 0.16,
580
- "learning_rate": 9.104327148280447e-05,
581
- "loss": 0.4608,
582
- "step": 42500
583
- },
584
- {
585
- "epoch": 0.17,
586
- "learning_rate": 9.082153489118645e-05,
587
- "loss": 0.4588,
588
- "step": 43000
589
- },
590
- {
591
- "epoch": 0.17,
592
- "learning_rate": 9.059761275415186e-05,
593
- "loss": 0.4588,
594
- "step": 43500
595
- },
596
- {
597
- "epoch": 0.17,
598
- "learning_rate": 9.037151888427288e-05,
599
- "loss": 0.4584,
600
- "step": 44000
601
- },
602
- {
603
- "epoch": 0.17,
604
- "learning_rate": 9.014326722808438e-05,
605
- "loss": 0.4577,
606
- "step": 44500
607
- },
608
- {
609
- "epoch": 0.17,
610
- "learning_rate": 8.991333478594443e-05,
611
- "loss": 0.4591,
612
- "step": 45000
613
- },
614
- {
615
- "epoch": 0.17,
616
- "eval_loss": 0.4318523406982422,
617
- "eval_runtime": 79.5595,
618
- "eval_samples_per_second": 80.443,
619
- "eval_steps_per_second": 1.257,
620
- "step": 45000
621
- },
622
- {
623
- "epoch": 0.18,
624
- "learning_rate": 8.968081417300147e-05,
625
- "loss": 0.4566,
626
- "step": 45500
627
- },
628
- {
629
- "epoch": 0.18,
630
- "learning_rate": 8.944617837966915e-05,
631
- "loss": 0.4568,
632
- "step": 46000
633
- },
634
- {
635
- "epoch": 0.18,
636
- "learning_rate": 8.920944187938838e-05,
637
- "loss": 0.4576,
638
- "step": 46500
639
- },
640
- {
641
- "epoch": 0.18,
642
- "learning_rate": 8.897061927518163e-05,
643
- "loss": 0.4564,
644
- "step": 47000
645
- },
646
- {
647
- "epoch": 0.19,
648
- "learning_rate": 8.873069298117968e-05,
649
- "loss": 0.4563,
650
- "step": 47500
651
- },
652
- {
653
- "epoch": 0.19,
654
- "learning_rate": 8.848775068828501e-05,
655
- "loss": 0.4555,
656
- "step": 48000
657
- },
658
- {
659
- "epoch": 0.19,
660
- "learning_rate": 8.824276680877652e-05,
661
- "loss": 0.4553,
662
- "step": 48500
663
- },
664
- {
665
- "epoch": 0.2,
666
- "learning_rate": 8.799575645441385e-05,
667
- "loss": 0.4558,
668
- "step": 49000
669
- },
670
- {
671
- "epoch": 0.2,
672
- "learning_rate": 8.774673486195919e-05,
673
- "loss": 0.4555,
674
- "step": 49500
675
- },
676
- {
677
- "epoch": 0.2,
678
- "learning_rate": 8.749622140879103e-05,
679
- "loss": 0.4558,
680
- "step": 50000
681
- },
682
- {
683
- "epoch": 0.2,
684
- "eval_loss": 0.4324014186859131,
685
- "eval_runtime": 73.2304,
686
- "eval_samples_per_second": 87.395,
687
- "eval_steps_per_second": 1.366,
688
- "step": 50000
689
- },
690
- {
691
- "epoch": 0.2,
692
- "learning_rate": 8.724373544494098e-05,
693
- "loss": 0.4557,
694
- "step": 50500
695
- },
696
- {
697
- "epoch": 0.2,
698
- "learning_rate": 8.698878062256518e-05,
699
- "loss": 0.4547,
700
- "step": 51000
701
- },
702
- {
703
- "epoch": 0.21,
704
- "learning_rate": 8.673187667711001e-05,
705
- "loss": 0.4536,
706
- "step": 51500
707
- },
708
- {
709
- "epoch": 0.21,
710
- "learning_rate": 8.647303945562088e-05,
711
- "loss": 0.4552,
712
- "step": 52000
713
- },
714
- {
715
- "epoch": 0.21,
716
- "learning_rate": 8.621228492439691e-05,
717
- "loss": 0.4543,
718
- "step": 52500
719
- },
720
- {
721
- "epoch": 0.21,
722
- "learning_rate": 8.594962916800582e-05,
723
- "loss": 0.455,
724
- "step": 53000
725
- },
726
- {
727
- "epoch": 0.22,
728
- "learning_rate": 8.568508838829189e-05,
729
- "loss": 0.4538,
730
- "step": 53500
731
- },
732
- {
733
- "epoch": 0.22,
734
- "learning_rate": 8.54186789033765e-05,
735
- "loss": 0.4535,
736
- "step": 54000
737
- },
738
- {
739
- "epoch": 0.22,
740
- "learning_rate": 8.515041714665157e-05,
741
- "loss": 0.453,
742
- "step": 54500
743
- },
744
- {
745
- "epoch": 0.23,
746
- "learning_rate": 8.488031966576593e-05,
747
- "loss": 0.4543,
748
- "step": 55000
749
- },
750
- {
751
- "epoch": 0.23,
752
- "eval_loss": 0.4287147521972656,
753
- "eval_runtime": 78.086,
754
- "eval_samples_per_second": 81.961,
755
- "eval_steps_per_second": 1.281,
756
- "step": 55000
757
- },
758
- {
759
- "epoch": 0.23,
760
- "learning_rate": 8.460894875898792e-05,
761
- "loss": 0.4535,
762
- "step": 55500
763
- },
764
- {
765
- "epoch": 0.23,
766
- "learning_rate": 8.43352335124106e-05,
767
- "loss": 0.452,
768
- "step": 56000
769
- },
770
- {
771
- "epoch": 0.23,
772
- "learning_rate": 8.405973282603896e-05,
773
- "loss": 0.452,
774
- "step": 56500
775
- },
776
- {
777
- "epoch": 0.23,
778
- "learning_rate": 8.378246369405305e-05,
779
- "loss": 0.4512,
780
- "step": 57000
781
- },
782
- {
783
- "epoch": 0.24,
784
- "learning_rate": 8.350344321971899e-05,
785
- "loss": 0.4507,
786
- "step": 57500
787
- },
788
- {
789
- "epoch": 0.24,
790
- "learning_rate": 8.322268861433393e-05,
791
- "loss": 0.4518,
792
- "step": 58000
793
- },
794
- {
795
- "epoch": 0.24,
796
- "learning_rate": 8.294078384081616e-05,
797
- "loss": 0.4511,
798
- "step": 58500
799
- },
800
- {
801
- "epoch": 0.24,
802
- "learning_rate": 8.26566164153479e-05,
803
- "loss": 0.4511,
804
- "step": 59000
805
- },
806
- {
807
- "epoch": 0.25,
808
- "learning_rate": 8.237076709509483e-05,
809
- "loss": 0.4509,
810
- "step": 59500
811
- },
812
- {
813
- "epoch": 0.25,
814
- "learning_rate": 8.208325351258954e-05,
815
- "loss": 0.4504,
816
- "step": 60000
817
- },
818
- {
819
- "epoch": 0.25,
820
- "eval_loss": 0.4240756332874298,
821
- "eval_runtime": 76.2798,
822
- "eval_samples_per_second": 83.902,
823
- "eval_steps_per_second": 1.311,
824
- "step": 60000
825
- },
826
- {
827
- "epoch": 0.25,
828
- "learning_rate": 8.179409340302407e-05,
829
- "loss": 0.4503,
830
- "step": 60500
831
- },
832
- {
833
- "epoch": 0.26,
834
- "learning_rate": 8.150330460315606e-05,
835
- "loss": 0.45,
836
- "step": 61000
837
- },
838
- {
839
- "epoch": 0.26,
840
- "learning_rate": 8.121090505020843e-05,
841
- "loss": 0.4507,
842
- "step": 61500
843
- },
844
- {
845
- "epoch": 0.26,
846
- "learning_rate": 8.091750234279549e-05,
847
- "loss": 0.4504,
848
- "step": 62000
849
- },
850
- {
851
- "epoch": 0.26,
852
- "learning_rate": 8.062193862268015e-05,
853
- "loss": 0.4502,
854
- "step": 62500
855
- },
856
- {
857
- "epoch": 0.27,
858
- "learning_rate": 8.032481851629047e-05,
859
- "loss": 0.45,
860
- "step": 63000
861
- },
862
- {
863
- "epoch": 0.27,
864
- "learning_rate": 8.002616035139417e-05,
865
- "loss": 0.4492,
866
- "step": 63500
867
- },
868
- {
869
- "epoch": 0.27,
870
- "learning_rate": 7.972598255063367e-05,
871
- "loss": 0.4489,
872
- "step": 64000
873
- },
874
- {
875
- "epoch": 0.27,
876
- "learning_rate": 7.942430363038965e-05,
877
- "loss": 0.4479,
878
- "step": 64500
879
- },
880
- {
881
- "epoch": 0.28,
882
- "learning_rate": 7.912174998963153e-05,
883
- "loss": 0.449,
884
- "step": 65000
885
- },
886
- {
887
- "epoch": 0.28,
888
- "eval_loss": 0.42463359236717224,
889
- "eval_runtime": 74.485,
890
- "eval_samples_per_second": 85.923,
891
- "eval_steps_per_second": 1.343,
892
- "step": 65000
893
- },
894
- {
895
- "epoch": 0.28,
896
- "learning_rate": 7.88171276576962e-05,
897
- "loss": 0.4481,
898
- "step": 65500
899
- },
900
- {
901
- "epoch": 0.28,
902
- "learning_rate": 7.851106026872828e-05,
903
- "loss": 0.4475,
904
- "step": 66000
905
- },
906
- {
907
- "epoch": 0.28,
908
- "learning_rate": 7.820356670240599e-05,
909
- "loss": 0.4485,
910
- "step": 66500
911
- },
912
- {
913
- "epoch": 0.28,
914
- "learning_rate": 7.789466592638097e-05,
915
- "loss": 0.4475,
916
- "step": 67000
917
- },
918
- {
919
- "epoch": 0.29,
920
- "learning_rate": 7.75843769951081e-05,
921
- "loss": 0.4468,
922
- "step": 67500
923
- },
924
- {
925
- "epoch": 0.29,
926
- "learning_rate": 7.727271904867026e-05,
927
- "loss": 0.4481,
928
- "step": 68000
929
- },
930
- {
931
- "epoch": 0.29,
932
- "learning_rate": 7.695971131159764e-05,
933
- "loss": 0.4468,
934
- "step": 68500
935
- },
936
- {
937
- "epoch": 0.29,
938
- "learning_rate": 7.664600308306929e-05,
939
- "loss": 0.4468,
940
- "step": 69000
941
- },
942
- {
943
- "epoch": 0.3,
944
- "learning_rate": 7.633035637295333e-05,
945
- "loss": 0.4463,
946
- "step": 69500
947
- },
948
- {
949
- "epoch": 0.3,
950
- "learning_rate": 7.601341800157135e-05,
951
- "loss": 0.447,
952
- "step": 70000
953
- },
954
- {
955
- "epoch": 0.3,
956
- "eval_loss": 0.4192037880420685,
957
- "eval_runtime": 75.0308,
958
- "eval_samples_per_second": 85.298,
959
- "eval_steps_per_second": 1.333,
960
- "step": 70000
961
- },
962
- {
963
- "epoch": 0.3,
964
- "learning_rate": 7.569520751917501e-05,
965
- "loss": 0.4467,
966
- "step": 70500
967
- },
968
- {
969
- "epoch": 0.3,
970
- "learning_rate": 7.537574455448575e-05,
971
- "loss": 0.4454,
972
- "step": 71000
973
- },
974
- {
975
- "epoch": 0.31,
976
- "learning_rate": 7.505504881348406e-05,
977
- "loss": 0.4463,
978
- "step": 71500
979
- },
980
- {
981
- "epoch": 0.31,
982
- "learning_rate": 7.473378509304684e-05,
983
- "loss": 0.4466,
984
- "step": 72000
985
- },
986
- {
987
- "epoch": 0.31,
988
- "learning_rate": 7.441068558672384e-05,
989
- "loss": 0.4454,
990
- "step": 72500
991
- },
992
- {
993
- "epoch": 0.32,
994
- "learning_rate": 7.408706253659512e-05,
995
- "loss": 0.4445,
996
- "step": 73000
997
- },
998
- {
999
- "epoch": 0.32,
1000
- "learning_rate": 7.376163882556241e-05,
1001
- "loss": 0.4465,
1002
- "step": 73500
1003
- },
1004
- {
1005
- "epoch": 0.32,
1006
- "learning_rate": 7.343508190386175e-05,
1007
- "loss": 0.4449,
1008
- "step": 74000
1009
- },
1010
- {
1011
- "epoch": 0.32,
1012
- "learning_rate": 7.310741191506228e-05,
1013
- "loss": 0.4441,
1014
- "step": 74500
1015
- },
1016
- {
1017
- "epoch": 0.33,
1018
- "learning_rate": 7.277996627287863e-05,
1019
- "loss": 0.4479,
1020
- "step": 75000
1021
- },
1022
- {
1023
- "epoch": 0.33,
1024
- "eval_loss": 0.420879065990448,
1025
- "eval_runtime": 72.7042,
1026
- "eval_samples_per_second": 88.028,
1027
- "eval_steps_per_second": 1.375,
1028
- "step": 75000
1029
- },
1030
- {
1031
- "epoch": 0.33,
1032
- "learning_rate": 7.245013510379486e-05,
1033
- "loss": 0.4443,
1034
- "step": 75500
1035
- },
1036
- {
1037
- "epoch": 0.33,
1038
- "learning_rate": 7.211925162377042e-05,
1039
- "loss": 0.4426,
1040
- "step": 76000
1041
- },
1042
- {
1043
- "epoch": 0.33,
1044
- "learning_rate": 7.178733624325697e-05,
1045
- "loss": 0.4419,
1046
- "step": 76500
1047
- },
1048
- {
1049
- "epoch": 0.34,
1050
- "learning_rate": 7.145440943635861e-05,
1051
- "loss": 0.4433,
1052
- "step": 77000
1053
- },
1054
- {
1055
- "epoch": 0.34,
1056
- "learning_rate": 7.112049173956905e-05,
1057
- "loss": 0.4445,
1058
- "step": 77500
1059
- },
1060
- {
1061
- "epoch": 0.34,
1062
- "learning_rate": 7.07856037505047e-05,
1063
- "loss": 0.4429,
1064
- "step": 78000
1065
- },
1066
- {
1067
- "epoch": 0.34,
1068
- "learning_rate": 7.04497661266342e-05,
1069
- "loss": 0.4421,
1070
- "step": 78500
1071
- },
1072
- {
1073
- "epoch": 0.34,
1074
- "learning_rate": 7.01129995840041e-05,
1075
- "loss": 0.4421,
1076
- "step": 79000
1077
- },
1078
- {
1079
- "epoch": 0.35,
1080
- "learning_rate": 6.977600113783091e-05,
1081
- "loss": 0.4428,
1082
- "step": 79500
1083
- },
1084
- {
1085
- "epoch": 0.35,
1086
- "learning_rate": 6.943744088754813e-05,
1087
- "loss": 0.4419,
1088
- "step": 80000
1089
- },
1090
- {
1091
- "epoch": 0.35,
1092
- "eval_loss": 0.4191003441810608,
1093
- "eval_runtime": 73.8393,
1094
- "eval_samples_per_second": 86.675,
1095
- "eval_steps_per_second": 1.354,
1096
- "step": 80000
1097
- },
1098
- {
1099
- "epoch": 0.35,
1100
- "learning_rate": 6.9098014163495e-05,
1101
- "loss": 0.4409,
1102
- "step": 80500
1103
- },
1104
- {
1105
- "epoch": 0.35,
1106
- "learning_rate": 6.875774190311069e-05,
1107
- "loss": 0.4412,
1108
- "step": 81000
1109
- },
1110
- {
1111
- "epoch": 0.36,
1112
- "learning_rate": 6.841664509599106e-05,
1113
- "loss": 0.4414,
1114
- "step": 81500
1115
- },
1116
- {
1117
- "epoch": 0.36,
1118
- "learning_rate": 6.807474478259389e-05,
1119
- "loss": 0.4416,
1120
- "step": 82000
1121
- },
1122
- {
1123
- "epoch": 0.36,
1124
- "learning_rate": 6.773206205294097e-05,
1125
- "loss": 0.4419,
1126
- "step": 82500
1127
- },
1128
- {
1129
- "epoch": 0.36,
1130
- "learning_rate": 6.738930567901446e-05,
1131
- "loss": 0.4411,
1132
- "step": 83000
1133
- },
1134
- {
1135
- "epoch": 0.37,
1136
- "learning_rate": 6.704512303767616e-05,
1137
- "loss": 0.4412,
1138
- "step": 83500
1139
- },
1140
- {
1141
- "epoch": 0.37,
1142
- "learning_rate": 6.67002214920013e-05,
1143
- "loss": 0.4416,
1144
- "step": 84000
1145
- },
1146
- {
1147
- "epoch": 0.37,
1148
- "learning_rate": 6.635462231714186e-05,
1149
- "loss": 0.4409,
1150
- "step": 84500
1151
- },
1152
- {
1153
- "epoch": 0.38,
1154
- "learning_rate": 6.600834683128286e-05,
1155
- "loss": 0.44,
1156
- "step": 85000
1157
- },
1158
- {
1159
- "epoch": 0.38,
1160
- "eval_loss": 0.4143606424331665,
1161
- "eval_runtime": 94.0695,
1162
- "eval_samples_per_second": 68.035,
1163
- "eval_steps_per_second": 1.063,
1164
- "step": 85000
1165
- },
1166
- {
1167
- "epoch": 0.38,
1168
- "learning_rate": 6.566141639432736e-05,
1169
- "loss": 0.4393,
1170
- "step": 85500
1171
- },
1172
- {
1173
- "epoch": 0.38,
1174
- "learning_rate": 6.531385240657893e-05,
1175
- "loss": 0.4402,
1176
- "step": 86000
1177
- },
1178
- {
1179
- "epoch": 0.38,
1180
- "learning_rate": 6.496567630742148e-05,
1181
- "loss": 0.4398,
1182
- "step": 86500
1183
- },
1184
- {
1185
- "epoch": 0.39,
1186
- "learning_rate": 6.461690957399686e-05,
1187
- "loss": 0.4387,
1188
- "step": 87000
1189
- },
1190
- {
1191
- "epoch": 0.39,
1192
- "learning_rate": 6.426757371988005e-05,
1193
- "loss": 0.4386,
1194
- "step": 87500
1195
- },
1196
- {
1197
- "epoch": 0.39,
1198
- "learning_rate": 6.391769029375203e-05,
1199
- "loss": 0.4389,
1200
- "step": 88000
1201
- },
1202
- {
1203
- "epoch": 0.39,
1204
- "learning_rate": 6.356728087807062e-05,
1205
- "loss": 0.4389,
1206
- "step": 88500
1207
- },
1208
- {
1209
- "epoch": 0.4,
1210
- "learning_rate": 6.321636708773917e-05,
1211
- "loss": 0.4385,
1212
- "step": 89000
1213
- },
1214
- {
1215
- "epoch": 0.4,
1216
- "learning_rate": 6.28649705687732e-05,
1217
- "loss": 0.4375,
1218
- "step": 89500
1219
- },
1220
- {
1221
- "epoch": 0.4,
1222
- "learning_rate": 6.25138171578181e-05,
1223
- "loss": 0.4368,
1224
- "step": 90000
1225
- },
1226
- {
1227
- "epoch": 0.4,
1228
- "eval_loss": 0.414020299911499,
1229
- "eval_runtime": 77.2468,
1230
- "eval_samples_per_second": 82.851,
1231
- "eval_steps_per_second": 1.295,
1232
- "step": 90000
1233
- },
1234
- {
1235
- "epoch": 0.4,
1236
- "learning_rate": 6.21622261106191e-05,
1237
- "loss": 0.4382,
1238
- "step": 90500
1239
- },
1240
- {
1241
- "epoch": 0.41,
1242
- "learning_rate": 6.180951320007123e-05,
1243
- "loss": 0.4377,
1244
- "step": 91000
1245
- },
1246
- {
1247
- "epoch": 0.41,
1248
- "learning_rate": 6.145640434226352e-05,
1249
- "loss": 0.4375,
1250
- "step": 91500
1251
- },
1252
- {
1253
- "epoch": 0.41,
1254
- "learning_rate": 6.11029213186136e-05,
1255
- "loss": 0.4368,
1256
- "step": 92000
1257
- },
1258
- {
1259
- "epoch": 0.41,
1260
- "learning_rate": 6.074908593361942e-05,
1261
- "loss": 0.4377,
1262
- "step": 92500
1263
- },
1264
- {
1265
- "epoch": 0.41,
1266
- "learning_rate": 6.039633730667286e-05,
1267
- "loss": 0.4373,
1268
- "step": 93000
1269
- },
1270
- {
1271
- "epoch": 0.42,
1272
- "learning_rate": 6.004186388928874e-05,
1273
- "loss": 0.4368,
1274
- "step": 93500
1275
- },
1276
- {
1277
- "epoch": 0.42,
1278
- "learning_rate": 5.968710356158062e-05,
1279
- "loss": 0.4369,
1280
- "step": 94000
1281
- },
1282
- {
1283
- "epoch": 0.42,
1284
- "learning_rate": 5.933207820683662e-05,
1285
- "loss": 0.4369,
1286
- "step": 94500
1287
- },
1288
- {
1289
- "epoch": 0.42,
1290
- "learning_rate": 5.8976809724692915e-05,
1291
- "loss": 0.436,
1292
- "step": 95000
1293
- },
1294
- {
1295
- "epoch": 0.42,
1296
- "eval_loss": 0.41228634119033813,
1297
- "eval_runtime": 76.7198,
1298
- "eval_samples_per_second": 83.42,
1299
- "eval_steps_per_second": 1.303,
1300
- "step": 95000
1301
- },
1302
- {
1303
- "epoch": 0.43,
1304
- "learning_rate": 5.862132002978297e-05,
1305
- "loss": 0.4356,
1306
- "step": 95500
1307
- },
1308
- {
1309
- "epoch": 0.43,
1310
- "learning_rate": 5.826563105038563e-05,
1311
- "loss": 0.4354,
1312
- "step": 96000
1313
- },
1314
- {
1315
- "epoch": 0.43,
1316
- "learning_rate": 5.790976472707261e-05,
1317
- "loss": 0.4357,
1318
- "step": 96500
1319
- },
1320
- {
1321
- "epoch": 0.43,
1322
- "learning_rate": 5.7553743011355034e-05,
1323
- "loss": 0.4352,
1324
- "step": 97000
1325
- },
1326
- {
1327
- "epoch": 0.44,
1328
- "learning_rate": 5.719758786432931e-05,
1329
- "loss": 0.4341,
1330
- "step": 97500
1331
- },
1332
- {
1333
- "epoch": 0.44,
1334
- "learning_rate": 5.684203388517376e-05,
1335
- "loss": 0.4355,
1336
- "step": 98000
1337
- },
1338
- {
1339
- "epoch": 0.44,
1340
- "learning_rate": 5.6485677947424015e-05,
1341
- "loss": 0.4349,
1342
- "step": 98500
1343
- },
1344
- {
1345
- "epoch": 0.45,
1346
- "learning_rate": 5.6129254461650316e-05,
1347
- "loss": 0.4346,
1348
- "step": 99000
1349
- },
1350
- {
1351
- "epoch": 0.45,
1352
- "learning_rate": 5.577278541373216e-05,
1353
- "loss": 0.4344,
1354
- "step": 99500
1355
- },
1356
- {
1357
- "epoch": 0.45,
1358
- "learning_rate": 5.5417718780659065e-05,
1359
- "loss": 0.4355,
1360
- "step": 100000
1361
- },
1362
- {
1363
- "epoch": 0.45,
1364
- "eval_loss": 0.41035330295562744,
1365
- "eval_runtime": 78.542,
1366
- "eval_samples_per_second": 81.485,
1367
- "eval_steps_per_second": 1.273,
1368
- "step": 100000
1369
- },
1370
- {
1371
- "epoch": 0.45,
1372
- "learning_rate": 5.50612245385044e-05,
1373
- "loss": 0.4344,
1374
- "step": 100500
1375
- },
1376
- {
1377
- "epoch": 0.46,
1378
- "learning_rate": 5.470475061532183e-05,
1379
- "loss": 0.4334,
1380
- "step": 101000
1381
- },
1382
- {
1383
- "epoch": 0.46,
1384
- "learning_rate": 5.4348319000102045e-05,
1385
- "loss": 0.4324,
1386
- "step": 101500
1387
- },
1388
- {
1389
- "epoch": 0.46,
1390
- "learning_rate": 5.399195167922605e-05,
1391
- "loss": 0.4329,
1392
- "step": 102000
1393
- },
1394
- {
1395
- "epoch": 0.46,
1396
- "learning_rate": 5.3635670635108804e-05,
1397
- "loss": 0.432,
1398
- "step": 102500
1399
- },
1400
- {
1401
- "epoch": 0.47,
1402
- "learning_rate": 5.327949784484338e-05,
1403
- "loss": 0.4319,
1404
- "step": 103000
1405
- },
1406
- {
1407
- "epoch": 0.47,
1408
- "learning_rate": 5.292345527884516e-05,
1409
- "loss": 0.433,
1410
- "step": 103500
1411
- },
1412
- {
1413
- "epoch": 0.47,
1414
- "learning_rate": 5.256756489949676e-05,
1415
- "loss": 0.432,
1416
- "step": 104000
1417
- },
1418
- {
1419
- "epoch": 0.47,
1420
- "learning_rate": 5.221255990389496e-05,
1421
- "loss": 0.4329,
1422
- "step": 104500
1423
- },
1424
- {
1425
- "epoch": 0.47,
1426
- "learning_rate": 5.185703933203506e-05,
1427
- "loss": 0.4323,
1428
- "step": 105000
1429
- },
1430
- {
1431
- "epoch": 0.47,
1432
- "eval_loss": 0.4076640009880066,
1433
- "eval_runtime": 75.4837,
1434
- "eval_samples_per_second": 84.786,
1435
- "eval_steps_per_second": 1.325,
1436
- "step": 105000
1437
- },
1438
- {
1439
- "epoch": 0.48,
1440
- "learning_rate": 5.1502447101489115e-05,
1441
- "loss": 0.4324,
1442
- "step": 105500
1443
- },
1444
- {
1445
- "epoch": 0.48,
1446
- "learning_rate": 5.114738388115157e-05,
1447
- "loss": 0.4317,
1448
- "step": 106000
1449
- },
1450
- {
1451
- "epoch": 0.48,
1452
- "learning_rate": 5.07925824039137e-05,
1453
- "loss": 0.4314,
1454
- "step": 106500
1455
- },
1456
- {
1457
- "epoch": 0.48,
1458
- "learning_rate": 5.043806455560185e-05,
1459
- "loss": 0.4319,
1460
- "step": 107000
1461
- },
1462
- {
1463
- "epoch": 0.49,
1464
- "learning_rate": 5.008385220454688e-05,
1465
- "loss": 0.4306,
1466
- "step": 107500
1467
- },
1468
- {
1469
- "epoch": 0.49,
1470
- "learning_rate": 4.972996720023507e-05,
1471
- "loss": 0.431,
1472
- "step": 108000
1473
- },
1474
- {
1475
- "epoch": 0.49,
1476
- "learning_rate": 4.93764313719604e-05,
1477
- "loss": 0.4314,
1478
- "step": 108500
1479
- },
1480
- {
1481
- "epoch": 0.49,
1482
- "learning_rate": 4.90232665274781e-05,
1483
- "loss": 0.43,
1484
- "step": 109000
1485
- },
1486
- {
1487
- "epoch": 0.5,
1488
- "learning_rate": 4.867119958936006e-05,
1489
- "loss": 0.4313,
1490
- "step": 109500
1491
- },
1492
- {
1493
- "epoch": 0.5,
1494
- "learning_rate": 4.831884119208918e-05,
1495
- "loss": 0.432,
1496
- "step": 110000
1497
- },
1498
- {
1499
- "epoch": 0.5,
1500
- "eval_loss": 0.40734001994132996,
1501
- "eval_runtime": 72.2567,
1502
- "eval_samples_per_second": 88.573,
1503
- "eval_steps_per_second": 1.384,
1504
- "step": 110000
1505
- },
1506
- {
1507
- "epoch": 0.5,
1508
- "learning_rate": 4.796691901575466e-05,
1509
- "loss": 0.4311,
1510
- "step": 110500
1511
- },
1512
- {
1513
- "epoch": 0.51,
1514
- "learning_rate": 4.7615454768574006e-05,
1515
- "loss": 0.4304,
1516
- "step": 111000
1517
- },
1518
- {
1519
- "epoch": 0.51,
1520
- "learning_rate": 4.72651716067484e-05,
1521
- "loss": 0.4296,
1522
- "step": 111500
1523
- },
1524
- {
1525
- "epoch": 0.51,
1526
- "learning_rate": 4.6914687204096124e-05,
1527
- "loss": 0.4291,
1528
- "step": 112000
1529
- },
1530
- {
1531
- "epoch": 0.51,
1532
- "learning_rate": 4.656472563721235e-05,
1533
- "loss": 0.4291,
1534
- "step": 112500
1535
- },
1536
- {
1537
- "epoch": 0.52,
1538
- "learning_rate": 4.621530849337495e-05,
1539
- "loss": 0.4296,
1540
- "step": 113000
1541
- },
1542
- {
1543
- "epoch": 0.52,
1544
- "learning_rate": 4.586645732627929e-05,
1545
- "loss": 0.4294,
1546
- "step": 113500
1547
- },
1548
- {
1549
- "epoch": 0.52,
1550
- "learning_rate": 4.551888958144626e-05,
1551
- "loss": 0.429,
1552
- "step": 114000
1553
- },
1554
- {
1555
- "epoch": 0.52,
1556
- "learning_rate": 4.517123364857326e-05,
1557
- "loss": 0.429,
1558
- "step": 114500
1559
- },
1560
- {
1561
- "epoch": 0.53,
1562
- "learning_rate": 4.4824208095896454e-05,
1563
- "loss": 0.4279,
1564
- "step": 115000
1565
- },
1566
- {
1567
- "epoch": 0.53,
1568
- "eval_loss": 0.40424978733062744,
1569
- "eval_runtime": 77.1763,
1570
- "eval_samples_per_second": 82.927,
1571
- "eval_steps_per_second": 1.296,
1572
- "step": 115000
1573
- },
1574
- {
1575
- "epoch": 0.53,
1576
- "learning_rate": 4.4477834329586547e-05,
1577
- "loss": 0.4289,
1578
- "step": 115500
1579
- },
1580
- {
1581
- "epoch": 0.53,
1582
- "learning_rate": 4.4132133715609044e-05,
1583
- "loss": 0.4282,
1584
- "step": 116000
1585
- },
1586
- {
1587
- "epoch": 0.53,
1588
- "learning_rate": 4.37878168834403e-05,
1589
- "loss": 0.4276,
1590
- "step": 116500
1591
- },
1592
- {
1593
- "epoch": 0.54,
1594
- "learning_rate": 4.344352505188909e-05,
1595
- "loss": 0.4281,
1596
- "step": 117000
1597
- },
1598
- {
1599
- "epoch": 0.54,
1600
- "learning_rate": 4.309997017373853e-05,
1601
- "loss": 0.4274,
1602
- "step": 117500
1603
- },
1604
- {
1605
- "epoch": 0.54,
1606
- "learning_rate": 4.2757173441071825e-05,
1607
- "loss": 0.4272,
1608
- "step": 118000
1609
- },
1610
- {
1611
- "epoch": 0.54,
1612
- "learning_rate": 4.2415839242326844e-05,
1613
- "loss": 0.4285,
1614
- "step": 118500
1615
- },
1616
- {
1617
- "epoch": 0.55,
1618
- "learning_rate": 4.207462056671048e-05,
1619
- "loss": 0.4266,
1620
- "step": 119000
1621
- },
1622
- {
1623
- "epoch": 0.55,
1624
- "learning_rate": 4.173422328497157e-05,
1625
- "loss": 0.4279,
1626
- "step": 119500
1627
- },
1628
- {
1629
- "epoch": 0.55,
1630
- "learning_rate": 4.139466839441784e-05,
1631
- "loss": 0.4271,
1632
- "step": 120000
1633
- },
1634
- {
1635
- "epoch": 0.55,
1636
- "eval_loss": 0.40308678150177,
1637
- "eval_runtime": 73.5499,
1638
- "eval_samples_per_second": 87.016,
1639
- "eval_steps_per_second": 1.36,
1640
- "step": 120000
1641
- },
1642
- {
1643
- "epoch": 0.55,
1644
- "learning_rate": 4.105597684039436e-05,
1645
- "loss": 0.4254,
1646
- "step": 120500
1647
- },
1648
- {
1649
- "epoch": 0.56,
1650
- "learning_rate": 4.071816951499159e-05,
1651
- "loss": 0.427,
1652
- "step": 121000
1653
- },
1654
- {
1655
- "epoch": 0.56,
1656
- "learning_rate": 4.038126725575657e-05,
1657
- "loss": 0.4251,
1658
- "step": 121500
1659
- },
1660
- {
1661
- "epoch": 0.56,
1662
- "learning_rate": 4.004529084440762e-05,
1663
- "loss": 0.425,
1664
- "step": 122000
1665
- },
1666
- {
1667
- "epoch": 0.56,
1668
- "learning_rate": 3.971026100555235e-05,
1669
- "loss": 0.4257,
1670
- "step": 122500
1671
- },
1672
- {
1673
- "epoch": 0.56,
1674
- "learning_rate": 3.937619840540937e-05,
1675
- "loss": 0.4258,
1676
- "step": 123000
1677
- },
1678
- {
1679
- "epoch": 0.57,
1680
- "learning_rate": 3.9043123650533415e-05,
1681
- "loss": 0.4255,
1682
- "step": 123500
1683
- },
1684
- {
1685
- "epoch": 0.57,
1686
- "learning_rate": 3.8711720399273034e-05,
1687
- "loss": 0.4266,
1688
- "step": 124000
1689
- },
1690
- {
1691
- "epoch": 0.57,
1692
- "learning_rate": 3.8380680831439195e-05,
1693
- "loss": 0.4243,
1694
- "step": 124500
1695
- },
1696
- {
1697
- "epoch": 0.57,
1698
- "learning_rate": 3.805069051708565e-05,
1699
- "loss": 0.4253,
1700
- "step": 125000
1701
- },
1702
- {
1703
- "epoch": 0.57,
1704
- "eval_loss": 0.40229514241218567,
1705
- "eval_runtime": 73.6239,
1706
- "eval_samples_per_second": 86.928,
1707
- "eval_steps_per_second": 1.358,
1708
- "step": 125000
1709
- },
1710
- {
1711
- "epoch": 0.58,
1712
- "learning_rate": 3.772176981156942e-05,
1713
- "loss": 0.4247,
1714
- "step": 125500
1715
- },
1716
- {
1717
- "epoch": 0.58,
1718
- "learning_rate": 3.7394593564712783e-05,
1719
- "loss": 0.4257,
1720
- "step": 126000
1721
- },
1722
- {
1723
- "epoch": 0.58,
1724
- "learning_rate": 3.7068522961987034e-05,
1725
- "loss": 0.4246,
1726
- "step": 126500
1727
- },
1728
- {
1729
- "epoch": 0.58,
1730
- "learning_rate": 3.6742927987935615e-05,
1731
- "loss": 0.4236,
1732
- "step": 127000
1733
- },
1734
- {
1735
- "epoch": 0.59,
1736
- "learning_rate": 3.6418483291674376e-05,
1737
- "loss": 0.4246,
1738
- "step": 127500
1739
- },
1740
- {
1741
- "epoch": 0.59,
1742
- "learning_rate": 3.6095208886480486e-05,
1743
- "loss": 0.4235,
1744
- "step": 128000
1745
- },
1746
- {
1747
- "epoch": 0.59,
1748
- "learning_rate": 3.577312471344201e-05,
1749
- "loss": 0.4242,
1750
- "step": 128500
1751
- },
1752
- {
1753
- "epoch": 0.59,
1754
- "learning_rate": 3.545225064022787e-05,
1755
- "loss": 0.4229,
1756
- "step": 129000
1757
- },
1758
- {
1759
- "epoch": 0.6,
1760
- "learning_rate": 3.513324450767193e-05,
1761
- "loss": 0.4239,
1762
- "step": 129500
1763
- },
1764
- {
1765
- "epoch": 0.6,
1766
- "learning_rate": 3.481484741846708e-05,
1767
- "loss": 0.4234,
1768
- "step": 130000
1769
- },
1770
- {
1771
- "epoch": 0.6,
1772
- "eval_loss": 0.39831221103668213,
1773
- "eval_runtime": 80.2443,
1774
- "eval_samples_per_second": 79.756,
1775
- "eval_steps_per_second": 1.246,
1776
- "step": 130000
1777
- },
1778
- {
1779
- "epoch": 0.6,
1780
- "learning_rate": 3.449771954014401e-05,
1781
- "loss": 0.4239,
1782
- "step": 130500
1783
- },
1784
- {
1785
- "epoch": 0.6,
1786
- "learning_rate": 3.418188043464409e-05,
1787
- "loss": 0.4227,
1788
- "step": 131000
1789
- },
1790
- {
1791
- "epoch": 0.61,
1792
- "learning_rate": 3.386734958441104e-05,
1793
- "loss": 0.4224,
1794
- "step": 131500
1795
- },
1796
- {
1797
- "epoch": 0.61,
1798
- "learning_rate": 3.355414639118934e-05,
1799
- "loss": 0.4226,
1800
- "step": 132000
1801
- },
1802
- {
1803
- "epoch": 0.61,
1804
- "learning_rate": 3.3242290174827205e-05,
1805
- "loss": 0.423,
1806
- "step": 132500
1807
- },
1808
- {
1809
- "epoch": 0.61,
1810
- "learning_rate": 3.293241977586518e-05,
1811
- "loss": 0.4227,
1812
- "step": 133000
1813
- },
1814
- {
1815
- "epoch": 0.62,
1816
- "learning_rate": 3.2623312349439135e-05,
1817
- "loss": 0.4214,
1818
- "step": 133500
1819
- },
1820
- {
1821
- "epoch": 0.62,
1822
- "learning_rate": 3.231560931810103e-05,
1823
- "loss": 0.4228,
1824
- "step": 134000
1825
- },
1826
- {
1827
- "epoch": 0.62,
1828
- "learning_rate": 3.200932966242318e-05,
1829
- "loss": 0.4216,
1830
- "step": 134500
1831
- },
1832
- {
1833
- "epoch": 0.62,
1834
- "learning_rate": 3.17044922751775e-05,
1835
- "loss": 0.4234,
1836
- "step": 135000
1837
- },
1838
- {
1839
- "epoch": 0.62,
1840
- "eval_loss": 0.39721038937568665,
1841
- "eval_runtime": 79.9205,
1842
- "eval_samples_per_second": 80.08,
1843
- "eval_steps_per_second": 1.251,
1844
- "step": 135000
1845
- },
1846
- {
1847
- "epoch": 0.63,
1848
- "learning_rate": 3.140111596016993e-05,
1849
- "loss": 0.4214,
1850
- "step": 135500
1851
- },
1852
- {
1853
- "epoch": 0.63,
1854
- "learning_rate": 3.109921943108062e-05,
1855
- "loss": 0.421,
1856
- "step": 136000
1857
- },
1858
- {
1859
- "epoch": 0.63,
1860
- "learning_rate": 3.0798821310309536e-05,
1861
- "loss": 0.4215,
1862
- "step": 136500
1863
- },
1864
- {
1865
- "epoch": 0.64,
1866
- "learning_rate": 3.0500536364018534e-05,
1867
- "loss": 0.422,
1868
- "step": 137000
1869
- },
1870
- {
1871
- "epoch": 0.64,
1872
- "learning_rate": 3.0203187467139138e-05,
1873
- "loss": 0.422,
1874
- "step": 137500
1875
- },
1876
- {
1877
- "epoch": 0.64,
1878
- "learning_rate": 2.9907392250050176e-05,
1879
- "loss": 0.4217,
1880
- "step": 138000
1881
- },
1882
- {
1883
- "epoch": 0.64,
1884
- "learning_rate": 2.9613168958794007e-05,
1885
- "loss": 0.4217,
1886
- "step": 138500
1887
- },
1888
- {
1889
- "epoch": 0.65,
1890
- "learning_rate": 2.9321119409972645e-05,
1891
- "loss": 0.4208,
1892
- "step": 139000
1893
- },
1894
- {
1895
- "epoch": 0.65,
1896
- "learning_rate": 2.903009108533329e-05,
1897
- "loss": 0.4203,
1898
- "step": 139500
1899
- },
1900
- {
1901
- "epoch": 0.65,
1902
- "learning_rate": 2.8740688802594957e-05,
1903
- "loss": 0.4201,
1904
- "step": 140000
1905
- },
1906
- {
1907
- "epoch": 0.65,
1908
- "eval_loss": 0.3973109722137451,
1909
- "eval_runtime": 83.2966,
1910
- "eval_samples_per_second": 76.834,
1911
- "eval_steps_per_second": 1.201,
1912
- "step": 140000
1913
- },
1914
- {
1915
- "epoch": 0.65,
1916
- "learning_rate": 2.8452930413453704e-05,
1917
- "loss": 0.42,
1918
- "step": 140500
1919
- },
1920
- {
1921
- "epoch": 0.66,
1922
- "learning_rate": 2.8166833668202425e-05,
1923
- "loss": 0.4196,
1924
- "step": 141000
1925
- },
1926
- {
1927
- "epoch": 0.66,
1928
- "learning_rate": 2.7882416214635994e-05,
1929
- "loss": 0.4211,
1930
- "step": 141500
1931
- },
1932
- {
1933
- "epoch": 0.66,
1934
- "learning_rate": 2.760025933314676e-05,
1935
- "loss": 0.4198,
1936
- "step": 142000
1937
- },
1938
- {
1939
- "epoch": 0.66,
1940
- "learning_rate": 2.7319249545020577e-05,
1941
- "loss": 0.4195,
1942
- "step": 142500
1943
- },
1944
- {
1945
- "epoch": 0.67,
1946
- "learning_rate": 2.7039971331560428e-05,
1947
- "loss": 0.4188,
1948
- "step": 143000
1949
- },
1950
- {
1951
- "epoch": 0.67,
1952
- "learning_rate": 2.676244191996206e-05,
1953
- "loss": 0.4185,
1954
- "step": 143500
1955
- },
1956
- {
1957
- "epoch": 0.67,
1958
- "learning_rate": 2.6486678429546845e-05,
1959
- "loss": 0.4189,
1960
- "step": 144000
1961
- },
1962
- {
1963
- "epoch": 0.67,
1964
- "learning_rate": 2.6212697870705843e-05,
1965
- "loss": 0.4192,
1966
- "step": 144500
1967
- },
1968
- {
1969
- "epoch": 0.68,
1970
- "learning_rate": 2.5940517143850467e-05,
1971
- "loss": 0.4198,
1972
- "step": 145000
1973
- },
1974
- {
1975
- "epoch": 0.68,
1976
- "eval_loss": 0.3946038782596588,
1977
- "eval_runtime": 72.5985,
1978
- "eval_samples_per_second": 88.156,
1979
- "eval_steps_per_second": 1.377,
1980
- "step": 145000
1981
- },
1982
- {
1983
- "epoch": 0.68,
1984
- "learning_rate": 2.5670153038370093e-05,
1985
- "loss": 0.4191,
1986
- "step": 145500
1987
- },
1988
- {
1989
- "epoch": 0.68,
1990
- "learning_rate": 2.540162223159619e-05,
1991
- "loss": 0.4196,
1992
- "step": 146000
1993
- },
1994
- {
1995
- "epoch": 0.68,
1996
- "learning_rate": 2.5135472792545317e-05,
1997
- "loss": 0.418,
1998
- "step": 146500
1999
- },
2000
- {
2001
- "epoch": 0.69,
2002
- "learning_rate": 2.4871182176179198e-05,
2003
- "loss": 0.4186,
2004
- "step": 147000
2005
- },
2006
- {
2007
- "epoch": 0.69,
2008
- "learning_rate": 2.4608242630569237e-05,
2009
- "loss": 0.4176,
2010
- "step": 147500
2011
- },
2012
- {
2013
- "epoch": 0.69,
2014
- "learning_rate": 2.4347201887301807e-05,
2015
- "loss": 0.4187,
2016
- "step": 148000
2017
- },
2018
- {
2019
- "epoch": 0.69,
2020
- "learning_rate": 2.4088076048599544e-05,
2021
- "loss": 0.4168,
2022
- "step": 148500
2023
- },
2024
- {
2025
- "epoch": 0.69,
2026
- "learning_rate": 2.3830881098564776e-05,
2027
- "loss": 0.4182,
2028
- "step": 149000
2029
- },
2030
- {
2031
- "epoch": 0.7,
2032
- "learning_rate": 2.3575632902193518e-05,
2033
- "loss": 0.4168,
2034
- "step": 149500
2035
- },
2036
- {
2037
- "epoch": 0.7,
2038
- "learning_rate": 2.3322851806813925e-05,
2039
- "loss": 0.4169,
2040
- "step": 150000
2041
- },
2042
- {
2043
- "epoch": 0.7,
2044
- "eval_loss": 0.39588433504104614,
2045
- "eval_runtime": 76.1104,
2046
- "eval_samples_per_second": 84.088,
2047
- "eval_steps_per_second": 1.314,
2048
- "step": 150000
2049
- },
2050
- {
2051
- "epoch": 0.7,
2052
- "learning_rate": 2.307154025969024e-05,
2053
- "loss": 0.4173,
2054
- "step": 150500
2055
- },
2056
- {
2057
- "epoch": 0.7,
2058
- "learning_rate": 2.282222230594981e-05,
2059
- "loss": 0.4183,
2060
- "step": 151000
2061
- },
2062
- {
2063
- "epoch": 0.71,
2064
- "learning_rate": 2.2574913324698427e-05,
2065
- "loss": 0.4166,
2066
- "step": 151500
2067
- },
2068
- {
2069
- "epoch": 0.71,
2070
- "learning_rate": 2.2329628571118985e-05,
2071
- "loss": 0.417,
2072
- "step": 152000
2073
- },
2074
- {
2075
- "epoch": 0.71,
2076
- "learning_rate": 2.2086383175530457e-05,
2077
- "loss": 0.4175,
2078
- "step": 152500
2079
- },
2080
- {
2081
- "epoch": 0.71,
2082
- "learning_rate": 2.1845672464357035e-05,
2083
- "loss": 0.416,
2084
- "step": 153000
2085
- },
2086
- {
2087
- "epoch": 0.72,
2088
- "learning_rate": 2.1606546518349025e-05,
2089
- "loss": 0.4167,
2090
- "step": 153500
2091
- },
2092
- {
2093
- "epoch": 0.72,
2094
- "learning_rate": 2.1369504533438948e-05,
2095
- "loss": 0.4154,
2096
- "step": 154000
2097
- },
2098
- {
2099
- "epoch": 0.72,
2100
- "learning_rate": 2.1134561131493055e-05,
2101
- "loss": 0.4167,
2102
- "step": 154500
2103
- },
2104
- {
2105
- "epoch": 0.72,
2106
- "learning_rate": 2.0901730804927037e-05,
2107
- "loss": 0.4145,
2108
- "step": 155000
2109
- },
2110
- {
2111
- "epoch": 0.72,
2112
- "eval_loss": 0.39164599776268005,
2113
- "eval_runtime": 74.2853,
2114
- "eval_samples_per_second": 86.154,
2115
- "eval_steps_per_second": 1.346,
2116
- "step": 155000
2117
- },
2118
- {
2119
- "epoch": 0.73,
2120
- "learning_rate": 2.0671027915812168e-05,
2121
- "loss": 0.4167,
2122
- "step": 155500
2123
- },
2124
- {
2125
- "epoch": 0.73,
2126
- "learning_rate": 2.0442466694989354e-05,
2127
- "loss": 0.4164,
2128
- "step": 156000
2129
- },
2130
- {
2131
- "epoch": 0.73,
2132
- "learning_rate": 2.0216061241191315e-05,
2133
- "loss": 0.4164,
2134
- "step": 156500
2135
- },
2136
- {
2137
- "epoch": 0.73,
2138
- "learning_rate": 1.999227181700563e-05,
2139
- "loss": 0.417,
2140
- "step": 157000
2141
- },
2142
- {
2143
- "epoch": 0.74,
2144
- "learning_rate": 1.9770215279838127e-05,
2145
- "loss": 0.4144,
2146
- "step": 157500
2147
- },
2148
- {
2149
- "epoch": 0.74,
2150
- "learning_rate": 1.9550355977329253e-05,
2151
- "loss": 0.4154,
2152
- "step": 158000
2153
- },
2154
- {
2155
- "epoch": 0.74,
2156
- "learning_rate": 1.9333140553126862e-05,
2157
- "loss": 0.4161,
2158
- "step": 158500
2159
- },
2160
- {
2161
- "epoch": 0.74,
2162
- "learning_rate": 1.9117711807683115e-05,
2163
- "loss": 0.416,
2164
- "step": 159000
2165
- },
2166
- {
2167
- "epoch": 0.75,
2168
- "learning_rate": 1.8904520546385717e-05,
2169
- "loss": 0.4156,
2170
- "step": 159500
2171
- },
2172
- {
2173
- "epoch": 0.75,
2174
- "learning_rate": 1.8693579919875927e-05,
2175
- "loss": 0.4152,
2176
- "step": 160000
2177
- },
2178
- {
2179
- "epoch": 0.75,
2180
- "eval_loss": 0.39489009976387024,
2181
- "eval_runtime": 75.2352,
2182
- "eval_samples_per_second": 85.067,
2183
- "eval_steps_per_second": 1.329,
2184
- "step": 160000
2185
- },
2186
- {
2187
- "epoch": 0.75,
2188
- "learning_rate": 1.848490293996521e-05,
2189
- "loss": 0.4146,
2190
- "step": 160500
2191
- },
2192
- {
2193
- "epoch": 0.76,
2194
- "learning_rate": 1.8278502478832604e-05,
2195
- "loss": 0.4147,
2196
- "step": 161000
2197
- },
2198
- {
2199
- "epoch": 0.76,
2200
- "learning_rate": 1.807439126823082e-05,
2201
- "loss": 0.4155,
2202
- "step": 161500
2203
- },
2204
- {
2205
- "epoch": 0.76,
2206
- "learning_rate": 1.7872983211904567e-05,
2207
- "loss": 0.4134,
2208
- "step": 162000
2209
- },
2210
- {
2211
- "epoch": 0.76,
2212
- "learning_rate": 1.7673483491090614e-05,
2213
- "loss": 0.4148,
2214
- "step": 162500
2215
- },
2216
- {
2217
- "epoch": 0.77,
2218
- "learning_rate": 1.7476310341228535e-05,
2219
- "loss": 0.4141,
2220
- "step": 163000
2221
- },
2222
- {
2223
- "epoch": 0.77,
2224
- "learning_rate": 1.7281863251651327e-05,
2225
- "loss": 0.4151,
2226
- "step": 163500
2227
- },
2228
- {
2229
- "epoch": 0.77,
2230
- "learning_rate": 1.7089374873733182e-05,
2231
- "loss": 0.4142,
2232
- "step": 164000
2233
- },
2234
- {
2235
- "epoch": 0.77,
2236
- "learning_rate": 1.689924909733782e-05,
2237
- "loss": 0.4131,
2238
- "step": 164500
2239
- },
2240
- {
2241
- "epoch": 0.78,
2242
- "learning_rate": 1.6711497650318706e-05,
2243
- "loss": 0.4132,
2244
- "step": 165000
2245
- },
2246
- {
2247
- "epoch": 0.78,
2248
- "eval_loss": 0.38981765508651733,
2249
- "eval_runtime": 75.5186,
2250
- "eval_samples_per_second": 84.747,
2251
- "eval_steps_per_second": 1.324,
2252
- "step": 165000
2253
- },
2254
- {
2255
- "epoch": 0.78,
2256
- "learning_rate": 1.6526132114069465e-05,
2257
- "loss": 0.4135,
2258
- "step": 165500
2259
- },
2260
- {
2261
- "epoch": 0.78,
2262
- "learning_rate": 1.6343163922809553e-05,
2263
- "loss": 0.4139,
2264
- "step": 166000
2265
- },
2266
- {
2267
- "epoch": 0.78,
2268
- "learning_rate": 1.6162604362878833e-05,
2269
- "loss": 0.4133,
2270
- "step": 166500
2271
- },
2272
- {
2273
- "epoch": 0.79,
2274
- "learning_rate": 1.59844645720415e-05,
2275
- "loss": 0.4146,
2276
- "step": 167000
2277
- },
2278
- {
2279
- "epoch": 0.79,
2280
- "learning_rate": 1.5808755538798932e-05,
2281
- "loss": 0.4135,
2282
- "step": 167500
2283
- },
2284
- {
2285
- "epoch": 0.79,
2286
- "learning_rate": 1.5635488101711998e-05,
2287
- "loss": 0.4121,
2288
- "step": 168000
2289
- },
2290
- {
2291
- "epoch": 0.79,
2292
- "learning_rate": 1.5464672948732363e-05,
2293
- "loss": 0.4136,
2294
- "step": 168500
2295
- },
2296
- {
2297
- "epoch": 0.8,
2298
- "learning_rate": 1.5296320616543277e-05,
2299
- "loss": 0.4139,
2300
- "step": 169000
2301
- },
2302
- {
2303
- "epoch": 0.8,
2304
- "learning_rate": 1.5130770773077236e-05,
2305
- "loss": 0.4132,
2306
- "step": 169500
2307
- },
2308
- {
2309
- "epoch": 0.8,
2310
- "learning_rate": 1.4967370107219309e-05,
2311
- "loss": 0.4134,
2312
- "step": 170000
2313
- },
2314
- {
2315
- "epoch": 0.8,
2316
- "eval_loss": 0.39251405000686646,
2317
- "eval_runtime": 74.7186,
2318
- "eval_samples_per_second": 85.655,
2319
- "eval_steps_per_second": 1.338,
2320
- "step": 170000
2321
- },
2322
- {
2323
- "epoch": 0.8,
2324
- "learning_rate": 1.480678225733925e-05,
2325
- "loss": 0.4127,
2326
- "step": 170500
2327
- },
2328
- {
2329
- "epoch": 0.81,
2330
- "learning_rate": 1.4648373493890402e-05,
2331
- "loss": 0.4127,
2332
- "step": 171000
2333
- },
2334
- {
2335
- "epoch": 0.81,
2336
- "learning_rate": 1.4492477904427582e-05,
2337
- "loss": 0.413,
2338
- "step": 171500
2339
- },
2340
- {
2341
- "epoch": 0.81,
2342
- "learning_rate": 1.4339105105325088e-05,
2343
- "loss": 0.4122,
2344
- "step": 172000
2345
- },
2346
- {
2347
- "epoch": 0.81,
2348
- "learning_rate": 1.4188264557339635e-05,
2349
- "loss": 0.4122,
2350
- "step": 172500
2351
- },
2352
- {
2353
- "epoch": 0.81,
2354
- "learning_rate": 1.403996556502672e-05,
2355
- "loss": 0.413,
2356
- "step": 173000
2357
- },
2358
- {
2359
- "epoch": 0.82,
2360
- "learning_rate": 1.3894506221140526e-05,
2361
- "loss": 0.4122,
2362
- "step": 173500
2363
- },
2364
- {
2365
- "epoch": 0.82,
2366
- "learning_rate": 1.3751312497919097e-05,
2367
- "loss": 0.4114,
2368
- "step": 174000
2369
- },
2370
- {
2371
- "epoch": 0.82,
2372
- "learning_rate": 1.3610687283631496e-05,
2373
- "loss": 0.4119,
2374
- "step": 174500
2375
- },
2376
- {
2377
- "epoch": 0.82,
2378
- "learning_rate": 1.3472639252703293e-05,
2379
- "loss": 0.4132,
2380
- "step": 175000
2381
- },
2382
- {
2383
- "epoch": 0.82,
2384
- "eval_loss": 0.38679200410842896,
2385
- "eval_runtime": 75.015,
2386
- "eval_samples_per_second": 85.316,
2387
- "eval_steps_per_second": 1.333,
2388
- "step": 175000
2389
- },
2390
- {
2391
- "epoch": 0.83,
2392
- "learning_rate": 1.3337176920587338e-05,
2393
- "loss": 0.4101,
2394
- "step": 175500
2395
- },
2396
- {
2397
- "epoch": 0.83,
2398
- "learning_rate": 1.3204308643238348e-05,
2399
- "loss": 0.4124,
2400
- "step": 176000
2401
- },
2402
- {
2403
- "epoch": 0.83,
2404
- "learning_rate": 1.307404261659754e-05,
2405
- "loss": 0.4109,
2406
- "step": 176500
2407
- },
2408
- {
2409
- "epoch": 0.83,
2410
- "learning_rate": 1.2946386876087037e-05,
2411
- "loss": 0.4118,
2412
- "step": 177000
2413
- },
2414
- {
2415
- "epoch": 0.84,
2416
- "learning_rate": 1.2821596753196486e-05,
2417
- "loss": 0.4113,
2418
- "step": 177500
2419
- },
2420
- {
2421
- "epoch": 0.84,
2422
- "learning_rate": 1.2699179787331507e-05,
2423
- "loss": 0.4123,
2424
- "step": 178000
2425
- },
2426
- {
2427
- "epoch": 0.84,
2428
- "learning_rate": 1.2579396230901889e-05,
2429
- "loss": 0.4116,
2430
- "step": 178500
2431
- },
2432
- {
2433
- "epoch": 0.84,
2434
- "learning_rate": 1.2462253472721652e-05,
2435
- "loss": 0.4119,
2436
- "step": 179000
2437
- },
2438
- {
2439
- "epoch": 0.85,
2440
- "learning_rate": 1.2347758738707905e-05,
2441
- "loss": 0.4109,
2442
- "step": 179500
2443
- },
2444
- {
2445
- "epoch": 0.85,
2446
- "learning_rate": 1.2236140116340411e-05,
2447
- "loss": 0.4114,
2448
- "step": 180000
2449
- },
2450
- {
2451
- "epoch": 0.85,
2452
- "eval_loss": 0.3862825036048889,
2453
- "eval_runtime": 80.5174,
2454
- "eval_samples_per_second": 79.486,
2455
- "eval_steps_per_second": 1.242,
2456
- "step": 180000
2457
- },
2458
- {
2459
- "epoch": 0.85,
2460
- "learning_rate": 1.2126957123857906e-05,
2461
- "loss": 0.4123,
2462
- "step": 180500
2463
- },
2464
- {
2465
- "epoch": 0.85,
2466
- "learning_rate": 1.2020442838200083e-05,
2467
- "loss": 0.413,
2468
- "step": 181000
2469
- },
2470
- {
2471
- "epoch": 0.86,
2472
- "learning_rate": 1.191660382966979e-05,
2473
- "loss": 0.4102,
2474
- "step": 181500
2475
- },
2476
- {
2477
- "epoch": 0.86,
2478
- "learning_rate": 1.1815446503546215e-05,
2479
- "loss": 0.411,
2480
- "step": 182000
2481
- },
2482
- {
2483
- "epoch": 0.86,
2484
- "learning_rate": 1.1716977099689715e-05,
2485
- "loss": 0.4111,
2486
- "step": 182500
2487
- },
2488
- {
2489
- "epoch": 0.86,
2490
- "learning_rate": 1.1621390550409203e-05,
2491
- "loss": 0.4125,
2492
- "step": 183000
2493
- },
2494
- {
2495
- "epoch": 0.87,
2496
- "learning_rate": 1.1528309641484633e-05,
2497
- "loss": 0.4123,
2498
- "step": 183500
2499
- },
2500
- {
2501
- "epoch": 0.87,
2502
- "learning_rate": 1.143793436678133e-05,
2503
- "loss": 0.4106,
2504
- "step": 184000
2505
- },
2506
- {
2507
- "epoch": 0.87,
2508
- "learning_rate": 1.1350442919794692e-05,
2509
- "loss": 0.4111,
2510
- "step": 184500
2511
- },
2512
- {
2513
- "epoch": 0.88,
2514
- "learning_rate": 1.1265490032093274e-05,
2515
- "loss": 0.4113,
2516
- "step": 185000
2517
- },
2518
- {
2519
- "epoch": 0.88,
2520
- "eval_loss": 0.3847789466381073,
2521
- "eval_runtime": 82.908,
2522
- "eval_samples_per_second": 77.194,
2523
- "eval_steps_per_second": 1.206,
2524
- "step": 185000
2525
- },
2526
- {
2527
- "epoch": 0.88,
2528
- "learning_rate": 1.1183258990564632e-05,
2529
- "loss": 0.4101,
2530
- "step": 185500
2531
- },
2532
- {
2533
- "epoch": 0.88,
2534
- "learning_rate": 1.1103754867606732e-05,
2535
- "loss": 0.4111,
2536
- "step": 186000
2537
- },
2538
- {
2539
- "epoch": 0.88,
2540
- "learning_rate": 1.1026982567408383e-05,
2541
- "loss": 0.4104,
2542
- "step": 186500
2543
- },
2544
- {
2545
- "epoch": 0.89,
2546
- "learning_rate": 1.0953092162981306e-05,
2547
- "loss": 0.4108,
2548
- "step": 187000
2549
- },
2550
- {
2551
- "epoch": 0.89,
2552
- "learning_rate": 1.0881931921454253e-05,
2553
- "loss": 0.41,
2554
- "step": 187500
2555
- },
2556
- {
2557
- "epoch": 0.89,
2558
- "learning_rate": 1.0813371837471815e-05,
2559
- "loss": 0.4103,
2560
- "step": 188000
2561
- },
2562
- {
2563
- "epoch": 0.89,
2564
- "learning_rate": 1.0747561488442046e-05,
2565
- "loss": 0.4101,
2566
- "step": 188500
2567
- },
2568
- {
2569
- "epoch": 0.9,
2570
- "learning_rate": 1.0684504933857279e-05,
2571
- "loss": 0.4104,
2572
- "step": 189000
2573
- },
2574
- {
2575
- "epoch": 0.9,
2576
- "learning_rate": 1.0624206063342817e-05,
2577
- "loss": 0.4108,
2578
- "step": 189500
2579
- },
2580
- {
2581
- "epoch": 0.9,
2582
- "learning_rate": 1.056666859641703e-05,
2583
- "loss": 0.4101,
2584
- "step": 190000
2585
- },
2586
- {
2587
- "epoch": 0.9,
2588
- "eval_loss": 0.3863581717014313,
2589
- "eval_runtime": 76.6555,
2590
- "eval_samples_per_second": 83.49,
2591
- "eval_steps_per_second": 1.305,
2592
- "step": 190000
2593
- },
2594
- {
2595
- "epoch": 0.9,
2596
- "learning_rate": 1.0511896082261898e-05,
2597
- "loss": 0.4103,
2598
- "step": 190500
2599
- },
2600
- {
2601
- "epoch": 0.91,
2602
- "learning_rate": 1.0459891899504107e-05,
2603
- "loss": 0.4098,
2604
- "step": 191000
2605
- },
2606
- {
2607
- "epoch": 0.91,
2608
- "learning_rate": 1.0410659256006602e-05,
2609
- "loss": 0.4087,
2610
- "step": 191500
2611
- },
2612
- {
2613
- "epoch": 0.91,
2614
- "learning_rate": 1.0364201188670747e-05,
2615
- "loss": 0.4101,
2616
- "step": 192000
2617
- },
2618
- {
2619
- "epoch": 0.91,
2620
- "learning_rate": 1.0320520563248961e-05,
2621
- "loss": 0.4103,
2622
- "step": 192500
2623
- },
2624
- {
2625
- "epoch": 0.92,
2626
- "learning_rate": 1.0279699098864594e-05,
2627
- "loss": 0.4099,
2628
- "step": 193000
2629
- },
2630
- {
2631
- "epoch": 0.92,
2632
- "learning_rate": 1.024157570133687e-05,
2633
- "loss": 0.4091,
2634
- "step": 193500
2635
- },
2636
- {
2637
- "epoch": 0.92,
2638
- "learning_rate": 1.0206237309840898e-05,
2639
- "loss": 0.4092,
2640
- "step": 194000
2641
- },
2642
- {
2643
- "epoch": 0.92,
2644
- "learning_rate": 1.0173686104215128e-05,
2645
- "loss": 0.4102,
2646
- "step": 194500
2647
- },
2648
- {
2649
- "epoch": 0.93,
2650
- "learning_rate": 1.0143924092371245e-05,
2651
- "loss": 0.4088,
2652
- "step": 195000
2653
- },
2654
- {
2655
- "epoch": 0.93,
2656
- "eval_loss": 0.38611817359924316,
2657
- "eval_runtime": 81.3319,
2658
- "eval_samples_per_second": 78.69,
2659
- "eval_steps_per_second": 1.23,
2660
- "step": 195000
2661
- },
2662
- {
2663
- "epoch": 0.93,
2664
- "learning_rate": 1.011695311017034e-05,
2665
- "loss": 0.4091,
2666
- "step": 195500
2667
- },
2668
- {
2669
- "epoch": 0.93,
2670
- "learning_rate": 1.0092820389759403e-05,
2671
- "loss": 0.4099,
2672
- "step": 196000
2673
- },
2674
- {
2675
- "epoch": 0.93,
2676
- "learning_rate": 1.0071430695926197e-05,
2677
- "loss": 0.4091,
2678
- "step": 196500
2679
- },
2680
- {
2681
- "epoch": 0.94,
2682
- "learning_rate": 1.0052836503470134e-05,
2683
- "loss": 0.4093,
2684
- "step": 197000
2685
- },
2686
- {
2687
- "epoch": 0.94,
2688
- "learning_rate": 1.003703895936859e-05,
2689
- "loss": 0.4093,
2690
- "step": 197500
2691
- },
2692
- {
2693
- "epoch": 0.94,
2694
- "learning_rate": 1.002406224534175e-05,
2695
- "loss": 0.4082,
2696
- "step": 198000
2697
- },
2698
- {
2699
- "epoch": 0.94,
2700
- "learning_rate": 1.0013872772159007e-05,
2701
- "loss": 0.4105,
2702
- "step": 198500
2703
- },
2704
- {
2705
- "epoch": 0.94,
2706
- "learning_rate": 1.0006459132350263e-05,
2707
- "loss": 0.409,
2708
- "step": 199000
2709
- },
2710
- {
2711
- "epoch": 0.95,
2712
- "learning_rate": 1.0001845001670741e-05,
2713
- "loss": 0.4096,
2714
- "step": 199500
2715
- },
2716
- {
2717
- "epoch": 0.95,
2718
- "learning_rate": 1.000003066474176e-05,
2719
- "loss": 0.4099,
2720
- "step": 200000
2721
- },
2722
- {
2723
- "epoch": 0.95,
2724
- "eval_loss": 0.3822266459465027,
2725
- "eval_runtime": 77.6338,
2726
- "eval_samples_per_second": 82.438,
2727
- "eval_steps_per_second": 1.288,
2728
- "step": 200000
2729
- }
2730
- ],
2731
- "max_steps": 200000,
2732
- "num_train_epochs": 9223372036854775807,
2733
- "total_flos": 4.709861347295232e+21,
2734
- "trial_name": null,
2735
- "trial_params": null
2736
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:92fe998606f51cfbdaeade2ff6ed2dfa530ab49cf5bec297af470359fd69c4bc
3
- size 5551