boumehdi commited on
Commit
9d3c66b
·
1 Parent(s): 55e7045

Upload 9 files

Browse files
Files changed (7) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scaler.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +24 -864
  7. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5a9e65ddc813ff1ae693f2253104fc1adecbbf5df46b448937bccb9492735ad
3
  size 2490594117
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49165c4ebf699f646d1ee8dd29b4f653a39fed3e54794548e77985e4ee0be462
3
  size 2490594117
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fd74eeb653891d4e14d34884c603b0c9994fdb00ea8028e5c413b78cfe559d0
3
  size 1262168365
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:680b1c63ef60208d52375170b4bca21c48bf4e869fba3d92686679e814810c15
3
  size 1262168365
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc18e8a150b018c46bd46562aa5a0ee60953fb10ab966c334d972199dbfcd87e
3
  size 14639
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35a38b2fb8837dad93900200eb443f5a115ca4b6d25b71a4f159eee52f1211c6
3
  size 14639
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43834ac5197f140a7b873f304e5508b173a1d381b18f877ba9ad73867f38e7ad
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efafd90182e3d39d1b7c4a686f86e5913f5abc094dc3e2f827a6d479c6cef247
3
  size 557
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc84086bb1f18591932e6528ee39cbfdfeefaa2127e9c49d971ac25093b03632
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c76c6a7e6c643ec04de43f5e4cf627377d8989a6ae9dc6ff8a7d93215077cc3
3
  size 627
trainer_state.json CHANGED
@@ -1,901 +1,61 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 49.9989417989418,
5
- "global_step": 5900,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.85,
12
- "learning_rate": 0.0003,
13
- "loss": 0.0549,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 0.85,
18
- "eval_loss": 0.3047053813934326,
19
- "eval_runtime": 213.654,
20
- "eval_samples_per_second": 15.778,
21
- "eval_steps_per_second": 1.975,
22
- "eval_wer": 0.26644545348701826,
23
  "step": 100
24
  },
25
  {
26
  "epoch": 1.69,
27
- "learning_rate": 0.00029974554707379135,
28
- "loss": 0.0916,
29
  "step": 200
30
  },
31
  {
32
  "epoch": 1.69,
33
- "eval_loss": 0.3038010597229004,
34
- "eval_runtime": 151.8957,
35
- "eval_samples_per_second": 22.193,
36
- "eval_steps_per_second": 2.778,
37
- "eval_wer": 0.26597974152986376,
38
  "step": 200
39
  },
40
  {
41
  "epoch": 2.54,
42
- "learning_rate": 0.00029949109414758267,
43
- "loss": 0.0917,
44
  "step": 300
45
  },
46
  {
47
  "epoch": 2.54,
48
- "eval_loss": 0.31602439284324646,
49
- "eval_runtime": 152.2031,
50
- "eval_samples_per_second": 22.148,
51
- "eval_steps_per_second": 2.773,
52
- "eval_wer": 0.28134823611596227,
53
  "step": 300
54
- },
55
- {
56
- "epoch": 3.39,
57
- "learning_rate": 0.00029923664122137405,
58
- "loss": 0.0968,
59
- "step": 400
60
- },
61
- {
62
- "epoch": 3.39,
63
- "eval_loss": 0.34380972385406494,
64
- "eval_runtime": 153.4688,
65
- "eval_samples_per_second": 21.965,
66
- "eval_steps_per_second": 2.75,
67
- "eval_wer": 0.2737222028175573,
68
- "step": 400
69
- },
70
- {
71
- "epoch": 4.24,
72
- "learning_rate": 0.00029898218829516537,
73
- "loss": 0.0977,
74
- "step": 500
75
- },
76
- {
77
- "epoch": 4.24,
78
- "eval_loss": 0.36153408885002136,
79
- "eval_runtime": 166.8287,
80
- "eval_samples_per_second": 20.206,
81
- "eval_steps_per_second": 2.53,
82
- "eval_wer": 0.2702875771335429,
83
- "step": 500
84
- },
85
- {
86
- "epoch": 5.08,
87
- "learning_rate": 0.00029872773536895674,
88
- "loss": 0.0916,
89
- "step": 600
90
- },
91
- {
92
- "epoch": 5.08,
93
- "eval_loss": 0.3286847770214081,
94
- "eval_runtime": 171.9341,
95
- "eval_samples_per_second": 19.606,
96
- "eval_steps_per_second": 2.454,
97
- "eval_wer": 0.2669693794388171,
98
- "step": 600
99
- },
100
- {
101
- "epoch": 5.93,
102
- "learning_rate": 0.00029847328244274806,
103
- "loss": 0.0943,
104
- "step": 700
105
- },
106
- {
107
- "epoch": 5.93,
108
- "eval_loss": 0.3330075442790985,
109
- "eval_runtime": 152.0172,
110
- "eval_samples_per_second": 22.175,
111
- "eval_steps_per_second": 2.776,
112
- "eval_wer": 0.2651647456048434,
113
- "step": 700
114
- },
115
- {
116
- "epoch": 6.78,
117
- "learning_rate": 0.0002982213740458015,
118
- "loss": 0.0959,
119
- "step": 800
120
- },
121
- {
122
- "epoch": 6.78,
123
- "eval_loss": 0.3155308663845062,
124
- "eval_runtime": 176.3551,
125
- "eval_samples_per_second": 19.115,
126
- "eval_steps_per_second": 2.393,
127
- "eval_wer": 0.2723250669460938,
128
- "step": 800
129
- },
130
- {
131
- "epoch": 7.63,
132
- "learning_rate": 0.00029796692111959286,
133
- "loss": 0.0953,
134
- "step": 900
135
- },
136
- {
137
- "epoch": 7.63,
138
- "eval_loss": 0.3184454143047333,
139
- "eval_runtime": 179.411,
140
- "eval_samples_per_second": 18.789,
141
- "eval_steps_per_second": 2.352,
142
- "eval_wer": 0.25940156013505644,
143
- "step": 900
144
- },
145
- {
146
- "epoch": 8.47,
147
- "learning_rate": 0.0002977124681933842,
148
- "loss": 0.0989,
149
- "step": 1000
150
- },
151
- {
152
- "epoch": 8.47,
153
- "eval_loss": 0.3282919228076935,
154
- "eval_runtime": 177.9041,
155
- "eval_samples_per_second": 18.948,
156
- "eval_steps_per_second": 2.372,
157
- "eval_wer": 0.28070788217487486,
158
- "step": 1000
159
- },
160
- {
161
- "epoch": 9.32,
162
- "learning_rate": 0.00029745801526717556,
163
- "loss": 0.0962,
164
- "step": 1100
165
- },
166
- {
167
- "epoch": 9.32,
168
- "eval_loss": 0.3113383948802948,
169
- "eval_runtime": 183.182,
170
- "eval_samples_per_second": 18.402,
171
- "eval_steps_per_second": 2.304,
172
- "eval_wer": 0.2656886715566422,
173
- "step": 1100
174
- },
175
- {
176
- "epoch": 10.17,
177
- "learning_rate": 0.0002972035623409669,
178
- "loss": 0.0911,
179
- "step": 1200
180
- },
181
- {
182
- "epoch": 10.17,
183
- "eval_loss": 0.31265875697135925,
184
- "eval_runtime": 178.4394,
185
- "eval_samples_per_second": 18.892,
186
- "eval_steps_per_second": 2.365,
187
- "eval_wer": 0.2594597741297008,
188
- "step": 1200
189
- },
190
- {
191
- "epoch": 11.02,
192
- "learning_rate": 0.00029694910941475825,
193
- "loss": 0.093,
194
- "step": 1300
195
- },
196
- {
197
- "epoch": 11.02,
198
- "eval_loss": 0.33750081062316895,
199
- "eval_runtime": 185.9856,
200
- "eval_samples_per_second": 18.125,
201
- "eval_steps_per_second": 2.269,
202
- "eval_wer": 0.2635347537548027,
203
- "step": 1300
204
- },
205
- {
206
- "epoch": 11.86,
207
- "learning_rate": 0.0002966946564885496,
208
- "loss": 0.0908,
209
- "step": 1400
210
- },
211
- {
212
- "epoch": 11.86,
213
- "eval_loss": 0.31224948167800903,
214
- "eval_runtime": 182.1874,
215
- "eval_samples_per_second": 18.503,
216
- "eval_steps_per_second": 2.316,
217
- "eval_wer": 0.2616136919315403,
218
- "step": 1400
219
- },
220
- {
221
- "epoch": 12.71,
222
- "learning_rate": 0.00029644020356234095,
223
- "loss": 0.1039,
224
- "step": 1500
225
- },
226
- {
227
- "epoch": 12.71,
228
- "eval_loss": 0.33441564440727234,
229
- "eval_runtime": 187.7233,
230
- "eval_samples_per_second": 17.957,
231
- "eval_steps_per_second": 2.248,
232
- "eval_wer": 0.2726161369193154,
233
- "step": 1500
234
- },
235
- {
236
- "epoch": 13.56,
237
- "learning_rate": 0.00029618575063613227,
238
- "loss": 0.0921,
239
- "step": 1600
240
- },
241
- {
242
- "epoch": 13.56,
243
- "eval_loss": 0.3115340769290924,
244
- "eval_runtime": 189.6708,
245
- "eval_samples_per_second": 17.773,
246
- "eval_steps_per_second": 2.225,
247
- "eval_wer": 0.26859937128885786,
248
- "step": 1600
249
- },
250
- {
251
- "epoch": 14.41,
252
- "learning_rate": 0.00029593129770992364,
253
- "loss": 0.0995,
254
- "step": 1700
255
- },
256
- {
257
- "epoch": 14.41,
258
- "eval_loss": 0.3103960156440735,
259
- "eval_runtime": 183.1481,
260
- "eval_samples_per_second": 18.406,
261
- "eval_steps_per_second": 2.304,
262
- "eval_wer": 0.2650483176155548,
263
- "step": 1700
264
- },
265
- {
266
- "epoch": 15.25,
267
- "learning_rate": 0.00029567684478371497,
268
- "loss": 0.1027,
269
- "step": 1800
270
- },
271
- {
272
- "epoch": 15.25,
273
- "eval_loss": 0.33657944202423096,
274
- "eval_runtime": 185.2922,
275
- "eval_samples_per_second": 18.193,
276
- "eval_steps_per_second": 2.277,
277
- "eval_wer": 0.28891605541972293,
278
- "step": 1800
279
- },
280
- {
281
- "epoch": 16.1,
282
- "learning_rate": 0.00029542239185750634,
283
- "loss": 0.1001,
284
- "step": 1900
285
- },
286
- {
287
- "epoch": 16.1,
288
- "eval_loss": 0.32664933800697327,
289
- "eval_runtime": 182.6597,
290
- "eval_samples_per_second": 18.455,
291
- "eval_steps_per_second": 2.31,
292
- "eval_wer": 0.2692979392245896,
293
- "step": 1900
294
- },
295
- {
296
- "epoch": 16.95,
297
- "learning_rate": 0.0002951679389312977,
298
- "loss": 0.0955,
299
- "step": 2000
300
- },
301
- {
302
- "epoch": 16.95,
303
- "eval_loss": 0.32146599888801575,
304
- "eval_runtime": 175.25,
305
- "eval_samples_per_second": 19.235,
306
- "eval_steps_per_second": 2.408,
307
- "eval_wer": 0.25986727209221094,
308
- "step": 2000
309
- },
310
- {
311
- "epoch": 17.8,
312
- "learning_rate": 0.00029491348600508904,
313
- "loss": 0.0872,
314
- "step": 2100
315
- },
316
- {
317
- "epoch": 17.8,
318
- "eval_loss": 0.31995928287506104,
319
- "eval_runtime": 168.2812,
320
- "eval_samples_per_second": 20.032,
321
- "eval_steps_per_second": 2.508,
322
- "eval_wer": 0.2623704738619164,
323
- "step": 2100
324
- },
325
- {
326
- "epoch": 18.64,
327
- "learning_rate": 0.0002946590330788804,
328
- "loss": 0.0919,
329
- "step": 2200
330
- },
331
- {
332
- "epoch": 18.64,
333
- "eval_loss": 0.3285907208919525,
334
- "eval_runtime": 176.4154,
335
- "eval_samples_per_second": 19.108,
336
- "eval_steps_per_second": 2.392,
337
- "eval_wer": 0.26405867970660146,
338
- "step": 2200
339
- },
340
- {
341
- "epoch": 19.49,
342
- "learning_rate": 0.00029440458015267173,
343
- "loss": 0.0953,
344
- "step": 2300
345
- },
346
- {
347
- "epoch": 19.49,
348
- "eval_loss": 0.35332390666007996,
349
- "eval_runtime": 173.8594,
350
- "eval_samples_per_second": 19.389,
351
- "eval_steps_per_second": 2.427,
352
- "eval_wer": 0.2674350913959716,
353
- "step": 2300
354
- },
355
- {
356
- "epoch": 20.34,
357
- "learning_rate": 0.0002941501272264631,
358
- "loss": 0.0923,
359
- "step": 2400
360
- },
361
- {
362
- "epoch": 20.34,
363
- "eval_loss": 0.30950167775154114,
364
- "eval_runtime": 173.4531,
365
- "eval_samples_per_second": 19.435,
366
- "eval_steps_per_second": 2.433,
367
- "eval_wer": 0.2600419140761439,
368
- "step": 2400
369
- },
370
- {
371
- "epoch": 21.19,
372
- "learning_rate": 0.00029389567430025443,
373
- "loss": 0.0961,
374
- "step": 2500
375
- },
376
- {
377
- "epoch": 21.19,
378
- "eval_loss": 0.3377102315425873,
379
- "eval_runtime": 183.4219,
380
- "eval_samples_per_second": 18.378,
381
- "eval_steps_per_second": 2.301,
382
- "eval_wer": 0.255210152520666,
383
- "step": 2500
384
- },
385
- {
386
- "epoch": 22.03,
387
- "learning_rate": 0.00029364122137404575,
388
- "loss": 0.0919,
389
- "step": 2600
390
- },
391
- {
392
- "epoch": 22.03,
393
- "eval_loss": 0.3226545751094818,
394
- "eval_runtime": 183.0312,
395
- "eval_samples_per_second": 18.418,
396
- "eval_steps_per_second": 2.306,
397
- "eval_wer": 0.2614390499476074,
398
- "step": 2600
399
- },
400
- {
401
- "epoch": 22.88,
402
- "learning_rate": 0.00029338676844783713,
403
- "loss": 0.0859,
404
- "step": 2700
405
- },
406
- {
407
- "epoch": 22.88,
408
- "eval_loss": 0.30848973989486694,
409
- "eval_runtime": 176.2969,
410
- "eval_samples_per_second": 19.121,
411
- "eval_steps_per_second": 2.394,
412
- "eval_wer": 0.25416230061706835,
413
- "step": 2700
414
- },
415
- {
416
- "epoch": 23.73,
417
- "learning_rate": 0.00029313231552162845,
418
- "loss": 0.0915,
419
- "step": 2800
420
- },
421
- {
422
- "epoch": 23.73,
423
- "eval_loss": 0.3403824269771576,
424
- "eval_runtime": 172.6719,
425
- "eval_samples_per_second": 19.523,
426
- "eval_steps_per_second": 2.444,
427
- "eval_wer": 0.2610315519850972,
428
- "step": 2800
429
- },
430
- {
431
- "epoch": 24.58,
432
- "learning_rate": 0.00029288040712468187,
433
- "loss": 0.0917,
434
- "step": 2900
435
- },
436
- {
437
- "epoch": 24.58,
438
- "eval_loss": 0.2996799647808075,
439
- "eval_runtime": 178.4531,
440
- "eval_samples_per_second": 18.89,
441
- "eval_steps_per_second": 2.365,
442
- "eval_wer": 0.2529980207241821,
443
- "step": 2900
444
- },
445
- {
446
- "epoch": 25.42,
447
- "learning_rate": 0.00029262595419847324,
448
- "loss": 0.0967,
449
- "step": 3000
450
- },
451
- {
452
- "epoch": 25.42,
453
- "eval_loss": 0.3144609332084656,
454
- "eval_runtime": 177.4531,
455
- "eval_samples_per_second": 18.997,
456
- "eval_steps_per_second": 2.378,
457
- "eval_wer": 0.25555943648853185,
458
- "step": 3000
459
- },
460
- {
461
- "epoch": 26.27,
462
- "learning_rate": 0.0002923715012722646,
463
- "loss": 0.0973,
464
- "step": 3100
465
- },
466
- {
467
- "epoch": 26.27,
468
- "eval_loss": 0.3294685482978821,
469
- "eval_runtime": 178.7969,
470
- "eval_samples_per_second": 18.854,
471
- "eval_steps_per_second": 2.36,
472
- "eval_wer": 0.25940156013505644,
473
- "step": 3100
474
- },
475
- {
476
- "epoch": 27.12,
477
- "learning_rate": 0.00029211704834605594,
478
- "loss": 0.0932,
479
- "step": 3200
480
- },
481
- {
482
- "epoch": 27.12,
483
- "eval_loss": 0.3125886023044586,
484
- "eval_runtime": 168.7969,
485
- "eval_samples_per_second": 19.971,
486
- "eval_steps_per_second": 2.5,
487
- "eval_wer": 0.25684014437070674,
488
- "step": 3200
489
- },
490
- {
491
- "epoch": 27.97,
492
- "learning_rate": 0.0002918625954198473,
493
- "loss": 0.0945,
494
- "step": 3300
495
- },
496
- {
497
- "epoch": 27.97,
498
- "eval_loss": 0.3468785583972931,
499
- "eval_runtime": 179.6094,
500
- "eval_samples_per_second": 18.769,
501
- "eval_steps_per_second": 2.35,
502
- "eval_wer": 0.2523576667830947,
503
- "step": 3300
504
- },
505
- {
506
- "epoch": 28.81,
507
- "learning_rate": 0.00029160814249363864,
508
- "loss": 0.0852,
509
- "step": 3400
510
- },
511
- {
512
- "epoch": 28.81,
513
- "eval_loss": 0.31798404455184937,
514
- "eval_runtime": 173.2031,
515
- "eval_samples_per_second": 19.463,
516
- "eval_steps_per_second": 2.436,
517
- "eval_wer": 0.254104086622424,
518
- "step": 3400
519
- },
520
- {
521
- "epoch": 29.66,
522
- "learning_rate": 0.00029135368956743,
523
- "loss": 0.0866,
524
- "step": 3500
525
- },
526
- {
527
- "epoch": 29.66,
528
- "eval_loss": 0.31360727548599243,
529
- "eval_runtime": 167.4063,
530
- "eval_samples_per_second": 20.137,
531
- "eval_steps_per_second": 2.521,
532
- "eval_wer": 0.255210152520666,
533
- "step": 3500
534
- },
535
- {
536
- "epoch": 30.51,
537
- "learning_rate": 0.00029109923664122133,
538
- "loss": 0.0844,
539
- "step": 3600
540
- },
541
- {
542
- "epoch": 30.51,
543
- "eval_loss": 0.33361586928367615,
544
- "eval_runtime": 166.3125,
545
- "eval_samples_per_second": 20.269,
546
- "eval_steps_per_second": 2.537,
547
- "eval_wer": 0.2660379555245081,
548
- "step": 3600
549
- },
550
- {
551
- "epoch": 31.36,
552
- "learning_rate": 0.0002908447837150127,
553
- "loss": 0.0847,
554
- "step": 3700
555
- },
556
- {
557
- "epoch": 31.36,
558
- "eval_loss": 0.31821873784065247,
559
- "eval_runtime": 178.8437,
560
- "eval_samples_per_second": 18.849,
561
- "eval_steps_per_second": 2.36,
562
- "eval_wer": 0.2507858889276982,
563
- "step": 3700
564
- },
565
- {
566
- "epoch": 32.2,
567
- "learning_rate": 0.0002905903307888041,
568
- "loss": 0.0885,
569
- "step": 3800
570
- },
571
- {
572
- "epoch": 32.2,
573
- "eval_loss": 0.32577720284461975,
574
- "eval_runtime": 175.0625,
575
- "eval_samples_per_second": 19.256,
576
- "eval_steps_per_second": 2.411,
577
- "eval_wer": 0.2674933053906159,
578
- "step": 3800
579
- },
580
- {
581
- "epoch": 33.05,
582
- "learning_rate": 0.0002903358778625954,
583
- "loss": 0.0855,
584
- "step": 3900
585
- },
586
- {
587
- "epoch": 33.05,
588
- "eval_loss": 0.31836631894111633,
589
- "eval_runtime": 169.0469,
590
- "eval_samples_per_second": 19.941,
591
- "eval_steps_per_second": 2.496,
592
- "eval_wer": 0.25375480265455813,
593
- "step": 3900
594
- },
595
- {
596
- "epoch": 33.9,
597
- "learning_rate": 0.0002900814249363867,
598
- "loss": 0.0813,
599
- "step": 4000
600
- },
601
- {
602
- "epoch": 33.9,
603
- "eval_loss": 0.30345430970191956,
604
- "eval_runtime": 175.0938,
605
- "eval_samples_per_second": 19.253,
606
- "eval_steps_per_second": 2.41,
607
- "eval_wer": 0.2506112469437653,
608
- "step": 4000
609
- },
610
- {
611
- "epoch": 34.74,
612
- "learning_rate": 0.0002898269720101781,
613
- "loss": 0.0822,
614
- "step": 4100
615
- },
616
- {
617
- "epoch": 34.74,
618
- "eval_loss": 0.3159136176109314,
619
- "eval_runtime": 174.1406,
620
- "eval_samples_per_second": 19.358,
621
- "eval_steps_per_second": 2.423,
622
- "eval_wer": 0.2572476423332169,
623
- "step": 4100
624
- },
625
- {
626
- "epoch": 35.59,
627
- "learning_rate": 0.0002895725190839694,
628
- "loss": 0.0849,
629
- "step": 4200
630
- },
631
- {
632
- "epoch": 35.59,
633
- "eval_loss": 0.2940651774406433,
634
- "eval_runtime": 173.233,
635
- "eval_samples_per_second": 19.459,
636
- "eval_steps_per_second": 2.436,
637
- "eval_wer": 0.2512516008848527,
638
- "step": 4200
639
- },
640
- {
641
- "epoch": 36.44,
642
- "learning_rate": 0.0002893180661577608,
643
- "loss": 0.0885,
644
- "step": 4300
645
- },
646
- {
647
- "epoch": 36.44,
648
- "eval_loss": 0.32734107971191406,
649
- "eval_runtime": 193.9206,
650
- "eval_samples_per_second": 17.383,
651
- "eval_steps_per_second": 2.176,
652
- "eval_wer": 0.26423332169053443,
653
- "step": 4300
654
- },
655
- {
656
- "epoch": 37.29,
657
- "learning_rate": 0.0002890636132315521,
658
- "loss": 0.0866,
659
- "step": 4400
660
- },
661
- {
662
- "epoch": 37.29,
663
- "eval_loss": 0.33303678035736084,
664
- "eval_runtime": 197.0429,
665
- "eval_samples_per_second": 17.108,
666
- "eval_steps_per_second": 2.142,
667
- "eval_wer": 0.255966934451042,
668
- "step": 4400
669
- },
670
- {
671
- "epoch": 38.14,
672
- "learning_rate": 0.0002888091603053435,
673
- "loss": 0.0841,
674
- "step": 4500
675
- },
676
- {
677
- "epoch": 38.14,
678
- "eval_loss": 0.32818496227264404,
679
- "eval_runtime": 192.3874,
680
- "eval_samples_per_second": 17.522,
681
- "eval_steps_per_second": 2.193,
682
- "eval_wer": 0.24997089300267786,
683
- "step": 4500
684
- },
685
- {
686
- "epoch": 38.98,
687
- "learning_rate": 0.0002885547073791348,
688
- "loss": 0.0848,
689
- "step": 4600
690
- },
691
- {
692
- "epoch": 38.98,
693
- "eval_loss": 0.32277733087539673,
694
- "eval_runtime": 188.7845,
695
- "eval_samples_per_second": 17.856,
696
- "eval_steps_per_second": 2.235,
697
- "eval_wer": 0.2605658400279427,
698
- "step": 4600
699
- },
700
- {
701
- "epoch": 39.83,
702
- "learning_rate": 0.0002883002544529262,
703
- "loss": 0.0752,
704
- "step": 4700
705
- },
706
- {
707
- "epoch": 39.83,
708
- "eval_loss": 0.3181003928184509,
709
- "eval_runtime": 190.576,
710
- "eval_samples_per_second": 17.688,
711
- "eval_steps_per_second": 2.214,
712
- "eval_wer": 0.2516008848527186,
713
- "step": 4700
714
- },
715
- {
716
- "epoch": 40.68,
717
- "learning_rate": 0.0002880458015267175,
718
- "loss": 0.0827,
719
- "step": 4800
720
- },
721
- {
722
- "epoch": 40.68,
723
- "eval_loss": 0.3244548439979553,
724
- "eval_runtime": 196.2976,
725
- "eval_samples_per_second": 17.173,
726
- "eval_steps_per_second": 2.15,
727
- "eval_wer": 0.2487483991151473,
728
- "step": 4800
729
- },
730
- {
731
- "epoch": 41.52,
732
- "learning_rate": 0.0002877913486005089,
733
- "loss": 0.0765,
734
- "step": 4900
735
- },
736
- {
737
- "epoch": 41.52,
738
- "eval_loss": 0.31394141912460327,
739
- "eval_runtime": 194.1554,
740
- "eval_samples_per_second": 17.362,
741
- "eval_steps_per_second": 2.174,
742
- "eval_wer": 0.24508091745255559,
743
- "step": 4900
744
- },
745
- {
746
- "epoch": 42.37,
747
- "learning_rate": 0.0002875368956743002,
748
- "loss": 0.0777,
749
- "step": 5000
750
- },
751
- {
752
- "epoch": 42.37,
753
- "eval_loss": 0.3149695098400116,
754
- "eval_runtime": 192.9814,
755
- "eval_samples_per_second": 17.468,
756
- "eval_steps_per_second": 2.187,
757
- "eval_wer": 0.24508091745255559,
758
- "step": 5000
759
- },
760
- {
761
- "epoch": 43.22,
762
- "learning_rate": 0.0002872824427480916,
763
- "loss": 0.0804,
764
- "step": 5100
765
- },
766
- {
767
- "epoch": 43.22,
768
- "eval_loss": 0.3207753598690033,
769
- "eval_runtime": 190.3397,
770
- "eval_samples_per_second": 17.71,
771
- "eval_steps_per_second": 2.217,
772
- "eval_wer": 0.25381301664920247,
773
- "step": 5100
774
- },
775
- {
776
- "epoch": 44.07,
777
- "learning_rate": 0.000287030534351145,
778
- "loss": 0.0838,
779
- "step": 5200
780
- },
781
- {
782
- "epoch": 44.07,
783
- "eval_loss": 0.3102128505706787,
784
- "eval_runtime": 175.1719,
785
- "eval_samples_per_second": 19.244,
786
- "eval_steps_per_second": 2.409,
787
- "eval_wer": 0.24845732914192573,
788
- "step": 5200
789
- },
790
- {
791
- "epoch": 44.91,
792
- "learning_rate": 0.0002867760814249364,
793
- "loss": 0.0731,
794
- "step": 5300
795
- },
796
- {
797
- "epoch": 44.91,
798
- "eval_loss": 0.29943132400512695,
799
- "eval_runtime": 173.7343,
800
- "eval_samples_per_second": 19.403,
801
- "eval_steps_per_second": 2.429,
802
- "eval_wer": 0.24449877750611246,
803
- "step": 5300
804
- },
805
- {
806
- "epoch": 45.76,
807
- "learning_rate": 0.0002865216284987277,
808
- "loss": 0.0736,
809
- "step": 5400
810
- },
811
- {
812
- "epoch": 45.76,
813
- "eval_loss": 0.3185470998287201,
814
- "eval_runtime": 174.5938,
815
- "eval_samples_per_second": 19.308,
816
- "eval_steps_per_second": 2.417,
817
- "eval_wer": 0.2591687041564792,
818
- "step": 5400
819
- },
820
- {
821
- "epoch": 46.61,
822
- "learning_rate": 0.0002862671755725191,
823
- "loss": 0.0795,
824
- "step": 5500
825
- },
826
- {
827
- "epoch": 46.61,
828
- "eval_loss": 0.3023243546485901,
829
- "eval_runtime": 174.2188,
830
- "eval_samples_per_second": 19.349,
831
- "eval_steps_per_second": 2.422,
832
- "eval_wer": 0.24583769938293165,
833
- "step": 5500
834
- },
835
- {
836
- "epoch": 47.46,
837
- "learning_rate": 0.0002860127226463104,
838
- "loss": 0.0753,
839
- "step": 5600
840
- },
841
- {
842
- "epoch": 47.46,
843
- "eval_loss": 0.32648247480392456,
844
- "eval_runtime": 179.8281,
845
- "eval_samples_per_second": 18.746,
846
- "eval_steps_per_second": 2.347,
847
- "eval_wer": 0.2464780533240191,
848
- "step": 5600
849
- },
850
- {
851
- "epoch": 48.3,
852
- "learning_rate": 0.0002857582697201018,
853
- "loss": 0.0716,
854
- "step": 5700
855
- },
856
- {
857
- "epoch": 48.3,
858
- "eval_loss": 0.3370068073272705,
859
- "eval_runtime": 190.9301,
860
- "eval_samples_per_second": 17.656,
861
- "eval_steps_per_second": 2.21,
862
- "eval_wer": 0.24903946908836885,
863
- "step": 5700
864
- },
865
- {
866
- "epoch": 49.15,
867
- "learning_rate": 0.0002855038167938931,
868
- "loss": 0.074,
869
- "step": 5800
870
- },
871
- {
872
- "epoch": 49.15,
873
- "eval_loss": 0.299947589635849,
874
- "eval_runtime": 192.7259,
875
- "eval_samples_per_second": 17.491,
876
- "eval_steps_per_second": 2.19,
877
- "eval_wer": 0.2431598556292933,
878
- "step": 5800
879
- },
880
- {
881
- "epoch": 50.0,
882
- "learning_rate": 0.00028524936386768447,
883
- "loss": 0.0678,
884
- "step": 5900
885
- },
886
- {
887
- "epoch": 50.0,
888
- "eval_loss": 0.32643795013427734,
889
- "eval_runtime": 195.0205,
890
- "eval_samples_per_second": 17.285,
891
- "eval_steps_per_second": 2.164,
892
- "eval_wer": 0.2549190825474444,
893
- "step": 5900
894
  }
895
  ],
896
  "max_steps": 118000,
897
  "num_train_epochs": 1000,
898
- "total_flos": 1.3321101567847278e+20,
899
  "trial_name": null,
900
  "trial_params": null
901
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.541798941798942,
5
+ "global_step": 300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.85,
12
+ "learning_rate": 0.0001,
13
+ "loss": 0.0249,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 0.85,
18
+ "eval_loss": 0.3500341773033142,
19
+ "eval_runtime": 218.9102,
20
+ "eval_samples_per_second": 15.399,
21
+ "eval_steps_per_second": 1.928,
22
+ "eval_wer": 0.2325649086040284,
23
  "step": 100
24
  },
25
  {
26
  "epoch": 1.69,
27
+ "learning_rate": 9.991518235793045e-05,
28
+ "loss": 0.0468,
29
  "step": 200
30
  },
31
  {
32
  "epoch": 1.69,
33
+ "eval_loss": 0.3051254153251648,
34
+ "eval_runtime": 153.0625,
35
+ "eval_samples_per_second": 22.024,
36
+ "eval_steps_per_second": 2.757,
37
+ "eval_wer": 0.22418209337524742,
38
  "step": 200
39
  },
40
  {
41
  "epoch": 2.54,
42
+ "learning_rate": 9.98303647158609e-05,
43
+ "loss": 0.0412,
44
  "step": 300
45
  },
46
  {
47
  "epoch": 2.54,
48
+ "eval_loss": 0.3074679672718048,
49
+ "eval_runtime": 155.3125,
50
+ "eval_samples_per_second": 21.705,
51
+ "eval_steps_per_second": 2.717,
52
+ "eval_wer": 0.22563744324135523,
53
  "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  }
55
  ],
56
  "max_steps": 118000,
57
  "num_train_epochs": 1000,
58
+ "total_flos": 6.832318928753621e+18,
59
  "trial_name": null,
60
  "trial_params": null
61
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:205c3abc444d227c15d961d52c246f3bbe49484c6ec6d79719acbf83317f6c1f
3
  size 3323
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1c9d7091f2595ad11d1aca691a21b9f02ef4199b26e120256c51f7316f0186d
3
  size 3323