boumehdi commited on
Commit
09d3ccd
·
1 Parent(s): 3963e12

Upload 9 files

Browse files
Files changed (6) hide show
  1. optimizer.pt +1 -1
  2. rng_state.pth +1 -1
  3. scaler.pt +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +10 -1465
  6. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bc4f81e10c73c7ac61c3457069fbbe7bd2da9c195af5e95d48242d813a3bf1b
3
  size 2490593669
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02114392e70dbed0fc2f204dfae7d76d079836c2c181005bc2bf71f94557eaf8
3
  size 2490593669
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:64dff988ed5a64f29c0ad329d02de826f427335e5b6422e636e4d8d72905d91b
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dcaa6d2c89e5162447b97ae000513cc80078af514ff26ab28611f082137a28f
3
  size 14575
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f56e2d94fbf475c8ee7bb541a0c78739d0af9b9df1c40b0d2dbe8978e561b1ce
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:909ffc5e40adcfcc03f8fb8e8cc5a6605d5aaa37643bab9d485939a891938ae9
3
  size 557
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93eea870ecabb225c120efec3800c13df934b00089cdfdaa458d79acaf790660
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:819f79a372f9c1c108fae2b06747bcbf8f59bc48abe34e8c19eeba08cbc527a1
3
  size 627
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7531219980787704,
5
- "global_step": 98,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -10,1477 +10,22 @@
10
  {
11
  "epoch": 0.01,
12
  "learning_rate": 5e-06,
13
- "loss": 0.1444,
14
  "step": 1
15
  },
16
  {
17
  "epoch": 0.01,
18
- "eval_loss": 0.27758705615997314,
19
- "eval_runtime": 210.178,
20
- "eval_samples_per_second": 17.666,
21
- "eval_steps_per_second": 2.212,
22
- "eval_wer": 0.22257840465886627,
23
  "step": 1
24
- },
25
- {
26
- "epoch": 0.02,
27
- "learning_rate": 1e-05,
28
- "loss": 0.1768,
29
- "step": 2
30
- },
31
- {
32
- "epoch": 0.02,
33
- "eval_loss": 0.27495622634887695,
34
- "eval_runtime": 143.6977,
35
- "eval_samples_per_second": 25.839,
36
- "eval_steps_per_second": 3.236,
37
- "eval_wer": 0.22263183202436287,
38
- "step": 2
39
- },
40
- {
41
- "epoch": 0.02,
42
- "learning_rate": 1.5e-05,
43
- "loss": 0.1344,
44
- "step": 3
45
- },
46
- {
47
- "epoch": 0.02,
48
- "eval_loss": 0.26992350816726685,
49
- "eval_runtime": 145.0666,
50
- "eval_samples_per_second": 25.595,
51
- "eval_steps_per_second": 3.205,
52
- "eval_wer": 0.22097558369396805,
53
- "step": 3
54
- },
55
- {
56
- "epoch": 0.03,
57
- "learning_rate": 2e-05,
58
- "loss": 0.1347,
59
- "step": 4
60
- },
61
- {
62
- "epoch": 0.03,
63
- "eval_loss": 0.2631027102470398,
64
- "eval_runtime": 144.3292,
65
- "eval_samples_per_second": 25.726,
66
- "eval_steps_per_second": 3.222,
67
- "eval_wer": 0.2198001816530427,
68
- "step": 4
69
- },
70
- {
71
- "epoch": 0.04,
72
- "learning_rate": 2.5e-05,
73
- "loss": 0.1401,
74
- "step": 5
75
- },
76
- {
77
- "epoch": 0.04,
78
- "eval_loss": 0.2548554539680481,
79
- "eval_runtime": 145.5653,
80
- "eval_samples_per_second": 25.507,
81
- "eval_steps_per_second": 3.194,
82
- "eval_wer": 0.21825078805364106,
83
- "step": 5
84
- },
85
- {
86
- "epoch": 0.05,
87
- "learning_rate": 3e-05,
88
- "loss": 0.122,
89
- "step": 6
90
- },
91
- {
92
- "epoch": 0.05,
93
- "eval_loss": 0.24481205642223358,
94
- "eval_runtime": 146.1904,
95
- "eval_samples_per_second": 25.398,
96
- "eval_steps_per_second": 3.181,
97
- "eval_wer": 0.21627397553026662,
98
- "step": 6
99
- },
100
- {
101
- "epoch": 0.05,
102
- "learning_rate": 3.5e-05,
103
- "loss": 0.1098,
104
- "step": 7
105
- },
106
- {
107
- "epoch": 0.05,
108
- "eval_loss": 0.23431816697120667,
109
- "eval_runtime": 143.3943,
110
- "eval_samples_per_second": 25.894,
111
- "eval_steps_per_second": 3.243,
112
- "eval_wer": 0.21317518833146337,
113
- "step": 7
114
- },
115
- {
116
- "epoch": 0.06,
117
- "learning_rate": 4e-05,
118
- "loss": 0.0777,
119
- "step": 8
120
- },
121
- {
122
- "epoch": 0.06,
123
- "eval_loss": 0.2239282727241516,
124
- "eval_runtime": 144.1351,
125
- "eval_samples_per_second": 25.761,
126
- "eval_steps_per_second": 3.226,
127
- "eval_wer": 0.21002297376716353,
128
- "step": 8
129
- },
130
- {
131
- "epoch": 0.07,
132
- "learning_rate": 4.5e-05,
133
- "loss": 0.0987,
134
- "step": 9
135
- },
136
- {
137
- "epoch": 0.07,
138
- "eval_loss": 0.2142292559146881,
139
- "eval_runtime": 142.2093,
140
- "eval_samples_per_second": 26.109,
141
- "eval_steps_per_second": 3.27,
142
- "eval_wer": 0.20703104129935354,
143
- "step": 9
144
- },
145
- {
146
- "epoch": 0.08,
147
- "learning_rate": 5e-05,
148
- "loss": 0.0827,
149
- "step": 10
150
- },
151
- {
152
- "epoch": 0.08,
153
- "eval_loss": 0.2058483362197876,
154
- "eval_runtime": 139.25,
155
- "eval_samples_per_second": 26.664,
156
- "eval_steps_per_second": 3.339,
157
- "eval_wer": 0.20409253619704013,
158
- "step": 10
159
- },
160
- {
161
- "epoch": 0.08,
162
- "learning_rate": 5.500000000000001e-05,
163
- "loss": 0.0657,
164
- "step": 11
165
- },
166
- {
167
- "epoch": 0.08,
168
- "eval_loss": 0.19945959746837616,
169
- "eval_runtime": 143.6254,
170
- "eval_samples_per_second": 25.852,
171
- "eval_steps_per_second": 3.238,
172
- "eval_wer": 0.2003526206122776,
173
- "step": 11
174
- },
175
- {
176
- "epoch": 0.09,
177
- "learning_rate": 6e-05,
178
- "loss": 0.0464,
179
- "step": 12
180
- },
181
- {
182
- "epoch": 0.09,
183
- "eval_loss": 0.19494406878948212,
184
- "eval_runtime": 145.864,
185
- "eval_samples_per_second": 25.455,
186
- "eval_steps_per_second": 3.188,
187
- "eval_wer": 0.19869637228188278,
188
- "step": 12
189
- },
190
- {
191
- "epoch": 0.1,
192
- "learning_rate": 6.500000000000001e-05,
193
- "loss": 0.0465,
194
- "step": 13
195
- },
196
- {
197
- "epoch": 0.1,
198
- "eval_loss": 0.19232189655303955,
199
- "eval_runtime": 145.6174,
200
- "eval_samples_per_second": 25.498,
201
- "eval_steps_per_second": 3.193,
202
- "eval_wer": 0.19917721857135226,
203
- "step": 13
204
- },
205
- {
206
- "epoch": 0.11,
207
- "learning_rate": 7e-05,
208
- "loss": 0.0577,
209
- "step": 14
210
- },
211
- {
212
- "epoch": 0.11,
213
- "eval_loss": 0.19074703752994537,
214
- "eval_runtime": 145.4558,
215
- "eval_samples_per_second": 25.527,
216
- "eval_steps_per_second": 3.197,
217
- "eval_wer": 0.19987177432280814,
218
- "step": 14
219
- },
220
- {
221
- "epoch": 0.12,
222
- "learning_rate": 7.500000000000001e-05,
223
- "loss": 0.0343,
224
- "step": 15
225
- },
226
- {
227
- "epoch": 0.12,
228
- "eval_loss": 0.1901196986436844,
229
- "eval_runtime": 145.104,
230
- "eval_samples_per_second": 25.589,
231
- "eval_steps_per_second": 3.205,
232
- "eval_wer": 0.1995512101298285,
233
- "step": 15
234
- },
235
- {
236
- "epoch": 0.12,
237
- "learning_rate": 8e-05,
238
- "loss": 0.0496,
239
- "step": 16
240
- },
241
- {
242
- "epoch": 0.12,
243
- "eval_loss": 0.19008365273475647,
244
- "eval_runtime": 145.5007,
245
- "eval_samples_per_second": 25.519,
246
- "eval_steps_per_second": 3.196,
247
- "eval_wer": 0.19992520168830474,
248
- "step": 16
249
- },
250
- {
251
- "epoch": 0.13,
252
- "learning_rate": 8.5e-05,
253
- "loss": 0.0394,
254
- "step": 17
255
- },
256
- {
257
- "epoch": 0.13,
258
- "eval_loss": 0.19069527089595795,
259
- "eval_runtime": 145.1656,
260
- "eval_samples_per_second": 25.578,
261
- "eval_steps_per_second": 3.203,
262
- "eval_wer": 0.19976491959181492,
263
- "step": 17
264
- },
265
- {
266
- "epoch": 0.14,
267
- "learning_rate": 9e-05,
268
- "loss": 0.0389,
269
- "step": 18
270
- },
271
- {
272
- "epoch": 0.14,
273
- "eval_loss": 0.19175583124160767,
274
- "eval_runtime": 144.0309,
275
- "eval_samples_per_second": 25.779,
276
- "eval_steps_per_second": 3.228,
277
- "eval_wer": 0.19864294491638618,
278
- "step": 18
279
- },
280
- {
281
- "epoch": 0.15,
282
- "learning_rate": 9.5e-05,
283
- "loss": 0.0375,
284
- "step": 19
285
- },
286
- {
287
- "epoch": 0.15,
288
- "eval_loss": 0.1929420530796051,
289
- "eval_runtime": 155.5361,
290
- "eval_samples_per_second": 23.872,
291
- "eval_steps_per_second": 2.99,
292
- "eval_wer": 0.19869637228188278,
293
- "step": 19
294
- },
295
- {
296
- "epoch": 0.15,
297
- "learning_rate": 0.0001,
298
- "loss": 0.0416,
299
- "step": 20
300
- },
301
- {
302
- "epoch": 0.15,
303
- "eval_loss": 0.19433951377868652,
304
- "eval_runtime": 145.7557,
305
- "eval_samples_per_second": 25.474,
306
- "eval_steps_per_second": 3.19,
307
- "eval_wer": 0.19842923545439975,
308
- "step": 20
309
- },
310
- {
311
- "epoch": 0.16,
312
- "learning_rate": 9.909090909090911e-05,
313
- "loss": 0.049,
314
- "step": 21
315
- },
316
- {
317
- "epoch": 0.16,
318
- "eval_loss": 0.19554230570793152,
319
- "eval_runtime": 144.8351,
320
- "eval_samples_per_second": 25.636,
321
- "eval_steps_per_second": 3.211,
322
- "eval_wer": 0.19687984185499813,
323
- "step": 21
324
- },
325
- {
326
- "epoch": 0.17,
327
- "learning_rate": 9.818181818181818e-05,
328
- "loss": 0.0312,
329
- "step": 22
330
- },
331
- {
332
- "epoch": 0.17,
333
- "eval_loss": 0.1970851719379425,
334
- "eval_runtime": 142.3937,
335
- "eval_samples_per_second": 26.076,
336
- "eval_steps_per_second": 3.266,
337
- "eval_wer": 0.19479617460063045,
338
- "step": 22
339
- },
340
- {
341
- "epoch": 0.18,
342
- "learning_rate": 9.727272727272728e-05,
343
- "loss": 0.0455,
344
- "step": 23
345
- },
346
- {
347
- "epoch": 0.18,
348
- "eval_loss": 0.19858291745185852,
349
- "eval_runtime": 140.2626,
350
- "eval_samples_per_second": 26.472,
351
- "eval_steps_per_second": 3.315,
352
- "eval_wer": 0.19330020836672543,
353
- "step": 23
354
- },
355
- {
356
- "epoch": 0.18,
357
- "learning_rate": 9.636363636363637e-05,
358
- "loss": 0.0532,
359
- "step": 24
360
- },
361
- {
362
- "epoch": 0.18,
363
- "eval_loss": 0.19979843497276306,
364
- "eval_runtime": 140.2969,
365
- "eval_samples_per_second": 26.465,
366
- "eval_steps_per_second": 3.314,
367
- "eval_wer": 0.19105625901586792,
368
- "step": 24
369
- },
370
- {
371
- "epoch": 0.19,
372
- "learning_rate": 9.545454545454546e-05,
373
- "loss": 0.0442,
374
- "step": 25
375
- },
376
- {
377
- "epoch": 0.19,
378
- "eval_loss": 0.20092029869556427,
379
- "eval_runtime": 143.5469,
380
- "eval_samples_per_second": 25.866,
381
- "eval_steps_per_second": 3.239,
382
- "eval_wer": 0.19052198536090184,
383
- "step": 25
384
- },
385
- {
386
- "epoch": 0.2,
387
- "learning_rate": 9.454545454545455e-05,
388
- "loss": 0.0339,
389
- "step": 26
390
- },
391
- {
392
- "epoch": 0.2,
393
- "eval_loss": 0.2023477554321289,
394
- "eval_runtime": 152.7348,
395
- "eval_samples_per_second": 24.31,
396
- "eval_steps_per_second": 3.044,
397
- "eval_wer": 0.1893465833199765,
398
- "step": 26
399
- },
400
- {
401
- "epoch": 0.21,
402
- "learning_rate": 9.363636363636364e-05,
403
- "loss": 0.0346,
404
- "step": 27
405
- },
406
- {
407
- "epoch": 0.21,
408
- "eval_loss": 0.20320531725883484,
409
- "eval_runtime": 147.0625,
410
- "eval_samples_per_second": 25.248,
411
- "eval_steps_per_second": 3.162,
412
- "eval_wer": 0.18907944649249345,
413
- "step": 27
414
- },
415
- {
416
- "epoch": 0.22,
417
- "learning_rate": 9.272727272727273e-05,
418
- "loss": 0.0405,
419
- "step": 28
420
- },
421
- {
422
- "epoch": 0.22,
423
- "eval_loss": 0.20423929393291473,
424
- "eval_runtime": 147.2712,
425
- "eval_samples_per_second": 25.212,
426
- "eval_steps_per_second": 3.157,
427
- "eval_wer": 0.18875888229951382,
428
- "step": 28
429
- },
430
- {
431
- "epoch": 0.22,
432
- "learning_rate": 9.181818181818183e-05,
433
- "loss": 0.0273,
434
- "step": 29
435
- },
436
- {
437
- "epoch": 0.22,
438
- "eval_loss": 0.20516164600849152,
439
- "eval_runtime": 147.0321,
440
- "eval_samples_per_second": 25.253,
441
- "eval_steps_per_second": 3.163,
442
- "eval_wer": 0.18811775391355454,
443
- "step": 29
444
- },
445
- {
446
- "epoch": 0.23,
447
- "learning_rate": 9.090909090909092e-05,
448
- "loss": 0.0303,
449
- "step": 30
450
- },
451
- {
452
- "epoch": 0.23,
453
- "eval_loss": 0.20583093166351318,
454
- "eval_runtime": 147.1797,
455
- "eval_samples_per_second": 25.228,
456
- "eval_steps_per_second": 3.159,
457
- "eval_wer": 0.18843831810653416,
458
- "step": 30
459
- },
460
- {
461
- "epoch": 0.24,
462
- "learning_rate": 9e-05,
463
- "loss": 0.0433,
464
- "step": 31
465
- },
466
- {
467
- "epoch": 0.24,
468
- "eval_loss": 0.2059250921010971,
469
- "eval_runtime": 149.5581,
470
- "eval_samples_per_second": 24.826,
471
- "eval_steps_per_second": 3.109,
472
- "eval_wer": 0.1880108991825613,
473
- "step": 31
474
- },
475
- {
476
- "epoch": 0.25,
477
- "learning_rate": 8.90909090909091e-05,
478
- "loss": 0.0172,
479
- "step": 32
480
- },
481
- {
482
- "epoch": 0.25,
483
- "eval_loss": 0.20622014999389648,
484
- "eval_runtime": 147.3034,
485
- "eval_samples_per_second": 25.206,
486
- "eval_steps_per_second": 3.157,
487
- "eval_wer": 0.18774376235507828,
488
- "step": 32
489
- },
490
- {
491
- "epoch": 0.25,
492
- "learning_rate": 8.818181818181818e-05,
493
- "loss": 0.0417,
494
- "step": 33
495
- },
496
- {
497
- "epoch": 0.25,
498
- "eval_loss": 0.20588700473308563,
499
- "eval_runtime": 147.9347,
500
- "eval_samples_per_second": 25.099,
501
- "eval_steps_per_second": 3.143,
502
- "eval_wer": 0.18710263396911897,
503
- "step": 33
504
- },
505
- {
506
- "epoch": 0.26,
507
- "learning_rate": 8.727272727272727e-05,
508
- "loss": 0.0305,
509
- "step": 34
510
- },
511
- {
512
- "epoch": 0.26,
513
- "eval_loss": 0.2053101509809494,
514
- "eval_runtime": 147.604,
515
- "eval_samples_per_second": 25.155,
516
- "eval_steps_per_second": 3.15,
517
- "eval_wer": 0.18624779612117326,
518
- "step": 34
519
- },
520
- {
521
- "epoch": 0.27,
522
- "learning_rate": 8.636363636363637e-05,
523
- "loss": 0.0294,
524
- "step": 35
525
- },
526
- {
527
- "epoch": 0.27,
528
- "eval_loss": 0.2048512101173401,
529
- "eval_runtime": 149.6988,
530
- "eval_samples_per_second": 24.803,
531
- "eval_steps_per_second": 3.106,
532
- "eval_wer": 0.18624779612117326,
533
- "step": 35
534
- },
535
- {
536
- "epoch": 0.28,
537
- "learning_rate": 8.545454545454545e-05,
538
- "loss": 0.0344,
539
- "step": 36
540
- },
541
- {
542
- "epoch": 0.28,
543
- "eval_loss": 0.20456022024154663,
544
- "eval_runtime": 150.2353,
545
- "eval_samples_per_second": 24.715,
546
- "eval_steps_per_second": 3.095,
547
- "eval_wer": 0.1863546508521665,
548
- "step": 36
549
- },
550
- {
551
- "epoch": 0.28,
552
- "learning_rate": 8.454545454545455e-05,
553
- "loss": 0.0352,
554
- "step": 37
555
- },
556
- {
557
- "epoch": 0.28,
558
- "eval_loss": 0.20410528779029846,
559
- "eval_runtime": 147.7984,
560
- "eval_samples_per_second": 25.122,
561
- "eval_steps_per_second": 3.146,
562
- "eval_wer": 0.18539295827322755,
563
- "step": 37
564
- },
565
- {
566
- "epoch": 0.29,
567
- "learning_rate": 8.363636363636364e-05,
568
- "loss": 0.0379,
569
- "step": 38
570
- },
571
- {
572
- "epoch": 0.29,
573
- "eval_loss": 0.2038663774728775,
574
- "eval_runtime": 147.8438,
575
- "eval_samples_per_second": 25.114,
576
- "eval_steps_per_second": 3.145,
577
- "eval_wer": 0.1857135224662072,
578
- "step": 38
579
- },
580
- {
581
- "epoch": 0.3,
582
- "learning_rate": 8.272727272727273e-05,
583
- "loss": 0.0413,
584
- "step": 39
585
- },
586
- {
587
- "epoch": 0.3,
588
- "eval_loss": 0.20388050377368927,
589
- "eval_runtime": 147.3701,
590
- "eval_samples_per_second": 25.195,
591
- "eval_steps_per_second": 3.155,
592
- "eval_wer": 0.18427098359779878,
593
- "step": 39
594
- },
595
- {
596
- "epoch": 0.31,
597
- "learning_rate": 8.181818181818183e-05,
598
- "loss": 0.0317,
599
- "step": 40
600
- },
601
- {
602
- "epoch": 0.31,
603
- "eval_loss": 0.20368847250938416,
604
- "eval_runtime": 147.9612,
605
- "eval_samples_per_second": 25.094,
606
- "eval_steps_per_second": 3.143,
607
- "eval_wer": 0.18384356467382593,
608
- "step": 40
609
- },
610
- {
611
- "epoch": 0.32,
612
- "learning_rate": 8.090909090909092e-05,
613
- "loss": 0.0406,
614
- "step": 41
615
- },
616
- {
617
- "epoch": 0.32,
618
- "eval_loss": 0.20367969572544098,
619
- "eval_runtime": 148.8012,
620
- "eval_samples_per_second": 24.953,
621
- "eval_steps_per_second": 3.125,
622
- "eval_wer": 0.18330929101885987,
623
- "step": 41
624
- },
625
- {
626
- "epoch": 0.32,
627
- "learning_rate": 8e-05,
628
- "loss": 0.039,
629
- "step": 42
630
- },
631
- {
632
- "epoch": 0.32,
633
- "eval_loss": 0.20352540910243988,
634
- "eval_runtime": 148.6131,
635
- "eval_samples_per_second": 24.984,
636
- "eval_steps_per_second": 3.129,
637
- "eval_wer": 0.18256130790190736,
638
- "step": 42
639
- },
640
- {
641
- "epoch": 0.33,
642
- "learning_rate": 7.90909090909091e-05,
643
- "loss": 0.0418,
644
- "step": 43
645
- },
646
- {
647
- "epoch": 0.33,
648
- "eval_loss": 0.20341253280639648,
649
- "eval_runtime": 148.4125,
650
- "eval_samples_per_second": 25.018,
651
- "eval_steps_per_second": 3.133,
652
- "eval_wer": 0.18229417107442433,
653
- "step": 43
654
- },
655
- {
656
- "epoch": 0.34,
657
- "learning_rate": 7.818181818181818e-05,
658
- "loss": 0.0424,
659
- "step": 44
660
- },
661
- {
662
- "epoch": 0.34,
663
- "eval_loss": 0.20320875942707062,
664
- "eval_runtime": 148.7795,
665
- "eval_samples_per_second": 24.956,
666
- "eval_steps_per_second": 3.125,
667
- "eval_wer": 0.18202703424694128,
668
- "step": 44
669
- },
670
- {
671
- "epoch": 0.35,
672
- "learning_rate": 7.727272727272727e-05,
673
- "loss": 0.0262,
674
- "step": 45
675
- },
676
- {
677
- "epoch": 0.35,
678
- "eval_loss": 0.20320919156074524,
679
- "eval_runtime": 149.2065,
680
- "eval_samples_per_second": 24.885,
681
- "eval_steps_per_second": 3.116,
682
- "eval_wer": 0.18170647005396165,
683
- "step": 45
684
- },
685
- {
686
- "epoch": 0.35,
687
- "learning_rate": 7.636363636363637e-05,
688
- "loss": 0.0333,
689
- "step": 46
690
- },
691
- {
692
- "epoch": 0.35,
693
- "eval_loss": 0.20314393937587738,
694
- "eval_runtime": 148.5201,
695
- "eval_samples_per_second": 25.0,
696
- "eval_steps_per_second": 3.131,
697
- "eval_wer": 0.18181332478495485,
698
- "step": 46
699
- },
700
- {
701
- "epoch": 0.36,
702
- "learning_rate": 7.545454545454545e-05,
703
- "loss": 0.0217,
704
- "step": 47
705
- },
706
- {
707
- "epoch": 0.36,
708
- "eval_loss": 0.20316116511821747,
709
- "eval_runtime": 149.0026,
710
- "eval_samples_per_second": 24.919,
711
- "eval_steps_per_second": 3.121,
712
- "eval_wer": 0.18186675215045145,
713
- "step": 47
714
- },
715
- {
716
- "epoch": 0.37,
717
- "learning_rate": 7.454545454545455e-05,
718
- "loss": 0.0324,
719
- "step": 48
720
- },
721
- {
722
- "epoch": 0.37,
723
- "eval_loss": 0.20341037213802338,
724
- "eval_runtime": 151.4038,
725
- "eval_samples_per_second": 24.524,
726
- "eval_steps_per_second": 3.071,
727
- "eval_wer": 0.18175989741945825,
728
- "step": 48
729
- },
730
- {
731
- "epoch": 0.38,
732
- "learning_rate": 7.363636363636364e-05,
733
- "loss": 0.0438,
734
- "step": 49
735
- },
736
- {
737
- "epoch": 0.38,
738
- "eval_loss": 0.20362479984760284,
739
- "eval_runtime": 147.9041,
740
- "eval_samples_per_second": 25.104,
741
- "eval_steps_per_second": 3.144,
742
- "eval_wer": 0.18154618795747182,
743
- "step": 49
744
- },
745
- {
746
- "epoch": 0.38,
747
- "learning_rate": 7.272727272727273e-05,
748
- "loss": 0.0435,
749
- "step": 50
750
- },
751
- {
752
- "epoch": 0.38,
753
- "eval_loss": 0.20351320505142212,
754
- "eval_runtime": 150.6257,
755
- "eval_samples_per_second": 24.651,
756
- "eval_steps_per_second": 3.087,
757
- "eval_wer": 0.18192017951594808,
758
- "step": 50
759
- },
760
- {
761
- "epoch": 0.39,
762
- "learning_rate": 7.181818181818182e-05,
763
- "loss": 0.0386,
764
- "step": 51
765
- },
766
- {
767
- "epoch": 0.39,
768
- "eval_loss": 0.20342397689819336,
769
- "eval_runtime": 149.3494,
770
- "eval_samples_per_second": 24.861,
771
- "eval_steps_per_second": 3.114,
772
- "eval_wer": 0.18202703424694128,
773
- "step": 51
774
- },
775
- {
776
- "epoch": 0.4,
777
- "learning_rate": 7.090909090909092e-05,
778
- "loss": 0.034,
779
- "step": 52
780
- },
781
- {
782
- "epoch": 0.4,
783
- "eval_loss": 0.20335783064365387,
784
- "eval_runtime": 148.8221,
785
- "eval_samples_per_second": 24.949,
786
- "eval_steps_per_second": 3.125,
787
- "eval_wer": 0.18245445317091413,
788
- "step": 52
789
- },
790
- {
791
- "epoch": 0.41,
792
- "learning_rate": 7e-05,
793
- "loss": 0.0308,
794
- "step": 53
795
- },
796
- {
797
- "epoch": 0.41,
798
- "eval_loss": 0.20360249280929565,
799
- "eval_runtime": 148.86,
800
- "eval_samples_per_second": 24.943,
801
- "eval_steps_per_second": 3.124,
802
- "eval_wer": 0.18250788053641076,
803
- "step": 53
804
- },
805
- {
806
- "epoch": 0.41,
807
- "learning_rate": 6.90909090909091e-05,
808
- "loss": 0.0334,
809
- "step": 54
810
- },
811
- {
812
- "epoch": 0.41,
813
- "eval_loss": 0.2034262865781784,
814
- "eval_runtime": 149.0445,
815
- "eval_samples_per_second": 24.912,
816
- "eval_steps_per_second": 3.12,
817
- "eval_wer": 0.1828284447293904,
818
- "step": 54
819
- },
820
- {
821
- "epoch": 0.42,
822
- "learning_rate": 6.818181818181818e-05,
823
- "loss": 0.0334,
824
- "step": 55
825
- },
826
- {
827
- "epoch": 0.42,
828
- "eval_loss": 0.20335279405117035,
829
- "eval_runtime": 149.9535,
830
- "eval_samples_per_second": 24.761,
831
- "eval_steps_per_second": 3.101,
832
- "eval_wer": 0.18240102580541753,
833
- "step": 55
834
- },
835
- {
836
- "epoch": 0.43,
837
- "learning_rate": 6.727272727272727e-05,
838
- "loss": 0.0502,
839
- "step": 56
840
- },
841
- {
842
- "epoch": 0.43,
843
- "eval_loss": 0.20316840708255768,
844
- "eval_runtime": 150.4496,
845
- "eval_samples_per_second": 24.679,
846
- "eval_steps_per_second": 3.091,
847
- "eval_wer": 0.18256130790190736,
848
- "step": 56
849
- },
850
- {
851
- "epoch": 0.44,
852
- "learning_rate": 6.636363636363638e-05,
853
- "loss": 0.0341,
854
- "step": 57
855
- },
856
- {
857
- "epoch": 0.44,
858
- "eval_loss": 0.20272959768772125,
859
- "eval_runtime": 149.4094,
860
- "eval_samples_per_second": 24.851,
861
- "eval_steps_per_second": 3.112,
862
- "eval_wer": 0.1821873163434311,
863
- "step": 57
864
- },
865
- {
866
- "epoch": 0.45,
867
- "learning_rate": 6.545454545454546e-05,
868
- "loss": 0.0595,
869
- "step": 58
870
- },
871
- {
872
- "epoch": 0.45,
873
- "eval_loss": 0.20200660824775696,
874
- "eval_runtime": 150.4398,
875
- "eval_samples_per_second": 24.681,
876
- "eval_steps_per_second": 3.091,
877
- "eval_wer": 0.18245445317091413,
878
- "step": 58
879
- },
880
- {
881
- "epoch": 0.45,
882
- "learning_rate": 6.454545454545455e-05,
883
- "loss": 0.0337,
884
- "step": 59
885
- },
886
- {
887
- "epoch": 0.45,
888
- "eval_loss": 0.2010081559419632,
889
- "eval_runtime": 149.7401,
890
- "eval_samples_per_second": 24.796,
891
- "eval_steps_per_second": 3.105,
892
- "eval_wer": 0.18304215419137682,
893
- "step": 59
894
- },
895
- {
896
- "epoch": 0.46,
897
- "learning_rate": 6.363636363636364e-05,
898
- "loss": 0.0504,
899
- "step": 60
900
- },
901
- {
902
- "epoch": 0.46,
903
- "eval_loss": 0.20022302865982056,
904
- "eval_runtime": 150.2278,
905
- "eval_samples_per_second": 24.716,
906
- "eval_steps_per_second": 3.095,
907
- "eval_wer": 0.18341614574985307,
908
- "step": 60
909
- },
910
- {
911
- "epoch": 0.47,
912
- "learning_rate": 6.272727272727273e-05,
913
- "loss": 0.0298,
914
- "step": 61
915
- },
916
- {
917
- "epoch": 0.47,
918
- "eval_loss": 0.19969654083251953,
919
- "eval_runtime": 149.9979,
920
- "eval_samples_per_second": 24.754,
921
- "eval_steps_per_second": 3.1,
922
- "eval_wer": 0.18336271838435647,
923
- "step": 61
924
- },
925
- {
926
- "epoch": 0.48,
927
- "learning_rate": 6.181818181818182e-05,
928
- "loss": 0.0216,
929
- "step": 62
930
- },
931
- {
932
- "epoch": 0.48,
933
- "eval_loss": 0.19924180209636688,
934
- "eval_runtime": 149.7227,
935
- "eval_samples_per_second": 24.799,
936
- "eval_steps_per_second": 3.106,
937
- "eval_wer": 0.18411070150130898,
938
- "step": 62
939
- },
940
- {
941
- "epoch": 0.48,
942
- "learning_rate": 6.090909090909091e-05,
943
- "loss": 0.0262,
944
- "step": 63
945
- },
946
- {
947
- "epoch": 0.48,
948
- "eval_loss": 0.19875453412532806,
949
- "eval_runtime": 151.0436,
950
- "eval_samples_per_second": 24.582,
951
- "eval_steps_per_second": 3.079,
952
- "eval_wer": 0.18464497515627504,
953
- "step": 63
954
- },
955
- {
956
- "epoch": 0.49,
957
- "learning_rate": 6e-05,
958
- "loss": 0.012,
959
- "step": 64
960
- },
961
- {
962
- "epoch": 0.49,
963
- "eval_loss": 0.19849465787410736,
964
- "eval_runtime": 150.8245,
965
- "eval_samples_per_second": 24.618,
966
- "eval_steps_per_second": 3.083,
967
- "eval_wer": 0.18528610354223432,
968
- "step": 64
969
- },
970
- {
971
- "epoch": 0.5,
972
- "learning_rate": 5.90909090909091e-05,
973
- "loss": 0.026,
974
- "step": 65
975
- },
976
- {
977
- "epoch": 0.5,
978
- "eval_loss": 0.19816958904266357,
979
- "eval_runtime": 152.5115,
980
- "eval_samples_per_second": 24.346,
981
- "eval_steps_per_second": 3.049,
982
- "eval_wer": 0.185873804562697,
983
- "step": 65
984
- },
985
- {
986
- "epoch": 0.51,
987
- "learning_rate": 5.818181818181818e-05,
988
- "loss": 0.0382,
989
- "step": 66
990
- },
991
- {
992
- "epoch": 0.51,
993
- "eval_loss": 0.19807423651218414,
994
- "eval_runtime": 154.494,
995
- "eval_samples_per_second": 24.033,
996
- "eval_steps_per_second": 3.01,
997
- "eval_wer": 0.18528610354223432,
998
- "step": 66
999
- },
1000
- {
1001
- "epoch": 0.51,
1002
- "learning_rate": 5.727272727272728e-05,
1003
- "loss": 0.0284,
1004
- "step": 67
1005
- },
1006
- {
1007
- "epoch": 0.51,
1008
- "eval_loss": 0.1981343924999237,
1009
- "eval_runtime": 151.1093,
1010
- "eval_samples_per_second": 24.572,
1011
- "eval_steps_per_second": 3.077,
1012
- "eval_wer": 0.1849121119837581,
1013
- "step": 67
1014
- },
1015
- {
1016
- "epoch": 0.52,
1017
- "learning_rate": 5.636363636363636e-05,
1018
- "loss": 0.0273,
1019
- "step": 68
1020
- },
1021
- {
1022
- "epoch": 0.52,
1023
- "eval_loss": 0.19824360311031342,
1024
- "eval_runtime": 151.8867,
1025
- "eval_samples_per_second": 24.446,
1026
- "eval_steps_per_second": 3.061,
1027
- "eval_wer": 0.18421755623230218,
1028
- "step": 68
1029
- },
1030
- {
1031
- "epoch": 0.53,
1032
- "learning_rate": 5.545454545454546e-05,
1033
- "loss": 0.0248,
1034
- "step": 69
1035
- },
1036
- {
1037
- "epoch": 0.53,
1038
- "eval_loss": 0.19854041934013367,
1039
- "eval_runtime": 150.4994,
1040
- "eval_samples_per_second": 24.671,
1041
- "eval_steps_per_second": 3.09,
1042
- "eval_wer": 0.1844312656942886,
1043
- "step": 69
1044
- },
1045
- {
1046
- "epoch": 0.54,
1047
- "learning_rate": 5.4545454545454546e-05,
1048
- "loss": 0.0315,
1049
- "step": 70
1050
- },
1051
- {
1052
- "epoch": 0.54,
1053
- "eval_loss": 0.19896994531154633,
1054
- "eval_runtime": 150.9218,
1055
- "eval_samples_per_second": 24.602,
1056
- "eval_steps_per_second": 3.081,
1057
- "eval_wer": 0.18405727413581235,
1058
- "step": 70
1059
- },
1060
- {
1061
- "epoch": 0.55,
1062
- "learning_rate": 5.363636363636364e-05,
1063
- "loss": 0.0443,
1064
- "step": 71
1065
- },
1066
- {
1067
- "epoch": 0.55,
1068
- "eval_loss": 0.1993587613105774,
1069
- "eval_runtime": 151.464,
1070
- "eval_samples_per_second": 24.514,
1071
- "eval_steps_per_second": 3.07,
1072
- "eval_wer": 0.1835764278463429,
1073
- "step": 71
1074
- },
1075
- {
1076
- "epoch": 0.55,
1077
- "learning_rate": 5.272727272727272e-05,
1078
- "loss": 0.0473,
1079
- "step": 72
1080
- },
1081
- {
1082
- "epoch": 0.55,
1083
- "eval_loss": 0.1994282454252243,
1084
- "eval_runtime": 151.2775,
1085
- "eval_samples_per_second": 24.544,
1086
- "eval_steps_per_second": 3.074,
1087
- "eval_wer": 0.18379013730832933,
1088
- "step": 72
1089
- },
1090
- {
1091
- "epoch": 0.56,
1092
- "learning_rate": 5.181818181818182e-05,
1093
- "loss": 0.0366,
1094
- "step": 73
1095
- },
1096
- {
1097
- "epoch": 0.56,
1098
- "eval_loss": 0.19968333840370178,
1099
- "eval_runtime": 151.4205,
1100
- "eval_samples_per_second": 24.521,
1101
- "eval_steps_per_second": 3.071,
1102
- "eval_wer": 0.18298872682588022,
1103
- "step": 73
1104
- },
1105
- {
1106
- "epoch": 0.57,
1107
- "learning_rate": 5.090909090909091e-05,
1108
- "loss": 0.0337,
1109
- "step": 74
1110
- },
1111
- {
1112
- "epoch": 0.57,
1113
- "eval_loss": 0.19987201690673828,
1114
- "eval_runtime": 151.4399,
1115
- "eval_samples_per_second": 24.518,
1116
- "eval_steps_per_second": 3.071,
1117
- "eval_wer": 0.18309558155687344,
1118
- "step": 74
1119
- },
1120
- {
1121
- "epoch": 0.58,
1122
- "learning_rate": 5e-05,
1123
- "loss": 0.0304,
1124
- "step": 75
1125
- },
1126
- {
1127
- "epoch": 0.58,
1128
- "eval_loss": 0.2001255452632904,
1129
- "eval_runtime": 152.4344,
1130
- "eval_samples_per_second": 24.358,
1131
- "eval_steps_per_second": 3.05,
1132
- "eval_wer": 0.18346957311534967,
1133
- "step": 75
1134
- },
1135
- {
1136
- "epoch": 0.58,
1137
- "learning_rate": 4.909090909090909e-05,
1138
- "loss": 0.0362,
1139
- "step": 76
1140
- },
1141
- {
1142
- "epoch": 0.58,
1143
- "eval_loss": 0.20033375918865204,
1144
- "eval_runtime": 151.5547,
1145
- "eval_samples_per_second": 24.499,
1146
- "eval_steps_per_second": 3.068,
1147
- "eval_wer": 0.18293529946038362,
1148
- "step": 76
1149
- },
1150
- {
1151
- "epoch": 0.59,
1152
- "learning_rate": 4.8181818181818186e-05,
1153
- "loss": 0.0482,
1154
- "step": 77
1155
- },
1156
- {
1157
- "epoch": 0.59,
1158
- "eval_loss": 0.20038414001464844,
1159
- "eval_runtime": 151.9532,
1160
- "eval_samples_per_second": 24.435,
1161
- "eval_steps_per_second": 3.06,
1162
- "eval_wer": 0.18250788053641076,
1163
- "step": 77
1164
- },
1165
- {
1166
- "epoch": 0.6,
1167
- "learning_rate": 4.7272727272727275e-05,
1168
- "loss": 0.0362,
1169
- "step": 78
1170
- },
1171
- {
1172
- "epoch": 0.6,
1173
- "eval_loss": 0.20029421150684357,
1174
- "eval_runtime": 153.417,
1175
- "eval_samples_per_second": 24.202,
1176
- "eval_steps_per_second": 3.031,
1177
- "eval_wer": 0.1828284447293904,
1178
- "step": 78
1179
- },
1180
- {
1181
- "epoch": 0.61,
1182
- "learning_rate": 4.636363636363636e-05,
1183
- "loss": 0.0301,
1184
- "step": 79
1185
- },
1186
- {
1187
- "epoch": 0.61,
1188
- "eval_loss": 0.20029130578041077,
1189
- "eval_runtime": 151.1667,
1190
- "eval_samples_per_second": 24.562,
1191
- "eval_steps_per_second": 3.076,
1192
- "eval_wer": 0.18293529946038362,
1193
- "step": 79
1194
- },
1195
- {
1196
- "epoch": 0.61,
1197
- "learning_rate": 4.545454545454546e-05,
1198
- "loss": 0.031,
1199
- "step": 80
1200
- },
1201
- {
1202
- "epoch": 0.61,
1203
- "eval_loss": 0.200118288397789,
1204
- "eval_runtime": 152.5561,
1205
- "eval_samples_per_second": 24.339,
1206
- "eval_steps_per_second": 3.048,
1207
- "eval_wer": 0.18330929101885987,
1208
- "step": 80
1209
- },
1210
- {
1211
- "epoch": 0.62,
1212
- "learning_rate": 4.454545454545455e-05,
1213
- "loss": 0.0377,
1214
- "step": 81
1215
- },
1216
- {
1217
- "epoch": 0.62,
1218
- "eval_loss": 0.1998458057641983,
1219
- "eval_runtime": 152.057,
1220
- "eval_samples_per_second": 24.418,
1221
- "eval_steps_per_second": 3.058,
1222
- "eval_wer": 0.18234759843992093,
1223
- "step": 81
1224
- },
1225
- {
1226
- "epoch": 0.63,
1227
- "learning_rate": 4.3636363636363636e-05,
1228
- "loss": 0.0372,
1229
- "step": 82
1230
- },
1231
- {
1232
- "epoch": 0.63,
1233
- "eval_loss": 0.19953805208206177,
1234
- "eval_runtime": 152.6899,
1235
- "eval_samples_per_second": 24.317,
1236
- "eval_steps_per_second": 3.045,
1237
- "eval_wer": 0.18256130790190736,
1238
- "step": 82
1239
- },
1240
- {
1241
- "epoch": 0.64,
1242
- "learning_rate": 4.2727272727272724e-05,
1243
- "loss": 0.0234,
1244
- "step": 83
1245
- },
1246
- {
1247
- "epoch": 0.64,
1248
- "eval_loss": 0.19939135015010834,
1249
- "eval_runtime": 152.3549,
1250
- "eval_samples_per_second": 24.371,
1251
- "eval_steps_per_second": 3.052,
1252
- "eval_wer": 0.18202703424694128,
1253
- "step": 83
1254
- },
1255
- {
1256
- "epoch": 0.65,
1257
- "learning_rate": 4.181818181818182e-05,
1258
- "loss": 0.0539,
1259
- "step": 84
1260
- },
1261
- {
1262
- "epoch": 0.65,
1263
- "eval_loss": 0.19920985400676727,
1264
- "eval_runtime": 151.8121,
1265
- "eval_samples_per_second": 24.458,
1266
- "eval_steps_per_second": 3.063,
1267
- "eval_wer": 0.18154618795747182,
1268
- "step": 84
1269
- },
1270
- {
1271
- "epoch": 0.65,
1272
- "learning_rate": 4.0909090909090915e-05,
1273
- "loss": 0.0446,
1274
- "step": 85
1275
- },
1276
- {
1277
- "epoch": 0.65,
1278
- "eval_loss": 0.19903059303760529,
1279
- "eval_runtime": 153.3501,
1280
- "eval_samples_per_second": 24.213,
1281
- "eval_steps_per_second": 3.032,
1282
- "eval_wer": 0.18181332478495485,
1283
- "step": 85
1284
- },
1285
- {
1286
- "epoch": 0.66,
1287
- "learning_rate": 4e-05,
1288
- "loss": 0.0373,
1289
- "step": 86
1290
- },
1291
- {
1292
- "epoch": 0.66,
1293
- "eval_loss": 0.19874274730682373,
1294
- "eval_runtime": 153.2828,
1295
- "eval_samples_per_second": 24.223,
1296
- "eval_steps_per_second": 3.034,
1297
- "eval_wer": 0.18154618795747182,
1298
- "step": 86
1299
- },
1300
- {
1301
- "epoch": 0.67,
1302
- "learning_rate": 3.909090909090909e-05,
1303
- "loss": 0.0347,
1304
- "step": 87
1305
- },
1306
- {
1307
- "epoch": 0.67,
1308
- "eval_loss": 0.1984863579273224,
1309
- "eval_runtime": 153.5045,
1310
- "eval_samples_per_second": 24.188,
1311
- "eval_steps_per_second": 3.029,
1312
- "eval_wer": 0.1814393332264786,
1313
- "step": 87
1314
- },
1315
- {
1316
- "epoch": 0.68,
1317
- "learning_rate": 3.818181818181819e-05,
1318
- "loss": 0.0477,
1319
- "step": 88
1320
- },
1321
- {
1322
- "epoch": 0.68,
1323
- "eval_loss": 0.19823090732097626,
1324
- "eval_runtime": 153.2343,
1325
- "eval_samples_per_second": 24.231,
1326
- "eval_steps_per_second": 3.035,
1327
- "eval_wer": 0.18186675215045145,
1328
- "step": 88
1329
- },
1330
- {
1331
- "epoch": 0.68,
1332
- "learning_rate": 3.7272727272727276e-05,
1333
- "loss": 0.0365,
1334
- "step": 89
1335
- },
1336
- {
1337
- "epoch": 0.68,
1338
- "eval_loss": 0.1978393793106079,
1339
- "eval_runtime": 153.4002,
1340
- "eval_samples_per_second": 24.205,
1341
- "eval_steps_per_second": 3.031,
1342
- "eval_wer": 0.1814393332264786,
1343
- "step": 89
1344
- },
1345
- {
1346
- "epoch": 0.69,
1347
- "learning_rate": 3.6363636363636364e-05,
1348
- "loss": 0.0255,
1349
- "step": 90
1350
- },
1351
- {
1352
- "epoch": 0.69,
1353
- "eval_loss": 0.1975974142551422,
1354
- "eval_runtime": 153.949,
1355
- "eval_samples_per_second": 24.118,
1356
- "eval_steps_per_second": 3.02,
1357
- "eval_wer": 0.18101191430250574,
1358
- "step": 90
1359
- },
1360
- {
1361
- "epoch": 0.7,
1362
- "learning_rate": 3.545454545454546e-05,
1363
- "loss": 0.0205,
1364
- "step": 91
1365
- },
1366
- {
1367
- "epoch": 0.7,
1368
- "eval_loss": 0.19747106730937958,
1369
- "eval_runtime": 153.3875,
1370
- "eval_samples_per_second": 24.207,
1371
- "eval_steps_per_second": 3.032,
1372
- "eval_wer": 0.18063792274402948,
1373
- "step": 91
1374
- },
1375
- {
1376
- "epoch": 0.71,
1377
- "learning_rate": 3.454545454545455e-05,
1378
- "loss": 0.053,
1379
- "step": 92
1380
- },
1381
- {
1382
- "epoch": 0.71,
1383
- "eval_loss": 0.19707681238651276,
1384
- "eval_runtime": 151.9804,
1385
- "eval_samples_per_second": 24.431,
1386
- "eval_steps_per_second": 3.06,
1387
- "eval_wer": 0.18031735855104986,
1388
- "step": 92
1389
- },
1390
- {
1391
- "epoch": 0.71,
1392
- "learning_rate": 3.3636363636363636e-05,
1393
- "loss": 0.0188,
1394
- "step": 93
1395
- },
1396
- {
1397
- "epoch": 0.71,
1398
- "eval_loss": 0.19678995013237,
1399
- "eval_runtime": 156.2042,
1400
- "eval_samples_per_second": 23.77,
1401
- "eval_steps_per_second": 2.977,
1402
- "eval_wer": 0.18053106801303628,
1403
- "step": 93
1404
- },
1405
- {
1406
- "epoch": 0.72,
1407
- "learning_rate": 3.272727272727273e-05,
1408
- "loss": 0.018,
1409
- "step": 94
1410
- },
1411
- {
1412
- "epoch": 0.72,
1413
- "eval_loss": 0.1968337893486023,
1414
- "eval_runtime": 151.8471,
1415
- "eval_samples_per_second": 24.452,
1416
- "eval_steps_per_second": 3.062,
1417
- "eval_wer": 0.18015707645456003,
1418
- "step": 94
1419
- },
1420
- {
1421
- "epoch": 0.73,
1422
- "learning_rate": 3.181818181818182e-05,
1423
- "loss": 0.0135,
1424
- "step": 95
1425
- },
1426
- {
1427
- "epoch": 0.73,
1428
- "eval_loss": 0.19669164717197418,
1429
- "eval_runtime": 153.6158,
1430
- "eval_samples_per_second": 24.171,
1431
- "eval_steps_per_second": 3.027,
1432
- "eval_wer": 0.18042421328204306,
1433
- "step": 95
1434
- },
1435
- {
1436
- "epoch": 0.74,
1437
- "learning_rate": 3.090909090909091e-05,
1438
- "loss": 0.0099,
1439
- "step": 96
1440
- },
1441
- {
1442
- "epoch": 0.74,
1443
- "eval_loss": 0.1966254860162735,
1444
- "eval_runtime": 153.1791,
1445
- "eval_samples_per_second": 24.24,
1446
- "eval_steps_per_second": 3.036,
1447
- "eval_wer": 0.18010364908906343,
1448
- "step": 96
1449
- },
1450
- {
1451
- "epoch": 0.75,
1452
- "learning_rate": 3e-05,
1453
- "loss": 0.0283,
1454
- "step": 97
1455
- },
1456
- {
1457
- "epoch": 0.75,
1458
- "eval_loss": 0.19664821028709412,
1459
- "eval_runtime": 152.9205,
1460
- "eval_samples_per_second": 24.281,
1461
- "eval_steps_per_second": 3.041,
1462
- "eval_wer": 0.18042421328204306,
1463
- "step": 97
1464
- },
1465
- {
1466
- "epoch": 0.75,
1467
- "learning_rate": 2.909090909090909e-05,
1468
- "loss": 0.0345,
1469
- "step": 98
1470
- },
1471
- {
1472
- "epoch": 0.75,
1473
- "eval_loss": 0.19662834703922272,
1474
- "eval_runtime": 153.3485,
1475
- "eval_samples_per_second": 24.213,
1476
- "eval_steps_per_second": 3.032,
1477
- "eval_wer": 0.1799433669925736,
1478
- "step": 98
1479
  }
1480
  ],
1481
- "max_steps": 130,
1482
  "num_train_epochs": 1,
1483
- "total_flos": 2.245784209200768e+18,
1484
  "trial_name": null,
1485
  "trial_params": null
1486
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.007662835249042145,
5
+ "global_step": 1,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
10
  {
11
  "epoch": 0.01,
12
  "learning_rate": 5e-06,
13
+ "loss": 0.0253,
14
  "step": 1
15
  },
16
  {
17
  "epoch": 0.01,
18
+ "eval_loss": 0.19735707342624664,
19
+ "eval_runtime": 222.0072,
20
+ "eval_samples_per_second": 16.77,
21
+ "eval_steps_per_second": 2.099,
22
+ "eval_wer": 0.17986033370648755,
23
  "step": 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  }
25
  ],
26
+ "max_steps": 13,
27
  "num_train_epochs": 1,
28
+ "total_flos": 3.812863416408576e+16,
29
  "trial_name": null,
30
  "trial_params": null
31
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:157b26c01ed722d6f5c5a4023fabc27a6bacfd61d977a05834d3adf2dfe8a9ac
3
  size 3323
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79653dcfc0cbecddb31987eb9747e2b3c6231042214c6c1a06e0d99f1bc0d8af
3
  size 3323