subatomicseer committed on
Commit
a49566a
1 Parent(s): ad84922

End of training

Browse files
Files changed (5) hide show
  1. README.md +6 -3
  2. all_results.json +9 -9
  3. eval_results.json +5 -5
  4. train_results.json +4 -4
  5. trainer_state.json +304 -304
README.md CHANGED
@@ -1,5 +1,8 @@
1
  ---
 
2
  tags:
 
 
3
  - generated_from_trainer
4
  datasets:
5
  - timit_asr
@@ -12,11 +15,11 @@ model-index:
12
  name: Automatic Speech Recognition
13
  type: automatic-speech-recognition
14
  dataset:
15
- name: timit_asr
16
  type: timit_asr
17
  config: clean
18
  split: test
19
- args: clean
20
  metrics:
21
  - name: Wer
22
  type: wer
@@ -28,7 +31,7 @@ should probably proofread and complete it, then remove this comment. -->
28
 
29
  # wav2vec2-base-hyperVQ-timit-fine-tuned
30
 
31
- This model was trained from scratch on the timit_asr dataset.
32
  It achieves the following results on the evaluation set:
33
  - Loss: 3.3628
34
  - Wer: 0.9993
 
1
  ---
2
+ base_model: wav2vec2-pretrained-base-hyperVQ
3
  tags:
4
+ - automatic-speech-recognition
5
+ - timit_asr
6
  - generated_from_trainer
7
  datasets:
8
  - timit_asr
 
15
  name: Automatic Speech Recognition
16
  type: automatic-speech-recognition
17
  dataset:
18
+ name: TIMIT_ASR - NA
19
  type: timit_asr
20
  config: clean
21
  split: test
22
+ args: 'Config: na, Training split: train, Eval split: test'
23
  metrics:
24
  - name: Wer
25
  type: wer
 
31
 
32
  # wav2vec2-base-hyperVQ-timit-fine-tuned
33
 
34
+ This model is a fine-tuned version of [wav2vec2-pretrained-base-hyperVQ](https://huggingface.co/wav2vec2-pretrained-base-hyperVQ) on the TIMIT_ASR - NA dataset.
35
  It achieves the following results on the evaluation set:
36
  - Loss: 3.3628
37
  - Wer: 0.9993
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_loss": 0.6916695237159729,
4
- "eval_runtime": 8.4452,
5
  "eval_samples": 1680,
6
- "eval_samples_per_second": 198.929,
7
- "eval_steps_per_second": 6.276,
8
- "eval_wer": 0.5904486251808972,
9
- "train_loss": 1.2737908951167403,
10
- "train_runtime": 394.8383,
11
  "train_samples": 4620,
12
- "train_samples_per_second": 234.02,
13
- "train_steps_per_second": 7.345
14
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_loss": 3.362812042236328,
4
+ "eval_runtime": 8.2056,
5
  "eval_samples": 1680,
6
+ "eval_samples_per_second": 204.737,
7
+ "eval_steps_per_second": 6.459,
8
+ "eval_wer": 0.9993108676176694,
9
+ "train_loss": 3.8477164847275307,
10
+ "train_runtime": 343.8146,
11
  "train_samples": 4620,
12
+ "train_samples_per_second": 268.749,
13
+ "train_steps_per_second": 8.435
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_loss": 0.6916695237159729,
4
- "eval_runtime": 8.4452,
5
  "eval_samples": 1680,
6
- "eval_samples_per_second": 198.929,
7
- "eval_steps_per_second": 6.276,
8
- "eval_wer": 0.5904486251808972
9
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_loss": 3.362812042236328,
4
+ "eval_runtime": 8.2056,
5
  "eval_samples": 1680,
6
+ "eval_samples_per_second": 204.737,
7
+ "eval_steps_per_second": 6.459,
8
+ "eval_wer": 0.9993108676176694
9
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 1.2737908951167403,
4
- "train_runtime": 394.8383,
5
  "train_samples": 4620,
6
- "train_samples_per_second": 234.02,
7
- "train_steps_per_second": 7.345
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 3.8477164847275307,
4
+ "train_runtime": 343.8146,
5
  "train_samples": 4620,
6
+ "train_samples_per_second": 268.749,
7
+ "train_steps_per_second": 8.435
8
  }
trainer_state.json CHANGED
@@ -11,1769 +11,1769 @@
11
  {
12
  "epoch": 0.07,
13
  "learning_rate": 1.0000000000000002e-06,
14
- "loss": 8.9192,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.14,
19
  "learning_rate": 2.0000000000000003e-06,
20
- "loss": 8.5138,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.21,
25
  "learning_rate": 3e-06,
26
- "loss": 7.895,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.28,
31
  "learning_rate": 4.000000000000001e-06,
32
- "loss": 8.2316,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.34,
37
  "learning_rate": 5e-06,
38
- "loss": 8.3274,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.41,
43
  "learning_rate": 6e-06,
44
- "loss": 7.7786,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.48,
49
  "learning_rate": 7.000000000000001e-06,
50
- "loss": 7.1157,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.55,
55
  "learning_rate": 8.000000000000001e-06,
56
- "loss": 7.5486,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.62,
61
  "learning_rate": 9e-06,
62
- "loss": 6.633,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 0.69,
67
  "learning_rate": 1e-05,
68
- "loss": 5.4894,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 0.76,
73
  "learning_rate": 1.1000000000000001e-05,
74
- "loss": 4.5307,
75
  "step": 110
76
  },
77
  {
78
  "epoch": 0.83,
79
  "learning_rate": 1.2e-05,
80
- "loss": 4.1082,
81
  "step": 120
82
  },
83
  {
84
  "epoch": 0.9,
85
  "learning_rate": 1.3000000000000001e-05,
86
- "loss": 3.8578,
87
  "step": 130
88
  },
89
  {
90
  "epoch": 0.97,
91
  "learning_rate": 1.4000000000000001e-05,
92
- "loss": 3.675,
93
  "step": 140
94
  },
95
  {
96
  "epoch": 1.03,
97
  "learning_rate": 1.5e-05,
98
- "loss": 3.6452,
99
  "step": 150
100
  },
101
  {
102
  "epoch": 1.1,
103
  "learning_rate": 1.6000000000000003e-05,
104
- "loss": 3.5126,
105
  "step": 160
106
  },
107
  {
108
  "epoch": 1.17,
109
  "learning_rate": 1.7000000000000003e-05,
110
- "loss": 3.3975,
111
  "step": 170
112
  },
113
  {
114
  "epoch": 1.24,
115
  "learning_rate": 1.8e-05,
116
- "loss": 3.3037,
117
  "step": 180
118
  },
119
  {
120
  "epoch": 1.31,
121
  "learning_rate": 1.9e-05,
122
- "loss": 3.2924,
123
  "step": 190
124
  },
125
  {
126
  "epoch": 1.38,
127
  "learning_rate": 2e-05,
128
- "loss": 3.1584,
129
  "step": 200
130
  },
131
  {
132
  "epoch": 1.45,
133
  "learning_rate": 2.1e-05,
134
- "loss": 3.1086,
135
  "step": 210
136
  },
137
  {
138
  "epoch": 1.52,
139
  "learning_rate": 2.2000000000000003e-05,
140
- "loss": 3.1207,
141
  "step": 220
142
  },
143
  {
144
  "epoch": 1.59,
145
  "learning_rate": 2.3000000000000003e-05,
146
- "loss": 3.0599,
147
  "step": 230
148
  },
149
  {
150
  "epoch": 1.66,
151
  "learning_rate": 2.4e-05,
152
- "loss": 3.0037,
153
  "step": 240
154
  },
155
  {
156
  "epoch": 1.72,
157
  "learning_rate": 2.5e-05,
158
- "loss": 2.9957,
159
  "step": 250
160
  },
161
  {
162
  "epoch": 1.79,
163
  "learning_rate": 2.6000000000000002e-05,
164
- "loss": 3.0302,
165
  "step": 260
166
  },
167
  {
168
  "epoch": 1.86,
169
  "learning_rate": 2.7000000000000002e-05,
170
- "loss": 2.9769,
171
  "step": 270
172
  },
173
  {
174
  "epoch": 1.93,
175
  "learning_rate": 2.8000000000000003e-05,
176
- "loss": 2.9324,
177
  "step": 280
178
  },
179
  {
180
  "epoch": 2.0,
181
  "learning_rate": 2.9e-05,
182
- "loss": 2.9486,
183
  "step": 290
184
  },
185
  {
186
  "epoch": 2.07,
187
  "learning_rate": 3e-05,
188
- "loss": 2.9842,
189
  "step": 300
190
  },
191
  {
192
  "epoch": 2.14,
193
  "learning_rate": 3.1e-05,
194
- "loss": 2.8971,
195
  "step": 310
196
  },
197
  {
198
  "epoch": 2.21,
199
  "learning_rate": 3.2000000000000005e-05,
200
- "loss": 2.9036,
201
  "step": 320
202
  },
203
  {
204
  "epoch": 2.28,
205
  "learning_rate": 3.3e-05,
206
- "loss": 2.9475,
207
  "step": 330
208
  },
209
  {
210
  "epoch": 2.34,
211
  "learning_rate": 3.4000000000000007e-05,
212
- "loss": 2.8944,
213
  "step": 340
214
  },
215
  {
216
  "epoch": 2.41,
217
  "learning_rate": 3.5e-05,
218
- "loss": 2.8786,
219
  "step": 350
220
  },
221
  {
222
  "epoch": 2.48,
223
  "learning_rate": 3.6e-05,
224
- "loss": 2.8823,
225
  "step": 360
226
  },
227
  {
228
  "epoch": 2.55,
229
  "learning_rate": 3.7e-05,
230
- "loss": 2.9207,
231
  "step": 370
232
  },
233
  {
234
  "epoch": 2.62,
235
  "learning_rate": 3.8e-05,
236
- "loss": 2.8595,
237
  "step": 380
238
  },
239
  {
240
  "epoch": 2.69,
241
  "learning_rate": 3.9000000000000006e-05,
242
- "loss": 2.8531,
243
  "step": 390
244
  },
245
  {
246
  "epoch": 2.76,
247
  "learning_rate": 4e-05,
248
- "loss": 2.8828,
249
  "step": 400
250
  },
251
  {
252
  "epoch": 2.83,
253
  "learning_rate": 4.1e-05,
254
- "loss": 2.8601,
255
  "step": 410
256
  },
257
  {
258
  "epoch": 2.9,
259
  "learning_rate": 4.2e-05,
260
- "loss": 2.8315,
261
  "step": 420
262
  },
263
  {
264
  "epoch": 2.97,
265
  "learning_rate": 4.3e-05,
266
- "loss": 2.8174,
267
  "step": 430
268
  },
269
  {
270
  "epoch": 3.03,
271
  "learning_rate": 4.4000000000000006e-05,
272
- "loss": 2.89,
273
  "step": 440
274
  },
275
  {
276
  "epoch": 3.1,
277
  "learning_rate": 4.5e-05,
278
- "loss": 2.815,
279
  "step": 450
280
  },
281
  {
282
  "epoch": 3.17,
283
  "learning_rate": 4.600000000000001e-05,
284
- "loss": 2.7842,
285
  "step": 460
286
  },
287
  {
288
  "epoch": 3.24,
289
  "learning_rate": 4.7e-05,
290
- "loss": 2.8152,
291
  "step": 470
292
  },
293
  {
294
  "epoch": 3.31,
295
  "learning_rate": 4.8e-05,
296
- "loss": 2.8418,
297
  "step": 480
298
  },
299
  {
300
  "epoch": 3.38,
301
  "learning_rate": 4.9e-05,
302
- "loss": 2.7557,
303
  "step": 490
304
  },
305
  {
306
  "epoch": 3.45,
307
  "learning_rate": 5e-05,
308
- "loss": 2.7455,
309
  "step": 500
310
  },
311
  {
312
  "epoch": 3.52,
313
  "learning_rate": 5.1000000000000006e-05,
314
- "loss": 2.8095,
315
  "step": 510
316
  },
317
  {
318
  "epoch": 3.59,
319
  "learning_rate": 5.2000000000000004e-05,
320
- "loss": 2.7294,
321
  "step": 520
322
  },
323
  {
324
  "epoch": 3.66,
325
  "learning_rate": 5.300000000000001e-05,
326
- "loss": 2.6936,
327
  "step": 530
328
  },
329
  {
330
  "epoch": 3.72,
331
  "learning_rate": 5.4000000000000005e-05,
332
- "loss": 2.6999,
333
  "step": 540
334
  },
335
  {
336
  "epoch": 3.79,
337
  "learning_rate": 5.500000000000001e-05,
338
- "loss": 2.7664,
339
  "step": 550
340
  },
341
  {
342
  "epoch": 3.86,
343
  "learning_rate": 5.6000000000000006e-05,
344
- "loss": 2.6373,
345
  "step": 560
346
  },
347
  {
348
  "epoch": 3.93,
349
  "learning_rate": 5.6999999999999996e-05,
350
- "loss": 2.6569,
351
  "step": 570
352
  },
353
  {
354
  "epoch": 4.0,
355
  "learning_rate": 5.8e-05,
356
- "loss": 2.7333,
357
  "step": 580
358
  },
359
  {
360
  "epoch": 4.07,
361
  "learning_rate": 5.9e-05,
362
- "loss": 2.6905,
363
  "step": 590
364
  },
365
  {
366
  "epoch": 4.14,
367
  "learning_rate": 6e-05,
368
- "loss": 2.5422,
369
  "step": 600
370
  },
371
  {
372
  "epoch": 4.21,
373
  "learning_rate": 6.1e-05,
374
- "loss": 2.5804,
375
  "step": 610
376
  },
377
  {
378
  "epoch": 4.28,
379
  "learning_rate": 6.2e-05,
380
- "loss": 2.7211,
381
  "step": 620
382
  },
383
  {
384
  "epoch": 4.34,
385
  "learning_rate": 6.3e-05,
386
- "loss": 2.518,
387
  "step": 630
388
  },
389
  {
390
  "epoch": 4.41,
391
  "learning_rate": 6.400000000000001e-05,
392
- "loss": 2.4777,
393
  "step": 640
394
  },
395
  {
396
  "epoch": 4.48,
397
  "learning_rate": 6.500000000000001e-05,
398
- "loss": 2.5562,
399
  "step": 650
400
  },
401
  {
402
  "epoch": 4.55,
403
  "learning_rate": 6.6e-05,
404
- "loss": 2.5929,
405
  "step": 660
406
  },
407
  {
408
  "epoch": 4.62,
409
  "learning_rate": 6.7e-05,
410
- "loss": 2.3472,
411
  "step": 670
412
  },
413
  {
414
  "epoch": 4.69,
415
  "learning_rate": 6.800000000000001e-05,
416
- "loss": 2.4409,
417
  "step": 680
418
  },
419
  {
420
  "epoch": 4.76,
421
  "learning_rate": 6.9e-05,
422
- "loss": 2.5433,
423
  "step": 690
424
  },
425
  {
426
  "epoch": 4.83,
427
  "learning_rate": 7e-05,
428
- "loss": 2.3576,
429
  "step": 700
430
  },
431
  {
432
  "epoch": 4.9,
433
  "learning_rate": 7.1e-05,
434
- "loss": 2.2553,
435
  "step": 710
436
  },
437
  {
438
  "epoch": 4.97,
439
  "learning_rate": 7.2e-05,
440
- "loss": 2.3377,
441
  "step": 720
442
  },
443
  {
444
  "epoch": 5.03,
445
  "learning_rate": 7.3e-05,
446
- "loss": 2.4641,
447
  "step": 730
448
  },
449
  {
450
  "epoch": 5.1,
451
  "learning_rate": 7.4e-05,
452
- "loss": 2.1024,
453
  "step": 740
454
  },
455
  {
456
  "epoch": 5.17,
457
  "learning_rate": 7.500000000000001e-05,
458
- "loss": 2.1396,
459
  "step": 750
460
  },
461
  {
462
  "epoch": 5.24,
463
  "learning_rate": 7.6e-05,
464
- "loss": 2.2905,
465
  "step": 760
466
  },
467
  {
468
  "epoch": 5.31,
469
  "learning_rate": 7.7e-05,
470
- "loss": 2.1351,
471
  "step": 770
472
  },
473
  {
474
  "epoch": 5.38,
475
  "learning_rate": 7.800000000000001e-05,
476
- "loss": 1.8236,
477
  "step": 780
478
  },
479
  {
480
  "epoch": 5.45,
481
  "learning_rate": 7.900000000000001e-05,
482
- "loss": 1.9193,
483
  "step": 790
484
  },
485
  {
486
  "epoch": 5.52,
487
  "learning_rate": 8e-05,
488
- "loss": 2.1076,
489
  "step": 800
490
  },
491
  {
492
  "epoch": 5.59,
493
  "learning_rate": 8.1e-05,
494
- "loss": 1.7412,
495
  "step": 810
496
  },
497
  {
498
  "epoch": 5.66,
499
  "learning_rate": 8.2e-05,
500
- "loss": 1.6121,
501
  "step": 820
502
  },
503
  {
504
  "epoch": 5.72,
505
  "learning_rate": 8.3e-05,
506
- "loss": 1.6504,
507
  "step": 830
508
  },
509
  {
510
  "epoch": 5.79,
511
  "learning_rate": 8.4e-05,
512
- "loss": 1.7832,
513
  "step": 840
514
  },
515
  {
516
  "epoch": 5.86,
517
  "learning_rate": 8.5e-05,
518
- "loss": 1.3229,
519
  "step": 850
520
  },
521
  {
522
  "epoch": 5.93,
523
  "learning_rate": 8.6e-05,
524
- "loss": 1.4531,
525
  "step": 860
526
  },
527
  {
528
  "epoch": 6.0,
529
  "learning_rate": 8.7e-05,
530
- "loss": 1.5507,
531
  "step": 870
532
  },
533
  {
534
  "epoch": 6.07,
535
  "learning_rate": 8.800000000000001e-05,
536
- "loss": 1.3791,
537
  "step": 880
538
  },
539
  {
540
  "epoch": 6.14,
541
  "learning_rate": 8.900000000000001e-05,
542
- "loss": 1.1911,
543
  "step": 890
544
  },
545
  {
546
  "epoch": 6.21,
547
  "learning_rate": 9e-05,
548
- "loss": 1.2356,
549
  "step": 900
550
  },
551
  {
552
  "epoch": 6.28,
553
  "learning_rate": 9.1e-05,
554
- "loss": 1.3927,
555
  "step": 910
556
  },
557
  {
558
  "epoch": 6.34,
559
  "learning_rate": 9.200000000000001e-05,
560
- "loss": 1.0738,
561
  "step": 920
562
  },
563
  {
564
  "epoch": 6.41,
565
  "learning_rate": 9.300000000000001e-05,
566
- "loss": 1.0313,
567
  "step": 930
568
  },
569
  {
570
  "epoch": 6.48,
571
  "learning_rate": 9.4e-05,
572
- "loss": 1.1769,
573
  "step": 940
574
  },
575
  {
576
  "epoch": 6.55,
577
  "learning_rate": 9.5e-05,
578
- "loss": 1.2078,
579
  "step": 950
580
  },
581
  {
582
  "epoch": 6.62,
583
  "learning_rate": 9.6e-05,
584
- "loss": 0.9346,
585
  "step": 960
586
  },
587
  {
588
  "epoch": 6.69,
589
  "learning_rate": 9.7e-05,
590
- "loss": 0.9883,
591
  "step": 970
592
  },
593
  {
594
  "epoch": 6.76,
595
  "learning_rate": 9.8e-05,
596
- "loss": 1.1853,
597
  "step": 980
598
  },
599
  {
600
  "epoch": 6.83,
601
  "learning_rate": 9.900000000000001e-05,
602
- "loss": 0.9751,
603
  "step": 990
604
  },
605
  {
606
  "epoch": 6.9,
607
  "learning_rate": 0.0001,
608
- "loss": 0.8268,
609
  "step": 1000
610
  },
611
  {
612
  "epoch": 6.97,
613
  "learning_rate": 9.947368421052632e-05,
614
- "loss": 0.9507,
615
  "step": 1010
616
  },
617
  {
618
  "epoch": 7.03,
619
  "learning_rate": 9.894736842105263e-05,
620
- "loss": 1.0724,
621
  "step": 1020
622
  },
623
  {
624
  "epoch": 7.1,
625
  "learning_rate": 9.842105263157894e-05,
626
- "loss": 0.7477,
627
  "step": 1030
628
  },
629
  {
630
  "epoch": 7.17,
631
  "learning_rate": 9.789473684210527e-05,
632
- "loss": 0.7514,
633
  "step": 1040
634
  },
635
  {
636
  "epoch": 7.24,
637
  "learning_rate": 9.736842105263158e-05,
638
- "loss": 0.8923,
639
  "step": 1050
640
  },
641
  {
642
  "epoch": 7.31,
643
  "learning_rate": 9.68421052631579e-05,
644
- "loss": 0.9013,
645
  "step": 1060
646
  },
647
  {
648
  "epoch": 7.38,
649
  "learning_rate": 9.631578947368421e-05,
650
- "loss": 0.6789,
651
  "step": 1070
652
  },
653
  {
654
  "epoch": 7.45,
655
  "learning_rate": 9.578947368421052e-05,
656
- "loss": 0.7158,
657
  "step": 1080
658
  },
659
  {
660
  "epoch": 7.52,
661
  "learning_rate": 9.526315789473685e-05,
662
- "loss": 0.9585,
663
  "step": 1090
664
  },
665
  {
666
  "epoch": 7.59,
667
  "learning_rate": 9.473684210526316e-05,
668
- "loss": 0.7256,
669
  "step": 1100
670
  },
671
  {
672
  "epoch": 7.66,
673
  "learning_rate": 9.421052631578949e-05,
674
- "loss": 0.6444,
675
  "step": 1110
676
  },
677
  {
678
  "epoch": 7.72,
679
  "learning_rate": 9.36842105263158e-05,
680
- "loss": 0.7693,
681
  "step": 1120
682
  },
683
  {
684
  "epoch": 7.79,
685
  "learning_rate": 9.315789473684211e-05,
686
- "loss": 0.8643,
687
  "step": 1130
688
  },
689
  {
690
  "epoch": 7.86,
691
  "learning_rate": 9.263157894736843e-05,
692
- "loss": 0.6321,
693
  "step": 1140
694
  },
695
  {
696
  "epoch": 7.93,
697
  "learning_rate": 9.210526315789474e-05,
698
- "loss": 0.6692,
699
  "step": 1150
700
  },
701
  {
702
  "epoch": 8.0,
703
  "learning_rate": 9.157894736842105e-05,
704
- "loss": 0.8402,
705
  "step": 1160
706
  },
707
  {
708
  "epoch": 8.07,
709
  "learning_rate": 9.105263157894738e-05,
710
- "loss": 0.6585,
711
  "step": 1170
712
  },
713
  {
714
  "epoch": 8.14,
715
  "learning_rate": 9.052631578947369e-05,
716
- "loss": 0.5458,
717
  "step": 1180
718
  },
719
  {
720
  "epoch": 8.21,
721
  "learning_rate": 9e-05,
722
- "loss": 0.6021,
723
  "step": 1190
724
  },
725
  {
726
  "epoch": 8.28,
727
  "learning_rate": 8.947368421052632e-05,
728
- "loss": 0.7951,
729
  "step": 1200
730
  },
731
  {
732
  "epoch": 8.34,
733
  "learning_rate": 8.894736842105263e-05,
734
- "loss": 0.5468,
735
  "step": 1210
736
  },
737
  {
738
  "epoch": 8.41,
739
  "learning_rate": 8.842105263157894e-05,
740
- "loss": 0.512,
741
  "step": 1220
742
  },
743
  {
744
  "epoch": 8.48,
745
  "learning_rate": 8.789473684210526e-05,
746
- "loss": 0.5986,
747
  "step": 1230
748
  },
749
  {
750
  "epoch": 8.55,
751
  "learning_rate": 8.736842105263158e-05,
752
- "loss": 0.7063,
753
  "step": 1240
754
  },
755
  {
756
  "epoch": 8.62,
757
  "learning_rate": 8.68421052631579e-05,
758
- "loss": 0.4963,
759
  "step": 1250
760
  },
761
  {
762
  "epoch": 8.69,
763
  "learning_rate": 8.631578947368421e-05,
764
- "loss": 0.5244,
765
  "step": 1260
766
  },
767
  {
768
  "epoch": 8.76,
769
  "learning_rate": 8.578947368421054e-05,
770
- "loss": 0.7563,
771
  "step": 1270
772
  },
773
  {
774
  "epoch": 8.83,
775
  "learning_rate": 8.526315789473685e-05,
776
- "loss": 0.5817,
777
  "step": 1280
778
  },
779
  {
780
  "epoch": 8.9,
781
  "learning_rate": 8.473684210526316e-05,
782
- "loss": 0.4977,
783
  "step": 1290
784
  },
785
  {
786
  "epoch": 8.97,
787
  "learning_rate": 8.421052631578948e-05,
788
- "loss": 0.5366,
789
  "step": 1300
790
  },
791
  {
792
  "epoch": 9.03,
793
  "learning_rate": 8.36842105263158e-05,
794
- "loss": 0.6796,
795
  "step": 1310
796
  },
797
  {
798
  "epoch": 9.1,
799
  "learning_rate": 8.315789473684212e-05,
800
- "loss": 0.4394,
801
  "step": 1320
802
  },
803
  {
804
  "epoch": 9.17,
805
  "learning_rate": 8.263157894736843e-05,
806
- "loss": 0.4338,
807
  "step": 1330
808
  },
809
  {
810
  "epoch": 9.24,
811
  "learning_rate": 8.210526315789474e-05,
812
- "loss": 0.553,
813
  "step": 1340
814
  },
815
  {
816
  "epoch": 9.31,
817
  "learning_rate": 8.157894736842105e-05,
818
- "loss": 0.5857,
819
  "step": 1350
820
  },
821
  {
822
  "epoch": 9.38,
823
  "learning_rate": 8.105263157894737e-05,
824
- "loss": 0.4039,
825
  "step": 1360
826
  },
827
  {
828
  "epoch": 9.45,
829
  "learning_rate": 8.052631578947368e-05,
830
- "loss": 0.4663,
831
  "step": 1370
832
  },
833
  {
834
  "epoch": 9.52,
835
  "learning_rate": 8e-05,
836
- "loss": 0.6259,
837
  "step": 1380
838
  },
839
  {
840
  "epoch": 9.59,
841
  "learning_rate": 7.947368421052632e-05,
842
- "loss": 0.4401,
843
  "step": 1390
844
  },
845
  {
846
  "epoch": 9.66,
847
  "learning_rate": 7.894736842105263e-05,
848
- "loss": 0.4078,
849
  "step": 1400
850
  },
851
  {
852
  "epoch": 9.72,
853
  "learning_rate": 7.842105263157895e-05,
854
- "loss": 0.4573,
855
  "step": 1410
856
  },
857
  {
858
  "epoch": 9.79,
859
  "learning_rate": 7.789473684210526e-05,
860
- "loss": 0.6243,
861
  "step": 1420
862
  },
863
  {
864
  "epoch": 9.86,
865
  "learning_rate": 7.736842105263159e-05,
866
- "loss": 0.4176,
867
  "step": 1430
868
  },
869
  {
870
  "epoch": 9.93,
871
  "learning_rate": 7.68421052631579e-05,
872
- "loss": 0.4041,
873
  "step": 1440
874
  },
875
  {
876
  "epoch": 10.0,
877
  "learning_rate": 7.631578947368422e-05,
878
- "loss": 0.6057,
879
  "step": 1450
880
  },
881
  {
882
  "epoch": 10.0,
883
- "eval_loss": 0.6450413465499878,
884
- "eval_runtime": 9.7845,
885
- "eval_samples_per_second": 171.701,
886
- "eval_steps_per_second": 5.417,
887
- "eval_wer": 0.6166356557094618,
888
  "step": 1450
889
  },
890
  {
891
  "epoch": 10.07,
892
  "learning_rate": 7.578947368421054e-05,
893
- "loss": 0.4214,
894
  "step": 1460
895
  },
896
  {
897
  "epoch": 10.14,
898
  "learning_rate": 7.526315789473685e-05,
899
- "loss": 0.3491,
900
  "step": 1470
901
  },
902
  {
903
  "epoch": 10.21,
904
  "learning_rate": 7.473684210526316e-05,
905
- "loss": 0.3768,
906
  "step": 1480
907
  },
908
  {
909
  "epoch": 10.28,
910
  "learning_rate": 7.421052631578948e-05,
911
- "loss": 0.5278,
912
  "step": 1490
913
  },
914
  {
915
  "epoch": 10.34,
916
  "learning_rate": 7.368421052631579e-05,
917
- "loss": 0.3585,
918
  "step": 1500
919
  },
920
  {
921
  "epoch": 10.41,
922
  "learning_rate": 7.315789473684212e-05,
923
- "loss": 0.3362,
924
  "step": 1510
925
  },
926
  {
927
  "epoch": 10.48,
928
  "learning_rate": 7.263157894736843e-05,
929
- "loss": 0.4173,
930
  "step": 1520
931
  },
932
  {
933
  "epoch": 10.55,
934
  "learning_rate": 7.210526315789474e-05,
935
- "loss": 0.519,
936
  "step": 1530
937
  },
938
  {
939
  "epoch": 10.62,
940
  "learning_rate": 7.157894736842105e-05,
941
- "loss": 0.3318,
942
  "step": 1540
943
  },
944
  {
945
  "epoch": 10.69,
946
  "learning_rate": 7.105263157894737e-05,
947
- "loss": 0.3875,
948
  "step": 1550
949
  },
950
  {
951
  "epoch": 10.76,
952
  "learning_rate": 7.052631578947368e-05,
953
- "loss": 0.5483,
954
  "step": 1560
955
  },
956
  {
957
  "epoch": 10.83,
958
  "learning_rate": 7e-05,
959
- "loss": 0.3939,
960
  "step": 1570
961
  },
962
  {
963
  "epoch": 10.9,
964
  "learning_rate": 6.947368421052632e-05,
965
- "loss": 0.337,
966
  "step": 1580
967
  },
968
  {
969
  "epoch": 10.97,
970
  "learning_rate": 6.894736842105263e-05,
971
- "loss": 0.3964,
972
  "step": 1590
973
  },
974
  {
975
  "epoch": 11.03,
976
  "learning_rate": 6.842105263157895e-05,
977
- "loss": 0.5173,
978
  "step": 1600
979
  },
980
  {
981
  "epoch": 11.1,
982
  "learning_rate": 6.789473684210527e-05,
983
- "loss": 0.2987,
984
  "step": 1610
985
  },
986
  {
987
  "epoch": 11.17,
988
  "learning_rate": 6.736842105263159e-05,
989
- "loss": 0.3273,
990
  "step": 1620
991
  },
992
  {
993
  "epoch": 11.24,
994
  "learning_rate": 6.68421052631579e-05,
995
- "loss": 0.4159,
996
  "step": 1630
997
  },
998
  {
999
  "epoch": 11.31,
1000
  "learning_rate": 6.631578947368421e-05,
1001
- "loss": 0.3859,
1002
  "step": 1640
1003
  },
1004
  {
1005
  "epoch": 11.38,
1006
  "learning_rate": 6.578947368421054e-05,
1007
- "loss": 0.2825,
1008
  "step": 1650
1009
  },
1010
  {
1011
  "epoch": 11.45,
1012
  "learning_rate": 6.526315789473685e-05,
1013
- "loss": 0.3041,
1014
  "step": 1660
1015
  },
1016
  {
1017
  "epoch": 11.52,
1018
  "learning_rate": 6.473684210526316e-05,
1019
- "loss": 0.4922,
1020
  "step": 1670
1021
  },
1022
  {
1023
  "epoch": 11.59,
1024
  "learning_rate": 6.421052631578948e-05,
1025
- "loss": 0.3212,
1026
  "step": 1680
1027
  },
1028
  {
1029
  "epoch": 11.66,
1030
  "learning_rate": 6.368421052631579e-05,
1031
- "loss": 0.2861,
1032
  "step": 1690
1033
  },
1034
  {
1035
  "epoch": 11.72,
1036
  "learning_rate": 6.31578947368421e-05,
1037
- "loss": 0.3634,
1038
  "step": 1700
1039
  },
1040
  {
1041
  "epoch": 11.79,
1042
  "learning_rate": 6.263157894736842e-05,
1043
- "loss": 0.458,
1044
  "step": 1710
1045
  },
1046
  {
1047
  "epoch": 11.86,
1048
  "learning_rate": 6.210526315789474e-05,
1049
- "loss": 0.2961,
1050
  "step": 1720
1051
  },
1052
  {
1053
  "epoch": 11.93,
1054
  "learning_rate": 6.157894736842106e-05,
1055
- "loss": 0.3275,
1056
  "step": 1730
1057
  },
1058
  {
1059
  "epoch": 12.0,
1060
  "learning_rate": 6.105263157894737e-05,
1061
- "loss": 0.4403,
1062
  "step": 1740
1063
  },
1064
  {
1065
  "epoch": 12.07,
1066
  "learning_rate": 6.052631578947369e-05,
1067
- "loss": 0.3202,
1068
  "step": 1750
1069
  },
1070
  {
1071
  "epoch": 12.14,
1072
  "learning_rate": 6e-05,
1073
- "loss": 0.2352,
1074
  "step": 1760
1075
  },
1076
  {
1077
  "epoch": 12.21,
1078
  "learning_rate": 5.9473684210526315e-05,
1079
- "loss": 0.2907,
1080
  "step": 1770
1081
  },
1082
  {
1083
  "epoch": 12.28,
1084
  "learning_rate": 5.894736842105263e-05,
1085
- "loss": 0.417,
1086
  "step": 1780
1087
  },
1088
  {
1089
  "epoch": 12.34,
1090
  "learning_rate": 5.8421052631578954e-05,
1091
- "loss": 0.2678,
1092
  "step": 1790
1093
  },
1094
  {
1095
  "epoch": 12.41,
1096
  "learning_rate": 5.789473684210527e-05,
1097
- "loss": 0.2741,
1098
  "step": 1800
1099
  },
1100
  {
1101
  "epoch": 12.48,
1102
  "learning_rate": 5.736842105263158e-05,
1103
- "loss": 0.3506,
1104
  "step": 1810
1105
  },
1106
  {
1107
  "epoch": 12.55,
1108
  "learning_rate": 5.68421052631579e-05,
1109
- "loss": 0.385,
1110
  "step": 1820
1111
  },
1112
  {
1113
  "epoch": 12.62,
1114
  "learning_rate": 5.631578947368421e-05,
1115
- "loss": 0.25,
1116
  "step": 1830
1117
  },
1118
  {
1119
  "epoch": 12.69,
1120
  "learning_rate": 5.5789473684210526e-05,
1121
- "loss": 0.285,
1122
  "step": 1840
1123
  },
1124
  {
1125
  "epoch": 12.76,
1126
  "learning_rate": 5.526315789473685e-05,
1127
- "loss": 0.4443,
1128
  "step": 1850
1129
  },
1130
  {
1131
  "epoch": 12.83,
1132
  "learning_rate": 5.4736842105263165e-05,
1133
- "loss": 0.2927,
1134
  "step": 1860
1135
  },
1136
  {
1137
  "epoch": 12.9,
1138
  "learning_rate": 5.421052631578948e-05,
1139
- "loss": 0.2531,
1140
  "step": 1870
1141
  },
1142
  {
1143
  "epoch": 12.97,
1144
  "learning_rate": 5.368421052631579e-05,
1145
- "loss": 0.3416,
1146
  "step": 1880
1147
  },
1148
  {
1149
  "epoch": 13.03,
1150
  "learning_rate": 5.3157894736842104e-05,
1151
- "loss": 0.3826,
1152
  "step": 1890
1153
  },
1154
  {
1155
  "epoch": 13.1,
1156
  "learning_rate": 5.2631578947368424e-05,
1157
- "loss": 0.231,
1158
  "step": 1900
1159
  },
1160
  {
1161
  "epoch": 13.17,
1162
  "learning_rate": 5.210526315789474e-05,
1163
- "loss": 0.254,
1164
  "step": 1910
1165
  },
1166
  {
1167
  "epoch": 13.24,
1168
  "learning_rate": 5.157894736842106e-05,
1169
- "loss": 0.3595,
1170
  "step": 1920
1171
  },
1172
  {
1173
  "epoch": 13.31,
1174
  "learning_rate": 5.1052631578947376e-05,
1175
- "loss": 0.312,
1176
  "step": 1930
1177
  },
1178
  {
1179
  "epoch": 13.38,
1180
  "learning_rate": 5.052631578947369e-05,
1181
- "loss": 0.2345,
1182
  "step": 1940
1183
  },
1184
  {
1185
  "epoch": 13.45,
1186
  "learning_rate": 5e-05,
1187
- "loss": 0.2783,
1188
  "step": 1950
1189
  },
1190
  {
1191
  "epoch": 13.52,
1192
  "learning_rate": 4.9473684210526315e-05,
1193
- "loss": 0.4077,
1194
  "step": 1960
1195
  },
1196
  {
1197
  "epoch": 13.59,
1198
  "learning_rate": 4.8947368421052635e-05,
1199
- "loss": 0.2558,
1200
  "step": 1970
1201
  },
1202
  {
1203
  "epoch": 13.66,
1204
  "learning_rate": 4.842105263157895e-05,
1205
- "loss": 0.227,
1206
  "step": 1980
1207
  },
1208
  {
1209
  "epoch": 13.72,
1210
  "learning_rate": 4.789473684210526e-05,
1211
- "loss": 0.3002,
1212
  "step": 1990
1213
  },
1214
  {
1215
  "epoch": 13.79,
1216
  "learning_rate": 4.736842105263158e-05,
1217
- "loss": 0.3746,
1218
  "step": 2000
1219
  },
1220
  {
1221
  "epoch": 13.86,
1222
  "learning_rate": 4.68421052631579e-05,
1223
- "loss": 0.2459,
1224
  "step": 2010
1225
  },
1226
  {
1227
  "epoch": 13.93,
1228
  "learning_rate": 4.6315789473684214e-05,
1229
- "loss": 0.2841,
1230
  "step": 2020
1231
  },
1232
  {
1233
  "epoch": 14.0,
1234
  "learning_rate": 4.5789473684210527e-05,
1235
- "loss": 0.3758,
1236
  "step": 2030
1237
  },
1238
  {
1239
  "epoch": 14.07,
1240
  "learning_rate": 4.5263157894736846e-05,
1241
- "loss": 0.2574,
1242
  "step": 2040
1243
  },
1244
  {
1245
  "epoch": 14.14,
1246
  "learning_rate": 4.473684210526316e-05,
1247
- "loss": 0.1991,
1248
  "step": 2050
1249
  },
1250
  {
1251
  "epoch": 14.21,
1252
  "learning_rate": 4.421052631578947e-05,
1253
- "loss": 0.2724,
1254
  "step": 2060
1255
  },
1256
  {
1257
  "epoch": 14.28,
1258
  "learning_rate": 4.368421052631579e-05,
1259
- "loss": 0.3524,
1260
  "step": 2070
1261
  },
1262
  {
1263
  "epoch": 14.34,
1264
  "learning_rate": 4.3157894736842105e-05,
1265
- "loss": 0.2233,
1266
  "step": 2080
1267
  },
1268
  {
1269
  "epoch": 14.41,
1270
  "learning_rate": 4.2631578947368425e-05,
1271
- "loss": 0.2486,
1272
  "step": 2090
1273
  },
1274
  {
1275
  "epoch": 14.48,
1276
  "learning_rate": 4.210526315789474e-05,
1277
- "loss": 0.3034,
1278
  "step": 2100
1279
  },
1280
  {
1281
  "epoch": 14.55,
1282
  "learning_rate": 4.157894736842106e-05,
1283
- "loss": 0.3108,
1284
  "step": 2110
1285
  },
1286
  {
1287
  "epoch": 14.62,
1288
  "learning_rate": 4.105263157894737e-05,
1289
- "loss": 0.2047,
1290
  "step": 2120
1291
  },
1292
  {
1293
  "epoch": 14.69,
1294
  "learning_rate": 4.0526315789473684e-05,
1295
- "loss": 0.2412,
1296
  "step": 2130
1297
  },
1298
  {
1299
  "epoch": 14.76,
1300
  "learning_rate": 4e-05,
1301
- "loss": 0.3715,
1302
  "step": 2140
1303
  },
1304
  {
1305
  "epoch": 14.83,
1306
  "learning_rate": 3.9473684210526316e-05,
1307
- "loss": 0.2471,
1308
  "step": 2150
1309
  },
1310
  {
1311
  "epoch": 14.9,
1312
  "learning_rate": 3.894736842105263e-05,
1313
- "loss": 0.2142,
1314
  "step": 2160
1315
  },
1316
  {
1317
  "epoch": 14.97,
1318
  "learning_rate": 3.842105263157895e-05,
1319
- "loss": 0.2709,
1320
  "step": 2170
1321
  },
1322
  {
1323
  "epoch": 15.03,
1324
  "learning_rate": 3.789473684210527e-05,
1325
- "loss": 0.3467,
1326
  "step": 2180
1327
  },
1328
  {
1329
  "epoch": 15.1,
1330
  "learning_rate": 3.736842105263158e-05,
1331
- "loss": 0.2154,
1332
  "step": 2190
1333
  },
1334
  {
1335
  "epoch": 15.17,
1336
  "learning_rate": 3.6842105263157895e-05,
1337
- "loss": 0.214,
1338
  "step": 2200
1339
  },
1340
  {
1341
  "epoch": 15.24,
1342
  "learning_rate": 3.6315789473684214e-05,
1343
- "loss": 0.3125,
1344
  "step": 2210
1345
  },
1346
  {
1347
  "epoch": 15.31,
1348
  "learning_rate": 3.578947368421053e-05,
1349
- "loss": 0.2642,
1350
  "step": 2220
1351
  },
1352
  {
1353
  "epoch": 15.38,
1354
  "learning_rate": 3.526315789473684e-05,
1355
- "loss": 0.1977,
1356
  "step": 2230
1357
  },
1358
  {
1359
  "epoch": 15.45,
1360
  "learning_rate": 3.473684210526316e-05,
1361
- "loss": 0.231,
1362
  "step": 2240
1363
  },
1364
  {
1365
  "epoch": 15.52,
1366
  "learning_rate": 3.421052631578947e-05,
1367
- "loss": 0.3571,
1368
  "step": 2250
1369
  },
1370
  {
1371
  "epoch": 15.59,
1372
  "learning_rate": 3.368421052631579e-05,
1373
- "loss": 0.2162,
1374
  "step": 2260
1375
  },
1376
  {
1377
  "epoch": 15.66,
1378
  "learning_rate": 3.3157894736842106e-05,
1379
- "loss": 0.2054,
1380
  "step": 2270
1381
  },
1382
  {
1383
  "epoch": 15.72,
1384
  "learning_rate": 3.2631578947368426e-05,
1385
- "loss": 0.2829,
1386
  "step": 2280
1387
  },
1388
  {
1389
  "epoch": 15.79,
1390
  "learning_rate": 3.210526315789474e-05,
1391
- "loss": 0.321,
1392
  "step": 2290
1393
  },
1394
  {
1395
  "epoch": 15.86,
1396
  "learning_rate": 3.157894736842105e-05,
1397
- "loss": 0.1882,
1398
  "step": 2300
1399
  },
1400
  {
1401
  "epoch": 15.93,
1402
  "learning_rate": 3.105263157894737e-05,
1403
- "loss": 0.2393,
1404
  "step": 2310
1405
  },
1406
  {
1407
  "epoch": 16.0,
1408
  "learning_rate": 3.0526315789473684e-05,
1409
- "loss": 0.3597,
1410
  "step": 2320
1411
  },
1412
  {
1413
  "epoch": 16.07,
1414
  "learning_rate": 3e-05,
1415
- "loss": 0.2243,
1416
  "step": 2330
1417
  },
1418
  {
1419
  "epoch": 16.14,
1420
  "learning_rate": 2.9473684210526314e-05,
1421
- "loss": 0.1971,
1422
  "step": 2340
1423
  },
1424
  {
1425
  "epoch": 16.21,
1426
  "learning_rate": 2.8947368421052634e-05,
1427
- "loss": 0.2363,
1428
  "step": 2350
1429
  },
1430
  {
1431
  "epoch": 16.28,
1432
  "learning_rate": 2.842105263157895e-05,
1433
- "loss": 0.3159,
1434
  "step": 2360
1435
  },
1436
  {
1437
  "epoch": 16.34,
1438
  "learning_rate": 2.7894736842105263e-05,
1439
- "loss": 0.1789,
1440
  "step": 2370
1441
  },
1442
  {
1443
  "epoch": 16.41,
1444
  "learning_rate": 2.7368421052631583e-05,
1445
- "loss": 0.2081,
1446
  "step": 2380
1447
  },
1448
  {
1449
  "epoch": 16.48,
1450
  "learning_rate": 2.6842105263157896e-05,
1451
- "loss": 0.2722,
1452
  "step": 2390
1453
  },
1454
  {
1455
  "epoch": 16.55,
1456
  "learning_rate": 2.6315789473684212e-05,
1457
- "loss": 0.2745,
1458
  "step": 2400
1459
  },
1460
  {
1461
  "epoch": 16.62,
1462
  "learning_rate": 2.578947368421053e-05,
1463
- "loss": 0.1838,
1464
  "step": 2410
1465
  },
1466
  {
1467
  "epoch": 16.69,
1468
  "learning_rate": 2.5263157894736845e-05,
1469
- "loss": 0.2324,
1470
  "step": 2420
1471
  },
1472
  {
1473
  "epoch": 16.76,
1474
  "learning_rate": 2.4736842105263158e-05,
1475
- "loss": 0.3483,
1476
  "step": 2430
1477
  },
1478
  {
1479
  "epoch": 16.83,
1480
  "learning_rate": 2.4210526315789474e-05,
1481
- "loss": 0.2155,
1482
  "step": 2440
1483
  },
1484
  {
1485
  "epoch": 16.9,
1486
  "learning_rate": 2.368421052631579e-05,
1487
- "loss": 0.2028,
1488
  "step": 2450
1489
  },
1490
  {
1491
  "epoch": 16.97,
1492
  "learning_rate": 2.3157894736842107e-05,
1493
- "loss": 0.2449,
1494
  "step": 2460
1495
  },
1496
  {
1497
  "epoch": 17.03,
1498
  "learning_rate": 2.2631578947368423e-05,
1499
- "loss": 0.3167,
1500
  "step": 2470
1501
  },
1502
  {
1503
  "epoch": 17.1,
1504
  "learning_rate": 2.2105263157894736e-05,
1505
- "loss": 0.1844,
1506
  "step": 2480
1507
  },
1508
  {
1509
  "epoch": 17.17,
1510
  "learning_rate": 2.1578947368421053e-05,
1511
- "loss": 0.1915,
1512
  "step": 2490
1513
  },
1514
  {
1515
  "epoch": 17.24,
1516
  "learning_rate": 2.105263157894737e-05,
1517
- "loss": 0.2815,
1518
  "step": 2500
1519
  },
1520
  {
1521
  "epoch": 17.31,
1522
  "learning_rate": 2.0526315789473685e-05,
1523
- "loss": 0.2585,
1524
  "step": 2510
1525
  },
1526
  {
1527
  "epoch": 17.38,
1528
  "learning_rate": 2e-05,
1529
- "loss": 0.1617,
1530
  "step": 2520
1531
  },
1532
  {
1533
  "epoch": 17.45,
1534
  "learning_rate": 1.9473684210526315e-05,
1535
- "loss": 0.2171,
1536
  "step": 2530
1537
  },
1538
  {
1539
  "epoch": 17.52,
1540
  "learning_rate": 1.8947368421052634e-05,
1541
- "loss": 0.3201,
1542
  "step": 2540
1543
  },
1544
  {
1545
  "epoch": 17.59,
1546
  "learning_rate": 1.8421052631578947e-05,
1547
- "loss": 0.1838,
1548
  "step": 2550
1549
  },
1550
  {
1551
  "epoch": 17.66,
1552
  "learning_rate": 1.7894736842105264e-05,
1553
- "loss": 0.1925,
1554
  "step": 2560
1555
  },
1556
  {
1557
  "epoch": 17.72,
1558
  "learning_rate": 1.736842105263158e-05,
1559
- "loss": 0.2416,
1560
  "step": 2570
1561
  },
1562
  {
1563
  "epoch": 17.79,
1564
  "learning_rate": 1.6842105263157896e-05,
1565
- "loss": 0.2668,
1566
  "step": 2580
1567
  },
1568
  {
1569
  "epoch": 17.86,
1570
  "learning_rate": 1.6315789473684213e-05,
1571
- "loss": 0.1689,
1572
  "step": 2590
1573
  },
1574
  {
1575
  "epoch": 17.93,
1576
  "learning_rate": 1.5789473684210526e-05,
1577
- "loss": 0.2112,
1578
  "step": 2600
1579
  },
1580
  {
1581
  "epoch": 18.0,
1582
  "learning_rate": 1.5263157894736842e-05,
1583
- "loss": 0.3257,
1584
  "step": 2610
1585
  },
1586
  {
1587
  "epoch": 18.07,
1588
  "learning_rate": 1.4736842105263157e-05,
1589
- "loss": 0.2023,
1590
  "step": 2620
1591
  },
1592
  {
1593
  "epoch": 18.14,
1594
  "learning_rate": 1.4210526315789475e-05,
1595
- "loss": 0.1681,
1596
  "step": 2630
1597
  },
1598
  {
1599
  "epoch": 18.21,
1600
  "learning_rate": 1.3684210526315791e-05,
1601
- "loss": 0.2162,
1602
  "step": 2640
1603
  },
1604
  {
1605
  "epoch": 18.28,
1606
  "learning_rate": 1.3157894736842106e-05,
1607
- "loss": 0.3089,
1608
  "step": 2650
1609
  },
1610
  {
1611
  "epoch": 18.34,
1612
  "learning_rate": 1.2631578947368422e-05,
1613
- "loss": 0.1866,
1614
  "step": 2660
1615
  },
1616
  {
1617
  "epoch": 18.41,
1618
  "learning_rate": 1.2105263157894737e-05,
1619
- "loss": 0.1948,
1620
  "step": 2670
1621
  },
1622
  {
1623
  "epoch": 18.48,
1624
  "learning_rate": 1.1578947368421053e-05,
1625
- "loss": 0.2388,
1626
  "step": 2680
1627
  },
1628
  {
1629
  "epoch": 18.55,
1630
  "learning_rate": 1.1052631578947368e-05,
1631
- "loss": 0.2446,
1632
  "step": 2690
1633
  },
1634
  {
1635
  "epoch": 18.62,
1636
  "learning_rate": 1.0526315789473684e-05,
1637
- "loss": 0.1834,
1638
  "step": 2700
1639
  },
1640
  {
1641
  "epoch": 18.69,
1642
  "learning_rate": 1e-05,
1643
- "loss": 0.2103,
1644
  "step": 2710
1645
  },
1646
  {
1647
  "epoch": 18.76,
1648
  "learning_rate": 9.473684210526317e-06,
1649
- "loss": 0.3084,
1650
  "step": 2720
1651
  },
1652
  {
1653
  "epoch": 18.83,
1654
  "learning_rate": 8.947368421052632e-06,
1655
- "loss": 0.1919,
1656
  "step": 2730
1657
  },
1658
  {
1659
  "epoch": 18.9,
1660
  "learning_rate": 8.421052631578948e-06,
1661
- "loss": 0.1744,
1662
  "step": 2740
1663
  },
1664
  {
1665
  "epoch": 18.97,
1666
  "learning_rate": 7.894736842105263e-06,
1667
- "loss": 0.2449,
1668
  "step": 2750
1669
  },
1670
  {
1671
  "epoch": 19.03,
1672
  "learning_rate": 7.3684210526315784e-06,
1673
- "loss": 0.3009,
1674
  "step": 2760
1675
  },
1676
  {
1677
  "epoch": 19.1,
1678
  "learning_rate": 6.842105263157896e-06,
1679
- "loss": 0.1799,
1680
  "step": 2770
1681
  },
1682
  {
1683
  "epoch": 19.17,
1684
  "learning_rate": 6.315789473684211e-06,
1685
- "loss": 0.2052,
1686
  "step": 2780
1687
  },
1688
  {
1689
  "epoch": 19.24,
1690
  "learning_rate": 5.789473684210527e-06,
1691
- "loss": 0.2713,
1692
  "step": 2790
1693
  },
1694
  {
1695
  "epoch": 19.31,
1696
  "learning_rate": 5.263157894736842e-06,
1697
- "loss": 0.2286,
1698
  "step": 2800
1699
  },
1700
  {
1701
  "epoch": 19.38,
1702
  "learning_rate": 4.736842105263159e-06,
1703
- "loss": 0.1707,
1704
  "step": 2810
1705
  },
1706
  {
1707
  "epoch": 19.45,
1708
  "learning_rate": 4.210526315789474e-06,
1709
- "loss": 0.1961,
1710
  "step": 2820
1711
  },
1712
  {
1713
  "epoch": 19.52,
1714
  "learning_rate": 3.6842105263157892e-06,
1715
- "loss": 0.2916,
1716
  "step": 2830
1717
  },
1718
  {
1719
  "epoch": 19.59,
1720
  "learning_rate": 3.1578947368421056e-06,
1721
- "loss": 0.1624,
1722
  "step": 2840
1723
  },
1724
  {
1725
  "epoch": 19.66,
1726
  "learning_rate": 2.631578947368421e-06,
1727
- "loss": 0.1789,
1728
  "step": 2850
1729
  },
1730
  {
1731
  "epoch": 19.72,
1732
  "learning_rate": 2.105263157894737e-06,
1733
- "loss": 0.2253,
1734
  "step": 2860
1735
  },
1736
  {
1737
  "epoch": 19.79,
1738
  "learning_rate": 1.5789473684210528e-06,
1739
- "loss": 0.2809,
1740
  "step": 2870
1741
  },
1742
  {
1743
  "epoch": 19.86,
1744
  "learning_rate": 1.0526315789473685e-06,
1745
- "loss": 0.1673,
1746
  "step": 2880
1747
  },
1748
  {
1749
  "epoch": 19.93,
1750
  "learning_rate": 5.263157894736843e-07,
1751
- "loss": 0.1979,
1752
  "step": 2890
1753
  },
1754
  {
1755
  "epoch": 20.0,
1756
  "learning_rate": 0.0,
1757
- "loss": 0.327,
1758
  "step": 2900
1759
  },
1760
  {
1761
  "epoch": 20.0,
1762
- "eval_loss": 0.6916695237159729,
1763
- "eval_runtime": 8.4317,
1764
- "eval_samples_per_second": 199.249,
1765
- "eval_steps_per_second": 6.286,
1766
- "eval_wer": 0.5904486251808972,
1767
  "step": 2900
1768
  },
1769
  {
1770
  "epoch": 20.0,
1771
  "step": 2900,
1772
  "total_flos": 2.6569362344615726e+18,
1773
- "train_loss": 1.2737908951167403,
1774
- "train_runtime": 394.8383,
1775
- "train_samples_per_second": 234.02,
1776
- "train_steps_per_second": 7.345
1777
  }
1778
  ],
1779
  "logging_steps": 10,
 
11
  {
12
  "epoch": 0.07,
13
  "learning_rate": 1.0000000000000002e-06,
14
+ "loss": 8.9224,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.14,
19
  "learning_rate": 2.0000000000000003e-06,
20
+ "loss": 8.5406,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.21,
25
  "learning_rate": 3e-06,
26
+ "loss": 7.9663,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.28,
31
  "learning_rate": 4.000000000000001e-06,
32
+ "loss": 8.388,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.34,
37
  "learning_rate": 5e-06,
38
+ "loss": 8.6117,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.41,
43
  "learning_rate": 6e-06,
44
+ "loss": 8.2034,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.48,
49
  "learning_rate": 7.000000000000001e-06,
50
+ "loss": 7.7261,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.55,
55
  "learning_rate": 8.000000000000001e-06,
56
+ "loss": 8.6426,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.62,
61
  "learning_rate": 9e-06,
62
+ "loss": 8.3122,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 0.69,
67
  "learning_rate": 1e-05,
68
+ "loss": 7.9465,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 0.76,
73
  "learning_rate": 1.1000000000000001e-05,
74
+ "loss": 7.9357,
75
  "step": 110
76
  },
77
  {
78
  "epoch": 0.83,
79
  "learning_rate": 1.2e-05,
80
+ "loss": 8.333,
81
  "step": 120
82
  },
83
  {
84
  "epoch": 0.9,
85
  "learning_rate": 1.3000000000000001e-05,
86
+ "loss": 8.0391,
87
  "step": 130
88
  },
89
  {
90
  "epoch": 0.97,
91
  "learning_rate": 1.4000000000000001e-05,
92
+ "loss": 7.5063,
93
  "step": 140
94
  },
95
  {
96
  "epoch": 1.03,
97
  "learning_rate": 1.5e-05,
98
+ "loss": 7.9733,
99
  "step": 150
100
  },
101
  {
102
  "epoch": 1.1,
103
  "learning_rate": 1.6000000000000003e-05,
104
+ "loss": 7.8903,
105
  "step": 160
106
  },
107
  {
108
  "epoch": 1.17,
109
  "learning_rate": 1.7000000000000003e-05,
110
+ "loss": 7.6222,
111
  "step": 170
112
  },
113
  {
114
  "epoch": 1.24,
115
  "learning_rate": 1.8e-05,
116
+ "loss": 7.1372,
117
  "step": 180
118
  },
119
  {
120
  "epoch": 1.31,
121
  "learning_rate": 1.9e-05,
122
+ "loss": 8.0093,
123
  "step": 190
124
  },
125
  {
126
  "epoch": 1.38,
127
  "learning_rate": 2e-05,
128
+ "loss": 7.4925,
129
  "step": 200
130
  },
131
  {
132
  "epoch": 1.45,
133
  "learning_rate": 2.1e-05,
134
+ "loss": 7.0882,
135
  "step": 210
136
  },
137
  {
138
  "epoch": 1.52,
139
  "learning_rate": 2.2000000000000003e-05,
140
+ "loss": 7.3101,
141
  "step": 220
142
  },
143
  {
144
  "epoch": 1.59,
145
  "learning_rate": 2.3000000000000003e-05,
146
+ "loss": 7.3497,
147
  "step": 230
148
  },
149
  {
150
  "epoch": 1.66,
151
  "learning_rate": 2.4e-05,
152
+ "loss": 6.957,
153
  "step": 240
154
  },
155
  {
156
  "epoch": 1.72,
157
  "learning_rate": 2.5e-05,
158
+ "loss": 6.6912,
159
  "step": 250
160
  },
161
  {
162
  "epoch": 1.79,
163
  "learning_rate": 2.6000000000000002e-05,
164
+ "loss": 6.9221,
165
  "step": 260
166
  },
167
  {
168
  "epoch": 1.86,
169
  "learning_rate": 2.7000000000000002e-05,
170
+ "loss": 6.7065,
171
  "step": 270
172
  },
173
  {
174
  "epoch": 1.93,
175
  "learning_rate": 2.8000000000000003e-05,
176
+ "loss": 6.3174,
177
  "step": 280
178
  },
179
  {
180
  "epoch": 2.0,
181
  "learning_rate": 2.9e-05,
182
+ "loss": 6.1904,
183
  "step": 290
184
  },
185
  {
186
  "epoch": 2.07,
187
  "learning_rate": 3e-05,
188
+ "loss": 6.4006,
189
  "step": 300
190
  },
191
  {
192
  "epoch": 2.14,
193
  "learning_rate": 3.1e-05,
194
+ "loss": 6.0932,
195
  "step": 310
196
  },
197
  {
198
  "epoch": 2.21,
199
  "learning_rate": 3.2000000000000005e-05,
200
+ "loss": 5.717,
201
  "step": 320
202
  },
203
  {
204
  "epoch": 2.28,
205
  "learning_rate": 3.3e-05,
206
+ "loss": 5.8356,
207
  "step": 330
208
  },
209
  {
210
  "epoch": 2.34,
211
  "learning_rate": 3.4000000000000007e-05,
212
+ "loss": 5.6073,
213
  "step": 340
214
  },
215
  {
216
  "epoch": 2.41,
217
  "learning_rate": 3.5e-05,
218
+ "loss": 5.3597,
219
  "step": 350
220
  },
221
  {
222
  "epoch": 2.48,
223
  "learning_rate": 3.6e-05,
224
+ "loss": 5.0843,
225
  "step": 360
226
  },
227
  {
228
  "epoch": 2.55,
229
  "learning_rate": 3.7e-05,
230
+ "loss": 5.3473,
231
  "step": 370
232
  },
233
  {
234
  "epoch": 2.62,
235
  "learning_rate": 3.8e-05,
236
+ "loss": 5.0528,
237
  "step": 380
238
  },
239
  {
240
  "epoch": 2.69,
241
  "learning_rate": 3.9000000000000006e-05,
242
+ "loss": 4.8183,
243
  "step": 390
244
  },
245
  {
246
  "epoch": 2.76,
247
  "learning_rate": 4e-05,
248
+ "loss": 4.7668,
249
  "step": 400
250
  },
251
  {
252
  "epoch": 2.83,
253
  "learning_rate": 4.1e-05,
254
+ "loss": 4.6995,
255
  "step": 410
256
  },
257
  {
258
  "epoch": 2.9,
259
  "learning_rate": 4.2e-05,
260
+ "loss": 4.5144,
261
  "step": 420
262
  },
263
  {
264
  "epoch": 2.97,
265
  "learning_rate": 4.3e-05,
266
+ "loss": 4.3133,
267
  "step": 430
268
  },
269
  {
270
  "epoch": 3.03,
271
  "learning_rate": 4.4000000000000006e-05,
272
+ "loss": 4.3413,
273
  "step": 440
274
  },
275
  {
276
  "epoch": 3.1,
277
  "learning_rate": 4.5e-05,
278
+ "loss": 4.1984,
279
  "step": 450
280
  },
281
  {
282
  "epoch": 3.17,
283
  "learning_rate": 4.600000000000001e-05,
284
+ "loss": 4.0813,
285
  "step": 460
286
  },
287
  {
288
  "epoch": 3.24,
289
  "learning_rate": 4.7e-05,
290
+ "loss": 4.0248,
291
  "step": 470
292
  },
293
  {
294
  "epoch": 3.31,
295
  "learning_rate": 4.8e-05,
296
+ "loss": 4.0405,
297
  "step": 480
298
  },
299
  {
300
  "epoch": 3.38,
301
  "learning_rate": 4.9e-05,
302
+ "loss": 3.9165,
303
  "step": 490
304
  },
305
  {
306
  "epoch": 3.45,
307
  "learning_rate": 5e-05,
308
+ "loss": 3.8314,
309
  "step": 500
310
  },
311
  {
312
  "epoch": 3.52,
313
  "learning_rate": 5.1000000000000006e-05,
314
+ "loss": 3.8916,
315
  "step": 510
316
  },
317
  {
318
  "epoch": 3.59,
319
  "learning_rate": 5.2000000000000004e-05,
320
+ "loss": 3.8167,
321
  "step": 520
322
  },
323
  {
324
  "epoch": 3.66,
325
  "learning_rate": 5.300000000000001e-05,
326
+ "loss": 3.7126,
327
  "step": 530
328
  },
329
  {
330
  "epoch": 3.72,
331
  "learning_rate": 5.4000000000000005e-05,
332
+ "loss": 3.6607,
333
  "step": 540
334
  },
335
  {
336
  "epoch": 3.79,
337
  "learning_rate": 5.500000000000001e-05,
338
+ "loss": 3.7683,
339
  "step": 550
340
  },
341
  {
342
  "epoch": 3.86,
343
  "learning_rate": 5.6000000000000006e-05,
344
+ "loss": 3.6667,
345
  "step": 560
346
  },
347
  {
348
  "epoch": 3.93,
349
  "learning_rate": 5.6999999999999996e-05,
350
+ "loss": 3.6181,
351
  "step": 570
352
  },
353
  {
354
  "epoch": 4.0,
355
  "learning_rate": 5.8e-05,
356
+ "loss": 3.6345,
357
  "step": 580
358
  },
359
  {
360
  "epoch": 4.07,
361
  "learning_rate": 5.9e-05,
362
+ "loss": 3.6936,
363
  "step": 590
364
  },
365
  {
366
  "epoch": 4.14,
367
  "learning_rate": 6e-05,
368
+ "loss": 3.5988,
369
  "step": 600
370
  },
371
  {
372
  "epoch": 4.21,
373
  "learning_rate": 6.1e-05,
374
+ "loss": 3.5657,
375
  "step": 610
376
  },
377
  {
378
  "epoch": 4.28,
379
  "learning_rate": 6.2e-05,
380
+ "loss": 3.6574,
381
  "step": 620
382
  },
383
  {
384
  "epoch": 4.34,
385
  "learning_rate": 6.3e-05,
386
+ "loss": 3.602,
387
  "step": 630
388
  },
389
  {
390
  "epoch": 4.41,
391
  "learning_rate": 6.400000000000001e-05,
392
+ "loss": 3.5477,
393
  "step": 640
394
  },
395
  {
396
  "epoch": 4.48,
397
  "learning_rate": 6.500000000000001e-05,
398
+ "loss": 3.5357,
399
  "step": 650
400
  },
401
  {
402
  "epoch": 4.55,
403
  "learning_rate": 6.6e-05,
404
+ "loss": 3.6488,
405
  "step": 660
406
  },
407
  {
408
  "epoch": 4.62,
409
  "learning_rate": 6.7e-05,
410
+ "loss": 3.5509,
411
  "step": 670
412
  },
413
  {
414
  "epoch": 4.69,
415
  "learning_rate": 6.800000000000001e-05,
416
+ "loss": 3.5384,
417
  "step": 680
418
  },
419
  {
420
  "epoch": 4.76,
421
  "learning_rate": 6.9e-05,
422
+ "loss": 3.5553,
423
  "step": 690
424
  },
425
  {
426
  "epoch": 4.83,
427
  "learning_rate": 7e-05,
428
+ "loss": 3.5934,
429
  "step": 700
430
  },
431
  {
432
  "epoch": 4.9,
433
  "learning_rate": 7.1e-05,
434
+ "loss": 3.5065,
435
  "step": 710
436
  },
437
  {
438
  "epoch": 4.97,
439
  "learning_rate": 7.2e-05,
440
+ "loss": 3.473,
441
  "step": 720
442
  },
443
  {
444
  "epoch": 5.03,
445
  "learning_rate": 7.3e-05,
446
+ "loss": 3.5895,
447
  "step": 730
448
  },
449
  {
450
  "epoch": 5.1,
451
  "learning_rate": 7.4e-05,
452
+ "loss": 3.5227,
453
  "step": 740
454
  },
455
  {
456
  "epoch": 5.17,
457
  "learning_rate": 7.500000000000001e-05,
458
+ "loss": 3.5018,
459
  "step": 750
460
  },
461
  {
462
  "epoch": 5.24,
463
  "learning_rate": 7.6e-05,
464
+ "loss": 3.4822,
465
  "step": 760
466
  },
467
  {
468
  "epoch": 5.31,
469
  "learning_rate": 7.7e-05,
470
+ "loss": 3.5872,
471
  "step": 770
472
  },
473
  {
474
  "epoch": 5.38,
475
  "learning_rate": 7.800000000000001e-05,
476
+ "loss": 3.482,
477
  "step": 780
478
  },
479
  {
480
  "epoch": 5.45,
481
  "learning_rate": 7.900000000000001e-05,
482
+ "loss": 3.4622,
483
  "step": 790
484
  },
485
  {
486
  "epoch": 5.52,
487
  "learning_rate": 8e-05,
488
+ "loss": 3.4979,
489
  "step": 800
490
  },
491
  {
492
  "epoch": 5.59,
493
  "learning_rate": 8.1e-05,
494
+ "loss": 3.5079,
495
  "step": 810
496
  },
497
  {
498
  "epoch": 5.66,
499
  "learning_rate": 8.2e-05,
500
+ "loss": 3.4341,
501
  "step": 820
502
  },
503
  {
504
  "epoch": 5.72,
505
  "learning_rate": 8.3e-05,
506
+ "loss": 3.4151,
507
  "step": 830
508
  },
509
  {
510
  "epoch": 5.79,
511
  "learning_rate": 8.4e-05,
512
+ "loss": 3.5327,
513
  "step": 840
514
  },
515
  {
516
  "epoch": 5.86,
517
  "learning_rate": 8.5e-05,
518
+ "loss": 3.4457,
519
  "step": 850
520
  },
521
  {
522
  "epoch": 5.93,
523
  "learning_rate": 8.6e-05,
524
+ "loss": 3.4219,
525
  "step": 860
526
  },
527
  {
528
  "epoch": 6.0,
529
  "learning_rate": 8.7e-05,
530
+ "loss": 3.434,
531
  "step": 870
532
  },
533
  {
534
  "epoch": 6.07,
535
  "learning_rate": 8.800000000000001e-05,
536
+ "loss": 3.5263,
537
  "step": 880
538
  },
539
  {
540
  "epoch": 6.14,
541
  "learning_rate": 8.900000000000001e-05,
542
+ "loss": 3.4327,
543
  "step": 890
544
  },
545
  {
546
  "epoch": 6.21,
547
  "learning_rate": 9e-05,
548
+ "loss": 3.3951,
549
  "step": 900
550
  },
551
  {
552
  "epoch": 6.28,
553
  "learning_rate": 9.1e-05,
554
+ "loss": 3.4945,
555
  "step": 910
556
  },
557
  {
558
  "epoch": 6.34,
559
  "learning_rate": 9.200000000000001e-05,
560
+ "loss": 3.4294,
561
  "step": 920
562
  },
563
  {
564
  "epoch": 6.41,
565
  "learning_rate": 9.300000000000001e-05,
566
+ "loss": 3.3794,
567
  "step": 930
568
  },
569
  {
570
  "epoch": 6.48,
571
  "learning_rate": 9.4e-05,
572
+ "loss": 3.3755,
573
  "step": 940
574
  },
575
  {
576
  "epoch": 6.55,
577
  "learning_rate": 9.5e-05,
578
+ "loss": 3.4776,
579
  "step": 950
580
  },
581
  {
582
  "epoch": 6.62,
583
  "learning_rate": 9.6e-05,
584
+ "loss": 3.4096,
585
  "step": 960
586
  },
587
  {
588
  "epoch": 6.69,
589
  "learning_rate": 9.7e-05,
590
+ "loss": 3.366,
591
  "step": 970
592
  },
593
  {
594
  "epoch": 6.76,
595
  "learning_rate": 9.8e-05,
596
+ "loss": 3.3924,
597
  "step": 980
598
  },
599
  {
600
  "epoch": 6.83,
601
  "learning_rate": 9.900000000000001e-05,
602
+ "loss": 3.4268,
603
  "step": 990
604
  },
605
  {
606
  "epoch": 6.9,
607
  "learning_rate": 0.0001,
608
+ "loss": 3.3572,
609
  "step": 1000
610
  },
611
  {
612
  "epoch": 6.97,
613
  "learning_rate": 9.947368421052632e-05,
614
+ "loss": 3.3574,
615
  "step": 1010
616
  },
617
  {
618
  "epoch": 7.03,
619
  "learning_rate": 9.894736842105263e-05,
620
+ "loss": 3.4417,
621
  "step": 1020
622
  },
623
  {
624
  "epoch": 7.1,
625
  "learning_rate": 9.842105263157894e-05,
626
+ "loss": 3.3858,
627
  "step": 1030
628
  },
629
  {
630
  "epoch": 7.17,
631
  "learning_rate": 9.789473684210527e-05,
632
+ "loss": 3.3541,
633
  "step": 1040
634
  },
635
  {
636
  "epoch": 7.24,
637
  "learning_rate": 9.736842105263158e-05,
638
+ "loss": 3.334,
639
  "step": 1050
640
  },
641
  {
642
  "epoch": 7.31,
643
  "learning_rate": 9.68421052631579e-05,
644
+ "loss": 3.4488,
645
  "step": 1060
646
  },
647
  {
648
  "epoch": 7.38,
649
  "learning_rate": 9.631578947368421e-05,
650
+ "loss": 3.3454,
651
  "step": 1070
652
  },
653
  {
654
  "epoch": 7.45,
655
  "learning_rate": 9.578947368421052e-05,
656
+ "loss": 3.3057,
657
  "step": 1080
658
  },
659
  {
660
  "epoch": 7.52,
661
  "learning_rate": 9.526315789473685e-05,
662
+ "loss": 3.3948,
663
  "step": 1090
664
  },
665
  {
666
  "epoch": 7.59,
667
  "learning_rate": 9.473684210526316e-05,
668
+ "loss": 3.3869,
669
  "step": 1100
670
  },
671
  {
672
  "epoch": 7.66,
673
  "learning_rate": 9.421052631578949e-05,
674
+ "loss": 3.3038,
675
  "step": 1110
676
  },
677
  {
678
  "epoch": 7.72,
679
  "learning_rate": 9.36842105263158e-05,
680
+ "loss": 3.3054,
681
  "step": 1120
682
  },
683
  {
684
  "epoch": 7.79,
685
  "learning_rate": 9.315789473684211e-05,
686
+ "loss": 3.4006,
687
  "step": 1130
688
  },
689
  {
690
  "epoch": 7.86,
691
  "learning_rate": 9.263157894736843e-05,
692
+ "loss": 3.3313,
693
  "step": 1140
694
  },
695
  {
696
  "epoch": 7.93,
697
  "learning_rate": 9.210526315789474e-05,
698
+ "loss": 3.2982,
699
  "step": 1150
700
  },
701
  {
702
  "epoch": 8.0,
703
  "learning_rate": 9.157894736842105e-05,
704
+ "loss": 3.3071,
705
  "step": 1160
706
  },
707
  {
708
  "epoch": 8.07,
709
  "learning_rate": 9.105263157894738e-05,
710
+ "loss": 3.3822,
711
  "step": 1170
712
  },
713
  {
714
  "epoch": 8.14,
715
  "learning_rate": 9.052631578947369e-05,
716
+ "loss": 3.3196,
717
  "step": 1180
718
  },
719
  {
720
  "epoch": 8.21,
721
  "learning_rate": 9e-05,
722
+ "loss": 3.2765,
723
  "step": 1190
724
  },
725
  {
726
  "epoch": 8.28,
727
  "learning_rate": 8.947368421052632e-05,
728
+ "loss": 3.3805,
729
  "step": 1200
730
  },
731
  {
732
  "epoch": 8.34,
733
  "learning_rate": 8.894736842105263e-05,
734
+ "loss": 3.3321,
735
  "step": 1210
736
  },
737
  {
738
  "epoch": 8.41,
739
  "learning_rate": 8.842105263157894e-05,
740
+ "loss": 3.2837,
741
  "step": 1220
742
  },
743
  {
744
  "epoch": 8.48,
745
  "learning_rate": 8.789473684210526e-05,
746
+ "loss": 3.2773,
747
  "step": 1230
748
  },
749
  {
750
  "epoch": 8.55,
751
  "learning_rate": 8.736842105263158e-05,
752
+ "loss": 3.3566,
753
  "step": 1240
754
  },
755
  {
756
  "epoch": 8.62,
757
  "learning_rate": 8.68421052631579e-05,
758
+ "loss": 3.2951,
759
  "step": 1250
760
  },
761
  {
762
  "epoch": 8.69,
763
  "learning_rate": 8.631578947368421e-05,
764
+ "loss": 3.2632,
765
  "step": 1260
766
  },
767
  {
768
  "epoch": 8.76,
769
  "learning_rate": 8.578947368421054e-05,
770
+ "loss": 3.3313,
771
  "step": 1270
772
  },
773
  {
774
  "epoch": 8.83,
775
  "learning_rate": 8.526315789473685e-05,
776
+ "loss": 3.3523,
777
  "step": 1280
778
  },
779
  {
780
  "epoch": 8.9,
781
  "learning_rate": 8.473684210526316e-05,
782
+ "loss": 3.2767,
783
  "step": 1290
784
  },
785
  {
786
  "epoch": 8.97,
787
  "learning_rate": 8.421052631578948e-05,
788
+ "loss": 3.2448,
789
  "step": 1300
790
  },
791
  {
792
  "epoch": 9.03,
793
  "learning_rate": 8.36842105263158e-05,
794
+ "loss": 3.3419,
795
  "step": 1310
796
  },
797
  {
798
  "epoch": 9.1,
799
  "learning_rate": 8.315789473684212e-05,
800
+ "loss": 3.2813,
801
  "step": 1320
802
  },
803
  {
804
  "epoch": 9.17,
805
  "learning_rate": 8.263157894736843e-05,
806
+ "loss": 3.2558,
807
  "step": 1330
808
  },
809
  {
810
  "epoch": 9.24,
811
  "learning_rate": 8.210526315789474e-05,
812
+ "loss": 3.2502,
813
  "step": 1340
814
  },
815
  {
816
  "epoch": 9.31,
817
  "learning_rate": 8.157894736842105e-05,
818
+ "loss": 3.3594,
819
  "step": 1350
820
  },
821
  {
822
  "epoch": 9.38,
823
  "learning_rate": 8.105263157894737e-05,
824
+ "loss": 3.2739,
825
  "step": 1360
826
  },
827
  {
828
  "epoch": 9.45,
829
  "learning_rate": 8.052631578947368e-05,
830
+ "loss": 3.2404,
831
  "step": 1370
832
  },
833
  {
834
  "epoch": 9.52,
835
  "learning_rate": 8e-05,
836
+ "loss": 3.3528,
837
  "step": 1380
838
  },
839
  {
840
  "epoch": 9.59,
841
  "learning_rate": 7.947368421052632e-05,
842
+ "loss": 3.3139,
843
  "step": 1390
844
  },
845
  {
846
  "epoch": 9.66,
847
  "learning_rate": 7.894736842105263e-05,
848
+ "loss": 3.2333,
849
  "step": 1400
850
  },
851
  {
852
  "epoch": 9.72,
853
  "learning_rate": 7.842105263157895e-05,
854
+ "loss": 3.225,
855
  "step": 1410
856
  },
857
  {
858
  "epoch": 9.79,
859
  "learning_rate": 7.789473684210526e-05,
860
+ "loss": 3.3473,
861
  "step": 1420
862
  },
863
  {
864
  "epoch": 9.86,
865
  "learning_rate": 7.736842105263159e-05,
866
+ "loss": 3.2565,
867
  "step": 1430
868
  },
869
  {
870
  "epoch": 9.93,
871
  "learning_rate": 7.68421052631579e-05,
872
+ "loss": 3.2372,
873
  "step": 1440
874
  },
875
  {
876
  "epoch": 10.0,
877
  "learning_rate": 7.631578947368422e-05,
878
+ "loss": 3.2725,
879
  "step": 1450
880
  },
881
  {
882
  "epoch": 10.0,
883
+ "eval_loss": 3.4699134826660156,
884
+ "eval_runtime": 8.1516,
885
+ "eval_samples_per_second": 206.093,
886
+ "eval_steps_per_second": 6.502,
887
+ "eval_wer": 1.0005513059058646,
888
  "step": 1450
889
  },
890
  {
891
  "epoch": 10.07,
892
  "learning_rate": 7.578947368421054e-05,
893
+ "loss": 3.3204,
894
  "step": 1460
895
  },
896
  {
897
  "epoch": 10.14,
898
  "learning_rate": 7.526315789473685e-05,
899
+ "loss": 3.2301,
900
  "step": 1470
901
  },
902
  {
903
  "epoch": 10.21,
904
  "learning_rate": 7.473684210526316e-05,
905
+ "loss": 3.2381,
906
  "step": 1480
907
  },
908
  {
909
  "epoch": 10.28,
910
  "learning_rate": 7.421052631578948e-05,
911
+ "loss": 3.3093,
912
  "step": 1490
913
  },
914
  {
915
  "epoch": 10.34,
916
  "learning_rate": 7.368421052631579e-05,
917
+ "loss": 3.2747,
918
  "step": 1500
919
  },
920
  {
921
  "epoch": 10.41,
922
  "learning_rate": 7.315789473684212e-05,
923
+ "loss": 3.2268,
924
  "step": 1510
925
  },
926
  {
927
  "epoch": 10.48,
928
  "learning_rate": 7.263157894736843e-05,
929
+ "loss": 3.2105,
930
  "step": 1520
931
  },
932
  {
933
  "epoch": 10.55,
934
  "learning_rate": 7.210526315789474e-05,
935
+ "loss": 3.3174,
936
  "step": 1530
937
  },
938
  {
939
  "epoch": 10.62,
940
  "learning_rate": 7.157894736842105e-05,
941
+ "loss": 3.2254,
942
  "step": 1540
943
  },
944
  {
945
  "epoch": 10.69,
946
  "learning_rate": 7.105263157894737e-05,
947
+ "loss": 3.2547,
948
  "step": 1550
949
  },
950
  {
951
  "epoch": 10.76,
952
  "learning_rate": 7.052631578947368e-05,
953
+ "loss": 3.2704,
954
  "step": 1560
955
  },
956
  {
957
  "epoch": 10.83,
958
  "learning_rate": 7e-05,
959
+ "loss": 3.3047,
960
  "step": 1570
961
  },
962
  {
963
  "epoch": 10.9,
964
  "learning_rate": 6.947368421052632e-05,
965
+ "loss": 3.2407,
966
  "step": 1580
967
  },
968
  {
969
  "epoch": 10.97,
970
  "learning_rate": 6.894736842105263e-05,
971
+ "loss": 3.1856,
972
  "step": 1590
973
  },
974
  {
975
  "epoch": 11.03,
976
  "learning_rate": 6.842105263157895e-05,
977
+ "loss": 3.3326,
978
  "step": 1600
979
  },
980
  {
981
  "epoch": 11.1,
982
  "learning_rate": 6.789473684210527e-05,
983
+ "loss": 3.2289,
984
  "step": 1610
985
  },
986
  {
987
  "epoch": 11.17,
988
  "learning_rate": 6.736842105263159e-05,
989
+ "loss": 3.229,
990
  "step": 1620
991
  },
992
  {
993
  "epoch": 11.24,
994
  "learning_rate": 6.68421052631579e-05,
995
+ "loss": 3.2157,
996
  "step": 1630
997
  },
998
  {
999
  "epoch": 11.31,
1000
  "learning_rate": 6.631578947368421e-05,
1001
+ "loss": 3.3323,
1002
  "step": 1640
1003
  },
1004
  {
1005
  "epoch": 11.38,
1006
  "learning_rate": 6.578947368421054e-05,
1007
+ "loss": 3.2229,
1008
  "step": 1650
1009
  },
1010
  {
1011
  "epoch": 11.45,
1012
  "learning_rate": 6.526315789473685e-05,
1013
+ "loss": 3.1919,
1014
  "step": 1660
1015
  },
1016
  {
1017
  "epoch": 11.52,
1018
  "learning_rate": 6.473684210526316e-05,
1019
+ "loss": 3.2794,
1020
  "step": 1670
1021
  },
1022
  {
1023
  "epoch": 11.59,
1024
  "learning_rate": 6.421052631578948e-05,
1025
+ "loss": 3.2455,
1026
  "step": 1680
1027
  },
1028
  {
1029
  "epoch": 11.66,
1030
  "learning_rate": 6.368421052631579e-05,
1031
+ "loss": 3.1852,
1032
  "step": 1690
1033
  },
1034
  {
1035
  "epoch": 11.72,
1036
  "learning_rate": 6.31578947368421e-05,
1037
+ "loss": 3.1948,
1038
  "step": 1700
1039
  },
1040
  {
1041
  "epoch": 11.79,
1042
  "learning_rate": 6.263157894736842e-05,
1043
+ "loss": 3.3379,
1044
  "step": 1710
1045
  },
1046
  {
1047
  "epoch": 11.86,
1048
  "learning_rate": 6.210526315789474e-05,
1049
+ "loss": 3.2203,
1050
  "step": 1720
1051
  },
1052
  {
1053
  "epoch": 11.93,
1054
  "learning_rate": 6.157894736842106e-05,
1055
+ "loss": 3.2083,
1056
  "step": 1730
1057
  },
1058
  {
1059
  "epoch": 12.0,
1060
  "learning_rate": 6.105263157894737e-05,
1061
+ "loss": 3.2124,
1062
  "step": 1740
1063
  },
1064
  {
1065
  "epoch": 12.07,
1066
  "learning_rate": 6.052631578947369e-05,
1067
+ "loss": 3.2972,
1068
  "step": 1750
1069
  },
1070
  {
1071
  "epoch": 12.14,
1072
  "learning_rate": 6e-05,
1073
+ "loss": 3.1723,
1074
  "step": 1760
1075
  },
1076
  {
1077
  "epoch": 12.21,
1078
  "learning_rate": 5.9473684210526315e-05,
1079
+ "loss": 3.1838,
1080
  "step": 1770
1081
  },
1082
  {
1083
  "epoch": 12.28,
1084
  "learning_rate": 5.894736842105263e-05,
1085
+ "loss": 3.2744,
1086
  "step": 1780
1087
  },
1088
  {
1089
  "epoch": 12.34,
1090
  "learning_rate": 5.8421052631578954e-05,
1091
+ "loss": 3.257,
1092
  "step": 1790
1093
  },
1094
  {
1095
  "epoch": 12.41,
1096
  "learning_rate": 5.789473684210527e-05,
1097
+ "loss": 3.2009,
1098
  "step": 1800
1099
  },
1100
  {
1101
  "epoch": 12.48,
1102
  "learning_rate": 5.736842105263158e-05,
1103
+ "loss": 3.1939,
1104
  "step": 1810
1105
  },
1106
  {
1107
  "epoch": 12.55,
1108
  "learning_rate": 5.68421052631579e-05,
1109
+ "loss": 3.3005,
1110
  "step": 1820
1111
  },
1112
  {
1113
  "epoch": 12.62,
1114
  "learning_rate": 5.631578947368421e-05,
1115
+ "loss": 3.1852,
1116
  "step": 1830
1117
  },
1118
  {
1119
  "epoch": 12.69,
1120
  "learning_rate": 5.5789473684210526e-05,
1121
+ "loss": 3.1796,
1122
  "step": 1840
1123
  },
1124
  {
1125
  "epoch": 12.76,
1126
  "learning_rate": 5.526315789473685e-05,
1127
+ "loss": 3.2465,
1128
  "step": 1850
1129
  },
1130
  {
1131
  "epoch": 12.83,
1132
  "learning_rate": 5.4736842105263165e-05,
1133
+ "loss": 3.2793,
1134
  "step": 1860
1135
  },
1136
  {
1137
  "epoch": 12.9,
1138
  "learning_rate": 5.421052631578948e-05,
1139
+ "loss": 3.179,
1140
  "step": 1870
1141
  },
1142
  {
1143
  "epoch": 12.97,
1144
  "learning_rate": 5.368421052631579e-05,
1145
+ "loss": 3.2026,
1146
  "step": 1880
1147
  },
1148
  {
1149
  "epoch": 13.03,
1150
  "learning_rate": 5.3157894736842104e-05,
1151
+ "loss": 3.2608,
1152
  "step": 1890
1153
  },
1154
  {
1155
  "epoch": 13.1,
1156
  "learning_rate": 5.2631578947368424e-05,
1157
+ "loss": 3.2105,
1158
  "step": 1900
1159
  },
1160
  {
1161
  "epoch": 13.17,
1162
  "learning_rate": 5.210526315789474e-05,
1163
+ "loss": 3.1839,
1164
  "step": 1910
1165
  },
1166
  {
1167
  "epoch": 13.24,
1168
  "learning_rate": 5.157894736842106e-05,
1169
+ "loss": 3.2157,
1170
  "step": 1920
1171
  },
1172
  {
1173
  "epoch": 13.31,
1174
  "learning_rate": 5.1052631578947376e-05,
1175
+ "loss": 3.2742,
1176
  "step": 1930
1177
  },
1178
  {
1179
  "epoch": 13.38,
1180
  "learning_rate": 5.052631578947369e-05,
1181
+ "loss": 3.1615,
1182
  "step": 1940
1183
  },
1184
  {
1185
  "epoch": 13.45,
1186
  "learning_rate": 5e-05,
1187
+ "loss": 3.1736,
1188
  "step": 1950
1189
  },
1190
  {
1191
  "epoch": 13.52,
1192
  "learning_rate": 4.9473684210526315e-05,
1193
+ "loss": 3.2578,
1194
  "step": 1960
1195
  },
1196
  {
1197
  "epoch": 13.59,
1198
  "learning_rate": 4.8947368421052635e-05,
1199
+ "loss": 3.2397,
1200
  "step": 1970
1201
  },
1202
  {
1203
  "epoch": 13.66,
1204
  "learning_rate": 4.842105263157895e-05,
1205
+ "loss": 3.1528,
1206
  "step": 1980
1207
  },
1208
  {
1209
  "epoch": 13.72,
1210
  "learning_rate": 4.789473684210526e-05,
1211
+ "loss": 3.1764,
1212
  "step": 1990
1213
  },
1214
  {
1215
  "epoch": 13.79,
1216
  "learning_rate": 4.736842105263158e-05,
1217
+ "loss": 3.2888,
1218
  "step": 2000
1219
  },
1220
  {
1221
  "epoch": 13.86,
1222
  "learning_rate": 4.68421052631579e-05,
1223
+ "loss": 3.1991,
1224
  "step": 2010
1225
  },
1226
  {
1227
  "epoch": 13.93,
1228
  "learning_rate": 4.6315789473684214e-05,
1229
+ "loss": 3.1734,
1230
  "step": 2020
1231
  },
1232
  {
1233
  "epoch": 14.0,
1234
  "learning_rate": 4.5789473684210527e-05,
1235
+ "loss": 3.193,
1236
  "step": 2030
1237
  },
1238
  {
1239
  "epoch": 14.07,
1240
  "learning_rate": 4.5263157894736846e-05,
1241
+ "loss": 3.2672,
1242
  "step": 2040
1243
  },
1244
  {
1245
  "epoch": 14.14,
1246
  "learning_rate": 4.473684210526316e-05,
1247
+ "loss": 3.1536,
1248
  "step": 2050
1249
  },
1250
  {
1251
  "epoch": 14.21,
1252
  "learning_rate": 4.421052631578947e-05,
1253
+ "loss": 3.1904,
1254
  "step": 2060
1255
  },
1256
  {
1257
  "epoch": 14.28,
1258
  "learning_rate": 4.368421052631579e-05,
1259
+ "loss": 3.2782,
1260
  "step": 2070
1261
  },
1262
  {
1263
  "epoch": 14.34,
1264
  "learning_rate": 4.3157894736842105e-05,
1265
+ "loss": 3.2108,
1266
  "step": 2080
1267
  },
1268
  {
1269
  "epoch": 14.41,
1270
  "learning_rate": 4.2631578947368425e-05,
1271
+ "loss": 3.1988,
1272
  "step": 2090
1273
  },
1274
  {
1275
  "epoch": 14.48,
1276
  "learning_rate": 4.210526315789474e-05,
1277
+ "loss": 3.1915,
1278
  "step": 2100
1279
  },
1280
  {
1281
  "epoch": 14.55,
1282
  "learning_rate": 4.157894736842106e-05,
1283
+ "loss": 3.2633,
1284
  "step": 2110
1285
  },
1286
  {
1287
  "epoch": 14.62,
1288
  "learning_rate": 4.105263157894737e-05,
1289
+ "loss": 3.1832,
1290
  "step": 2120
1291
  },
1292
  {
1293
  "epoch": 14.69,
1294
  "learning_rate": 4.0526315789473684e-05,
1295
+ "loss": 3.1445,
1296
  "step": 2130
1297
  },
1298
  {
1299
  "epoch": 14.76,
1300
  "learning_rate": 4e-05,
1301
+ "loss": 3.2,
1302
  "step": 2140
1303
  },
1304
  {
1305
  "epoch": 14.83,
1306
  "learning_rate": 3.9473684210526316e-05,
1307
+ "loss": 3.2517,
1308
  "step": 2150
1309
  },
1310
  {
1311
  "epoch": 14.9,
1312
  "learning_rate": 3.894736842105263e-05,
1313
+ "loss": 3.1483,
1314
  "step": 2160
1315
  },
1316
  {
1317
  "epoch": 14.97,
1318
  "learning_rate": 3.842105263157895e-05,
1319
+ "loss": 3.1521,
1320
  "step": 2170
1321
  },
1322
  {
1323
  "epoch": 15.03,
1324
  "learning_rate": 3.789473684210527e-05,
1325
+ "loss": 3.273,
1326
  "step": 2180
1327
  },
1328
  {
1329
  "epoch": 15.1,
1330
  "learning_rate": 3.736842105263158e-05,
1331
+ "loss": 3.2115,
1332
  "step": 2190
1333
  },
1334
  {
1335
  "epoch": 15.17,
1336
  "learning_rate": 3.6842105263157895e-05,
1337
+ "loss": 3.1547,
1338
  "step": 2200
1339
  },
1340
  {
1341
  "epoch": 15.24,
1342
  "learning_rate": 3.6315789473684214e-05,
1343
+ "loss": 3.1564,
1344
  "step": 2210
1345
  },
1346
  {
1347
  "epoch": 15.31,
1348
  "learning_rate": 3.578947368421053e-05,
1349
+ "loss": 3.2829,
1350
  "step": 2220
1351
  },
1352
  {
1353
  "epoch": 15.38,
1354
  "learning_rate": 3.526315789473684e-05,
1355
+ "loss": 3.1727,
1356
  "step": 2230
1357
  },
1358
  {
1359
  "epoch": 15.45,
1360
  "learning_rate": 3.473684210526316e-05,
1361
+ "loss": 3.1617,
1362
  "step": 2240
1363
  },
1364
  {
1365
  "epoch": 15.52,
1366
  "learning_rate": 3.421052631578947e-05,
1367
+ "loss": 3.2143,
1368
  "step": 2250
1369
  },
1370
  {
1371
  "epoch": 15.59,
1372
  "learning_rate": 3.368421052631579e-05,
1373
+ "loss": 3.2045,
1374
  "step": 2260
1375
  },
1376
  {
1377
  "epoch": 15.66,
1378
  "learning_rate": 3.3157894736842106e-05,
1379
+ "loss": 3.1499,
1380
  "step": 2270
1381
  },
1382
  {
1383
  "epoch": 15.72,
1384
  "learning_rate": 3.2631578947368426e-05,
1385
+ "loss": 3.1733,
1386
  "step": 2280
1387
  },
1388
  {
1389
  "epoch": 15.79,
1390
  "learning_rate": 3.210526315789474e-05,
1391
+ "loss": 3.2579,
1392
  "step": 2290
1393
  },
1394
  {
1395
  "epoch": 15.86,
1396
  "learning_rate": 3.157894736842105e-05,
1397
+ "loss": 3.1808,
1398
  "step": 2300
1399
  },
1400
  {
1401
  "epoch": 15.93,
1402
  "learning_rate": 3.105263157894737e-05,
1403
+ "loss": 3.1587,
1404
  "step": 2310
1405
  },
1406
  {
1407
  "epoch": 16.0,
1408
  "learning_rate": 3.0526315789473684e-05,
1409
+ "loss": 3.229,
1410
  "step": 2320
1411
  },
1412
  {
1413
  "epoch": 16.07,
1414
  "learning_rate": 3e-05,
1415
+ "loss": 3.2771,
1416
  "step": 2330
1417
  },
1418
  {
1419
  "epoch": 16.14,
1420
  "learning_rate": 2.9473684210526314e-05,
1421
+ "loss": 3.178,
1422
  "step": 2340
1423
  },
1424
  {
1425
  "epoch": 16.21,
1426
  "learning_rate": 2.8947368421052634e-05,
1427
+ "loss": 3.1357,
1428
  "step": 2350
1429
  },
1430
  {
1431
  "epoch": 16.28,
1432
  "learning_rate": 2.842105263157895e-05,
1433
+ "loss": 3.2502,
1434
  "step": 2360
1435
  },
1436
  {
1437
  "epoch": 16.34,
1438
  "learning_rate": 2.7894736842105263e-05,
1439
+ "loss": 3.1893,
1440
  "step": 2370
1441
  },
1442
  {
1443
  "epoch": 16.41,
1444
  "learning_rate": 2.7368421052631583e-05,
1445
+ "loss": 3.1526,
1446
  "step": 2380
1447
  },
1448
  {
1449
  "epoch": 16.48,
1450
  "learning_rate": 2.6842105263157896e-05,
1451
+ "loss": 3.1775,
1452
  "step": 2390
1453
  },
1454
  {
1455
  "epoch": 16.55,
1456
  "learning_rate": 2.6315789473684212e-05,
1457
+ "loss": 3.266,
1458
  "step": 2400
1459
  },
1460
  {
1461
  "epoch": 16.62,
1462
  "learning_rate": 2.578947368421053e-05,
1463
+ "loss": 3.1632,
1464
  "step": 2410
1465
  },
1466
  {
1467
  "epoch": 16.69,
1468
  "learning_rate": 2.5263157894736845e-05,
1469
+ "loss": 3.1558,
1470
  "step": 2420
1471
  },
1472
  {
1473
  "epoch": 16.76,
1474
  "learning_rate": 2.4736842105263158e-05,
1475
+ "loss": 3.2264,
1476
  "step": 2430
1477
  },
1478
  {
1479
  "epoch": 16.83,
1480
  "learning_rate": 2.4210526315789474e-05,
1481
+ "loss": 3.2213,
1482
  "step": 2440
1483
  },
1484
  {
1485
  "epoch": 16.9,
1486
  "learning_rate": 2.368421052631579e-05,
1487
+ "loss": 3.1527,
1488
  "step": 2450
1489
  },
1490
  {
1491
  "epoch": 16.97,
1492
  "learning_rate": 2.3157894736842107e-05,
1493
+ "loss": 3.1292,
1494
  "step": 2460
1495
  },
1496
  {
1497
  "epoch": 17.03,
1498
  "learning_rate": 2.2631578947368423e-05,
1499
+ "loss": 3.251,
1500
  "step": 2470
1501
  },
1502
  {
1503
  "epoch": 17.1,
1504
  "learning_rate": 2.2105263157894736e-05,
1505
+ "loss": 3.149,
1506
  "step": 2480
1507
  },
1508
  {
1509
  "epoch": 17.17,
1510
  "learning_rate": 2.1578947368421053e-05,
1511
+ "loss": 3.1467,
1512
  "step": 2490
1513
  },
1514
  {
1515
  "epoch": 17.24,
1516
  "learning_rate": 2.105263157894737e-05,
1517
+ "loss": 3.1744,
1518
  "step": 2500
1519
  },
1520
  {
1521
  "epoch": 17.31,
1522
  "learning_rate": 2.0526315789473685e-05,
1523
+ "loss": 3.2617,
1524
  "step": 2510
1525
  },
1526
  {
1527
  "epoch": 17.38,
1528
  "learning_rate": 2e-05,
1529
+ "loss": 3.1516,
1530
  "step": 2520
1531
  },
1532
  {
1533
  "epoch": 17.45,
1534
  "learning_rate": 1.9473684210526315e-05,
1535
+ "loss": 3.1366,
1536
  "step": 2530
1537
  },
1538
  {
1539
  "epoch": 17.52,
1540
  "learning_rate": 1.8947368421052634e-05,
1541
+ "loss": 3.2229,
1542
  "step": 2540
1543
  },
1544
  {
1545
  "epoch": 17.59,
1546
  "learning_rate": 1.8421052631578947e-05,
1547
+ "loss": 3.195,
1548
  "step": 2550
1549
  },
1550
  {
1551
  "epoch": 17.66,
1552
  "learning_rate": 1.7894736842105264e-05,
1553
+ "loss": 3.1612,
1554
  "step": 2560
1555
  },
1556
  {
1557
  "epoch": 17.72,
1558
  "learning_rate": 1.736842105263158e-05,
1559
+ "loss": 3.1497,
1560
  "step": 2570
1561
  },
1562
  {
1563
  "epoch": 17.79,
1564
  "learning_rate": 1.6842105263157896e-05,
1565
+ "loss": 3.2591,
1566
  "step": 2580
1567
  },
1568
  {
1569
  "epoch": 17.86,
1570
  "learning_rate": 1.6315789473684213e-05,
1571
+ "loss": 3.1834,
1572
  "step": 2590
1573
  },
1574
  {
1575
  "epoch": 17.93,
1576
  "learning_rate": 1.5789473684210526e-05,
1577
+ "loss": 3.1799,
1578
  "step": 2600
1579
  },
1580
  {
1581
  "epoch": 18.0,
1582
  "learning_rate": 1.5263157894736842e-05,
1583
+ "loss": 3.1597,
1584
  "step": 2610
1585
  },
1586
  {
1587
  "epoch": 18.07,
1588
  "learning_rate": 1.4736842105263157e-05,
1589
+ "loss": 3.2753,
1590
  "step": 2620
1591
  },
1592
  {
1593
  "epoch": 18.14,
1594
  "learning_rate": 1.4210526315789475e-05,
1595
+ "loss": 3.1269,
1596
  "step": 2630
1597
  },
1598
  {
1599
  "epoch": 18.21,
1600
  "learning_rate": 1.3684210526315791e-05,
1601
+ "loss": 3.1321,
1602
  "step": 2640
1603
  },
1604
  {
1605
  "epoch": 18.28,
1606
  "learning_rate": 1.3157894736842106e-05,
1607
+ "loss": 3.2213,
1608
  "step": 2650
1609
  },
1610
  {
1611
  "epoch": 18.34,
1612
  "learning_rate": 1.2631578947368422e-05,
1613
+ "loss": 3.1916,
1614
  "step": 2660
1615
  },
1616
  {
1617
  "epoch": 18.41,
1618
  "learning_rate": 1.2105263157894737e-05,
1619
+ "loss": 3.1388,
1620
  "step": 2670
1621
  },
1622
  {
1623
  "epoch": 18.48,
1624
  "learning_rate": 1.1578947368421053e-05,
1625
+ "loss": 3.1401,
1626
  "step": 2680
1627
  },
1628
  {
1629
  "epoch": 18.55,
1630
  "learning_rate": 1.1052631578947368e-05,
1631
+ "loss": 3.2428,
1632
  "step": 2690
1633
  },
1634
  {
1635
  "epoch": 18.62,
1636
  "learning_rate": 1.0526315789473684e-05,
1637
+ "loss": 3.1645,
1638
  "step": 2700
1639
  },
1640
  {
1641
  "epoch": 18.69,
1642
  "learning_rate": 1e-05,
1643
+ "loss": 3.1607,
1644
  "step": 2710
1645
  },
1646
  {
1647
  "epoch": 18.76,
1648
  "learning_rate": 9.473684210526317e-06,
1649
+ "loss": 3.2464,
1650
  "step": 2720
1651
  },
1652
  {
1653
  "epoch": 18.83,
1654
  "learning_rate": 8.947368421052632e-06,
1655
+ "loss": 3.2228,
1656
  "step": 2730
1657
  },
1658
  {
1659
  "epoch": 18.9,
1660
  "learning_rate": 8.421052631578948e-06,
1661
+ "loss": 3.1488,
1662
  "step": 2740
1663
  },
1664
  {
1665
  "epoch": 18.97,
1666
  "learning_rate": 7.894736842105263e-06,
1667
+ "loss": 3.162,
1668
  "step": 2750
1669
  },
1670
  {
1671
  "epoch": 19.03,
1672
  "learning_rate": 7.3684210526315784e-06,
1673
+ "loss": 3.2708,
1674
  "step": 2760
1675
  },
1676
  {
1677
  "epoch": 19.1,
1678
  "learning_rate": 6.842105263157896e-06,
1679
+ "loss": 3.1619,
1680
  "step": 2770
1681
  },
1682
  {
1683
  "epoch": 19.17,
1684
  "learning_rate": 6.315789473684211e-06,
1685
+ "loss": 3.1504,
1686
  "step": 2780
1687
  },
1688
  {
1689
  "epoch": 19.24,
1690
  "learning_rate": 5.789473684210527e-06,
1691
+ "loss": 3.1647,
1692
  "step": 2790
1693
  },
1694
  {
1695
  "epoch": 19.31,
1696
  "learning_rate": 5.263157894736842e-06,
1697
+ "loss": 3.2402,
1698
  "step": 2800
1699
  },
1700
  {
1701
  "epoch": 19.38,
1702
  "learning_rate": 4.736842105263159e-06,
1703
+ "loss": 3.1644,
1704
  "step": 2810
1705
  },
1706
  {
1707
  "epoch": 19.45,
1708
  "learning_rate": 4.210526315789474e-06,
1709
+ "loss": 3.1178,
1710
  "step": 2820
1711
  },
1712
  {
1713
  "epoch": 19.52,
1714
  "learning_rate": 3.6842105263157892e-06,
1715
+ "loss": 3.2254,
1716
  "step": 2830
1717
  },
1718
  {
1719
  "epoch": 19.59,
1720
  "learning_rate": 3.1578947368421056e-06,
1721
+ "loss": 3.1963,
1722
  "step": 2840
1723
  },
1724
  {
1725
  "epoch": 19.66,
1726
  "learning_rate": 2.631578947368421e-06,
1727
+ "loss": 3.1382,
1728
  "step": 2850
1729
  },
1730
  {
1731
  "epoch": 19.72,
1732
  "learning_rate": 2.105263157894737e-06,
1733
+ "loss": 3.1406,
1734
  "step": 2860
1735
  },
1736
  {
1737
  "epoch": 19.79,
1738
  "learning_rate": 1.5789473684210528e-06,
1739
+ "loss": 3.2676,
1740
  "step": 2870
1741
  },
1742
  {
1743
  "epoch": 19.86,
1744
  "learning_rate": 1.0526315789473685e-06,
1745
+ "loss": 3.1524,
1746
  "step": 2880
1747
  },
1748
  {
1749
  "epoch": 19.93,
1750
  "learning_rate": 5.263157894736843e-07,
1751
+ "loss": 3.1674,
1752
  "step": 2890
1753
  },
1754
  {
1755
  "epoch": 20.0,
1756
  "learning_rate": 0.0,
1757
+ "loss": 3.1682,
1758
  "step": 2900
1759
  },
1760
  {
1761
  "epoch": 20.0,
1762
+ "eval_loss": 3.362812042236328,
1763
+ "eval_runtime": 8.2276,
1764
+ "eval_samples_per_second": 204.191,
1765
+ "eval_steps_per_second": 6.442,
1766
+ "eval_wer": 0.9993108676176694,
1767
  "step": 2900
1768
  },
1769
  {
1770
  "epoch": 20.0,
1771
  "step": 2900,
1772
  "total_flos": 2.6569362344615726e+18,
1773
+ "train_loss": 3.8477164847275307,
1774
+ "train_runtime": 343.8146,
1775
+ "train_samples_per_second": 268.749,
1776
+ "train_steps_per_second": 8.435
1777
  }
1778
  ],
1779
  "logging_steps": 10,