agnesluhtaru commited on
Commit
32feac1
1 Parent(s): 6b2a443

End of training

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 32.26,
3
- "eval_loss": 0.4131947159767151,
4
- "eval_runtime": 2301.1875,
5
- "eval_samples_per_second": 0.559,
6
- "eval_steps_per_second": 0.559,
7
- "eval_wer": 15.734731594103234,
8
- "train_loss": 0.017603627420263366,
9
- "train_runtime": 74040.5343,
10
- "train_samples_per_second": 1.729,
11
- "train_steps_per_second": 0.054
12
  }
 
1
  {
2
+ "epoch": 16.13,
3
+ "eval_loss": 0.37141528725624084,
4
+ "eval_runtime": 2355.4891,
5
+ "eval_samples_per_second": 0.546,
6
+ "eval_steps_per_second": 0.546,
7
+ "eval_wer": 15.558516353633904,
8
+ "train_loss": 0.03306677715945989,
9
+ "train_runtime": 38266.3528,
10
+ "train_samples_per_second": 1.672,
11
+ "train_steps_per_second": 0.052
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 32.26,
3
- "eval_loss": 0.4131947159767151,
4
- "eval_runtime": 2301.1875,
5
- "eval_samples_per_second": 0.559,
6
- "eval_steps_per_second": 0.559,
7
- "eval_wer": 15.734731594103234
8
  }
 
1
  {
2
+ "epoch": 16.13,
3
+ "eval_loss": 0.37141528725624084,
4
+ "eval_runtime": 2355.4891,
5
+ "eval_samples_per_second": 0.546,
6
+ "eval_steps_per_second": 0.546,
7
+ "eval_wer": 15.558516353633904
8
  }
runs/Feb03_20-01-32_nid007478/events.out.tfevents.1675490223.nid007478.81214.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8280e4fafb75a30371d7851b57bf30aff2ef5a3887613ea214aafb8ec2f63aa4
3
+ size 358
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 32.26,
3
- "train_loss": 0.017603627420263366,
4
- "train_runtime": 74040.5343,
5
- "train_samples_per_second": 1.729,
6
- "train_steps_per_second": 0.054
7
  }
 
1
  {
2
+ "epoch": 16.13,
3
+ "train_loss": 0.03306677715945989,
4
+ "train_runtime": 38266.3528,
5
+ "train_samples_per_second": 1.672,
6
+ "train_steps_per_second": 0.052
7
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 15.734731594103234,
3
- "best_model_checkpoint": "/scratch/project_465000370/asr_children/models/whisper-large-et-clinic/checkpoint-4000",
4
- "epoch": 32.25612806403202,
5
- "global_step": 4000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -10,1048 +10,532 @@
10
  {
11
  "epoch": 0.2,
12
  "learning_rate": 1.2000000000000002e-06,
13
- "loss": 0.4973,
14
  "step": 25
15
  },
16
  {
17
  "epoch": 0.4,
18
- "learning_rate": 2.4500000000000003e-06,
19
- "loss": 0.2667,
20
  "step": 50
21
  },
22
  {
23
  "epoch": 0.6,
24
- "learning_rate": 3.7e-06,
25
- "loss": 0.2056,
26
  "step": 75
27
  },
28
  {
29
  "epoch": 0.8,
30
- "learning_rate": 4.95e-06,
31
- "loss": 0.2174,
32
  "step": 100
33
  },
34
  {
35
  "epoch": 1.01,
36
- "learning_rate": 6.200000000000001e-06,
37
- "loss": 0.198,
38
  "step": 125
39
  },
40
  {
41
  "epoch": 1.21,
42
- "learning_rate": 7.4e-06,
43
- "loss": 0.1203,
44
  "step": 150
45
  },
46
  {
47
  "epoch": 1.41,
48
- "learning_rate": 8.65e-06,
49
- "loss": 0.1191,
50
  "step": 175
51
  },
52
  {
53
  "epoch": 1.61,
54
- "learning_rate": 9.9e-06,
55
- "loss": 0.1183,
56
  "step": 200
57
  },
58
  {
59
  "epoch": 1.81,
60
- "learning_rate": 9.939473684210526e-06,
61
- "loss": 0.1172,
62
  "step": 225
63
  },
64
  {
65
  "epoch": 2.02,
66
- "learning_rate": 9.873684210526317e-06,
67
- "loss": 0.1264,
68
  "step": 250
69
  },
70
  {
71
  "epoch": 2.22,
72
- "learning_rate": 9.807894736842106e-06,
73
- "loss": 0.0478,
74
  "step": 275
75
  },
76
  {
77
  "epoch": 2.42,
78
- "learning_rate": 9.742105263157897e-06,
79
- "loss": 0.0544,
80
  "step": 300
81
  },
82
  {
83
  "epoch": 2.62,
84
- "learning_rate": 9.678947368421053e-06,
85
  "loss": 0.0605,
86
  "step": 325
87
  },
88
  {
89
  "epoch": 2.82,
90
- "learning_rate": 9.613157894736844e-06,
91
- "loss": 0.0575,
92
  "step": 350
93
  },
94
  {
95
  "epoch": 3.02,
96
- "learning_rate": 9.547368421052631e-06,
97
- "loss": 0.0494,
98
  "step": 375
99
  },
100
  {
101
  "epoch": 3.22,
102
- "learning_rate": 9.481578947368422e-06,
103
- "loss": 0.0262,
104
  "step": 400
105
  },
106
  {
107
  "epoch": 3.42,
108
- "learning_rate": 9.415789473684211e-06,
109
- "loss": 0.0237,
110
  "step": 425
111
  },
112
  {
113
  "epoch": 3.62,
114
- "learning_rate": 9.350000000000002e-06,
115
- "loss": 0.0293,
116
  "step": 450
117
  },
118
  {
119
  "epoch": 3.82,
120
- "learning_rate": 9.28421052631579e-06,
121
- "loss": 0.0284,
122
  "step": 475
123
  },
124
  {
125
  "epoch": 4.03,
126
- "learning_rate": 9.21842105263158e-06,
127
- "loss": 0.0297,
128
  "step": 500
129
  },
130
  {
131
  "epoch": 4.03,
132
- "eval_loss": 0.3037151098251343,
133
- "eval_runtime": 2297.7169,
134
- "eval_samples_per_second": 0.56,
135
- "eval_steps_per_second": 0.56,
136
- "eval_wer": 16.71466024842051,
137
  "step": 500
138
  },
139
  {
140
  "epoch": 4.23,
141
- "learning_rate": 9.15263157894737e-06,
142
- "loss": 0.0151,
143
  "step": 525
144
  },
145
  {
146
  "epoch": 4.43,
147
- "learning_rate": 9.086842105263158e-06,
148
- "loss": 0.0161,
149
  "step": 550
150
  },
151
  {
152
  "epoch": 4.63,
153
- "learning_rate": 9.021052631578948e-06,
154
- "loss": 0.0165,
155
  "step": 575
156
  },
157
  {
158
  "epoch": 4.83,
159
- "learning_rate": 8.955263157894738e-06,
160
- "loss": 0.017,
161
  "step": 600
162
  },
163
  {
164
  "epoch": 5.04,
165
- "learning_rate": 8.889473684210528e-06,
166
- "loss": 0.0154,
167
  "step": 625
168
  },
169
  {
170
  "epoch": 5.24,
171
- "learning_rate": 8.823684210526317e-06,
172
- "loss": 0.0093,
173
  "step": 650
174
  },
175
  {
176
  "epoch": 5.44,
177
- "learning_rate": 8.757894736842106e-06,
178
- "loss": 0.0109,
179
  "step": 675
180
  },
181
  {
182
  "epoch": 5.64,
183
- "learning_rate": 8.692105263157895e-06,
184
- "loss": 0.009,
185
  "step": 700
186
  },
187
  {
188
  "epoch": 5.84,
189
- "learning_rate": 8.626315789473686e-06,
190
- "loss": 0.0109,
191
  "step": 725
192
  },
193
  {
194
  "epoch": 6.05,
195
- "learning_rate": 8.560526315789475e-06,
196
- "loss": 0.0105,
197
  "step": 750
198
  },
199
  {
200
  "epoch": 6.25,
201
- "learning_rate": 8.494736842105264e-06,
202
- "loss": 0.0077,
203
  "step": 775
204
  },
205
  {
206
  "epoch": 6.45,
207
- "learning_rate": 8.428947368421053e-06,
208
- "loss": 0.0063,
209
  "step": 800
210
  },
211
  {
212
  "epoch": 6.65,
213
- "learning_rate": 8.363157894736842e-06,
214
- "loss": 0.0093,
215
  "step": 825
216
  },
217
  {
218
  "epoch": 6.85,
219
- "learning_rate": 8.297368421052631e-06,
220
- "loss": 0.0072,
221
  "step": 850
222
  },
223
  {
224
  "epoch": 7.06,
225
- "learning_rate": 8.231578947368422e-06,
226
- "loss": 0.0081,
227
  "step": 875
228
  },
229
  {
230
  "epoch": 7.26,
231
- "learning_rate": 8.165789473684211e-06,
232
- "loss": 0.0047,
233
  "step": 900
234
  },
235
  {
236
  "epoch": 7.46,
237
- "learning_rate": 8.1e-06,
238
- "loss": 0.0058,
239
  "step": 925
240
  },
241
  {
242
  "epoch": 7.66,
243
- "learning_rate": 8.03421052631579e-06,
244
- "loss": 0.0051,
245
  "step": 950
246
  },
247
  {
248
  "epoch": 7.86,
249
- "learning_rate": 7.96842105263158e-06,
250
- "loss": 0.0056,
251
  "step": 975
252
  },
253
  {
254
  "epoch": 8.06,
255
- "learning_rate": 7.90263157894737e-06,
256
- "loss": 0.0057,
257
  "step": 1000
258
  },
259
  {
260
  "epoch": 8.06,
261
- "eval_loss": 0.3434995114803314,
262
- "eval_runtime": 2295.9725,
263
- "eval_samples_per_second": 0.56,
264
- "eval_steps_per_second": 0.56,
265
- "eval_wer": 15.949628228821936,
266
  "step": 1000
267
  },
268
  {
269
  "epoch": 8.26,
270
- "learning_rate": 7.836842105263159e-06,
271
- "loss": 0.0031,
272
  "step": 1025
273
  },
274
  {
275
  "epoch": 8.46,
276
- "learning_rate": 7.771052631578948e-06,
277
- "loss": 0.0055,
278
  "step": 1050
279
  },
280
  {
281
  "epoch": 8.66,
282
- "learning_rate": 7.705263157894738e-06,
283
- "loss": 0.0056,
284
  "step": 1075
285
  },
286
  {
287
  "epoch": 8.86,
288
- "learning_rate": 7.639473684210526e-06,
289
- "loss": 0.0049,
290
  "step": 1100
291
  },
292
  {
293
  "epoch": 9.07,
294
- "learning_rate": 7.573684210526317e-06,
295
- "loss": 0.004,
296
  "step": 1125
297
  },
298
  {
299
  "epoch": 9.27,
300
- "learning_rate": 7.507894736842106e-06,
301
- "loss": 0.0036,
302
  "step": 1150
303
  },
304
  {
305
  "epoch": 9.47,
306
- "learning_rate": 7.442105263157895e-06,
307
- "loss": 0.0028,
308
  "step": 1175
309
  },
310
  {
311
  "epoch": 9.67,
312
- "learning_rate": 7.376315789473685e-06,
313
- "loss": 0.0034,
314
  "step": 1200
315
  },
316
  {
317
  "epoch": 9.87,
318
- "learning_rate": 7.310526315789475e-06,
319
- "loss": 0.0027,
320
  "step": 1225
321
  },
322
  {
323
  "epoch": 10.08,
324
- "learning_rate": 7.244736842105263e-06,
325
- "loss": 0.0027,
326
  "step": 1250
327
  },
328
  {
329
  "epoch": 10.28,
330
- "learning_rate": 7.178947368421053e-06,
331
- "loss": 0.0022,
332
  "step": 1275
333
  },
334
  {
335
  "epoch": 10.48,
336
- "learning_rate": 7.113157894736843e-06,
337
- "loss": 0.0033,
338
  "step": 1300
339
  },
340
  {
341
  "epoch": 10.68,
342
- "learning_rate": 7.047368421052631e-06,
343
- "loss": 0.0033,
344
  "step": 1325
345
  },
346
  {
347
  "epoch": 10.88,
348
- "learning_rate": 6.981578947368421e-06,
349
- "loss": 0.005,
350
  "step": 1350
351
  },
352
  {
353
  "epoch": 11.09,
354
- "learning_rate": 6.915789473684211e-06,
355
- "loss": 0.0034,
356
  "step": 1375
357
  },
358
  {
359
  "epoch": 11.29,
360
- "learning_rate": 6.850000000000001e-06,
361
- "loss": 0.0035,
362
  "step": 1400
363
  },
364
  {
365
  "epoch": 11.49,
366
- "learning_rate": 6.78421052631579e-06,
367
- "loss": 0.0033,
368
  "step": 1425
369
  },
370
  {
371
  "epoch": 11.69,
372
- "learning_rate": 6.71842105263158e-06,
373
- "loss": 0.0041,
374
  "step": 1450
375
  },
376
  {
377
  "epoch": 11.89,
378
- "learning_rate": 6.6526315789473695e-06,
379
- "loss": 0.0047,
380
  "step": 1475
381
  },
382
  {
383
  "epoch": 12.1,
384
- "learning_rate": 6.586842105263158e-06,
385
- "loss": 0.0049,
386
  "step": 1500
387
  },
388
  {
389
  "epoch": 12.1,
390
- "eval_loss": 0.3610152006149292,
391
- "eval_runtime": 2310.3916,
392
- "eval_samples_per_second": 0.557,
393
- "eval_steps_per_second": 0.557,
394
- "eval_wer": 16.87368375811235,
395
  "step": 1500
396
  },
397
  {
398
  "epoch": 12.3,
399
- "learning_rate": 6.521052631578948e-06,
400
- "loss": 0.0035,
401
  "step": 1525
402
  },
403
  {
404
  "epoch": 12.5,
405
- "learning_rate": 6.455263157894738e-06,
406
- "loss": 0.0036,
407
  "step": 1550
408
  },
409
  {
410
  "epoch": 12.7,
411
- "learning_rate": 6.389473684210527e-06,
412
- "loss": 0.0038,
413
  "step": 1575
414
  },
415
  {
416
  "epoch": 12.9,
417
- "learning_rate": 6.323684210526316e-06,
418
- "loss": 0.0033,
419
  "step": 1600
420
  },
421
  {
422
  "epoch": 13.1,
423
- "learning_rate": 6.257894736842106e-06,
424
- "loss": 0.0028,
425
  "step": 1625
426
  },
427
  {
428
  "epoch": 13.3,
429
- "learning_rate": 6.192105263157895e-06,
430
- "loss": 0.0029,
431
  "step": 1650
432
  },
433
  {
434
  "epoch": 13.5,
435
- "learning_rate": 6.126315789473685e-06,
436
- "loss": 0.0033,
437
  "step": 1675
438
  },
439
  {
440
  "epoch": 13.7,
441
- "learning_rate": 6.060526315789474e-06,
442
- "loss": 0.003,
443
  "step": 1700
444
  },
445
  {
446
  "epoch": 13.9,
447
- "learning_rate": 5.994736842105263e-06,
448
- "loss": 0.0036,
449
  "step": 1725
450
  },
451
  {
452
  "epoch": 14.11,
453
- "learning_rate": 5.928947368421053e-06,
454
- "loss": 0.0029,
455
  "step": 1750
456
  },
457
  {
458
  "epoch": 14.31,
459
- "learning_rate": 5.863157894736842e-06,
460
- "loss": 0.0027,
461
  "step": 1775
462
  },
463
  {
464
  "epoch": 14.51,
465
- "learning_rate": 5.7973684210526315e-06,
466
- "loss": 0.0023,
467
  "step": 1800
468
  },
469
  {
470
  "epoch": 14.71,
471
- "learning_rate": 5.7315789473684215e-06,
472
- "loss": 0.0023,
473
  "step": 1825
474
  },
475
  {
476
  "epoch": 14.91,
477
- "learning_rate": 5.6657894736842115e-06,
478
- "loss": 0.0033,
479
  "step": 1850
480
  },
481
  {
482
  "epoch": 15.12,
483
- "learning_rate": 5.600000000000001e-06,
484
- "loss": 0.0033,
485
  "step": 1875
486
  },
487
  {
488
  "epoch": 15.32,
489
- "learning_rate": 5.53421052631579e-06,
490
- "loss": 0.0037,
491
  "step": 1900
492
  },
493
  {
494
  "epoch": 15.52,
495
- "learning_rate": 5.46842105263158e-06,
496
- "loss": 0.0035,
497
  "step": 1925
498
  },
499
  {
500
  "epoch": 15.72,
501
- "learning_rate": 5.40263157894737e-06,
502
- "loss": 0.0037,
503
  "step": 1950
504
  },
505
  {
506
  "epoch": 15.92,
507
- "learning_rate": 5.336842105263158e-06,
508
- "loss": 0.0024,
509
  "step": 1975
510
  },
511
  {
512
  "epoch": 16.13,
513
- "learning_rate": 5.271052631578948e-06,
514
- "loss": 0.0015,
515
  "step": 2000
516
  },
517
  {
518
  "epoch": 16.13,
519
- "eval_loss": 0.36096739768981934,
520
- "eval_runtime": 2288.7771,
521
- "eval_samples_per_second": 0.562,
522
- "eval_steps_per_second": 0.562,
523
- "eval_wer": 16.125843469291272,
524
  "step": 2000
525
  },
526
  {
527
- "epoch": 16.33,
528
- "learning_rate": 5.205263157894738e-06,
529
- "loss": 0.0017,
530
- "step": 2025
531
- },
532
- {
533
- "epoch": 16.53,
534
- "learning_rate": 5.139473684210526e-06,
535
- "loss": 0.0017,
536
- "step": 2050
537
- },
538
- {
539
- "epoch": 16.73,
540
- "learning_rate": 5.073684210526316e-06,
541
- "loss": 0.0017,
542
- "step": 2075
543
- },
544
- {
545
- "epoch": 16.93,
546
- "learning_rate": 5.007894736842106e-06,
547
- "loss": 0.003,
548
- "step": 2100
549
- },
550
- {
551
- "epoch": 17.14,
552
- "learning_rate": 4.942105263157895e-06,
553
- "loss": 0.0017,
554
- "step": 2125
555
- },
556
- {
557
- "epoch": 17.34,
558
- "learning_rate": 4.876315789473684e-06,
559
- "loss": 0.0014,
560
- "step": 2150
561
- },
562
- {
563
- "epoch": 17.54,
564
- "learning_rate": 4.8105263157894735e-06,
565
- "loss": 0.0022,
566
- "step": 2175
567
- },
568
- {
569
- "epoch": 17.74,
570
- "learning_rate": 4.7447368421052634e-06,
571
- "loss": 0.0023,
572
- "step": 2200
573
- },
574
- {
575
- "epoch": 17.94,
576
- "learning_rate": 4.6789473684210525e-06,
577
- "loss": 0.0026,
578
- "step": 2225
579
- },
580
- {
581
- "epoch": 18.14,
582
- "learning_rate": 4.6131578947368425e-06,
583
- "loss": 0.0013,
584
- "step": 2250
585
- },
586
- {
587
- "epoch": 18.34,
588
- "learning_rate": 4.547368421052632e-06,
589
- "loss": 0.0012,
590
- "step": 2275
591
- },
592
- {
593
- "epoch": 18.54,
594
- "learning_rate": 4.481578947368422e-06,
595
- "loss": 0.0014,
596
- "step": 2300
597
- },
598
- {
599
- "epoch": 18.74,
600
- "learning_rate": 4.415789473684211e-06,
601
- "loss": 0.0017,
602
- "step": 2325
603
- },
604
- {
605
- "epoch": 18.94,
606
- "learning_rate": 4.350000000000001e-06,
607
- "loss": 0.002,
608
- "step": 2350
609
- },
610
- {
611
- "epoch": 19.15,
612
- "learning_rate": 4.28421052631579e-06,
613
- "loss": 0.0015,
614
- "step": 2375
615
- },
616
- {
617
- "epoch": 19.35,
618
- "learning_rate": 4.21842105263158e-06,
619
- "loss": 0.0016,
620
- "step": 2400
621
- },
622
- {
623
- "epoch": 19.55,
624
- "learning_rate": 4.152631578947369e-06,
625
- "loss": 0.0013,
626
- "step": 2425
627
- },
628
- {
629
- "epoch": 19.75,
630
- "learning_rate": 4.086842105263159e-06,
631
- "loss": 0.0014,
632
- "step": 2450
633
- },
634
- {
635
- "epoch": 19.95,
636
- "learning_rate": 4.021052631578948e-06,
637
- "loss": 0.0011,
638
- "step": 2475
639
- },
640
- {
641
- "epoch": 20.16,
642
- "learning_rate": 3.955263157894737e-06,
643
- "loss": 0.001,
644
- "step": 2500
645
- },
646
- {
647
- "epoch": 20.16,
648
- "eval_loss": 0.38679206371307373,
649
- "eval_runtime": 2297.8177,
650
- "eval_samples_per_second": 0.56,
651
- "eval_steps_per_second": 0.56,
652
- "eval_wer": 16.035586882709417,
653
- "step": 2500
654
- },
655
- {
656
- "epoch": 20.36,
657
- "learning_rate": 3.889473684210527e-06,
658
- "loss": 0.0013,
659
- "step": 2525
660
- },
661
- {
662
- "epoch": 20.56,
663
- "learning_rate": 3.823684210526316e-06,
664
- "loss": 0.0012,
665
- "step": 2550
666
- },
667
- {
668
- "epoch": 20.76,
669
- "learning_rate": 3.7578947368421053e-06,
670
- "loss": 0.001,
671
- "step": 2575
672
- },
673
- {
674
- "epoch": 20.96,
675
- "learning_rate": 3.6921052631578953e-06,
676
- "loss": 0.0012,
677
- "step": 2600
678
- },
679
- {
680
- "epoch": 21.17,
681
- "learning_rate": 3.6263157894736844e-06,
682
- "loss": 0.0013,
683
- "step": 2625
684
- },
685
- {
686
- "epoch": 21.37,
687
- "learning_rate": 3.5605263157894736e-06,
688
- "loss": 0.0008,
689
- "step": 2650
690
- },
691
- {
692
- "epoch": 21.57,
693
- "learning_rate": 3.4947368421052635e-06,
694
- "loss": 0.0013,
695
- "step": 2675
696
- },
697
- {
698
- "epoch": 21.77,
699
- "learning_rate": 3.4289473684210527e-06,
700
- "loss": 0.001,
701
- "step": 2700
702
- },
703
- {
704
- "epoch": 21.97,
705
- "learning_rate": 3.3631578947368426e-06,
706
- "loss": 0.0014,
707
- "step": 2725
708
- },
709
- {
710
- "epoch": 22.18,
711
- "learning_rate": 3.2973684210526318e-06,
712
- "loss": 0.0012,
713
- "step": 2750
714
- },
715
- {
716
- "epoch": 22.38,
717
- "learning_rate": 3.2315789473684213e-06,
718
- "loss": 0.0008,
719
- "step": 2775
720
- },
721
- {
722
- "epoch": 22.58,
723
- "learning_rate": 3.165789473684211e-06,
724
- "loss": 0.001,
725
- "step": 2800
726
- },
727
- {
728
- "epoch": 22.78,
729
- "learning_rate": 3.1000000000000004e-06,
730
- "loss": 0.0012,
731
- "step": 2825
732
- },
733
- {
734
- "epoch": 22.98,
735
- "learning_rate": 3.0342105263157895e-06,
736
- "loss": 0.001,
737
- "step": 2850
738
- },
739
- {
740
- "epoch": 23.18,
741
- "learning_rate": 2.9684210526315795e-06,
742
- "loss": 0.0008,
743
- "step": 2875
744
- },
745
- {
746
- "epoch": 23.38,
747
- "learning_rate": 2.9026315789473686e-06,
748
- "loss": 0.0007,
749
- "step": 2900
750
- },
751
- {
752
- "epoch": 23.58,
753
- "learning_rate": 2.8368421052631586e-06,
754
- "loss": 0.0011,
755
- "step": 2925
756
- },
757
- {
758
- "epoch": 23.78,
759
- "learning_rate": 2.7710526315789477e-06,
760
- "loss": 0.0015,
761
- "step": 2950
762
- },
763
- {
764
- "epoch": 23.98,
765
- "learning_rate": 2.705263157894737e-06,
766
- "loss": 0.0011,
767
- "step": 2975
768
- },
769
- {
770
- "epoch": 24.19,
771
- "learning_rate": 2.639473684210527e-06,
772
- "loss": 0.0011,
773
- "step": 3000
774
- },
775
- {
776
- "epoch": 24.19,
777
- "eval_loss": 0.394901841878891,
778
- "eval_runtime": 2297.2378,
779
- "eval_samples_per_second": 0.56,
780
- "eval_steps_per_second": 0.56,
781
- "eval_wer": 15.829286113379466,
782
- "step": 3000
783
- },
784
- {
785
- "epoch": 24.39,
786
- "learning_rate": 2.573684210526316e-06,
787
- "loss": 0.0007,
788
- "step": 3025
789
- },
790
- {
791
- "epoch": 24.59,
792
- "learning_rate": 2.5078947368421055e-06,
793
- "loss": 0.0012,
794
- "step": 3050
795
- },
796
- {
797
- "epoch": 24.79,
798
- "learning_rate": 2.442105263157895e-06,
799
- "loss": 0.0011,
800
- "step": 3075
801
- },
802
- {
803
- "epoch": 24.99,
804
- "learning_rate": 2.3763157894736846e-06,
805
- "loss": 0.0009,
806
- "step": 3100
807
- },
808
- {
809
- "epoch": 25.2,
810
- "learning_rate": 2.310526315789474e-06,
811
- "loss": 0.0009,
812
- "step": 3125
813
- },
814
- {
815
- "epoch": 25.4,
816
- "learning_rate": 2.2447368421052636e-06,
817
- "loss": 0.0008,
818
- "step": 3150
819
- },
820
- {
821
- "epoch": 25.6,
822
- "learning_rate": 2.1789473684210528e-06,
823
- "loss": 0.0009,
824
- "step": 3175
825
- },
826
- {
827
- "epoch": 25.8,
828
- "learning_rate": 2.1131578947368423e-06,
829
- "loss": 0.0009,
830
- "step": 3200
831
- },
832
- {
833
- "epoch": 26.01,
834
- "learning_rate": 2.047368421052632e-06,
835
- "loss": 0.0011,
836
- "step": 3225
837
- },
838
- {
839
- "epoch": 26.21,
840
- "learning_rate": 1.9815789473684214e-06,
841
- "loss": 0.0007,
842
- "step": 3250
843
- },
844
- {
845
- "epoch": 26.41,
846
- "learning_rate": 1.9157894736842105e-06,
847
- "loss": 0.001,
848
- "step": 3275
849
- },
850
- {
851
- "epoch": 26.61,
852
- "learning_rate": 1.85e-06,
853
- "loss": 0.0008,
854
- "step": 3300
855
- },
856
- {
857
- "epoch": 26.81,
858
- "learning_rate": 1.7842105263157896e-06,
859
- "loss": 0.0008,
860
- "step": 3325
861
- },
862
- {
863
- "epoch": 27.02,
864
- "learning_rate": 1.7184210526315792e-06,
865
- "loss": 0.0012,
866
- "step": 3350
867
- },
868
- {
869
- "epoch": 27.22,
870
- "learning_rate": 1.6526315789473685e-06,
871
- "loss": 0.0009,
872
- "step": 3375
873
- },
874
- {
875
- "epoch": 27.42,
876
- "learning_rate": 1.586842105263158e-06,
877
- "loss": 0.0009,
878
- "step": 3400
879
- },
880
- {
881
- "epoch": 27.62,
882
- "learning_rate": 1.5210526315789476e-06,
883
- "loss": 0.0009,
884
- "step": 3425
885
- },
886
- {
887
- "epoch": 27.82,
888
- "learning_rate": 1.4552631578947371e-06,
889
- "loss": 0.0009,
890
- "step": 3450
891
- },
892
- {
893
- "epoch": 28.02,
894
- "learning_rate": 1.3894736842105263e-06,
895
- "loss": 0.001,
896
- "step": 3475
897
- },
898
- {
899
- "epoch": 28.22,
900
- "learning_rate": 1.3236842105263158e-06,
901
- "loss": 0.0006,
902
- "step": 3500
903
- },
904
- {
905
- "epoch": 28.22,
906
- "eval_loss": 0.4028264284133911,
907
- "eval_runtime": 2295.0076,
908
- "eval_samples_per_second": 0.56,
909
- "eval_steps_per_second": 0.56,
910
- "eval_wer": 15.74762539218636,
911
- "step": 3500
912
- },
913
- {
914
- "epoch": 28.42,
915
- "learning_rate": 1.2578947368421054e-06,
916
- "loss": 0.0005,
917
- "step": 3525
918
- },
919
- {
920
- "epoch": 28.62,
921
- "learning_rate": 1.192105263157895e-06,
922
- "loss": 0.0006,
923
- "step": 3550
924
- },
925
- {
926
- "epoch": 28.82,
927
- "learning_rate": 1.1263157894736842e-06,
928
- "loss": 0.0011,
929
- "step": 3575
930
- },
931
- {
932
- "epoch": 29.03,
933
- "learning_rate": 1.0605263157894738e-06,
934
- "loss": 0.0012,
935
- "step": 3600
936
- },
937
- {
938
- "epoch": 29.23,
939
- "learning_rate": 9.947368421052631e-07,
940
- "loss": 0.0006,
941
- "step": 3625
942
- },
943
- {
944
- "epoch": 29.43,
945
- "learning_rate": 9.289473684210528e-07,
946
- "loss": 0.0008,
947
- "step": 3650
948
- },
949
- {
950
- "epoch": 29.63,
951
- "learning_rate": 8.631578947368421e-07,
952
- "loss": 0.0009,
953
- "step": 3675
954
- },
955
- {
956
- "epoch": 29.83,
957
- "learning_rate": 7.973684210526317e-07,
958
- "loss": 0.0008,
959
- "step": 3700
960
- },
961
- {
962
- "epoch": 30.04,
963
- "learning_rate": 7.315789473684211e-07,
964
- "loss": 0.0008,
965
- "step": 3725
966
- },
967
- {
968
- "epoch": 30.24,
969
- "learning_rate": 6.657894736842106e-07,
970
- "loss": 0.0007,
971
- "step": 3750
972
- },
973
- {
974
- "epoch": 30.44,
975
- "learning_rate": 6.000000000000001e-07,
976
- "loss": 0.0008,
977
- "step": 3775
978
- },
979
- {
980
- "epoch": 30.64,
981
- "learning_rate": 5.342105263157895e-07,
982
- "loss": 0.0009,
983
- "step": 3800
984
- },
985
- {
986
- "epoch": 30.84,
987
- "learning_rate": 4.6842105263157896e-07,
988
- "loss": 0.0008,
989
- "step": 3825
990
- },
991
- {
992
- "epoch": 31.05,
993
- "learning_rate": 4.0263157894736845e-07,
994
- "loss": 0.0007,
995
- "step": 3850
996
- },
997
- {
998
- "epoch": 31.25,
999
- "learning_rate": 3.368421052631579e-07,
1000
- "loss": 0.0006,
1001
- "step": 3875
1002
- },
1003
- {
1004
- "epoch": 31.45,
1005
- "learning_rate": 2.710526315789474e-07,
1006
- "loss": 0.0008,
1007
- "step": 3900
1008
- },
1009
- {
1010
- "epoch": 31.65,
1011
- "learning_rate": 2.0526315789473685e-07,
1012
- "loss": 0.0007,
1013
- "step": 3925
1014
- },
1015
- {
1016
- "epoch": 31.85,
1017
- "learning_rate": 1.3947368421052632e-07,
1018
- "loss": 0.001,
1019
- "step": 3950
1020
- },
1021
- {
1022
- "epoch": 32.06,
1023
- "learning_rate": 7.368421052631579e-08,
1024
- "loss": 0.0006,
1025
- "step": 3975
1026
- },
1027
- {
1028
- "epoch": 32.26,
1029
- "learning_rate": 7.894736842105264e-09,
1030
- "loss": 0.0008,
1031
- "step": 4000
1032
- },
1033
- {
1034
- "epoch": 32.26,
1035
- "eval_loss": 0.4131947159767151,
1036
- "eval_runtime": 2384.3215,
1037
- "eval_samples_per_second": 0.539,
1038
- "eval_steps_per_second": 0.539,
1039
- "eval_wer": 15.734731594103234,
1040
- "step": 4000
1041
- },
1042
- {
1043
- "epoch": 32.26,
1044
- "step": 4000,
1045
- "total_flos": 2.73804134842368e+20,
1046
- "train_loss": 0.017603627420263366,
1047
- "train_runtime": 74040.5343,
1048
- "train_samples_per_second": 1.729,
1049
- "train_steps_per_second": 0.054
1050
  }
1051
  ],
1052
- "max_steps": 4000,
1053
- "num_train_epochs": 33,
1054
- "total_flos": 2.73804134842368e+20,
1055
  "trial_name": null,
1056
  "trial_params": null
1057
  }
 
1
  {
2
+ "best_metric": 15.558516353633904,
3
+ "best_model_checkpoint": "/scratch/project_465000370/asr_children/models/whisper-large-et-clinic/checkpoint-1500",
4
+ "epoch": 16.12806403201601,
5
+ "global_step": 2000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
10
  {
11
  "epoch": 0.2,
12
  "learning_rate": 1.2000000000000002e-06,
13
+ "loss": 0.5006,
14
  "step": 25
15
  },
16
  {
17
  "epoch": 0.4,
18
+ "learning_rate": 2.4000000000000003e-06,
19
+ "loss": 0.2673,
20
  "step": 50
21
  },
22
  {
23
  "epoch": 0.6,
24
+ "learning_rate": 3.6000000000000003e-06,
25
+ "loss": 0.2076,
26
  "step": 75
27
  },
28
  {
29
  "epoch": 0.8,
30
+ "learning_rate": 4.85e-06,
31
+ "loss": 0.2199,
32
  "step": 100
33
  },
34
  {
35
  "epoch": 1.01,
36
+ "learning_rate": 6.1e-06,
37
+ "loss": 0.2007,
38
  "step": 125
39
  },
40
  {
41
  "epoch": 1.21,
42
+ "learning_rate": 7.350000000000001e-06,
43
+ "loss": 0.1242,
44
  "step": 150
45
  },
46
  {
47
  "epoch": 1.41,
48
+ "learning_rate": 8.6e-06,
49
+ "loss": 0.1211,
50
  "step": 175
51
  },
52
  {
53
  "epoch": 1.61,
54
+ "learning_rate": 9.85e-06,
55
+ "loss": 0.1206,
56
  "step": 200
57
  },
58
  {
59
  "epoch": 1.81,
60
+ "learning_rate": 9.877777777777778e-06,
61
+ "loss": 0.1183,
62
  "step": 225
63
  },
64
  {
65
  "epoch": 2.02,
66
+ "learning_rate": 9.73888888888889e-06,
67
+ "loss": 0.1292,
68
  "step": 250
69
  },
70
  {
71
  "epoch": 2.22,
72
+ "learning_rate": 9.605555555555556e-06,
73
+ "loss": 0.049,
74
  "step": 275
75
  },
76
  {
77
  "epoch": 2.42,
78
+ "learning_rate": 9.466666666666667e-06,
79
+ "loss": 0.0546,
80
  "step": 300
81
  },
82
  {
83
  "epoch": 2.62,
84
+ "learning_rate": 9.32777777777778e-06,
85
  "loss": 0.0605,
86
  "step": 325
87
  },
88
  {
89
  "epoch": 2.82,
90
+ "learning_rate": 9.188888888888889e-06,
91
+ "loss": 0.0569,
92
  "step": 350
93
  },
94
  {
95
  "epoch": 3.02,
96
+ "learning_rate": 9.050000000000001e-06,
97
+ "loss": 0.0469,
98
  "step": 375
99
  },
100
  {
101
  "epoch": 3.22,
102
+ "learning_rate": 8.91111111111111e-06,
103
+ "loss": 0.0267,
104
  "step": 400
105
  },
106
  {
107
  "epoch": 3.42,
108
+ "learning_rate": 8.772222222222223e-06,
109
+ "loss": 0.0254,
110
  "step": 425
111
  },
112
  {
113
  "epoch": 3.62,
114
+ "learning_rate": 8.633333333333334e-06,
115
+ "loss": 0.0272,
116
  "step": 450
117
  },
118
  {
119
  "epoch": 3.82,
120
+ "learning_rate": 8.494444444444445e-06,
121
+ "loss": 0.0274,
122
  "step": 475
123
  },
124
  {
125
  "epoch": 4.03,
126
+ "learning_rate": 8.355555555555556e-06,
127
+ "loss": 0.0302,
128
  "step": 500
129
  },
130
  {
131
  "epoch": 4.03,
132
+ "eval_loss": 0.2971087098121643,
133
+ "eval_runtime": 2340.0934,
134
+ "eval_samples_per_second": 0.55,
135
+ "eval_steps_per_second": 0.55,
136
+ "eval_wer": 16.289164911677485,
137
  "step": 500
138
  },
139
  {
140
  "epoch": 4.23,
141
+ "learning_rate": 8.216666666666667e-06,
142
+ "loss": 0.0143,
143
  "step": 525
144
  },
145
  {
146
  "epoch": 4.43,
147
+ "learning_rate": 8.077777777777778e-06,
148
+ "loss": 0.0142,
149
  "step": 550
150
  },
151
  {
152
  "epoch": 4.63,
153
+ "learning_rate": 7.938888888888889e-06,
154
+ "loss": 0.0159,
155
  "step": 575
156
  },
157
  {
158
  "epoch": 4.83,
159
+ "learning_rate": 7.800000000000002e-06,
160
+ "loss": 0.0144,
161
  "step": 600
162
  },
163
  {
164
  "epoch": 5.04,
165
+ "learning_rate": 7.66111111111111e-06,
166
+ "loss": 0.0137,
167
  "step": 625
168
  },
169
  {
170
  "epoch": 5.24,
171
+ "learning_rate": 7.5222222222222226e-06,
172
+ "loss": 0.0094,
173
  "step": 650
174
  },
175
  {
176
  "epoch": 5.44,
177
+ "learning_rate": 7.3833333333333335e-06,
178
+ "loss": 0.0103,
179
  "step": 675
180
  },
181
  {
182
  "epoch": 5.64,
183
+ "learning_rate": 7.244444444444445e-06,
184
+ "loss": 0.0095,
185
  "step": 700
186
  },
187
  {
188
  "epoch": 5.84,
189
+ "learning_rate": 7.105555555555556e-06,
190
+ "loss": 0.009,
191
  "step": 725
192
  },
193
  {
194
  "epoch": 6.05,
195
+ "learning_rate": 6.966666666666667e-06,
196
+ "loss": 0.0088,
197
  "step": 750
198
  },
199
  {
200
  "epoch": 6.25,
201
+ "learning_rate": 6.827777777777779e-06,
202
+ "loss": 0.0065,
203
  "step": 775
204
  },
205
  {
206
  "epoch": 6.45,
207
+ "learning_rate": 6.688888888888889e-06,
208
+ "loss": 0.0055,
209
  "step": 800
210
  },
211
  {
212
  "epoch": 6.65,
213
+ "learning_rate": 6.550000000000001e-06,
214
+ "loss": 0.0074,
215
  "step": 825
216
  },
217
  {
218
  "epoch": 6.85,
219
+ "learning_rate": 6.411111111111111e-06,
220
+ "loss": 0.0049,
221
  "step": 850
222
  },
223
  {
224
  "epoch": 7.06,
225
+ "learning_rate": 6.272222222222223e-06,
226
+ "loss": 0.0052,
227
  "step": 875
228
  },
229
  {
230
  "epoch": 7.26,
231
+ "learning_rate": 6.133333333333334e-06,
232
+ "loss": 0.0037,
233
  "step": 900
234
  },
235
  {
236
  "epoch": 7.46,
237
+ "learning_rate": 5.9944444444444446e-06,
238
+ "loss": 0.0034,
239
  "step": 925
240
  },
241
  {
242
  "epoch": 7.66,
243
+ "learning_rate": 5.855555555555556e-06,
244
+ "loss": 0.0032,
245
  "step": 950
246
  },
247
  {
248
  "epoch": 7.86,
249
+ "learning_rate": 5.716666666666667e-06,
250
+ "loss": 0.0035,
251
  "step": 975
252
  },
253
  {
254
  "epoch": 8.06,
255
+ "learning_rate": 5.577777777777778e-06,
256
+ "loss": 0.0042,
257
  "step": 1000
258
  },
259
  {
260
  "epoch": 8.06,
261
+ "eval_loss": 0.3406260907649994,
262
+ "eval_runtime": 2343.2491,
263
+ "eval_samples_per_second": 0.549,
264
+ "eval_steps_per_second": 0.549,
265
+ "eval_wer": 15.855073709545708,
266
  "step": 1000
267
  },
268
  {
269
  "epoch": 8.26,
270
+ "learning_rate": 5.438888888888889e-06,
271
+ "loss": 0.0023,
272
  "step": 1025
273
  },
274
  {
275
  "epoch": 8.46,
276
+ "learning_rate": 5.300000000000001e-06,
277
+ "loss": 0.0036,
278
  "step": 1050
279
  },
280
  {
281
  "epoch": 8.66,
282
+ "learning_rate": 5.161111111111111e-06,
283
+ "loss": 0.0027,
284
  "step": 1075
285
  },
286
  {
287
  "epoch": 8.86,
288
+ "learning_rate": 5.022222222222223e-06,
289
+ "loss": 0.0027,
290
  "step": 1100
291
  },
292
  {
293
  "epoch": 9.07,
294
+ "learning_rate": 4.883333333333334e-06,
295
+ "loss": 0.002,
296
  "step": 1125
297
  },
298
  {
299
  "epoch": 9.27,
300
+ "learning_rate": 4.744444444444445e-06,
301
+ "loss": 0.0022,
302
  "step": 1150
303
  },
304
  {
305
  "epoch": 9.47,
306
+ "learning_rate": 4.605555555555556e-06,
307
+ "loss": 0.002,
308
  "step": 1175
309
  },
310
  {
311
  "epoch": 9.67,
312
+ "learning_rate": 4.4666666666666665e-06,
313
+ "loss": 0.0019,
314
  "step": 1200
315
  },
316
  {
317
  "epoch": 9.87,
318
+ "learning_rate": 4.327777777777778e-06,
319
+ "loss": 0.0019,
320
  "step": 1225
321
  },
322
  {
323
  "epoch": 10.08,
324
+ "learning_rate": 4.188888888888889e-06,
325
+ "loss": 0.0017,
326
  "step": 1250
327
  },
328
  {
329
  "epoch": 10.28,
330
+ "learning_rate": 4.05e-06,
331
+ "loss": 0.0013,
332
  "step": 1275
333
  },
334
  {
335
  "epoch": 10.48,
336
+ "learning_rate": 3.911111111111112e-06,
337
+ "loss": 0.0026,
338
  "step": 1300
339
  },
340
  {
341
  "epoch": 10.68,
342
+ "learning_rate": 3.7722222222222225e-06,
343
+ "loss": 0.0018,
344
  "step": 1325
345
  },
346
  {
347
  "epoch": 10.88,
348
+ "learning_rate": 3.633333333333334e-06,
349
+ "loss": 0.0024,
350
  "step": 1350
351
  },
352
  {
353
  "epoch": 11.09,
354
+ "learning_rate": 3.4944444444444448e-06,
355
+ "loss": 0.0015,
356
  "step": 1375
357
  },
358
  {
359
  "epoch": 11.29,
360
+ "learning_rate": 3.3555555555555557e-06,
361
+ "loss": 0.0012,
362
  "step": 1400
363
  },
364
  {
365
  "epoch": 11.49,
366
+ "learning_rate": 3.2166666666666666e-06,
367
+ "loss": 0.0012,
368
  "step": 1425
369
  },
370
  {
371
  "epoch": 11.69,
372
+ "learning_rate": 3.077777777777778e-06,
373
+ "loss": 0.0017,
374
  "step": 1450
375
  },
376
  {
377
  "epoch": 11.89,
378
+ "learning_rate": 2.938888888888889e-06,
379
+ "loss": 0.0019,
380
  "step": 1475
381
  },
382
  {
383
  "epoch": 12.1,
384
+ "learning_rate": 2.8000000000000003e-06,
385
+ "loss": 0.0017,
386
  "step": 1500
387
  },
388
  {
389
  "epoch": 12.1,
390
+ "eval_loss": 0.37141528725624084,
391
+ "eval_runtime": 2358.6091,
392
+ "eval_samples_per_second": 0.545,
393
+ "eval_steps_per_second": 0.545,
394
+ "eval_wer": 15.558516353633904,
395
  "step": 1500
396
  },
397
  {
398
  "epoch": 12.3,
399
+ "learning_rate": 2.6611111111111117e-06,
400
+ "loss": 0.0012,
401
  "step": 1525
402
  },
403
  {
404
  "epoch": 12.5,
405
+ "learning_rate": 2.5222222222222226e-06,
406
+ "loss": 0.0016,
407
  "step": 1550
408
  },
409
  {
410
  "epoch": 12.7,
411
+ "learning_rate": 2.3833333333333335e-06,
412
+ "loss": 0.0016,
413
  "step": 1575
414
  },
415
  {
416
  "epoch": 12.9,
417
+ "learning_rate": 2.2444444444444445e-06,
418
+ "loss": 0.0011,
419
  "step": 1600
420
  },
421
  {
422
  "epoch": 13.1,
423
+ "learning_rate": 2.105555555555556e-06,
424
+ "loss": 0.0012,
425
  "step": 1625
426
  },
427
  {
428
  "epoch": 13.3,
429
+ "learning_rate": 1.9666666666666668e-06,
430
+ "loss": 0.0011,
431
  "step": 1650
432
  },
433
  {
434
  "epoch": 13.5,
435
+ "learning_rate": 1.8277777777777781e-06,
436
+ "loss": 0.0014,
437
  "step": 1675
438
  },
439
  {
440
  "epoch": 13.7,
441
+ "learning_rate": 1.688888888888889e-06,
442
+ "loss": 0.0011,
443
  "step": 1700
444
  },
445
  {
446
  "epoch": 13.9,
447
+ "learning_rate": 1.5500000000000002e-06,
448
+ "loss": 0.0014,
449
  "step": 1725
450
  },
451
  {
452
  "epoch": 14.11,
453
+ "learning_rate": 1.4111111111111111e-06,
454
+ "loss": 0.0013,
455
  "step": 1750
456
  },
457
  {
458
  "epoch": 14.31,
459
+ "learning_rate": 1.2722222222222223e-06,
460
+ "loss": 0.0012,
461
  "step": 1775
462
  },
463
  {
464
  "epoch": 14.51,
465
+ "learning_rate": 1.1333333333333334e-06,
466
+ "loss": 0.001,
467
  "step": 1800
468
  },
469
  {
470
  "epoch": 14.71,
471
+ "learning_rate": 9.944444444444446e-07,
472
+ "loss": 0.001,
473
  "step": 1825
474
  },
475
  {
476
  "epoch": 14.91,
477
+ "learning_rate": 8.555555555555556e-07,
478
+ "loss": 0.0012,
479
  "step": 1850
480
  },
481
  {
482
  "epoch": 15.12,
483
+ "learning_rate": 7.166666666666668e-07,
484
+ "loss": 0.0011,
485
  "step": 1875
486
  },
487
  {
488
  "epoch": 15.32,
489
+ "learning_rate": 5.777777777777778e-07,
490
+ "loss": 0.0011,
491
  "step": 1900
492
  },
493
  {
494
  "epoch": 15.52,
495
+ "learning_rate": 4.388888888888889e-07,
496
+ "loss": 0.0009,
497
  "step": 1925
498
  },
499
  {
500
  "epoch": 15.72,
501
+ "learning_rate": 3.0000000000000004e-07,
502
+ "loss": 0.0012,
503
  "step": 1950
504
  },
505
  {
506
  "epoch": 15.92,
507
+ "learning_rate": 1.6111111111111113e-07,
508
+ "loss": 0.0012,
509
  "step": 1975
510
  },
511
  {
512
  "epoch": 16.13,
513
+ "learning_rate": 2.2222222222222224e-08,
514
+ "loss": 0.0009,
515
  "step": 2000
516
  },
517
  {
518
  "epoch": 16.13,
519
+ "eval_loss": 0.393410325050354,
520
+ "eval_runtime": 2357.7409,
521
+ "eval_samples_per_second": 0.545,
522
+ "eval_steps_per_second": 0.545,
523
+ "eval_wer": 15.644475007521383,
524
  "step": 2000
525
  },
526
  {
527
+ "epoch": 16.13,
528
+ "step": 2000,
529
+ "total_flos": 1.36902067421184e+20,
530
+ "train_loss": 0.03306677715945989,
531
+ "train_runtime": 38266.3528,
532
+ "train_samples_per_second": 1.672,
533
+ "train_steps_per_second": 0.052
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
534
  }
535
  ],
536
+ "max_steps": 2000,
537
+ "num_train_epochs": 17,
538
+ "total_flos": 1.36902067421184e+20,
539
  "trial_name": null,
540
  "trial_params": null
541
  }