whisper-large-v3-eu / trainer_state.json
xezpeleta's picture
End of training
9b7997b verified
{
"best_metric": 6.544273760459599,
"best_model_checkpoint": "./checkpoint-19500",
"epoch": 1.0,
"eval_steps": 500,
"global_step": 20000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00125,
"grad_norm": 10.480854034423828,
"learning_rate": 1.0499999999999999e-07,
"loss": 1.4117,
"step": 25
},
{
"epoch": 0.0025,
"grad_norm": 9.908479690551758,
"learning_rate": 2.1437499999999999e-07,
"loss": 1.4142,
"step": 50
},
{
"epoch": 0.00375,
"grad_norm": 7.737735748291016,
"learning_rate": 3.2374999999999997e-07,
"loss": 1.2755,
"step": 75
},
{
"epoch": 0.005,
"grad_norm": 5.847314834594727,
"learning_rate": 4.33125e-07,
"loss": 0.8497,
"step": 100
},
{
"epoch": 0.00625,
"grad_norm": 5.064878463745117,
"learning_rate": 5.425e-07,
"loss": 0.5289,
"step": 125
},
{
"epoch": 0.0075,
"grad_norm": 4.789758205413818,
"learning_rate": 6.518749999999999e-07,
"loss": 0.5181,
"step": 150
},
{
"epoch": 0.00875,
"grad_norm": 4.744896411895752,
"learning_rate": 7.612499999999999e-07,
"loss": 0.3765,
"step": 175
},
{
"epoch": 0.01,
"grad_norm": 4.427361965179443,
"learning_rate": 8.706249999999999e-07,
"loss": 0.3585,
"step": 200
},
{
"epoch": 0.01125,
"grad_norm": 4.670985698699951,
"learning_rate": 9.8e-07,
"loss": 0.3219,
"step": 225
},
{
"epoch": 0.0125,
"grad_norm": 3.8691747188568115,
"learning_rate": 1.0893749999999998e-06,
"loss": 0.3409,
"step": 250
},
{
"epoch": 0.01375,
"grad_norm": 4.622318267822266,
"learning_rate": 1.19875e-06,
"loss": 0.2899,
"step": 275
},
{
"epoch": 0.015,
"grad_norm": 3.7900593280792236,
"learning_rate": 1.3081249999999999e-06,
"loss": 0.2834,
"step": 300
},
{
"epoch": 0.01625,
"grad_norm": 3.9323770999908447,
"learning_rate": 1.4175e-06,
"loss": 0.2643,
"step": 325
},
{
"epoch": 0.0175,
"grad_norm": 3.708969831466675,
"learning_rate": 1.5268749999999997e-06,
"loss": 0.2827,
"step": 350
},
{
"epoch": 0.01875,
"grad_norm": 3.707580804824829,
"learning_rate": 1.6362499999999998e-06,
"loss": 0.274,
"step": 375
},
{
"epoch": 0.02,
"grad_norm": 3.8519480228424072,
"learning_rate": 1.745625e-06,
"loss": 0.2568,
"step": 400
},
{
"epoch": 0.02125,
"grad_norm": 4.571149826049805,
"learning_rate": 1.8549999999999998e-06,
"loss": 0.2376,
"step": 425
},
{
"epoch": 0.0225,
"grad_norm": 3.2112503051757812,
"learning_rate": 1.9643749999999997e-06,
"loss": 0.2212,
"step": 450
},
{
"epoch": 0.02375,
"grad_norm": 6.173221588134766,
"learning_rate": 2.07375e-06,
"loss": 0.2238,
"step": 475
},
{
"epoch": 0.025,
"grad_norm": 4.781201362609863,
"learning_rate": 2.183125e-06,
"loss": 0.2854,
"step": 500
},
{
"epoch": 0.025,
"eval_loss": 0.41938766837120056,
"eval_runtime": 531.5621,
"eval_samples_per_second": 3.181,
"eval_steps_per_second": 0.399,
"eval_wer": 25.88984638441364,
"step": 500
},
{
"epoch": 0.02625,
"grad_norm": 4.559605598449707,
"learning_rate": 2.2925e-06,
"loss": 0.2996,
"step": 525
},
{
"epoch": 0.0275,
"grad_norm": 5.1446852684021,
"learning_rate": 2.401875e-06,
"loss": 0.2791,
"step": 550
},
{
"epoch": 0.02875,
"grad_norm": 4.178796768188477,
"learning_rate": 2.5112499999999995e-06,
"loss": 0.3061,
"step": 575
},
{
"epoch": 0.03,
"grad_norm": 4.030816555023193,
"learning_rate": 2.6206249999999996e-06,
"loss": 0.2768,
"step": 600
},
{
"epoch": 0.03125,
"grad_norm": 4.405904769897461,
"learning_rate": 2.7299999999999997e-06,
"loss": 0.2678,
"step": 625
},
{
"epoch": 0.0325,
"grad_norm": 3.5179622173309326,
"learning_rate": 2.839375e-06,
"loss": 0.2363,
"step": 650
},
{
"epoch": 0.03375,
"grad_norm": 3.767529010772705,
"learning_rate": 2.94875e-06,
"loss": 0.219,
"step": 675
},
{
"epoch": 0.035,
"grad_norm": 2.898439645767212,
"learning_rate": 3.0581249999999996e-06,
"loss": 0.1913,
"step": 700
},
{
"epoch": 0.03625,
"grad_norm": 3.3581595420837402,
"learning_rate": 3.1674999999999997e-06,
"loss": 0.173,
"step": 725
},
{
"epoch": 0.0375,
"grad_norm": 2.872340202331543,
"learning_rate": 3.276875e-06,
"loss": 0.176,
"step": 750
},
{
"epoch": 0.03875,
"grad_norm": 3.1199734210968018,
"learning_rate": 3.38625e-06,
"loss": 0.1808,
"step": 775
},
{
"epoch": 0.04,
"grad_norm": 2.626908779144287,
"learning_rate": 3.495625e-06,
"loss": 0.1802,
"step": 800
},
{
"epoch": 0.04125,
"grad_norm": 2.776141881942749,
"learning_rate": 3.6049999999999994e-06,
"loss": 0.1761,
"step": 825
},
{
"epoch": 0.0425,
"grad_norm": 3.149322509765625,
"learning_rate": 3.7143749999999995e-06,
"loss": 0.1569,
"step": 850
},
{
"epoch": 0.04375,
"grad_norm": 2.776301145553589,
"learning_rate": 3.82375e-06,
"loss": 0.1607,
"step": 875
},
{
"epoch": 0.045,
"grad_norm": 2.8884706497192383,
"learning_rate": 3.933125e-06,
"loss": 0.1582,
"step": 900
},
{
"epoch": 0.04625,
"grad_norm": 3.991647720336914,
"learning_rate": 4.0425e-06,
"loss": 0.1511,
"step": 925
},
{
"epoch": 0.0475,
"grad_norm": 2.892364740371704,
"learning_rate": 4.151874999999999e-06,
"loss": 0.1434,
"step": 950
},
{
"epoch": 0.04875,
"grad_norm": 3.7825698852539062,
"learning_rate": 4.261249999999999e-06,
"loss": 0.148,
"step": 975
},
{
"epoch": 0.05,
"grad_norm": 2.511293649673462,
"learning_rate": 4.370624999999999e-06,
"loss": 0.1425,
"step": 1000
},
{
"epoch": 0.05,
"eval_loss": 0.39225178956985474,
"eval_runtime": 528.4404,
"eval_samples_per_second": 3.2,
"eval_steps_per_second": 0.401,
"eval_wer": 20.507056325715002,
"step": 1000
},
{
"epoch": 0.05125,
"grad_norm": 1.8921111822128296,
"learning_rate": 4.369473684210526e-06,
"loss": 0.1395,
"step": 1025
},
{
"epoch": 0.0525,
"grad_norm": 3.18829607963562,
"learning_rate": 4.363717105263158e-06,
"loss": 0.1294,
"step": 1050
},
{
"epoch": 0.05375,
"grad_norm": 2.508878231048584,
"learning_rate": 4.357960526315789e-06,
"loss": 0.1366,
"step": 1075
},
{
"epoch": 0.055,
"grad_norm": 2.201958179473877,
"learning_rate": 4.352203947368421e-06,
"loss": 0.14,
"step": 1100
},
{
"epoch": 0.05625,
"grad_norm": 2.756673574447632,
"learning_rate": 4.346447368421052e-06,
"loss": 0.1355,
"step": 1125
},
{
"epoch": 0.0575,
"grad_norm": 3.084169864654541,
"learning_rate": 4.340690789473684e-06,
"loss": 0.1278,
"step": 1150
},
{
"epoch": 0.05875,
"grad_norm": 2.486377239227295,
"learning_rate": 4.334934210526315e-06,
"loss": 0.1298,
"step": 1175
},
{
"epoch": 0.06,
"grad_norm": 3.5559706687927246,
"learning_rate": 4.329177631578947e-06,
"loss": 0.1352,
"step": 1200
},
{
"epoch": 0.06125,
"grad_norm": 2.6353018283843994,
"learning_rate": 4.323421052631579e-06,
"loss": 0.1163,
"step": 1225
},
{
"epoch": 0.0625,
"grad_norm": 2.8629567623138428,
"learning_rate": 4.31766447368421e-06,
"loss": 0.1199,
"step": 1250
},
{
"epoch": 0.06375,
"grad_norm": 2.9020206928253174,
"learning_rate": 4.311907894736842e-06,
"loss": 0.1206,
"step": 1275
},
{
"epoch": 0.065,
"grad_norm": 2.4626991748809814,
"learning_rate": 4.306151315789473e-06,
"loss": 0.1395,
"step": 1300
},
{
"epoch": 0.06625,
"grad_norm": 2.9234840869903564,
"learning_rate": 4.300394736842105e-06,
"loss": 0.1269,
"step": 1325
},
{
"epoch": 0.0675,
"grad_norm": 3.017625570297241,
"learning_rate": 4.294638157894737e-06,
"loss": 0.1228,
"step": 1350
},
{
"epoch": 0.06875,
"grad_norm": 2.5392937660217285,
"learning_rate": 4.288881578947368e-06,
"loss": 0.1273,
"step": 1375
},
{
"epoch": 0.07,
"grad_norm": 3.2523694038391113,
"learning_rate": 4.283125e-06,
"loss": 0.1242,
"step": 1400
},
{
"epoch": 0.07125,
"grad_norm": 2.633652448654175,
"learning_rate": 4.277368421052632e-06,
"loss": 0.1341,
"step": 1425
},
{
"epoch": 0.0725,
"grad_norm": 3.952681064605713,
"learning_rate": 4.271611842105263e-06,
"loss": 0.1588,
"step": 1450
},
{
"epoch": 0.07375,
"grad_norm": 3.9815685749053955,
"learning_rate": 4.265855263157895e-06,
"loss": 0.1879,
"step": 1475
},
{
"epoch": 0.075,
"grad_norm": 3.197030544281006,
"learning_rate": 4.260098684210526e-06,
"loss": 0.2199,
"step": 1500
},
{
"epoch": 0.075,
"eval_loss": 0.3290639817714691,
"eval_runtime": 535.231,
"eval_samples_per_second": 3.159,
"eval_steps_per_second": 0.396,
"eval_wer": 17.478456350693143,
"step": 1500
},
{
"epoch": 0.07625,
"grad_norm": 3.8294057846069336,
"learning_rate": 4.254342105263158e-06,
"loss": 0.205,
"step": 1525
},
{
"epoch": 0.0775,
"grad_norm": 3.339564085006714,
"learning_rate": 4.248585526315789e-06,
"loss": 0.1793,
"step": 1550
},
{
"epoch": 0.07875,
"grad_norm": 4.41719913482666,
"learning_rate": 4.242828947368421e-06,
"loss": 0.1903,
"step": 1575
},
{
"epoch": 0.08,
"grad_norm": 4.329945087432861,
"learning_rate": 4.237072368421052e-06,
"loss": 0.2487,
"step": 1600
},
{
"epoch": 0.08125,
"grad_norm": 2.858635425567627,
"learning_rate": 4.231315789473684e-06,
"loss": 0.16,
"step": 1625
},
{
"epoch": 0.0825,
"grad_norm": 2.6474554538726807,
"learning_rate": 4.225559210526316e-06,
"loss": 0.1294,
"step": 1650
},
{
"epoch": 0.08375,
"grad_norm": 2.6311450004577637,
"learning_rate": 4.219802631578947e-06,
"loss": 0.1199,
"step": 1675
},
{
"epoch": 0.085,
"grad_norm": 2.472925901412964,
"learning_rate": 4.214046052631579e-06,
"loss": 0.1106,
"step": 1700
},
{
"epoch": 0.08625,
"grad_norm": 2.1684815883636475,
"learning_rate": 4.20828947368421e-06,
"loss": 0.1081,
"step": 1725
},
{
"epoch": 0.0875,
"grad_norm": 2.2405142784118652,
"learning_rate": 4.202532894736842e-06,
"loss": 0.1024,
"step": 1750
},
{
"epoch": 0.08875,
"grad_norm": 3.28480863571167,
"learning_rate": 4.196776315789474e-06,
"loss": 0.1395,
"step": 1775
},
{
"epoch": 0.09,
"grad_norm": 2.734311819076538,
"learning_rate": 4.191019736842105e-06,
"loss": 0.1685,
"step": 1800
},
{
"epoch": 0.09125,
"grad_norm": 3.3384852409362793,
"learning_rate": 4.185263157894737e-06,
"loss": 0.179,
"step": 1825
},
{
"epoch": 0.0925,
"grad_norm": 4.151054859161377,
"learning_rate": 4.1795065789473686e-06,
"loss": 0.1863,
"step": 1850
},
{
"epoch": 0.09375,
"grad_norm": 3.854214668273926,
"learning_rate": 4.17375e-06,
"loss": 0.1753,
"step": 1875
},
{
"epoch": 0.095,
"grad_norm": 3.3321709632873535,
"learning_rate": 4.1679934210526316e-06,
"loss": 0.1684,
"step": 1900
},
{
"epoch": 0.09625,
"grad_norm": 2.8302998542785645,
"learning_rate": 4.162236842105263e-06,
"loss": 0.1818,
"step": 1925
},
{
"epoch": 0.0975,
"grad_norm": 5.4296555519104,
"learning_rate": 4.156480263157895e-06,
"loss": 0.5523,
"step": 1950
},
{
"epoch": 0.09875,
"grad_norm": 3.8675997257232666,
"learning_rate": 4.1507236842105265e-06,
"loss": 0.3352,
"step": 1975
},
{
"epoch": 0.1,
"grad_norm": 3.9055581092834473,
"learning_rate": 4.144967105263158e-06,
"loss": 0.2343,
"step": 2000
},
{
"epoch": 0.1,
"eval_loss": 0.2860749065876007,
"eval_runtime": 530.1633,
"eval_samples_per_second": 3.19,
"eval_steps_per_second": 0.4,
"eval_wer": 14.13138503809167,
"step": 2000
},
{
"epoch": 0.10125,
"grad_norm": 3.413243293762207,
"learning_rate": 4.1392105263157895e-06,
"loss": 0.2233,
"step": 2025
},
{
"epoch": 0.1025,
"grad_norm": 3.1129419803619385,
"learning_rate": 4.133453947368421e-06,
"loss": 0.2076,
"step": 2050
},
{
"epoch": 0.10375,
"grad_norm": 3.0855767726898193,
"learning_rate": 4.1276973684210525e-06,
"loss": 0.1675,
"step": 2075
},
{
"epoch": 0.105,
"grad_norm": 2.5053539276123047,
"learning_rate": 4.121940789473684e-06,
"loss": 0.1291,
"step": 2100
},
{
"epoch": 0.10625,
"grad_norm": 2.078958511352539,
"learning_rate": 4.1161842105263155e-06,
"loss": 0.1036,
"step": 2125
},
{
"epoch": 0.1075,
"grad_norm": 2.436898708343506,
"learning_rate": 4.110427631578947e-06,
"loss": 0.1153,
"step": 2150
},
{
"epoch": 0.10875,
"grad_norm": 2.3834900856018066,
"learning_rate": 4.1046710526315786e-06,
"loss": 0.0943,
"step": 2175
},
{
"epoch": 0.11,
"grad_norm": 2.070406913757324,
"learning_rate": 4.09891447368421e-06,
"loss": 0.0898,
"step": 2200
},
{
"epoch": 0.11125,
"grad_norm": 2.0004026889801025,
"learning_rate": 4.0931578947368416e-06,
"loss": 0.0912,
"step": 2225
},
{
"epoch": 0.1125,
"grad_norm": 2.4464359283447266,
"learning_rate": 4.0874013157894735e-06,
"loss": 0.0907,
"step": 2250
},
{
"epoch": 0.11375,
"grad_norm": 2.8847742080688477,
"learning_rate": 4.081644736842105e-06,
"loss": 0.0978,
"step": 2275
},
{
"epoch": 0.115,
"grad_norm": 2.167893171310425,
"learning_rate": 4.0758881578947365e-06,
"loss": 0.0968,
"step": 2300
},
{
"epoch": 0.11625,
"grad_norm": 1.575804352760315,
"learning_rate": 4.0701315789473684e-06,
"loss": 0.0976,
"step": 2325
},
{
"epoch": 0.1175,
"grad_norm": 2.3064370155334473,
"learning_rate": 4.0643749999999995e-06,
"loss": 0.0966,
"step": 2350
},
{
"epoch": 0.11875,
"grad_norm": 1.9859708547592163,
"learning_rate": 4.0586184210526314e-06,
"loss": 0.0958,
"step": 2375
},
{
"epoch": 0.12,
"grad_norm": 1.7534265518188477,
"learning_rate": 4.052861842105263e-06,
"loss": 0.0966,
"step": 2400
},
{
"epoch": 0.12125,
"grad_norm": 3.8807549476623535,
"learning_rate": 4.0471052631578945e-06,
"loss": 0.1367,
"step": 2425
},
{
"epoch": 0.1225,
"grad_norm": 3.933382987976074,
"learning_rate": 4.041348684210526e-06,
"loss": 0.1445,
"step": 2450
},
{
"epoch": 0.12375,
"grad_norm": 3.3107643127441406,
"learning_rate": 4.0355921052631575e-06,
"loss": 0.1486,
"step": 2475
},
{
"epoch": 0.125,
"grad_norm": 2.699190139770508,
"learning_rate": 4.029835526315789e-06,
"loss": 0.1391,
"step": 2500
},
{
"epoch": 0.125,
"eval_loss": 0.2906411290168762,
"eval_runtime": 531.8376,
"eval_samples_per_second": 3.18,
"eval_steps_per_second": 0.399,
"eval_wer": 13.31335081803422,
"step": 2500
},
{
"epoch": 0.12625,
"grad_norm": 3.5841128826141357,
"learning_rate": 4.0240789473684205e-06,
"loss": 0.1628,
"step": 2525
},
{
"epoch": 0.1275,
"grad_norm": 3.2463104724884033,
"learning_rate": 4.018322368421052e-06,
"loss": 0.1705,
"step": 2550
},
{
"epoch": 0.12875,
"grad_norm": 5.094871520996094,
"learning_rate": 4.0125657894736835e-06,
"loss": 0.1663,
"step": 2575
},
{
"epoch": 0.13,
"grad_norm": 3.0208804607391357,
"learning_rate": 4.0068092105263154e-06,
"loss": 0.1712,
"step": 2600
},
{
"epoch": 0.13125,
"grad_norm": 3.4919967651367188,
"learning_rate": 4.0010526315789465e-06,
"loss": 0.1498,
"step": 2625
},
{
"epoch": 0.1325,
"grad_norm": 3.4352219104766846,
"learning_rate": 3.9952960526315784e-06,
"loss": 0.1423,
"step": 2650
},
{
"epoch": 0.13375,
"grad_norm": 3.63608455657959,
"learning_rate": 3.98953947368421e-06,
"loss": 0.1565,
"step": 2675
},
{
"epoch": 0.135,
"grad_norm": 3.6155622005462646,
"learning_rate": 3.9837828947368414e-06,
"loss": 0.1553,
"step": 2700
},
{
"epoch": 0.13625,
"grad_norm": 3.4833076000213623,
"learning_rate": 3.978026315789473e-06,
"loss": 0.1416,
"step": 2725
},
{
"epoch": 0.1375,
"grad_norm": 3.147080421447754,
"learning_rate": 3.9722697368421045e-06,
"loss": 0.1469,
"step": 2750
},
{
"epoch": 0.13875,
"grad_norm": 2.891146659851074,
"learning_rate": 3.966513157894736e-06,
"loss": 0.128,
"step": 2775
},
{
"epoch": 0.14,
"grad_norm": 3.1411890983581543,
"learning_rate": 3.960756578947368e-06,
"loss": 0.1372,
"step": 2800
},
{
"epoch": 0.14125,
"grad_norm": 3.836360216140747,
"learning_rate": 3.954999999999999e-06,
"loss": 0.149,
"step": 2825
},
{
"epoch": 0.1425,
"grad_norm": 4.1377339363098145,
"learning_rate": 3.949243421052631e-06,
"loss": 0.1383,
"step": 2850
},
{
"epoch": 0.14375,
"grad_norm": 3.7640268802642822,
"learning_rate": 3.943486842105263e-06,
"loss": 0.1492,
"step": 2875
},
{
"epoch": 0.145,
"grad_norm": 3.452561855316162,
"learning_rate": 3.937730263157894e-06,
"loss": 0.1288,
"step": 2900
},
{
"epoch": 0.14625,
"grad_norm": 1.9712022542953491,
"learning_rate": 3.931973684210526e-06,
"loss": 0.1137,
"step": 2925
},
{
"epoch": 0.1475,
"grad_norm": 2.0524768829345703,
"learning_rate": 3.926217105263157e-06,
"loss": 0.0973,
"step": 2950
},
{
"epoch": 0.14875,
"grad_norm": 2.3722898960113525,
"learning_rate": 3.920460526315789e-06,
"loss": 0.0915,
"step": 2975
},
{
"epoch": 0.15,
"grad_norm": 2.5048534870147705,
"learning_rate": 3.914703947368421e-06,
"loss": 0.0853,
"step": 3000
},
{
"epoch": 0.15,
"eval_loss": 0.26879894733428955,
"eval_runtime": 531.192,
"eval_samples_per_second": 3.183,
"eval_steps_per_second": 0.399,
"eval_wer": 12.045710003746722,
"step": 3000
},
{
"epoch": 0.15125,
"grad_norm": 2.009464740753174,
"learning_rate": 3.908947368421052e-06,
"loss": 0.1037,
"step": 3025
},
{
"epoch": 0.1525,
"grad_norm": 2.3635034561157227,
"learning_rate": 3.903190789473684e-06,
"loss": 0.0889,
"step": 3050
},
{
"epoch": 0.15375,
"grad_norm": 3.131683111190796,
"learning_rate": 3.897434210526315e-06,
"loss": 0.0895,
"step": 3075
},
{
"epoch": 0.155,
"grad_norm": 2.2032673358917236,
"learning_rate": 3.891677631578947e-06,
"loss": 0.0955,
"step": 3100
},
{
"epoch": 0.15625,
"grad_norm": 1.8079180717468262,
"learning_rate": 3.885921052631578e-06,
"loss": 0.0785,
"step": 3125
},
{
"epoch": 0.1575,
"grad_norm": 2.2879910469055176,
"learning_rate": 3.88016447368421e-06,
"loss": 0.0721,
"step": 3150
},
{
"epoch": 0.15875,
"grad_norm": 2.491487979888916,
"learning_rate": 3.874407894736841e-06,
"loss": 0.0817,
"step": 3175
},
{
"epoch": 0.16,
"grad_norm": 2.996129035949707,
"learning_rate": 3.868651315789473e-06,
"loss": 0.0877,
"step": 3200
},
{
"epoch": 0.16125,
"grad_norm": 2.9992258548736572,
"learning_rate": 3.862894736842104e-06,
"loss": 0.1011,
"step": 3225
},
{
"epoch": 0.1625,
"grad_norm": 2.5464529991149902,
"learning_rate": 3.857138157894736e-06,
"loss": 0.1385,
"step": 3250
},
{
"epoch": 0.16375,
"grad_norm": 2.853933095932007,
"learning_rate": 3.851381578947368e-06,
"loss": 0.1223,
"step": 3275
},
{
"epoch": 0.165,
"grad_norm": 2.3290011882781982,
"learning_rate": 3.845624999999999e-06,
"loss": 0.1236,
"step": 3300
},
{
"epoch": 0.16625,
"grad_norm": 2.61714768409729,
"learning_rate": 3.839868421052631e-06,
"loss": 0.1167,
"step": 3325
},
{
"epoch": 0.1675,
"grad_norm": 3.926612377166748,
"learning_rate": 3.834111842105263e-06,
"loss": 0.1306,
"step": 3350
},
{
"epoch": 0.16875,
"grad_norm": 2.9979617595672607,
"learning_rate": 3.828355263157894e-06,
"loss": 0.1383,
"step": 3375
},
{
"epoch": 0.17,
"grad_norm": 2.879436492919922,
"learning_rate": 3.822598684210526e-06,
"loss": 0.1112,
"step": 3400
},
{
"epoch": 0.17125,
"grad_norm": 2.706355571746826,
"learning_rate": 3.816842105263158e-06,
"loss": 0.1006,
"step": 3425
},
{
"epoch": 0.1725,
"grad_norm": 2.263953685760498,
"learning_rate": 3.811085526315789e-06,
"loss": 0.089,
"step": 3450
},
{
"epoch": 0.17375,
"grad_norm": 3.070748805999756,
"learning_rate": 3.8053289473684207e-06,
"loss": 0.0801,
"step": 3475
},
{
"epoch": 0.175,
"grad_norm": 2.202629566192627,
"learning_rate": 3.799572368421052e-06,
"loss": 0.0866,
"step": 3500
},
{
"epoch": 0.175,
"eval_loss": 0.2575243413448334,
"eval_runtime": 531.9323,
"eval_samples_per_second": 3.179,
"eval_steps_per_second": 0.399,
"eval_wer": 11.471212688897214,
"step": 3500
},
{
"epoch": 0.17625,
"grad_norm": 2.1003735065460205,
"learning_rate": 3.793815789473684e-06,
"loss": 0.0794,
"step": 3525
},
{
"epoch": 0.1775,
"grad_norm": 2.4936602115631104,
"learning_rate": 3.788059210526315e-06,
"loss": 0.0754,
"step": 3550
},
{
"epoch": 0.17875,
"grad_norm": 2.2320945262908936,
"learning_rate": 3.782302631578947e-06,
"loss": 0.0906,
"step": 3575
},
{
"epoch": 0.18,
"grad_norm": 1.4985826015472412,
"learning_rate": 3.7765460526315786e-06,
"loss": 0.0872,
"step": 3600
},
{
"epoch": 0.18125,
"grad_norm": 1.6096969842910767,
"learning_rate": 3.77078947368421e-06,
"loss": 0.0783,
"step": 3625
},
{
"epoch": 0.1825,
"grad_norm": 1.8306738138198853,
"learning_rate": 3.7650328947368416e-06,
"loss": 0.0783,
"step": 3650
},
{
"epoch": 0.18375,
"grad_norm": 1.972235918045044,
"learning_rate": 3.759276315789473e-06,
"loss": 0.0789,
"step": 3675
},
{
"epoch": 0.185,
"grad_norm": 2.0266051292419434,
"learning_rate": 3.753519736842105e-06,
"loss": 0.0708,
"step": 3700
},
{
"epoch": 0.18625,
"grad_norm": 1.6395690441131592,
"learning_rate": 3.7477631578947366e-06,
"loss": 0.0768,
"step": 3725
},
{
"epoch": 0.1875,
"grad_norm": 1.8886572122573853,
"learning_rate": 3.742006578947368e-06,
"loss": 0.0745,
"step": 3750
},
{
"epoch": 0.18875,
"grad_norm": 3.0105178356170654,
"learning_rate": 3.7362499999999996e-06,
"loss": 0.0816,
"step": 3775
},
{
"epoch": 0.19,
"grad_norm": 1.8376508951187134,
"learning_rate": 3.7304934210526315e-06,
"loss": 0.082,
"step": 3800
},
{
"epoch": 0.19125,
"grad_norm": 1.758370041847229,
"learning_rate": 3.7247368421052626e-06,
"loss": 0.0798,
"step": 3825
},
{
"epoch": 0.1925,
"grad_norm": 1.2405736446380615,
"learning_rate": 3.7189802631578945e-06,
"loss": 0.0773,
"step": 3850
},
{
"epoch": 0.19375,
"grad_norm": 1.8085663318634033,
"learning_rate": 3.713223684210526e-06,
"loss": 0.0861,
"step": 3875
},
{
"epoch": 0.195,
"grad_norm": 3.838613986968994,
"learning_rate": 3.7074671052631575e-06,
"loss": 0.1032,
"step": 3900
},
{
"epoch": 0.19625,
"grad_norm": 3.087472438812256,
"learning_rate": 3.701710526315789e-06,
"loss": 0.133,
"step": 3925
},
{
"epoch": 0.1975,
"grad_norm": 2.3854024410247803,
"learning_rate": 3.6959539473684206e-06,
"loss": 0.1366,
"step": 3950
},
{
"epoch": 0.19875,
"grad_norm": 3.235400676727295,
"learning_rate": 3.690197368421052e-06,
"loss": 0.1275,
"step": 3975
},
{
"epoch": 0.2,
"grad_norm": 2.7316720485687256,
"learning_rate": 3.684440789473684e-06,
"loss": 0.1311,
"step": 4000
},
{
"epoch": 0.2,
"eval_loss": 0.24720044434070587,
"eval_runtime": 536.8744,
"eval_samples_per_second": 3.15,
"eval_steps_per_second": 0.395,
"eval_wer": 12.482827525914825,
"step": 4000
},
{
"epoch": 0.20125,
"grad_norm": 2.4120874404907227,
"learning_rate": 3.6786842105263155e-06,
"loss": 0.1215,
"step": 4025
},
{
"epoch": 0.2025,
"grad_norm": 2.5485270023345947,
"learning_rate": 3.672927631578947e-06,
"loss": 0.0983,
"step": 4050
},
{
"epoch": 0.20375,
"grad_norm": 2.2741594314575195,
"learning_rate": 3.667171052631579e-06,
"loss": 0.0764,
"step": 4075
},
{
"epoch": 0.205,
"grad_norm": 1.875857949256897,
"learning_rate": 3.66141447368421e-06,
"loss": 0.0733,
"step": 4100
},
{
"epoch": 0.20625,
"grad_norm": 1.8897082805633545,
"learning_rate": 3.655657894736842e-06,
"loss": 0.0797,
"step": 4125
},
{
"epoch": 0.2075,
"grad_norm": 1.5462270975112915,
"learning_rate": 3.6499013157894735e-06,
"loss": 0.0772,
"step": 4150
},
{
"epoch": 0.20875,
"grad_norm": 2.1055002212524414,
"learning_rate": 3.644144736842105e-06,
"loss": 0.079,
"step": 4175
},
{
"epoch": 0.21,
"grad_norm": 2.8036248683929443,
"learning_rate": 3.6383881578947365e-06,
"loss": 0.0828,
"step": 4200
},
{
"epoch": 0.21125,
"grad_norm": 1.496777892112732,
"learning_rate": 3.6326315789473684e-06,
"loss": 0.0658,
"step": 4225
},
{
"epoch": 0.2125,
"grad_norm": 2.213822364807129,
"learning_rate": 3.6268749999999995e-06,
"loss": 0.0722,
"step": 4250
},
{
"epoch": 0.21375,
"grad_norm": 1.4431771039962769,
"learning_rate": 3.6211184210526314e-06,
"loss": 0.061,
"step": 4275
},
{
"epoch": 0.215,
"grad_norm": 1.6346482038497925,
"learning_rate": 3.6153618421052625e-06,
"loss": 0.0641,
"step": 4300
},
{
"epoch": 0.21625,
"grad_norm": 1.5905380249023438,
"learning_rate": 3.6096052631578944e-06,
"loss": 0.0633,
"step": 4325
},
{
"epoch": 0.2175,
"grad_norm": 2.4848458766937256,
"learning_rate": 3.6038486842105263e-06,
"loss": 0.0738,
"step": 4350
},
{
"epoch": 0.21875,
"grad_norm": 2.568466901779175,
"learning_rate": 3.5980921052631574e-06,
"loss": 0.1123,
"step": 4375
},
{
"epoch": 0.22,
"grad_norm": 2.5104339122772217,
"learning_rate": 3.5923355263157894e-06,
"loss": 0.1179,
"step": 4400
},
{
"epoch": 0.22125,
"grad_norm": 3.769829273223877,
"learning_rate": 3.586578947368421e-06,
"loss": 0.1221,
"step": 4425
},
{
"epoch": 0.2225,
"grad_norm": 2.850048542022705,
"learning_rate": 3.5808223684210524e-06,
"loss": 0.1115,
"step": 4450
},
{
"epoch": 0.22375,
"grad_norm": 2.0328500270843506,
"learning_rate": 3.575065789473684e-06,
"loss": 0.1274,
"step": 4475
},
{
"epoch": 0.225,
"grad_norm": 2.765300750732422,
"learning_rate": 3.569309210526316e-06,
"loss": 0.1338,
"step": 4500
},
{
"epoch": 0.225,
"eval_loss": 0.24367325007915497,
"eval_runtime": 531.4688,
"eval_samples_per_second": 3.182,
"eval_steps_per_second": 0.399,
"eval_wer": 10.990383414512301,
"step": 4500
},
{
"epoch": 0.22625,
"grad_norm": 1.4456897974014282,
"learning_rate": 3.563552631578947e-06,
"loss": 0.0921,
"step": 4525
},
{
"epoch": 0.2275,
"grad_norm": 2.357384443283081,
"learning_rate": 3.557796052631579e-06,
"loss": 0.0728,
"step": 4550
},
{
"epoch": 0.22875,
"grad_norm": 2.2841663360595703,
"learning_rate": 3.55203947368421e-06,
"loss": 0.0703,
"step": 4575
},
{
"epoch": 0.23,
"grad_norm": 1.8975858688354492,
"learning_rate": 3.546282894736842e-06,
"loss": 0.0595,
"step": 4600
},
{
"epoch": 0.23125,
"grad_norm": 1.6614043712615967,
"learning_rate": 3.5405263157894733e-06,
"loss": 0.0684,
"step": 4625
},
{
"epoch": 0.2325,
"grad_norm": 3.0987887382507324,
"learning_rate": 3.534769736842105e-06,
"loss": 0.0643,
"step": 4650
},
{
"epoch": 0.23375,
"grad_norm": 1.869446873664856,
"learning_rate": 3.5290131578947363e-06,
"loss": 0.0612,
"step": 4675
},
{
"epoch": 0.235,
"grad_norm": 1.6360236406326294,
"learning_rate": 3.5232565789473683e-06,
"loss": 0.0627,
"step": 4700
},
{
"epoch": 0.23625,
"grad_norm": 2.188901424407959,
"learning_rate": 3.5174999999999998e-06,
"loss": 0.068,
"step": 4725
},
{
"epoch": 0.2375,
"grad_norm": 1.5851141214370728,
"learning_rate": 3.5117434210526313e-06,
"loss": 0.0702,
"step": 4750
},
{
"epoch": 0.23875,
"grad_norm": 1.9303579330444336,
"learning_rate": 3.5059868421052632e-06,
"loss": 0.0683,
"step": 4775
},
{
"epoch": 0.24,
"grad_norm": 1.8640798330307007,
"learning_rate": 3.5002302631578943e-06,
"loss": 0.0637,
"step": 4800
},
{
"epoch": 0.24125,
"grad_norm": 2.395669937133789,
"learning_rate": 3.4944736842105262e-06,
"loss": 0.0626,
"step": 4825
},
{
"epoch": 0.2425,
"grad_norm": 1.5368024110794067,
"learning_rate": 3.4887171052631573e-06,
"loss": 0.0694,
"step": 4850
},
{
"epoch": 0.24375,
"grad_norm": 2.1346402168273926,
"learning_rate": 3.4829605263157892e-06,
"loss": 0.0734,
"step": 4875
},
{
"epoch": 0.245,
"grad_norm": 2.0883893966674805,
"learning_rate": 3.4772039473684207e-06,
"loss": 0.0659,
"step": 4900
},
{
"epoch": 0.24625,
"grad_norm": 1.6861238479614258,
"learning_rate": 3.4714473684210523e-06,
"loss": 0.0656,
"step": 4925
},
{
"epoch": 0.2475,
"grad_norm": 1.5790470838546753,
"learning_rate": 3.4656907894736838e-06,
"loss": 0.0801,
"step": 4950
},
{
"epoch": 0.24875,
"grad_norm": 1.3223644495010376,
"learning_rate": 3.4599342105263157e-06,
"loss": 0.0806,
"step": 4975
},
{
"epoch": 0.25,
"grad_norm": 1.6931387186050415,
"learning_rate": 3.4541776315789468e-06,
"loss": 0.0748,
"step": 5000
},
{
"epoch": 0.25,
"eval_loss": 0.2556721270084381,
"eval_runtime": 534.5469,
"eval_samples_per_second": 3.163,
"eval_steps_per_second": 0.397,
"eval_wer": 10.709379293118522,
"step": 5000
},
{
"epoch": 0.25125,
"grad_norm": 1.5327143669128418,
"learning_rate": 3.4484210526315787e-06,
"loss": 0.0968,
"step": 5025
},
{
"epoch": 0.2525,
"grad_norm": 1.988226294517517,
"learning_rate": 3.4426644736842106e-06,
"loss": 0.0921,
"step": 5050
},
{
"epoch": 0.25375,
"grad_norm": 2.179086446762085,
"learning_rate": 3.4369078947368417e-06,
"loss": 0.093,
"step": 5075
},
{
"epoch": 0.255,
"grad_norm": 2.4304797649383545,
"learning_rate": 3.4311513157894736e-06,
"loss": 0.0909,
"step": 5100
},
{
"epoch": 0.25625,
"grad_norm": 2.498908281326294,
"learning_rate": 3.4253947368421047e-06,
"loss": 0.1225,
"step": 5125
},
{
"epoch": 0.2575,
"grad_norm": 2.018110752105713,
"learning_rate": 3.4196381578947367e-06,
"loss": 0.1199,
"step": 5150
},
{
"epoch": 0.25875,
"grad_norm": 1.8156744241714478,
"learning_rate": 3.413881578947368e-06,
"loss": 0.1032,
"step": 5175
},
{
"epoch": 0.26,
"grad_norm": 2.395634651184082,
"learning_rate": 3.4081249999999997e-06,
"loss": 0.0842,
"step": 5200
},
{
"epoch": 0.26125,
"grad_norm": 1.8604170083999634,
"learning_rate": 3.402368421052631e-06,
"loss": 0.0753,
"step": 5225
},
{
"epoch": 0.2625,
"grad_norm": 2.186006784439087,
"learning_rate": 3.396611842105263e-06,
"loss": 0.0693,
"step": 5250
},
{
"epoch": 0.26375,
"grad_norm": 2.117950201034546,
"learning_rate": 3.390855263157894e-06,
"loss": 0.0731,
"step": 5275
},
{
"epoch": 0.265,
"grad_norm": 1.442688226699829,
"learning_rate": 3.385098684210526e-06,
"loss": 0.0607,
"step": 5300
},
{
"epoch": 0.26625,
"grad_norm": 2.0623013973236084,
"learning_rate": 3.379342105263157e-06,
"loss": 0.0598,
"step": 5325
},
{
"epoch": 0.2675,
"grad_norm": 1.6096211671829224,
"learning_rate": 3.373585526315789e-06,
"loss": 0.0687,
"step": 5350
},
{
"epoch": 0.26875,
"grad_norm": 1.2381603717803955,
"learning_rate": 3.367828947368421e-06,
"loss": 0.0646,
"step": 5375
},
{
"epoch": 0.27,
"grad_norm": 1.6694140434265137,
"learning_rate": 3.362072368421052e-06,
"loss": 0.0595,
"step": 5400
},
{
"epoch": 0.27125,
"grad_norm": 2.486950159072876,
"learning_rate": 3.356315789473684e-06,
"loss": 0.074,
"step": 5425
},
{
"epoch": 0.2725,
"grad_norm": 1.2931033372879028,
"learning_rate": 3.3505592105263156e-06,
"loss": 0.08,
"step": 5450
},
{
"epoch": 0.27375,
"grad_norm": 2.314680337905884,
"learning_rate": 3.344802631578947e-06,
"loss": 0.0662,
"step": 5475
},
{
"epoch": 0.275,
"grad_norm": 2.413079261779785,
"learning_rate": 3.3390460526315786e-06,
"loss": 0.0821,
"step": 5500
},
{
"epoch": 0.275,
"eval_loss": 0.2597045302391052,
"eval_runtime": 532.1724,
"eval_samples_per_second": 3.178,
"eval_steps_per_second": 0.398,
"eval_wer": 10.247283626826526,
"step": 5500
},
{
"epoch": 0.27625,
"grad_norm": 2.8475470542907715,
"learning_rate": 3.3332894736842105e-06,
"loss": 0.1317,
"step": 5525
},
{
"epoch": 0.2775,
"grad_norm": 2.919682025909424,
"learning_rate": 3.3275328947368416e-06,
"loss": 0.1323,
"step": 5550
},
{
"epoch": 0.27875,
"grad_norm": 3.0585904121398926,
"learning_rate": 3.3217763157894735e-06,
"loss": 0.1332,
"step": 5575
},
{
"epoch": 0.28,
"grad_norm": 2.4418559074401855,
"learning_rate": 3.3160197368421046e-06,
"loss": 0.1126,
"step": 5600
},
{
"epoch": 0.28125,
"grad_norm": 2.9454727172851562,
"learning_rate": 3.3102631578947365e-06,
"loss": 0.0991,
"step": 5625
},
{
"epoch": 0.2825,
"grad_norm": 2.472628593444824,
"learning_rate": 3.304506578947368e-06,
"loss": 0.1106,
"step": 5650
},
{
"epoch": 0.28375,
"grad_norm": 2.1178548336029053,
"learning_rate": 3.2987499999999995e-06,
"loss": 0.1027,
"step": 5675
},
{
"epoch": 0.285,
"grad_norm": 2.5170726776123047,
"learning_rate": 3.2929934210526315e-06,
"loss": 0.1027,
"step": 5700
},
{
"epoch": 0.28625,
"grad_norm": 2.9180397987365723,
"learning_rate": 3.287236842105263e-06,
"loss": 0.1045,
"step": 5725
},
{
"epoch": 0.2875,
"grad_norm": 2.6896932125091553,
"learning_rate": 3.2814802631578945e-06,
"loss": 0.1069,
"step": 5750
},
{
"epoch": 0.28875,
"grad_norm": 3.1297285556793213,
"learning_rate": 3.275723684210526e-06,
"loss": 0.1003,
"step": 5775
},
{
"epoch": 0.29,
"grad_norm": 2.4746246337890625,
"learning_rate": 3.269967105263158e-06,
"loss": 0.1084,
"step": 5800
},
{
"epoch": 0.29125,
"grad_norm": 1.7318406105041504,
"learning_rate": 3.264210526315789e-06,
"loss": 0.0846,
"step": 5825
},
{
"epoch": 0.2925,
"grad_norm": 2.190168857574463,
"learning_rate": 3.258453947368421e-06,
"loss": 0.082,
"step": 5850
},
{
"epoch": 0.29375,
"grad_norm": 1.5366681814193726,
"learning_rate": 3.252697368421052e-06,
"loss": 0.0656,
"step": 5875
},
{
"epoch": 0.295,
"grad_norm": 1.8261510133743286,
"learning_rate": 3.246940789473684e-06,
"loss": 0.0646,
"step": 5900
},
{
"epoch": 0.29625,
"grad_norm": 1.9088908433914185,
"learning_rate": 3.2411842105263155e-06,
"loss": 0.0662,
"step": 5925
},
{
"epoch": 0.2975,
"grad_norm": 1.3404430150985718,
"learning_rate": 3.235427631578947e-06,
"loss": 0.0712,
"step": 5950
},
{
"epoch": 0.29875,
"grad_norm": 1.7546651363372803,
"learning_rate": 3.2296710526315785e-06,
"loss": 0.084,
"step": 5975
},
{
"epoch": 0.3,
"grad_norm": 1.7727612257003784,
"learning_rate": 3.2239144736842104e-06,
"loss": 0.0988,
"step": 6000
},
{
"epoch": 0.3,
"eval_loss": 0.2406572848558426,
"eval_runtime": 535.6321,
"eval_samples_per_second": 3.157,
"eval_steps_per_second": 0.396,
"eval_wer": 9.447983014861997,
"step": 6000
},
{
"epoch": 0.30125,
"grad_norm": 2.477670907974243,
"learning_rate": 3.2181578947368415e-06,
"loss": 0.1013,
"step": 6025
},
{
"epoch": 0.3025,
"grad_norm": 4.175459384918213,
"learning_rate": 3.2124013157894734e-06,
"loss": 0.1199,
"step": 6050
},
{
"epoch": 0.30375,
"grad_norm": 2.4588561058044434,
"learning_rate": 3.2066447368421053e-06,
"loss": 0.1203,
"step": 6075
},
{
"epoch": 0.305,
"grad_norm": 3.759526491165161,
"learning_rate": 3.2008881578947364e-06,
"loss": 0.1261,
"step": 6100
},
{
"epoch": 0.30625,
"grad_norm": 3.186166524887085,
"learning_rate": 3.1951315789473683e-06,
"loss": 0.0946,
"step": 6125
},
{
"epoch": 0.3075,
"grad_norm": 1.874886155128479,
"learning_rate": 3.1893749999999994e-06,
"loss": 0.0707,
"step": 6150
},
{
"epoch": 0.30875,
"grad_norm": 1.673767328262329,
"learning_rate": 3.1836184210526314e-06,
"loss": 0.0605,
"step": 6175
},
{
"epoch": 0.31,
"grad_norm": 2.6728780269622803,
"learning_rate": 3.177861842105263e-06,
"loss": 0.064,
"step": 6200
},
{
"epoch": 0.31125,
"grad_norm": 1.245354175567627,
"learning_rate": 3.1721052631578944e-06,
"loss": 0.0603,
"step": 6225
},
{
"epoch": 0.3125,
"grad_norm": 1.3173916339874268,
"learning_rate": 3.166348684210526e-06,
"loss": 0.067,
"step": 6250
},
{
"epoch": 0.31375,
"grad_norm": 1.9218686819076538,
"learning_rate": 3.160592105263158e-06,
"loss": 0.0723,
"step": 6275
},
{
"epoch": 0.315,
"grad_norm": 1.822493314743042,
"learning_rate": 3.154835526315789e-06,
"loss": 0.0772,
"step": 6300
},
{
"epoch": 0.31625,
"grad_norm": 2.4955074787139893,
"learning_rate": 3.149078947368421e-06,
"loss": 0.1124,
"step": 6325
},
{
"epoch": 0.3175,
"grad_norm": 2.448274612426758,
"learning_rate": 3.1433223684210523e-06,
"loss": 0.1144,
"step": 6350
},
{
"epoch": 0.31875,
"grad_norm": 2.732297658920288,
"learning_rate": 3.137565789473684e-06,
"loss": 0.0983,
"step": 6375
},
{
"epoch": 0.32,
"grad_norm": 3.261770248413086,
"learning_rate": 3.1318092105263158e-06,
"loss": 0.11,
"step": 6400
},
{
"epoch": 0.32125,
"grad_norm": 2.367335319519043,
"learning_rate": 3.1260526315789473e-06,
"loss": 0.1129,
"step": 6425
},
{
"epoch": 0.3225,
"grad_norm": 2.4930291175842285,
"learning_rate": 3.1202960526315788e-06,
"loss": 0.1106,
"step": 6450
},
{
"epoch": 0.32375,
"grad_norm": 1.8275959491729736,
"learning_rate": 3.1145394736842103e-06,
"loss": 0.0814,
"step": 6475
},
{
"epoch": 0.325,
"grad_norm": 3.6453261375427246,
"learning_rate": 3.1087828947368418e-06,
"loss": 0.0824,
"step": 6500
},
{
"epoch": 0.325,
"eval_loss": 0.24250419437885284,
"eval_runtime": 531.4087,
"eval_samples_per_second": 3.182,
"eval_steps_per_second": 0.399,
"eval_wer": 9.223179717746971,
"step": 6500
},
{
"epoch": 0.32625,
"grad_norm": 2.3996527194976807,
"learning_rate": 3.1030263157894733e-06,
"loss": 0.0913,
"step": 6525
},
{
"epoch": 0.3275,
"grad_norm": 3.106403350830078,
"learning_rate": 3.0972697368421052e-06,
"loss": 0.0969,
"step": 6550
},
{
"epoch": 0.32875,
"grad_norm": 3.741685628890991,
"learning_rate": 3.0915131578947363e-06,
"loss": 0.1091,
"step": 6575
},
{
"epoch": 0.33,
"grad_norm": 1.6008243560791016,
"learning_rate": 3.0859868421052626e-06,
"loss": 0.0984,
"step": 6600
},
{
"epoch": 0.33125,
"grad_norm": 2.268734931945801,
"learning_rate": 3.0802302631578945e-06,
"loss": 0.0968,
"step": 6625
},
{
"epoch": 0.3325,
"grad_norm": 2.442617654800415,
"learning_rate": 3.074473684210526e-06,
"loss": 0.0716,
"step": 6650
},
{
"epoch": 0.33375,
"grad_norm": 1.9763257503509521,
"learning_rate": 3.0687171052631575e-06,
"loss": 0.0674,
"step": 6675
},
{
"epoch": 0.335,
"grad_norm": 1.828474998474121,
"learning_rate": 3.0629605263157894e-06,
"loss": 0.0654,
"step": 6700
},
{
"epoch": 0.33625,
"grad_norm": 1.5649821758270264,
"learning_rate": 3.0572039473684205e-06,
"loss": 0.057,
"step": 6725
},
{
"epoch": 0.3375,
"grad_norm": 1.911927580833435,
"learning_rate": 3.0514473684210525e-06,
"loss": 0.0532,
"step": 6750
},
{
"epoch": 0.33875,
"grad_norm": 1.3287229537963867,
"learning_rate": 3.045690789473684e-06,
"loss": 0.0623,
"step": 6775
},
{
"epoch": 0.34,
"grad_norm": 1.7754572629928589,
"learning_rate": 3.0399342105263155e-06,
"loss": 0.0635,
"step": 6800
},
{
"epoch": 0.34125,
"grad_norm": 1.9900065660476685,
"learning_rate": 3.034177631578947e-06,
"loss": 0.0678,
"step": 6825
},
{
"epoch": 0.3425,
"grad_norm": 1.714850664138794,
"learning_rate": 3.028421052631579e-06,
"loss": 0.0654,
"step": 6850
},
{
"epoch": 0.34375,
"grad_norm": 1.6401875019073486,
"learning_rate": 3.02266447368421e-06,
"loss": 0.0662,
"step": 6875
},
{
"epoch": 0.345,
"grad_norm": 1.0171102285385132,
"learning_rate": 3.016907894736842e-06,
"loss": 0.0573,
"step": 6900
},
{
"epoch": 0.34625,
"grad_norm": 1.4662336111068726,
"learning_rate": 3.0111513157894734e-06,
"loss": 0.0556,
"step": 6925
},
{
"epoch": 0.3475,
"grad_norm": 1.7531720399856567,
"learning_rate": 3.005394736842105e-06,
"loss": 0.0501,
"step": 6950
},
{
"epoch": 0.34875,
"grad_norm": 2.6019067764282227,
"learning_rate": 2.9996381578947364e-06,
"loss": 0.0629,
"step": 6975
},
{
"epoch": 0.35,
"grad_norm": 2.0052170753479004,
"learning_rate": 2.9938815789473684e-06,
"loss": 0.0678,
"step": 7000
},
{
"epoch": 0.35,
"eval_loss": 0.23009631037712097,
"eval_runtime": 530.6679,
"eval_samples_per_second": 3.187,
"eval_steps_per_second": 0.399,
"eval_wer": 9.13575621331335,
"step": 7000
},
{
"epoch": 0.35125,
"grad_norm": 2.00034761428833,
"learning_rate": 2.988125e-06,
"loss": 0.0582,
"step": 7025
},
{
"epoch": 0.3525,
"grad_norm": 1.7806837558746338,
"learning_rate": 2.9823684210526314e-06,
"loss": 0.058,
"step": 7050
},
{
"epoch": 0.35375,
"grad_norm": 1.4306073188781738,
"learning_rate": 2.976611842105263e-06,
"loss": 0.0555,
"step": 7075
},
{
"epoch": 0.355,
"grad_norm": 1.8648333549499512,
"learning_rate": 2.9708552631578944e-06,
"loss": 0.0662,
"step": 7100
},
{
"epoch": 0.35625,
"grad_norm": 2.046255350112915,
"learning_rate": 2.9650986842105263e-06,
"loss": 0.0873,
"step": 7125
},
{
"epoch": 0.3575,
"grad_norm": 1.928809404373169,
"learning_rate": 2.9593421052631574e-06,
"loss": 0.0948,
"step": 7150
},
{
"epoch": 0.35875,
"grad_norm": 2.6892471313476562,
"learning_rate": 2.9535855263157893e-06,
"loss": 0.1043,
"step": 7175
},
{
"epoch": 0.36,
"grad_norm": 1.9739983081817627,
"learning_rate": 2.947828947368421e-06,
"loss": 0.1037,
"step": 7200
},
{
"epoch": 0.36125,
"grad_norm": 3.5157880783081055,
"learning_rate": 2.9420723684210523e-06,
"loss": 0.1139,
"step": 7225
},
{
"epoch": 0.3625,
"grad_norm": 2.140559673309326,
"learning_rate": 2.936315789473684e-06,
"loss": 0.0912,
"step": 7250
},
{
"epoch": 0.36375,
"grad_norm": 2.24043607711792,
"learning_rate": 2.9305592105263158e-06,
"loss": 0.0555,
"step": 7275
},
{
"epoch": 0.365,
"grad_norm": 1.5429259538650513,
"learning_rate": 2.924802631578947e-06,
"loss": 0.059,
"step": 7300
},
{
"epoch": 0.36625,
"grad_norm": 1.9133890867233276,
"learning_rate": 2.919046052631579e-06,
"loss": 0.0576,
"step": 7325
},
{
"epoch": 0.3675,
"grad_norm": 1.585777759552002,
"learning_rate": 2.9132894736842103e-06,
"loss": 0.0497,
"step": 7350
},
{
"epoch": 0.36875,
"grad_norm": 1.5571388006210327,
"learning_rate": 2.907532894736842e-06,
"loss": 0.0604,
"step": 7375
},
{
"epoch": 0.37,
"grad_norm": 1.2344049215316772,
"learning_rate": 2.9017763157894737e-06,
"loss": 0.0621,
"step": 7400
},
{
"epoch": 0.37125,
"grad_norm": 1.7708073854446411,
"learning_rate": 2.896019736842105e-06,
"loss": 0.0713,
"step": 7425
},
{
"epoch": 0.3725,
"grad_norm": 2.126579999923706,
"learning_rate": 2.8902631578947367e-06,
"loss": 0.0661,
"step": 7450
},
{
"epoch": 0.37375,
"grad_norm": 1.9544090032577515,
"learning_rate": 2.8845065789473682e-06,
"loss": 0.0626,
"step": 7475
},
{
"epoch": 0.375,
"grad_norm": 2.478142499923706,
"learning_rate": 2.8787499999999998e-06,
"loss": 0.1124,
"step": 7500
},
{
"epoch": 0.375,
"eval_loss": 0.2558789849281311,
"eval_runtime": 531.6323,
"eval_samples_per_second": 3.181,
"eval_steps_per_second": 0.399,
"eval_wer": 9.323092294242539,
"step": 7500
},
{
"epoch": 0.37625,
"grad_norm": 2.722101926803589,
"learning_rate": 2.8729934210526313e-06,
"loss": 0.1143,
"step": 7525
},
{
"epoch": 0.3775,
"grad_norm": 2.3424594402313232,
"learning_rate": 2.867236842105263e-06,
"loss": 0.0968,
"step": 7550
},
{
"epoch": 0.37875,
"grad_norm": 2.566340208053589,
"learning_rate": 2.8614802631578943e-06,
"loss": 0.0932,
"step": 7575
},
{
"epoch": 0.38,
"grad_norm": 3.2237472534179688,
"learning_rate": 2.855723684210526e-06,
"loss": 0.1155,
"step": 7600
},
{
"epoch": 0.38125,
"grad_norm": 3.058669090270996,
"learning_rate": 2.8499671052631573e-06,
"loss": 0.103,
"step": 7625
},
{
"epoch": 0.3825,
"grad_norm": 4.729414463043213,
"learning_rate": 2.8442105263157892e-06,
"loss": 0.0922,
"step": 7650
},
{
"epoch": 0.38375,
"grad_norm": 2.140126943588257,
"learning_rate": 2.8384539473684207e-06,
"loss": 0.0982,
"step": 7675
},
{
"epoch": 0.385,
"grad_norm": 2.778568983078003,
"learning_rate": 2.8326973684210522e-06,
"loss": 0.0993,
"step": 7700
},
{
"epoch": 0.38625,
"grad_norm": 2.6681206226348877,
"learning_rate": 2.826940789473684e-06,
"loss": 0.1018,
"step": 7725
},
{
"epoch": 0.3875,
"grad_norm": 1.5673187971115112,
"learning_rate": 2.8211842105263157e-06,
"loss": 0.0854,
"step": 7750
},
{
"epoch": 0.38875,
"grad_norm": 1.3890910148620605,
"learning_rate": 2.815427631578947e-06,
"loss": 0.0703,
"step": 7775
},
{
"epoch": 0.39,
"grad_norm": 2.176023483276367,
"learning_rate": 2.8096710526315787e-06,
"loss": 0.0672,
"step": 7800
},
{
"epoch": 0.39125,
"grad_norm": 1.2905758619308472,
"learning_rate": 2.8039144736842106e-06,
"loss": 0.0615,
"step": 7825
},
{
"epoch": 0.3925,
"grad_norm": 1.3446353673934937,
"learning_rate": 2.7981578947368417e-06,
"loss": 0.0637,
"step": 7850
},
{
"epoch": 0.39375,
"grad_norm": 2.1519501209259033,
"learning_rate": 2.7924013157894736e-06,
"loss": 0.056,
"step": 7875
},
{
"epoch": 0.395,
"grad_norm": 1.8618980646133423,
"learning_rate": 2.7866447368421047e-06,
"loss": 0.0573,
"step": 7900
},
{
"epoch": 0.39625,
"grad_norm": 2.5565106868743896,
"learning_rate": 2.7808881578947366e-06,
"loss": 0.0882,
"step": 7925
},
{
"epoch": 0.3975,
"grad_norm": 3.98923397064209,
"learning_rate": 2.775131578947368e-06,
"loss": 0.0981,
"step": 7950
},
{
"epoch": 0.39875,
"grad_norm": 3.326756477355957,
"learning_rate": 2.7693749999999996e-06,
"loss": 0.147,
"step": 7975
},
{
"epoch": 0.4,
"grad_norm": 2.8089091777801514,
"learning_rate": 2.763618421052631e-06,
"loss": 0.1122,
"step": 8000
},
{
"epoch": 0.4,
"eval_loss": 0.22397179901599884,
"eval_runtime": 531.6557,
"eval_samples_per_second": 3.181,
"eval_steps_per_second": 0.399,
"eval_wer": 8.523791682278006,
"step": 8000
},
{
"epoch": 0.40125,
"grad_norm": 1.8123100996017456,
"learning_rate": 2.757861842105263e-06,
"loss": 0.0967,
"step": 8025
},
{
"epoch": 0.4025,
"grad_norm": 2.1731700897216797,
"learning_rate": 2.7521052631578946e-06,
"loss": 0.0927,
"step": 8050
},
{
"epoch": 0.40375,
"grad_norm": 2.9888458251953125,
"learning_rate": 2.746348684210526e-06,
"loss": 0.1061,
"step": 8075
},
{
"epoch": 0.405,
"grad_norm": 3.51106595993042,
"learning_rate": 2.740592105263158e-06,
"loss": 0.158,
"step": 8100
},
{
"epoch": 0.40625,
"grad_norm": 3.410916805267334,
"learning_rate": 2.734835526315789e-06,
"loss": 0.1011,
"step": 8125
},
{
"epoch": 0.4075,
"grad_norm": 2.426023006439209,
"learning_rate": 2.729078947368421e-06,
"loss": 0.0864,
"step": 8150
},
{
"epoch": 0.40875,
"grad_norm": 2.8296170234680176,
"learning_rate": 2.723322368421052e-06,
"loss": 0.0929,
"step": 8175
},
{
"epoch": 0.41,
"grad_norm": 2.028474807739258,
"learning_rate": 2.717565789473684e-06,
"loss": 0.0848,
"step": 8200
},
{
"epoch": 0.41125,
"grad_norm": 2.4663166999816895,
"learning_rate": 2.7118092105263155e-06,
"loss": 0.0698,
"step": 8225
},
{
"epoch": 0.4125,
"grad_norm": 1.7618118524551392,
"learning_rate": 2.706052631578947e-06,
"loss": 0.058,
"step": 8250
},
{
"epoch": 0.41375,
"grad_norm": 2.2708559036254883,
"learning_rate": 2.7002960526315786e-06,
"loss": 0.0607,
"step": 8275
},
{
"epoch": 0.415,
"grad_norm": 1.6543164253234863,
"learning_rate": 2.6945394736842105e-06,
"loss": 0.0556,
"step": 8300
},
{
"epoch": 0.41625,
"grad_norm": 2.5951287746429443,
"learning_rate": 2.6887828947368416e-06,
"loss": 0.0576,
"step": 8325
},
{
"epoch": 0.4175,
"grad_norm": 1.1910465955734253,
"learning_rate": 2.6830263157894735e-06,
"loss": 0.059,
"step": 8350
},
{
"epoch": 0.41875,
"grad_norm": 1.667228102684021,
"learning_rate": 2.6772697368421054e-06,
"loss": 0.0521,
"step": 8375
},
{
"epoch": 0.42,
"grad_norm": 2.1288628578186035,
"learning_rate": 2.6715131578947365e-06,
"loss": 0.0557,
"step": 8400
},
{
"epoch": 0.42125,
"grad_norm": 2.0485122203826904,
"learning_rate": 2.6657565789473684e-06,
"loss": 0.0493,
"step": 8425
},
{
"epoch": 0.4225,
"grad_norm": 1.8512142896652222,
"learning_rate": 2.6599999999999995e-06,
"loss": 0.056,
"step": 8450
},
{
"epoch": 0.42375,
"grad_norm": 1.8958942890167236,
"learning_rate": 2.6542434210526314e-06,
"loss": 0.059,
"step": 8475
},
{
"epoch": 0.425,
"grad_norm": 1.2833645343780518,
"learning_rate": 2.648486842105263e-06,
"loss": 0.0477,
"step": 8500
},
{
"epoch": 0.425,
"eval_loss": 0.23789365589618683,
"eval_runtime": 530.7286,
"eval_samples_per_second": 3.186,
"eval_steps_per_second": 0.399,
"eval_wer": 8.317721993255901,
"step": 8500
},
{
"epoch": 0.42625,
"grad_norm": 1.2612178325653076,
"learning_rate": 2.6427302631578945e-06,
"loss": 0.05,
"step": 8525
},
{
"epoch": 0.4275,
"grad_norm": 2.100247621536255,
"learning_rate": 2.636973684210526e-06,
"loss": 0.0626,
"step": 8550
},
{
"epoch": 0.42875,
"grad_norm": 2.7199559211730957,
"learning_rate": 2.631217105263158e-06,
"loss": 0.0906,
"step": 8575
},
{
"epoch": 0.43,
"grad_norm": 3.267314910888672,
"learning_rate": 2.625460526315789e-06,
"loss": 0.1068,
"step": 8600
},
{
"epoch": 0.43125,
"grad_norm": 3.2623515129089355,
"learning_rate": 2.619703947368421e-06,
"loss": 0.0849,
"step": 8625
},
{
"epoch": 0.4325,
"grad_norm": 1.8294329643249512,
"learning_rate": 2.613947368421052e-06,
"loss": 0.0776,
"step": 8650
},
{
"epoch": 0.43375,
"grad_norm": 3.3888967037200928,
"learning_rate": 2.608190789473684e-06,
"loss": 0.0869,
"step": 8675
},
{
"epoch": 0.435,
"grad_norm": 2.5059332847595215,
"learning_rate": 2.602434210526316e-06,
"loss": 0.0781,
"step": 8700
},
{
"epoch": 0.43625,
"grad_norm": 1.8527718782424927,
"learning_rate": 2.596677631578947e-06,
"loss": 0.0513,
"step": 8725
},
{
"epoch": 0.4375,
"grad_norm": 1.4375104904174805,
"learning_rate": 2.590921052631579e-06,
"loss": 0.053,
"step": 8750
},
{
"epoch": 0.43875,
"grad_norm": 1.923519253730774,
"learning_rate": 2.5851644736842104e-06,
"loss": 0.0487,
"step": 8775
},
{
"epoch": 0.44,
"grad_norm": 1.6237260103225708,
"learning_rate": 2.579407894736842e-06,
"loss": 0.0499,
"step": 8800
},
{
"epoch": 0.44125,
"grad_norm": 1.7452889680862427,
"learning_rate": 2.5736513157894734e-06,
"loss": 0.0538,
"step": 8825
},
{
"epoch": 0.4425,
"grad_norm": 1.7012261152267456,
"learning_rate": 2.5678947368421053e-06,
"loss": 0.0529,
"step": 8850
},
{
"epoch": 0.44375,
"grad_norm": 1.8288905620574951,
"learning_rate": 2.5621381578947364e-06,
"loss": 0.0473,
"step": 8875
},
{
"epoch": 0.445,
"grad_norm": 1.9288239479064941,
"learning_rate": 2.5563815789473683e-06,
"loss": 0.0683,
"step": 8900
},
{
"epoch": 0.44625,
"grad_norm": 1.3186031579971313,
"learning_rate": 2.5506249999999994e-06,
"loss": 0.065,
"step": 8925
},
{
"epoch": 0.4475,
"grad_norm": 1.340890645980835,
"learning_rate": 2.5448684210526313e-06,
"loss": 0.0617,
"step": 8950
},
{
"epoch": 0.44875,
"grad_norm": 2.7007381916046143,
"learning_rate": 2.539111842105263e-06,
"loss": 0.0512,
"step": 8975
},
{
"epoch": 0.45,
"grad_norm": 1.688952922821045,
"learning_rate": 2.5333552631578943e-06,
"loss": 0.0638,
"step": 9000
},
{
"epoch": 0.45,
"eval_loss": 0.23539182543754578,
"eval_runtime": 531.3204,
"eval_samples_per_second": 3.183,
"eval_steps_per_second": 0.399,
"eval_wer": 8.948420132384163,
"step": 9000
},
{
"epoch": 0.45125,
"grad_norm": 3.06502103805542,
"learning_rate": 2.527598684210526e-06,
"loss": 0.0691,
"step": 9025
},
{
"epoch": 0.4525,
"grad_norm": 2.2699365615844727,
"learning_rate": 2.5218421052631578e-06,
"loss": 0.0765,
"step": 9050
},
{
"epoch": 0.45375,
"grad_norm": 3.057246208190918,
"learning_rate": 2.5160855263157893e-06,
"loss": 0.0874,
"step": 9075
},
{
"epoch": 0.455,
"grad_norm": 2.452810764312744,
"learning_rate": 2.5105592105263156e-06,
"loss": 0.0992,
"step": 9100
},
{
"epoch": 0.45625,
"grad_norm": 1.8321553468704224,
"learning_rate": 2.504802631578947e-06,
"loss": 0.091,
"step": 9125
},
{
"epoch": 0.4575,
"grad_norm": 2.1675491333007812,
"learning_rate": 2.499046052631579e-06,
"loss": 0.0968,
"step": 9150
},
{
"epoch": 0.45875,
"grad_norm": 2.440648317337036,
"learning_rate": 2.49328947368421e-06,
"loss": 0.094,
"step": 9175
},
{
"epoch": 0.46,
"grad_norm": 3.3630011081695557,
"learning_rate": 2.487532894736842e-06,
"loss": 0.0934,
"step": 9200
},
{
"epoch": 0.46125,
"grad_norm": 3.1267924308776855,
"learning_rate": 2.481776315789473e-06,
"loss": 0.095,
"step": 9225
},
{
"epoch": 0.4625,
"grad_norm": 2.791846752166748,
"learning_rate": 2.476019736842105e-06,
"loss": 0.0988,
"step": 9250
},
{
"epoch": 0.46375,
"grad_norm": 1.883380651473999,
"learning_rate": 2.4702631578947365e-06,
"loss": 0.089,
"step": 9275
},
{
"epoch": 0.465,
"grad_norm": 2.572441577911377,
"learning_rate": 2.464506578947368e-06,
"loss": 0.0933,
"step": 9300
},
{
"epoch": 0.46625,
"grad_norm": 3.08231258392334,
"learning_rate": 2.45875e-06,
"loss": 0.0856,
"step": 9325
},
{
"epoch": 0.4675,
"grad_norm": 2.208491563796997,
"learning_rate": 2.4529934210526315e-06,
"loss": 0.0795,
"step": 9350
},
{
"epoch": 0.46875,
"grad_norm": 2.896657943725586,
"learning_rate": 2.447236842105263e-06,
"loss": 0.0625,
"step": 9375
},
{
"epoch": 0.47,
"grad_norm": 1.3465672731399536,
"learning_rate": 2.4414802631578945e-06,
"loss": 0.0822,
"step": 9400
},
{
"epoch": 0.47125,
"grad_norm": 3.4039506912231445,
"learning_rate": 2.4357236842105264e-06,
"loss": 0.0813,
"step": 9425
},
{
"epoch": 0.4725,
"grad_norm": 2.213761568069458,
"learning_rate": 2.4299671052631575e-06,
"loss": 0.075,
"step": 9450
},
{
"epoch": 0.47375,
"grad_norm": 1.693393588066101,
"learning_rate": 2.4242105263157894e-06,
"loss": 0.0819,
"step": 9475
},
{
"epoch": 0.475,
"grad_norm": 3.1261212825775146,
"learning_rate": 2.4184539473684205e-06,
"loss": 0.0735,
"step": 9500
},
{
"epoch": 0.475,
"eval_loss": 0.22311098873615265,
"eval_runtime": 530.3307,
"eval_samples_per_second": 3.189,
"eval_steps_per_second": 0.4,
"eval_wer": 8.39890096165855,
"step": 9500
},
{
"epoch": 0.47625,
"grad_norm": 2.6011083126068115,
"learning_rate": 2.4126973684210524e-06,
"loss": 0.0557,
"step": 9525
},
{
"epoch": 0.4775,
"grad_norm": 1.4606833457946777,
"learning_rate": 2.406940789473684e-06,
"loss": 0.0518,
"step": 9550
},
{
"epoch": 0.47875,
"grad_norm": 3.201547145843506,
"learning_rate": 2.4011842105263154e-06,
"loss": 0.0616,
"step": 9575
},
{
"epoch": 0.48,
"grad_norm": 1.470755696296692,
"learning_rate": 2.395427631578947e-06,
"loss": 0.0566,
"step": 9600
},
{
"epoch": 0.48125,
"grad_norm": 1.0501068830490112,
"learning_rate": 2.389671052631579e-06,
"loss": 0.0482,
"step": 9625
},
{
"epoch": 0.4825,
"grad_norm": 1.7576944828033447,
"learning_rate": 2.38391447368421e-06,
"loss": 0.0487,
"step": 9650
},
{
"epoch": 0.48375,
"grad_norm": 2.6596386432647705,
"learning_rate": 2.378157894736842e-06,
"loss": 0.0548,
"step": 9675
},
{
"epoch": 0.485,
"grad_norm": 2.2998361587524414,
"learning_rate": 2.372401315789474e-06,
"loss": 0.0755,
"step": 9700
},
{
"epoch": 0.48625,
"grad_norm": 1.885953426361084,
"learning_rate": 2.366644736842105e-06,
"loss": 0.0635,
"step": 9725
},
{
"epoch": 0.4875,
"grad_norm": 1.686090111732483,
"learning_rate": 2.360888157894737e-06,
"loss": 0.0664,
"step": 9750
},
{
"epoch": 0.48875,
"grad_norm": 1.487586259841919,
"learning_rate": 2.3551315789473683e-06,
"loss": 0.0723,
"step": 9775
},
{
"epoch": 0.49,
"grad_norm": 1.5484004020690918,
"learning_rate": 2.349375e-06,
"loss": 0.0697,
"step": 9800
},
{
"epoch": 0.49125,
"grad_norm": 1.6730592250823975,
"learning_rate": 2.3436184210526314e-06,
"loss": 0.0726,
"step": 9825
},
{
"epoch": 0.4925,
"grad_norm": 1.549166202545166,
"learning_rate": 2.337861842105263e-06,
"loss": 0.0599,
"step": 9850
},
{
"epoch": 0.49375,
"grad_norm": 2.127182960510254,
"learning_rate": 2.3321052631578944e-06,
"loss": 0.0552,
"step": 9875
},
{
"epoch": 0.495,
"grad_norm": 1.5453063249588013,
"learning_rate": 2.3263486842105263e-06,
"loss": 0.0647,
"step": 9900
},
{
"epoch": 0.49625,
"grad_norm": 2.2514312267303467,
"learning_rate": 2.3205921052631574e-06,
"loss": 0.0543,
"step": 9925
},
{
"epoch": 0.4975,
"grad_norm": 1.5466394424438477,
"learning_rate": 2.3148355263157893e-06,
"loss": 0.0576,
"step": 9950
},
{
"epoch": 0.49875,
"grad_norm": 1.1446313858032227,
"learning_rate": 2.309078947368421e-06,
"loss": 0.0581,
"step": 9975
},
{
"epoch": 0.5,
"grad_norm": 1.7810652256011963,
"learning_rate": 2.3033223684210523e-06,
"loss": 0.0548,
"step": 10000
},
{
"epoch": 0.5,
"eval_loss": 0.23302312195301056,
"eval_runtime": 531.2942,
"eval_samples_per_second": 3.183,
"eval_steps_per_second": 0.399,
"eval_wer": 8.57374797052579,
"step": 10000
},
{
"epoch": 0.50125,
"grad_norm": 1.8410784006118774,
"learning_rate": 2.2975657894736842e-06,
"loss": 0.0818,
"step": 10025
},
{
"epoch": 0.5025,
"grad_norm": 2.0660974979400635,
"learning_rate": 2.2918092105263158e-06,
"loss": 0.0839,
"step": 10050
},
{
"epoch": 0.50375,
"grad_norm": 0.7626898288726807,
"learning_rate": 2.2860526315789473e-06,
"loss": 0.0717,
"step": 10075
},
{
"epoch": 0.505,
"grad_norm": 3.819746971130371,
"learning_rate": 2.2802960526315788e-06,
"loss": 0.0696,
"step": 10100
},
{
"epoch": 0.50625,
"grad_norm": 1.8556462526321411,
"learning_rate": 2.2745394736842103e-06,
"loss": 0.0776,
"step": 10125
},
{
"epoch": 0.5075,
"grad_norm": 2.1852500438690186,
"learning_rate": 2.2687828947368418e-06,
"loss": 0.1228,
"step": 10150
},
{
"epoch": 0.50875,
"grad_norm": 2.6284213066101074,
"learning_rate": 2.2630263157894737e-06,
"loss": 0.1121,
"step": 10175
},
{
"epoch": 0.51,
"grad_norm": 3.840794801712036,
"learning_rate": 2.2572697368421048e-06,
"loss": 0.087,
"step": 10200
},
{
"epoch": 0.51125,
"grad_norm": 1.920469045639038,
"learning_rate": 2.2515131578947367e-06,
"loss": 0.0876,
"step": 10225
},
{
"epoch": 0.5125,
"grad_norm": 2.9199891090393066,
"learning_rate": 2.2457565789473682e-06,
"loss": 0.0812,
"step": 10250
},
{
"epoch": 0.51375,
"grad_norm": 2.7151129245758057,
"learning_rate": 2.2399999999999997e-06,
"loss": 0.0733,
"step": 10275
},
{
"epoch": 0.515,
"grad_norm": 3.474050760269165,
"learning_rate": 2.2342434210526312e-06,
"loss": 0.0934,
"step": 10300
},
{
"epoch": 0.51625,
"grad_norm": 1.5654582977294922,
"learning_rate": 2.228486842105263e-06,
"loss": 0.0762,
"step": 10325
},
{
"epoch": 0.5175,
"grad_norm": 1.0436935424804688,
"learning_rate": 2.2227302631578947e-06,
"loss": 0.0727,
"step": 10350
},
{
"epoch": 0.51875,
"grad_norm": 0.8793361186981201,
"learning_rate": 2.216973684210526e-06,
"loss": 0.0471,
"step": 10375
},
{
"epoch": 0.52,
"grad_norm": 0.7731598019599915,
"learning_rate": 2.211217105263158e-06,
"loss": 0.0467,
"step": 10400
},
{
"epoch": 0.52125,
"grad_norm": 1.2689337730407715,
"learning_rate": 2.205460526315789e-06,
"loss": 0.0485,
"step": 10425
},
{
"epoch": 0.5225,
"grad_norm": 1.4495617151260376,
"learning_rate": 2.199703947368421e-06,
"loss": 0.0541,
"step": 10450
},
{
"epoch": 0.52375,
"grad_norm": 1.4262604713439941,
"learning_rate": 2.193947368421052e-06,
"loss": 0.0539,
"step": 10475
},
{
"epoch": 0.525,
"grad_norm": 1.8088651895523071,
"learning_rate": 2.188190789473684e-06,
"loss": 0.0557,
"step": 10500
},
{
"epoch": 0.525,
"eval_loss": 0.2133007049560547,
"eval_runtime": 530.2894,
"eval_samples_per_second": 3.189,
"eval_steps_per_second": 0.4,
"eval_wer": 8.361433745472711,
"step": 10500
},
{
"epoch": 0.52625,
"grad_norm": 1.178223967552185,
"learning_rate": 2.1824342105263156e-06,
"loss": 0.0549,
"step": 10525
},
{
"epoch": 0.5275,
"grad_norm": 1.7510823011398315,
"learning_rate": 2.176677631578947e-06,
"loss": 0.0557,
"step": 10550
},
{
"epoch": 0.52875,
"grad_norm": 0.9500125050544739,
"learning_rate": 2.1709210526315786e-06,
"loss": 0.0553,
"step": 10575
},
{
"epoch": 0.53,
"grad_norm": 2.060792922973633,
"learning_rate": 2.1651644736842106e-06,
"loss": 0.0596,
"step": 10600
},
{
"epoch": 0.53125,
"grad_norm": 2.1061859130859375,
"learning_rate": 2.159407894736842e-06,
"loss": 0.0539,
"step": 10625
},
{
"epoch": 0.5325,
"grad_norm": 1.6122857332229614,
"learning_rate": 2.1536513157894736e-06,
"loss": 0.053,
"step": 10650
},
{
"epoch": 0.53375,
"grad_norm": 2.2909045219421387,
"learning_rate": 2.147894736842105e-06,
"loss": 0.0614,
"step": 10675
},
{
"epoch": 0.535,
"grad_norm": 3.2241578102111816,
"learning_rate": 2.1421381578947366e-06,
"loss": 0.0829,
"step": 10700
},
{
"epoch": 0.53625,
"grad_norm": 2.7384145259857178,
"learning_rate": 2.136611842105263e-06,
"loss": 0.0817,
"step": 10725
},
{
"epoch": 0.5375,
"grad_norm": 1.8319401741027832,
"learning_rate": 2.1308552631578944e-06,
"loss": 0.0823,
"step": 10750
},
{
"epoch": 0.53875,
"grad_norm": 2.4007859230041504,
"learning_rate": 2.125098684210526e-06,
"loss": 0.0733,
"step": 10775
},
{
"epoch": 0.54,
"grad_norm": 2.042520046234131,
"learning_rate": 2.119342105263158e-06,
"loss": 0.0838,
"step": 10800
},
{
"epoch": 0.54125,
"grad_norm": 2.0478389263153076,
"learning_rate": 2.1135855263157893e-06,
"loss": 0.0831,
"step": 10825
},
{
"epoch": 0.5425,
"grad_norm": 2.357926607131958,
"learning_rate": 2.107828947368421e-06,
"loss": 0.0728,
"step": 10850
},
{
"epoch": 0.54375,
"grad_norm": 2.214553117752075,
"learning_rate": 2.1020723684210523e-06,
"loss": 0.0804,
"step": 10875
},
{
"epoch": 0.545,
"grad_norm": 3.484598398208618,
"learning_rate": 2.0963157894736843e-06,
"loss": 0.0592,
"step": 10900
},
{
"epoch": 0.54625,
"grad_norm": 1.5546646118164062,
"learning_rate": 2.0905592105263158e-06,
"loss": 0.0577,
"step": 10925
},
{
"epoch": 0.5475,
"grad_norm": 2.218691349029541,
"learning_rate": 2.0848026315789473e-06,
"loss": 0.053,
"step": 10950
},
{
"epoch": 0.54875,
"grad_norm": 2.9559834003448486,
"learning_rate": 2.0790460526315788e-06,
"loss": 0.0543,
"step": 10975
},
{
"epoch": 0.55,
"grad_norm": 1.6290831565856934,
"learning_rate": 2.0732894736842103e-06,
"loss": 0.0626,
"step": 11000
},
{
"epoch": 0.55,
"eval_loss": 0.2083810567855835,
"eval_runtime": 531.6457,
"eval_samples_per_second": 3.181,
"eval_steps_per_second": 0.399,
"eval_wer": 8.286499313101036,
"step": 11000
},
{
"epoch": 0.55125,
"grad_norm": 2.2507994174957275,
"learning_rate": 2.067532894736842e-06,
"loss": 0.0645,
"step": 11025
},
{
"epoch": 0.5525,
"grad_norm": 3.930997133255005,
"learning_rate": 2.0617763157894733e-06,
"loss": 0.0699,
"step": 11050
},
{
"epoch": 0.55375,
"grad_norm": 3.1073126792907715,
"learning_rate": 2.056019736842105e-06,
"loss": 0.0852,
"step": 11075
},
{
"epoch": 0.555,
"grad_norm": 2.5678088665008545,
"learning_rate": 2.0502631578947367e-06,
"loss": 0.0863,
"step": 11100
},
{
"epoch": 0.55625,
"grad_norm": 2.97763729095459,
"learning_rate": 2.0445065789473682e-06,
"loss": 0.0718,
"step": 11125
},
{
"epoch": 0.5575,
"grad_norm": 1.2580708265304565,
"learning_rate": 2.0387499999999998e-06,
"loss": 0.0462,
"step": 11150
},
{
"epoch": 0.55875,
"grad_norm": 1.804002285003662,
"learning_rate": 2.0329934210526317e-06,
"loss": 0.0364,
"step": 11175
},
{
"epoch": 0.56,
"grad_norm": 1.492600679397583,
"learning_rate": 2.027236842105263e-06,
"loss": 0.0438,
"step": 11200
},
{
"epoch": 0.56125,
"grad_norm": 2.423004627227783,
"learning_rate": 2.0214802631578947e-06,
"loss": 0.031,
"step": 11225
},
{
"epoch": 0.5625,
"grad_norm": 1.5198426246643066,
"learning_rate": 2.015723684210526e-06,
"loss": 0.0324,
"step": 11250
},
{
"epoch": 0.56375,
"grad_norm": 0.9852400422096252,
"learning_rate": 2.0099671052631577e-06,
"loss": 0.029,
"step": 11275
},
{
"epoch": 0.565,
"grad_norm": 1.2327955961227417,
"learning_rate": 2.004210526315789e-06,
"loss": 0.0406,
"step": 11300
},
{
"epoch": 0.56625,
"grad_norm": 1.455636978149414,
"learning_rate": 1.9984539473684207e-06,
"loss": 0.047,
"step": 11325
},
{
"epoch": 0.5675,
"grad_norm": 1.4720903635025024,
"learning_rate": 1.9926973684210522e-06,
"loss": 0.0444,
"step": 11350
},
{
"epoch": 0.56875,
"grad_norm": 1.7255401611328125,
"learning_rate": 1.986940789473684e-06,
"loss": 0.0514,
"step": 11375
},
{
"epoch": 0.57,
"grad_norm": 1.3503352403640747,
"learning_rate": 1.9811842105263157e-06,
"loss": 0.0533,
"step": 11400
},
{
"epoch": 0.57125,
"grad_norm": 1.5066325664520264,
"learning_rate": 1.975427631578947e-06,
"loss": 0.0524,
"step": 11425
},
{
"epoch": 0.5725,
"grad_norm": 1.877842903137207,
"learning_rate": 1.9696710526315787e-06,
"loss": 0.0519,
"step": 11450
},
{
"epoch": 0.57375,
"grad_norm": 1.4466218948364258,
"learning_rate": 1.9639144736842106e-06,
"loss": 0.0548,
"step": 11475
},
{
"epoch": 0.575,
"grad_norm": 1.3053616285324097,
"learning_rate": 1.958157894736842e-06,
"loss": 0.0472,
"step": 11500
},
{
"epoch": 0.575,
"eval_loss": 0.23307645320892334,
"eval_runtime": 536.26,
"eval_samples_per_second": 3.153,
"eval_steps_per_second": 0.395,
"eval_wer": 8.074185088047958,
"step": 11500
},
{
"epoch": 0.57625,
"grad_norm": 1.172753930091858,
"learning_rate": 1.9524013157894736e-06,
"loss": 0.0506,
"step": 11525
},
{
"epoch": 0.5775,
"grad_norm": 1.700363039970398,
"learning_rate": 1.946644736842105e-06,
"loss": 0.0585,
"step": 11550
},
{
"epoch": 0.57875,
"grad_norm": 1.3203791379928589,
"learning_rate": 1.9408881578947366e-06,
"loss": 0.0499,
"step": 11575
},
{
"epoch": 0.58,
"grad_norm": 1.4109314680099487,
"learning_rate": 1.935131578947368e-06,
"loss": 0.0433,
"step": 11600
},
{
"epoch": 0.58125,
"grad_norm": 1.3247355222702026,
"learning_rate": 1.929375e-06,
"loss": 0.0378,
"step": 11625
},
{
"epoch": 0.5825,
"grad_norm": 0.9325533509254456,
"learning_rate": 1.9236184210526316e-06,
"loss": 0.0442,
"step": 11650
},
{
"epoch": 0.58375,
"grad_norm": 1.8996745347976685,
"learning_rate": 1.917861842105263e-06,
"loss": 0.049,
"step": 11675
},
{
"epoch": 0.585,
"grad_norm": 1.7976350784301758,
"learning_rate": 1.9121052631578946e-06,
"loss": 0.0467,
"step": 11700
},
{
"epoch": 0.58625,
"grad_norm": 2.180805206298828,
"learning_rate": 1.906348684210526e-06,
"loss": 0.0493,
"step": 11725
},
{
"epoch": 0.5875,
"grad_norm": 1.2519850730895996,
"learning_rate": 1.9005921052631576e-06,
"loss": 0.0486,
"step": 11750
},
{
"epoch": 0.58875,
"grad_norm": 2.3758866786956787,
"learning_rate": 1.8948355263157893e-06,
"loss": 0.0584,
"step": 11775
},
{
"epoch": 0.59,
"grad_norm": 2.0312483310699463,
"learning_rate": 1.8890789473684208e-06,
"loss": 0.0702,
"step": 11800
},
{
"epoch": 0.59125,
"grad_norm": 2.017726182937622,
"learning_rate": 1.8833223684210525e-06,
"loss": 0.0822,
"step": 11825
},
{
"epoch": 0.5925,
"grad_norm": 2.159196138381958,
"learning_rate": 1.8775657894736842e-06,
"loss": 0.0918,
"step": 11850
},
{
"epoch": 0.59375,
"grad_norm": 2.8051164150238037,
"learning_rate": 1.8718092105263158e-06,
"loss": 0.0927,
"step": 11875
},
{
"epoch": 0.595,
"grad_norm": 1.9617701768875122,
"learning_rate": 1.8660526315789473e-06,
"loss": 0.0762,
"step": 11900
},
{
"epoch": 0.59625,
"grad_norm": 1.4993948936462402,
"learning_rate": 1.8602960526315788e-06,
"loss": 0.0768,
"step": 11925
},
{
"epoch": 0.5975,
"grad_norm": 2.1341333389282227,
"learning_rate": 1.8545394736842105e-06,
"loss": 0.0647,
"step": 11950
},
{
"epoch": 0.59875,
"grad_norm": 1.5004290342330933,
"learning_rate": 1.848782894736842e-06,
"loss": 0.0669,
"step": 11975
},
{
"epoch": 0.6,
"grad_norm": 3.0987565517425537,
"learning_rate": 1.8430263157894735e-06,
"loss": 0.0636,
"step": 12000
},
{
"epoch": 0.6,
"eval_loss": 0.2118152379989624,
"eval_runtime": 536.0484,
"eval_samples_per_second": 3.155,
"eval_steps_per_second": 0.395,
"eval_wer": 7.961783439490445,
"step": 12000
},
{
"epoch": 0.60125,
"grad_norm": 1.6456586122512817,
"learning_rate": 1.837269736842105e-06,
"loss": 0.0701,
"step": 12025
},
{
"epoch": 0.6025,
"grad_norm": 2.0990679264068604,
"learning_rate": 1.8315131578947367e-06,
"loss": 0.0573,
"step": 12050
},
{
"epoch": 0.60375,
"grad_norm": 1.8728748559951782,
"learning_rate": 1.8257565789473682e-06,
"loss": 0.054,
"step": 12075
},
{
"epoch": 0.605,
"grad_norm": 1.2849019765853882,
"learning_rate": 1.8199999999999997e-06,
"loss": 0.0522,
"step": 12100
},
{
"epoch": 0.60625,
"grad_norm": 1.6803030967712402,
"learning_rate": 1.8142434210526312e-06,
"loss": 0.0492,
"step": 12125
},
{
"epoch": 0.6075,
"grad_norm": 1.9102485179901123,
"learning_rate": 1.808486842105263e-06,
"loss": 0.0482,
"step": 12150
},
{
"epoch": 0.60875,
"grad_norm": 1.1118731498718262,
"learning_rate": 1.8027302631578947e-06,
"loss": 0.0422,
"step": 12175
},
{
"epoch": 0.61,
"grad_norm": 1.1670501232147217,
"learning_rate": 1.7969736842105262e-06,
"loss": 0.0515,
"step": 12200
},
{
"epoch": 0.61125,
"grad_norm": 2.522876739501953,
"learning_rate": 1.7912171052631579e-06,
"loss": 0.0412,
"step": 12225
},
{
"epoch": 0.6125,
"grad_norm": 1.2704464197158813,
"learning_rate": 1.7854605263157894e-06,
"loss": 0.0508,
"step": 12250
},
{
"epoch": 0.61375,
"grad_norm": 2.399094343185425,
"learning_rate": 1.779703947368421e-06,
"loss": 0.0547,
"step": 12275
},
{
"epoch": 0.615,
"grad_norm": 2.2606582641601562,
"learning_rate": 1.7739473684210524e-06,
"loss": 0.0562,
"step": 12300
},
{
"epoch": 0.61625,
"grad_norm": 0.5112090110778809,
"learning_rate": 1.7681907894736841e-06,
"loss": 0.0513,
"step": 12325
},
{
"epoch": 0.6175,
"grad_norm": 1.1044148206710815,
"learning_rate": 1.7624342105263156e-06,
"loss": 0.0544,
"step": 12350
},
{
"epoch": 0.61875,
"grad_norm": 1.2760109901428223,
"learning_rate": 1.7566776315789471e-06,
"loss": 0.0512,
"step": 12375
},
{
"epoch": 0.62,
"grad_norm": 1.3780227899551392,
"learning_rate": 1.7509210526315786e-06,
"loss": 0.0546,
"step": 12400
},
{
"epoch": 0.62125,
"grad_norm": 1.0981767177581787,
"learning_rate": 1.7451644736842104e-06,
"loss": 0.041,
"step": 12425
},
{
"epoch": 0.6225,
"grad_norm": 2.353482484817505,
"learning_rate": 1.7394078947368419e-06,
"loss": 0.0479,
"step": 12450
},
{
"epoch": 0.62375,
"grad_norm": 1.3375900983810425,
"learning_rate": 1.7336513157894734e-06,
"loss": 0.0522,
"step": 12475
},
{
"epoch": 0.625,
"grad_norm": 2.1002514362335205,
"learning_rate": 1.7278947368421053e-06,
"loss": 0.0466,
"step": 12500
},
{
"epoch": 0.625,
"eval_loss": 0.21263667941093445,
"eval_runtime": 535.5066,
"eval_samples_per_second": 3.158,
"eval_steps_per_second": 0.396,
"eval_wer": 7.468465093043587,
"step": 12500
},
{
"epoch": 0.62625,
"grad_norm": 1.5551177263259888,
"learning_rate": 1.7221381578947368e-06,
"loss": 0.0584,
"step": 12525
},
{
"epoch": 0.6275,
"grad_norm": 2.234121322631836,
"learning_rate": 1.7163815789473683e-06,
"loss": 0.061,
"step": 12550
},
{
"epoch": 0.62875,
"grad_norm": 2.269101619720459,
"learning_rate": 1.7106249999999998e-06,
"loss": 0.0607,
"step": 12575
},
{
"epoch": 0.63,
"grad_norm": 2.8848202228546143,
"learning_rate": 1.7048684210526315e-06,
"loss": 0.0675,
"step": 12600
},
{
"epoch": 0.63125,
"grad_norm": 2.2159249782562256,
"learning_rate": 1.699111842105263e-06,
"loss": 0.0783,
"step": 12625
},
{
"epoch": 0.6325,
"grad_norm": 1.5829565525054932,
"learning_rate": 1.6933552631578946e-06,
"loss": 0.0834,
"step": 12650
},
{
"epoch": 0.63375,
"grad_norm": 1.9816817045211792,
"learning_rate": 1.687598684210526e-06,
"loss": 0.0727,
"step": 12675
},
{
"epoch": 0.635,
"grad_norm": 2.8434395790100098,
"learning_rate": 1.6818421052631578e-06,
"loss": 0.0778,
"step": 12700
},
{
"epoch": 0.63625,
"grad_norm": 2.4956297874450684,
"learning_rate": 1.6760855263157893e-06,
"loss": 0.0731,
"step": 12725
},
{
"epoch": 0.6375,
"grad_norm": 1.7429981231689453,
"learning_rate": 1.6703289473684208e-06,
"loss": 0.0637,
"step": 12750
},
{
"epoch": 0.63875,
"grad_norm": 2.3022801876068115,
"learning_rate": 1.6645723684210525e-06,
"loss": 0.0708,
"step": 12775
},
{
"epoch": 0.64,
"grad_norm": 1.621469497680664,
"learning_rate": 1.658815789473684e-06,
"loss": 0.0466,
"step": 12800
},
{
"epoch": 0.64125,
"grad_norm": 1.7762545347213745,
"learning_rate": 1.6530592105263155e-06,
"loss": 0.0544,
"step": 12825
},
{
"epoch": 0.6425,
"grad_norm": 1.568123698234558,
"learning_rate": 1.6473026315789472e-06,
"loss": 0.0457,
"step": 12850
},
{
"epoch": 0.64375,
"grad_norm": 0.5994829535484314,
"learning_rate": 1.641546052631579e-06,
"loss": 0.0487,
"step": 12875
},
{
"epoch": 0.645,
"grad_norm": 1.9480714797973633,
"learning_rate": 1.6357894736842105e-06,
"loss": 0.0503,
"step": 12900
},
{
"epoch": 0.64625,
"grad_norm": 2.2603769302368164,
"learning_rate": 1.630032894736842e-06,
"loss": 0.0705,
"step": 12925
},
{
"epoch": 0.6475,
"grad_norm": 2.2942919731140137,
"learning_rate": 1.6242763157894737e-06,
"loss": 0.0666,
"step": 12950
},
{
"epoch": 0.64875,
"grad_norm": 2.819730758666992,
"learning_rate": 1.6185197368421052e-06,
"loss": 0.0736,
"step": 12975
},
{
"epoch": 0.65,
"grad_norm": 1.8207030296325684,
"learning_rate": 1.6127631578947367e-06,
"loss": 0.0604,
"step": 13000
},
{
"epoch": 0.65,
"eval_loss": 0.21604977548122406,
"eval_runtime": 534.676,
"eval_samples_per_second": 3.163,
"eval_steps_per_second": 0.397,
"eval_wer": 7.655801173972773,
"step": 13000
},
{
"epoch": 0.65125,
"grad_norm": 2.0296692848205566,
"learning_rate": 1.6070065789473682e-06,
"loss": 0.0745,
"step": 13025
},
{
"epoch": 0.6525,
"grad_norm": 3.9246408939361572,
"learning_rate": 1.60125e-06,
"loss": 0.0862,
"step": 13050
},
{
"epoch": 0.65375,
"grad_norm": 1.9909517765045166,
"learning_rate": 1.5954934210526314e-06,
"loss": 0.0676,
"step": 13075
},
{
"epoch": 0.655,
"grad_norm": 2.652264356613159,
"learning_rate": 1.589736842105263e-06,
"loss": 0.0823,
"step": 13100
},
{
"epoch": 0.65625,
"grad_norm": 2.1940698623657227,
"learning_rate": 1.5839802631578944e-06,
"loss": 0.0775,
"step": 13125
},
{
"epoch": 0.6575,
"grad_norm": 3.084667444229126,
"learning_rate": 1.5782236842105262e-06,
"loss": 0.0779,
"step": 13150
},
{
"epoch": 0.65875,
"grad_norm": 2.134045124053955,
"learning_rate": 1.5724671052631579e-06,
"loss": 0.0756,
"step": 13175
},
{
"epoch": 0.66,
"grad_norm": 2.4405481815338135,
"learning_rate": 1.5667105263157894e-06,
"loss": 0.075,
"step": 13200
},
{
"epoch": 0.66125,
"grad_norm": 2.251408100128174,
"learning_rate": 1.560953947368421e-06,
"loss": 0.0668,
"step": 13225
},
{
"epoch": 0.6625,
"grad_norm": 2.21307635307312,
"learning_rate": 1.5551973684210526e-06,
"loss": 0.076,
"step": 13250
},
{
"epoch": 0.66375,
"grad_norm": 3.1692416667938232,
"learning_rate": 1.549440789473684e-06,
"loss": 0.0841,
"step": 13275
},
{
"epoch": 0.665,
"grad_norm": 2.4879300594329834,
"learning_rate": 1.5436842105263156e-06,
"loss": 0.0785,
"step": 13300
},
{
"epoch": 0.66625,
"grad_norm": 1.6188695430755615,
"learning_rate": 1.5379276315789473e-06,
"loss": 0.0698,
"step": 13325
},
{
"epoch": 0.6675,
"grad_norm": 2.258192300796509,
"learning_rate": 1.5321710526315788e-06,
"loss": 0.0682,
"step": 13350
},
{
"epoch": 0.66875,
"grad_norm": 1.7001844644546509,
"learning_rate": 1.5264144736842103e-06,
"loss": 0.0728,
"step": 13375
},
{
"epoch": 0.67,
"grad_norm": 2.0650229454040527,
"learning_rate": 1.5206578947368418e-06,
"loss": 0.0608,
"step": 13400
},
{
"epoch": 0.67125,
"grad_norm": 1.0384840965270996,
"learning_rate": 1.5149013157894736e-06,
"loss": 0.0521,
"step": 13425
},
{
"epoch": 0.6725,
"grad_norm": 1.458274483680725,
"learning_rate": 1.509144736842105e-06,
"loss": 0.0544,
"step": 13450
},
{
"epoch": 0.67375,
"grad_norm": 1.678476095199585,
"learning_rate": 1.5033881578947366e-06,
"loss": 0.0478,
"step": 13475
},
{
"epoch": 0.675,
"grad_norm": 2.1401052474975586,
"learning_rate": 1.497631578947368e-06,
"loss": 0.0544,
"step": 13500
},
{
"epoch": 0.675,
"eval_loss": 0.21870100498199463,
"eval_runtime": 534.1154,
"eval_samples_per_second": 3.166,
"eval_steps_per_second": 0.397,
"eval_wer": 7.999250655676284,
"step": 13500
},
{
"epoch": 0.67625,
"grad_norm": 1.387534737586975,
"learning_rate": 1.491875e-06,
"loss": 0.0497,
"step": 13525
},
{
"epoch": 0.6775,
"grad_norm": 2.2233715057373047,
"learning_rate": 1.4861184210526315e-06,
"loss": 0.0628,
"step": 13550
},
{
"epoch": 0.67875,
"grad_norm": 2.775345802307129,
"learning_rate": 1.480361842105263e-06,
"loss": 0.0883,
"step": 13575
},
{
"epoch": 0.68,
"grad_norm": 2.7996487617492676,
"learning_rate": 1.4746052631578947e-06,
"loss": 0.0895,
"step": 13600
},
{
"epoch": 0.68125,
"grad_norm": 2.4933836460113525,
"learning_rate": 1.4688486842105262e-06,
"loss": 0.0876,
"step": 13625
},
{
"epoch": 0.6825,
"grad_norm": 3.253474712371826,
"learning_rate": 1.4630921052631578e-06,
"loss": 0.0725,
"step": 13650
},
{
"epoch": 0.68375,
"grad_norm": 2.5821990966796875,
"learning_rate": 1.4573355263157893e-06,
"loss": 0.088,
"step": 13675
},
{
"epoch": 0.685,
"grad_norm": 3.219723701477051,
"learning_rate": 1.451578947368421e-06,
"loss": 0.079,
"step": 13700
},
{
"epoch": 0.68625,
"grad_norm": 2.1482114791870117,
"learning_rate": 1.4458223684210525e-06,
"loss": 0.0715,
"step": 13725
},
{
"epoch": 0.6875,
"grad_norm": 3.403439521789551,
"learning_rate": 1.440065789473684e-06,
"loss": 0.0731,
"step": 13750
},
{
"epoch": 0.68875,
"grad_norm": 2.0612175464630127,
"learning_rate": 1.4343092105263155e-06,
"loss": 0.0669,
"step": 13775
},
{
"epoch": 0.69,
"grad_norm": 2.5637385845184326,
"learning_rate": 1.4285526315789472e-06,
"loss": 0.0766,
"step": 13800
},
{
"epoch": 0.69125,
"grad_norm": 1.8747389316558838,
"learning_rate": 1.4227960526315787e-06,
"loss": 0.0723,
"step": 13825
},
{
"epoch": 0.6925,
"grad_norm": 2.6436047554016113,
"learning_rate": 1.4170394736842104e-06,
"loss": 0.0694,
"step": 13850
},
{
"epoch": 0.69375,
"grad_norm": 2.300952911376953,
"learning_rate": 1.4112828947368422e-06,
"loss": 0.0711,
"step": 13875
},
{
"epoch": 0.695,
"grad_norm": 2.480396032333374,
"learning_rate": 1.4055263157894737e-06,
"loss": 0.0695,
"step": 13900
},
{
"epoch": 0.69625,
"grad_norm": 3.047656536102295,
"learning_rate": 1.3997697368421052e-06,
"loss": 0.0827,
"step": 13925
},
{
"epoch": 0.6975,
"grad_norm": 1.8521438837051392,
"learning_rate": 1.3940131578947367e-06,
"loss": 0.0799,
"step": 13950
},
{
"epoch": 0.69875,
"grad_norm": 3.52673602104187,
"learning_rate": 1.3882565789473684e-06,
"loss": 0.0819,
"step": 13975
},
{
"epoch": 0.7,
"grad_norm": 2.5274155139923096,
"learning_rate": 1.3824999999999999e-06,
"loss": 0.07,
"step": 14000
},
{
"epoch": 0.7,
"eval_loss": 0.21170927584171295,
"eval_runtime": 534.7374,
"eval_samples_per_second": 3.162,
"eval_steps_per_second": 0.396,
"eval_wer": 7.437242412888723,
"step": 14000
},
{
"epoch": 0.70125,
"grad_norm": 3.9497313499450684,
"learning_rate": 1.3767434210526314e-06,
"loss": 0.0977,
"step": 14025
},
{
"epoch": 0.7025,
"grad_norm": 5.4897284507751465,
"learning_rate": 1.3709868421052631e-06,
"loss": 0.1658,
"step": 14050
},
{
"epoch": 0.70375,
"grad_norm": 3.0957064628601074,
"learning_rate": 1.3652302631578946e-06,
"loss": 0.1823,
"step": 14075
},
{
"epoch": 0.705,
"grad_norm": 3.2891457080841064,
"learning_rate": 1.3594736842105261e-06,
"loss": 0.1777,
"step": 14100
},
{
"epoch": 0.70625,
"grad_norm": 3.642838954925537,
"learning_rate": 1.3537171052631576e-06,
"loss": 0.177,
"step": 14125
},
{
"epoch": 0.7075,
"grad_norm": 4.022505760192871,
"learning_rate": 1.3479605263157894e-06,
"loss": 0.1773,
"step": 14150
},
{
"epoch": 0.70875,
"grad_norm": 3.632260799407959,
"learning_rate": 1.3422039473684209e-06,
"loss": 0.138,
"step": 14175
},
{
"epoch": 0.71,
"grad_norm": 1.6560989618301392,
"learning_rate": 1.3364473684210526e-06,
"loss": 0.1163,
"step": 14200
},
{
"epoch": 0.71125,
"grad_norm": 1.4849154949188232,
"learning_rate": 1.3306907894736843e-06,
"loss": 0.1001,
"step": 14225
},
{
"epoch": 0.7125,
"grad_norm": 2.3382551670074463,
"learning_rate": 1.3249342105263158e-06,
"loss": 0.0748,
"step": 14250
},
{
"epoch": 0.71375,
"grad_norm": 3.0243709087371826,
"learning_rate": 1.3191776315789473e-06,
"loss": 0.0699,
"step": 14275
},
{
"epoch": 0.715,
"grad_norm": 3.4510324001312256,
"learning_rate": 1.3134210526315788e-06,
"loss": 0.0822,
"step": 14300
},
{
"epoch": 0.71625,
"grad_norm": 1.71156907081604,
"learning_rate": 1.3076644736842105e-06,
"loss": 0.0817,
"step": 14325
},
{
"epoch": 0.7175,
"grad_norm": 1.4711543321609497,
"learning_rate": 1.301907894736842e-06,
"loss": 0.0573,
"step": 14350
},
{
"epoch": 0.71875,
"grad_norm": 1.4108855724334717,
"learning_rate": 1.2961513157894735e-06,
"loss": 0.0518,
"step": 14375
},
{
"epoch": 0.72,
"grad_norm": 1.4882175922393799,
"learning_rate": 1.290394736842105e-06,
"loss": 0.0585,
"step": 14400
},
{
"epoch": 0.72125,
"grad_norm": 1.6964808702468872,
"learning_rate": 1.2846381578947368e-06,
"loss": 0.0562,
"step": 14425
},
{
"epoch": 0.7225,
"grad_norm": 1.7226653099060059,
"learning_rate": 1.2788815789473683e-06,
"loss": 0.0574,
"step": 14450
},
{
"epoch": 0.72375,
"grad_norm": 2.7214572429656982,
"learning_rate": 1.2731249999999998e-06,
"loss": 0.0629,
"step": 14475
},
{
"epoch": 0.725,
"grad_norm": 1.1752701997756958,
"learning_rate": 1.2673684210526313e-06,
"loss": 0.0534,
"step": 14500
},
{
"epoch": 0.725,
"eval_loss": 0.13807399570941925,
"eval_runtime": 533.693,
"eval_samples_per_second": 3.168,
"eval_steps_per_second": 0.397,
"eval_wer": 7.04383664293743,
"step": 14500
},
{
"epoch": 0.72625,
"grad_norm": 5.266875267028809,
"learning_rate": 1.261611842105263e-06,
"loss": 0.0553,
"step": 14525
},
{
"epoch": 0.7275,
"grad_norm": 2.1979897022247314,
"learning_rate": 1.2558552631578947e-06,
"loss": 0.0498,
"step": 14550
},
{
"epoch": 0.72875,
"grad_norm": 1.445584774017334,
"learning_rate": 1.2500986842105262e-06,
"loss": 0.0432,
"step": 14575
},
{
"epoch": 0.73,
"grad_norm": 0.985780656337738,
"learning_rate": 1.244342105263158e-06,
"loss": 0.0398,
"step": 14600
},
{
"epoch": 0.73125,
"grad_norm": 1.4595451354980469,
"learning_rate": 1.2385855263157894e-06,
"loss": 0.0472,
"step": 14625
},
{
"epoch": 0.7325,
"grad_norm": 1.6958725452423096,
"learning_rate": 1.232828947368421e-06,
"loss": 0.0451,
"step": 14650
},
{
"epoch": 0.73375,
"grad_norm": 1.4922881126403809,
"learning_rate": 1.2270723684210525e-06,
"loss": 0.0483,
"step": 14675
},
{
"epoch": 0.735,
"grad_norm": 2.243989944458008,
"learning_rate": 1.2213157894736842e-06,
"loss": 0.0691,
"step": 14700
},
{
"epoch": 0.73625,
"grad_norm": 3.160104513168335,
"learning_rate": 1.2155592105263157e-06,
"loss": 0.0814,
"step": 14725
},
{
"epoch": 0.7375,
"grad_norm": 2.0205318927764893,
"learning_rate": 1.2098026315789472e-06,
"loss": 0.0693,
"step": 14750
},
{
"epoch": 0.73875,
"grad_norm": 1.519434928894043,
"learning_rate": 1.2040460526315787e-06,
"loss": 0.0589,
"step": 14775
},
{
"epoch": 0.74,
"grad_norm": 2.59538197517395,
"learning_rate": 1.1982894736842104e-06,
"loss": 0.0546,
"step": 14800
},
{
"epoch": 0.74125,
"grad_norm": 2.137489080429077,
"learning_rate": 1.192532894736842e-06,
"loss": 0.0679,
"step": 14825
},
{
"epoch": 0.7425,
"grad_norm": 1.5184602737426758,
"learning_rate": 1.1867763157894734e-06,
"loss": 0.0685,
"step": 14850
},
{
"epoch": 0.74375,
"grad_norm": 2.101884365081787,
"learning_rate": 1.1810197368421054e-06,
"loss": 0.0526,
"step": 14875
},
{
"epoch": 0.745,
"grad_norm": 1.778254508972168,
"learning_rate": 1.1752631578947369e-06,
"loss": 0.0463,
"step": 14900
},
{
"epoch": 0.74625,
"grad_norm": 2.073361873626709,
"learning_rate": 1.1695065789473684e-06,
"loss": 0.0542,
"step": 14925
},
{
"epoch": 0.7475,
"grad_norm": 2.091325283050537,
"learning_rate": 1.1637499999999999e-06,
"loss": 0.0456,
"step": 14950
},
{
"epoch": 0.74875,
"grad_norm": 1.7418571710586548,
"learning_rate": 1.1579934210526316e-06,
"loss": 0.0435,
"step": 14975
},
{
"epoch": 0.75,
"grad_norm": 1.8316125869750977,
"learning_rate": 1.152236842105263e-06,
"loss": 0.046,
"step": 15000
},
{
"epoch": 0.75,
"eval_loss": 0.14957565069198608,
"eval_runtime": 534.2678,
"eval_samples_per_second": 3.165,
"eval_steps_per_second": 0.397,
"eval_wer": 7.081303859123267,
"step": 15000
},
{
"epoch": 0.75125,
"grad_norm": 2.781534433364868,
"learning_rate": 1.1467105263157894e-06,
"loss": 0.0728,
"step": 15025
},
{
"epoch": 0.7525,
"grad_norm": 2.0675017833709717,
"learning_rate": 1.1409539473684209e-06,
"loss": 0.095,
"step": 15050
},
{
"epoch": 0.75375,
"grad_norm": 3.430636167526245,
"learning_rate": 1.1351973684210524e-06,
"loss": 0.0966,
"step": 15075
},
{
"epoch": 0.755,
"grad_norm": 3.50378680229187,
"learning_rate": 1.129440789473684e-06,
"loss": 0.1087,
"step": 15100
},
{
"epoch": 0.75625,
"grad_norm": 2.9562337398529053,
"learning_rate": 1.1236842105263156e-06,
"loss": 0.1098,
"step": 15125
},
{
"epoch": 0.7575,
"grad_norm": 2.7388198375701904,
"learning_rate": 1.1179276315789471e-06,
"loss": 0.1328,
"step": 15150
},
{
"epoch": 0.75875,
"grad_norm": 3.3490402698516846,
"learning_rate": 1.112171052631579e-06,
"loss": 0.097,
"step": 15175
},
{
"epoch": 0.76,
"grad_norm": 1.3750718832015991,
"learning_rate": 1.1064144736842105e-06,
"loss": 0.0722,
"step": 15200
},
{
"epoch": 0.76125,
"grad_norm": 1.7064391374588013,
"learning_rate": 1.100657894736842e-06,
"loss": 0.0588,
"step": 15225
},
{
"epoch": 0.7625,
"grad_norm": 1.8604276180267334,
"learning_rate": 1.0949013157894736e-06,
"loss": 0.0557,
"step": 15250
},
{
"epoch": 0.76375,
"grad_norm": 1.2240312099456787,
"learning_rate": 1.0891447368421053e-06,
"loss": 0.0438,
"step": 15275
},
{
"epoch": 0.765,
"grad_norm": 1.5873894691467285,
"learning_rate": 1.0833881578947368e-06,
"loss": 0.0471,
"step": 15300
},
{
"epoch": 0.76625,
"grad_norm": 1.645041823387146,
"learning_rate": 1.0776315789473683e-06,
"loss": 0.0586,
"step": 15325
},
{
"epoch": 0.7675,
"grad_norm": 2.3403167724609375,
"learning_rate": 1.0718749999999998e-06,
"loss": 0.0698,
"step": 15350
},
{
"epoch": 0.76875,
"grad_norm": 2.5629897117614746,
"learning_rate": 1.0661184210526315e-06,
"loss": 0.068,
"step": 15375
},
{
"epoch": 0.77,
"grad_norm": 2.1160974502563477,
"learning_rate": 1.060361842105263e-06,
"loss": 0.0771,
"step": 15400
},
{
"epoch": 0.77125,
"grad_norm": 2.094522714614868,
"learning_rate": 1.0546052631578947e-06,
"loss": 0.0882,
"step": 15425
},
{
"epoch": 0.7725,
"grad_norm": 2.3391168117523193,
"learning_rate": 1.0488486842105262e-06,
"loss": 0.0746,
"step": 15450
},
{
"epoch": 0.77375,
"grad_norm": 2.208967924118042,
"learning_rate": 1.0430921052631577e-06,
"loss": 0.0725,
"step": 15475
},
{
"epoch": 0.775,
"grad_norm": 2.7758445739746094,
"learning_rate": 1.0373355263157895e-06,
"loss": 0.066,
"step": 15500
},
{
"epoch": 0.775,
"eval_loss": 0.1524539738893509,
"eval_runtime": 533.899,
"eval_samples_per_second": 3.167,
"eval_steps_per_second": 0.397,
"eval_wer": 7.00012489072062,
"step": 15500
},
{
"epoch": 0.77625,
"grad_norm": 1.5453675985336304,
"learning_rate": 1.031578947368421e-06,
"loss": 0.0511,
"step": 15525
},
{
"epoch": 0.7775,
"grad_norm": 2.0205094814300537,
"learning_rate": 1.0258223684210525e-06,
"loss": 0.05,
"step": 15550
},
{
"epoch": 0.77875,
"grad_norm": 1.2804875373840332,
"learning_rate": 1.020065789473684e-06,
"loss": 0.0598,
"step": 15575
},
{
"epoch": 0.78,
"grad_norm": 2.22847843170166,
"learning_rate": 1.0143092105263157e-06,
"loss": 0.0686,
"step": 15600
},
{
"epoch": 0.78125,
"grad_norm": 2.523324489593506,
"learning_rate": 1.0085526315789472e-06,
"loss": 0.1251,
"step": 15625
},
{
"epoch": 0.7825,
"grad_norm": 1.8177152872085571,
"learning_rate": 1.002796052631579e-06,
"loss": 0.1014,
"step": 15650
},
{
"epoch": 0.78375,
"grad_norm": 1.9223369359970093,
"learning_rate": 9.970394736842104e-07,
"loss": 0.0604,
"step": 15675
},
{
"epoch": 0.785,
"grad_norm": 1.9404890537261963,
"learning_rate": 9.91282894736842e-07,
"loss": 0.0556,
"step": 15700
},
{
"epoch": 0.78625,
"grad_norm": 1.354697823524475,
"learning_rate": 9.855263157894737e-07,
"loss": 0.0452,
"step": 15725
},
{
"epoch": 0.7875,
"grad_norm": 0.9245623350143433,
"learning_rate": 9.797697368421052e-07,
"loss": 0.0536,
"step": 15750
},
{
"epoch": 0.78875,
"grad_norm": 1.3286716938018799,
"learning_rate": 9.740131578947369e-07,
"loss": 0.0402,
"step": 15775
},
{
"epoch": 0.79,
"grad_norm": 2.337540626525879,
"learning_rate": 9.682565789473684e-07,
"loss": 0.0619,
"step": 15800
},
{
"epoch": 0.79125,
"grad_norm": 1.3047797679901123,
"learning_rate": 9.624999999999999e-07,
"loss": 0.0582,
"step": 15825
},
{
"epoch": 0.7925,
"grad_norm": 1.5523693561553955,
"learning_rate": 9.567434210526314e-07,
"loss": 0.0461,
"step": 15850
},
{
"epoch": 0.79375,
"grad_norm": 0.8749285340309143,
"learning_rate": 9.50986842105263e-07,
"loss": 0.0458,
"step": 15875
},
{
"epoch": 0.795,
"grad_norm": 1.0452526807785034,
"learning_rate": 9.452302631578946e-07,
"loss": 0.0419,
"step": 15900
},
{
"epoch": 0.79625,
"grad_norm": 1.9379664659500122,
"learning_rate": 9.394736842105263e-07,
"loss": 0.0566,
"step": 15925
},
{
"epoch": 0.7975,
"grad_norm": 1.316031575202942,
"learning_rate": 9.337171052631578e-07,
"loss": 0.0473,
"step": 15950
},
{
"epoch": 0.79875,
"grad_norm": 1.216234564781189,
"learning_rate": 9.279605263157895e-07,
"loss": 0.0567,
"step": 15975
},
{
"epoch": 0.8,
"grad_norm": 1.7266921997070312,
"learning_rate": 9.22203947368421e-07,
"loss": 0.0632,
"step": 16000
},
{
"epoch": 0.8,
"eval_loss": 0.14084434509277344,
"eval_runtime": 535.4097,
"eval_samples_per_second": 3.158,
"eval_steps_per_second": 0.396,
"eval_wer": 6.681653553141001,
"step": 16000
},
{
"epoch": 0.80125,
"grad_norm": 1.8532096147537231,
"learning_rate": 9.164473684210526e-07,
"loss": 0.0579,
"step": 16025
},
{
"epoch": 0.8025,
"grad_norm": 2.181915044784546,
"learning_rate": 9.106907894736841e-07,
"loss": 0.0757,
"step": 16050
},
{
"epoch": 0.80375,
"grad_norm": 2.2596707344055176,
"learning_rate": 9.049342105263157e-07,
"loss": 0.0729,
"step": 16075
},
{
"epoch": 0.805,
"grad_norm": 1.2219024896621704,
"learning_rate": 8.991776315789473e-07,
"loss": 0.0666,
"step": 16100
},
{
"epoch": 0.80625,
"grad_norm": 1.135261058807373,
"learning_rate": 8.934210526315789e-07,
"loss": 0.0627,
"step": 16125
},
{
"epoch": 0.8075,
"grad_norm": 1.6599974632263184,
"learning_rate": 8.876644736842104e-07,
"loss": 0.0477,
"step": 16150
},
{
"epoch": 0.80875,
"grad_norm": 1.7189278602600098,
"learning_rate": 8.81907894736842e-07,
"loss": 0.049,
"step": 16175
},
{
"epoch": 0.81,
"grad_norm": 0.837539553642273,
"learning_rate": 8.761513157894735e-07,
"loss": 0.0489,
"step": 16200
},
{
"epoch": 0.81125,
"grad_norm": 1.5122978687286377,
"learning_rate": 8.703947368421051e-07,
"loss": 0.0389,
"step": 16225
},
{
"epoch": 0.8125,
"grad_norm": 1.7276921272277832,
"learning_rate": 8.646381578947368e-07,
"loss": 0.0449,
"step": 16250
},
{
"epoch": 0.81375,
"grad_norm": 2.028928756713867,
"learning_rate": 8.588815789473684e-07,
"loss": 0.045,
"step": 16275
},
{
"epoch": 0.815,
"grad_norm": 1.258401870727539,
"learning_rate": 8.53125e-07,
"loss": 0.0413,
"step": 16300
},
{
"epoch": 0.81625,
"grad_norm": 1.2878379821777344,
"learning_rate": 8.473684210526315e-07,
"loss": 0.0454,
"step": 16325
},
{
"epoch": 0.8175,
"grad_norm": 0.9309024810791016,
"learning_rate": 8.416118421052631e-07,
"loss": 0.0389,
"step": 16350
},
{
"epoch": 0.81875,
"grad_norm": 0.6321396231651306,
"learning_rate": 8.358552631578946e-07,
"loss": 0.0282,
"step": 16375
},
{
"epoch": 0.82,
"grad_norm": 1.8799151182174683,
"learning_rate": 8.300986842105262e-07,
"loss": 0.0316,
"step": 16400
},
{
"epoch": 0.82125,
"grad_norm": 0.642666220664978,
"learning_rate": 8.243421052631577e-07,
"loss": 0.0415,
"step": 16425
},
{
"epoch": 0.8225,
"grad_norm": 1.199803352355957,
"learning_rate": 8.185855263157894e-07,
"loss": 0.0503,
"step": 16450
},
{
"epoch": 0.82375,
"grad_norm": 1.517521858215332,
"learning_rate": 8.128289473684211e-07,
"loss": 0.0342,
"step": 16475
},
{
"epoch": 0.825,
"grad_norm": 1.683922290802002,
"learning_rate": 8.070723684210526e-07,
"loss": 0.0437,
"step": 16500
},
{
"epoch": 0.825,
"eval_loss": 0.1474502831697464,
"eval_runtime": 533.549,
"eval_samples_per_second": 3.169,
"eval_steps_per_second": 0.397,
"eval_wer": 6.594230048707381,
"step": 16500
},
{
"epoch": 0.82625,
"grad_norm": 2.0518248081207275,
"learning_rate": 8.013157894736842e-07,
"loss": 0.0411,
"step": 16525
},
{
"epoch": 0.8275,
"grad_norm": 1.139129638671875,
"learning_rate": 7.955592105263157e-07,
"loss": 0.0426,
"step": 16550
},
{
"epoch": 0.82875,
"grad_norm": 0.7436901926994324,
"learning_rate": 7.898026315789473e-07,
"loss": 0.0413,
"step": 16575
},
{
"epoch": 0.83,
"grad_norm": 0.8292795419692993,
"learning_rate": 7.840460526315789e-07,
"loss": 0.0456,
"step": 16600
},
{
"epoch": 0.83125,
"grad_norm": 2.60646390914917,
"learning_rate": 7.782894736842105e-07,
"loss": 0.0459,
"step": 16625
},
{
"epoch": 0.8325,
"grad_norm": 2.15118408203125,
"learning_rate": 7.72532894736842e-07,
"loss": 0.0653,
"step": 16650
},
{
"epoch": 0.83375,
"grad_norm": 1.8501421213150024,
"learning_rate": 7.667763157894736e-07,
"loss": 0.0702,
"step": 16675
},
{
"epoch": 0.835,
"grad_norm": 1.579913854598999,
"learning_rate": 7.610197368421051e-07,
"loss": 0.0693,
"step": 16700
},
{
"epoch": 0.83625,
"grad_norm": 2.4913477897644043,
"learning_rate": 7.552631578947367e-07,
"loss": 0.0874,
"step": 16725
},
{
"epoch": 0.8375,
"grad_norm": 2.489863634109497,
"learning_rate": 7.495065789473683e-07,
"loss": 0.0642,
"step": 16750
},
{
"epoch": 0.83875,
"grad_norm": 4.630337715148926,
"learning_rate": 7.4375e-07,
"loss": 0.0728,
"step": 16775
},
{
"epoch": 0.84,
"grad_norm": 1.708297848701477,
"learning_rate": 7.379934210526316e-07,
"loss": 0.056,
"step": 16800
},
{
"epoch": 0.84125,
"grad_norm": 1.7515946626663208,
"learning_rate": 7.322368421052631e-07,
"loss": 0.0477,
"step": 16825
},
{
"epoch": 0.8425,
"grad_norm": 1.6641236543655396,
"learning_rate": 7.264802631578947e-07,
"loss": 0.0508,
"step": 16850
},
{
"epoch": 0.84375,
"grad_norm": 1.693472146987915,
"learning_rate": 7.207236842105262e-07,
"loss": 0.0457,
"step": 16875
},
{
"epoch": 0.845,
"grad_norm": 0.845664381980896,
"learning_rate": 7.149671052631578e-07,
"loss": 0.0415,
"step": 16900
},
{
"epoch": 0.84625,
"grad_norm": 1.8824065923690796,
"learning_rate": 7.092105263157893e-07,
"loss": 0.0481,
"step": 16925
},
{
"epoch": 0.8475,
"grad_norm": 1.9034583568572998,
"learning_rate": 7.03453947368421e-07,
"loss": 0.0496,
"step": 16950
},
{
"epoch": 0.84875,
"grad_norm": 2.6840953826904297,
"learning_rate": 6.976973684210525e-07,
"loss": 0.0574,
"step": 16975
},
{
"epoch": 0.85,
"grad_norm": 1.8385533094406128,
"learning_rate": 6.919407894736842e-07,
"loss": 0.0478,
"step": 17000
},
{
"epoch": 0.85,
"eval_loss": 0.15727710723876953,
"eval_runtime": 534.9573,
"eval_samples_per_second": 3.161,
"eval_steps_per_second": 0.396,
"eval_wer": 6.794055201698514,
"step": 17000
},
{
"epoch": 0.85125,
"grad_norm": 1.590932011604309,
"learning_rate": 6.864144736842104e-07,
"loss": 0.0589,
"step": 17025
},
{
"epoch": 0.8525,
"grad_norm": 1.005034327507019,
"learning_rate": 6.806578947368419e-07,
"loss": 0.0554,
"step": 17050
},
{
"epoch": 0.85375,
"grad_norm": 3.3872015476226807,
"learning_rate": 6.749013157894737e-07,
"loss": 0.0572,
"step": 17075
},
{
"epoch": 0.855,
"grad_norm": 3.8093373775482178,
"learning_rate": 6.691447368421053e-07,
"loss": 0.1078,
"step": 17100
},
{
"epoch": 0.85625,
"grad_norm": 2.587963581085205,
"learning_rate": 6.633881578947368e-07,
"loss": 0.1426,
"step": 17125
},
{
"epoch": 0.8575,
"grad_norm": 3.9271957874298096,
"learning_rate": 6.576315789473684e-07,
"loss": 0.1496,
"step": 17150
},
{
"epoch": 0.85875,
"grad_norm": 3.7258965969085693,
"learning_rate": 6.518749999999999e-07,
"loss": 0.1852,
"step": 17175
},
{
"epoch": 0.86,
"grad_norm": 4.298374652862549,
"learning_rate": 6.461184210526315e-07,
"loss": 0.2419,
"step": 17200
},
{
"epoch": 0.86125,
"grad_norm": 6.419559478759766,
"learning_rate": 6.403618421052631e-07,
"loss": 0.225,
"step": 17225
},
{
"epoch": 0.8625,
"grad_norm": 4.669430732727051,
"learning_rate": 6.346052631578947e-07,
"loss": 0.297,
"step": 17250
},
{
"epoch": 0.86375,
"grad_norm": 4.676415920257568,
"learning_rate": 6.288486842105262e-07,
"loss": 0.2001,
"step": 17275
},
{
"epoch": 0.865,
"grad_norm": 1.519974708557129,
"learning_rate": 6.230921052631579e-07,
"loss": 0.1029,
"step": 17300
},
{
"epoch": 0.86625,
"grad_norm": 2.9553279876708984,
"learning_rate": 6.173355263157894e-07,
"loss": 0.0917,
"step": 17325
},
{
"epoch": 0.8675,
"grad_norm": 1.5657232999801636,
"learning_rate": 6.11578947368421e-07,
"loss": 0.088,
"step": 17350
},
{
"epoch": 0.86875,
"grad_norm": 3.1620709896087646,
"learning_rate": 6.058223684210525e-07,
"loss": 0.1046,
"step": 17375
},
{
"epoch": 0.87,
"grad_norm": 3.469240188598633,
"learning_rate": 6.000657894736842e-07,
"loss": 0.1004,
"step": 17400
},
{
"epoch": 0.87125,
"grad_norm": 1.9016904830932617,
"learning_rate": 5.943092105263158e-07,
"loss": 0.0875,
"step": 17425
},
{
"epoch": 0.8725,
"grad_norm": 3.1401467323303223,
"learning_rate": 5.885526315789473e-07,
"loss": 0.0593,
"step": 17450
},
{
"epoch": 0.87375,
"grad_norm": 1.1564242839813232,
"learning_rate": 5.827960526315789e-07,
"loss": 0.0444,
"step": 17475
},
{
"epoch": 0.875,
"grad_norm": 0.9873404502868652,
"learning_rate": 5.770394736842104e-07,
"loss": 0.0418,
"step": 17500
},
{
"epoch": 0.875,
"eval_loss": 0.156468465924263,
"eval_runtime": 534.8937,
"eval_samples_per_second": 3.161,
"eval_steps_per_second": 0.396,
"eval_wer": 6.650430872986138,
"step": 17500
},
{
"epoch": 0.87625,
"grad_norm": 1.499561071395874,
"learning_rate": 5.71282894736842e-07,
"loss": 0.0423,
"step": 17525
},
{
"epoch": 0.8775,
"grad_norm": 1.0905530452728271,
"learning_rate": 5.655263157894735e-07,
"loss": 0.0496,
"step": 17550
},
{
"epoch": 0.87875,
"grad_norm": 1.6048545837402344,
"learning_rate": 5.597697368421053e-07,
"loss": 0.0437,
"step": 17575
},
{
"epoch": 0.88,
"grad_norm": 1.5219619274139404,
"learning_rate": 5.540131578947369e-07,
"loss": 0.0676,
"step": 17600
},
{
"epoch": 0.88125,
"grad_norm": 1.8919825553894043,
"learning_rate": 5.482565789473684e-07,
"loss": 0.0647,
"step": 17625
},
{
"epoch": 0.8825,
"grad_norm": 2.4546618461608887,
"learning_rate": 5.425e-07,
"loss": 0.0625,
"step": 17650
},
{
"epoch": 0.88375,
"grad_norm": 1.7209670543670654,
"learning_rate": 5.367434210526315e-07,
"loss": 0.0661,
"step": 17675
},
{
"epoch": 0.885,
"grad_norm": 2.5535149574279785,
"learning_rate": 5.309868421052631e-07,
"loss": 0.0691,
"step": 17700
},
{
"epoch": 0.88625,
"grad_norm": 3.5450563430786133,
"learning_rate": 5.252302631578947e-07,
"loss": 0.0603,
"step": 17725
},
{
"epoch": 0.8875,
"grad_norm": 1.4123398065567017,
"learning_rate": 5.194736842105262e-07,
"loss": 0.0666,
"step": 17750
},
{
"epoch": 0.88875,
"grad_norm": 1.427933931350708,
"learning_rate": 5.137171052631578e-07,
"loss": 0.0428,
"step": 17775
},
{
"epoch": 0.89,
"grad_norm": 1.3647822141647339,
"learning_rate": 5.079605263157895e-07,
"loss": 0.0382,
"step": 17800
},
{
"epoch": 0.89125,
"grad_norm": 1.1601825952529907,
"learning_rate": 5.02203947368421e-07,
"loss": 0.0485,
"step": 17825
},
{
"epoch": 0.8925,
"grad_norm": 1.2409619092941284,
"learning_rate": 4.964473684210526e-07,
"loss": 0.0439,
"step": 17850
},
{
"epoch": 0.89375,
"grad_norm": 2.1224701404571533,
"learning_rate": 4.906907894736842e-07,
"loss": 0.0463,
"step": 17875
},
{
"epoch": 0.895,
"grad_norm": 1.7053598165512085,
"learning_rate": 4.849342105263158e-07,
"loss": 0.0419,
"step": 17900
},
{
"epoch": 0.89625,
"grad_norm": 1.2734942436218262,
"learning_rate": 4.791776315789473e-07,
"loss": 0.0385,
"step": 17925
},
{
"epoch": 0.8975,
"grad_norm": 1.448438048362732,
"learning_rate": 4.734210526315789e-07,
"loss": 0.0397,
"step": 17950
},
{
"epoch": 0.89875,
"grad_norm": 1.1724251508712769,
"learning_rate": 4.6766447368421047e-07,
"loss": 0.0454,
"step": 17975
},
{
"epoch": 0.9,
"grad_norm": 1.4745044708251953,
"learning_rate": 4.6190789473684203e-07,
"loss": 0.0382,
"step": 18000
},
{
"epoch": 0.9,
"eval_loss": 0.15590737760066986,
"eval_runtime": 533.3825,
"eval_samples_per_second": 3.17,
"eval_steps_per_second": 0.397,
"eval_wer": 6.563007368552516,
"step": 18000
},
{
"epoch": 0.90125,
"grad_norm": 1.3046791553497314,
"learning_rate": 4.5615131578947364e-07,
"loss": 0.0497,
"step": 18025
},
{
"epoch": 0.9025,
"grad_norm": 2.052855968475342,
"learning_rate": 4.5039473684210525e-07,
"loss": 0.0542,
"step": 18050
},
{
"epoch": 0.90375,
"grad_norm": 1.6691333055496216,
"learning_rate": 4.446381578947368e-07,
"loss": 0.0503,
"step": 18075
},
{
"epoch": 0.905,
"grad_norm": 3.4304769039154053,
"learning_rate": 4.3888157894736837e-07,
"loss": 0.0572,
"step": 18100
},
{
"epoch": 0.90625,
"grad_norm": 1.5289900302886963,
"learning_rate": 4.33125e-07,
"loss": 0.07,
"step": 18125
},
{
"epoch": 0.9075,
"grad_norm": 2.5705385208129883,
"learning_rate": 4.2736842105263154e-07,
"loss": 0.0778,
"step": 18150
},
{
"epoch": 0.90875,
"grad_norm": 2.34914493560791,
"learning_rate": 4.216118421052631e-07,
"loss": 0.0739,
"step": 18175
},
{
"epoch": 0.91,
"grad_norm": 2.6740806102752686,
"learning_rate": 4.158552631578947e-07,
"loss": 0.0621,
"step": 18200
},
{
"epoch": 0.91125,
"grad_norm": 0.931742787361145,
"learning_rate": 4.1009868421052627e-07,
"loss": 0.0636,
"step": 18225
},
{
"epoch": 0.9125,
"grad_norm": 1.7513364553451538,
"learning_rate": 4.0434210526315783e-07,
"loss": 0.0526,
"step": 18250
},
{
"epoch": 0.91375,
"grad_norm": 1.3136606216430664,
"learning_rate": 3.985855263157894e-07,
"loss": 0.0469,
"step": 18275
},
{
"epoch": 0.915,
"grad_norm": 1.2674484252929688,
"learning_rate": 3.9282894736842105e-07,
"loss": 0.0569,
"step": 18300
},
{
"epoch": 0.91625,
"grad_norm": 2.0879714488983154,
"learning_rate": 3.870723684210526e-07,
"loss": 0.056,
"step": 18325
},
{
"epoch": 0.9175,
"grad_norm": 1.6177654266357422,
"learning_rate": 3.8131578947368417e-07,
"loss": 0.0496,
"step": 18350
},
{
"epoch": 0.91875,
"grad_norm": 1.267562985420227,
"learning_rate": 3.755592105263158e-07,
"loss": 0.0474,
"step": 18375
},
{
"epoch": 0.92,
"grad_norm": 1.9628887176513672,
"learning_rate": 3.6980263157894734e-07,
"loss": 0.0523,
"step": 18400
},
{
"epoch": 0.92125,
"grad_norm": 2.450678586959839,
"learning_rate": 3.640460526315789e-07,
"loss": 0.0571,
"step": 18425
},
{
"epoch": 0.9225,
"grad_norm": 3.2376692295074463,
"learning_rate": 3.5828947368421046e-07,
"loss": 0.0726,
"step": 18450
},
{
"epoch": 0.92375,
"grad_norm": 1.718723177909851,
"learning_rate": 3.5253289473684207e-07,
"loss": 0.0759,
"step": 18475
},
{
"epoch": 0.925,
"grad_norm": 1.7278677225112915,
"learning_rate": 3.4677631578947363e-07,
"loss": 0.0658,
"step": 18500
},
{
"epoch": 0.925,
"eval_loss": 0.145228311419487,
"eval_runtime": 537.5559,
"eval_samples_per_second": 3.146,
"eval_steps_per_second": 0.394,
"eval_wer": 6.563007368552516,
"step": 18500
},
{
"epoch": 0.92625,
"grad_norm": 2.314218044281006,
"learning_rate": 3.410197368421052e-07,
"loss": 0.083,
"step": 18525
},
{
"epoch": 0.9275,
"grad_norm": 2.6032817363739014,
"learning_rate": 3.3526315789473685e-07,
"loss": 0.0796,
"step": 18550
},
{
"epoch": 0.92875,
"grad_norm": 1.2821646928787231,
"learning_rate": 3.295065789473684e-07,
"loss": 0.0475,
"step": 18575
},
{
"epoch": 0.93,
"grad_norm": 1.2048566341400146,
"learning_rate": 3.2374999999999997e-07,
"loss": 0.0441,
"step": 18600
},
{
"epoch": 0.93125,
"grad_norm": 2.205629348754883,
"learning_rate": 3.179934210526316e-07,
"loss": 0.0529,
"step": 18625
},
{
"epoch": 0.9325,
"grad_norm": 0.948354959487915,
"learning_rate": 3.1223684210526314e-07,
"loss": 0.0491,
"step": 18650
},
{
"epoch": 0.93375,
"grad_norm": 0.8600139617919922,
"learning_rate": 3.064802631578947e-07,
"loss": 0.0397,
"step": 18675
},
{
"epoch": 0.935,
"grad_norm": 1.5570470094680786,
"learning_rate": 3.0072368421052626e-07,
"loss": 0.0479,
"step": 18700
},
{
"epoch": 0.93625,
"grad_norm": 1.6737167835235596,
"learning_rate": 2.9496710526315787e-07,
"loss": 0.053,
"step": 18725
},
{
"epoch": 0.9375,
"grad_norm": 1.7217572927474976,
"learning_rate": 2.8921052631578943e-07,
"loss": 0.0551,
"step": 18750
},
{
"epoch": 0.93875,
"grad_norm": 2.207542896270752,
"learning_rate": 2.83453947368421e-07,
"loss": 0.0618,
"step": 18775
},
{
"epoch": 0.94,
"grad_norm": 1.6761177778244019,
"learning_rate": 2.7769736842105265e-07,
"loss": 0.0652,
"step": 18800
},
{
"epoch": 0.94125,
"grad_norm": 2.9946813583374023,
"learning_rate": 2.719407894736842e-07,
"loss": 0.0738,
"step": 18825
},
{
"epoch": 0.9425,
"grad_norm": 2.3663125038146973,
"learning_rate": 2.6618421052631577e-07,
"loss": 0.0629,
"step": 18850
},
{
"epoch": 0.94375,
"grad_norm": 2.5888278484344482,
"learning_rate": 2.6042763157894733e-07,
"loss": 0.2007,
"step": 18875
},
{
"epoch": 0.945,
"grad_norm": 1.5639821290969849,
"learning_rate": 2.5467105263157894e-07,
"loss": 0.2028,
"step": 18900
},
{
"epoch": 0.94625,
"grad_norm": 1.5892317295074463,
"learning_rate": 2.489144736842105e-07,
"loss": 0.1062,
"step": 18925
},
{
"epoch": 0.9475,
"grad_norm": 2.5038766860961914,
"learning_rate": 2.431578947368421e-07,
"loss": 0.06,
"step": 18950
},
{
"epoch": 0.94875,
"grad_norm": 2.531886577606201,
"learning_rate": 2.3740131578947364e-07,
"loss": 0.0553,
"step": 18975
},
{
"epoch": 0.95,
"grad_norm": 2.5510354042053223,
"learning_rate": 2.3164473684210526e-07,
"loss": 0.0531,
"step": 19000
},
{
"epoch": 0.95,
"eval_loss": 0.15760228037834167,
"eval_runtime": 534.6809,
"eval_samples_per_second": 3.163,
"eval_steps_per_second": 0.396,
"eval_wer": 6.662919945048083,
"step": 19000
},
{
"epoch": 0.95125,
"grad_norm": 3.058936357498169,
"learning_rate": 2.2588815789473684e-07,
"loss": 0.061,
"step": 19025
},
{
"epoch": 0.9525,
"grad_norm": 4.343925476074219,
"learning_rate": 2.201315789473684e-07,
"loss": 0.0775,
"step": 19050
},
{
"epoch": 0.95375,
"grad_norm": 3.271355628967285,
"learning_rate": 2.1437499999999999e-07,
"loss": 0.0762,
"step": 19075
},
{
"epoch": 0.955,
"grad_norm": 1.7924737930297852,
"learning_rate": 2.0861842105263154e-07,
"loss": 0.0733,
"step": 19100
},
{
"epoch": 0.95625,
"grad_norm": 2.034940719604492,
"learning_rate": 2.0286184210526313e-07,
"loss": 0.0747,
"step": 19125
},
{
"epoch": 0.9575,
"grad_norm": 3.0561563968658447,
"learning_rate": 1.9733552631578946e-07,
"loss": 0.0841,
"step": 19150
},
{
"epoch": 0.95875,
"grad_norm": 1.5333133935928345,
"learning_rate": 1.9157894736842102e-07,
"loss": 0.0656,
"step": 19175
},
{
"epoch": 0.96,
"grad_norm": 1.5307198762893677,
"learning_rate": 1.858223684210526e-07,
"loss": 0.0532,
"step": 19200
},
{
"epoch": 0.96125,
"grad_norm": 1.5663795471191406,
"learning_rate": 1.8006578947368422e-07,
"loss": 0.0485,
"step": 19225
},
{
"epoch": 0.9625,
"grad_norm": 1.8204154968261719,
"learning_rate": 1.7430921052631578e-07,
"loss": 0.0506,
"step": 19250
},
{
"epoch": 0.96375,
"grad_norm": 0.6307218074798584,
"learning_rate": 1.6855263157894736e-07,
"loss": 0.0412,
"step": 19275
},
{
"epoch": 0.965,
"grad_norm": 1.2638368606567383,
"learning_rate": 1.6279605263157892e-07,
"loss": 0.0367,
"step": 19300
},
{
"epoch": 0.96625,
"grad_norm": 1.448020100593567,
"learning_rate": 1.570394736842105e-07,
"loss": 0.0461,
"step": 19325
},
{
"epoch": 0.9675,
"grad_norm": 1.148501992225647,
"learning_rate": 1.5128289473684207e-07,
"loss": 0.0321,
"step": 19350
},
{
"epoch": 0.96875,
"grad_norm": 1.24919593334198,
"learning_rate": 1.4552631578947368e-07,
"loss": 0.0442,
"step": 19375
},
{
"epoch": 0.97,
"grad_norm": 1.437836766242981,
"learning_rate": 1.3976973684210526e-07,
"loss": 0.0418,
"step": 19400
},
{
"epoch": 0.97125,
"grad_norm": 1.7176451683044434,
"learning_rate": 1.3401315789473682e-07,
"loss": 0.0423,
"step": 19425
},
{
"epoch": 0.9725,
"grad_norm": 0.6188969016075134,
"learning_rate": 1.282565789473684e-07,
"loss": 0.0372,
"step": 19450
},
{
"epoch": 0.97375,
"grad_norm": 0.7245228886604309,
"learning_rate": 1.225e-07,
"loss": 0.0447,
"step": 19475
},
{
"epoch": 0.975,
"grad_norm": 1.1836830377578735,
"learning_rate": 1.1674342105263156e-07,
"loss": 0.0416,
"step": 19500
},
{
"epoch": 0.975,
"eval_loss": 0.1550171822309494,
"eval_runtime": 533.135,
"eval_samples_per_second": 3.172,
"eval_steps_per_second": 0.398,
"eval_wer": 6.544273760459599,
"step": 19500
},
{
"epoch": 0.97625,
"grad_norm": 2.7617335319519043,
"learning_rate": 1.1098684210526315e-07,
"loss": 0.0481,
"step": 19525
},
{
"epoch": 0.9775,
"grad_norm": 1.8646786212921143,
"learning_rate": 1.0523026315789472e-07,
"loss": 0.0479,
"step": 19550
},
{
"epoch": 0.97875,
"grad_norm": 3.3118820190429688,
"learning_rate": 9.947368421052632e-08,
"loss": 0.0622,
"step": 19575
},
{
"epoch": 0.98,
"grad_norm": 1.3400448560714722,
"learning_rate": 9.371710526315789e-08,
"loss": 0.0727,
"step": 19600
},
{
"epoch": 0.98125,
"grad_norm": 3.044895648956299,
"learning_rate": 8.796052631578946e-08,
"loss": 0.0613,
"step": 19625
},
{
"epoch": 0.9825,
"grad_norm": 3.217283248901367,
"learning_rate": 8.220394736842105e-08,
"loss": 0.0814,
"step": 19650
},
{
"epoch": 0.98375,
"grad_norm": 1.3824083805084229,
"learning_rate": 7.644736842105262e-08,
"loss": 0.0598,
"step": 19675
},
{
"epoch": 0.985,
"grad_norm": 1.3852965831756592,
"learning_rate": 7.069078947368419e-08,
"loss": 0.0504,
"step": 19700
},
{
"epoch": 0.98625,
"grad_norm": 2.5811800956726074,
"learning_rate": 6.493421052631578e-08,
"loss": 0.0643,
"step": 19725
},
{
"epoch": 0.9875,
"grad_norm": 1.2770925760269165,
"learning_rate": 5.9177631578947364e-08,
"loss": 0.0586,
"step": 19750
},
{
"epoch": 0.98875,
"grad_norm": 2.8050851821899414,
"learning_rate": 5.342105263157894e-08,
"loss": 0.0623,
"step": 19775
},
{
"epoch": 0.99,
"grad_norm": 3.1270270347595215,
"learning_rate": 4.766447368421052e-08,
"loss": 0.1128,
"step": 19800
},
{
"epoch": 0.99125,
"grad_norm": 2.787506103515625,
"learning_rate": 4.1907894736842107e-08,
"loss": 0.0945,
"step": 19825
},
{
"epoch": 0.9925,
"grad_norm": 2.0053322315216064,
"learning_rate": 3.615131578947368e-08,
"loss": 0.0676,
"step": 19850
},
{
"epoch": 0.99375,
"grad_norm": 2.319840669631958,
"learning_rate": 3.0394736842105264e-08,
"loss": 0.0434,
"step": 19875
},
{
"epoch": 0.995,
"grad_norm": 1.3756728172302246,
"learning_rate": 2.463815789473684e-08,
"loss": 0.0458,
"step": 19900
},
{
"epoch": 0.99625,
"grad_norm": 1.3499048948287964,
"learning_rate": 1.8881578947368418e-08,
"loss": 0.0426,
"step": 19925
},
{
"epoch": 0.9975,
"grad_norm": 1.0059881210327148,
"learning_rate": 1.3124999999999998e-08,
"loss": 0.0543,
"step": 19950
},
{
"epoch": 0.99875,
"grad_norm": 1.247534155845642,
"learning_rate": 7.368421052631579e-09,
"loss": 0.0526,
"step": 19975
},
{
"epoch": 1.0,
"grad_norm": 1.3380235433578491,
"learning_rate": 1.6118421052631579e-09,
"loss": 0.0435,
"step": 20000
},
{
"epoch": 1.0,
"eval_loss": 0.15491345524787903,
"eval_runtime": 532.2586,
"eval_samples_per_second": 3.177,
"eval_steps_per_second": 0.398,
"eval_wer": 6.544273760459599,
"step": 20000
},
{
"epoch": 1.0,
"step": 20000,
"total_flos": 1.0871994580992e+21,
"train_loss": 0.0028733723163604737,
"train_runtime": 7510.0544,
"train_samples_per_second": 42.61,
"train_steps_per_second": 2.663
}
],
"logging_steps": 25,
"max_steps": 20000,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.0871994580992e+21,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}