{ "best_metric": 6.544273760459599, "best_model_checkpoint": "./checkpoint-19500", "epoch": 1.0, "eval_steps": 500, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00125, "grad_norm": 10.480854034423828, "learning_rate": 1.0499999999999999e-07, "loss": 1.4117, "step": 25 }, { "epoch": 0.0025, "grad_norm": 9.908479690551758, "learning_rate": 2.1437499999999999e-07, "loss": 1.4142, "step": 50 }, { "epoch": 0.00375, "grad_norm": 7.737735748291016, "learning_rate": 3.2374999999999997e-07, "loss": 1.2755, "step": 75 }, { "epoch": 0.005, "grad_norm": 5.847314834594727, "learning_rate": 4.33125e-07, "loss": 0.8497, "step": 100 }, { "epoch": 0.00625, "grad_norm": 5.064878463745117, "learning_rate": 5.425e-07, "loss": 0.5289, "step": 125 }, { "epoch": 0.0075, "grad_norm": 4.789758205413818, "learning_rate": 6.518749999999999e-07, "loss": 0.5181, "step": 150 }, { "epoch": 0.00875, "grad_norm": 4.744896411895752, "learning_rate": 7.612499999999999e-07, "loss": 0.3765, "step": 175 }, { "epoch": 0.01, "grad_norm": 4.427361965179443, "learning_rate": 8.706249999999999e-07, "loss": 0.3585, "step": 200 }, { "epoch": 0.01125, "grad_norm": 4.670985698699951, "learning_rate": 9.8e-07, "loss": 0.3219, "step": 225 }, { "epoch": 0.0125, "grad_norm": 3.8691747188568115, "learning_rate": 1.0893749999999998e-06, "loss": 0.3409, "step": 250 }, { "epoch": 0.01375, "grad_norm": 4.622318267822266, "learning_rate": 1.19875e-06, "loss": 0.2899, "step": 275 }, { "epoch": 0.015, "grad_norm": 3.7900593280792236, "learning_rate": 1.3081249999999999e-06, "loss": 0.2834, "step": 300 }, { "epoch": 0.01625, "grad_norm": 3.9323770999908447, "learning_rate": 1.4175e-06, "loss": 0.2643, "step": 325 }, { "epoch": 0.0175, "grad_norm": 3.708969831466675, "learning_rate": 1.5268749999999997e-06, "loss": 0.2827, "step": 350 }, { "epoch": 0.01875, "grad_norm": 3.707580804824829, "learning_rate": 1.6362499999999998e-06, "loss": 0.274, "step": 375 }, { "epoch": 0.02, "grad_norm": 3.8519480228424072, "learning_rate": 1.745625e-06, "loss": 0.2568, "step": 400 }, { "epoch": 0.02125, "grad_norm": 4.571149826049805, "learning_rate": 1.8549999999999998e-06, "loss": 0.2376, "step": 425 }, { "epoch": 0.0225, "grad_norm": 3.2112503051757812, "learning_rate": 1.9643749999999997e-06, "loss": 0.2212, "step": 450 }, { "epoch": 0.02375, "grad_norm": 6.173221588134766, "learning_rate": 2.07375e-06, "loss": 0.2238, "step": 475 }, { "epoch": 0.025, "grad_norm": 4.781201362609863, "learning_rate": 2.183125e-06, "loss": 0.2854, "step": 500 }, { "epoch": 0.025, "eval_loss": 0.41938766837120056, "eval_runtime": 531.5621, "eval_samples_per_second": 3.181, "eval_steps_per_second": 0.399, "eval_wer": 25.88984638441364, "step": 500 }, { "epoch": 0.02625, "grad_norm": 4.559605598449707, "learning_rate": 2.2925e-06, "loss": 0.2996, "step": 525 }, { "epoch": 0.0275, "grad_norm": 5.1446852684021, "learning_rate": 2.401875e-06, "loss": 0.2791, "step": 550 }, { "epoch": 0.02875, "grad_norm": 4.178796768188477, "learning_rate": 2.5112499999999995e-06, "loss": 0.3061, "step": 575 }, { "epoch": 0.03, "grad_norm": 4.030816555023193, "learning_rate": 2.6206249999999996e-06, "loss": 0.2768, "step": 600 }, { "epoch": 0.03125, "grad_norm": 4.405904769897461, "learning_rate": 2.7299999999999997e-06, "loss": 0.2678, "step": 625 }, { "epoch": 0.0325, "grad_norm": 3.5179622173309326, "learning_rate": 2.839375e-06, "loss": 0.2363, "step": 650 }, { "epoch": 0.03375, "grad_norm": 3.767529010772705, "learning_rate": 2.94875e-06, "loss": 0.219, "step": 675 }, { "epoch": 0.035, "grad_norm": 2.898439645767212, "learning_rate": 3.0581249999999996e-06, "loss": 0.1913, "step": 700 }, { "epoch": 0.03625, "grad_norm": 3.3581595420837402, "learning_rate": 3.1674999999999997e-06, "loss": 0.173, "step": 725 }, { "epoch": 0.0375, "grad_norm": 2.872340202331543, "learning_rate": 3.276875e-06, "loss": 0.176, "step": 750 }, { "epoch": 0.03875, "grad_norm": 3.1199734210968018, "learning_rate": 3.38625e-06, "loss": 0.1808, "step": 775 }, { "epoch": 0.04, "grad_norm": 2.626908779144287, "learning_rate": 3.495625e-06, "loss": 0.1802, "step": 800 }, { "epoch": 0.04125, "grad_norm": 2.776141881942749, "learning_rate": 3.6049999999999994e-06, "loss": 0.1761, "step": 825 }, { "epoch": 0.0425, "grad_norm": 3.149322509765625, "learning_rate": 3.7143749999999995e-06, "loss": 0.1569, "step": 850 }, { "epoch": 0.04375, "grad_norm": 2.776301145553589, "learning_rate": 3.82375e-06, "loss": 0.1607, "step": 875 }, { "epoch": 0.045, "grad_norm": 2.8884706497192383, "learning_rate": 3.933125e-06, "loss": 0.1582, "step": 900 }, { "epoch": 0.04625, "grad_norm": 3.991647720336914, "learning_rate": 4.0425e-06, "loss": 0.1511, "step": 925 }, { "epoch": 0.0475, "grad_norm": 2.892364740371704, "learning_rate": 4.151874999999999e-06, "loss": 0.1434, "step": 950 }, { "epoch": 0.04875, "grad_norm": 3.7825698852539062, "learning_rate": 4.261249999999999e-06, "loss": 0.148, "step": 975 }, { "epoch": 0.05, "grad_norm": 2.511293649673462, "learning_rate": 4.370624999999999e-06, "loss": 0.1425, "step": 1000 }, { "epoch": 0.05, "eval_loss": 0.39225178956985474, "eval_runtime": 528.4404, "eval_samples_per_second": 3.2, "eval_steps_per_second": 0.401, "eval_wer": 20.507056325715002, "step": 1000 }, { "epoch": 0.05125, "grad_norm": 1.8921111822128296, "learning_rate": 4.369473684210526e-06, "loss": 0.1395, "step": 1025 }, { "epoch": 0.0525, "grad_norm": 3.18829607963562, "learning_rate": 4.363717105263158e-06, "loss": 0.1294, "step": 1050 }, { "epoch": 0.05375, "grad_norm": 2.508878231048584, "learning_rate": 4.357960526315789e-06, "loss": 0.1366, "step": 1075 }, { "epoch": 0.055, "grad_norm": 2.201958179473877, "learning_rate": 4.352203947368421e-06, "loss": 0.14, "step": 1100 }, { "epoch": 0.05625, "grad_norm": 2.756673574447632, "learning_rate": 4.346447368421052e-06, "loss": 0.1355, "step": 1125 }, { "epoch": 0.0575, "grad_norm": 3.084169864654541, "learning_rate": 4.340690789473684e-06, "loss": 0.1278, "step": 1150 }, { "epoch": 0.05875, "grad_norm": 2.486377239227295, "learning_rate": 4.334934210526315e-06, "loss": 0.1298, "step": 1175 }, { "epoch": 0.06, "grad_norm": 3.5559706687927246, "learning_rate": 4.329177631578947e-06, "loss": 0.1352, "step": 1200 }, { "epoch": 0.06125, "grad_norm": 2.6353018283843994, "learning_rate": 4.323421052631579e-06, "loss": 0.1163, "step": 1225 }, { "epoch": 0.0625, "grad_norm": 2.8629567623138428, "learning_rate": 4.31766447368421e-06, "loss": 0.1199, "step": 1250 }, { "epoch": 0.06375, "grad_norm": 2.9020206928253174, "learning_rate": 4.311907894736842e-06, "loss": 0.1206, "step": 1275 }, { "epoch": 0.065, "grad_norm": 2.4626991748809814, "learning_rate": 4.306151315789473e-06, "loss": 0.1395, "step": 1300 }, { "epoch": 0.06625, "grad_norm": 2.9234840869903564, "learning_rate": 4.300394736842105e-06, "loss": 0.1269, "step": 1325 }, { "epoch": 0.0675, "grad_norm": 3.017625570297241, "learning_rate": 4.294638157894737e-06, "loss": 0.1228, "step": 1350 }, { "epoch": 0.06875, "grad_norm": 2.5392937660217285, "learning_rate": 4.288881578947368e-06, "loss": 0.1273, "step": 1375 }, { "epoch": 0.07, "grad_norm": 3.2523694038391113, "learning_rate": 4.283125e-06, "loss": 0.1242, "step": 1400 }, { "epoch": 0.07125, "grad_norm": 2.633652448654175, "learning_rate": 4.277368421052632e-06, "loss": 0.1341, "step": 1425 }, { "epoch": 0.0725, "grad_norm": 3.952681064605713, "learning_rate": 4.271611842105263e-06, "loss": 0.1588, "step": 1450 }, { "epoch": 0.07375, "grad_norm": 3.9815685749053955, "learning_rate": 4.265855263157895e-06, "loss": 0.1879, "step": 1475 }, { "epoch": 0.075, "grad_norm": 3.197030544281006, "learning_rate": 4.260098684210526e-06, "loss": 0.2199, "step": 1500 }, { "epoch": 0.075, "eval_loss": 0.3290639817714691, "eval_runtime": 535.231, "eval_samples_per_second": 3.159, "eval_steps_per_second": 0.396, "eval_wer": 17.478456350693143, "step": 1500 }, { "epoch": 0.07625, "grad_norm": 3.8294057846069336, "learning_rate": 4.254342105263158e-06, "loss": 0.205, "step": 1525 }, { "epoch": 0.0775, "grad_norm": 3.339564085006714, "learning_rate": 4.248585526315789e-06, "loss": 0.1793, "step": 1550 }, { "epoch": 0.07875, "grad_norm": 4.41719913482666, "learning_rate": 4.242828947368421e-06, "loss": 0.1903, "step": 1575 }, { "epoch": 0.08, "grad_norm": 4.329945087432861, "learning_rate": 4.237072368421052e-06, "loss": 0.2487, "step": 1600 }, { "epoch": 0.08125, "grad_norm": 2.858635425567627, "learning_rate": 4.231315789473684e-06, "loss": 0.16, "step": 1625 }, { "epoch": 0.0825, "grad_norm": 2.6474554538726807, "learning_rate": 4.225559210526316e-06, "loss": 0.1294, "step": 1650 }, { "epoch": 0.08375, "grad_norm": 2.6311450004577637, "learning_rate": 4.219802631578947e-06, "loss": 0.1199, "step": 1675 }, { "epoch": 0.085, "grad_norm": 2.472925901412964, "learning_rate": 4.214046052631579e-06, "loss": 0.1106, "step": 1700 }, { "epoch": 0.08625, "grad_norm": 2.1684815883636475, "learning_rate": 4.20828947368421e-06, "loss": 0.1081, "step": 1725 }, { "epoch": 0.0875, "grad_norm": 2.2405142784118652, "learning_rate": 4.202532894736842e-06, "loss": 0.1024, "step": 1750 }, { "epoch": 0.08875, "grad_norm": 3.28480863571167, "learning_rate": 4.196776315789474e-06, "loss": 0.1395, "step": 1775 }, { "epoch": 0.09, "grad_norm": 2.734311819076538, "learning_rate": 4.191019736842105e-06, "loss": 0.1685, "step": 1800 }, { "epoch": 0.09125, "grad_norm": 3.3384852409362793, "learning_rate": 4.185263157894737e-06, "loss": 0.179, "step": 1825 }, { "epoch": 0.0925, "grad_norm": 4.151054859161377, "learning_rate": 4.1795065789473686e-06, "loss": 0.1863, "step": 1850 }, { "epoch": 0.09375, "grad_norm": 3.854214668273926, "learning_rate": 4.17375e-06, "loss": 0.1753, "step": 1875 }, { "epoch": 0.095, "grad_norm": 3.3321709632873535, "learning_rate": 4.1679934210526316e-06, "loss": 0.1684, "step": 1900 }, { "epoch": 0.09625, "grad_norm": 2.8302998542785645, "learning_rate": 4.162236842105263e-06, "loss": 0.1818, "step": 1925 }, { "epoch": 0.0975, "grad_norm": 5.4296555519104, "learning_rate": 4.156480263157895e-06, "loss": 0.5523, "step": 1950 }, { "epoch": 0.09875, "grad_norm": 3.8675997257232666, "learning_rate": 4.1507236842105265e-06, "loss": 0.3352, "step": 1975 }, { "epoch": 0.1, "grad_norm": 3.9055581092834473, "learning_rate": 4.144967105263158e-06, "loss": 0.2343, "step": 2000 }, { "epoch": 0.1, "eval_loss": 0.2860749065876007, "eval_runtime": 530.1633, "eval_samples_per_second": 3.19, "eval_steps_per_second": 0.4, "eval_wer": 14.13138503809167, "step": 2000 }, { "epoch": 0.10125, "grad_norm": 3.413243293762207, "learning_rate": 4.1392105263157895e-06, "loss": 0.2233, "step": 2025 }, { "epoch": 0.1025, "grad_norm": 3.1129419803619385, "learning_rate": 4.133453947368421e-06, "loss": 0.2076, "step": 2050 }, { "epoch": 0.10375, "grad_norm": 3.0855767726898193, "learning_rate": 4.1276973684210525e-06, "loss": 0.1675, "step": 2075 }, { "epoch": 0.105, "grad_norm": 2.5053539276123047, "learning_rate": 4.121940789473684e-06, "loss": 0.1291, "step": 2100 }, { "epoch": 0.10625, "grad_norm": 2.078958511352539, "learning_rate": 4.1161842105263155e-06, "loss": 0.1036, "step": 2125 }, { "epoch": 0.1075, "grad_norm": 2.436898708343506, "learning_rate": 4.110427631578947e-06, "loss": 0.1153, "step": 2150 }, { "epoch": 0.10875, "grad_norm": 2.3834900856018066, "learning_rate": 4.1046710526315786e-06, "loss": 0.0943, "step": 2175 }, { "epoch": 0.11, "grad_norm": 2.070406913757324, "learning_rate": 4.09891447368421e-06, "loss": 0.0898, "step": 2200 }, { "epoch": 0.11125, "grad_norm": 2.0004026889801025, "learning_rate": 4.0931578947368416e-06, "loss": 0.0912, "step": 2225 }, { "epoch": 0.1125, "grad_norm": 2.4464359283447266, "learning_rate": 4.0874013157894735e-06, "loss": 0.0907, "step": 2250 }, { "epoch": 0.11375, "grad_norm": 2.8847742080688477, "learning_rate": 4.081644736842105e-06, "loss": 0.0978, "step": 2275 }, { "epoch": 0.115, "grad_norm": 2.167893171310425, "learning_rate": 4.0758881578947365e-06, "loss": 0.0968, "step": 2300 }, { "epoch": 0.11625, "grad_norm": 1.575804352760315, "learning_rate": 4.0701315789473684e-06, "loss": 0.0976, "step": 2325 }, { "epoch": 0.1175, "grad_norm": 2.3064370155334473, "learning_rate": 4.0643749999999995e-06, "loss": 0.0966, "step": 2350 }, { "epoch": 0.11875, "grad_norm": 1.9859708547592163, "learning_rate": 4.0586184210526314e-06, "loss": 0.0958, "step": 2375 }, { "epoch": 0.12, "grad_norm": 1.7534265518188477, "learning_rate": 4.052861842105263e-06, "loss": 0.0966, "step": 2400 }, { "epoch": 0.12125, "grad_norm": 3.8807549476623535, "learning_rate": 4.0471052631578945e-06, "loss": 0.1367, "step": 2425 }, { "epoch": 0.1225, "grad_norm": 3.933382987976074, "learning_rate": 4.041348684210526e-06, "loss": 0.1445, "step": 2450 }, { "epoch": 0.12375, "grad_norm": 3.3107643127441406, "learning_rate": 4.0355921052631575e-06, "loss": 0.1486, "step": 2475 }, { "epoch": 0.125, "grad_norm": 2.699190139770508, "learning_rate": 4.029835526315789e-06, "loss": 0.1391, "step": 2500 }, { "epoch": 0.125, "eval_loss": 0.2906411290168762, "eval_runtime": 531.8376, "eval_samples_per_second": 3.18, "eval_steps_per_second": 0.399, "eval_wer": 13.31335081803422, "step": 2500 }, { "epoch": 0.12625, "grad_norm": 3.5841128826141357, "learning_rate": 4.0240789473684205e-06, "loss": 0.1628, "step": 2525 }, { "epoch": 0.1275, "grad_norm": 3.2463104724884033, "learning_rate": 4.018322368421052e-06, "loss": 0.1705, "step": 2550 }, { "epoch": 0.12875, "grad_norm": 5.094871520996094, "learning_rate": 4.0125657894736835e-06, "loss": 0.1663, "step": 2575 }, { "epoch": 0.13, "grad_norm": 3.0208804607391357, "learning_rate": 4.0068092105263154e-06, "loss": 0.1712, "step": 2600 }, { "epoch": 0.13125, "grad_norm": 3.4919967651367188, "learning_rate": 4.0010526315789465e-06, "loss": 0.1498, "step": 2625 }, { "epoch": 0.1325, "grad_norm": 3.4352219104766846, "learning_rate": 3.9952960526315784e-06, "loss": 0.1423, "step": 2650 }, { "epoch": 0.13375, "grad_norm": 3.63608455657959, "learning_rate": 3.98953947368421e-06, "loss": 0.1565, "step": 2675 }, { "epoch": 0.135, "grad_norm": 3.6155622005462646, "learning_rate": 3.9837828947368414e-06, "loss": 0.1553, "step": 2700 }, { "epoch": 0.13625, "grad_norm": 3.4833076000213623, "learning_rate": 3.978026315789473e-06, "loss": 0.1416, "step": 2725 }, { "epoch": 0.1375, "grad_norm": 3.147080421447754, "learning_rate": 3.9722697368421045e-06, "loss": 0.1469, "step": 2750 }, { "epoch": 0.13875, "grad_norm": 2.891146659851074, "learning_rate": 3.966513157894736e-06, "loss": 0.128, "step": 2775 }, { "epoch": 0.14, "grad_norm": 3.1411890983581543, "learning_rate": 3.960756578947368e-06, "loss": 0.1372, "step": 2800 }, { "epoch": 0.14125, "grad_norm": 3.836360216140747, "learning_rate": 3.954999999999999e-06, "loss": 0.149, "step": 2825 }, { "epoch": 0.1425, "grad_norm": 4.1377339363098145, "learning_rate": 3.949243421052631e-06, "loss": 0.1383, "step": 2850 }, { "epoch": 0.14375, "grad_norm": 3.7640268802642822, "learning_rate": 3.943486842105263e-06, "loss": 0.1492, "step": 2875 }, { "epoch": 0.145, "grad_norm": 3.452561855316162, "learning_rate": 3.937730263157894e-06, "loss": 0.1288, "step": 2900 }, { "epoch": 0.14625, "grad_norm": 1.9712022542953491, "learning_rate": 3.931973684210526e-06, "loss": 0.1137, "step": 2925 }, { "epoch": 0.1475, "grad_norm": 2.0524768829345703, "learning_rate": 3.926217105263157e-06, "loss": 0.0973, "step": 2950 }, { "epoch": 0.14875, "grad_norm": 2.3722898960113525, "learning_rate": 3.920460526315789e-06, "loss": 0.0915, "step": 2975 }, { "epoch": 0.15, "grad_norm": 2.5048534870147705, "learning_rate": 3.914703947368421e-06, "loss": 0.0853, "step": 3000 }, { "epoch": 0.15, "eval_loss": 0.26879894733428955, "eval_runtime": 531.192, "eval_samples_per_second": 3.183, "eval_steps_per_second": 0.399, "eval_wer": 12.045710003746722, "step": 3000 }, { "epoch": 0.15125, "grad_norm": 2.009464740753174, "learning_rate": 3.908947368421052e-06, "loss": 0.1037, "step": 3025 }, { "epoch": 0.1525, "grad_norm": 2.3635034561157227, "learning_rate": 3.903190789473684e-06, "loss": 0.0889, "step": 3050 }, { "epoch": 0.15375, "grad_norm": 3.131683111190796, "learning_rate": 3.897434210526315e-06, "loss": 0.0895, "step": 3075 }, { "epoch": 0.155, "grad_norm": 2.2032673358917236, "learning_rate": 3.891677631578947e-06, "loss": 0.0955, "step": 3100 }, { "epoch": 0.15625, "grad_norm": 1.8079180717468262, "learning_rate": 3.885921052631578e-06, "loss": 0.0785, "step": 3125 }, { "epoch": 0.1575, "grad_norm": 2.2879910469055176, "learning_rate": 3.88016447368421e-06, "loss": 0.0721, "step": 3150 }, { "epoch": 0.15875, "grad_norm": 2.491487979888916, "learning_rate": 3.874407894736841e-06, "loss": 0.0817, "step": 3175 }, { "epoch": 0.16, "grad_norm": 2.996129035949707, "learning_rate": 3.868651315789473e-06, "loss": 0.0877, "step": 3200 }, { "epoch": 0.16125, "grad_norm": 2.9992258548736572, "learning_rate": 3.862894736842104e-06, "loss": 0.1011, "step": 3225 }, { "epoch": 0.1625, "grad_norm": 2.5464529991149902, "learning_rate": 3.857138157894736e-06, "loss": 0.1385, "step": 3250 }, { "epoch": 0.16375, "grad_norm": 2.853933095932007, "learning_rate": 3.851381578947368e-06, "loss": 0.1223, "step": 3275 }, { "epoch": 0.165, "grad_norm": 2.3290011882781982, "learning_rate": 3.845624999999999e-06, "loss": 0.1236, "step": 3300 }, { "epoch": 0.16625, "grad_norm": 2.61714768409729, "learning_rate": 3.839868421052631e-06, "loss": 0.1167, "step": 3325 }, { "epoch": 0.1675, "grad_norm": 3.926612377166748, "learning_rate": 3.834111842105263e-06, "loss": 0.1306, "step": 3350 }, { "epoch": 0.16875, "grad_norm": 2.9979617595672607, "learning_rate": 3.828355263157894e-06, "loss": 0.1383, "step": 3375 }, { "epoch": 0.17, "grad_norm": 2.879436492919922, "learning_rate": 3.822598684210526e-06, "loss": 0.1112, "step": 3400 }, { "epoch": 0.17125, "grad_norm": 2.706355571746826, "learning_rate": 3.816842105263158e-06, "loss": 0.1006, "step": 3425 }, { "epoch": 0.1725, "grad_norm": 2.263953685760498, "learning_rate": 3.811085526315789e-06, "loss": 0.089, "step": 3450 }, { "epoch": 0.17375, "grad_norm": 3.070748805999756, "learning_rate": 3.8053289473684207e-06, "loss": 0.0801, "step": 3475 }, { "epoch": 0.175, "grad_norm": 2.202629566192627, "learning_rate": 3.799572368421052e-06, "loss": 0.0866, "step": 3500 }, { "epoch": 0.175, "eval_loss": 0.2575243413448334, "eval_runtime": 531.9323, "eval_samples_per_second": 3.179, "eval_steps_per_second": 0.399, "eval_wer": 11.471212688897214, "step": 3500 }, { "epoch": 0.17625, "grad_norm": 2.1003735065460205, "learning_rate": 3.793815789473684e-06, "loss": 0.0794, "step": 3525 }, { "epoch": 0.1775, "grad_norm": 2.4936602115631104, "learning_rate": 3.788059210526315e-06, "loss": 0.0754, "step": 3550 }, { "epoch": 0.17875, "grad_norm": 2.2320945262908936, "learning_rate": 3.782302631578947e-06, "loss": 0.0906, "step": 3575 }, { "epoch": 0.18, "grad_norm": 1.4985826015472412, "learning_rate": 3.7765460526315786e-06, "loss": 0.0872, "step": 3600 }, { "epoch": 0.18125, "grad_norm": 1.6096969842910767, "learning_rate": 3.77078947368421e-06, "loss": 0.0783, "step": 3625 }, { "epoch": 0.1825, "grad_norm": 1.8306738138198853, "learning_rate": 3.7650328947368416e-06, "loss": 0.0783, "step": 3650 }, { "epoch": 0.18375, "grad_norm": 1.972235918045044, "learning_rate": 3.759276315789473e-06, "loss": 0.0789, "step": 3675 }, { "epoch": 0.185, "grad_norm": 2.0266051292419434, "learning_rate": 3.753519736842105e-06, "loss": 0.0708, "step": 3700 }, { "epoch": 0.18625, "grad_norm": 1.6395690441131592, "learning_rate": 3.7477631578947366e-06, "loss": 0.0768, "step": 3725 }, { "epoch": 0.1875, "grad_norm": 1.8886572122573853, "learning_rate": 3.742006578947368e-06, "loss": 0.0745, "step": 3750 }, { "epoch": 0.18875, "grad_norm": 3.0105178356170654, "learning_rate": 3.7362499999999996e-06, "loss": 0.0816, "step": 3775 }, { "epoch": 0.19, "grad_norm": 1.8376508951187134, "learning_rate": 3.7304934210526315e-06, "loss": 0.082, "step": 3800 }, { "epoch": 0.19125, "grad_norm": 1.758370041847229, "learning_rate": 3.7247368421052626e-06, "loss": 0.0798, "step": 3825 }, { "epoch": 0.1925, "grad_norm": 1.2405736446380615, "learning_rate": 3.7189802631578945e-06, "loss": 0.0773, "step": 3850 }, { "epoch": 0.19375, "grad_norm": 1.8085663318634033, "learning_rate": 3.713223684210526e-06, "loss": 0.0861, "step": 3875 }, { "epoch": 0.195, "grad_norm": 3.838613986968994, "learning_rate": 3.7074671052631575e-06, "loss": 0.1032, "step": 3900 }, { "epoch": 0.19625, "grad_norm": 3.087472438812256, "learning_rate": 3.701710526315789e-06, "loss": 0.133, "step": 3925 }, { "epoch": 0.1975, "grad_norm": 2.3854024410247803, "learning_rate": 3.6959539473684206e-06, "loss": 0.1366, "step": 3950 }, { "epoch": 0.19875, "grad_norm": 3.235400676727295, "learning_rate": 3.690197368421052e-06, "loss": 0.1275, "step": 3975 }, { "epoch": 0.2, "grad_norm": 2.7316720485687256, "learning_rate": 3.684440789473684e-06, "loss": 0.1311, "step": 4000 }, { "epoch": 0.2, "eval_loss": 0.24720044434070587, "eval_runtime": 536.8744, "eval_samples_per_second": 3.15, "eval_steps_per_second": 0.395, "eval_wer": 12.482827525914825, "step": 4000 }, { "epoch": 0.20125, "grad_norm": 2.4120874404907227, "learning_rate": 3.6786842105263155e-06, "loss": 0.1215, "step": 4025 }, { "epoch": 0.2025, "grad_norm": 2.5485270023345947, "learning_rate": 3.672927631578947e-06, "loss": 0.0983, "step": 4050 }, { "epoch": 0.20375, "grad_norm": 2.2741594314575195, "learning_rate": 3.667171052631579e-06, "loss": 0.0764, "step": 4075 }, { "epoch": 0.205, "grad_norm": 1.875857949256897, "learning_rate": 3.66141447368421e-06, "loss": 0.0733, "step": 4100 }, { "epoch": 0.20625, "grad_norm": 1.8897082805633545, "learning_rate": 3.655657894736842e-06, "loss": 0.0797, "step": 4125 }, { "epoch": 0.2075, "grad_norm": 1.5462270975112915, "learning_rate": 3.6499013157894735e-06, "loss": 0.0772, "step": 4150 }, { "epoch": 0.20875, "grad_norm": 2.1055002212524414, "learning_rate": 3.644144736842105e-06, "loss": 0.079, "step": 4175 }, { "epoch": 0.21, "grad_norm": 2.8036248683929443, "learning_rate": 3.6383881578947365e-06, "loss": 0.0828, "step": 4200 }, { "epoch": 0.21125, "grad_norm": 1.496777892112732, "learning_rate": 3.6326315789473684e-06, "loss": 0.0658, "step": 4225 }, { "epoch": 0.2125, "grad_norm": 2.213822364807129, "learning_rate": 3.6268749999999995e-06, "loss": 0.0722, "step": 4250 }, { "epoch": 0.21375, "grad_norm": 1.4431771039962769, "learning_rate": 3.6211184210526314e-06, "loss": 0.061, "step": 4275 }, { "epoch": 0.215, "grad_norm": 1.6346482038497925, "learning_rate": 3.6153618421052625e-06, "loss": 0.0641, "step": 4300 }, { "epoch": 0.21625, "grad_norm": 1.5905380249023438, "learning_rate": 3.6096052631578944e-06, "loss": 0.0633, "step": 4325 }, { "epoch": 0.2175, "grad_norm": 2.4848458766937256, "learning_rate": 3.6038486842105263e-06, "loss": 0.0738, "step": 4350 }, { "epoch": 0.21875, "grad_norm": 2.568466901779175, "learning_rate": 3.5980921052631574e-06, "loss": 0.1123, "step": 4375 }, { "epoch": 0.22, "grad_norm": 2.5104339122772217, "learning_rate": 3.5923355263157894e-06, "loss": 0.1179, "step": 4400 }, { "epoch": 0.22125, "grad_norm": 3.769829273223877, "learning_rate": 3.586578947368421e-06, "loss": 0.1221, "step": 4425 }, { "epoch": 0.2225, "grad_norm": 2.850048542022705, "learning_rate": 3.5808223684210524e-06, "loss": 0.1115, "step": 4450 }, { "epoch": 0.22375, "grad_norm": 2.0328500270843506, "learning_rate": 3.575065789473684e-06, "loss": 0.1274, "step": 4475 }, { "epoch": 0.225, "grad_norm": 2.765300750732422, "learning_rate": 3.569309210526316e-06, "loss": 0.1338, "step": 4500 }, { "epoch": 0.225, "eval_loss": 0.24367325007915497, "eval_runtime": 531.4688, "eval_samples_per_second": 3.182, "eval_steps_per_second": 0.399, "eval_wer": 10.990383414512301, "step": 4500 }, { "epoch": 0.22625, "grad_norm": 1.4456897974014282, "learning_rate": 3.563552631578947e-06, "loss": 0.0921, "step": 4525 }, { "epoch": 0.2275, "grad_norm": 2.357384443283081, "learning_rate": 3.557796052631579e-06, "loss": 0.0728, "step": 4550 }, { "epoch": 0.22875, "grad_norm": 2.2841663360595703, "learning_rate": 3.55203947368421e-06, "loss": 0.0703, "step": 4575 }, { "epoch": 0.23, "grad_norm": 1.8975858688354492, "learning_rate": 3.546282894736842e-06, "loss": 0.0595, "step": 4600 }, { "epoch": 0.23125, "grad_norm": 1.6614043712615967, "learning_rate": 3.5405263157894733e-06, "loss": 0.0684, "step": 4625 }, { "epoch": 0.2325, "grad_norm": 3.0987887382507324, "learning_rate": 3.534769736842105e-06, "loss": 0.0643, "step": 4650 }, { "epoch": 0.23375, "grad_norm": 1.869446873664856, "learning_rate": 3.5290131578947363e-06, "loss": 0.0612, "step": 4675 }, { "epoch": 0.235, "grad_norm": 1.6360236406326294, "learning_rate": 3.5232565789473683e-06, "loss": 0.0627, "step": 4700 }, { "epoch": 0.23625, "grad_norm": 2.188901424407959, "learning_rate": 3.5174999999999998e-06, "loss": 0.068, "step": 4725 }, { "epoch": 0.2375, "grad_norm": 1.5851141214370728, "learning_rate": 3.5117434210526313e-06, "loss": 0.0702, "step": 4750 }, { "epoch": 0.23875, "grad_norm": 1.9303579330444336, "learning_rate": 3.5059868421052632e-06, "loss": 0.0683, "step": 4775 }, { "epoch": 0.24, "grad_norm": 1.8640798330307007, "learning_rate": 3.5002302631578943e-06, "loss": 0.0637, "step": 4800 }, { "epoch": 0.24125, "grad_norm": 2.395669937133789, "learning_rate": 3.4944736842105262e-06, "loss": 0.0626, "step": 4825 }, { "epoch": 0.2425, "grad_norm": 1.5368024110794067, "learning_rate": 3.4887171052631573e-06, "loss": 0.0694, "step": 4850 }, { "epoch": 0.24375, "grad_norm": 2.1346402168273926, "learning_rate": 3.4829605263157892e-06, "loss": 0.0734, "step": 4875 }, { "epoch": 0.245, "grad_norm": 2.0883893966674805, "learning_rate": 3.4772039473684207e-06, "loss": 0.0659, "step": 4900 }, { "epoch": 0.24625, "grad_norm": 1.6861238479614258, "learning_rate": 3.4714473684210523e-06, "loss": 0.0656, "step": 4925 }, { "epoch": 0.2475, "grad_norm": 1.5790470838546753, "learning_rate": 3.4656907894736838e-06, "loss": 0.0801, "step": 4950 }, { "epoch": 0.24875, "grad_norm": 1.3223644495010376, "learning_rate": 3.4599342105263157e-06, "loss": 0.0806, "step": 4975 }, { "epoch": 0.25, "grad_norm": 1.6931387186050415, "learning_rate": 3.4541776315789468e-06, "loss": 0.0748, "step": 5000 }, { "epoch": 0.25, "eval_loss": 0.2556721270084381, "eval_runtime": 534.5469, "eval_samples_per_second": 3.163, "eval_steps_per_second": 0.397, "eval_wer": 10.709379293118522, "step": 5000 }, { "epoch": 0.25125, "grad_norm": 1.5327143669128418, "learning_rate": 3.4484210526315787e-06, "loss": 0.0968, "step": 5025 }, { "epoch": 0.2525, "grad_norm": 1.988226294517517, "learning_rate": 3.4426644736842106e-06, "loss": 0.0921, "step": 5050 }, { "epoch": 0.25375, "grad_norm": 2.179086446762085, "learning_rate": 3.4369078947368417e-06, "loss": 0.093, "step": 5075 }, { "epoch": 0.255, "grad_norm": 2.4304797649383545, "learning_rate": 3.4311513157894736e-06, "loss": 0.0909, "step": 5100 }, { "epoch": 0.25625, "grad_norm": 2.498908281326294, "learning_rate": 3.4253947368421047e-06, "loss": 0.1225, "step": 5125 }, { "epoch": 0.2575, "grad_norm": 2.018110752105713, "learning_rate": 3.4196381578947367e-06, "loss": 0.1199, "step": 5150 }, { "epoch": 0.25875, "grad_norm": 1.8156744241714478, "learning_rate": 3.413881578947368e-06, "loss": 0.1032, "step": 5175 }, { "epoch": 0.26, "grad_norm": 2.395634651184082, "learning_rate": 3.4081249999999997e-06, "loss": 0.0842, "step": 5200 }, { "epoch": 0.26125, "grad_norm": 1.8604170083999634, "learning_rate": 3.402368421052631e-06, "loss": 0.0753, "step": 5225 }, { "epoch": 0.2625, "grad_norm": 2.186006784439087, "learning_rate": 3.396611842105263e-06, "loss": 0.0693, "step": 5250 }, { "epoch": 0.26375, "grad_norm": 2.117950201034546, "learning_rate": 3.390855263157894e-06, "loss": 0.0731, "step": 5275 }, { "epoch": 0.265, "grad_norm": 1.442688226699829, "learning_rate": 3.385098684210526e-06, "loss": 0.0607, "step": 5300 }, { "epoch": 0.26625, "grad_norm": 2.0623013973236084, "learning_rate": 3.379342105263157e-06, "loss": 0.0598, "step": 5325 }, { "epoch": 0.2675, "grad_norm": 1.6096211671829224, "learning_rate": 3.373585526315789e-06, "loss": 0.0687, "step": 5350 }, { "epoch": 0.26875, "grad_norm": 1.2381603717803955, "learning_rate": 3.367828947368421e-06, "loss": 0.0646, "step": 5375 }, { "epoch": 0.27, "grad_norm": 1.6694140434265137, "learning_rate": 3.362072368421052e-06, "loss": 0.0595, "step": 5400 }, { "epoch": 0.27125, "grad_norm": 2.486950159072876, "learning_rate": 3.356315789473684e-06, "loss": 0.074, "step": 5425 }, { "epoch": 0.2725, "grad_norm": 1.2931033372879028, "learning_rate": 3.3505592105263156e-06, "loss": 0.08, "step": 5450 }, { "epoch": 0.27375, "grad_norm": 2.314680337905884, "learning_rate": 3.344802631578947e-06, "loss": 0.0662, "step": 5475 }, { "epoch": 0.275, "grad_norm": 2.413079261779785, "learning_rate": 3.3390460526315786e-06, "loss": 0.0821, "step": 5500 }, { "epoch": 0.275, "eval_loss": 0.2597045302391052, "eval_runtime": 532.1724, "eval_samples_per_second": 3.178, "eval_steps_per_second": 0.398, "eval_wer": 10.247283626826526, "step": 5500 }, { "epoch": 0.27625, "grad_norm": 2.8475470542907715, "learning_rate": 3.3332894736842105e-06, "loss": 0.1317, "step": 5525 }, { "epoch": 0.2775, "grad_norm": 2.919682025909424, "learning_rate": 3.3275328947368416e-06, "loss": 0.1323, "step": 5550 }, { "epoch": 0.27875, "grad_norm": 3.0585904121398926, "learning_rate": 3.3217763157894735e-06, "loss": 0.1332, "step": 5575 }, { "epoch": 0.28, "grad_norm": 2.4418559074401855, "learning_rate": 3.3160197368421046e-06, "loss": 0.1126, "step": 5600 }, { "epoch": 0.28125, "grad_norm": 2.9454727172851562, "learning_rate": 3.3102631578947365e-06, "loss": 0.0991, "step": 5625 }, { "epoch": 0.2825, "grad_norm": 2.472628593444824, "learning_rate": 3.304506578947368e-06, "loss": 0.1106, "step": 5650 }, { "epoch": 0.28375, "grad_norm": 2.1178548336029053, "learning_rate": 3.2987499999999995e-06, "loss": 0.1027, "step": 5675 }, { "epoch": 0.285, "grad_norm": 2.5170726776123047, "learning_rate": 3.2929934210526315e-06, "loss": 0.1027, "step": 5700 }, { "epoch": 0.28625, "grad_norm": 2.9180397987365723, "learning_rate": 3.287236842105263e-06, "loss": 0.1045, "step": 5725 }, { "epoch": 0.2875, "grad_norm": 2.6896932125091553, "learning_rate": 3.2814802631578945e-06, "loss": 0.1069, "step": 5750 }, { "epoch": 0.28875, "grad_norm": 3.1297285556793213, "learning_rate": 3.275723684210526e-06, "loss": 0.1003, "step": 5775 }, { "epoch": 0.29, "grad_norm": 2.4746246337890625, "learning_rate": 3.269967105263158e-06, "loss": 0.1084, "step": 5800 }, { "epoch": 0.29125, "grad_norm": 1.7318406105041504, "learning_rate": 3.264210526315789e-06, "loss": 0.0846, "step": 5825 }, { "epoch": 0.2925, "grad_norm": 2.190168857574463, "learning_rate": 3.258453947368421e-06, "loss": 0.082, "step": 5850 }, { "epoch": 0.29375, "grad_norm": 1.5366681814193726, "learning_rate": 3.252697368421052e-06, "loss": 0.0656, "step": 5875 }, { "epoch": 0.295, "grad_norm": 1.8261510133743286, "learning_rate": 3.246940789473684e-06, "loss": 0.0646, "step": 5900 }, { "epoch": 0.29625, "grad_norm": 1.9088908433914185, "learning_rate": 3.2411842105263155e-06, "loss": 0.0662, "step": 5925 }, { "epoch": 0.2975, "grad_norm": 1.3404430150985718, "learning_rate": 3.235427631578947e-06, "loss": 0.0712, "step": 5950 }, { "epoch": 0.29875, "grad_norm": 1.7546651363372803, "learning_rate": 3.2296710526315785e-06, "loss": 0.084, "step": 5975 }, { "epoch": 0.3, "grad_norm": 1.7727612257003784, "learning_rate": 3.2239144736842104e-06, "loss": 0.0988, "step": 6000 }, { "epoch": 0.3, "eval_loss": 0.2406572848558426, "eval_runtime": 535.6321, "eval_samples_per_second": 3.157, "eval_steps_per_second": 0.396, "eval_wer": 9.447983014861997, "step": 6000 }, { "epoch": 0.30125, "grad_norm": 2.477670907974243, "learning_rate": 3.2181578947368415e-06, "loss": 0.1013, "step": 6025 }, { "epoch": 0.3025, "grad_norm": 4.175459384918213, "learning_rate": 3.2124013157894734e-06, "loss": 0.1199, "step": 6050 }, { "epoch": 0.30375, "grad_norm": 2.4588561058044434, "learning_rate": 3.2066447368421053e-06, "loss": 0.1203, "step": 6075 }, { "epoch": 0.305, "grad_norm": 3.759526491165161, "learning_rate": 3.2008881578947364e-06, "loss": 0.1261, "step": 6100 }, { "epoch": 0.30625, "grad_norm": 3.186166524887085, "learning_rate": 3.1951315789473683e-06, "loss": 0.0946, "step": 6125 }, { "epoch": 0.3075, "grad_norm": 1.874886155128479, "learning_rate": 3.1893749999999994e-06, "loss": 0.0707, "step": 6150 }, { "epoch": 0.30875, "grad_norm": 1.673767328262329, "learning_rate": 3.1836184210526314e-06, "loss": 0.0605, "step": 6175 }, { "epoch": 0.31, "grad_norm": 2.6728780269622803, "learning_rate": 3.177861842105263e-06, "loss": 0.064, "step": 6200 }, { "epoch": 0.31125, "grad_norm": 1.245354175567627, "learning_rate": 3.1721052631578944e-06, "loss": 0.0603, "step": 6225 }, { "epoch": 0.3125, "grad_norm": 1.3173916339874268, "learning_rate": 3.166348684210526e-06, "loss": 0.067, "step": 6250 }, { "epoch": 0.31375, "grad_norm": 1.9218686819076538, "learning_rate": 3.160592105263158e-06, "loss": 0.0723, "step": 6275 }, { "epoch": 0.315, "grad_norm": 1.822493314743042, "learning_rate": 3.154835526315789e-06, "loss": 0.0772, "step": 6300 }, { "epoch": 0.31625, "grad_norm": 2.4955074787139893, "learning_rate": 3.149078947368421e-06, "loss": 0.1124, "step": 6325 }, { "epoch": 0.3175, "grad_norm": 2.448274612426758, "learning_rate": 3.1433223684210523e-06, "loss": 0.1144, "step": 6350 }, { "epoch": 0.31875, "grad_norm": 2.732297658920288, "learning_rate": 3.137565789473684e-06, "loss": 0.0983, "step": 6375 }, { "epoch": 0.32, "grad_norm": 3.261770248413086, "learning_rate": 3.1318092105263158e-06, "loss": 0.11, "step": 6400 }, { "epoch": 0.32125, "grad_norm": 2.367335319519043, "learning_rate": 3.1260526315789473e-06, "loss": 0.1129, "step": 6425 }, { "epoch": 0.3225, "grad_norm": 2.4930291175842285, "learning_rate": 3.1202960526315788e-06, "loss": 0.1106, "step": 6450 }, { "epoch": 0.32375, "grad_norm": 1.8275959491729736, "learning_rate": 3.1145394736842103e-06, "loss": 0.0814, "step": 6475 }, { "epoch": 0.325, "grad_norm": 3.6453261375427246, "learning_rate": 3.1087828947368418e-06, "loss": 0.0824, "step": 6500 }, { "epoch": 0.325, "eval_loss": 0.24250419437885284, "eval_runtime": 531.4087, "eval_samples_per_second": 3.182, "eval_steps_per_second": 0.399, "eval_wer": 9.223179717746971, "step": 6500 }, { "epoch": 0.32625, "grad_norm": 2.3996527194976807, "learning_rate": 3.1030263157894733e-06, "loss": 0.0913, "step": 6525 }, { "epoch": 0.3275, "grad_norm": 3.106403350830078, "learning_rate": 3.0972697368421052e-06, "loss": 0.0969, "step": 6550 }, { "epoch": 0.32875, "grad_norm": 3.741685628890991, "learning_rate": 3.0915131578947363e-06, "loss": 0.1091, "step": 6575 }, { "epoch": 0.33, "grad_norm": 1.6008243560791016, "learning_rate": 3.0859868421052626e-06, "loss": 0.0984, "step": 6600 }, { "epoch": 0.33125, "grad_norm": 2.268734931945801, "learning_rate": 3.0802302631578945e-06, "loss": 0.0968, "step": 6625 }, { "epoch": 0.3325, "grad_norm": 2.442617654800415, "learning_rate": 3.074473684210526e-06, "loss": 0.0716, "step": 6650 }, { "epoch": 0.33375, "grad_norm": 1.9763257503509521, "learning_rate": 3.0687171052631575e-06, "loss": 0.0674, "step": 6675 }, { "epoch": 0.335, "grad_norm": 1.828474998474121, "learning_rate": 3.0629605263157894e-06, "loss": 0.0654, "step": 6700 }, { "epoch": 0.33625, "grad_norm": 1.5649821758270264, "learning_rate": 3.0572039473684205e-06, "loss": 0.057, "step": 6725 }, { "epoch": 0.3375, "grad_norm": 1.911927580833435, "learning_rate": 3.0514473684210525e-06, "loss": 0.0532, "step": 6750 }, { "epoch": 0.33875, "grad_norm": 1.3287229537963867, "learning_rate": 3.045690789473684e-06, "loss": 0.0623, "step": 6775 }, { "epoch": 0.34, "grad_norm": 1.7754572629928589, "learning_rate": 3.0399342105263155e-06, "loss": 0.0635, "step": 6800 }, { "epoch": 0.34125, "grad_norm": 1.9900065660476685, "learning_rate": 3.034177631578947e-06, "loss": 0.0678, "step": 6825 }, { "epoch": 0.3425, "grad_norm": 1.714850664138794, "learning_rate": 3.028421052631579e-06, "loss": 0.0654, "step": 6850 }, { "epoch": 0.34375, "grad_norm": 1.6401875019073486, "learning_rate": 3.02266447368421e-06, "loss": 0.0662, "step": 6875 }, { "epoch": 0.345, "grad_norm": 1.0171102285385132, "learning_rate": 3.016907894736842e-06, "loss": 0.0573, "step": 6900 }, { "epoch": 0.34625, "grad_norm": 1.4662336111068726, "learning_rate": 3.0111513157894734e-06, "loss": 0.0556, "step": 6925 }, { "epoch": 0.3475, "grad_norm": 1.7531720399856567, "learning_rate": 3.005394736842105e-06, "loss": 0.0501, "step": 6950 }, { "epoch": 0.34875, "grad_norm": 2.6019067764282227, "learning_rate": 2.9996381578947364e-06, "loss": 0.0629, "step": 6975 }, { "epoch": 0.35, "grad_norm": 2.0052170753479004, "learning_rate": 2.9938815789473684e-06, "loss": 0.0678, "step": 7000 }, { "epoch": 0.35, "eval_loss": 0.23009631037712097, "eval_runtime": 530.6679, "eval_samples_per_second": 3.187, "eval_steps_per_second": 0.399, "eval_wer": 9.13575621331335, "step": 7000 }, { "epoch": 0.35125, "grad_norm": 2.00034761428833, "learning_rate": 2.988125e-06, "loss": 0.0582, "step": 7025 }, { "epoch": 0.3525, "grad_norm": 1.7806837558746338, "learning_rate": 2.9823684210526314e-06, "loss": 0.058, "step": 7050 }, { "epoch": 0.35375, "grad_norm": 1.4306073188781738, "learning_rate": 2.976611842105263e-06, "loss": 0.0555, "step": 7075 }, { "epoch": 0.355, "grad_norm": 1.8648333549499512, "learning_rate": 2.9708552631578944e-06, "loss": 0.0662, "step": 7100 }, { "epoch": 0.35625, "grad_norm": 2.046255350112915, "learning_rate": 2.9650986842105263e-06, "loss": 0.0873, "step": 7125 }, { "epoch": 0.3575, "grad_norm": 1.928809404373169, "learning_rate": 2.9593421052631574e-06, "loss": 0.0948, "step": 7150 }, { "epoch": 0.35875, "grad_norm": 2.6892471313476562, "learning_rate": 2.9535855263157893e-06, "loss": 0.1043, "step": 7175 }, { "epoch": 0.36, "grad_norm": 1.9739983081817627, "learning_rate": 2.947828947368421e-06, "loss": 0.1037, "step": 7200 }, { "epoch": 0.36125, "grad_norm": 3.5157880783081055, "learning_rate": 2.9420723684210523e-06, "loss": 0.1139, "step": 7225 }, { "epoch": 0.3625, "grad_norm": 2.140559673309326, "learning_rate": 2.936315789473684e-06, "loss": 0.0912, "step": 7250 }, { "epoch": 0.36375, "grad_norm": 2.24043607711792, "learning_rate": 2.9305592105263158e-06, "loss": 0.0555, "step": 7275 }, { "epoch": 0.365, "grad_norm": 1.5429259538650513, "learning_rate": 2.924802631578947e-06, "loss": 0.059, "step": 7300 }, { "epoch": 0.36625, "grad_norm": 1.9133890867233276, "learning_rate": 2.919046052631579e-06, "loss": 0.0576, "step": 7325 }, { "epoch": 0.3675, "grad_norm": 1.585777759552002, "learning_rate": 2.9132894736842103e-06, "loss": 0.0497, "step": 7350 }, { "epoch": 0.36875, "grad_norm": 1.5571388006210327, "learning_rate": 2.907532894736842e-06, "loss": 0.0604, "step": 7375 }, { "epoch": 0.37, "grad_norm": 1.2344049215316772, "learning_rate": 2.9017763157894737e-06, "loss": 0.0621, "step": 7400 }, { "epoch": 0.37125, "grad_norm": 1.7708073854446411, "learning_rate": 2.896019736842105e-06, "loss": 0.0713, "step": 7425 }, { "epoch": 0.3725, "grad_norm": 2.126579999923706, "learning_rate": 2.8902631578947367e-06, "loss": 0.0661, "step": 7450 }, { "epoch": 0.37375, "grad_norm": 1.9544090032577515, "learning_rate": 2.8845065789473682e-06, "loss": 0.0626, "step": 7475 }, { "epoch": 0.375, "grad_norm": 2.478142499923706, "learning_rate": 2.8787499999999998e-06, "loss": 0.1124, "step": 7500 }, { "epoch": 0.375, "eval_loss": 0.2558789849281311, "eval_runtime": 531.6323, "eval_samples_per_second": 3.181, "eval_steps_per_second": 0.399, "eval_wer": 9.323092294242539, "step": 7500 }, { "epoch": 0.37625, "grad_norm": 2.722101926803589, "learning_rate": 2.8729934210526313e-06, "loss": 0.1143, "step": 7525 }, { "epoch": 0.3775, "grad_norm": 2.3424594402313232, "learning_rate": 2.867236842105263e-06, "loss": 0.0968, "step": 7550 }, { "epoch": 0.37875, "grad_norm": 2.566340208053589, "learning_rate": 2.8614802631578943e-06, "loss": 0.0932, "step": 7575 }, { "epoch": 0.38, "grad_norm": 3.2237472534179688, "learning_rate": 2.855723684210526e-06, "loss": 0.1155, "step": 7600 }, { "epoch": 0.38125, "grad_norm": 3.058669090270996, "learning_rate": 2.8499671052631573e-06, "loss": 0.103, "step": 7625 }, { "epoch": 0.3825, "grad_norm": 4.729414463043213, "learning_rate": 2.8442105263157892e-06, "loss": 0.0922, "step": 7650 }, { "epoch": 0.38375, "grad_norm": 2.140126943588257, "learning_rate": 2.8384539473684207e-06, "loss": 0.0982, "step": 7675 }, { "epoch": 0.385, "grad_norm": 2.778568983078003, "learning_rate": 2.8326973684210522e-06, "loss": 0.0993, "step": 7700 }, { "epoch": 0.38625, "grad_norm": 2.6681206226348877, "learning_rate": 2.826940789473684e-06, "loss": 0.1018, "step": 7725 }, { "epoch": 0.3875, "grad_norm": 1.5673187971115112, "learning_rate": 2.8211842105263157e-06, "loss": 0.0854, "step": 7750 }, { "epoch": 0.38875, "grad_norm": 1.3890910148620605, "learning_rate": 2.815427631578947e-06, "loss": 0.0703, "step": 7775 }, { "epoch": 0.39, "grad_norm": 2.176023483276367, "learning_rate": 2.8096710526315787e-06, "loss": 0.0672, "step": 7800 }, { "epoch": 0.39125, "grad_norm": 1.2905758619308472, "learning_rate": 2.8039144736842106e-06, "loss": 0.0615, "step": 7825 }, { "epoch": 0.3925, "grad_norm": 1.3446353673934937, "learning_rate": 2.7981578947368417e-06, "loss": 0.0637, "step": 7850 }, { "epoch": 0.39375, "grad_norm": 2.1519501209259033, "learning_rate": 2.7924013157894736e-06, "loss": 0.056, "step": 7875 }, { "epoch": 0.395, "grad_norm": 1.8618980646133423, "learning_rate": 2.7866447368421047e-06, "loss": 0.0573, "step": 7900 }, { "epoch": 0.39625, "grad_norm": 2.5565106868743896, "learning_rate": 2.7808881578947366e-06, "loss": 0.0882, "step": 7925 }, { "epoch": 0.3975, "grad_norm": 3.98923397064209, "learning_rate": 2.775131578947368e-06, "loss": 0.0981, "step": 7950 }, { "epoch": 0.39875, "grad_norm": 3.326756477355957, "learning_rate": 2.7693749999999996e-06, "loss": 0.147, "step": 7975 }, { "epoch": 0.4, "grad_norm": 2.8089091777801514, "learning_rate": 2.763618421052631e-06, "loss": 0.1122, "step": 8000 }, { "epoch": 0.4, "eval_loss": 0.22397179901599884, "eval_runtime": 531.6557, "eval_samples_per_second": 3.181, "eval_steps_per_second": 0.399, "eval_wer": 8.523791682278006, "step": 8000 }, { "epoch": 0.40125, "grad_norm": 1.8123100996017456, "learning_rate": 2.757861842105263e-06, "loss": 0.0967, "step": 8025 }, { "epoch": 0.4025, "grad_norm": 2.1731700897216797, "learning_rate": 2.7521052631578946e-06, "loss": 0.0927, "step": 8050 }, { "epoch": 0.40375, "grad_norm": 2.9888458251953125, "learning_rate": 2.746348684210526e-06, "loss": 0.1061, "step": 8075 }, { "epoch": 0.405, "grad_norm": 3.51106595993042, "learning_rate": 2.740592105263158e-06, "loss": 0.158, "step": 8100 }, { "epoch": 0.40625, "grad_norm": 3.410916805267334, "learning_rate": 2.734835526315789e-06, "loss": 0.1011, "step": 8125 }, { "epoch": 0.4075, "grad_norm": 2.426023006439209, "learning_rate": 2.729078947368421e-06, "loss": 0.0864, "step": 8150 }, { "epoch": 0.40875, "grad_norm": 2.8296170234680176, "learning_rate": 2.723322368421052e-06, "loss": 0.0929, "step": 8175 }, { "epoch": 0.41, "grad_norm": 2.028474807739258, "learning_rate": 2.717565789473684e-06, "loss": 0.0848, "step": 8200 }, { "epoch": 0.41125, "grad_norm": 2.4663166999816895, "learning_rate": 2.7118092105263155e-06, "loss": 0.0698, "step": 8225 }, { "epoch": 0.4125, "grad_norm": 1.7618118524551392, "learning_rate": 2.706052631578947e-06, "loss": 0.058, "step": 8250 }, { "epoch": 0.41375, "grad_norm": 2.2708559036254883, "learning_rate": 2.7002960526315786e-06, "loss": 0.0607, "step": 8275 }, { "epoch": 0.415, "grad_norm": 1.6543164253234863, "learning_rate": 2.6945394736842105e-06, "loss": 0.0556, "step": 8300 }, { "epoch": 0.41625, "grad_norm": 2.5951287746429443, "learning_rate": 2.6887828947368416e-06, "loss": 0.0576, "step": 8325 }, { "epoch": 0.4175, "grad_norm": 1.1910465955734253, "learning_rate": 2.6830263157894735e-06, "loss": 0.059, "step": 8350 }, { "epoch": 0.41875, "grad_norm": 1.667228102684021, "learning_rate": 2.6772697368421054e-06, "loss": 0.0521, "step": 8375 }, { "epoch": 0.42, "grad_norm": 2.1288628578186035, "learning_rate": 2.6715131578947365e-06, "loss": 0.0557, "step": 8400 }, { "epoch": 0.42125, "grad_norm": 2.0485122203826904, "learning_rate": 2.6657565789473684e-06, "loss": 0.0493, "step": 8425 }, { "epoch": 0.4225, "grad_norm": 1.8512142896652222, "learning_rate": 2.6599999999999995e-06, "loss": 0.056, "step": 8450 }, { "epoch": 0.42375, "grad_norm": 1.8958942890167236, "learning_rate": 2.6542434210526314e-06, "loss": 0.059, "step": 8475 }, { "epoch": 0.425, "grad_norm": 1.2833645343780518, "learning_rate": 2.648486842105263e-06, "loss": 0.0477, "step": 8500 }, { "epoch": 0.425, "eval_loss": 0.23789365589618683, "eval_runtime": 530.7286, "eval_samples_per_second": 3.186, "eval_steps_per_second": 0.399, "eval_wer": 8.317721993255901, "step": 8500 }, { "epoch": 0.42625, "grad_norm": 1.2612178325653076, "learning_rate": 2.6427302631578945e-06, "loss": 0.05, "step": 8525 }, { "epoch": 0.4275, "grad_norm": 2.100247621536255, "learning_rate": 2.636973684210526e-06, "loss": 0.0626, "step": 8550 }, { "epoch": 0.42875, "grad_norm": 2.7199559211730957, "learning_rate": 2.631217105263158e-06, "loss": 0.0906, "step": 8575 }, { "epoch": 0.43, "grad_norm": 3.267314910888672, "learning_rate": 2.625460526315789e-06, "loss": 0.1068, "step": 8600 }, { "epoch": 0.43125, "grad_norm": 3.2623515129089355, "learning_rate": 2.619703947368421e-06, "loss": 0.0849, "step": 8625 }, { "epoch": 0.4325, "grad_norm": 1.8294329643249512, "learning_rate": 2.613947368421052e-06, "loss": 0.0776, "step": 8650 }, { "epoch": 0.43375, "grad_norm": 3.3888967037200928, "learning_rate": 2.608190789473684e-06, "loss": 0.0869, "step": 8675 }, { "epoch": 0.435, "grad_norm": 2.5059332847595215, "learning_rate": 2.602434210526316e-06, "loss": 0.0781, "step": 8700 }, { "epoch": 0.43625, "grad_norm": 1.8527718782424927, "learning_rate": 2.596677631578947e-06, "loss": 0.0513, "step": 8725 }, { "epoch": 0.4375, "grad_norm": 1.4375104904174805, "learning_rate": 2.590921052631579e-06, "loss": 0.053, "step": 8750 }, { "epoch": 0.43875, "grad_norm": 1.923519253730774, "learning_rate": 2.5851644736842104e-06, "loss": 0.0487, "step": 8775 }, { "epoch": 0.44, "grad_norm": 1.6237260103225708, "learning_rate": 2.579407894736842e-06, "loss": 0.0499, "step": 8800 }, { "epoch": 0.44125, "grad_norm": 1.7452889680862427, "learning_rate": 2.5736513157894734e-06, "loss": 0.0538, "step": 8825 }, { "epoch": 0.4425, "grad_norm": 1.7012261152267456, "learning_rate": 2.5678947368421053e-06, "loss": 0.0529, "step": 8850 }, { "epoch": 0.44375, "grad_norm": 1.8288905620574951, "learning_rate": 2.5621381578947364e-06, "loss": 0.0473, "step": 8875 }, { "epoch": 0.445, "grad_norm": 1.9288239479064941, "learning_rate": 2.5563815789473683e-06, "loss": 0.0683, "step": 8900 }, { "epoch": 0.44625, "grad_norm": 1.3186031579971313, "learning_rate": 2.5506249999999994e-06, "loss": 0.065, "step": 8925 }, { "epoch": 0.4475, "grad_norm": 1.340890645980835, "learning_rate": 2.5448684210526313e-06, "loss": 0.0617, "step": 8950 }, { "epoch": 0.44875, "grad_norm": 2.7007381916046143, "learning_rate": 2.539111842105263e-06, "loss": 0.0512, "step": 8975 }, { "epoch": 0.45, "grad_norm": 1.688952922821045, "learning_rate": 2.5333552631578943e-06, "loss": 0.0638, "step": 9000 }, { "epoch": 0.45, "eval_loss": 0.23539182543754578, "eval_runtime": 531.3204, "eval_samples_per_second": 3.183, "eval_steps_per_second": 0.399, "eval_wer": 8.948420132384163, "step": 9000 }, { "epoch": 0.45125, "grad_norm": 3.06502103805542, "learning_rate": 2.527598684210526e-06, "loss": 0.0691, "step": 9025 }, { "epoch": 0.4525, "grad_norm": 2.2699365615844727, "learning_rate": 2.5218421052631578e-06, "loss": 0.0765, "step": 9050 }, { "epoch": 0.45375, "grad_norm": 3.057246208190918, "learning_rate": 2.5160855263157893e-06, "loss": 0.0874, "step": 9075 }, { "epoch": 0.455, "grad_norm": 2.452810764312744, "learning_rate": 2.5105592105263156e-06, "loss": 0.0992, "step": 9100 }, { "epoch": 0.45625, "grad_norm": 1.8321553468704224, "learning_rate": 2.504802631578947e-06, "loss": 0.091, "step": 9125 }, { "epoch": 0.4575, "grad_norm": 2.1675491333007812, "learning_rate": 2.499046052631579e-06, "loss": 0.0968, "step": 9150 }, { "epoch": 0.45875, "grad_norm": 2.440648317337036, "learning_rate": 2.49328947368421e-06, "loss": 0.094, "step": 9175 }, { "epoch": 0.46, "grad_norm": 3.3630011081695557, "learning_rate": 2.487532894736842e-06, "loss": 0.0934, "step": 9200 }, { "epoch": 0.46125, "grad_norm": 3.1267924308776855, "learning_rate": 2.481776315789473e-06, "loss": 0.095, "step": 9225 }, { "epoch": 0.4625, "grad_norm": 2.791846752166748, "learning_rate": 2.476019736842105e-06, "loss": 0.0988, "step": 9250 }, { "epoch": 0.46375, "grad_norm": 1.883380651473999, "learning_rate": 2.4702631578947365e-06, "loss": 0.089, "step": 9275 }, { "epoch": 0.465, "grad_norm": 2.572441577911377, "learning_rate": 2.464506578947368e-06, "loss": 0.0933, "step": 9300 }, { "epoch": 0.46625, "grad_norm": 3.08231258392334, "learning_rate": 2.45875e-06, "loss": 0.0856, "step": 9325 }, { "epoch": 0.4675, "grad_norm": 2.208491563796997, "learning_rate": 2.4529934210526315e-06, "loss": 0.0795, "step": 9350 }, { "epoch": 0.46875, "grad_norm": 2.896657943725586, "learning_rate": 2.447236842105263e-06, "loss": 0.0625, "step": 9375 }, { "epoch": 0.47, "grad_norm": 1.3465672731399536, "learning_rate": 2.4414802631578945e-06, "loss": 0.0822, "step": 9400 }, { "epoch": 0.47125, "grad_norm": 3.4039506912231445, "learning_rate": 2.4357236842105264e-06, "loss": 0.0813, "step": 9425 }, { "epoch": 0.4725, "grad_norm": 2.213761568069458, "learning_rate": 2.4299671052631575e-06, "loss": 0.075, "step": 9450 }, { "epoch": 0.47375, "grad_norm": 1.693393588066101, "learning_rate": 2.4242105263157894e-06, "loss": 0.0819, "step": 9475 }, { "epoch": 0.475, "grad_norm": 3.1261212825775146, "learning_rate": 2.4184539473684205e-06, "loss": 0.0735, "step": 9500 }, { "epoch": 0.475, "eval_loss": 0.22311098873615265, "eval_runtime": 530.3307, "eval_samples_per_second": 3.189, "eval_steps_per_second": 0.4, "eval_wer": 8.39890096165855, "step": 9500 }, { "epoch": 0.47625, "grad_norm": 2.6011083126068115, "learning_rate": 2.4126973684210524e-06, "loss": 0.0557, "step": 9525 }, { "epoch": 0.4775, "grad_norm": 1.4606833457946777, "learning_rate": 2.406940789473684e-06, "loss": 0.0518, "step": 9550 }, { "epoch": 0.47875, "grad_norm": 3.201547145843506, "learning_rate": 2.4011842105263154e-06, "loss": 0.0616, "step": 9575 }, { "epoch": 0.48, "grad_norm": 1.470755696296692, "learning_rate": 2.395427631578947e-06, "loss": 0.0566, "step": 9600 }, { "epoch": 0.48125, "grad_norm": 1.0501068830490112, "learning_rate": 2.389671052631579e-06, "loss": 0.0482, "step": 9625 }, { "epoch": 0.4825, "grad_norm": 1.7576944828033447, "learning_rate": 2.38391447368421e-06, "loss": 0.0487, "step": 9650 }, { "epoch": 0.48375, "grad_norm": 2.6596386432647705, "learning_rate": 2.378157894736842e-06, "loss": 0.0548, "step": 9675 }, { "epoch": 0.485, "grad_norm": 2.2998361587524414, "learning_rate": 2.372401315789474e-06, "loss": 0.0755, "step": 9700 }, { "epoch": 0.48625, "grad_norm": 1.885953426361084, "learning_rate": 2.366644736842105e-06, "loss": 0.0635, "step": 9725 }, { "epoch": 0.4875, "grad_norm": 1.686090111732483, "learning_rate": 2.360888157894737e-06, "loss": 0.0664, "step": 9750 }, { "epoch": 0.48875, "grad_norm": 1.487586259841919, "learning_rate": 2.3551315789473683e-06, "loss": 0.0723, "step": 9775 }, { "epoch": 0.49, "grad_norm": 1.5484004020690918, "learning_rate": 2.349375e-06, "loss": 0.0697, "step": 9800 }, { "epoch": 0.49125, "grad_norm": 1.6730592250823975, "learning_rate": 2.3436184210526314e-06, "loss": 0.0726, "step": 9825 }, { "epoch": 0.4925, "grad_norm": 1.549166202545166, "learning_rate": 2.337861842105263e-06, "loss": 0.0599, "step": 9850 }, { "epoch": 0.49375, "grad_norm": 2.127182960510254, "learning_rate": 2.3321052631578944e-06, "loss": 0.0552, "step": 9875 }, { "epoch": 0.495, "grad_norm": 1.5453063249588013, "learning_rate": 2.3263486842105263e-06, "loss": 0.0647, "step": 9900 }, { "epoch": 0.49625, "grad_norm": 2.2514312267303467, "learning_rate": 2.3205921052631574e-06, "loss": 0.0543, "step": 9925 }, { "epoch": 0.4975, "grad_norm": 1.5466394424438477, "learning_rate": 2.3148355263157893e-06, "loss": 0.0576, "step": 9950 }, { "epoch": 0.49875, "grad_norm": 1.1446313858032227, "learning_rate": 2.309078947368421e-06, "loss": 0.0581, "step": 9975 }, { "epoch": 0.5, "grad_norm": 1.7810652256011963, "learning_rate": 2.3033223684210523e-06, "loss": 0.0548, "step": 10000 }, { "epoch": 0.5, "eval_loss": 0.23302312195301056, "eval_runtime": 531.2942, "eval_samples_per_second": 3.183, "eval_steps_per_second": 0.399, "eval_wer": 8.57374797052579, "step": 10000 }, { "epoch": 0.50125, "grad_norm": 1.8410784006118774, "learning_rate": 2.2975657894736842e-06, "loss": 0.0818, "step": 10025 }, { "epoch": 0.5025, "grad_norm": 2.0660974979400635, "learning_rate": 2.2918092105263158e-06, "loss": 0.0839, "step": 10050 }, { "epoch": 0.50375, "grad_norm": 0.7626898288726807, "learning_rate": 2.2860526315789473e-06, "loss": 0.0717, "step": 10075 }, { "epoch": 0.505, "grad_norm": 3.819746971130371, "learning_rate": 2.2802960526315788e-06, "loss": 0.0696, "step": 10100 }, { "epoch": 0.50625, "grad_norm": 1.8556462526321411, "learning_rate": 2.2745394736842103e-06, "loss": 0.0776, "step": 10125 }, { "epoch": 0.5075, "grad_norm": 2.1852500438690186, "learning_rate": 2.2687828947368418e-06, "loss": 0.1228, "step": 10150 }, { "epoch": 0.50875, "grad_norm": 2.6284213066101074, "learning_rate": 2.2630263157894737e-06, "loss": 0.1121, "step": 10175 }, { "epoch": 0.51, "grad_norm": 3.840794801712036, "learning_rate": 2.2572697368421048e-06, "loss": 0.087, "step": 10200 }, { "epoch": 0.51125, "grad_norm": 1.920469045639038, "learning_rate": 2.2515131578947367e-06, "loss": 0.0876, "step": 10225 }, { "epoch": 0.5125, "grad_norm": 2.9199891090393066, "learning_rate": 2.2457565789473682e-06, "loss": 0.0812, "step": 10250 }, { "epoch": 0.51375, "grad_norm": 2.7151129245758057, "learning_rate": 2.2399999999999997e-06, "loss": 0.0733, "step": 10275 }, { "epoch": 0.515, "grad_norm": 3.474050760269165, "learning_rate": 2.2342434210526312e-06, "loss": 0.0934, "step": 10300 }, { "epoch": 0.51625, "grad_norm": 1.5654582977294922, "learning_rate": 2.228486842105263e-06, "loss": 0.0762, "step": 10325 }, { "epoch": 0.5175, "grad_norm": 1.0436935424804688, "learning_rate": 2.2227302631578947e-06, "loss": 0.0727, "step": 10350 }, { "epoch": 0.51875, "grad_norm": 0.8793361186981201, "learning_rate": 2.216973684210526e-06, "loss": 0.0471, "step": 10375 }, { "epoch": 0.52, "grad_norm": 0.7731598019599915, "learning_rate": 2.211217105263158e-06, "loss": 0.0467, "step": 10400 }, { "epoch": 0.52125, "grad_norm": 1.2689337730407715, "learning_rate": 2.205460526315789e-06, "loss": 0.0485, "step": 10425 }, { "epoch": 0.5225, "grad_norm": 1.4495617151260376, "learning_rate": 2.199703947368421e-06, "loss": 0.0541, "step": 10450 }, { "epoch": 0.52375, "grad_norm": 1.4262604713439941, "learning_rate": 2.193947368421052e-06, "loss": 0.0539, "step": 10475 }, { "epoch": 0.525, "grad_norm": 1.8088651895523071, "learning_rate": 2.188190789473684e-06, "loss": 0.0557, "step": 10500 }, { "epoch": 0.525, "eval_loss": 0.2133007049560547, "eval_runtime": 530.2894, "eval_samples_per_second": 3.189, "eval_steps_per_second": 0.4, "eval_wer": 8.361433745472711, "step": 10500 }, { "epoch": 0.52625, "grad_norm": 1.178223967552185, "learning_rate": 2.1824342105263156e-06, "loss": 0.0549, "step": 10525 }, { "epoch": 0.5275, "grad_norm": 1.7510823011398315, "learning_rate": 2.176677631578947e-06, "loss": 0.0557, "step": 10550 }, { "epoch": 0.52875, "grad_norm": 0.9500125050544739, "learning_rate": 2.1709210526315786e-06, "loss": 0.0553, "step": 10575 }, { "epoch": 0.53, "grad_norm": 2.060792922973633, "learning_rate": 2.1651644736842106e-06, "loss": 0.0596, "step": 10600 }, { "epoch": 0.53125, "grad_norm": 2.1061859130859375, "learning_rate": 2.159407894736842e-06, "loss": 0.0539, "step": 10625 }, { "epoch": 0.5325, "grad_norm": 1.6122857332229614, "learning_rate": 2.1536513157894736e-06, "loss": 0.053, "step": 10650 }, { "epoch": 0.53375, "grad_norm": 2.2909045219421387, "learning_rate": 2.147894736842105e-06, "loss": 0.0614, "step": 10675 }, { "epoch": 0.535, "grad_norm": 3.2241578102111816, "learning_rate": 2.1421381578947366e-06, "loss": 0.0829, "step": 10700 }, { "epoch": 0.53625, "grad_norm": 2.7384145259857178, "learning_rate": 2.136611842105263e-06, "loss": 0.0817, "step": 10725 }, { "epoch": 0.5375, "grad_norm": 1.8319401741027832, "learning_rate": 2.1308552631578944e-06, "loss": 0.0823, "step": 10750 }, { "epoch": 0.53875, "grad_norm": 2.4007859230041504, "learning_rate": 2.125098684210526e-06, "loss": 0.0733, "step": 10775 }, { "epoch": 0.54, "grad_norm": 2.042520046234131, "learning_rate": 2.119342105263158e-06, "loss": 0.0838, "step": 10800 }, { "epoch": 0.54125, "grad_norm": 2.0478389263153076, "learning_rate": 2.1135855263157893e-06, "loss": 0.0831, "step": 10825 }, { "epoch": 0.5425, "grad_norm": 2.357926607131958, "learning_rate": 2.107828947368421e-06, "loss": 0.0728, "step": 10850 }, { "epoch": 0.54375, "grad_norm": 2.214553117752075, "learning_rate": 2.1020723684210523e-06, "loss": 0.0804, "step": 10875 }, { "epoch": 0.545, "grad_norm": 3.484598398208618, "learning_rate": 2.0963157894736843e-06, "loss": 0.0592, "step": 10900 }, { "epoch": 0.54625, "grad_norm": 1.5546646118164062, "learning_rate": 2.0905592105263158e-06, "loss": 0.0577, "step": 10925 }, { "epoch": 0.5475, "grad_norm": 2.218691349029541, "learning_rate": 2.0848026315789473e-06, "loss": 0.053, "step": 10950 }, { "epoch": 0.54875, "grad_norm": 2.9559834003448486, "learning_rate": 2.0790460526315788e-06, "loss": 0.0543, "step": 10975 }, { "epoch": 0.55, "grad_norm": 1.6290831565856934, "learning_rate": 2.0732894736842103e-06, "loss": 0.0626, "step": 11000 }, { "epoch": 0.55, "eval_loss": 0.2083810567855835, "eval_runtime": 531.6457, "eval_samples_per_second": 3.181, "eval_steps_per_second": 0.399, "eval_wer": 8.286499313101036, "step": 11000 }, { "epoch": 0.55125, "grad_norm": 2.2507994174957275, "learning_rate": 2.067532894736842e-06, "loss": 0.0645, "step": 11025 }, { "epoch": 0.5525, "grad_norm": 3.930997133255005, "learning_rate": 2.0617763157894733e-06, "loss": 0.0699, "step": 11050 }, { "epoch": 0.55375, "grad_norm": 3.1073126792907715, "learning_rate": 2.056019736842105e-06, "loss": 0.0852, "step": 11075 }, { "epoch": 0.555, "grad_norm": 2.5678088665008545, "learning_rate": 2.0502631578947367e-06, "loss": 0.0863, "step": 11100 }, { "epoch": 0.55625, "grad_norm": 2.97763729095459, "learning_rate": 2.0445065789473682e-06, "loss": 0.0718, "step": 11125 }, { "epoch": 0.5575, "grad_norm": 1.2580708265304565, "learning_rate": 2.0387499999999998e-06, "loss": 0.0462, "step": 11150 }, { "epoch": 0.55875, "grad_norm": 1.804002285003662, "learning_rate": 2.0329934210526317e-06, "loss": 0.0364, "step": 11175 }, { "epoch": 0.56, "grad_norm": 1.492600679397583, "learning_rate": 2.027236842105263e-06, "loss": 0.0438, "step": 11200 }, { "epoch": 0.56125, "grad_norm": 2.423004627227783, "learning_rate": 2.0214802631578947e-06, "loss": 0.031, "step": 11225 }, { "epoch": 0.5625, "grad_norm": 1.5198426246643066, "learning_rate": 2.015723684210526e-06, "loss": 0.0324, "step": 11250 }, { "epoch": 0.56375, "grad_norm": 0.9852400422096252, "learning_rate": 2.0099671052631577e-06, "loss": 0.029, "step": 11275 }, { "epoch": 0.565, "grad_norm": 1.2327955961227417, "learning_rate": 2.004210526315789e-06, "loss": 0.0406, "step": 11300 }, { "epoch": 0.56625, "grad_norm": 1.455636978149414, "learning_rate": 1.9984539473684207e-06, "loss": 0.047, "step": 11325 }, { "epoch": 0.5675, "grad_norm": 1.4720903635025024, "learning_rate": 1.9926973684210522e-06, "loss": 0.0444, "step": 11350 }, { "epoch": 0.56875, "grad_norm": 1.7255401611328125, "learning_rate": 1.986940789473684e-06, "loss": 0.0514, "step": 11375 }, { "epoch": 0.57, "grad_norm": 1.3503352403640747, "learning_rate": 1.9811842105263157e-06, "loss": 0.0533, "step": 11400 }, { "epoch": 0.57125, "grad_norm": 1.5066325664520264, "learning_rate": 1.975427631578947e-06, "loss": 0.0524, "step": 11425 }, { "epoch": 0.5725, "grad_norm": 1.877842903137207, "learning_rate": 1.9696710526315787e-06, "loss": 0.0519, "step": 11450 }, { "epoch": 0.57375, "grad_norm": 1.4466218948364258, "learning_rate": 1.9639144736842106e-06, "loss": 0.0548, "step": 11475 }, { "epoch": 0.575, "grad_norm": 1.3053616285324097, "learning_rate": 1.958157894736842e-06, "loss": 0.0472, "step": 11500 }, { "epoch": 0.575, "eval_loss": 0.23307645320892334, "eval_runtime": 536.26, "eval_samples_per_second": 3.153, "eval_steps_per_second": 0.395, "eval_wer": 8.074185088047958, "step": 11500 }, { "epoch": 0.57625, "grad_norm": 1.172753930091858, "learning_rate": 1.9524013157894736e-06, "loss": 0.0506, "step": 11525 }, { "epoch": 0.5775, "grad_norm": 1.700363039970398, "learning_rate": 1.946644736842105e-06, "loss": 0.0585, "step": 11550 }, { "epoch": 0.57875, "grad_norm": 1.3203791379928589, "learning_rate": 1.9408881578947366e-06, "loss": 0.0499, "step": 11575 }, { "epoch": 0.58, "grad_norm": 1.4109314680099487, "learning_rate": 1.935131578947368e-06, "loss": 0.0433, "step": 11600 }, { "epoch": 0.58125, "grad_norm": 1.3247355222702026, "learning_rate": 1.929375e-06, "loss": 0.0378, "step": 11625 }, { "epoch": 0.5825, "grad_norm": 0.9325533509254456, "learning_rate": 1.9236184210526316e-06, "loss": 0.0442, "step": 11650 }, { "epoch": 0.58375, "grad_norm": 1.8996745347976685, "learning_rate": 1.917861842105263e-06, "loss": 0.049, "step": 11675 }, { "epoch": 0.585, "grad_norm": 1.7976350784301758, "learning_rate": 1.9121052631578946e-06, "loss": 0.0467, "step": 11700 }, { "epoch": 0.58625, "grad_norm": 2.180805206298828, "learning_rate": 1.906348684210526e-06, "loss": 0.0493, "step": 11725 }, { "epoch": 0.5875, "grad_norm": 1.2519850730895996, "learning_rate": 1.9005921052631576e-06, "loss": 0.0486, "step": 11750 }, { "epoch": 0.58875, "grad_norm": 2.3758866786956787, "learning_rate": 1.8948355263157893e-06, "loss": 0.0584, "step": 11775 }, { "epoch": 0.59, "grad_norm": 2.0312483310699463, "learning_rate": 1.8890789473684208e-06, "loss": 0.0702, "step": 11800 }, { "epoch": 0.59125, "grad_norm": 2.017726182937622, "learning_rate": 1.8833223684210525e-06, "loss": 0.0822, "step": 11825 }, { "epoch": 0.5925, "grad_norm": 2.159196138381958, "learning_rate": 1.8775657894736842e-06, "loss": 0.0918, "step": 11850 }, { "epoch": 0.59375, "grad_norm": 2.8051164150238037, "learning_rate": 1.8718092105263158e-06, "loss": 0.0927, "step": 11875 }, { "epoch": 0.595, "grad_norm": 1.9617701768875122, "learning_rate": 1.8660526315789473e-06, "loss": 0.0762, "step": 11900 }, { "epoch": 0.59625, "grad_norm": 1.4993948936462402, "learning_rate": 1.8602960526315788e-06, "loss": 0.0768, "step": 11925 }, { "epoch": 0.5975, "grad_norm": 2.1341333389282227, "learning_rate": 1.8545394736842105e-06, "loss": 0.0647, "step": 11950 }, { "epoch": 0.59875, "grad_norm": 1.5004290342330933, "learning_rate": 1.848782894736842e-06, "loss": 0.0669, "step": 11975 }, { "epoch": 0.6, "grad_norm": 3.0987565517425537, "learning_rate": 1.8430263157894735e-06, "loss": 0.0636, "step": 12000 }, { "epoch": 0.6, "eval_loss": 0.2118152379989624, "eval_runtime": 536.0484, "eval_samples_per_second": 3.155, "eval_steps_per_second": 0.395, "eval_wer": 7.961783439490445, "step": 12000 }, { "epoch": 0.60125, "grad_norm": 1.6456586122512817, "learning_rate": 1.837269736842105e-06, "loss": 0.0701, "step": 12025 }, { "epoch": 0.6025, "grad_norm": 2.0990679264068604, "learning_rate": 1.8315131578947367e-06, "loss": 0.0573, "step": 12050 }, { "epoch": 0.60375, "grad_norm": 1.8728748559951782, "learning_rate": 1.8257565789473682e-06, "loss": 0.054, "step": 12075 }, { "epoch": 0.605, "grad_norm": 1.2849019765853882, "learning_rate": 1.8199999999999997e-06, "loss": 0.0522, "step": 12100 }, { "epoch": 0.60625, "grad_norm": 1.6803030967712402, "learning_rate": 1.8142434210526312e-06, "loss": 0.0492, "step": 12125 }, { "epoch": 0.6075, "grad_norm": 1.9102485179901123, "learning_rate": 1.808486842105263e-06, "loss": 0.0482, "step": 12150 }, { "epoch": 0.60875, "grad_norm": 1.1118731498718262, "learning_rate": 1.8027302631578947e-06, "loss": 0.0422, "step": 12175 }, { "epoch": 0.61, "grad_norm": 1.1670501232147217, "learning_rate": 1.7969736842105262e-06, "loss": 0.0515, "step": 12200 }, { "epoch": 0.61125, "grad_norm": 2.522876739501953, "learning_rate": 1.7912171052631579e-06, "loss": 0.0412, "step": 12225 }, { "epoch": 0.6125, "grad_norm": 1.2704464197158813, "learning_rate": 1.7854605263157894e-06, "loss": 0.0508, "step": 12250 }, { "epoch": 0.61375, "grad_norm": 2.399094343185425, "learning_rate": 1.779703947368421e-06, "loss": 0.0547, "step": 12275 }, { "epoch": 0.615, "grad_norm": 2.2606582641601562, "learning_rate": 1.7739473684210524e-06, "loss": 0.0562, "step": 12300 }, { "epoch": 0.61625, "grad_norm": 0.5112090110778809, "learning_rate": 1.7681907894736841e-06, "loss": 0.0513, "step": 12325 }, { "epoch": 0.6175, "grad_norm": 1.1044148206710815, "learning_rate": 1.7624342105263156e-06, "loss": 0.0544, "step": 12350 }, { "epoch": 0.61875, "grad_norm": 1.2760109901428223, "learning_rate": 1.7566776315789471e-06, "loss": 0.0512, "step": 12375 }, { "epoch": 0.62, "grad_norm": 1.3780227899551392, "learning_rate": 1.7509210526315786e-06, "loss": 0.0546, "step": 12400 }, { "epoch": 0.62125, "grad_norm": 1.0981767177581787, "learning_rate": 1.7451644736842104e-06, "loss": 0.041, "step": 12425 }, { "epoch": 0.6225, "grad_norm": 2.353482484817505, "learning_rate": 1.7394078947368419e-06, "loss": 0.0479, "step": 12450 }, { "epoch": 0.62375, "grad_norm": 1.3375900983810425, "learning_rate": 1.7336513157894734e-06, "loss": 0.0522, "step": 12475 }, { "epoch": 0.625, "grad_norm": 2.1002514362335205, "learning_rate": 1.7278947368421053e-06, "loss": 0.0466, "step": 12500 }, { "epoch": 0.625, "eval_loss": 0.21263667941093445, "eval_runtime": 535.5066, "eval_samples_per_second": 3.158, "eval_steps_per_second": 0.396, "eval_wer": 7.468465093043587, "step": 12500 }, { "epoch": 0.62625, "grad_norm": 1.5551177263259888, "learning_rate": 1.7221381578947368e-06, "loss": 0.0584, "step": 12525 }, { "epoch": 0.6275, "grad_norm": 2.234121322631836, "learning_rate": 1.7163815789473683e-06, "loss": 0.061, "step": 12550 }, { "epoch": 0.62875, "grad_norm": 2.269101619720459, "learning_rate": 1.7106249999999998e-06, "loss": 0.0607, "step": 12575 }, { "epoch": 0.63, "grad_norm": 2.8848202228546143, "learning_rate": 1.7048684210526315e-06, "loss": 0.0675, "step": 12600 }, { "epoch": 0.63125, "grad_norm": 2.2159249782562256, "learning_rate": 1.699111842105263e-06, "loss": 0.0783, "step": 12625 }, { "epoch": 0.6325, "grad_norm": 1.5829565525054932, "learning_rate": 1.6933552631578946e-06, "loss": 0.0834, "step": 12650 }, { "epoch": 0.63375, "grad_norm": 1.9816817045211792, "learning_rate": 1.687598684210526e-06, "loss": 0.0727, "step": 12675 }, { "epoch": 0.635, "grad_norm": 2.8434395790100098, "learning_rate": 1.6818421052631578e-06, "loss": 0.0778, "step": 12700 }, { "epoch": 0.63625, "grad_norm": 2.4956297874450684, "learning_rate": 1.6760855263157893e-06, "loss": 0.0731, "step": 12725 }, { "epoch": 0.6375, "grad_norm": 1.7429981231689453, "learning_rate": 1.6703289473684208e-06, "loss": 0.0637, "step": 12750 }, { "epoch": 0.63875, "grad_norm": 2.3022801876068115, "learning_rate": 1.6645723684210525e-06, "loss": 0.0708, "step": 12775 }, { "epoch": 0.64, "grad_norm": 1.621469497680664, "learning_rate": 1.658815789473684e-06, "loss": 0.0466, "step": 12800 }, { "epoch": 0.64125, "grad_norm": 1.7762545347213745, "learning_rate": 1.6530592105263155e-06, "loss": 0.0544, "step": 12825 }, { "epoch": 0.6425, "grad_norm": 1.568123698234558, "learning_rate": 1.6473026315789472e-06, "loss": 0.0457, "step": 12850 }, { "epoch": 0.64375, "grad_norm": 0.5994829535484314, "learning_rate": 1.641546052631579e-06, "loss": 0.0487, "step": 12875 }, { "epoch": 0.645, "grad_norm": 1.9480714797973633, "learning_rate": 1.6357894736842105e-06, "loss": 0.0503, "step": 12900 }, { "epoch": 0.64625, "grad_norm": 2.2603769302368164, "learning_rate": 1.630032894736842e-06, "loss": 0.0705, "step": 12925 }, { "epoch": 0.6475, "grad_norm": 2.2942919731140137, "learning_rate": 1.6242763157894737e-06, "loss": 0.0666, "step": 12950 }, { "epoch": 0.64875, "grad_norm": 2.819730758666992, "learning_rate": 1.6185197368421052e-06, "loss": 0.0736, "step": 12975 }, { "epoch": 0.65, "grad_norm": 1.8207030296325684, "learning_rate": 1.6127631578947367e-06, "loss": 0.0604, "step": 13000 }, { "epoch": 0.65, "eval_loss": 0.21604977548122406, "eval_runtime": 534.676, "eval_samples_per_second": 3.163, "eval_steps_per_second": 0.397, "eval_wer": 7.655801173972773, "step": 13000 }, { "epoch": 0.65125, "grad_norm": 2.0296692848205566, "learning_rate": 1.6070065789473682e-06, "loss": 0.0745, "step": 13025 }, { "epoch": 0.6525, "grad_norm": 3.9246408939361572, "learning_rate": 1.60125e-06, "loss": 0.0862, "step": 13050 }, { "epoch": 0.65375, "grad_norm": 1.9909517765045166, "learning_rate": 1.5954934210526314e-06, "loss": 0.0676, "step": 13075 }, { "epoch": 0.655, "grad_norm": 2.652264356613159, "learning_rate": 1.589736842105263e-06, "loss": 0.0823, "step": 13100 }, { "epoch": 0.65625, "grad_norm": 2.1940698623657227, "learning_rate": 1.5839802631578944e-06, "loss": 0.0775, "step": 13125 }, { "epoch": 0.6575, "grad_norm": 3.084667444229126, "learning_rate": 1.5782236842105262e-06, "loss": 0.0779, "step": 13150 }, { "epoch": 0.65875, "grad_norm": 2.134045124053955, "learning_rate": 1.5724671052631579e-06, "loss": 0.0756, "step": 13175 }, { "epoch": 0.66, "grad_norm": 2.4405481815338135, "learning_rate": 1.5667105263157894e-06, "loss": 0.075, "step": 13200 }, { "epoch": 0.66125, "grad_norm": 2.251408100128174, "learning_rate": 1.560953947368421e-06, "loss": 0.0668, "step": 13225 }, { "epoch": 0.6625, "grad_norm": 2.21307635307312, "learning_rate": 1.5551973684210526e-06, "loss": 0.076, "step": 13250 }, { "epoch": 0.66375, "grad_norm": 3.1692416667938232, "learning_rate": 1.549440789473684e-06, "loss": 0.0841, "step": 13275 }, { "epoch": 0.665, "grad_norm": 2.4879300594329834, "learning_rate": 1.5436842105263156e-06, "loss": 0.0785, "step": 13300 }, { "epoch": 0.66625, "grad_norm": 1.6188695430755615, "learning_rate": 1.5379276315789473e-06, "loss": 0.0698, "step": 13325 }, { "epoch": 0.6675, "grad_norm": 2.258192300796509, "learning_rate": 1.5321710526315788e-06, "loss": 0.0682, "step": 13350 }, { "epoch": 0.66875, "grad_norm": 1.7001844644546509, "learning_rate": 1.5264144736842103e-06, "loss": 0.0728, "step": 13375 }, { "epoch": 0.67, "grad_norm": 2.0650229454040527, "learning_rate": 1.5206578947368418e-06, "loss": 0.0608, "step": 13400 }, { "epoch": 0.67125, "grad_norm": 1.0384840965270996, "learning_rate": 1.5149013157894736e-06, "loss": 0.0521, "step": 13425 }, { "epoch": 0.6725, "grad_norm": 1.458274483680725, "learning_rate": 1.509144736842105e-06, "loss": 0.0544, "step": 13450 }, { "epoch": 0.67375, "grad_norm": 1.678476095199585, "learning_rate": 1.5033881578947366e-06, "loss": 0.0478, "step": 13475 }, { "epoch": 0.675, "grad_norm": 2.1401052474975586, "learning_rate": 1.497631578947368e-06, "loss": 0.0544, "step": 13500 }, { "epoch": 0.675, "eval_loss": 0.21870100498199463, "eval_runtime": 534.1154, "eval_samples_per_second": 3.166, "eval_steps_per_second": 0.397, "eval_wer": 7.999250655676284, "step": 13500 }, { "epoch": 0.67625, "grad_norm": 1.387534737586975, "learning_rate": 1.491875e-06, "loss": 0.0497, "step": 13525 }, { "epoch": 0.6775, "grad_norm": 2.2233715057373047, "learning_rate": 1.4861184210526315e-06, "loss": 0.0628, "step": 13550 }, { "epoch": 0.67875, "grad_norm": 2.775345802307129, "learning_rate": 1.480361842105263e-06, "loss": 0.0883, "step": 13575 }, { "epoch": 0.68, "grad_norm": 2.7996487617492676, "learning_rate": 1.4746052631578947e-06, "loss": 0.0895, "step": 13600 }, { "epoch": 0.68125, "grad_norm": 2.4933836460113525, "learning_rate": 1.4688486842105262e-06, "loss": 0.0876, "step": 13625 }, { "epoch": 0.6825, "grad_norm": 3.253474712371826, "learning_rate": 1.4630921052631578e-06, "loss": 0.0725, "step": 13650 }, { "epoch": 0.68375, "grad_norm": 2.5821990966796875, "learning_rate": 1.4573355263157893e-06, "loss": 0.088, "step": 13675 }, { "epoch": 0.685, "grad_norm": 3.219723701477051, "learning_rate": 1.451578947368421e-06, "loss": 0.079, "step": 13700 }, { "epoch": 0.68625, "grad_norm": 2.1482114791870117, "learning_rate": 1.4458223684210525e-06, "loss": 0.0715, "step": 13725 }, { "epoch": 0.6875, "grad_norm": 3.403439521789551, "learning_rate": 1.440065789473684e-06, "loss": 0.0731, "step": 13750 }, { "epoch": 0.68875, "grad_norm": 2.0612175464630127, "learning_rate": 1.4343092105263155e-06, "loss": 0.0669, "step": 13775 }, { "epoch": 0.69, "grad_norm": 2.5637385845184326, "learning_rate": 1.4285526315789472e-06, "loss": 0.0766, "step": 13800 }, { "epoch": 0.69125, "grad_norm": 1.8747389316558838, "learning_rate": 1.4227960526315787e-06, "loss": 0.0723, "step": 13825 }, { "epoch": 0.6925, "grad_norm": 2.6436047554016113, "learning_rate": 1.4170394736842104e-06, "loss": 0.0694, "step": 13850 }, { "epoch": 0.69375, "grad_norm": 2.300952911376953, "learning_rate": 1.4112828947368422e-06, "loss": 0.0711, "step": 13875 }, { "epoch": 0.695, "grad_norm": 2.480396032333374, "learning_rate": 1.4055263157894737e-06, "loss": 0.0695, "step": 13900 }, { "epoch": 0.69625, "grad_norm": 3.047656536102295, "learning_rate": 1.3997697368421052e-06, "loss": 0.0827, "step": 13925 }, { "epoch": 0.6975, "grad_norm": 1.8521438837051392, "learning_rate": 1.3940131578947367e-06, "loss": 0.0799, "step": 13950 }, { "epoch": 0.69875, "grad_norm": 3.52673602104187, "learning_rate": 1.3882565789473684e-06, "loss": 0.0819, "step": 13975 }, { "epoch": 0.7, "grad_norm": 2.5274155139923096, "learning_rate": 1.3824999999999999e-06, "loss": 0.07, "step": 14000 }, { "epoch": 0.7, "eval_loss": 0.21170927584171295, "eval_runtime": 534.7374, "eval_samples_per_second": 3.162, "eval_steps_per_second": 0.396, "eval_wer": 7.437242412888723, "step": 14000 }, { "epoch": 0.70125, "grad_norm": 3.9497313499450684, "learning_rate": 1.3767434210526314e-06, "loss": 0.0977, "step": 14025 }, { "epoch": 0.7025, "grad_norm": 5.4897284507751465, "learning_rate": 1.3709868421052631e-06, "loss": 0.1658, "step": 14050 }, { "epoch": 0.70375, "grad_norm": 3.0957064628601074, "learning_rate": 1.3652302631578946e-06, "loss": 0.1823, "step": 14075 }, { "epoch": 0.705, "grad_norm": 3.2891457080841064, "learning_rate": 1.3594736842105261e-06, "loss": 0.1777, "step": 14100 }, { "epoch": 0.70625, "grad_norm": 3.642838954925537, "learning_rate": 1.3537171052631576e-06, "loss": 0.177, "step": 14125 }, { "epoch": 0.7075, "grad_norm": 4.022505760192871, "learning_rate": 1.3479605263157894e-06, "loss": 0.1773, "step": 14150 }, { "epoch": 0.70875, "grad_norm": 3.632260799407959, "learning_rate": 1.3422039473684209e-06, "loss": 0.138, "step": 14175 }, { "epoch": 0.71, "grad_norm": 1.6560989618301392, "learning_rate": 1.3364473684210526e-06, "loss": 0.1163, "step": 14200 }, { "epoch": 0.71125, "grad_norm": 1.4849154949188232, "learning_rate": 1.3306907894736843e-06, "loss": 0.1001, "step": 14225 }, { "epoch": 0.7125, "grad_norm": 2.3382551670074463, "learning_rate": 1.3249342105263158e-06, "loss": 0.0748, "step": 14250 }, { "epoch": 0.71375, "grad_norm": 3.0243709087371826, "learning_rate": 1.3191776315789473e-06, "loss": 0.0699, "step": 14275 }, { "epoch": 0.715, "grad_norm": 3.4510324001312256, "learning_rate": 1.3134210526315788e-06, "loss": 0.0822, "step": 14300 }, { "epoch": 0.71625, "grad_norm": 1.71156907081604, "learning_rate": 1.3076644736842105e-06, "loss": 0.0817, "step": 14325 }, { "epoch": 0.7175, "grad_norm": 1.4711543321609497, "learning_rate": 1.301907894736842e-06, "loss": 0.0573, "step": 14350 }, { "epoch": 0.71875, "grad_norm": 1.4108855724334717, "learning_rate": 1.2961513157894735e-06, "loss": 0.0518, "step": 14375 }, { "epoch": 0.72, "grad_norm": 1.4882175922393799, "learning_rate": 1.290394736842105e-06, "loss": 0.0585, "step": 14400 }, { "epoch": 0.72125, "grad_norm": 1.6964808702468872, "learning_rate": 1.2846381578947368e-06, "loss": 0.0562, "step": 14425 }, { "epoch": 0.7225, "grad_norm": 1.7226653099060059, "learning_rate": 1.2788815789473683e-06, "loss": 0.0574, "step": 14450 }, { "epoch": 0.72375, "grad_norm": 2.7214572429656982, "learning_rate": 1.2731249999999998e-06, "loss": 0.0629, "step": 14475 }, { "epoch": 0.725, "grad_norm": 1.1752701997756958, "learning_rate": 1.2673684210526313e-06, "loss": 0.0534, "step": 14500 }, { "epoch": 0.725, "eval_loss": 0.13807399570941925, "eval_runtime": 533.693, "eval_samples_per_second": 3.168, "eval_steps_per_second": 0.397, "eval_wer": 7.04383664293743, "step": 14500 }, { "epoch": 0.72625, "grad_norm": 5.266875267028809, "learning_rate": 1.261611842105263e-06, "loss": 0.0553, "step": 14525 }, { "epoch": 0.7275, "grad_norm": 2.1979897022247314, "learning_rate": 1.2558552631578947e-06, "loss": 0.0498, "step": 14550 }, { "epoch": 0.72875, "grad_norm": 1.445584774017334, "learning_rate": 1.2500986842105262e-06, "loss": 0.0432, "step": 14575 }, { "epoch": 0.73, "grad_norm": 0.985780656337738, "learning_rate": 1.244342105263158e-06, "loss": 0.0398, "step": 14600 }, { "epoch": 0.73125, "grad_norm": 1.4595451354980469, "learning_rate": 1.2385855263157894e-06, "loss": 0.0472, "step": 14625 }, { "epoch": 0.7325, "grad_norm": 1.6958725452423096, "learning_rate": 1.232828947368421e-06, "loss": 0.0451, "step": 14650 }, { "epoch": 0.73375, "grad_norm": 1.4922881126403809, "learning_rate": 1.2270723684210525e-06, "loss": 0.0483, "step": 14675 }, { "epoch": 0.735, "grad_norm": 2.243989944458008, "learning_rate": 1.2213157894736842e-06, "loss": 0.0691, "step": 14700 }, { "epoch": 0.73625, "grad_norm": 3.160104513168335, "learning_rate": 1.2155592105263157e-06, "loss": 0.0814, "step": 14725 }, { "epoch": 0.7375, "grad_norm": 2.0205318927764893, "learning_rate": 1.2098026315789472e-06, "loss": 0.0693, "step": 14750 }, { "epoch": 0.73875, "grad_norm": 1.519434928894043, "learning_rate": 1.2040460526315787e-06, "loss": 0.0589, "step": 14775 }, { "epoch": 0.74, "grad_norm": 2.59538197517395, "learning_rate": 1.1982894736842104e-06, "loss": 0.0546, "step": 14800 }, { "epoch": 0.74125, "grad_norm": 2.137489080429077, "learning_rate": 1.192532894736842e-06, "loss": 0.0679, "step": 14825 }, { "epoch": 0.7425, "grad_norm": 1.5184602737426758, "learning_rate": 1.1867763157894734e-06, "loss": 0.0685, "step": 14850 }, { "epoch": 0.74375, "grad_norm": 2.101884365081787, "learning_rate": 1.1810197368421054e-06, "loss": 0.0526, "step": 14875 }, { "epoch": 0.745, "grad_norm": 1.778254508972168, "learning_rate": 1.1752631578947369e-06, "loss": 0.0463, "step": 14900 }, { "epoch": 0.74625, "grad_norm": 2.073361873626709, "learning_rate": 1.1695065789473684e-06, "loss": 0.0542, "step": 14925 }, { "epoch": 0.7475, "grad_norm": 2.091325283050537, "learning_rate": 1.1637499999999999e-06, "loss": 0.0456, "step": 14950 }, { "epoch": 0.74875, "grad_norm": 1.7418571710586548, "learning_rate": 1.1579934210526316e-06, "loss": 0.0435, "step": 14975 }, { "epoch": 0.75, "grad_norm": 1.8316125869750977, "learning_rate": 1.152236842105263e-06, "loss": 0.046, "step": 15000 }, { "epoch": 0.75, "eval_loss": 0.14957565069198608, "eval_runtime": 534.2678, "eval_samples_per_second": 3.165, "eval_steps_per_second": 0.397, "eval_wer": 7.081303859123267, "step": 15000 }, { "epoch": 0.75125, "grad_norm": 2.781534433364868, "learning_rate": 1.1467105263157894e-06, "loss": 0.0728, "step": 15025 }, { "epoch": 0.7525, "grad_norm": 2.0675017833709717, "learning_rate": 1.1409539473684209e-06, "loss": 0.095, "step": 15050 }, { "epoch": 0.75375, "grad_norm": 3.430636167526245, "learning_rate": 1.1351973684210524e-06, "loss": 0.0966, "step": 15075 }, { "epoch": 0.755, "grad_norm": 3.50378680229187, "learning_rate": 1.129440789473684e-06, "loss": 0.1087, "step": 15100 }, { "epoch": 0.75625, "grad_norm": 2.9562337398529053, "learning_rate": 1.1236842105263156e-06, "loss": 0.1098, "step": 15125 }, { "epoch": 0.7575, "grad_norm": 2.7388198375701904, "learning_rate": 1.1179276315789471e-06, "loss": 0.1328, "step": 15150 }, { "epoch": 0.75875, "grad_norm": 3.3490402698516846, "learning_rate": 1.112171052631579e-06, "loss": 0.097, "step": 15175 }, { "epoch": 0.76, "grad_norm": 1.3750718832015991, "learning_rate": 1.1064144736842105e-06, "loss": 0.0722, "step": 15200 }, { "epoch": 0.76125, "grad_norm": 1.7064391374588013, "learning_rate": 1.100657894736842e-06, "loss": 0.0588, "step": 15225 }, { "epoch": 0.7625, "grad_norm": 1.8604276180267334, "learning_rate": 1.0949013157894736e-06, "loss": 0.0557, "step": 15250 }, { "epoch": 0.76375, "grad_norm": 1.2240312099456787, "learning_rate": 1.0891447368421053e-06, "loss": 0.0438, "step": 15275 }, { "epoch": 0.765, "grad_norm": 1.5873894691467285, "learning_rate": 1.0833881578947368e-06, "loss": 0.0471, "step": 15300 }, { "epoch": 0.76625, "grad_norm": 1.645041823387146, "learning_rate": 1.0776315789473683e-06, "loss": 0.0586, "step": 15325 }, { "epoch": 0.7675, "grad_norm": 2.3403167724609375, "learning_rate": 1.0718749999999998e-06, "loss": 0.0698, "step": 15350 }, { "epoch": 0.76875, "grad_norm": 2.5629897117614746, "learning_rate": 1.0661184210526315e-06, "loss": 0.068, "step": 15375 }, { "epoch": 0.77, "grad_norm": 2.1160974502563477, "learning_rate": 1.060361842105263e-06, "loss": 0.0771, "step": 15400 }, { "epoch": 0.77125, "grad_norm": 2.094522714614868, "learning_rate": 1.0546052631578947e-06, "loss": 0.0882, "step": 15425 }, { "epoch": 0.7725, "grad_norm": 2.3391168117523193, "learning_rate": 1.0488486842105262e-06, "loss": 0.0746, "step": 15450 }, { "epoch": 0.77375, "grad_norm": 2.208967924118042, "learning_rate": 1.0430921052631577e-06, "loss": 0.0725, "step": 15475 }, { "epoch": 0.775, "grad_norm": 2.7758445739746094, "learning_rate": 1.0373355263157895e-06, "loss": 0.066, "step": 15500 }, { "epoch": 0.775, "eval_loss": 0.1524539738893509, "eval_runtime": 533.899, "eval_samples_per_second": 3.167, "eval_steps_per_second": 0.397, "eval_wer": 7.00012489072062, "step": 15500 }, { "epoch": 0.77625, "grad_norm": 1.5453675985336304, "learning_rate": 1.031578947368421e-06, "loss": 0.0511, "step": 15525 }, { "epoch": 0.7775, "grad_norm": 2.0205094814300537, "learning_rate": 1.0258223684210525e-06, "loss": 0.05, "step": 15550 }, { "epoch": 0.77875, "grad_norm": 1.2804875373840332, "learning_rate": 1.020065789473684e-06, "loss": 0.0598, "step": 15575 }, { "epoch": 0.78, "grad_norm": 2.22847843170166, "learning_rate": 1.0143092105263157e-06, "loss": 0.0686, "step": 15600 }, { "epoch": 0.78125, "grad_norm": 2.523324489593506, "learning_rate": 1.0085526315789472e-06, "loss": 0.1251, "step": 15625 }, { "epoch": 0.7825, "grad_norm": 1.8177152872085571, "learning_rate": 1.002796052631579e-06, "loss": 0.1014, "step": 15650 }, { "epoch": 0.78375, "grad_norm": 1.9223369359970093, "learning_rate": 9.970394736842104e-07, "loss": 0.0604, "step": 15675 }, { "epoch": 0.785, "grad_norm": 1.9404890537261963, "learning_rate": 9.91282894736842e-07, "loss": 0.0556, "step": 15700 }, { "epoch": 0.78625, "grad_norm": 1.354697823524475, "learning_rate": 9.855263157894737e-07, "loss": 0.0452, "step": 15725 }, { "epoch": 0.7875, "grad_norm": 0.9245623350143433, "learning_rate": 9.797697368421052e-07, "loss": 0.0536, "step": 15750 }, { "epoch": 0.78875, "grad_norm": 1.3286716938018799, "learning_rate": 9.740131578947369e-07, "loss": 0.0402, "step": 15775 }, { "epoch": 0.79, "grad_norm": 2.337540626525879, "learning_rate": 9.682565789473684e-07, "loss": 0.0619, "step": 15800 }, { "epoch": 0.79125, "grad_norm": 1.3047797679901123, "learning_rate": 9.624999999999999e-07, "loss": 0.0582, "step": 15825 }, { "epoch": 0.7925, "grad_norm": 1.5523693561553955, "learning_rate": 9.567434210526314e-07, "loss": 0.0461, "step": 15850 }, { "epoch": 0.79375, "grad_norm": 0.8749285340309143, "learning_rate": 9.50986842105263e-07, "loss": 0.0458, "step": 15875 }, { "epoch": 0.795, "grad_norm": 1.0452526807785034, "learning_rate": 9.452302631578946e-07, "loss": 0.0419, "step": 15900 }, { "epoch": 0.79625, "grad_norm": 1.9379664659500122, "learning_rate": 9.394736842105263e-07, "loss": 0.0566, "step": 15925 }, { "epoch": 0.7975, "grad_norm": 1.316031575202942, "learning_rate": 9.337171052631578e-07, "loss": 0.0473, "step": 15950 }, { "epoch": 0.79875, "grad_norm": 1.216234564781189, "learning_rate": 9.279605263157895e-07, "loss": 0.0567, "step": 15975 }, { "epoch": 0.8, "grad_norm": 1.7266921997070312, "learning_rate": 9.22203947368421e-07, "loss": 0.0632, "step": 16000 }, { "epoch": 0.8, "eval_loss": 0.14084434509277344, "eval_runtime": 535.4097, "eval_samples_per_second": 3.158, "eval_steps_per_second": 0.396, "eval_wer": 6.681653553141001, "step": 16000 }, { "epoch": 0.80125, "grad_norm": 1.8532096147537231, "learning_rate": 9.164473684210526e-07, "loss": 0.0579, "step": 16025 }, { "epoch": 0.8025, "grad_norm": 2.181915044784546, "learning_rate": 9.106907894736841e-07, "loss": 0.0757, "step": 16050 }, { "epoch": 0.80375, "grad_norm": 2.2596707344055176, "learning_rate": 9.049342105263157e-07, "loss": 0.0729, "step": 16075 }, { "epoch": 0.805, "grad_norm": 1.2219024896621704, "learning_rate": 8.991776315789473e-07, "loss": 0.0666, "step": 16100 }, { "epoch": 0.80625, "grad_norm": 1.135261058807373, "learning_rate": 8.934210526315789e-07, "loss": 0.0627, "step": 16125 }, { "epoch": 0.8075, "grad_norm": 1.6599974632263184, "learning_rate": 8.876644736842104e-07, "loss": 0.0477, "step": 16150 }, { "epoch": 0.80875, "grad_norm": 1.7189278602600098, "learning_rate": 8.81907894736842e-07, "loss": 0.049, "step": 16175 }, { "epoch": 0.81, "grad_norm": 0.837539553642273, "learning_rate": 8.761513157894735e-07, "loss": 0.0489, "step": 16200 }, { "epoch": 0.81125, "grad_norm": 1.5122978687286377, "learning_rate": 8.703947368421051e-07, "loss": 0.0389, "step": 16225 }, { "epoch": 0.8125, "grad_norm": 1.7276921272277832, "learning_rate": 8.646381578947368e-07, "loss": 0.0449, "step": 16250 }, { "epoch": 0.81375, "grad_norm": 2.028928756713867, "learning_rate": 8.588815789473684e-07, "loss": 0.045, "step": 16275 }, { "epoch": 0.815, "grad_norm": 1.258401870727539, "learning_rate": 8.53125e-07, "loss": 0.0413, "step": 16300 }, { "epoch": 0.81625, "grad_norm": 1.2878379821777344, "learning_rate": 8.473684210526315e-07, "loss": 0.0454, "step": 16325 }, { "epoch": 0.8175, "grad_norm": 0.9309024810791016, "learning_rate": 8.416118421052631e-07, "loss": 0.0389, "step": 16350 }, { "epoch": 0.81875, "grad_norm": 0.6321396231651306, "learning_rate": 8.358552631578946e-07, "loss": 0.0282, "step": 16375 }, { "epoch": 0.82, "grad_norm": 1.8799151182174683, "learning_rate": 8.300986842105262e-07, "loss": 0.0316, "step": 16400 }, { "epoch": 0.82125, "grad_norm": 0.642666220664978, "learning_rate": 8.243421052631577e-07, "loss": 0.0415, "step": 16425 }, { "epoch": 0.8225, "grad_norm": 1.199803352355957, "learning_rate": 8.185855263157894e-07, "loss": 0.0503, "step": 16450 }, { "epoch": 0.82375, "grad_norm": 1.517521858215332, "learning_rate": 8.128289473684211e-07, "loss": 0.0342, "step": 16475 }, { "epoch": 0.825, "grad_norm": 1.683922290802002, "learning_rate": 8.070723684210526e-07, "loss": 0.0437, "step": 16500 }, { "epoch": 0.825, "eval_loss": 0.1474502831697464, "eval_runtime": 533.549, "eval_samples_per_second": 3.169, "eval_steps_per_second": 0.397, "eval_wer": 6.594230048707381, "step": 16500 }, { "epoch": 0.82625, "grad_norm": 2.0518248081207275, "learning_rate": 8.013157894736842e-07, "loss": 0.0411, "step": 16525 }, { "epoch": 0.8275, "grad_norm": 1.139129638671875, "learning_rate": 7.955592105263157e-07, "loss": 0.0426, "step": 16550 }, { "epoch": 0.82875, "grad_norm": 0.7436901926994324, "learning_rate": 7.898026315789473e-07, "loss": 0.0413, "step": 16575 }, { "epoch": 0.83, "grad_norm": 0.8292795419692993, "learning_rate": 7.840460526315789e-07, "loss": 0.0456, "step": 16600 }, { "epoch": 0.83125, "grad_norm": 2.60646390914917, "learning_rate": 7.782894736842105e-07, "loss": 0.0459, "step": 16625 }, { "epoch": 0.8325, "grad_norm": 2.15118408203125, "learning_rate": 7.72532894736842e-07, "loss": 0.0653, "step": 16650 }, { "epoch": 0.83375, "grad_norm": 1.8501421213150024, "learning_rate": 7.667763157894736e-07, "loss": 0.0702, "step": 16675 }, { "epoch": 0.835, "grad_norm": 1.579913854598999, "learning_rate": 7.610197368421051e-07, "loss": 0.0693, "step": 16700 }, { "epoch": 0.83625, "grad_norm": 2.4913477897644043, "learning_rate": 7.552631578947367e-07, "loss": 0.0874, "step": 16725 }, { "epoch": 0.8375, "grad_norm": 2.489863634109497, "learning_rate": 7.495065789473683e-07, "loss": 0.0642, "step": 16750 }, { "epoch": 0.83875, "grad_norm": 4.630337715148926, "learning_rate": 7.4375e-07, "loss": 0.0728, "step": 16775 }, { "epoch": 0.84, "grad_norm": 1.708297848701477, "learning_rate": 7.379934210526316e-07, "loss": 0.056, "step": 16800 }, { "epoch": 0.84125, "grad_norm": 1.7515946626663208, "learning_rate": 7.322368421052631e-07, "loss": 0.0477, "step": 16825 }, { "epoch": 0.8425, "grad_norm": 1.6641236543655396, "learning_rate": 7.264802631578947e-07, "loss": 0.0508, "step": 16850 }, { "epoch": 0.84375, "grad_norm": 1.693472146987915, "learning_rate": 7.207236842105262e-07, "loss": 0.0457, "step": 16875 }, { "epoch": 0.845, "grad_norm": 0.845664381980896, "learning_rate": 7.149671052631578e-07, "loss": 0.0415, "step": 16900 }, { "epoch": 0.84625, "grad_norm": 1.8824065923690796, "learning_rate": 7.092105263157893e-07, "loss": 0.0481, "step": 16925 }, { "epoch": 0.8475, "grad_norm": 1.9034583568572998, "learning_rate": 7.03453947368421e-07, "loss": 0.0496, "step": 16950 }, { "epoch": 0.84875, "grad_norm": 2.6840953826904297, "learning_rate": 6.976973684210525e-07, "loss": 0.0574, "step": 16975 }, { "epoch": 0.85, "grad_norm": 1.8385533094406128, "learning_rate": 6.919407894736842e-07, "loss": 0.0478, "step": 17000 }, { "epoch": 0.85, "eval_loss": 0.15727710723876953, "eval_runtime": 534.9573, "eval_samples_per_second": 3.161, "eval_steps_per_second": 0.396, "eval_wer": 6.794055201698514, "step": 17000 }, { "epoch": 0.85125, "grad_norm": 1.590932011604309, "learning_rate": 6.864144736842104e-07, "loss": 0.0589, "step": 17025 }, { "epoch": 0.8525, "grad_norm": 1.005034327507019, "learning_rate": 6.806578947368419e-07, "loss": 0.0554, "step": 17050 }, { "epoch": 0.85375, "grad_norm": 3.3872015476226807, "learning_rate": 6.749013157894737e-07, "loss": 0.0572, "step": 17075 }, { "epoch": 0.855, "grad_norm": 3.8093373775482178, "learning_rate": 6.691447368421053e-07, "loss": 0.1078, "step": 17100 }, { "epoch": 0.85625, "grad_norm": 2.587963581085205, "learning_rate": 6.633881578947368e-07, "loss": 0.1426, "step": 17125 }, { "epoch": 0.8575, "grad_norm": 3.9271957874298096, "learning_rate": 6.576315789473684e-07, "loss": 0.1496, "step": 17150 }, { "epoch": 0.85875, "grad_norm": 3.7258965969085693, "learning_rate": 6.518749999999999e-07, "loss": 0.1852, "step": 17175 }, { "epoch": 0.86, "grad_norm": 4.298374652862549, "learning_rate": 6.461184210526315e-07, "loss": 0.2419, "step": 17200 }, { "epoch": 0.86125, "grad_norm": 6.419559478759766, "learning_rate": 6.403618421052631e-07, "loss": 0.225, "step": 17225 }, { "epoch": 0.8625, "grad_norm": 4.669430732727051, "learning_rate": 6.346052631578947e-07, "loss": 0.297, "step": 17250 }, { "epoch": 0.86375, "grad_norm": 4.676415920257568, "learning_rate": 6.288486842105262e-07, "loss": 0.2001, "step": 17275 }, { "epoch": 0.865, "grad_norm": 1.519974708557129, "learning_rate": 6.230921052631579e-07, "loss": 0.1029, "step": 17300 }, { "epoch": 0.86625, "grad_norm": 2.9553279876708984, "learning_rate": 6.173355263157894e-07, "loss": 0.0917, "step": 17325 }, { "epoch": 0.8675, "grad_norm": 1.5657232999801636, "learning_rate": 6.11578947368421e-07, "loss": 0.088, "step": 17350 }, { "epoch": 0.86875, "grad_norm": 3.1620709896087646, "learning_rate": 6.058223684210525e-07, "loss": 0.1046, "step": 17375 }, { "epoch": 0.87, "grad_norm": 3.469240188598633, "learning_rate": 6.000657894736842e-07, "loss": 0.1004, "step": 17400 }, { "epoch": 0.87125, "grad_norm": 1.9016904830932617, "learning_rate": 5.943092105263158e-07, "loss": 0.0875, "step": 17425 }, { "epoch": 0.8725, "grad_norm": 3.1401467323303223, "learning_rate": 5.885526315789473e-07, "loss": 0.0593, "step": 17450 }, { "epoch": 0.87375, "grad_norm": 1.1564242839813232, "learning_rate": 5.827960526315789e-07, "loss": 0.0444, "step": 17475 }, { "epoch": 0.875, "grad_norm": 0.9873404502868652, "learning_rate": 5.770394736842104e-07, "loss": 0.0418, "step": 17500 }, { "epoch": 0.875, "eval_loss": 0.156468465924263, "eval_runtime": 534.8937, "eval_samples_per_second": 3.161, "eval_steps_per_second": 0.396, "eval_wer": 6.650430872986138, "step": 17500 }, { "epoch": 0.87625, "grad_norm": 1.499561071395874, "learning_rate": 5.71282894736842e-07, "loss": 0.0423, "step": 17525 }, { "epoch": 0.8775, "grad_norm": 1.0905530452728271, "learning_rate": 5.655263157894735e-07, "loss": 0.0496, "step": 17550 }, { "epoch": 0.87875, "grad_norm": 1.6048545837402344, "learning_rate": 5.597697368421053e-07, "loss": 0.0437, "step": 17575 }, { "epoch": 0.88, "grad_norm": 1.5219619274139404, "learning_rate": 5.540131578947369e-07, "loss": 0.0676, "step": 17600 }, { "epoch": 0.88125, "grad_norm": 1.8919825553894043, "learning_rate": 5.482565789473684e-07, "loss": 0.0647, "step": 17625 }, { "epoch": 0.8825, "grad_norm": 2.4546618461608887, "learning_rate": 5.425e-07, "loss": 0.0625, "step": 17650 }, { "epoch": 0.88375, "grad_norm": 1.7209670543670654, "learning_rate": 5.367434210526315e-07, "loss": 0.0661, "step": 17675 }, { "epoch": 0.885, "grad_norm": 2.5535149574279785, "learning_rate": 5.309868421052631e-07, "loss": 0.0691, "step": 17700 }, { "epoch": 0.88625, "grad_norm": 3.5450563430786133, "learning_rate": 5.252302631578947e-07, "loss": 0.0603, "step": 17725 }, { "epoch": 0.8875, "grad_norm": 1.4123398065567017, "learning_rate": 5.194736842105262e-07, "loss": 0.0666, "step": 17750 }, { "epoch": 0.88875, "grad_norm": 1.427933931350708, "learning_rate": 5.137171052631578e-07, "loss": 0.0428, "step": 17775 }, { "epoch": 0.89, "grad_norm": 1.3647822141647339, "learning_rate": 5.079605263157895e-07, "loss": 0.0382, "step": 17800 }, { "epoch": 0.89125, "grad_norm": 1.1601825952529907, "learning_rate": 5.02203947368421e-07, "loss": 0.0485, "step": 17825 }, { "epoch": 0.8925, "grad_norm": 1.2409619092941284, "learning_rate": 4.964473684210526e-07, "loss": 0.0439, "step": 17850 }, { "epoch": 0.89375, "grad_norm": 2.1224701404571533, "learning_rate": 4.906907894736842e-07, "loss": 0.0463, "step": 17875 }, { "epoch": 0.895, "grad_norm": 1.7053598165512085, "learning_rate": 4.849342105263158e-07, "loss": 0.0419, "step": 17900 }, { "epoch": 0.89625, "grad_norm": 1.2734942436218262, "learning_rate": 4.791776315789473e-07, "loss": 0.0385, "step": 17925 }, { "epoch": 0.8975, "grad_norm": 1.448438048362732, "learning_rate": 4.734210526315789e-07, "loss": 0.0397, "step": 17950 }, { "epoch": 0.89875, "grad_norm": 1.1724251508712769, "learning_rate": 4.6766447368421047e-07, "loss": 0.0454, "step": 17975 }, { "epoch": 0.9, "grad_norm": 1.4745044708251953, "learning_rate": 4.6190789473684203e-07, "loss": 0.0382, "step": 18000 }, { "epoch": 0.9, "eval_loss": 0.15590737760066986, "eval_runtime": 533.3825, "eval_samples_per_second": 3.17, "eval_steps_per_second": 0.397, "eval_wer": 6.563007368552516, "step": 18000 }, { "epoch": 0.90125, "grad_norm": 1.3046791553497314, "learning_rate": 4.5615131578947364e-07, "loss": 0.0497, "step": 18025 }, { "epoch": 0.9025, "grad_norm": 2.052855968475342, "learning_rate": 4.5039473684210525e-07, "loss": 0.0542, "step": 18050 }, { "epoch": 0.90375, "grad_norm": 1.6691333055496216, "learning_rate": 4.446381578947368e-07, "loss": 0.0503, "step": 18075 }, { "epoch": 0.905, "grad_norm": 3.4304769039154053, "learning_rate": 4.3888157894736837e-07, "loss": 0.0572, "step": 18100 }, { "epoch": 0.90625, "grad_norm": 1.5289900302886963, "learning_rate": 4.33125e-07, "loss": 0.07, "step": 18125 }, { "epoch": 0.9075, "grad_norm": 2.5705385208129883, "learning_rate": 4.2736842105263154e-07, "loss": 0.0778, "step": 18150 }, { "epoch": 0.90875, "grad_norm": 2.34914493560791, "learning_rate": 4.216118421052631e-07, "loss": 0.0739, "step": 18175 }, { "epoch": 0.91, "grad_norm": 2.6740806102752686, "learning_rate": 4.158552631578947e-07, "loss": 0.0621, "step": 18200 }, { "epoch": 0.91125, "grad_norm": 0.931742787361145, "learning_rate": 4.1009868421052627e-07, "loss": 0.0636, "step": 18225 }, { "epoch": 0.9125, "grad_norm": 1.7513364553451538, "learning_rate": 4.0434210526315783e-07, "loss": 0.0526, "step": 18250 }, { "epoch": 0.91375, "grad_norm": 1.3136606216430664, "learning_rate": 3.985855263157894e-07, "loss": 0.0469, "step": 18275 }, { "epoch": 0.915, "grad_norm": 1.2674484252929688, "learning_rate": 3.9282894736842105e-07, "loss": 0.0569, "step": 18300 }, { "epoch": 0.91625, "grad_norm": 2.0879714488983154, "learning_rate": 3.870723684210526e-07, "loss": 0.056, "step": 18325 }, { "epoch": 0.9175, "grad_norm": 1.6177654266357422, "learning_rate": 3.8131578947368417e-07, "loss": 0.0496, "step": 18350 }, { "epoch": 0.91875, "grad_norm": 1.267562985420227, "learning_rate": 3.755592105263158e-07, "loss": 0.0474, "step": 18375 }, { "epoch": 0.92, "grad_norm": 1.9628887176513672, "learning_rate": 3.6980263157894734e-07, "loss": 0.0523, "step": 18400 }, { "epoch": 0.92125, "grad_norm": 2.450678586959839, "learning_rate": 3.640460526315789e-07, "loss": 0.0571, "step": 18425 }, { "epoch": 0.9225, "grad_norm": 3.2376692295074463, "learning_rate": 3.5828947368421046e-07, "loss": 0.0726, "step": 18450 }, { "epoch": 0.92375, "grad_norm": 1.718723177909851, "learning_rate": 3.5253289473684207e-07, "loss": 0.0759, "step": 18475 }, { "epoch": 0.925, "grad_norm": 1.7278677225112915, "learning_rate": 3.4677631578947363e-07, "loss": 0.0658, "step": 18500 }, { "epoch": 0.925, "eval_loss": 0.145228311419487, "eval_runtime": 537.5559, "eval_samples_per_second": 3.146, "eval_steps_per_second": 0.394, "eval_wer": 6.563007368552516, "step": 18500 }, { "epoch": 0.92625, "grad_norm": 2.314218044281006, "learning_rate": 3.410197368421052e-07, "loss": 0.083, "step": 18525 }, { "epoch": 0.9275, "grad_norm": 2.6032817363739014, "learning_rate": 3.3526315789473685e-07, "loss": 0.0796, "step": 18550 }, { "epoch": 0.92875, "grad_norm": 1.2821646928787231, "learning_rate": 3.295065789473684e-07, "loss": 0.0475, "step": 18575 }, { "epoch": 0.93, "grad_norm": 1.2048566341400146, "learning_rate": 3.2374999999999997e-07, "loss": 0.0441, "step": 18600 }, { "epoch": 0.93125, "grad_norm": 2.205629348754883, "learning_rate": 3.179934210526316e-07, "loss": 0.0529, "step": 18625 }, { "epoch": 0.9325, "grad_norm": 0.948354959487915, "learning_rate": 3.1223684210526314e-07, "loss": 0.0491, "step": 18650 }, { "epoch": 0.93375, "grad_norm": 0.8600139617919922, "learning_rate": 3.064802631578947e-07, "loss": 0.0397, "step": 18675 }, { "epoch": 0.935, "grad_norm": 1.5570470094680786, "learning_rate": 3.0072368421052626e-07, "loss": 0.0479, "step": 18700 }, { "epoch": 0.93625, "grad_norm": 1.6737167835235596, "learning_rate": 2.9496710526315787e-07, "loss": 0.053, "step": 18725 }, { "epoch": 0.9375, "grad_norm": 1.7217572927474976, "learning_rate": 2.8921052631578943e-07, "loss": 0.0551, "step": 18750 }, { "epoch": 0.93875, "grad_norm": 2.207542896270752, "learning_rate": 2.83453947368421e-07, "loss": 0.0618, "step": 18775 }, { "epoch": 0.94, "grad_norm": 1.6761177778244019, "learning_rate": 2.7769736842105265e-07, "loss": 0.0652, "step": 18800 }, { "epoch": 0.94125, "grad_norm": 2.9946813583374023, "learning_rate": 2.719407894736842e-07, "loss": 0.0738, "step": 18825 }, { "epoch": 0.9425, "grad_norm": 2.3663125038146973, "learning_rate": 2.6618421052631577e-07, "loss": 0.0629, "step": 18850 }, { "epoch": 0.94375, "grad_norm": 2.5888278484344482, "learning_rate": 2.6042763157894733e-07, "loss": 0.2007, "step": 18875 }, { "epoch": 0.945, "grad_norm": 1.5639821290969849, "learning_rate": 2.5467105263157894e-07, "loss": 0.2028, "step": 18900 }, { "epoch": 0.94625, "grad_norm": 1.5892317295074463, "learning_rate": 2.489144736842105e-07, "loss": 0.1062, "step": 18925 }, { "epoch": 0.9475, "grad_norm": 2.5038766860961914, "learning_rate": 2.431578947368421e-07, "loss": 0.06, "step": 18950 }, { "epoch": 0.94875, "grad_norm": 2.531886577606201, "learning_rate": 2.3740131578947364e-07, "loss": 0.0553, "step": 18975 }, { "epoch": 0.95, "grad_norm": 2.5510354042053223, "learning_rate": 2.3164473684210526e-07, "loss": 0.0531, "step": 19000 }, { "epoch": 0.95, "eval_loss": 0.15760228037834167, "eval_runtime": 534.6809, "eval_samples_per_second": 3.163, "eval_steps_per_second": 0.396, "eval_wer": 6.662919945048083, "step": 19000 }, { "epoch": 0.95125, "grad_norm": 3.058936357498169, "learning_rate": 2.2588815789473684e-07, "loss": 0.061, "step": 19025 }, { "epoch": 0.9525, "grad_norm": 4.343925476074219, "learning_rate": 2.201315789473684e-07, "loss": 0.0775, "step": 19050 }, { "epoch": 0.95375, "grad_norm": 3.271355628967285, "learning_rate": 2.1437499999999999e-07, "loss": 0.0762, "step": 19075 }, { "epoch": 0.955, "grad_norm": 1.7924737930297852, "learning_rate": 2.0861842105263154e-07, "loss": 0.0733, "step": 19100 }, { "epoch": 0.95625, "grad_norm": 2.034940719604492, "learning_rate": 2.0286184210526313e-07, "loss": 0.0747, "step": 19125 }, { "epoch": 0.9575, "grad_norm": 3.0561563968658447, "learning_rate": 1.9733552631578946e-07, "loss": 0.0841, "step": 19150 }, { "epoch": 0.95875, "grad_norm": 1.5333133935928345, "learning_rate": 1.9157894736842102e-07, "loss": 0.0656, "step": 19175 }, { "epoch": 0.96, "grad_norm": 1.5307198762893677, "learning_rate": 1.858223684210526e-07, "loss": 0.0532, "step": 19200 }, { "epoch": 0.96125, "grad_norm": 1.5663795471191406, "learning_rate": 1.8006578947368422e-07, "loss": 0.0485, "step": 19225 }, { "epoch": 0.9625, "grad_norm": 1.8204154968261719, "learning_rate": 1.7430921052631578e-07, "loss": 0.0506, "step": 19250 }, { "epoch": 0.96375, "grad_norm": 0.6307218074798584, "learning_rate": 1.6855263157894736e-07, "loss": 0.0412, "step": 19275 }, { "epoch": 0.965, "grad_norm": 1.2638368606567383, "learning_rate": 1.6279605263157892e-07, "loss": 0.0367, "step": 19300 }, { "epoch": 0.96625, "grad_norm": 1.448020100593567, "learning_rate": 1.570394736842105e-07, "loss": 0.0461, "step": 19325 }, { "epoch": 0.9675, "grad_norm": 1.148501992225647, "learning_rate": 1.5128289473684207e-07, "loss": 0.0321, "step": 19350 }, { "epoch": 0.96875, "grad_norm": 1.24919593334198, "learning_rate": 1.4552631578947368e-07, "loss": 0.0442, "step": 19375 }, { "epoch": 0.97, "grad_norm": 1.437836766242981, "learning_rate": 1.3976973684210526e-07, "loss": 0.0418, "step": 19400 }, { "epoch": 0.97125, "grad_norm": 1.7176451683044434, "learning_rate": 1.3401315789473682e-07, "loss": 0.0423, "step": 19425 }, { "epoch": 0.9725, "grad_norm": 0.6188969016075134, "learning_rate": 1.282565789473684e-07, "loss": 0.0372, "step": 19450 }, { "epoch": 0.97375, "grad_norm": 0.7245228886604309, "learning_rate": 1.225e-07, "loss": 0.0447, "step": 19475 }, { "epoch": 0.975, "grad_norm": 1.1836830377578735, "learning_rate": 1.1674342105263156e-07, "loss": 0.0416, "step": 19500 }, { "epoch": 0.975, "eval_loss": 0.1550171822309494, "eval_runtime": 533.135, "eval_samples_per_second": 3.172, "eval_steps_per_second": 0.398, "eval_wer": 6.544273760459599, "step": 19500 }, { "epoch": 0.97625, "grad_norm": 2.7617335319519043, "learning_rate": 1.1098684210526315e-07, "loss": 0.0481, "step": 19525 }, { "epoch": 0.9775, "grad_norm": 1.8646786212921143, "learning_rate": 1.0523026315789472e-07, "loss": 0.0479, "step": 19550 }, { "epoch": 0.97875, "grad_norm": 3.3118820190429688, "learning_rate": 9.947368421052632e-08, "loss": 0.0622, "step": 19575 }, { "epoch": 0.98, "grad_norm": 1.3400448560714722, "learning_rate": 9.371710526315789e-08, "loss": 0.0727, "step": 19600 }, { "epoch": 0.98125, "grad_norm": 3.044895648956299, "learning_rate": 8.796052631578946e-08, "loss": 0.0613, "step": 19625 }, { "epoch": 0.9825, "grad_norm": 3.217283248901367, "learning_rate": 8.220394736842105e-08, "loss": 0.0814, "step": 19650 }, { "epoch": 0.98375, "grad_norm": 1.3824083805084229, "learning_rate": 7.644736842105262e-08, "loss": 0.0598, "step": 19675 }, { "epoch": 0.985, "grad_norm": 1.3852965831756592, "learning_rate": 7.069078947368419e-08, "loss": 0.0504, "step": 19700 }, { "epoch": 0.98625, "grad_norm": 2.5811800956726074, "learning_rate": 6.493421052631578e-08, "loss": 0.0643, "step": 19725 }, { "epoch": 0.9875, "grad_norm": 1.2770925760269165, "learning_rate": 5.9177631578947364e-08, "loss": 0.0586, "step": 19750 }, { "epoch": 0.98875, "grad_norm": 2.8050851821899414, "learning_rate": 5.342105263157894e-08, "loss": 0.0623, "step": 19775 }, { "epoch": 0.99, "grad_norm": 3.1270270347595215, "learning_rate": 4.766447368421052e-08, "loss": 0.1128, "step": 19800 }, { "epoch": 0.99125, "grad_norm": 2.787506103515625, "learning_rate": 4.1907894736842107e-08, "loss": 0.0945, "step": 19825 }, { "epoch": 0.9925, "grad_norm": 2.0053322315216064, "learning_rate": 3.615131578947368e-08, "loss": 0.0676, "step": 19850 }, { "epoch": 0.99375, "grad_norm": 2.319840669631958, "learning_rate": 3.0394736842105264e-08, "loss": 0.0434, "step": 19875 }, { "epoch": 0.995, "grad_norm": 1.3756728172302246, "learning_rate": 2.463815789473684e-08, "loss": 0.0458, "step": 19900 }, { "epoch": 0.99625, "grad_norm": 1.3499048948287964, "learning_rate": 1.8881578947368418e-08, "loss": 0.0426, "step": 19925 }, { "epoch": 0.9975, "grad_norm": 1.0059881210327148, "learning_rate": 1.3124999999999998e-08, "loss": 0.0543, "step": 19950 }, { "epoch": 0.99875, "grad_norm": 1.247534155845642, "learning_rate": 7.368421052631579e-09, "loss": 0.0526, "step": 19975 }, { "epoch": 1.0, "grad_norm": 1.3380235433578491, "learning_rate": 1.6118421052631579e-09, "loss": 0.0435, "step": 20000 }, { "epoch": 1.0, "eval_loss": 0.15491345524787903, "eval_runtime": 532.2586, "eval_samples_per_second": 3.177, "eval_steps_per_second": 0.398, "eval_wer": 6.544273760459599, "step": 20000 }, { "epoch": 1.0, "step": 20000, "total_flos": 1.0871994580992e+21, "train_loss": 0.0028733723163604737, "train_runtime": 7510.0544, "train_samples_per_second": 42.61, "train_steps_per_second": 2.663 } ], "logging_steps": 25, "max_steps": 20000, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.0871994580992e+21, "train_batch_size": 16, "trial_name": null, "trial_params": null }