{
  "best_metric": 6.544273760459599,
  "best_model_checkpoint": "./checkpoint-19500",
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 20000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.00125, "grad_norm": 10.480854034423828, "learning_rate": 1.0499999999999999e-07, "loss": 1.4117, "step": 25},
    {"epoch": 0.0025, "grad_norm": 9.908479690551758, "learning_rate": 2.1437499999999999e-07, "loss": 1.4142, "step": 50},
    {"epoch": 0.00375, "grad_norm": 7.737735748291016, "learning_rate": 3.2374999999999997e-07, "loss": 1.2755, "step": 75},
    {"epoch": 0.005, "grad_norm": 5.847314834594727, "learning_rate": 4.33125e-07, "loss": 0.8497, "step": 100},
    {"epoch": 0.00625, "grad_norm": 5.064878463745117, "learning_rate": 5.425e-07, "loss": 0.5289, "step": 125},
    {"epoch": 0.0075, "grad_norm": 4.789758205413818, "learning_rate": 6.518749999999999e-07, "loss": 0.5181, "step": 150},
    {"epoch": 0.00875, "grad_norm": 4.744896411895752, "learning_rate": 7.612499999999999e-07, "loss": 0.3765, "step": 175},
    {"epoch": 0.01, "grad_norm": 4.427361965179443, "learning_rate": 8.706249999999999e-07, "loss": 0.3585, "step": 200},
    {"epoch": 0.01125, "grad_norm": 4.670985698699951, "learning_rate": 9.8e-07, "loss": 0.3219, "step": 225},
    {"epoch": 0.0125, "grad_norm": 3.8691747188568115, "learning_rate": 1.0893749999999998e-06, "loss": 0.3409, "step": 250},
    {"epoch": 0.01375, "grad_norm": 4.622318267822266, "learning_rate": 1.19875e-06, "loss": 0.2899, "step": 275},
    {"epoch": 0.015, "grad_norm": 3.7900593280792236, "learning_rate": 1.3081249999999999e-06, "loss": 0.2834, "step": 300},
    {"epoch": 0.01625, "grad_norm": 3.9323770999908447, "learning_rate": 1.4175e-06, "loss": 0.2643, "step": 325},
    {"epoch": 0.0175, "grad_norm": 3.708969831466675, "learning_rate": 1.5268749999999997e-06, "loss": 0.2827, "step": 350},
    {"epoch": 0.01875, "grad_norm": 3.707580804824829, "learning_rate": 1.6362499999999998e-06, "loss": 0.274, "step": 375},
    {"epoch": 0.02, "grad_norm": 3.8519480228424072, "learning_rate": 1.745625e-06, "loss": 0.2568, "step": 400},
    {"epoch": 0.02125, "grad_norm": 4.571149826049805, "learning_rate": 1.8549999999999998e-06, "loss": 0.2376, "step": 425},
    {"epoch": 0.0225, "grad_norm": 3.2112503051757812, "learning_rate": 1.9643749999999997e-06, "loss": 0.2212, "step": 450},
    {"epoch": 0.02375, "grad_norm": 6.173221588134766, "learning_rate": 2.07375e-06, "loss": 0.2238, "step": 475},
    {"epoch": 0.025, "grad_norm": 4.781201362609863, "learning_rate": 2.183125e-06, "loss": 0.2854, "step": 500},
    {"epoch": 0.025, "eval_loss": 0.41938766837120056, "eval_runtime": 531.5621, "eval_samples_per_second": 3.181, "eval_steps_per_second": 0.399, "eval_wer": 25.88984638441364, "step": 500},
    {"epoch": 0.02625, "grad_norm": 4.559605598449707, "learning_rate": 2.2925e-06, "loss": 0.2996, "step": 525},
    {"epoch": 0.0275, "grad_norm": 5.1446852684021, "learning_rate": 2.401875e-06, "loss": 0.2791, "step": 550},
    {"epoch": 0.02875, "grad_norm": 4.178796768188477, "learning_rate": 2.5112499999999995e-06, "loss": 0.3061, "step": 575},
    {"epoch": 0.03, "grad_norm": 4.030816555023193, "learning_rate": 2.6206249999999996e-06, "loss": 0.2768, "step": 600},
    {"epoch": 0.03125, "grad_norm": 4.405904769897461, "learning_rate": 2.7299999999999997e-06, "loss": 0.2678, "step": 625},
    {"epoch": 0.0325, "grad_norm": 3.5179622173309326, "learning_rate": 2.839375e-06, "loss": 0.2363, "step": 650},
    {"epoch": 0.03375, "grad_norm": 3.767529010772705, "learning_rate": 2.94875e-06, "loss": 0.219, "step": 675},
    {"epoch": 0.035, "grad_norm": 2.898439645767212, "learning_rate": 3.0581249999999996e-06, "loss": 0.1913, "step": 700},
    {"epoch": 0.03625, "grad_norm": 3.3581595420837402, "learning_rate": 3.1674999999999997e-06, "loss": 0.173, "step": 725},
    {"epoch": 0.0375, "grad_norm": 2.872340202331543, "learning_rate": 3.276875e-06, "loss": 0.176, "step": 750},
    {"epoch": 0.03875, "grad_norm": 3.1199734210968018, "learning_rate": 3.38625e-06, "loss": 0.1808, "step": 775},
    {"epoch": 0.04, "grad_norm": 2.626908779144287, "learning_rate": 3.495625e-06, "loss": 0.1802, "step": 800},
    {"epoch": 0.04125, "grad_norm": 2.776141881942749, "learning_rate": 3.6049999999999994e-06, "loss": 0.1761, "step": 825},
    {"epoch": 0.0425, "grad_norm": 3.149322509765625, "learning_rate": 3.7143749999999995e-06, "loss": 0.1569, "step": 850},
    {"epoch": 0.04375, "grad_norm": 2.776301145553589, "learning_rate": 3.82375e-06, "loss": 0.1607, "step": 875},
    {"epoch": 0.045, "grad_norm": 2.8884706497192383, "learning_rate": 3.933125e-06, "loss": 0.1582, "step": 900},
    {"epoch": 0.04625, "grad_norm": 3.991647720336914, "learning_rate": 4.0425e-06, "loss": 0.1511, "step": 925},
    {"epoch": 0.0475, "grad_norm": 2.892364740371704, "learning_rate": 4.151874999999999e-06, "loss": 0.1434, "step": 950},
    {"epoch": 0.04875, "grad_norm": 3.7825698852539062, "learning_rate": 4.261249999999999e-06, "loss": 0.148, "step": 975},
    {"epoch": 0.05, "grad_norm": 2.511293649673462, "learning_rate": 4.370624999999999e-06, "loss": 0.1425, "step": 1000},
    {"epoch": 0.05, "eval_loss": 0.39225178956985474, "eval_runtime": 528.4404, "eval_samples_per_second": 3.2, "eval_steps_per_second": 0.401, "eval_wer": 20.507056325715002, "step": 1000},
    {"epoch": 0.05125, "grad_norm": 1.8921111822128296, "learning_rate": 4.369473684210526e-06, "loss": 0.1395, "step": 1025},
    {"epoch": 0.0525, "grad_norm": 3.18829607963562, "learning_rate": 4.363717105263158e-06, "loss": 0.1294, "step": 1050},
    {"epoch": 0.05375, "grad_norm": 2.508878231048584, "learning_rate": 4.357960526315789e-06, "loss": 0.1366, "step": 1075},
    {"epoch": 0.055, "grad_norm": 2.201958179473877, "learning_rate": 4.352203947368421e-06, "loss": 0.14, "step": 1100},
    {"epoch": 0.05625, "grad_norm": 2.756673574447632, "learning_rate": 4.346447368421052e-06, "loss": 0.1355, "step": 1125},
    {"epoch": 0.0575, "grad_norm": 3.084169864654541, "learning_rate": 4.340690789473684e-06, "loss": 0.1278, "step": 1150},
    {"epoch": 0.05875, "grad_norm": 2.486377239227295, "learning_rate": 4.334934210526315e-06, "loss": 0.1298, "step": 1175},
    {"epoch": 0.06, "grad_norm": 3.5559706687927246, "learning_rate": 4.329177631578947e-06, "loss": 0.1352, "step": 1200},
    {"epoch": 0.06125, "grad_norm": 2.6353018283843994, "learning_rate": 4.323421052631579e-06, "loss": 0.1163, "step": 1225},
    {"epoch": 0.0625, "grad_norm": 2.8629567623138428, "learning_rate": 4.31766447368421e-06, "loss": 0.1199, "step": 1250},
    {"epoch": 0.06375, "grad_norm": 2.9020206928253174, "learning_rate": 4.311907894736842e-06, "loss": 0.1206, "step": 1275},
    {"epoch": 0.065, "grad_norm": 2.4626991748809814, "learning_rate": 4.306151315789473e-06, "loss": 0.1395, "step": 1300},
    {"epoch": 0.06625, "grad_norm": 2.9234840869903564, "learning_rate": 4.300394736842105e-06, "loss": 0.1269, "step": 1325},
    {"epoch": 0.0675, "grad_norm": 3.017625570297241, "learning_rate": 4.294638157894737e-06, "loss": 0.1228, "step": 1350},
    {"epoch": 0.06875, "grad_norm": 2.5392937660217285, "learning_rate": 4.288881578947368e-06, "loss": 0.1273, "step": 1375},
    {"epoch": 0.07, "grad_norm": 3.2523694038391113, "learning_rate": 4.283125e-06, "loss": 0.1242, "step": 1400},
    {"epoch": 0.07125, "grad_norm": 2.633652448654175, "learning_rate": 4.277368421052632e-06, "loss": 0.1341, "step": 1425},
    {"epoch": 0.0725, "grad_norm": 3.952681064605713, "learning_rate": 4.271611842105263e-06, "loss": 0.1588, "step": 1450},
    {"epoch": 0.07375, "grad_norm": 3.9815685749053955, "learning_rate": 4.265855263157895e-06, "loss": 0.1879, "step": 1475},
    {"epoch": 0.075, "grad_norm": 3.197030544281006, "learning_rate": 4.260098684210526e-06, "loss": 0.2199, "step": 1500},
    {"epoch": 0.075, "eval_loss": 0.3290639817714691, "eval_runtime": 535.231, "eval_samples_per_second": 3.159, "eval_steps_per_second": 0.396, "eval_wer": 17.478456350693143, "step": 1500},
    {"epoch": 0.07625, "grad_norm": 3.8294057846069336, "learning_rate": 4.254342105263158e-06, "loss": 0.205, "step": 1525},
    {"epoch": 0.0775, "grad_norm": 3.339564085006714, "learning_rate": 4.248585526315789e-06, "loss": 0.1793, "step": 1550},
    {"epoch": 0.07875, "grad_norm": 4.41719913482666, "learning_rate": 4.242828947368421e-06, "loss": 0.1903, "step": 1575},
    {"epoch": 0.08, "grad_norm": 4.329945087432861, "learning_rate": 4.237072368421052e-06, "loss": 0.2487, "step": 1600},
    {"epoch": 0.08125, "grad_norm": 2.858635425567627, "learning_rate": 4.231315789473684e-06, "loss": 0.16, "step": 1625},
    {"epoch": 0.0825, "grad_norm": 2.6474554538726807, "learning_rate": 4.225559210526316e-06, "loss": 0.1294, "step": 1650},
    {"epoch": 0.08375, "grad_norm": 2.6311450004577637, "learning_rate": 4.219802631578947e-06, "loss": 0.1199, "step": 1675},
    {"epoch": 0.085, "grad_norm": 2.472925901412964, "learning_rate": 4.214046052631579e-06, "loss": 0.1106, "step": 1700},
    {"epoch": 0.08625, "grad_norm": 2.1684815883636475, "learning_rate": 4.20828947368421e-06, "loss": 0.1081, "step": 1725},
    {"epoch": 0.0875, "grad_norm": 2.2405142784118652, "learning_rate": 4.202532894736842e-06, "loss": 0.1024, "step": 1750},
    {"epoch": 0.08875, "grad_norm": 3.28480863571167, "learning_rate": 4.196776315789474e-06, "loss": 0.1395, "step": 1775},
    {"epoch": 0.09, "grad_norm": 2.734311819076538, "learning_rate": 4.191019736842105e-06, "loss": 0.1685, "step": 1800},
    {"epoch": 0.09125, "grad_norm": 3.3384852409362793, "learning_rate": 4.185263157894737e-06, "loss": 0.179, "step": 1825},
    {"epoch": 0.0925, "grad_norm": 4.151054859161377, "learning_rate": 4.1795065789473686e-06, "loss": 0.1863, "step": 1850},
    {"epoch": 0.09375, "grad_norm": 3.854214668273926, "learning_rate": 4.17375e-06, "loss": 0.1753, "step": 1875},
    {"epoch": 0.095, "grad_norm": 3.3321709632873535, "learning_rate": 4.1679934210526316e-06, "loss": 0.1684, "step": 1900},
    {"epoch": 0.09625, "grad_norm": 2.8302998542785645, "learning_rate": 4.162236842105263e-06, "loss": 0.1818, "step": 1925},
    {"epoch": 0.0975, "grad_norm": 5.4296555519104, "learning_rate": 4.156480263157895e-06, "loss": 0.5523, "step": 1950},
    {"epoch": 0.09875, "grad_norm": 3.8675997257232666, "learning_rate": 4.1507236842105265e-06, "loss": 0.3352, "step": 1975},
    {"epoch": 0.1, "grad_norm": 3.9055581092834473, "learning_rate": 4.144967105263158e-06, "loss": 0.2343, "step": 2000},
    {"epoch": 0.1, "eval_loss": 0.2860749065876007, "eval_runtime": 530.1633, "eval_samples_per_second": 3.19, "eval_steps_per_second": 0.4, "eval_wer": 14.13138503809167, "step": 2000},
    {"epoch": 0.10125, "grad_norm": 3.413243293762207, "learning_rate": 4.1392105263157895e-06, "loss": 0.2233, "step": 2025},
    {"epoch": 0.1025, "grad_norm": 3.1129419803619385, "learning_rate": 4.133453947368421e-06, "loss": 0.2076, "step": 2050},
    {"epoch": 0.10375, "grad_norm": 3.0855767726898193, "learning_rate": 4.1276973684210525e-06, "loss": 0.1675, "step": 2075},
    {"epoch": 0.105, "grad_norm": 2.5053539276123047, "learning_rate": 4.121940789473684e-06, "loss": 0.1291, "step": 2100},
    {"epoch": 0.10625, "grad_norm": 2.078958511352539, "learning_rate": 4.1161842105263155e-06, "loss": 0.1036, "step": 2125},
    {"epoch": 0.1075, "grad_norm": 2.436898708343506, "learning_rate": 4.110427631578947e-06, "loss": 0.1153, "step": 2150},
    {"epoch": 0.10875, "grad_norm": 2.3834900856018066, "learning_rate": 4.1046710526315786e-06, "loss": 0.0943, "step": 2175},
    {"epoch": 0.11, "grad_norm": 2.070406913757324, "learning_rate": 4.09891447368421e-06, "loss": 0.0898, "step": 2200},
    {"epoch": 0.11125, "grad_norm": 2.0004026889801025, "learning_rate": 4.0931578947368416e-06, "loss": 0.0912, "step": 2225},
    {"epoch": 0.1125, "grad_norm": 2.4464359283447266, "learning_rate": 4.0874013157894735e-06, "loss": 0.0907, "step": 2250},
    {"epoch": 0.11375, "grad_norm": 2.8847742080688477, "learning_rate": 4.081644736842105e-06, "loss": 0.0978, "step": 2275},
    {"epoch": 0.115, "grad_norm": 2.167893171310425, "learning_rate": 4.0758881578947365e-06, "loss": 0.0968, "step": 2300},
    {"epoch": 0.11625, "grad_norm": 1.575804352760315, "learning_rate": 4.0701315789473684e-06, "loss": 0.0976, "step": 2325},
    {"epoch": 0.1175, "grad_norm": 2.3064370155334473, "learning_rate": 4.0643749999999995e-06, "loss": 0.0966, "step": 2350},
    {"epoch": 0.11875, "grad_norm": 1.9859708547592163, "learning_rate": 4.0586184210526314e-06, "loss": 0.0958, "step": 2375},
    {"epoch": 0.12, "grad_norm": 1.7534265518188477, "learning_rate": 4.052861842105263e-06, "loss": 0.0966, "step": 2400},
    {"epoch": 0.12125, "grad_norm": 3.8807549476623535, "learning_rate": 4.0471052631578945e-06, "loss": 0.1367, "step": 2425},
    {"epoch": 0.1225, "grad_norm": 3.933382987976074, "learning_rate": 4.041348684210526e-06, "loss": 0.1445, "step": 2450},
    {"epoch": 0.12375, "grad_norm": 3.3107643127441406, "learning_rate": 4.0355921052631575e-06, "loss": 0.1486, "step": 2475},
    {"epoch": 0.125, "grad_norm": 2.699190139770508, "learning_rate": 4.029835526315789e-06, "loss": 0.1391, "step": 2500},
    {"epoch": 0.125, "eval_loss": 0.2906411290168762, "eval_runtime": 531.8376, "eval_samples_per_second": 3.18, "eval_steps_per_second": 0.399, "eval_wer": 13.31335081803422, "step": 2500},
    {"epoch": 0.12625, "grad_norm": 3.5841128826141357, "learning_rate": 4.0240789473684205e-06, "loss": 0.1628, "step": 2525},
    {"epoch": 0.1275, "grad_norm": 3.2463104724884033, "learning_rate": 4.018322368421052e-06, "loss": 0.1705, "step": 2550},
    {"epoch": 0.12875, "grad_norm": 5.094871520996094, "learning_rate": 4.0125657894736835e-06, "loss": 0.1663, "step": 2575},
    {"epoch": 0.13, "grad_norm": 3.0208804607391357, "learning_rate": 4.0068092105263154e-06, "loss": 0.1712, "step": 2600},
    {"epoch": 0.13125, "grad_norm": 3.4919967651367188, "learning_rate": 4.0010526315789465e-06, "loss": 0.1498, "step": 2625},
    {"epoch": 0.1325, "grad_norm": 3.4352219104766846, "learning_rate": 3.9952960526315784e-06, "loss": 0.1423, "step": 2650},
    {"epoch": 0.13375, "grad_norm": 3.63608455657959, "learning_rate": 3.98953947368421e-06, "loss": 0.1565, "step": 2675},
    {"epoch": 0.135, "grad_norm": 3.6155622005462646, "learning_rate": 3.9837828947368414e-06, "loss": 0.1553, "step": 2700},
    {"epoch": 0.13625, "grad_norm": 3.4833076000213623, "learning_rate": 3.978026315789473e-06, "loss": 0.1416, "step": 2725},
    {"epoch": 0.1375, "grad_norm": 3.147080421447754, "learning_rate": 3.9722697368421045e-06, "loss": 0.1469, "step": 2750},
    {"epoch": 0.13875, "grad_norm": 2.891146659851074, "learning_rate": 3.966513157894736e-06, "loss": 0.128, "step": 2775},
    {"epoch": 0.14, "grad_norm": 3.1411890983581543, "learning_rate": 3.960756578947368e-06, "loss": 0.1372, "step": 2800},
    {"epoch": 0.14125, "grad_norm": 3.836360216140747, "learning_rate": 3.954999999999999e-06, "loss": 0.149, "step": 2825},
    {"epoch": 0.1425, "grad_norm": 4.1377339363098145, "learning_rate": 3.949243421052631e-06, "loss": 0.1383, "step": 2850},
    {"epoch": 0.14375, "grad_norm": 3.7640268802642822, "learning_rate": 3.943486842105263e-06, "loss": 0.1492, "step": 2875},
    {"epoch": 0.145, "grad_norm": 3.452561855316162, "learning_rate": 3.937730263157894e-06, "loss": 0.1288, "step": 2900},
    {"epoch": 0.14625, "grad_norm": 1.9712022542953491, "learning_rate": 3.931973684210526e-06, "loss": 0.1137, "step": 2925},
    {"epoch": 0.1475, "grad_norm": 2.0524768829345703, "learning_rate": 3.926217105263157e-06, "loss": 0.0973, "step": 2950},
    {"epoch": 0.14875, "grad_norm": 2.3722898960113525, "learning_rate": 3.920460526315789e-06, "loss": 0.0915, "step": 2975},
    {"epoch": 0.15, "grad_norm": 2.5048534870147705, "learning_rate": 3.914703947368421e-06, "loss": 0.0853, "step": 3000},
    {"epoch": 0.15, "eval_loss": 0.26879894733428955, "eval_runtime": 531.192, "eval_samples_per_second": 3.183, "eval_steps_per_second": 0.399, "eval_wer": 12.045710003746722, "step": 3000},
    {"epoch": 0.15125, "grad_norm": 2.009464740753174, "learning_rate": 3.908947368421052e-06, "loss": 0.1037, "step": 3025},
    {"epoch": 0.1525, "grad_norm": 2.3635034561157227, "learning_rate": 3.903190789473684e-06, "loss": 0.0889, "step": 3050},
    {"epoch": 0.15375, "grad_norm": 3.131683111190796, "learning_rate": 3.897434210526315e-06, "loss": 0.0895, "step": 3075},
    {"epoch": 0.155, "grad_norm": 2.2032673358917236, "learning_rate": 3.891677631578947e-06, "loss": 0.0955, "step": 3100},
    {"epoch": 0.15625, "grad_norm": 1.8079180717468262, "learning_rate": 3.885921052631578e-06, "loss": 0.0785, "step": 3125},
    {"epoch": 0.1575, "grad_norm": 2.2879910469055176, "learning_rate": 3.88016447368421e-06, "loss": 0.0721, "step": 3150},
    {"epoch": 0.15875, "grad_norm": 2.491487979888916, "learning_rate": 3.874407894736841e-06, "loss": 0.0817, "step": 3175},
    {"epoch": 0.16, "grad_norm": 2.996129035949707, "learning_rate": 3.868651315789473e-06, "loss": 0.0877, "step": 3200},
    {"epoch": 0.16125, "grad_norm": 2.9992258548736572, "learning_rate": 3.862894736842104e-06, "loss": 0.1011, "step": 3225},
    {"epoch": 0.1625, "grad_norm": 2.5464529991149902, "learning_rate": 3.857138157894736e-06, "loss": 0.1385, "step": 3250},
    {"epoch": 0.16375, "grad_norm": 2.853933095932007, "learning_rate": 3.851381578947368e-06, "loss": 0.1223, "step": 3275},
    {"epoch": 0.165, "grad_norm": 2.3290011882781982, "learning_rate": 3.845624999999999e-06, "loss": 0.1236, "step": 3300},
    {"epoch": 0.16625, "grad_norm": 2.61714768409729, "learning_rate": 3.839868421052631e-06, "loss": 0.1167, "step": 3325},
    {"epoch": 0.1675, "grad_norm": 3.926612377166748, "learning_rate": 3.834111842105263e-06, "loss": 0.1306, "step": 3350},
    {"epoch": 0.16875, "grad_norm": 2.9979617595672607, "learning_rate": 3.828355263157894e-06, "loss": 0.1383, "step": 3375},
    {"epoch": 0.17, "grad_norm": 2.879436492919922, "learning_rate": 3.822598684210526e-06, "loss": 0.1112, "step": 3400},
    {"epoch": 0.17125, "grad_norm": 2.706355571746826, "learning_rate": 3.816842105263158e-06, "loss": 0.1006, "step": 3425},
    {"epoch": 0.1725, "grad_norm": 2.263953685760498, "learning_rate": 3.811085526315789e-06, "loss": 0.089, "step": 3450},
    {"epoch": 0.17375, "grad_norm": 3.070748805999756, "learning_rate": 3.8053289473684207e-06, "loss": 0.0801, "step": 3475},
    {"epoch": 0.175, "grad_norm": 2.202629566192627, "learning_rate": 3.799572368421052e-06, "loss": 0.0866, "step": 3500},
    {"epoch": 0.175, "eval_loss": 0.2575243413448334, "eval_runtime": 531.9323, "eval_samples_per_second": 3.179, "eval_steps_per_second": 0.399, "eval_wer": 11.471212688897214, "step": 3500},
    {"epoch": 0.17625, "grad_norm": 2.1003735065460205, "learning_rate": 3.793815789473684e-06, "loss": 0.0794, "step": 3525},
    {"epoch": 0.1775, "grad_norm": 2.4936602115631104, "learning_rate": 3.788059210526315e-06, "loss": 0.0754, "step": 3550},
    {"epoch": 0.17875, "grad_norm": 2.2320945262908936, "learning_rate": 3.782302631578947e-06, "loss": 0.0906, "step": 3575},
    {"epoch": 0.18, "grad_norm": 1.4985826015472412, "learning_rate": 3.7765460526315786e-06, "loss": 0.0872, "step": 3600},
    {"epoch": 0.18125, "grad_norm": 1.6096969842910767, "learning_rate": 3.77078947368421e-06, "loss": 0.0783, "step": 3625},
    {"epoch": 0.1825, "grad_norm": 1.8306738138198853, "learning_rate": 3.7650328947368416e-06, "loss": 0.0783, "step": 3650},
    {"epoch": 0.18375, "grad_norm": 1.972235918045044, "learning_rate": 3.759276315789473e-06, "loss": 0.0789, "step": 3675},
    {"epoch": 0.185, "grad_norm": 2.0266051292419434, "learning_rate": 3.753519736842105e-06, "loss": 0.0708, "step": 3700},
    {"epoch": 0.18625, "grad_norm": 1.6395690441131592, "learning_rate": 3.7477631578947366e-06, "loss": 0.0768, "step": 3725},
    {"epoch": 0.1875, "grad_norm": 1.8886572122573853, "learning_rate": 3.742006578947368e-06, "loss": 0.0745, "step": 3750},
    {"epoch": 0.18875, "grad_norm": 3.0105178356170654, "learning_rate": 3.7362499999999996e-06, "loss": 0.0816, "step": 3775},
    {"epoch": 0.19, "grad_norm": 1.8376508951187134, "learning_rate": 3.7304934210526315e-06, "loss": 0.082, "step": 3800},
    {"epoch": 0.19125, "grad_norm": 1.758370041847229, "learning_rate": 3.7247368421052626e-06, "loss": 0.0798, "step": 3825},
    {"epoch": 0.1925, "grad_norm": 1.2405736446380615, "learning_rate": 3.7189802631578945e-06, "loss": 0.0773, "step": 3850},
    {"epoch": 0.19375, "grad_norm": 1.8085663318634033, "learning_rate": 3.713223684210526e-06, "loss": 0.0861, "step": 3875},
    {"epoch": 0.195, "grad_norm": 3.838613986968994, "learning_rate": 3.7074671052631575e-06, "loss": 0.1032, "step": 3900},
    {"epoch": 0.19625, "grad_norm": 3.087472438812256, "learning_rate": 3.701710526315789e-06, "loss": 0.133, "step": 3925},
    {"epoch": 0.1975, "grad_norm": 2.3854024410247803, "learning_rate": 3.6959539473684206e-06, "loss": 0.1366, "step": 3950},
    {"epoch": 0.19875, "grad_norm": 3.235400676727295, "learning_rate": 3.690197368421052e-06, "loss": 0.1275, "step": 3975},
    {"epoch": 0.2, "grad_norm": 2.7316720485687256, "learning_rate": 3.684440789473684e-06, "loss": 0.1311, "step": 4000},
    {"epoch": 0.2, "eval_loss": 0.24720044434070587, "eval_runtime": 536.8744, "eval_samples_per_second": 3.15, "eval_steps_per_second": 0.395, "eval_wer": 12.482827525914825, "step": 4000},
    {"epoch": 0.20125, "grad_norm": 2.4120874404907227, "learning_rate": 3.6786842105263155e-06, "loss": 0.1215, "step": 4025},
    {"epoch": 0.2025, "grad_norm": 2.5485270023345947, "learning_rate": 3.672927631578947e-06, "loss": 0.0983, "step": 4050},
    {"epoch": 0.20375, "grad_norm": 2.2741594314575195, "learning_rate": 3.667171052631579e-06, "loss": 0.0764, "step": 4075},
    {"epoch": 0.205, "grad_norm": 1.875857949256897, "learning_rate": 3.66141447368421e-06, "loss": 0.0733, "step": 4100},
    {"epoch": 0.20625, "grad_norm": 1.8897082805633545, "learning_rate": 3.655657894736842e-06, "loss": 0.0797, "step": 4125},
    {"epoch": 0.2075, "grad_norm": 1.5462270975112915, "learning_rate": 3.6499013157894735e-06, "loss": 0.0772, "step": 4150},
    {"epoch": 0.20875, "grad_norm": 2.1055002212524414, "learning_rate": 3.644144736842105e-06, "loss": 0.079, "step": 4175},
    {"epoch": 0.21, "grad_norm": 2.8036248683929443, "learning_rate": 3.6383881578947365e-06, "loss": 0.0828, "step": 4200},
    {"epoch": 0.21125, "grad_norm": 1.496777892112732, "learning_rate": 3.6326315789473684e-06, "loss": 0.0658, "step": 4225},
    {"epoch": 0.2125, "grad_norm": 2.213822364807129, "learning_rate": 3.6268749999999995e-06, "loss": 0.0722, "step": 4250},
    {"epoch": 0.21375, "grad_norm": 1.4431771039962769, "learning_rate": 3.6211184210526314e-06, "loss": 0.061, "step": 4275},
    {"epoch": 0.215, "grad_norm": 1.6346482038497925, "learning_rate": 3.6153618421052625e-06, "loss": 0.0641, "step": 4300},
    {"epoch": 0.21625, "grad_norm": 1.5905380249023438, "learning_rate": 3.6096052631578944e-06, "loss": 0.0633, "step": 4325},
    {"epoch": 0.2175, "grad_norm": 2.4848458766937256, "learning_rate": 3.6038486842105263e-06, "loss": 0.0738, "step": 4350},
    {"epoch": 0.21875, "grad_norm": 2.568466901779175, "learning_rate": 3.5980921052631574e-06, "loss": 0.1123, "step": 4375},
    {"epoch": 0.22, "grad_norm": 2.5104339122772217, "learning_rate": 3.5923355263157894e-06, "loss": 0.1179, "step": 4400},
    {"epoch": 0.22125, "grad_norm": 3.769829273223877, "learning_rate": 3.586578947368421e-06, "loss": 0.1221, "step": 4425},
    {"epoch": 0.2225, "grad_norm": 2.850048542022705, "learning_rate": 3.5808223684210524e-06, "loss": 0.1115, "step": 4450},
    {"epoch": 0.22375, "grad_norm": 2.0328500270843506, "learning_rate": 3.575065789473684e-06, "loss": 0.1274, "step": 4475},
    {"epoch": 0.225, "grad_norm": 2.765300750732422, "learning_rate": 3.569309210526316e-06, "loss": 0.1338, "step": 4500},
    {"epoch": 0.225, "eval_loss": 0.24367325007915497, "eval_runtime": 531.4688, "eval_samples_per_second": 3.182, "eval_steps_per_second": 0.399, "eval_wer": 10.990383414512301, "step": 4500},
    {"epoch": 0.22625, "grad_norm": 1.4456897974014282, "learning_rate": 3.563552631578947e-06, "loss": 0.0921, "step": 4525},
    {"epoch": 0.2275, "grad_norm": 2.357384443283081, "learning_rate": 3.557796052631579e-06, "loss": 0.0728, "step": 4550},
    {"epoch": 0.22875, "grad_norm": 2.2841663360595703, "learning_rate": 3.55203947368421e-06, "loss": 0.0703, "step": 4575},
    {"epoch": 0.23, "grad_norm": 1.8975858688354492, "learning_rate": 3.546282894736842e-06, "loss": 0.0595, "step": 4600},
    {"epoch": 0.23125, "grad_norm": 1.6614043712615967, "learning_rate": 3.5405263157894733e-06, "loss": 0.0684, "step": 4625},
    {"epoch": 0.2325, "grad_norm": 3.0987887382507324, "learning_rate": 3.534769736842105e-06, "loss": 0.0643, "step": 4650},
    {"epoch": 0.23375, "grad_norm": 1.869446873664856, "learning_rate": 3.5290131578947363e-06, "loss": 0.0612, "step": 4675},
    {"epoch": 0.235, "grad_norm": 1.6360236406326294, "learning_rate": 3.5232565789473683e-06, "loss": 0.0627, "step": 4700},
    {"epoch": 0.23625, "grad_norm": 2.188901424407959, "learning_rate": 3.5174999999999998e-06, "loss": 0.068, "step": 4725},
    {"epoch": 0.2375, "grad_norm": 1.5851141214370728, "learning_rate": 3.5117434210526313e-06, "loss": 0.0702, "step": 4750},
    {"epoch": 0.23875, "grad_norm": 1.9303579330444336, "learning_rate": 3.5059868421052632e-06, "loss": 0.0683, "step": 4775},
    {"epoch": 0.24, "grad_norm": 1.8640798330307007, "learning_rate": 3.5002302631578943e-06, "loss": 0.0637, "step": 4800},
    {"epoch": 0.24125, "grad_norm": 2.395669937133789, "learning_rate": 3.4944736842105262e-06, "loss": 0.0626, "step": 4825},
    {"epoch": 0.2425, "grad_norm": 1.5368024110794067, "learning_rate": 3.4887171052631573e-06, "loss": 0.0694, "step": 4850},
    {"epoch": 0.24375, "grad_norm": 2.1346402168273926, "learning_rate": 3.4829605263157892e-06, "loss": 0.0734, "step": 4875},
    {"epoch": 0.245, "grad_norm": 2.0883893966674805, "learning_rate": 3.4772039473684207e-06, "loss": 0.0659, "step": 4900},
    {"epoch": 0.24625, "grad_norm": 1.6861238479614258, "learning_rate": 3.4714473684210523e-06, "loss": 0.0656, "step": 4925},
    {"epoch": 0.2475, "grad_norm": 1.5790470838546753, "learning_rate": 3.4656907894736838e-06, "loss": 0.0801, "step": 4950},
    {"epoch": 0.24875, "grad_norm": 1.3223644495010376, "learning_rate": 3.4599342105263157e-06, "loss": 0.0806, "step": 4975},
    {"epoch": 0.25, "grad_norm": 1.6931387186050415, "learning_rate": 3.4541776315789468e-06, "loss": 0.0748, "step": 5000},
    {"epoch": 0.25, "eval_loss": 0.2556721270084381, "eval_runtime": 534.5469, "eval_samples_per_second": 3.163, "eval_steps_per_second": 0.397, "eval_wer": 10.709379293118522, "step": 5000},
    {"epoch": 0.25125, "grad_norm": 1.5327143669128418, "learning_rate": 3.4484210526315787e-06, "loss": 0.0968, "step": 5025},
    {"epoch": 0.2525, "grad_norm": 1.988226294517517, "learning_rate": 3.4426644736842106e-06, "loss": 0.0921, "step": 5050},
    {"epoch": 0.25375, "grad_norm": 2.179086446762085, "learning_rate": 3.4369078947368417e-06, "loss": 0.093, "step": 5075},
    {"epoch": 0.255, "grad_norm": 2.4304797649383545, "learning_rate": 3.4311513157894736e-06, "loss": 0.0909, "step": 5100},
    {"epoch": 0.25625, "grad_norm": 2.498908281326294, "learning_rate": 3.4253947368421047e-06, "loss": 0.1225, "step": 5125},
    {"epoch": 0.2575, "grad_norm": 2.018110752105713, "learning_rate": 3.4196381578947367e-06, "loss": 0.1199, "step": 5150},
    {"epoch": 0.25875, "grad_norm": 1.8156744241714478, "learning_rate": 3.413881578947368e-06, "loss": 0.1032, "step": 5175},
    {"epoch": 0.26, "grad_norm": 2.395634651184082, "learning_rate": 3.4081249999999997e-06, "loss": 0.0842, "step": 5200},
    {"epoch": 0.26125, "grad_norm": 1.8604170083999634, "learning_rate": 3.402368421052631e-06, "loss": 0.0753, "step": 5225},
    {"epoch": 0.2625, "grad_norm": 2.186006784439087, "learning_rate": 3.396611842105263e-06, "loss": 0.0693, "step": 5250},
    {"epoch": 0.26375, "grad_norm": 2.117950201034546, "learning_rate": 3.390855263157894e-06, "loss": 0.0731, "step": 5275},
    {"epoch": 0.265, "grad_norm": 1.442688226699829, "learning_rate": 3.385098684210526e-06, "loss": 0.0607, "step": 5300},
    {"epoch": 0.26625, "grad_norm": 2.0623013973236084, "learning_rate": 3.379342105263157e-06, "loss": 0.0598, "step": 5325},
    {"epoch": 0.2675, "grad_norm": 1.6096211671829224, "learning_rate": 3.373585526315789e-06, "loss": 0.0687, "step": 5350},
    {"epoch": 0.26875, "grad_norm": 1.2381603717803955, "learning_rate": 3.367828947368421e-06, "loss": 0.0646, "step": 5375},
    {"epoch": 0.27, "grad_norm": 1.6694140434265137, "learning_rate": 3.362072368421052e-06, "loss": 0.0595, "step": 5400},
    {"epoch": 0.27125, "grad_norm": 2.486950159072876, "learning_rate": 3.356315789473684e-06, "loss": 0.074, "step": 5425},
    {"epoch": 0.2725, "grad_norm": 1.2931033372879028, "learning_rate": 3.3505592105263156e-06, "loss": 0.08, "step": 5450},
    {"epoch": 0.27375, "grad_norm": 2.314680337905884, "learning_rate": 3.344802631578947e-06, "loss": 0.0662, "step": 5475},
    {"epoch": 0.275, "grad_norm": 2.413079261779785, "learning_rate": 3.3390460526315786e-06, "loss": 0.0821, "step": 5500},
    {"epoch": 0.275, "eval_loss": 0.2597045302391052, "eval_runtime": 532.1724, "eval_samples_per_second": 3.178, "eval_steps_per_second": 0.398, "eval_wer": 10.247283626826526, "step": 5500},
    {"epoch": 0.27625, "grad_norm": 2.8475470542907715, "learning_rate": 3.3332894736842105e-06, "loss": 0.1317, "step": 5525},
    {"epoch": 0.2775, "grad_norm": 2.919682025909424, "learning_rate": 3.3275328947368416e-06, "loss": 0.1323, "step": 5550},
    {"epoch": 0.27875, "grad_norm": 3.0585904121398926, "learning_rate": 3.3217763157894735e-06, "loss": 0.1332, "step": 5575},
    {"epoch": 0.28, "grad_norm": 2.4418559074401855, "learning_rate": 3.3160197368421046e-06, "loss": 0.1126, "step": 5600},
    {"epoch": 0.28125, "grad_norm": 2.9454727172851562, "learning_rate": 3.3102631578947365e-06, "loss": 0.0991, "step": 5625},
    {"epoch": 0.2825, "grad_norm": 2.472628593444824, "learning_rate": 3.304506578947368e-06, "loss": 0.1106, "step": 5650},
    {"epoch": 0.28375, "grad_norm": 2.1178548336029053, "learning_rate": 3.2987499999999995e-06, "loss": 0.1027, "step": 5675},
    {"epoch": 0.285, "grad_norm": 2.5170726776123047, "learning_rate": 3.2929934210526315e-06, "loss": 0.1027, "step": 5700},
    {"epoch": 0.28625, "grad_norm": 2.9180397987365723, "learning_rate": 3.287236842105263e-06, "loss": 0.1045, "step": 5725},
    {"epoch": 0.2875, "grad_norm": 2.6896932125091553, "learning_rate": 3.2814802631578945e-06, "loss": 0.1069, "step": 5750},
    {"epoch": 0.28875, "grad_norm": 3.1297285556793213, "learning_rate": 3.275723684210526e-06, "loss": 0.1003, "step": 5775},
    {"epoch": 0.29, "grad_norm": 2.4746246337890625, "learning_rate": 3.269967105263158e-06, "loss": 0.1084, "step": 5800},
    {"epoch": 0.29125, "grad_norm": 1.7318406105041504, "learning_rate": 3.264210526315789e-06, "loss": 0.0846, "step": 5825},
    {"epoch": 0.2925, "grad_norm": 2.190168857574463, "learning_rate": 3.258453947368421e-06, "loss": 0.082, "step": 5850},
    {"epoch": 0.29375, "grad_norm": 1.5366681814193726, "learning_rate": 3.252697368421052e-06, "loss": 0.0656, "step": 5875},
    {"epoch": 0.295, "grad_norm": 1.8261510133743286, "learning_rate": 3.246940789473684e-06, "loss": 0.0646, "step": 5900},
    {"epoch": 0.29625, "grad_norm": 1.9088908433914185, "learning_rate": 3.2411842105263155e-06, "loss": 0.0662, "step": 5925},
    {"epoch": 0.2975, "grad_norm": 1.3404430150985718, "learning_rate": 3.235427631578947e-06, "loss": 0.0712, "step": 5950},
    {"epoch": 0.29875, "grad_norm": 1.7546651363372803, "learning_rate": 3.2296710526315785e-06, "loss": 0.084, "step": 5975},
    {"epoch": 0.3, "grad_norm": 1.7727612257003784, "learning_rate": 3.2239144736842104e-06, "loss": 0.0988, "step": 6000},
    {"epoch": 0.3, "eval_loss": 0.2406572848558426, "eval_runtime": 535.6321, "eval_samples_per_second": 3.157, "eval_steps_per_second": 0.396, "eval_wer": 9.447983014861997, "step": 6000},
    {"epoch": 0.30125, "grad_norm": 2.477670907974243, "learning_rate": 3.2181578947368415e-06, "loss": 0.1013, "step": 6025},
    {"epoch": 0.3025, "grad_norm": 4.175459384918213, "learning_rate": 3.2124013157894734e-06, "loss": 0.1199, "step": 6050},
    {"epoch": 0.30375, "grad_norm": 2.4588561058044434, "learning_rate": 3.2066447368421053e-06, "loss": 0.1203, "step": 6075},
    {"epoch": 0.305, "grad_norm": 3.759526491165161, "learning_rate": 3.2008881578947364e-06, "loss": 0.1261, "step": 6100},
    {"epoch": 0.30625, "grad_norm": 3.186166524887085, "learning_rate": 3.1951315789473683e-06, "loss": 0.0946, "step": 6125},
    {"epoch": 0.3075, "grad_norm": 1.874886155128479, "learning_rate": 3.1893749999999994e-06, "loss": 0.0707, "step": 6150},
    {"epoch": 0.30875, "grad_norm": 1.673767328262329, "learning_rate": 3.1836184210526314e-06, "loss": 0.0605, "step": 6175},
    {"epoch": 0.31, "grad_norm": 2.6728780269622803, "learning_rate": 3.177861842105263e-06, "loss": 0.064, "step": 6200},
    {"epoch": 0.31125, "grad_norm": 1.245354175567627, "learning_rate": 3.1721052631578944e-06, "loss": 0.0603, "step": 6225},
    {"epoch": 0.3125, "grad_norm": 1.3173916339874268, "learning_rate": 3.166348684210526e-06, "loss": 0.067, "step": 6250},
    {"epoch": 0.31375, "grad_norm": 1.9218686819076538, "learning_rate": 3.160592105263158e-06, "loss": 0.0723, "step": 6275},
    {"epoch": 0.315, "grad_norm": 1.822493314743042, "learning_rate": 3.154835526315789e-06, "loss": 0.0772, "step": 6300},
    {"epoch": 0.31625, "grad_norm": 2.4955074787139893, "learning_rate": 3.149078947368421e-06, "loss": 0.1124, "step": 6325},
    {"epoch": 0.3175, "grad_norm": 2.448274612426758, "learning_rate": 3.1433223684210523e-06, "loss": 0.1144, "step": 6350},
    {"epoch": 0.31875, "grad_norm": 2.732297658920288, "learning_rate": 3.137565789473684e-06, "loss": 0.0983, "step": 6375},
    {"epoch": 0.32, "grad_norm": 3.261770248413086, "learning_rate": 3.1318092105263158e-06, "loss": 0.11, "step": 6400},
    {"epoch": 0.32125, "grad_norm": 2.367335319519043, "learning_rate": 3.1260526315789473e-06, "loss": 0.1129, "step": 6425},
    {"epoch": 0.3225, "grad_norm": 2.4930291175842285, "learning_rate": 3.1202960526315788e-06, "loss": 0.1106, "step": 6450},
    {"epoch": 0.32375, "grad_norm": 1.8275959491729736, "learning_rate": 3.1145394736842103e-06, "loss": 0.0814, "step": 6475},
    {"epoch": 0.325, "grad_norm": 3.6453261375427246, "learning_rate": 3.1087828947368418e-06, "loss": 0.0824, "step": 6500},
    {"epoch": 0.325, "eval_loss": 0.24250419437885284, "eval_runtime": 531.4087, "eval_samples_per_second": 3.182, "eval_steps_per_second": 0.399, "eval_wer": 9.223179717746971, "step": 6500},
    {"epoch": 0.32625, "grad_norm": 2.3996527194976807, "learning_rate": 3.1030263157894733e-06, "loss": 0.0913, "step": 6525},
    {"epoch": 0.3275, "grad_norm": 3.106403350830078, "learning_rate": 3.0972697368421052e-06, "loss": 0.0969, "step": 6550},
    {"epoch": 0.32875, "grad_norm": 3.741685628890991, "learning_rate": 3.0915131578947363e-06, "loss": 0.1091, "step": 6575},
    {"epoch": 0.33, "grad_norm": 1.6008243560791016, "learning_rate": 3.0859868421052626e-06, "loss": 0.0984, "step": 6600},
    {"epoch": 0.33125, "grad_norm": 2.268734931945801, "learning_rate": 3.0802302631578945e-06, "loss": 0.0968, "step": 6625},
    {"epoch": 0.3325, "grad_norm": 2.442617654800415, "learning_rate": 3.074473684210526e-06, "loss": 0.0716, "step": 6650},
    {"epoch": 0.33375, "grad_norm": 1.9763257503509521, "learning_rate": 3.0687171052631575e-06, "loss": 0.0674, "step": 6675},
    {"epoch": 0.335, "grad_norm": 1.828474998474121, "learning_rate": 3.0629605263157894e-06, "loss": 0.0654, "step": 6700},
    {"epoch": 0.33625, "grad_norm": 1.5649821758270264, "learning_rate": 3.0572039473684205e-06, "loss": 0.057, "step": 6725},
    {"epoch": 0.3375, "grad_norm": 1.911927580833435, "learning_rate": 3.0514473684210525e-06, "loss": 0.0532, "step": 6750},
    {"epoch": 0.33875, "grad_norm": 1.3287229537963867, "learning_rate": 3.045690789473684e-06, "loss": 0.0623, "step": 6775},
    {"epoch": 0.34, "grad_norm": 1.7754572629928589, "learning_rate": 3.0399342105263155e-06, "loss": 0.0635, "step": 6800},
    {"epoch": 0.34125, "grad_norm": 1.9900065660476685, "learning_rate": 3.034177631578947e-06, "loss": 0.0678, "step": 6825},
    {"epoch": 0.3425, "grad_norm": 1.714850664138794, "learning_rate": 3.028421052631579e-06, "loss": 0.0654, "step": 6850},
    {"epoch": 0.34375, "grad_norm": 1.6401875019073486, "learning_rate": 3.02266447368421e-06, "loss": 0.0662, "step": 6875},
    {"epoch": 0.345, "grad_norm": 1.0171102285385132, "learning_rate": 3.016907894736842e-06, "loss": 0.0573, "step": 6900},
    {"epoch": 0.34625, "grad_norm": 1.4662336111068726, "learning_rate": 3.0111513157894734e-06, "loss": 0.0556, "step": 6925},
    {"epoch": 0.3475, "grad_norm": 1.7531720399856567, "learning_rate": 3.005394736842105e-06, "loss": 0.0501, "step": 6950},
    {"epoch": 0.34875, "grad_norm": 2.6019067764282227, "learning_rate": 2.9996381578947364e-06, "loss": 0.0629, "step": 6975},
    {"epoch": 0.35, "grad_norm": 2.0052170753479004, "learning_rate": 2.9938815789473684e-06, "loss": 0.0678, "step": 7000},
    {"epoch": 0.35, "eval_loss": 0.23009631037712097, "eval_runtime": 530.6679, "eval_samples_per_second": 3.187, "eval_steps_per_second": 0.399, "eval_wer": 9.13575621331335, "step": 7000},
    {"epoch": 0.35125, "grad_norm": 2.00034761428833, "learning_rate": 2.988125e-06, "loss": 0.0582, "step": 7025},
    {"epoch": 0.3525, "grad_norm": 1.7806837558746338, "learning_rate": 2.9823684210526314e-06, "loss": 0.058, "step": 7050},
    {"epoch": 0.35375, "grad_norm": 1.4306073188781738, "learning_rate": 2.976611842105263e-06, "loss": 0.0555, "step": 7075},
    {"epoch": 0.355, "grad_norm": 1.8648333549499512, "learning_rate": 2.9708552631578944e-06, "loss": 0.0662, "step": 7100},
    {"epoch": 0.35625, "grad_norm": 2.046255350112915, "learning_rate": 2.9650986842105263e-06, "loss": 0.0873, "step": 7125},
    {"epoch": 0.3575, "grad_norm": 1.928809404373169, "learning_rate": 2.9593421052631574e-06, "loss": 0.0948, "step": 7150},
    {"epoch": 0.35875, "grad_norm": 2.6892471313476562, "learning_rate": 2.9535855263157893e-06, "loss": 0.1043, "step": 7175},
    {"epoch": 0.36, "grad_norm": 1.9739983081817627, "learning_rate": 2.947828947368421e-06, "loss": 0.1037, "step": 7200},
    {"epoch": 0.36125, "grad_norm": 3.5157880783081055, "learning_rate": 2.9420723684210523e-06, "loss": 0.1139, "step": 7225},
    {"epoch": 0.3625, "grad_norm": 2.140559673309326, "learning_rate": 2.936315789473684e-06, "loss": 0.0912, "step": 7250},
    {"epoch": 0.36375, "grad_norm": 2.24043607711792, "learning_rate": 2.9305592105263158e-06, "loss": 0.0555, "step": 7275},
    {"epoch": 0.365, "grad_norm": 1.5429259538650513, "learning_rate": 2.924802631578947e-06, "loss": 0.059, "step": 7300},
    {"epoch": 0.36625, "grad_norm": 1.9133890867233276, "learning_rate": 2.919046052631579e-06, "loss": 0.0576, "step": 7325},
    {"epoch": 0.3675, "grad_norm": 1.585777759552002, "learning_rate": 2.9132894736842103e-06, "loss": 0.0497, "step": 7350},
    {"epoch": 0.36875, "grad_norm": 1.5571388006210327, "learning_rate": 2.907532894736842e-06, "loss": 0.0604, "step": 7375},
    {"epoch": 0.37, "grad_norm": 1.2344049215316772, "learning_rate": 2.9017763157894737e-06, "loss": 0.0621, "step": 7400},
    {"epoch": 0.37125, "grad_norm": 1.7708073854446411, "learning_rate": 2.896019736842105e-06, "loss": 0.0713, "step": 7425},
    {"epoch": 0.3725, "grad_norm": 2.126579999923706, "learning_rate": 2.8902631578947367e-06, "loss": 0.0661, "step": 7450},
    {"epoch": 0.37375, "grad_norm": 1.9544090032577515, "learning_rate": 2.8845065789473682e-06, "loss": 0.0626, "step": 7475},
    {"epoch": 0.375, "grad_norm": 2.478142499923706, "learning_rate": 2.8787499999999998e-06, "loss": 0.1124, "step": 7500},
    {"epoch": 0.375, "eval_loss": 0.2558789849281311, "eval_runtime": 531.6323, "eval_samples_per_second": 3.181, "eval_steps_per_second": 0.399, "eval_wer": 9.323092294242539, "step": 7500},
    {"epoch": 0.37625, "grad_norm": 2.722101926803589, "learning_rate": 2.8729934210526313e-06, "loss": 0.1143, "step": 7525},
    {"epoch": 0.3775, "grad_norm": 2.3424594402313232, "learning_rate": 2.867236842105263e-06, "loss": 0.0968, "step": 7550},
    {"epoch": 0.37875, "grad_norm": 2.566340208053589, "learning_rate": 2.8614802631578943e-06, "loss": 0.0932, "step": 7575},
    {"epoch": 0.38, "grad_norm": 3.2237472534179688, "learning_rate": 2.855723684210526e-06, "loss": 0.1155, "step": 7600},
    {"epoch": 0.38125, "grad_norm": 3.058669090270996, "learning_rate": 2.8499671052631573e-06, "loss": 0.103, "step": 7625},
    {"epoch": 0.3825, "grad_norm": 4.729414463043213, "learning_rate": 2.8442105263157892e-06, "loss": 0.0922, "step": 7650},
    {"epoch": 0.38375, "grad_norm": 2.140126943588257, "learning_rate": 2.8384539473684207e-06, "loss": 0.0982, "step": 7675},
    {"epoch": 0.385, "grad_norm": 2.778568983078003, "learning_rate": 2.8326973684210522e-06, "loss": 0.0993, "step": 7700},
    {"epoch": 0.38625, "grad_norm": 2.6681206226348877, "learning_rate": 2.826940789473684e-06, "loss": 0.1018, "step": 7725},
    {"epoch": 0.3875, "grad_norm": 1.5673187971115112, "learning_rate": 2.8211842105263157e-06, "loss": 0.0854, "step": 7750},
    {"epoch": 0.38875, "grad_norm": 1.3890910148620605, "learning_rate": 2.815427631578947e-06, "loss": 0.0703, "step": 7775},
    {"epoch": 0.39, "grad_norm": 2.176023483276367, "learning_rate": 2.8096710526315787e-06, "loss": 0.0672, "step": 7800},
    {"epoch": 0.39125, "grad_norm": 1.2905758619308472, "learning_rate": 2.8039144736842106e-06, "loss": 0.0615, "step": 7825},
    {"epoch": 0.3925, "grad_norm": 1.3446353673934937, "learning_rate": 2.7981578947368417e-06, "loss": 0.0637, "step": 7850},
    {"epoch": 0.39375, "grad_norm": 2.1519501209259033, "learning_rate": 2.7924013157894736e-06, "loss": 0.056, "step": 7875},
    {"epoch": 0.395, "grad_norm": 1.8618980646133423, "learning_rate": 2.7866447368421047e-06, "loss": 0.0573, "step": 7900},
    {"epoch": 0.39625, "grad_norm": 2.5565106868743896, "learning_rate": 2.7808881578947366e-06, "loss": 0.0882, "step": 7925},
    {"epoch": 0.3975, "grad_norm": 3.98923397064209, "learning_rate": 2.775131578947368e-06, "loss": 0.0981, "step": 7950},
    {"epoch": 0.39875, "grad_norm": 3.326756477355957, "learning_rate": 2.7693749999999996e-06, "loss": 0.147, "step": 7975},
    {"epoch": 0.4, "grad_norm": 2.8089091777801514, "learning_rate": 2.763618421052631e-06, "loss": 0.1122, "step": 8000},
    {"epoch": 0.4, "eval_loss": 0.22397179901599884, "eval_runtime": 531.6557, "eval_samples_per_second": 3.181, "eval_steps_per_second": 0.399, "eval_wer": 8.523791682278006, "step": 8000},
    {"epoch": 0.40125, "grad_norm": 1.8123100996017456, "learning_rate": 2.757861842105263e-06, "loss": 0.0967, "step": 8025},
    {"epoch": 0.4025, "grad_norm": 2.1731700897216797, "learning_rate": 2.7521052631578946e-06, "loss": 0.0927, "step": 8050},
    {"epoch": 0.40375, "grad_norm": 2.9888458251953125, "learning_rate": 2.746348684210526e-06, "loss": 0.1061, "step": 8075},
    {"epoch": 0.405, "grad_norm": 3.51106595993042, "learning_rate": 2.740592105263158e-06, "loss": 0.158, "step": 8100},
    {"epoch": 0.40625, "grad_norm": 3.410916805267334, "learning_rate": 2.734835526315789e-06, "loss": 0.1011, "step": 8125},
    {"epoch": 0.4075, "grad_norm": 2.426023006439209, "learning_rate": 2.729078947368421e-06, "loss": 0.0864, "step": 8150},
    {"epoch": 0.40875, "grad_norm": 2.8296170234680176, "learning_rate": 2.723322368421052e-06, "loss": 0.0929, "step": 8175},
    {"epoch": 0.41, "grad_norm": 2.028474807739258, "learning_rate": 2.717565789473684e-06, "loss": 0.0848, "step": 8200},
    {"epoch": 0.41125, "grad_norm": 2.4663166999816895, "learning_rate": 2.7118092105263155e-06, "loss": 0.0698, "step": 8225},
    {"epoch": 0.4125, "grad_norm": 1.7618118524551392, "learning_rate": 2.706052631578947e-06, "loss": 0.058, "step": 8250},
    {"epoch": 0.41375, "grad_norm": 2.2708559036254883, "learning_rate": 2.7002960526315786e-06, "loss": 0.0607, "step": 8275},
    {"epoch": 0.415, "grad_norm": 1.6543164253234863, "learning_rate": 2.6945394736842105e-06, "loss": 0.0556, "step": 8300},
    {"epoch": 0.41625, "grad_norm": 2.5951287746429443, "learning_rate": 2.6887828947368416e-06, "loss": 0.0576, "step": 8325},
    {"epoch": 0.4175, "grad_norm": 1.1910465955734253, "learning_rate": 2.6830263157894735e-06, "loss": 0.059, "step": 8350},
    {"epoch": 0.41875, "grad_norm": 1.667228102684021, "learning_rate": 2.6772697368421054e-06, "loss": 0.0521, "step": 8375},
    {"epoch": 0.42, "grad_norm": 2.1288628578186035, "learning_rate": 2.6715131578947365e-06, "loss": 0.0557, "step": 8400},
    {"epoch": 0.42125, "grad_norm": 2.0485122203826904, "learning_rate": 2.6657565789473684e-06, "loss": 0.0493, "step": 8425},
    {"epoch": 0.4225, "grad_norm": 1.8512142896652222, "learning_rate": 2.6599999999999995e-06, "loss": 0.056, "step": 8450},
    {"epoch": 0.42375, "grad_norm": 1.8958942890167236, "learning_rate": 2.6542434210526314e-06, "loss": 0.059, "step": 8475},
    {"epoch": 0.425, "grad_norm": 1.2833645343780518, "learning_rate": 2.648486842105263e-06, "loss": 0.0477, "step": 8500},
    {"epoch": 0.425, "eval_loss": 0.23789365589618683, "eval_runtime": 530.7286, "eval_samples_per_second": 3.186, "eval_steps_per_second": 0.399, "eval_wer": 8.317721993255901, "step": 8500},
    {"epoch": 0.42625, "grad_norm": 1.2612178325653076, "learning_rate": 2.6427302631578945e-06, "loss": 0.05, "step": 8525},
    {"epoch": 0.4275, "grad_norm": 2.100247621536255, "learning_rate": 2.636973684210526e-06, "loss": 0.0626, "step": 8550},
    {"epoch": 0.42875, "grad_norm": 2.7199559211730957, "learning_rate": 2.631217105263158e-06, "loss": 0.0906, "step": 8575},
    {"epoch": 0.43, "grad_norm": 3.267314910888672, "learning_rate": 2.625460526315789e-06, "loss": 0.1068, "step": 8600},
    {"epoch": 0.43125, "grad_norm": 3.2623515129089355, "learning_rate": 2.619703947368421e-06, "loss": 0.0849, "step": 8625},
    {"epoch": 0.4325, "grad_norm": 1.8294329643249512, "learning_rate": 2.613947368421052e-06, "loss": 0.0776, "step": 8650},
    {"epoch": 0.43375, "grad_norm": 3.3888967037200928, "learning_rate": 2.608190789473684e-06, "loss": 0.0869, "step": 8675},
    {"epoch": 0.435, "grad_norm": 2.5059332847595215, "learning_rate": 2.602434210526316e-06, "loss": 0.0781, "step": 8700},
    {"epoch": 0.43625, "grad_norm": 1.8527718782424927, "learning_rate": 2.596677631578947e-06, "loss": 0.0513, "step": 8725},
    {"epoch": 0.4375, "grad_norm": 1.4375104904174805, "learning_rate": 2.590921052631579e-06, "loss": 0.053, "step": 8750},
    {"epoch": 0.43875, "grad_norm": 1.923519253730774, "learning_rate": 2.5851644736842104e-06, "loss": 0.0487, "step": 8775},
    {"epoch": 0.44, "grad_norm": 1.6237260103225708, "learning_rate": 2.579407894736842e-06, "loss": 0.0499, "step": 8800},
    {"epoch": 0.44125, "grad_norm": 1.7452889680862427, "learning_rate": 2.5736513157894734e-06, "loss": 0.0538, "step": 8825},
    {"epoch": 0.4425, "grad_norm": 1.7012261152267456, "learning_rate": 2.5678947368421053e-06, "loss": 0.0529, "step": 8850},
    {"epoch": 0.44375, "grad_norm": 1.8288905620574951, "learning_rate": 2.5621381578947364e-06, "loss": 0.0473, "step": 8875},
    {"epoch": 0.445, "grad_norm": 1.9288239479064941, "learning_rate": 2.5563815789473683e-06, "loss": 0.0683, "step": 8900},
    {"epoch": 0.44625, "grad_norm": 1.3186031579971313, "learning_rate": 2.5506249999999994e-06, "loss": 0.065, "step": 8925},
    {"epoch": 0.4475, "grad_norm": 1.340890645980835, "learning_rate": 2.5448684210526313e-06, "loss": 0.0617, "step": 8950},
    {"epoch": 0.44875, "grad_norm": 2.7007381916046143, "learning_rate": 2.539111842105263e-06, "loss": 0.0512, "step": 8975},
    {"epoch": 0.45, "grad_norm": 1.688952922821045, "learning_rate": 2.5333552631578943e-06, "loss": 0.0638, "step": 9000},
    {"epoch": 0.45, "eval_loss": 0.23539182543754578, "eval_runtime": 531.3204, "eval_samples_per_second": 3.183, "eval_steps_per_second": 0.399, "eval_wer": 8.948420132384163, "step": 9000},
{ |
|
"epoch": 0.45125, |
|
"grad_norm": 3.06502103805542, |
|
"learning_rate": 2.527598684210526e-06, |
|
"loss": 0.0691, |
|
"step": 9025 |
|
}, |
|
{ |
|
"epoch": 0.4525, |
|
"grad_norm": 2.2699365615844727, |
|
"learning_rate": 2.5218421052631578e-06, |
|
"loss": 0.0765, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.45375, |
|
"grad_norm": 3.057246208190918, |
|
"learning_rate": 2.5160855263157893e-06, |
|
"loss": 0.0874, |
|
"step": 9075 |
|
}, |
|
{ |
|
"epoch": 0.455, |
|
"grad_norm": 2.452810764312744, |
|
"learning_rate": 2.5105592105263156e-06, |
|
"loss": 0.0992, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.45625, |
|
"grad_norm": 1.8321553468704224, |
|
"learning_rate": 2.504802631578947e-06, |
|
"loss": 0.091, |
|
"step": 9125 |
|
}, |
|
{ |
|
"epoch": 0.4575, |
|
"grad_norm": 2.1675491333007812, |
|
"learning_rate": 2.499046052631579e-06, |
|
"loss": 0.0968, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.45875, |
|
"grad_norm": 2.440648317337036, |
|
"learning_rate": 2.49328947368421e-06, |
|
"loss": 0.094, |
|
"step": 9175 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 3.3630011081695557, |
|
"learning_rate": 2.487532894736842e-06, |
|
"loss": 0.0934, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.46125, |
|
"grad_norm": 3.1267924308776855, |
|
"learning_rate": 2.481776315789473e-06, |
|
"loss": 0.095, |
|
"step": 9225 |
|
}, |
|
{ |
|
"epoch": 0.4625, |
|
"grad_norm": 2.791846752166748, |
|
"learning_rate": 2.476019736842105e-06, |
|
"loss": 0.0988, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.46375, |
|
"grad_norm": 1.883380651473999, |
|
"learning_rate": 2.4702631578947365e-06, |
|
"loss": 0.089, |
|
"step": 9275 |
|
}, |
|
{ |
|
"epoch": 0.465, |
|
"grad_norm": 2.572441577911377, |
|
"learning_rate": 2.464506578947368e-06, |
|
"loss": 0.0933, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.46625, |
|
"grad_norm": 3.08231258392334, |
|
"learning_rate": 2.45875e-06, |
|
"loss": 0.0856, |
|
"step": 9325 |
|
}, |
|
{ |
|
"epoch": 0.4675, |
|
"grad_norm": 2.208491563796997, |
|
"learning_rate": 2.4529934210526315e-06, |
|
"loss": 0.0795, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.46875, |
|
"grad_norm": 2.896657943725586, |
|
"learning_rate": 2.447236842105263e-06, |
|
"loss": 0.0625, |
|
"step": 9375 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.3465672731399536, |
|
"learning_rate": 2.4414802631578945e-06, |
|
"loss": 0.0822, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.47125, |
|
"grad_norm": 3.4039506912231445, |
|
"learning_rate": 2.4357236842105264e-06, |
|
"loss": 0.0813, |
|
"step": 9425 |
|
}, |
|
{ |
|
"epoch": 0.4725, |
|
"grad_norm": 2.213761568069458, |
|
"learning_rate": 2.4299671052631575e-06, |
|
"loss": 0.075, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.47375, |
|
"grad_norm": 1.693393588066101, |
|
"learning_rate": 2.4242105263157894e-06, |
|
"loss": 0.0819, |
|
"step": 9475 |
|
}, |
|
{ |
|
"epoch": 0.475, |
|
"grad_norm": 3.1261212825775146, |
|
"learning_rate": 2.4184539473684205e-06, |
|
"loss": 0.0735, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.475, |
|
"eval_loss": 0.22311098873615265, |
|
"eval_runtime": 530.3307, |
|
"eval_samples_per_second": 3.189, |
|
"eval_steps_per_second": 0.4, |
|
"eval_wer": 8.39890096165855, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.47625, |
|
"grad_norm": 2.6011083126068115, |
|
"learning_rate": 2.4126973684210524e-06, |
|
"loss": 0.0557, |
|
"step": 9525 |
|
}, |
|
{ |
|
"epoch": 0.4775, |
|
"grad_norm": 1.4606833457946777, |
|
"learning_rate": 2.406940789473684e-06, |
|
"loss": 0.0518, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.47875, |
|
"grad_norm": 3.201547145843506, |
|
"learning_rate": 2.4011842105263154e-06, |
|
"loss": 0.0616, |
|
"step": 9575 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.470755696296692, |
|
"learning_rate": 2.395427631578947e-06, |
|
"loss": 0.0566, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.48125, |
|
"grad_norm": 1.0501068830490112, |
|
"learning_rate": 2.389671052631579e-06, |
|
"loss": 0.0482, |
|
"step": 9625 |
|
}, |
|
{ |
|
"epoch": 0.4825, |
|
"grad_norm": 1.7576944828033447, |
|
"learning_rate": 2.38391447368421e-06, |
|
"loss": 0.0487, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.48375, |
|
"grad_norm": 2.6596386432647705, |
|
"learning_rate": 2.378157894736842e-06, |
|
"loss": 0.0548, |
|
"step": 9675 |
|
}, |
|
{ |
|
"epoch": 0.485, |
|
"grad_norm": 2.2998361587524414, |
|
"learning_rate": 2.372401315789474e-06, |
|
"loss": 0.0755, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.48625, |
|
"grad_norm": 1.885953426361084, |
|
"learning_rate": 2.366644736842105e-06, |
|
"loss": 0.0635, |
|
"step": 9725 |
|
}, |
|
{ |
|
"epoch": 0.4875, |
|
"grad_norm": 1.686090111732483, |
|
"learning_rate": 2.360888157894737e-06, |
|
"loss": 0.0664, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.48875, |
|
"grad_norm": 1.487586259841919, |
|
"learning_rate": 2.3551315789473683e-06, |
|
"loss": 0.0723, |
|
"step": 9775 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.5484004020690918, |
|
"learning_rate": 2.349375e-06, |
|
"loss": 0.0697, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.49125, |
|
"grad_norm": 1.6730592250823975, |
|
"learning_rate": 2.3436184210526314e-06, |
|
"loss": 0.0726, |
|
"step": 9825 |
|
}, |
|
{ |
|
"epoch": 0.4925, |
|
"grad_norm": 1.549166202545166, |
|
"learning_rate": 2.337861842105263e-06, |
|
"loss": 0.0599, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.49375, |
|
"grad_norm": 2.127182960510254, |
|
"learning_rate": 2.3321052631578944e-06, |
|
"loss": 0.0552, |
|
"step": 9875 |
|
}, |
|
{ |
|
"epoch": 0.495, |
|
"grad_norm": 1.5453063249588013, |
|
"learning_rate": 2.3263486842105263e-06, |
|
"loss": 0.0647, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.49625, |
|
"grad_norm": 2.2514312267303467, |
|
"learning_rate": 2.3205921052631574e-06, |
|
"loss": 0.0543, |
|
"step": 9925 |
|
}, |
|
{ |
|
"epoch": 0.4975, |
|
"grad_norm": 1.5466394424438477, |
|
"learning_rate": 2.3148355263157893e-06, |
|
"loss": 0.0576, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.49875, |
|
"grad_norm": 1.1446313858032227, |
|
"learning_rate": 2.309078947368421e-06, |
|
"loss": 0.0581, |
|
"step": 9975 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.7810652256011963, |
|
"learning_rate": 2.3033223684210523e-06, |
|
"loss": 0.0548, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 0.23302312195301056, |
|
"eval_runtime": 531.2942, |
|
"eval_samples_per_second": 3.183, |
|
"eval_steps_per_second": 0.399, |
|
"eval_wer": 8.57374797052579, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.50125, |
|
"grad_norm": 1.8410784006118774, |
|
"learning_rate": 2.2975657894736842e-06, |
|
"loss": 0.0818, |
|
"step": 10025 |
|
}, |
|
{ |
|
"epoch": 0.5025, |
|
"grad_norm": 2.0660974979400635, |
|
"learning_rate": 2.2918092105263158e-06, |
|
"loss": 0.0839, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 0.50375, |
|
"grad_norm": 0.7626898288726807, |
|
"learning_rate": 2.2860526315789473e-06, |
|
"loss": 0.0717, |
|
"step": 10075 |
|
}, |
|
{ |
|
"epoch": 0.505, |
|
"grad_norm": 3.819746971130371, |
|
"learning_rate": 2.2802960526315788e-06, |
|
"loss": 0.0696, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.50625, |
|
"grad_norm": 1.8556462526321411, |
|
"learning_rate": 2.2745394736842103e-06, |
|
"loss": 0.0776, |
|
"step": 10125 |
|
}, |
|
{ |
|
"epoch": 0.5075, |
|
"grad_norm": 2.1852500438690186, |
|
"learning_rate": 2.2687828947368418e-06, |
|
"loss": 0.1228, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 0.50875, |
|
"grad_norm": 2.6284213066101074, |
|
"learning_rate": 2.2630263157894737e-06, |
|
"loss": 0.1121, |
|
"step": 10175 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 3.840794801712036, |
|
"learning_rate": 2.2572697368421048e-06, |
|
"loss": 0.087, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.51125, |
|
"grad_norm": 1.920469045639038, |
|
"learning_rate": 2.2515131578947367e-06, |
|
"loss": 0.0876, |
|
"step": 10225 |
|
}, |
|
{ |
|
"epoch": 0.5125, |
|
"grad_norm": 2.9199891090393066, |
|
"learning_rate": 2.2457565789473682e-06, |
|
"loss": 0.0812, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.51375, |
|
"grad_norm": 2.7151129245758057, |
|
"learning_rate": 2.2399999999999997e-06, |
|
"loss": 0.0733, |
|
"step": 10275 |
|
}, |
|
{ |
|
"epoch": 0.515, |
|
"grad_norm": 3.474050760269165, |
|
"learning_rate": 2.2342434210526312e-06, |
|
"loss": 0.0934, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.51625, |
|
"grad_norm": 1.5654582977294922, |
|
"learning_rate": 2.228486842105263e-06, |
|
"loss": 0.0762, |
|
"step": 10325 |
|
}, |
|
{ |
|
"epoch": 0.5175, |
|
"grad_norm": 1.0436935424804688, |
|
"learning_rate": 2.2227302631578947e-06, |
|
"loss": 0.0727, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 0.51875, |
|
"grad_norm": 0.8793361186981201, |
|
"learning_rate": 2.216973684210526e-06, |
|
"loss": 0.0471, |
|
"step": 10375 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.7731598019599915, |
|
"learning_rate": 2.211217105263158e-06, |
|
"loss": 0.0467, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.52125, |
|
"grad_norm": 1.2689337730407715, |
|
"learning_rate": 2.205460526315789e-06, |
|
"loss": 0.0485, |
|
"step": 10425 |
|
}, |
|
{ |
|
"epoch": 0.5225, |
|
"grad_norm": 1.4495617151260376, |
|
"learning_rate": 2.199703947368421e-06, |
|
"loss": 0.0541, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 0.52375, |
|
"grad_norm": 1.4262604713439941, |
|
"learning_rate": 2.193947368421052e-06, |
|
"loss": 0.0539, |
|
"step": 10475 |
|
}, |
|
{ |
|
"epoch": 0.525, |
|
"grad_norm": 1.8088651895523071, |
|
"learning_rate": 2.188190789473684e-06, |
|
"loss": 0.0557, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.525, |
|
"eval_loss": 0.2133007049560547, |
|
"eval_runtime": 530.2894, |
|
"eval_samples_per_second": 3.189, |
|
"eval_steps_per_second": 0.4, |
|
"eval_wer": 8.361433745472711, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.52625, |
|
"grad_norm": 1.178223967552185, |
|
"learning_rate": 2.1824342105263156e-06, |
|
"loss": 0.0549, |
|
"step": 10525 |
|
}, |
|
{ |
|
"epoch": 0.5275, |
|
"grad_norm": 1.7510823011398315, |
|
"learning_rate": 2.176677631578947e-06, |
|
"loss": 0.0557, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 0.52875, |
|
"grad_norm": 0.9500125050544739, |
|
"learning_rate": 2.1709210526315786e-06, |
|
"loss": 0.0553, |
|
"step": 10575 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.060792922973633, |
|
"learning_rate": 2.1651644736842106e-06, |
|
"loss": 0.0596, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.53125, |
|
"grad_norm": 2.1061859130859375, |
|
"learning_rate": 2.159407894736842e-06, |
|
"loss": 0.0539, |
|
"step": 10625 |
|
}, |
|
{ |
|
"epoch": 0.5325, |
|
"grad_norm": 1.6122857332229614, |
|
"learning_rate": 2.1536513157894736e-06, |
|
"loss": 0.053, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 0.53375, |
|
"grad_norm": 2.2909045219421387, |
|
"learning_rate": 2.147894736842105e-06, |
|
"loss": 0.0614, |
|
"step": 10675 |
|
}, |
|
{ |
|
"epoch": 0.535, |
|
"grad_norm": 3.2241578102111816, |
|
"learning_rate": 2.1421381578947366e-06, |
|
"loss": 0.0829, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.53625, |
|
"grad_norm": 2.7384145259857178, |
|
"learning_rate": 2.136611842105263e-06, |
|
"loss": 0.0817, |
|
"step": 10725 |
|
}, |
|
{ |
|
"epoch": 0.5375, |
|
"grad_norm": 1.8319401741027832, |
|
"learning_rate": 2.1308552631578944e-06, |
|
"loss": 0.0823, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.53875, |
|
"grad_norm": 2.4007859230041504, |
|
"learning_rate": 2.125098684210526e-06, |
|
"loss": 0.0733, |
|
"step": 10775 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 2.042520046234131, |
|
"learning_rate": 2.119342105263158e-06, |
|
"loss": 0.0838, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.54125, |
|
"grad_norm": 2.0478389263153076, |
|
"learning_rate": 2.1135855263157893e-06, |
|
"loss": 0.0831, |
|
"step": 10825 |
|
}, |
|
{ |
|
"epoch": 0.5425, |
|
"grad_norm": 2.357926607131958, |
|
"learning_rate": 2.107828947368421e-06, |
|
"loss": 0.0728, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.54375, |
|
"grad_norm": 2.214553117752075, |
|
"learning_rate": 2.1020723684210523e-06, |
|
"loss": 0.0804, |
|
"step": 10875 |
|
}, |
|
{ |
|
"epoch": 0.545, |
|
"grad_norm": 3.484598398208618, |
|
"learning_rate": 2.0963157894736843e-06, |
|
"loss": 0.0592, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.54625, |
|
"grad_norm": 1.5546646118164062, |
|
"learning_rate": 2.0905592105263158e-06, |
|
"loss": 0.0577, |
|
"step": 10925 |
|
}, |
|
{ |
|
"epoch": 0.5475, |
|
"grad_norm": 2.218691349029541, |
|
"learning_rate": 2.0848026315789473e-06, |
|
"loss": 0.053, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 0.54875, |
|
"grad_norm": 2.9559834003448486, |
|
"learning_rate": 2.0790460526315788e-06, |
|
"loss": 0.0543, |
|
"step": 10975 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.6290831565856934, |
|
"learning_rate": 2.0732894736842103e-06, |
|
"loss": 0.0626, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 0.2083810567855835, |
|
"eval_runtime": 531.6457, |
|
"eval_samples_per_second": 3.181, |
|
"eval_steps_per_second": 0.399, |
|
"eval_wer": 8.286499313101036, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.55125, |
|
"grad_norm": 2.2507994174957275, |
|
"learning_rate": 2.067532894736842e-06, |
|
"loss": 0.0645, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 0.5525, |
|
"grad_norm": 3.930997133255005, |
|
"learning_rate": 2.0617763157894733e-06, |
|
"loss": 0.0699, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 0.55375, |
|
"grad_norm": 3.1073126792907715, |
|
"learning_rate": 2.056019736842105e-06, |
|
"loss": 0.0852, |
|
"step": 11075 |
|
}, |
|
{ |
|
"epoch": 0.555, |
|
"grad_norm": 2.5678088665008545, |
|
"learning_rate": 2.0502631578947367e-06, |
|
"loss": 0.0863, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.55625, |
|
"grad_norm": 2.97763729095459, |
|
"learning_rate": 2.0445065789473682e-06, |
|
"loss": 0.0718, |
|
"step": 11125 |
|
}, |
|
{ |
|
"epoch": 0.5575, |
|
"grad_norm": 1.2580708265304565, |
|
"learning_rate": 2.0387499999999998e-06, |
|
"loss": 0.0462, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 0.55875, |
|
"grad_norm": 1.804002285003662, |
|
"learning_rate": 2.0329934210526317e-06, |
|
"loss": 0.0364, |
|
"step": 11175 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.492600679397583, |
|
"learning_rate": 2.027236842105263e-06, |
|
"loss": 0.0438, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.56125, |
|
"grad_norm": 2.423004627227783, |
|
"learning_rate": 2.0214802631578947e-06, |
|
"loss": 0.031, |
|
"step": 11225 |
|
}, |
|
{ |
|
"epoch": 0.5625, |
|
"grad_norm": 1.5198426246643066, |
|
"learning_rate": 2.015723684210526e-06, |
|
"loss": 0.0324, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.56375, |
|
"grad_norm": 0.9852400422096252, |
|
"learning_rate": 2.0099671052631577e-06, |
|
"loss": 0.029, |
|
"step": 11275 |
|
}, |
|
{ |
|
"epoch": 0.565, |
|
"grad_norm": 1.2327955961227417, |
|
"learning_rate": 2.004210526315789e-06, |
|
"loss": 0.0406, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.56625, |
|
"grad_norm": 1.455636978149414, |
|
"learning_rate": 1.9984539473684207e-06, |
|
"loss": 0.047, |
|
"step": 11325 |
|
}, |
|
{ |
|
"epoch": 0.5675, |
|
"grad_norm": 1.4720903635025024, |
|
"learning_rate": 1.9926973684210522e-06, |
|
"loss": 0.0444, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 0.56875, |
|
"grad_norm": 1.7255401611328125, |
|
"learning_rate": 1.986940789473684e-06, |
|
"loss": 0.0514, |
|
"step": 11375 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.3503352403640747, |
|
"learning_rate": 1.9811842105263157e-06, |
|
"loss": 0.0533, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.57125, |
|
"grad_norm": 1.5066325664520264, |
|
"learning_rate": 1.975427631578947e-06, |
|
"loss": 0.0524, |
|
"step": 11425 |
|
}, |
|
{ |
|
"epoch": 0.5725, |
|
"grad_norm": 1.877842903137207, |
|
"learning_rate": 1.9696710526315787e-06, |
|
"loss": 0.0519, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 0.57375, |
|
"grad_norm": 1.4466218948364258, |
|
"learning_rate": 1.9639144736842106e-06, |
|
"loss": 0.0548, |
|
"step": 11475 |
|
}, |
|
{ |
|
"epoch": 0.575, |
|
"grad_norm": 1.3053616285324097, |
|
"learning_rate": 1.958157894736842e-06, |
|
"loss": 0.0472, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.575, |
|
"eval_loss": 0.23307645320892334, |
|
"eval_runtime": 536.26, |
|
"eval_samples_per_second": 3.153, |
|
"eval_steps_per_second": 0.395, |
|
"eval_wer": 8.074185088047958, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.57625, |
|
"grad_norm": 1.172753930091858, |
|
"learning_rate": 1.9524013157894736e-06, |
|
"loss": 0.0506, |
|
"step": 11525 |
|
}, |
|
{ |
|
"epoch": 0.5775, |
|
"grad_norm": 1.700363039970398, |
|
"learning_rate": 1.946644736842105e-06, |
|
"loss": 0.0585, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 0.57875, |
|
"grad_norm": 1.3203791379928589, |
|
"learning_rate": 1.9408881578947366e-06, |
|
"loss": 0.0499, |
|
"step": 11575 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.4109314680099487, |
|
"learning_rate": 1.935131578947368e-06, |
|
"loss": 0.0433, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.58125, |
|
"grad_norm": 1.3247355222702026, |
|
"learning_rate": 1.929375e-06, |
|
"loss": 0.0378, |
|
"step": 11625 |
|
}, |
|
{ |
|
"epoch": 0.5825, |
|
"grad_norm": 0.9325533509254456, |
|
"learning_rate": 1.9236184210526316e-06, |
|
"loss": 0.0442, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 0.58375, |
|
"grad_norm": 1.8996745347976685, |
|
"learning_rate": 1.917861842105263e-06, |
|
"loss": 0.049, |
|
"step": 11675 |
|
}, |
|
{ |
|
"epoch": 0.585, |
|
"grad_norm": 1.7976350784301758, |
|
"learning_rate": 1.9121052631578946e-06, |
|
"loss": 0.0467, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.58625, |
|
"grad_norm": 2.180805206298828, |
|
"learning_rate": 1.906348684210526e-06, |
|
"loss": 0.0493, |
|
"step": 11725 |
|
}, |
|
{ |
|
"epoch": 0.5875, |
|
"grad_norm": 1.2519850730895996, |
|
"learning_rate": 1.9005921052631576e-06, |
|
"loss": 0.0486, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 0.58875, |
|
"grad_norm": 2.3758866786956787, |
|
"learning_rate": 1.8948355263157893e-06, |
|
"loss": 0.0584, |
|
"step": 11775 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 2.0312483310699463, |
|
"learning_rate": 1.8890789473684208e-06, |
|
"loss": 0.0702, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.59125, |
|
"grad_norm": 2.017726182937622, |
|
"learning_rate": 1.8833223684210525e-06, |
|
"loss": 0.0822, |
|
"step": 11825 |
|
}, |
|
{ |
|
"epoch": 0.5925, |
|
"grad_norm": 2.159196138381958, |
|
"learning_rate": 1.8775657894736842e-06, |
|
"loss": 0.0918, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 0.59375, |
|
"grad_norm": 2.8051164150238037, |
|
"learning_rate": 1.8718092105263158e-06, |
|
"loss": 0.0927, |
|
"step": 11875 |
|
}, |
|
{ |
|
"epoch": 0.595, |
|
"grad_norm": 1.9617701768875122, |
|
"learning_rate": 1.8660526315789473e-06, |
|
"loss": 0.0762, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.59625, |
|
"grad_norm": 1.4993948936462402, |
|
"learning_rate": 1.8602960526315788e-06, |
|
"loss": 0.0768, |
|
"step": 11925 |
|
}, |
|
{ |
|
"epoch": 0.5975, |
|
"grad_norm": 2.1341333389282227, |
|
"learning_rate": 1.8545394736842105e-06, |
|
"loss": 0.0647, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 0.59875, |
|
"grad_norm": 1.5004290342330933, |
|
"learning_rate": 1.848782894736842e-06, |
|
"loss": 0.0669, |
|
"step": 11975 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 3.0987565517425537, |
|
"learning_rate": 1.8430263157894735e-06, |
|
"loss": 0.0636, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 0.2118152379989624, |
|
"eval_runtime": 536.0484, |
|
"eval_samples_per_second": 3.155, |
|
"eval_steps_per_second": 0.395, |
|
"eval_wer": 7.961783439490445, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.60125, |
|
"grad_norm": 1.6456586122512817, |
|
"learning_rate": 1.837269736842105e-06, |
|
"loss": 0.0701, |
|
"step": 12025 |
|
}, |
|
{ |
|
"epoch": 0.6025, |
|
"grad_norm": 2.0990679264068604, |
|
"learning_rate": 1.8315131578947367e-06, |
|
"loss": 0.0573, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 0.60375, |
|
"grad_norm": 1.8728748559951782, |
|
"learning_rate": 1.8257565789473682e-06, |
|
"loss": 0.054, |
|
"step": 12075 |
|
}, |
|
{ |
|
"epoch": 0.605, |
|
"grad_norm": 1.2849019765853882, |
|
"learning_rate": 1.8199999999999997e-06, |
|
"loss": 0.0522, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.60625, |
|
"grad_norm": 1.6803030967712402, |
|
"learning_rate": 1.8142434210526312e-06, |
|
"loss": 0.0492, |
|
"step": 12125 |
|
}, |
|
{ |
|
"epoch": 0.6075, |
|
"grad_norm": 1.9102485179901123, |
|
"learning_rate": 1.808486842105263e-06, |
|
"loss": 0.0482, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 0.60875, |
|
"grad_norm": 1.1118731498718262, |
|
"learning_rate": 1.8027302631578947e-06, |
|
"loss": 0.0422, |
|
"step": 12175 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.1670501232147217, |
|
"learning_rate": 1.7969736842105262e-06, |
|
"loss": 0.0515, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.61125, |
|
"grad_norm": 2.522876739501953, |
|
"learning_rate": 1.7912171052631579e-06, |
|
"loss": 0.0412, |
|
"step": 12225 |
|
}, |
|
{ |
|
"epoch": 0.6125, |
|
"grad_norm": 1.2704464197158813, |
|
"learning_rate": 1.7854605263157894e-06, |
|
"loss": 0.0508, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 0.61375, |
|
"grad_norm": 2.399094343185425, |
|
"learning_rate": 1.779703947368421e-06, |
|
"loss": 0.0547, |
|
"step": 12275 |
|
}, |
|
{ |
|
"epoch": 0.615, |
|
"grad_norm": 2.2606582641601562, |
|
"learning_rate": 1.7739473684210524e-06, |
|
"loss": 0.0562, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.61625, |
|
"grad_norm": 0.5112090110778809, |
|
"learning_rate": 1.7681907894736841e-06, |
|
"loss": 0.0513, |
|
"step": 12325 |
|
}, |
|
{ |
|
"epoch": 0.6175, |
|
"grad_norm": 1.1044148206710815, |
|
"learning_rate": 1.7624342105263156e-06, |
|
"loss": 0.0544, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 0.61875, |
|
"grad_norm": 1.2760109901428223, |
|
"learning_rate": 1.7566776315789471e-06, |
|
"loss": 0.0512, |
|
"step": 12375 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.3780227899551392, |
|
"learning_rate": 1.7509210526315786e-06, |
|
"loss": 0.0546, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.62125, |
|
"grad_norm": 1.0981767177581787, |
|
"learning_rate": 1.7451644736842104e-06, |
|
"loss": 0.041, |
|
"step": 12425 |
|
}, |
|
{ |
|
"epoch": 0.6225, |
|
"grad_norm": 2.353482484817505, |
|
"learning_rate": 1.7394078947368419e-06, |
|
"loss": 0.0479, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 0.62375, |
|
"grad_norm": 1.3375900983810425, |
|
"learning_rate": 1.7336513157894734e-06, |
|
"loss": 0.0522, |
|
"step": 12475 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 2.1002514362335205, |
|
"learning_rate": 1.7278947368421053e-06, |
|
"loss": 0.0466, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"eval_loss": 0.21263667941093445, |
|
"eval_runtime": 535.5066, |
|
"eval_samples_per_second": 3.158, |
|
"eval_steps_per_second": 0.396, |
|
"eval_wer": 7.468465093043587, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.62625, |
|
"grad_norm": 1.5551177263259888, |
|
"learning_rate": 1.7221381578947368e-06, |
|
"loss": 0.0584, |
|
"step": 12525 |
|
}, |
|
{ |
|
"epoch": 0.6275, |
|
"grad_norm": 2.234121322631836, |
|
"learning_rate": 1.7163815789473683e-06, |
|
"loss": 0.061, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 0.62875, |
|
"grad_norm": 2.269101619720459, |
|
"learning_rate": 1.7106249999999998e-06, |
|
"loss": 0.0607, |
|
"step": 12575 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 2.8848202228546143, |
|
"learning_rate": 1.7048684210526315e-06, |
|
"loss": 0.0675, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.63125, |
|
"grad_norm": 2.2159249782562256, |
|
"learning_rate": 1.699111842105263e-06, |
|
"loss": 0.0783, |
|
"step": 12625 |
|
}, |
|
{ |
|
"epoch": 0.6325, |
|
"grad_norm": 1.5829565525054932, |
|
"learning_rate": 1.6933552631578946e-06, |
|
"loss": 0.0834, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 0.63375, |
|
"grad_norm": 1.9816817045211792, |
|
"learning_rate": 1.687598684210526e-06, |
|
"loss": 0.0727, |
|
"step": 12675 |
|
}, |
|
{ |
|
"epoch": 0.635, |
|
"grad_norm": 2.8434395790100098, |
|
"learning_rate": 1.6818421052631578e-06, |
|
"loss": 0.0778, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.63625, |
|
"grad_norm": 2.4956297874450684, |
|
"learning_rate": 1.6760855263157893e-06, |
|
"loss": 0.0731, |
|
"step": 12725 |
|
}, |
|
{ |
|
"epoch": 0.6375, |
|
"grad_norm": 1.7429981231689453, |
|
"learning_rate": 1.6703289473684208e-06, |
|
"loss": 0.0637, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 0.63875, |
|
"grad_norm": 2.3022801876068115, |
|
"learning_rate": 1.6645723684210525e-06, |
|
"loss": 0.0708, |
|
"step": 12775 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.621469497680664, |
|
"learning_rate": 1.658815789473684e-06, |
|
"loss": 0.0466, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.64125, |
|
"grad_norm": 1.7762545347213745, |
|
"learning_rate": 1.6530592105263155e-06, |
|
"loss": 0.0544, |
|
"step": 12825 |
|
}, |
|
{ |
|
"epoch": 0.6425, |
|
"grad_norm": 1.568123698234558, |
|
"learning_rate": 1.6473026315789472e-06, |
|
"loss": 0.0457, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 0.64375, |
|
"grad_norm": 0.5994829535484314, |
|
"learning_rate": 1.641546052631579e-06, |
|
"loss": 0.0487, |
|
"step": 12875 |
|
}, |
|
{ |
|
"epoch": 0.645, |
|
"grad_norm": 1.9480714797973633, |
|
"learning_rate": 1.6357894736842105e-06, |
|
"loss": 0.0503, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.64625, |
|
"grad_norm": 2.2603769302368164, |
|
"learning_rate": 1.630032894736842e-06, |
|
"loss": 0.0705, |
|
"step": 12925 |
|
}, |
|
{ |
|
"epoch": 0.6475, |
|
"grad_norm": 2.2942919731140137, |
|
"learning_rate": 1.6242763157894737e-06, |
|
"loss": 0.0666, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 0.64875, |
|
"grad_norm": 2.819730758666992, |
|
"learning_rate": 1.6185197368421052e-06, |
|
"loss": 0.0736, |
|
"step": 12975 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.8207030296325684, |
|
"learning_rate": 1.6127631578947367e-06, |
|
"loss": 0.0604, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 0.21604977548122406, |
|
"eval_runtime": 534.676, |
|
"eval_samples_per_second": 3.163, |
|
"eval_steps_per_second": 0.397, |
|
"eval_wer": 7.655801173972773, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.65125, |
|
"grad_norm": 2.0296692848205566, |
|
"learning_rate": 1.6070065789473682e-06, |
|
"loss": 0.0745, |
|
"step": 13025 |
|
}, |
|
{ |
|
"epoch": 0.6525, |
|
"grad_norm": 3.9246408939361572, |
|
"learning_rate": 1.60125e-06, |
|
"loss": 0.0862, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 0.65375, |
|
"grad_norm": 1.9909517765045166, |
|
"learning_rate": 1.5954934210526314e-06, |
|
"loss": 0.0676, |
|
"step": 13075 |
|
}, |
|
{ |
|
"epoch": 0.655, |
|
"grad_norm": 2.652264356613159, |
|
"learning_rate": 1.589736842105263e-06, |
|
"loss": 0.0823, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.65625, |
|
"grad_norm": 2.1940698623657227, |
|
"learning_rate": 1.5839802631578944e-06, |
|
"loss": 0.0775, |
|
"step": 13125 |
|
}, |
|
{ |
|
"epoch": 0.6575, |
|
"grad_norm": 3.084667444229126, |
|
"learning_rate": 1.5782236842105262e-06, |
|
"loss": 0.0779, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 0.65875, |
|
"grad_norm": 2.134045124053955, |
|
"learning_rate": 1.5724671052631579e-06, |
|
"loss": 0.0756, |
|
"step": 13175 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 2.4405481815338135, |
|
"learning_rate": 1.5667105263157894e-06, |
|
"loss": 0.075, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.66125, |
|
"grad_norm": 2.251408100128174, |
|
"learning_rate": 1.560953947368421e-06, |
|
"loss": 0.0668, |
|
"step": 13225 |
|
}, |
|
{ |
|
"epoch": 0.6625, |
|
"grad_norm": 2.21307635307312, |
|
"learning_rate": 1.5551973684210526e-06, |
|
"loss": 0.076, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 0.66375, |
|
"grad_norm": 3.1692416667938232, |
|
"learning_rate": 1.549440789473684e-06, |
|
"loss": 0.0841, |
|
"step": 13275 |
|
}, |
|
{ |
|
"epoch": 0.665, |
|
"grad_norm": 2.4879300594329834, |
|
"learning_rate": 1.5436842105263156e-06, |
|
"loss": 0.0785, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.66625, |
|
"grad_norm": 1.6188695430755615, |
|
"learning_rate": 1.5379276315789473e-06, |
|
"loss": 0.0698, |
|
"step": 13325 |
|
}, |
|
{ |
|
"epoch": 0.6675, |
|
"grad_norm": 2.258192300796509, |
|
"learning_rate": 1.5321710526315788e-06, |
|
"loss": 0.0682, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 0.66875, |
|
"grad_norm": 1.7001844644546509, |
|
"learning_rate": 1.5264144736842103e-06, |
|
"loss": 0.0728, |
|
"step": 13375 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 2.0650229454040527, |
|
"learning_rate": 1.5206578947368418e-06, |
|
"loss": 0.0608, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.67125, |
|
"grad_norm": 1.0384840965270996, |
|
"learning_rate": 1.5149013157894736e-06, |
|
"loss": 0.0521, |
|
"step": 13425 |
|
}, |
|
{ |
|
"epoch": 0.6725, |
|
"grad_norm": 1.458274483680725, |
|
"learning_rate": 1.509144736842105e-06, |
|
"loss": 0.0544, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 0.67375, |
|
"grad_norm": 1.678476095199585, |
|
"learning_rate": 1.5033881578947366e-06, |
|
"loss": 0.0478, |
|
"step": 13475 |
|
}, |
|
{ |
|
"epoch": 0.675, |
|
"grad_norm": 2.1401052474975586, |
|
"learning_rate": 1.497631578947368e-06, |
|
"loss": 0.0544, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.675, |
|
"eval_loss": 0.21870100498199463, |
|
"eval_runtime": 534.1154, |
|
"eval_samples_per_second": 3.166, |
|
"eval_steps_per_second": 0.397, |
|
"eval_wer": 7.999250655676284, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.67625, |
|
"grad_norm": 1.387534737586975, |
|
"learning_rate": 1.491875e-06, |
|
"loss": 0.0497, |
|
"step": 13525 |
|
}, |
|
{ |
|
"epoch": 0.6775, |
|
"grad_norm": 2.2233715057373047, |
|
"learning_rate": 1.4861184210526315e-06, |
|
"loss": 0.0628, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 0.67875, |
|
"grad_norm": 2.775345802307129, |
|
"learning_rate": 1.480361842105263e-06, |
|
"loss": 0.0883, |
|
"step": 13575 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 2.7996487617492676, |
|
"learning_rate": 1.4746052631578947e-06, |
|
"loss": 0.0895, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.68125, |
|
"grad_norm": 2.4933836460113525, |
|
"learning_rate": 1.4688486842105262e-06, |
|
"loss": 0.0876, |
|
"step": 13625 |
|
}, |
|
{ |
|
"epoch": 0.6825, |
|
"grad_norm": 3.253474712371826, |
|
"learning_rate": 1.4630921052631578e-06, |
|
"loss": 0.0725, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 0.68375, |
|
"grad_norm": 2.5821990966796875, |
|
"learning_rate": 1.4573355263157893e-06, |
|
"loss": 0.088, |
|
"step": 13675 |
|
}, |
|
{ |
|
"epoch": 0.685, |
|
"grad_norm": 3.219723701477051, |
|
"learning_rate": 1.451578947368421e-06, |
|
"loss": 0.079, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.68625, |
|
"grad_norm": 2.1482114791870117, |
|
"learning_rate": 1.4458223684210525e-06, |
|
"loss": 0.0715, |
|
"step": 13725 |
|
}, |
|
{ |
|
"epoch": 0.6875, |
|
"grad_norm": 3.403439521789551, |
|
"learning_rate": 1.440065789473684e-06, |
|
"loss": 0.0731, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 0.68875, |
|
"grad_norm": 2.0612175464630127, |
|
"learning_rate": 1.4343092105263155e-06, |
|
"loss": 0.0669, |
|
"step": 13775 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 2.5637385845184326, |
|
"learning_rate": 1.4285526315789472e-06, |
|
"loss": 0.0766, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.69125, |
|
"grad_norm": 1.8747389316558838, |
|
"learning_rate": 1.4227960526315787e-06, |
|
"loss": 0.0723, |
|
"step": 13825 |
|
}, |
|
{ |
|
"epoch": 0.6925, |
|
"grad_norm": 2.6436047554016113, |
|
"learning_rate": 1.4170394736842104e-06, |
|
"loss": 0.0694, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 0.69375, |
|
"grad_norm": 2.300952911376953, |
|
"learning_rate": 1.4112828947368422e-06, |
|
"loss": 0.0711, |
|
"step": 13875 |
|
}, |
|
{ |
|
"epoch": 0.695, |
|
"grad_norm": 2.480396032333374, |
|
"learning_rate": 1.4055263157894737e-06, |
|
"loss": 0.0695, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.69625, |
|
"grad_norm": 3.047656536102295, |
|
"learning_rate": 1.3997697368421052e-06, |
|
"loss": 0.0827, |
|
"step": 13925 |
|
}, |
|
{ |
|
"epoch": 0.6975, |
|
"grad_norm": 1.8521438837051392, |
|
"learning_rate": 1.3940131578947367e-06, |
|
"loss": 0.0799, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 0.69875, |
|
"grad_norm": 3.52673602104187, |
|
"learning_rate": 1.3882565789473684e-06, |
|
"loss": 0.0819, |
|
"step": 13975 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 2.5274155139923096, |
|
"learning_rate": 1.3824999999999999e-06, |
|
"loss": 0.07, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 0.21170927584171295, |
|
"eval_runtime": 534.7374, |
|
"eval_samples_per_second": 3.162, |
|
"eval_steps_per_second": 0.396, |
|
"eval_wer": 7.437242412888723, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.70125, |
|
"grad_norm": 3.9497313499450684, |
|
"learning_rate": 1.3767434210526314e-06, |
|
"loss": 0.0977, |
|
"step": 14025 |
|
}, |
|
{ |
|
"epoch": 0.7025, |
|
"grad_norm": 5.4897284507751465, |
|
"learning_rate": 1.3709868421052631e-06, |
|
"loss": 0.1658, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 0.70375, |
|
"grad_norm": 3.0957064628601074, |
|
"learning_rate": 1.3652302631578946e-06, |
|
"loss": 0.1823, |
|
"step": 14075 |
|
}, |
|
{ |
|
"epoch": 0.705, |
|
"grad_norm": 3.2891457080841064, |
|
"learning_rate": 1.3594736842105261e-06, |
|
"loss": 0.1777, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.70625, |
|
"grad_norm": 3.642838954925537, |
|
"learning_rate": 1.3537171052631576e-06, |
|
"loss": 0.177, |
|
"step": 14125 |
|
}, |
|
{ |
|
"epoch": 0.7075, |
|
"grad_norm": 4.022505760192871, |
|
"learning_rate": 1.3479605263157894e-06, |
|
"loss": 0.1773, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 0.70875, |
|
"grad_norm": 3.632260799407959, |
|
"learning_rate": 1.3422039473684209e-06, |
|
"loss": 0.138, |
|
"step": 14175 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.6560989618301392, |
|
"learning_rate": 1.3364473684210526e-06, |
|
"loss": 0.1163, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.71125, |
|
"grad_norm": 1.4849154949188232, |
|
"learning_rate": 1.3306907894736843e-06, |
|
"loss": 0.1001, |
|
"step": 14225 |
|
}, |
|
{ |
|
"epoch": 0.7125, |
|
"grad_norm": 2.3382551670074463, |
|
"learning_rate": 1.3249342105263158e-06, |
|
"loss": 0.0748, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 0.71375, |
|
"grad_norm": 3.0243709087371826, |
|
"learning_rate": 1.3191776315789473e-06, |
|
"loss": 0.0699, |
|
"step": 14275 |
|
}, |
|
{ |
|
"epoch": 0.715, |
|
"grad_norm": 3.4510324001312256, |
|
"learning_rate": 1.3134210526315788e-06, |
|
"loss": 0.0822, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.71625, |
|
"grad_norm": 1.71156907081604, |
|
"learning_rate": 1.3076644736842105e-06, |
|
"loss": 0.0817, |
|
"step": 14325 |
|
}, |
|
{ |
|
"epoch": 0.7175, |
|
"grad_norm": 1.4711543321609497, |
|
"learning_rate": 1.301907894736842e-06, |
|
"loss": 0.0573, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 0.71875, |
|
"grad_norm": 1.4108855724334717, |
|
"learning_rate": 1.2961513157894735e-06, |
|
"loss": 0.0518, |
|
"step": 14375 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.4882175922393799, |
|
"learning_rate": 1.290394736842105e-06, |
|
"loss": 0.0585, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.72125, |
|
"grad_norm": 1.6964808702468872, |
|
"learning_rate": 1.2846381578947368e-06, |
|
"loss": 0.0562, |
|
"step": 14425 |
|
}, |
|
{ |
|
"epoch": 0.7225, |
|
"grad_norm": 1.7226653099060059, |
|
"learning_rate": 1.2788815789473683e-06, |
|
"loss": 0.0574, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 0.72375, |
|
"grad_norm": 2.7214572429656982, |
|
"learning_rate": 1.2731249999999998e-06, |
|
"loss": 0.0629, |
|
"step": 14475 |
|
}, |
|
{ |
|
"epoch": 0.725, |
|
"grad_norm": 1.1752701997756958, |
|
"learning_rate": 1.2673684210526313e-06, |
|
"loss": 0.0534, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.725, |
|
"eval_loss": 0.13807399570941925, |
|
"eval_runtime": 533.693, |
|
"eval_samples_per_second": 3.168, |
|
"eval_steps_per_second": 0.397, |
|
"eval_wer": 7.04383664293743, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.72625, |
|
"grad_norm": 5.266875267028809, |
|
"learning_rate": 1.261611842105263e-06, |
|
"loss": 0.0553, |
|
"step": 14525 |
|
}, |
|
{ |
|
"epoch": 0.7275, |
|
"grad_norm": 2.1979897022247314, |
|
"learning_rate": 1.2558552631578947e-06, |
|
"loss": 0.0498, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 0.72875, |
|
"grad_norm": 1.445584774017334, |
|
"learning_rate": 1.2500986842105262e-06, |
|
"loss": 0.0432, |
|
"step": 14575 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.985780656337738, |
|
"learning_rate": 1.244342105263158e-06, |
|
"loss": 0.0398, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.73125, |
|
"grad_norm": 1.4595451354980469, |
|
"learning_rate": 1.2385855263157894e-06, |
|
"loss": 0.0472, |
|
"step": 14625 |
|
}, |
|
{ |
|
"epoch": 0.7325, |
|
"grad_norm": 1.6958725452423096, |
|
"learning_rate": 1.232828947368421e-06, |
|
"loss": 0.0451, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 0.73375, |
|
"grad_norm": 1.4922881126403809, |
|
"learning_rate": 1.2270723684210525e-06, |
|
"loss": 0.0483, |
|
"step": 14675 |
|
}, |
|
{ |
|
"epoch": 0.735, |
|
"grad_norm": 2.243989944458008, |
|
"learning_rate": 1.2213157894736842e-06, |
|
"loss": 0.0691, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.73625, |
|
"grad_norm": 3.160104513168335, |
|
"learning_rate": 1.2155592105263157e-06, |
|
"loss": 0.0814, |
|
"step": 14725 |
|
}, |
|
{ |
|
"epoch": 0.7375, |
|
"grad_norm": 2.0205318927764893, |
|
"learning_rate": 1.2098026315789472e-06, |
|
"loss": 0.0693, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 0.73875, |
|
"grad_norm": 1.519434928894043, |
|
"learning_rate": 1.2040460526315787e-06, |
|
"loss": 0.0589, |
|
"step": 14775 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 2.59538197517395, |
|
"learning_rate": 1.1982894736842104e-06, |
|
"loss": 0.0546, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.74125, |
|
"grad_norm": 2.137489080429077, |
|
"learning_rate": 1.192532894736842e-06, |
|
"loss": 0.0679, |
|
"step": 14825 |
|
}, |
|
{ |
|
"epoch": 0.7425, |
|
"grad_norm": 1.5184602737426758, |
|
"learning_rate": 1.1867763157894734e-06, |
|
"loss": 0.0685, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 0.74375, |
|
"grad_norm": 2.101884365081787, |
|
"learning_rate": 1.1810197368421054e-06, |
|
"loss": 0.0526, |
|
"step": 14875 |
|
}, |
|
{ |
|
"epoch": 0.745, |
|
"grad_norm": 1.778254508972168, |
|
"learning_rate": 1.1752631578947369e-06, |
|
"loss": 0.0463, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.74625, |
|
"grad_norm": 2.073361873626709, |
|
"learning_rate": 1.1695065789473684e-06, |
|
"loss": 0.0542, |
|
"step": 14925 |
|
}, |
|
{ |
|
"epoch": 0.7475, |
|
"grad_norm": 2.091325283050537, |
|
"learning_rate": 1.1637499999999999e-06, |
|
"loss": 0.0456, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 0.74875, |
|
"grad_norm": 1.7418571710586548, |
|
"learning_rate": 1.1579934210526316e-06, |
|
"loss": 0.0435, |
|
"step": 14975 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.8316125869750977, |
|
"learning_rate": 1.152236842105263e-06, |
|
"loss": 0.046, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 0.14957565069198608, |
|
"eval_runtime": 534.2678, |
|
"eval_samples_per_second": 3.165, |
|
"eval_steps_per_second": 0.397, |
|
"eval_wer": 7.081303859123267, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.75125, |
|
"grad_norm": 2.781534433364868, |
|
"learning_rate": 1.1467105263157894e-06, |
|
"loss": 0.0728, |
|
"step": 15025 |
|
}, |
|
{ |
|
"epoch": 0.7525, |
|
"grad_norm": 2.0675017833709717, |
|
"learning_rate": 1.1409539473684209e-06, |
|
"loss": 0.095, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 0.75375, |
|
"grad_norm": 3.430636167526245, |
|
"learning_rate": 1.1351973684210524e-06, |
|
"loss": 0.0966, |
|
"step": 15075 |
|
}, |
|
{ |
|
"epoch": 0.755, |
|
"grad_norm": 3.50378680229187, |
|
"learning_rate": 1.129440789473684e-06, |
|
"loss": 0.1087, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.75625, |
|
"grad_norm": 2.9562337398529053, |
|
"learning_rate": 1.1236842105263156e-06, |
|
"loss": 0.1098, |
|
"step": 15125 |
|
}, |
|
{ |
|
"epoch": 0.7575, |
|
"grad_norm": 2.7388198375701904, |
|
"learning_rate": 1.1179276315789471e-06, |
|
"loss": 0.1328, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 0.75875, |
|
"grad_norm": 3.3490402698516846, |
|
"learning_rate": 1.112171052631579e-06, |
|
"loss": 0.097, |
|
"step": 15175 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.3750718832015991, |
|
"learning_rate": 1.1064144736842105e-06, |
|
"loss": 0.0722, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.76125, |
|
"grad_norm": 1.7064391374588013, |
|
"learning_rate": 1.100657894736842e-06, |
|
"loss": 0.0588, |
|
"step": 15225 |
|
}, |
|
{ |
|
"epoch": 0.7625, |
|
"grad_norm": 1.8604276180267334, |
|
"learning_rate": 1.0949013157894736e-06, |
|
"loss": 0.0557, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 0.76375, |
|
"grad_norm": 1.2240312099456787, |
|
"learning_rate": 1.0891447368421053e-06, |
|
"loss": 0.0438, |
|
"step": 15275 |
|
}, |
|
{ |
|
"epoch": 0.765, |
|
"grad_norm": 1.5873894691467285, |
|
"learning_rate": 1.0833881578947368e-06, |
|
"loss": 0.0471, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.76625, |
|
"grad_norm": 1.645041823387146, |
|
"learning_rate": 1.0776315789473683e-06, |
|
"loss": 0.0586, |
|
"step": 15325 |
|
}, |
|
{ |
|
"epoch": 0.7675, |
|
"grad_norm": 2.3403167724609375, |
|
"learning_rate": 1.0718749999999998e-06, |
|
"loss": 0.0698, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 0.76875, |
|
"grad_norm": 2.5629897117614746, |
|
"learning_rate": 1.0661184210526315e-06, |
|
"loss": 0.068, |
|
"step": 15375 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 2.1160974502563477, |
|
"learning_rate": 1.060361842105263e-06, |
|
"loss": 0.0771, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.77125, |
|
"grad_norm": 2.094522714614868, |
|
"learning_rate": 1.0546052631578947e-06, |
|
"loss": 0.0882, |
|
"step": 15425 |
|
}, |
|
{ |
|
"epoch": 0.7725, |
|
"grad_norm": 2.3391168117523193, |
|
"learning_rate": 1.0488486842105262e-06, |
|
"loss": 0.0746, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 0.77375, |
|
"grad_norm": 2.208967924118042, |
|
"learning_rate": 1.0430921052631577e-06, |
|
"loss": 0.0725, |
|
"step": 15475 |
|
}, |
|
{ |
|
"epoch": 0.775, |
|
"grad_norm": 2.7758445739746094, |
|
"learning_rate": 1.0373355263157895e-06, |
|
"loss": 0.066, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.775, |
|
"eval_loss": 0.1524539738893509, |
|
"eval_runtime": 533.899, |
|
"eval_samples_per_second": 3.167, |
|
"eval_steps_per_second": 0.397, |
|
"eval_wer": 7.00012489072062, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.77625, |
|
"grad_norm": 1.5453675985336304, |
|
"learning_rate": 1.031578947368421e-06, |
|
"loss": 0.0511, |
|
"step": 15525 |
|
}, |
|
{ |
|
"epoch": 0.7775, |
|
"grad_norm": 2.0205094814300537, |
|
"learning_rate": 1.0258223684210525e-06, |
|
"loss": 0.05, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 0.77875, |
|
"grad_norm": 1.2804875373840332, |
|
"learning_rate": 1.020065789473684e-06, |
|
"loss": 0.0598, |
|
"step": 15575 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 2.22847843170166, |
|
"learning_rate": 1.0143092105263157e-06, |
|
"loss": 0.0686, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.78125, |
|
"grad_norm": 2.523324489593506, |
|
"learning_rate": 1.0085526315789472e-06, |
|
"loss": 0.1251, |
|
"step": 15625 |
|
}, |
|
{ |
|
"epoch": 0.7825, |
|
"grad_norm": 1.8177152872085571, |
|
"learning_rate": 1.002796052631579e-06, |
|
"loss": 0.1014, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 0.78375, |
|
"grad_norm": 1.9223369359970093, |
|
"learning_rate": 9.970394736842104e-07, |
|
"loss": 0.0604, |
|
"step": 15675 |
|
}, |
|
{ |
|
"epoch": 0.785, |
|
"grad_norm": 1.9404890537261963, |
|
"learning_rate": 9.91282894736842e-07, |
|
"loss": 0.0556, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.78625, |
|
"grad_norm": 1.354697823524475, |
|
"learning_rate": 9.855263157894737e-07, |
|
"loss": 0.0452, |
|
"step": 15725 |
|
}, |
|
{ |
|
"epoch": 0.7875, |
|
"grad_norm": 0.9245623350143433, |
|
"learning_rate": 9.797697368421052e-07, |
|
"loss": 0.0536, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 0.78875, |
|
"grad_norm": 1.3286716938018799, |
|
"learning_rate": 9.740131578947369e-07, |
|
"loss": 0.0402, |
|
"step": 15775 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 2.337540626525879, |
|
"learning_rate": 9.682565789473684e-07, |
|
"loss": 0.0619, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.79125, |
|
"grad_norm": 1.3047797679901123, |
|
"learning_rate": 9.624999999999999e-07, |
|
"loss": 0.0582, |
|
"step": 15825 |
|
}, |
|
{ |
|
"epoch": 0.7925, |
|
"grad_norm": 1.5523693561553955, |
|
"learning_rate": 9.567434210526314e-07, |
|
"loss": 0.0461, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 0.79375, |
|
"grad_norm": 0.8749285340309143, |
|
"learning_rate": 9.50986842105263e-07, |
|
"loss": 0.0458, |
|
"step": 15875 |
|
}, |
|
{ |
|
"epoch": 0.795, |
|
"grad_norm": 1.0452526807785034, |
|
"learning_rate": 9.452302631578946e-07, |
|
"loss": 0.0419, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.79625, |
|
"grad_norm": 1.9379664659500122, |
|
"learning_rate": 9.394736842105263e-07, |
|
"loss": 0.0566, |
|
"step": 15925 |
|
}, |
|
{ |
|
"epoch": 0.7975, |
|
"grad_norm": 1.316031575202942, |
|
"learning_rate": 9.337171052631578e-07, |
|
"loss": 0.0473, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 0.79875, |
|
"grad_norm": 1.216234564781189, |
|
"learning_rate": 9.279605263157895e-07, |
|
"loss": 0.0567, |
|
"step": 15975 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.7266921997070312, |
|
"learning_rate": 9.22203947368421e-07, |
|
"loss": 0.0632, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 0.14084434509277344, |
|
"eval_runtime": 535.4097, |
|
"eval_samples_per_second": 3.158, |
|
"eval_steps_per_second": 0.396, |
|
"eval_wer": 6.681653553141001, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.80125, |
|
"grad_norm": 1.8532096147537231, |
|
"learning_rate": 9.164473684210526e-07, |
|
"loss": 0.0579, |
|
"step": 16025 |
|
}, |
|
{ |
|
"epoch": 0.8025, |
|
"grad_norm": 2.181915044784546, |
|
"learning_rate": 9.106907894736841e-07, |
|
"loss": 0.0757, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 0.80375, |
|
"grad_norm": 2.2596707344055176, |
|
"learning_rate": 9.049342105263157e-07, |
|
"loss": 0.0729, |
|
"step": 16075 |
|
}, |
|
{ |
|
"epoch": 0.805, |
|
"grad_norm": 1.2219024896621704, |
|
"learning_rate": 8.991776315789473e-07, |
|
"loss": 0.0666, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.80625, |
|
"grad_norm": 1.135261058807373, |
|
"learning_rate": 8.934210526315789e-07, |
|
"loss": 0.0627, |
|
"step": 16125 |
|
}, |
|
{ |
|
"epoch": 0.8075, |
|
"grad_norm": 1.6599974632263184, |
|
"learning_rate": 8.876644736842104e-07, |
|
"loss": 0.0477, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 0.80875, |
|
"grad_norm": 1.7189278602600098, |
|
"learning_rate": 8.81907894736842e-07, |
|
"loss": 0.049, |
|
"step": 16175 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.837539553642273, |
|
"learning_rate": 8.761513157894735e-07, |
|
"loss": 0.0489, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.81125, |
|
"grad_norm": 1.5122978687286377, |
|
"learning_rate": 8.703947368421051e-07, |
|
"loss": 0.0389, |
|
"step": 16225 |
|
}, |
|
{ |
|
"epoch": 0.8125, |
|
"grad_norm": 1.7276921272277832, |
|
"learning_rate": 8.646381578947368e-07, |
|
"loss": 0.0449, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 0.81375, |
|
"grad_norm": 2.028928756713867, |
|
"learning_rate": 8.588815789473684e-07, |
|
"loss": 0.045, |
|
"step": 16275 |
|
}, |
|
{ |
|
"epoch": 0.815, |
|
"grad_norm": 1.258401870727539, |
|
"learning_rate": 8.53125e-07, |
|
"loss": 0.0413, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.81625, |
|
"grad_norm": 1.2878379821777344, |
|
"learning_rate": 8.473684210526315e-07, |
|
"loss": 0.0454, |
|
"step": 16325 |
|
}, |
|
{ |
|
"epoch": 0.8175, |
|
"grad_norm": 0.9309024810791016, |
|
"learning_rate": 8.416118421052631e-07, |
|
"loss": 0.0389, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 0.81875, |
|
"grad_norm": 0.6321396231651306, |
|
"learning_rate": 8.358552631578946e-07, |
|
"loss": 0.0282, |
|
"step": 16375 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.8799151182174683, |
|
"learning_rate": 8.300986842105262e-07, |
|
"loss": 0.0316, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.82125, |
|
"grad_norm": 0.642666220664978, |
|
"learning_rate": 8.243421052631577e-07, |
|
"loss": 0.0415, |
|
"step": 16425 |
|
}, |
|
{ |
|
"epoch": 0.8225, |
|
"grad_norm": 1.199803352355957, |
|
"learning_rate": 8.185855263157894e-07, |
|
"loss": 0.0503, |
|
"step": 16450 |
|
}, |
|
{ |
|
"epoch": 0.82375, |
|
"grad_norm": 1.517521858215332, |
|
"learning_rate": 8.128289473684211e-07, |
|
"loss": 0.0342, |
|
"step": 16475 |
|
}, |
|
{ |
|
"epoch": 0.825, |
|
"grad_norm": 1.683922290802002, |
|
"learning_rate": 8.070723684210526e-07, |
|
"loss": 0.0437, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.825, |
|
"eval_loss": 0.1474502831697464, |
|
"eval_runtime": 533.549, |
|
"eval_samples_per_second": 3.169, |
|
"eval_steps_per_second": 0.397, |
|
"eval_wer": 6.594230048707381, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.82625, |
|
"grad_norm": 2.0518248081207275, |
|
"learning_rate": 8.013157894736842e-07, |
|
"loss": 0.0411, |
|
"step": 16525 |
|
}, |
|
{ |
|
"epoch": 0.8275, |
|
"grad_norm": 1.139129638671875, |
|
"learning_rate": 7.955592105263157e-07, |
|
"loss": 0.0426, |
|
"step": 16550 |
|
}, |
|
{ |
|
"epoch": 0.82875, |
|
"grad_norm": 0.7436901926994324, |
|
"learning_rate": 7.898026315789473e-07, |
|
"loss": 0.0413, |
|
"step": 16575 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.8292795419692993, |
|
"learning_rate": 7.840460526315789e-07, |
|
"loss": 0.0456, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.83125, |
|
"grad_norm": 2.60646390914917, |
|
"learning_rate": 7.782894736842105e-07, |
|
"loss": 0.0459, |
|
"step": 16625 |
|
}, |
|
{ |
|
"epoch": 0.8325, |
|
"grad_norm": 2.15118408203125, |
|
"learning_rate": 7.72532894736842e-07, |
|
"loss": 0.0653, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 0.83375, |
|
"grad_norm": 1.8501421213150024, |
|
"learning_rate": 7.667763157894736e-07, |
|
"loss": 0.0702, |
|
"step": 16675 |
|
}, |
|
{ |
|
"epoch": 0.835, |
|
"grad_norm": 1.579913854598999, |
|
"learning_rate": 7.610197368421051e-07, |
|
"loss": 0.0693, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.83625, |
|
"grad_norm": 2.4913477897644043, |
|
"learning_rate": 7.552631578947367e-07, |
|
"loss": 0.0874, |
|
"step": 16725 |
|
}, |
|
{ |
|
"epoch": 0.8375, |
|
"grad_norm": 2.489863634109497, |
|
"learning_rate": 7.495065789473683e-07, |
|
"loss": 0.0642, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 0.83875, |
|
"grad_norm": 4.630337715148926, |
|
"learning_rate": 7.4375e-07, |
|
"loss": 0.0728, |
|
"step": 16775 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.708297848701477, |
|
"learning_rate": 7.379934210526316e-07, |
|
"loss": 0.056, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.84125, |
|
"grad_norm": 1.7515946626663208, |
|
"learning_rate": 7.322368421052631e-07, |
|
"loss": 0.0477, |
|
"step": 16825 |
|
}, |
|
{ |
|
"epoch": 0.8425, |
|
"grad_norm": 1.6641236543655396, |
|
"learning_rate": 7.264802631578947e-07, |
|
"loss": 0.0508, |
|
"step": 16850 |
|
}, |
|
{ |
|
"epoch": 0.84375, |
|
"grad_norm": 1.693472146987915, |
|
"learning_rate": 7.207236842105262e-07, |
|
"loss": 0.0457, |
|
"step": 16875 |
|
}, |
|
{ |
|
"epoch": 0.845, |
|
"grad_norm": 0.845664381980896, |
|
"learning_rate": 7.149671052631578e-07, |
|
"loss": 0.0415, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.84625, |
|
"grad_norm": 1.8824065923690796, |
|
"learning_rate": 7.092105263157893e-07, |
|
"loss": 0.0481, |
|
"step": 16925 |
|
}, |
|
{ |
|
"epoch": 0.8475, |
|
"grad_norm": 1.9034583568572998, |
|
"learning_rate": 7.03453947368421e-07, |
|
"loss": 0.0496, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 0.84875, |
|
"grad_norm": 2.6840953826904297, |
|
"learning_rate": 6.976973684210525e-07, |
|
"loss": 0.0574, |
|
"step": 16975 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.8385533094406128, |
|
"learning_rate": 6.919407894736842e-07, |
|
"loss": 0.0478, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 0.15727710723876953, |
|
"eval_runtime": 534.9573, |
|
"eval_samples_per_second": 3.161, |
|
"eval_steps_per_second": 0.396, |
|
"eval_wer": 6.794055201698514, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.85125, |
|
"grad_norm": 1.590932011604309, |
|
"learning_rate": 6.864144736842104e-07, |
|
"loss": 0.0589, |
|
"step": 17025 |
|
}, |
|
{ |
|
"epoch": 0.8525, |
|
"grad_norm": 1.005034327507019, |
|
"learning_rate": 6.806578947368419e-07, |
|
"loss": 0.0554, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 0.85375, |
|
"grad_norm": 3.3872015476226807, |
|
"learning_rate": 6.749013157894737e-07, |
|
"loss": 0.0572, |
|
"step": 17075 |
|
}, |
|
{ |
|
"epoch": 0.855, |
|
"grad_norm": 3.8093373775482178, |
|
"learning_rate": 6.691447368421053e-07, |
|
"loss": 0.1078, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.85625, |
|
"grad_norm": 2.587963581085205, |
|
"learning_rate": 6.633881578947368e-07, |
|
"loss": 0.1426, |
|
"step": 17125 |
|
}, |
|
{ |
|
"epoch": 0.8575, |
|
"grad_norm": 3.9271957874298096, |
|
"learning_rate": 6.576315789473684e-07, |
|
"loss": 0.1496, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 0.85875, |
|
"grad_norm": 3.7258965969085693, |
|
"learning_rate": 6.518749999999999e-07, |
|
"loss": 0.1852, |
|
"step": 17175 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 4.298374652862549, |
|
"learning_rate": 6.461184210526315e-07, |
|
"loss": 0.2419, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.86125, |
|
"grad_norm": 6.419559478759766, |
|
"learning_rate": 6.403618421052631e-07, |
|
"loss": 0.225, |
|
"step": 17225 |
|
}, |
|
{ |
|
"epoch": 0.8625, |
|
"grad_norm": 4.669430732727051, |
|
"learning_rate": 6.346052631578947e-07, |
|
"loss": 0.297, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 0.86375, |
|
"grad_norm": 4.676415920257568, |
|
"learning_rate": 6.288486842105262e-07, |
|
"loss": 0.2001, |
|
"step": 17275 |
|
}, |
|
{ |
|
"epoch": 0.865, |
|
"grad_norm": 1.519974708557129, |
|
"learning_rate": 6.230921052631579e-07, |
|
"loss": 0.1029, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.86625, |
|
"grad_norm": 2.9553279876708984, |
|
"learning_rate": 6.173355263157894e-07, |
|
"loss": 0.0917, |
|
"step": 17325 |
|
}, |
|
{ |
|
"epoch": 0.8675, |
|
"grad_norm": 1.5657232999801636, |
|
"learning_rate": 6.11578947368421e-07, |
|
"loss": 0.088, |
|
"step": 17350 |
|
}, |
|
{ |
|
"epoch": 0.86875, |
|
"grad_norm": 3.1620709896087646, |
|
"learning_rate": 6.058223684210525e-07, |
|
"loss": 0.1046, |
|
"step": 17375 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 3.469240188598633, |
|
"learning_rate": 6.000657894736842e-07, |
|
"loss": 0.1004, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.87125, |
|
"grad_norm": 1.9016904830932617, |
|
"learning_rate": 5.943092105263158e-07, |
|
"loss": 0.0875, |
|
"step": 17425 |
|
}, |
|
{ |
|
"epoch": 0.8725, |
|
"grad_norm": 3.1401467323303223, |
|
"learning_rate": 5.885526315789473e-07, |
|
"loss": 0.0593, |
|
"step": 17450 |
|
}, |
|
{ |
|
"epoch": 0.87375, |
|
"grad_norm": 1.1564242839813232, |
|
"learning_rate": 5.827960526315789e-07, |
|
"loss": 0.0444, |
|
"step": 17475 |
|
}, |
|
{ |
|
"epoch": 0.875, |
|
"grad_norm": 0.9873404502868652, |
|
"learning_rate": 5.770394736842104e-07, |
|
"loss": 0.0418, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.875, |
|
"eval_loss": 0.156468465924263, |
|
"eval_runtime": 534.8937, |
|
"eval_samples_per_second": 3.161, |
|
"eval_steps_per_second": 0.396, |
|
"eval_wer": 6.650430872986138, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.87625, |
|
"grad_norm": 1.499561071395874, |
|
"learning_rate": 5.71282894736842e-07, |
|
"loss": 0.0423, |
|
"step": 17525 |
|
}, |
|
{ |
|
"epoch": 0.8775, |
|
"grad_norm": 1.0905530452728271, |
|
"learning_rate": 5.655263157894735e-07, |
|
"loss": 0.0496, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 0.87875, |
|
"grad_norm": 1.6048545837402344, |
|
"learning_rate": 5.597697368421053e-07, |
|
"loss": 0.0437, |
|
"step": 17575 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.5219619274139404, |
|
"learning_rate": 5.540131578947369e-07, |
|
"loss": 0.0676, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.88125, |
|
"grad_norm": 1.8919825553894043, |
|
"learning_rate": 5.482565789473684e-07, |
|
"loss": 0.0647, |
|
"step": 17625 |
|
}, |
|
{ |
|
"epoch": 0.8825, |
|
"grad_norm": 2.4546618461608887, |
|
"learning_rate": 5.425e-07, |
|
"loss": 0.0625, |
|
"step": 17650 |
|
}, |
|
{ |
|
"epoch": 0.88375, |
|
"grad_norm": 1.7209670543670654, |
|
"learning_rate": 5.367434210526315e-07, |
|
"loss": 0.0661, |
|
"step": 17675 |
|
}, |
|
{ |
|
"epoch": 0.885, |
|
"grad_norm": 2.5535149574279785, |
|
"learning_rate": 5.309868421052631e-07, |
|
"loss": 0.0691, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.88625, |
|
"grad_norm": 3.5450563430786133, |
|
"learning_rate": 5.252302631578947e-07, |
|
"loss": 0.0603, |
|
"step": 17725 |
|
}, |
|
{ |
|
"epoch": 0.8875, |
|
"grad_norm": 1.4123398065567017, |
|
"learning_rate": 5.194736842105262e-07, |
|
"loss": 0.0666, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 0.88875, |
|
"grad_norm": 1.427933931350708, |
|
"learning_rate": 5.137171052631578e-07, |
|
"loss": 0.0428, |
|
"step": 17775 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 1.3647822141647339, |
|
"learning_rate": 5.079605263157895e-07, |
|
"loss": 0.0382, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.89125, |
|
"grad_norm": 1.1601825952529907, |
|
"learning_rate": 5.02203947368421e-07, |
|
"loss": 0.0485, |
|
"step": 17825 |
|
}, |
|
{ |
|
"epoch": 0.8925, |
|
"grad_norm": 1.2409619092941284, |
|
"learning_rate": 4.964473684210526e-07, |
|
"loss": 0.0439, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 0.89375, |
|
"grad_norm": 2.1224701404571533, |
|
"learning_rate": 4.906907894736842e-07, |
|
"loss": 0.0463, |
|
"step": 17875 |
|
}, |
|
{ |
|
"epoch": 0.895, |
|
"grad_norm": 1.7053598165512085, |
|
"learning_rate": 4.849342105263158e-07, |
|
"loss": 0.0419, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.89625, |
|
"grad_norm": 1.2734942436218262, |
|
"learning_rate": 4.791776315789473e-07, |
|
"loss": 0.0385, |
|
"step": 17925 |
|
}, |
|
{ |
|
"epoch": 0.8975, |
|
"grad_norm": 1.448438048362732, |
|
"learning_rate": 4.734210526315789e-07, |
|
"loss": 0.0397, |
|
"step": 17950 |
|
}, |
|
{ |
|
"epoch": 0.89875, |
|
"grad_norm": 1.1724251508712769, |
|
"learning_rate": 4.6766447368421047e-07, |
|
"loss": 0.0454, |
|
"step": 17975 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.4745044708251953, |
|
"learning_rate": 4.6190789473684203e-07, |
|
"loss": 0.0382, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 0.15590737760066986, |
|
"eval_runtime": 533.3825, |
|
"eval_samples_per_second": 3.17, |
|
"eval_steps_per_second": 0.397, |
|
"eval_wer": 6.563007368552516, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.90125, |
|
"grad_norm": 1.3046791553497314, |
|
"learning_rate": 4.5615131578947364e-07, |
|
"loss": 0.0497, |
|
"step": 18025 |
|
}, |
|
{ |
|
"epoch": 0.9025, |
|
"grad_norm": 2.052855968475342, |
|
"learning_rate": 4.5039473684210525e-07, |
|
"loss": 0.0542, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 0.90375, |
|
"grad_norm": 1.6691333055496216, |
|
"learning_rate": 4.446381578947368e-07, |
|
"loss": 0.0503, |
|
"step": 18075 |
|
}, |
|
{ |
|
"epoch": 0.905, |
|
"grad_norm": 3.4304769039154053, |
|
"learning_rate": 4.3888157894736837e-07, |
|
"loss": 0.0572, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.90625, |
|
"grad_norm": 1.5289900302886963, |
|
"learning_rate": 4.33125e-07, |
|
"loss": 0.07, |
|
"step": 18125 |
|
}, |
|
{ |
|
"epoch": 0.9075, |
|
"grad_norm": 2.5705385208129883, |
|
"learning_rate": 4.2736842105263154e-07, |
|
"loss": 0.0778, |
|
"step": 18150 |
|
}, |
|
{ |
|
"epoch": 0.90875, |
|
"grad_norm": 2.34914493560791, |
|
"learning_rate": 4.216118421052631e-07, |
|
"loss": 0.0739, |
|
"step": 18175 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 2.6740806102752686, |
|
"learning_rate": 4.158552631578947e-07, |
|
"loss": 0.0621, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.91125, |
|
"grad_norm": 0.931742787361145, |
|
"learning_rate": 4.1009868421052627e-07, |
|
"loss": 0.0636, |
|
"step": 18225 |
|
}, |
|
{ |
|
"epoch": 0.9125, |
|
"grad_norm": 1.7513364553451538, |
|
"learning_rate": 4.0434210526315783e-07, |
|
"loss": 0.0526, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 0.91375, |
|
"grad_norm": 1.3136606216430664, |
|
"learning_rate": 3.985855263157894e-07, |
|
"loss": 0.0469, |
|
"step": 18275 |
|
}, |
|
{ |
|
"epoch": 0.915, |
|
"grad_norm": 1.2674484252929688, |
|
"learning_rate": 3.9282894736842105e-07, |
|
"loss": 0.0569, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.91625, |
|
"grad_norm": 2.0879714488983154, |
|
"learning_rate": 3.870723684210526e-07, |
|
"loss": 0.056, |
|
"step": 18325 |
|
}, |
|
{ |
|
"epoch": 0.9175, |
|
"grad_norm": 1.6177654266357422, |
|
"learning_rate": 3.8131578947368417e-07, |
|
"loss": 0.0496, |
|
"step": 18350 |
|
}, |
|
{ |
|
"epoch": 0.91875, |
|
"grad_norm": 1.267562985420227, |
|
"learning_rate": 3.755592105263158e-07, |
|
"loss": 0.0474, |
|
"step": 18375 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.9628887176513672, |
|
"learning_rate": 3.6980263157894734e-07, |
|
"loss": 0.0523, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.92125, |
|
"grad_norm": 2.450678586959839, |
|
"learning_rate": 3.640460526315789e-07, |
|
"loss": 0.0571, |
|
"step": 18425 |
|
}, |
|
{ |
|
"epoch": 0.9225, |
|
"grad_norm": 3.2376692295074463, |
|
"learning_rate": 3.5828947368421046e-07, |
|
"loss": 0.0726, |
|
"step": 18450 |
|
}, |
|
{ |
|
"epoch": 0.92375, |
|
"grad_norm": 1.718723177909851, |
|
"learning_rate": 3.5253289473684207e-07, |
|
"loss": 0.0759, |
|
"step": 18475 |
|
}, |
|
{ |
|
"epoch": 0.925, |
|
"grad_norm": 1.7278677225112915, |
|
"learning_rate": 3.4677631578947363e-07, |
|
"loss": 0.0658, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.925, |
|
"eval_loss": 0.145228311419487, |
|
"eval_runtime": 537.5559, |
|
"eval_samples_per_second": 3.146, |
|
"eval_steps_per_second": 0.394, |
|
"eval_wer": 6.563007368552516, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.92625, |
|
"grad_norm": 2.314218044281006, |
|
"learning_rate": 3.410197368421052e-07, |
|
"loss": 0.083, |
|
"step": 18525 |
|
}, |
|
{ |
|
"epoch": 0.9275, |
|
"grad_norm": 2.6032817363739014, |
|
"learning_rate": 3.3526315789473685e-07, |
|
"loss": 0.0796, |
|
"step": 18550 |
|
}, |
|
{ |
|
"epoch": 0.92875, |
|
"grad_norm": 1.2821646928787231, |
|
"learning_rate": 3.295065789473684e-07, |
|
"loss": 0.0475, |
|
"step": 18575 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 1.2048566341400146, |
|
"learning_rate": 3.2374999999999997e-07, |
|
"loss": 0.0441, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.93125, |
|
"grad_norm": 2.205629348754883, |
|
"learning_rate": 3.179934210526316e-07, |
|
"loss": 0.0529, |
|
"step": 18625 |
|
}, |
|
{ |
|
"epoch": 0.9325, |
|
"grad_norm": 0.948354959487915, |
|
"learning_rate": 3.1223684210526314e-07, |
|
"loss": 0.0491, |
|
"step": 18650 |
|
}, |
|
{ |
|
"epoch": 0.93375, |
|
"grad_norm": 0.8600139617919922, |
|
"learning_rate": 3.064802631578947e-07, |
|
"loss": 0.0397, |
|
"step": 18675 |
|
}, |
|
{ |
|
"epoch": 0.935, |
|
"grad_norm": 1.5570470094680786, |
|
"learning_rate": 3.0072368421052626e-07, |
|
"loss": 0.0479, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.93625, |
|
"grad_norm": 1.6737167835235596, |
|
"learning_rate": 2.9496710526315787e-07, |
|
"loss": 0.053, |
|
"step": 18725 |
|
}, |
|
{ |
|
"epoch": 0.9375, |
|
"grad_norm": 1.7217572927474976, |
|
"learning_rate": 2.8921052631578943e-07, |
|
"loss": 0.0551, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 0.93875, |
|
"grad_norm": 2.207542896270752, |
|
"learning_rate": 2.83453947368421e-07, |
|
"loss": 0.0618, |
|
"step": 18775 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.6761177778244019, |
|
"learning_rate": 2.7769736842105265e-07, |
|
"loss": 0.0652, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.94125, |
|
"grad_norm": 2.9946813583374023, |
|
"learning_rate": 2.719407894736842e-07, |
|
"loss": 0.0738, |
|
"step": 18825 |
|
}, |
|
{ |
|
"epoch": 0.9425, |
|
"grad_norm": 2.3663125038146973, |
|
"learning_rate": 2.6618421052631577e-07, |
|
"loss": 0.0629, |
|
"step": 18850 |
|
}, |
|
{ |
|
"epoch": 0.94375, |
|
"grad_norm": 2.5888278484344482, |
|
"learning_rate": 2.6042763157894733e-07, |
|
"loss": 0.2007, |
|
"step": 18875 |
|
}, |
|
{ |
|
"epoch": 0.945, |
|
"grad_norm": 1.5639821290969849, |
|
"learning_rate": 2.5467105263157894e-07, |
|
"loss": 0.2028, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.94625, |
|
"grad_norm": 1.5892317295074463, |
|
"learning_rate": 2.489144736842105e-07, |
|
"loss": 0.1062, |
|
"step": 18925 |
|
}, |
|
{ |
|
"epoch": 0.9475, |
|
"grad_norm": 2.5038766860961914, |
|
"learning_rate": 2.431578947368421e-07, |
|
"loss": 0.06, |
|
"step": 18950 |
|
}, |
|
{ |
|
"epoch": 0.94875, |
|
"grad_norm": 2.531886577606201, |
|
"learning_rate": 2.3740131578947364e-07, |
|
"loss": 0.0553, |
|
"step": 18975 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 2.5510354042053223, |
|
"learning_rate": 2.3164473684210526e-07, |
|
"loss": 0.0531, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 0.15760228037834167, |
|
"eval_runtime": 534.6809, |
|
"eval_samples_per_second": 3.163, |
|
"eval_steps_per_second": 0.396, |
|
"eval_wer": 6.662919945048083, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.95125, |
|
"grad_norm": 3.058936357498169, |
|
"learning_rate": 2.2588815789473684e-07, |
|
"loss": 0.061, |
|
"step": 19025 |
|
}, |
|
{ |
|
"epoch": 0.9525, |
|
"grad_norm": 4.343925476074219, |
|
"learning_rate": 2.201315789473684e-07, |
|
"loss": 0.0775, |
|
"step": 19050 |
|
}, |
|
{ |
|
"epoch": 0.95375, |
|
"grad_norm": 3.271355628967285, |
|
"learning_rate": 2.1437499999999999e-07, |
|
"loss": 0.0762, |
|
"step": 19075 |
|
}, |
|
{ |
|
"epoch": 0.955, |
|
"grad_norm": 1.7924737930297852, |
|
"learning_rate": 2.0861842105263154e-07, |
|
"loss": 0.0733, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.95625, |
|
"grad_norm": 2.034940719604492, |
|
"learning_rate": 2.0286184210526313e-07, |
|
"loss": 0.0747, |
|
"step": 19125 |
|
}, |
|
{ |
|
"epoch": 0.9575, |
|
"grad_norm": 3.0561563968658447, |
|
"learning_rate": 1.9733552631578946e-07, |
|
"loss": 0.0841, |
|
"step": 19150 |
|
}, |
|
{ |
|
"epoch": 0.95875, |
|
"grad_norm": 1.5333133935928345, |
|
"learning_rate": 1.9157894736842102e-07, |
|
"loss": 0.0656, |
|
"step": 19175 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.5307198762893677, |
|
"learning_rate": 1.858223684210526e-07, |
|
"loss": 0.0532, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.96125, |
|
"grad_norm": 1.5663795471191406, |
|
"learning_rate": 1.8006578947368422e-07, |
|
"loss": 0.0485, |
|
"step": 19225 |
|
}, |
|
{ |
|
"epoch": 0.9625, |
|
"grad_norm": 1.8204154968261719, |
|
"learning_rate": 1.7430921052631578e-07, |
|
"loss": 0.0506, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 0.96375, |
|
"grad_norm": 0.6307218074798584, |
|
"learning_rate": 1.6855263157894736e-07, |
|
"loss": 0.0412, |
|
"step": 19275 |
|
}, |
|
{ |
|
"epoch": 0.965, |
|
"grad_norm": 1.2638368606567383, |
|
"learning_rate": 1.6279605263157892e-07, |
|
"loss": 0.0367, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.96625, |
|
"grad_norm": 1.448020100593567, |
|
"learning_rate": 1.570394736842105e-07, |
|
"loss": 0.0461, |
|
"step": 19325 |
|
}, |
|
{ |
|
"epoch": 0.9675, |
|
"grad_norm": 1.148501992225647, |
|
"learning_rate": 1.5128289473684207e-07, |
|
"loss": 0.0321, |
|
"step": 19350 |
|
}, |
|
{ |
|
"epoch": 0.96875, |
|
"grad_norm": 1.24919593334198, |
|
"learning_rate": 1.4552631578947368e-07, |
|
"loss": 0.0442, |
|
"step": 19375 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 1.437836766242981, |
|
"learning_rate": 1.3976973684210526e-07, |
|
"loss": 0.0418, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.97125, |
|
"grad_norm": 1.7176451683044434, |
|
"learning_rate": 1.3401315789473682e-07, |
|
"loss": 0.0423, |
|
"step": 19425 |
|
}, |
|
{ |
|
"epoch": 0.9725, |
|
"grad_norm": 0.6188969016075134, |
|
"learning_rate": 1.282565789473684e-07, |
|
"loss": 0.0372, |
|
"step": 19450 |
|
}, |
|
{ |
|
"epoch": 0.97375, |
|
"grad_norm": 0.7245228886604309, |
|
"learning_rate": 1.225e-07, |
|
"loss": 0.0447, |
|
"step": 19475 |
|
}, |
|
{ |
|
"epoch": 0.975, |
|
"grad_norm": 1.1836830377578735, |
|
"learning_rate": 1.1674342105263156e-07, |
|
"loss": 0.0416, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.975, |
|
"eval_loss": 0.1550171822309494, |
|
"eval_runtime": 533.135, |
|
"eval_samples_per_second": 3.172, |
|
"eval_steps_per_second": 0.398, |
|
"eval_wer": 6.544273760459599, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.97625, |
|
"grad_norm": 2.7617335319519043, |
|
"learning_rate": 1.1098684210526315e-07, |
|
"loss": 0.0481, |
|
"step": 19525 |
|
}, |
|
{ |
|
"epoch": 0.9775, |
|
"grad_norm": 1.8646786212921143, |
|
"learning_rate": 1.0523026315789472e-07, |
|
"loss": 0.0479, |
|
"step": 19550 |
|
}, |
|
{ |
|
"epoch": 0.97875, |
|
"grad_norm": 3.3118820190429688, |
|
"learning_rate": 9.947368421052632e-08, |
|
"loss": 0.0622, |
|
"step": 19575 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 1.3400448560714722, |
|
"learning_rate": 9.371710526315789e-08, |
|
"loss": 0.0727, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.98125, |
|
"grad_norm": 3.044895648956299, |
|
"learning_rate": 8.796052631578946e-08, |
|
"loss": 0.0613, |
|
"step": 19625 |
|
}, |
|
{ |
|
"epoch": 0.9825, |
|
"grad_norm": 3.217283248901367, |
|
"learning_rate": 8.220394736842105e-08, |
|
"loss": 0.0814, |
|
"step": 19650 |
|
}, |
|
{ |
|
"epoch": 0.98375, |
|
"grad_norm": 1.3824083805084229, |
|
"learning_rate": 7.644736842105262e-08, |
|
"loss": 0.0598, |
|
"step": 19675 |
|
}, |
|
{ |
|
"epoch": 0.985, |
|
"grad_norm": 1.3852965831756592, |
|
"learning_rate": 7.069078947368419e-08, |
|
"loss": 0.0504, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.98625, |
|
"grad_norm": 2.5811800956726074, |
|
"learning_rate": 6.493421052631578e-08, |
|
"loss": 0.0643, |
|
"step": 19725 |
|
}, |
|
{ |
|
"epoch": 0.9875, |
|
"grad_norm": 1.2770925760269165, |
|
"learning_rate": 5.9177631578947364e-08, |
|
"loss": 0.0586, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 0.98875, |
|
"grad_norm": 2.8050851821899414, |
|
"learning_rate": 5.342105263157894e-08, |
|
"loss": 0.0623, |
|
"step": 19775 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 3.1270270347595215, |
|
"learning_rate": 4.766447368421052e-08, |
|
"loss": 0.1128, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.99125, |
|
"grad_norm": 2.787506103515625, |
|
"learning_rate": 4.1907894736842107e-08, |
|
"loss": 0.0945, |
|
"step": 19825 |
|
}, |
|
{ |
|
"epoch": 0.9925, |
|
"grad_norm": 2.0053322315216064, |
|
"learning_rate": 3.615131578947368e-08, |
|
"loss": 0.0676, |
|
"step": 19850 |
|
}, |
|
{ |
|
"epoch": 0.99375, |
|
"grad_norm": 2.319840669631958, |
|
"learning_rate": 3.0394736842105264e-08, |
|
"loss": 0.0434, |
|
"step": 19875 |
|
}, |
|
{ |
|
"epoch": 0.995, |
|
"grad_norm": 1.3756728172302246, |
|
"learning_rate": 2.463815789473684e-08, |
|
"loss": 0.0458, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.99625, |
|
"grad_norm": 1.3499048948287964, |
|
"learning_rate": 1.8881578947368418e-08, |
|
"loss": 0.0426, |
|
"step": 19925 |
|
}, |
|
{ |
|
"epoch": 0.9975, |
|
"grad_norm": 1.0059881210327148, |
|
"learning_rate": 1.3124999999999998e-08, |
|
"loss": 0.0543, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 0.99875, |
|
"grad_norm": 1.247534155845642, |
|
"learning_rate": 7.368421052631579e-09, |
|
"loss": 0.0526, |
|
"step": 19975 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.3380235433578491, |
|
"learning_rate": 1.6118421052631579e-09, |
|
"loss": 0.0435, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.15491345524787903, |
|
"eval_runtime": 532.2586, |
|
"eval_samples_per_second": 3.177, |
|
"eval_steps_per_second": 0.398, |
|
"eval_wer": 6.544273760459599, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 20000, |
|
"total_flos": 1.0871994580992e+21, |
|
"train_loss": 0.0028733723163604737, |
|
"train_runtime": 7510.0544, |
|
"train_samples_per_second": 42.61, |
|
"train_steps_per_second": 2.663 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 20000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0871994580992e+21, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|