{ "best_metric": 0.1262135922330097, "best_model_checkpoint": "results3\\checkpoint-124000", "epoch": 2.002992, "eval_steps": 4000, "global_step": 124000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0008, "grad_norm": 12.33169937133789, "learning_rate": 1.8800000000000002e-06, "loss": 1.7644, "step": 100 }, { "epoch": 0.0016, "grad_norm": 9.583354949951172, "learning_rate": 3.88e-06, "loss": 0.929, "step": 200 }, { "epoch": 0.0024, "grad_norm": 12.06747817993164, "learning_rate": 5.8800000000000005e-06, "loss": 0.4652, "step": 300 }, { "epoch": 0.0032, "grad_norm": 9.123275756835938, "learning_rate": 7.88e-06, "loss": 0.3351, "step": 400 }, { "epoch": 0.004, "grad_norm": 15.981216430664062, "learning_rate": 9.88e-06, "loss": 0.4079, "step": 500 }, { "epoch": 0.0048, "grad_norm": 20.674148559570312, "learning_rate": 9.992449799196789e-06, "loss": 0.3966, "step": 600 }, { "epoch": 0.0056, "grad_norm": 11.142045974731445, "learning_rate": 9.984417670682733e-06, "loss": 0.3628, "step": 700 }, { "epoch": 0.0064, "grad_norm": 6.716693878173828, "learning_rate": 9.976385542168675e-06, "loss": 0.3647, "step": 800 }, { "epoch": 0.0072, "grad_norm": 15.127047538757324, "learning_rate": 9.968353413654619e-06, "loss": 0.4007, "step": 900 }, { "epoch": 0.008, "grad_norm": 5.561180114746094, "learning_rate": 9.960321285140563e-06, "loss": 0.3748, "step": 1000 }, { "epoch": 0.0088, "grad_norm": 11.087468147277832, "learning_rate": 9.952289156626507e-06, "loss": 0.3802, "step": 1100 }, { "epoch": 0.0096, "grad_norm": 13.053077697753906, "learning_rate": 9.94425702811245e-06, "loss": 0.3555, "step": 1200 }, { "epoch": 0.0104, "grad_norm": 8.72202205657959, "learning_rate": 9.936224899598395e-06, "loss": 0.3532, "step": 1300 }, { "epoch": 0.0112, "grad_norm": 13.878268241882324, "learning_rate": 9.928192771084338e-06, "loss": 0.3571, "step": 1400 }, { "epoch": 0.012, "grad_norm": 7.951385498046875, "learning_rate": 9.920160642570282e-06, "loss": 0.3683, "step": 1500 }, { "epoch": 0.0128, "grad_norm": 7.835740566253662, "learning_rate": 9.912128514056226e-06, "loss": 0.3409, "step": 1600 }, { "epoch": 0.0136, "grad_norm": 10.594538688659668, "learning_rate": 9.904096385542169e-06, "loss": 0.3455, "step": 1700 }, { "epoch": 0.0144, "grad_norm": 8.736641883850098, "learning_rate": 9.896064257028112e-06, "loss": 0.3567, "step": 1800 }, { "epoch": 0.0152, "grad_norm": 6.575601100921631, "learning_rate": 9.888032128514056e-06, "loss": 0.3606, "step": 1900 }, { "epoch": 0.016, "grad_norm": 11.653617858886719, "learning_rate": 9.88e-06, "loss": 0.3683, "step": 2000 }, { "epoch": 0.0168, "grad_norm": 12.467767715454102, "learning_rate": 9.871967871485944e-06, "loss": 0.3723, "step": 2100 }, { "epoch": 0.0176, "grad_norm": 11.205801963806152, "learning_rate": 9.863935742971888e-06, "loss": 0.3561, "step": 2200 }, { "epoch": 0.0184, "grad_norm": 10.407919883728027, "learning_rate": 9.855903614457832e-06, "loss": 0.3414, "step": 2300 }, { "epoch": 0.0192, "grad_norm": 6.497184753417969, "learning_rate": 9.847871485943776e-06, "loss": 0.3439, "step": 2400 }, { "epoch": 0.02, "grad_norm": 10.192763328552246, "learning_rate": 9.83983935742972e-06, "loss": 0.3334, "step": 2500 }, { "epoch": 0.0208, "grad_norm": 10.233382225036621, "learning_rate": 9.831807228915664e-06, "loss": 0.3715, "step": 2600 }, { "epoch": 0.0216, "grad_norm": 10.373730659484863, "learning_rate": 9.823775100401608e-06, "loss": 0.3557, "step": 2700 }, { "epoch": 0.0224, "grad_norm": 9.90769100189209, "learning_rate": 9.81574297188755e-06, "loss": 0.345, "step": 2800 }, { "epoch": 0.0232, "grad_norm": 12.89505386352539, "learning_rate": 9.807710843373494e-06, "loss": 0.3466, "step": 2900 }, { "epoch": 0.024, "grad_norm": 11.755387306213379, "learning_rate": 9.799678714859438e-06, "loss": 0.3518, "step": 3000 }, { "epoch": 0.0248, "grad_norm": 23.758481979370117, "learning_rate": 9.791646586345382e-06, "loss": 0.3367, "step": 3100 }, { "epoch": 0.0256, "grad_norm": 8.041315078735352, "learning_rate": 9.783614457831326e-06, "loss": 0.3385, "step": 3200 }, { "epoch": 0.0264, "grad_norm": 6.949316501617432, "learning_rate": 9.77558232931727e-06, "loss": 0.3557, "step": 3300 }, { "epoch": 0.0272, "grad_norm": 20.79637908935547, "learning_rate": 9.767550200803213e-06, "loss": 0.3497, "step": 3400 }, { "epoch": 0.028, "grad_norm": 10.800033569335938, "learning_rate": 9.759518072289157e-06, "loss": 0.351, "step": 3500 }, { "epoch": 0.0288, "grad_norm": 7.772594451904297, "learning_rate": 9.751485943775101e-06, "loss": 0.3272, "step": 3600 }, { "epoch": 0.0296, "grad_norm": 7.19946813583374, "learning_rate": 9.743453815261045e-06, "loss": 0.3216, "step": 3700 }, { "epoch": 0.0304, "grad_norm": 11.165406227111816, "learning_rate": 9.735421686746989e-06, "loss": 0.3305, "step": 3800 }, { "epoch": 0.0312, "grad_norm": 12.210046768188477, "learning_rate": 9.727389558232933e-06, "loss": 0.3524, "step": 3900 }, { "epoch": 0.032, "grad_norm": 6.451345443725586, "learning_rate": 9.719357429718877e-06, "loss": 0.3394, "step": 4000 }, { "epoch": 0.032, "eval_test1_cer": 0.09286074389424154, "eval_test1_cer_norm": 0.07516507791677672, "eval_test1_loss": 0.2566915452480316, "eval_test1_runtime": 6436.6351, "eval_test1_samples_per_second": 0.388, "eval_test1_steps_per_second": 0.097, "eval_test1_wer": 0.2368582174407417, "eval_test1_wer_norm": 0.17320786697448787, "step": 4000 }, { "epoch": 0.032, "eval_test2_cer": 0.2131304364057192, "eval_test2_cer_norm": 0.18118124418964984, "eval_test2_loss": 0.4192444682121277, "eval_test2_runtime": 3419.6713, "eval_test2_samples_per_second": 0.731, "eval_test2_steps_per_second": 0.183, "eval_test2_wer": 0.42229343099107347, "eval_test2_wer_norm": 0.35425739170295667, "step": 4000 }, { "epoch": 0.0328, "grad_norm": 43.34160232543945, "learning_rate": 9.711325301204821e-06, "loss": 0.3384, "step": 4100 }, { "epoch": 0.0336, "grad_norm": 7.907329559326172, "learning_rate": 9.703293172690765e-06, "loss": 0.3674, "step": 4200 }, { "epoch": 0.0344, "grad_norm": 6.71475076675415, "learning_rate": 9.695261044176709e-06, "loss": 0.3364, "step": 4300 }, { "epoch": 0.0352, "grad_norm": 9.054850578308105, "learning_rate": 9.687228915662651e-06, "loss": 0.3417, "step": 4400 }, { "epoch": 0.036, "grad_norm": 14.793953895568848, "learning_rate": 9.679196787148595e-06, "loss": 0.3286, "step": 4500 }, { "epoch": 0.0368, "grad_norm": 5.203219413757324, "learning_rate": 9.671164658634539e-06, "loss": 0.3684, "step": 4600 }, { "epoch": 0.0376, "grad_norm": 12.02910327911377, "learning_rate": 9.663132530120483e-06, "loss": 0.3393, "step": 4700 }, { "epoch": 0.0384, "grad_norm": 10.032449722290039, "learning_rate": 9.655100401606427e-06, "loss": 0.3503, "step": 4800 }, { "epoch": 0.0392, "grad_norm": 9.57917594909668, "learning_rate": 9.64706827309237e-06, "loss": 0.3239, "step": 4900 }, { "epoch": 0.04, "grad_norm": 9.18687629699707, "learning_rate": 9.639036144578314e-06, "loss": 0.3407, "step": 5000 }, { "epoch": 0.0408, "grad_norm": 9.08846378326416, "learning_rate": 9.631004016064258e-06, "loss": 0.3478, "step": 5100 }, { "epoch": 0.0416, "grad_norm": 6.322196006774902, "learning_rate": 9.622971887550202e-06, "loss": 0.3294, "step": 5200 }, { "epoch": 0.0424, "grad_norm": 11.071808815002441, "learning_rate": 9.614939759036145e-06, "loss": 0.3533, "step": 5300 }, { "epoch": 0.0432, "grad_norm": 8.492830276489258, "learning_rate": 9.606907630522088e-06, "loss": 0.3297, "step": 5400 }, { "epoch": 0.044, "grad_norm": 13.28835678100586, "learning_rate": 9.598875502008032e-06, "loss": 0.3228, "step": 5500 }, { "epoch": 0.0448, "grad_norm": 9.030045509338379, "learning_rate": 9.590843373493976e-06, "loss": 0.3228, "step": 5600 }, { "epoch": 0.0456, "grad_norm": 12.364481925964355, "learning_rate": 9.58281124497992e-06, "loss": 0.3406, "step": 5700 }, { "epoch": 0.0464, "grad_norm": 6.908151149749756, "learning_rate": 9.574779116465864e-06, "loss": 0.3365, "step": 5800 }, { "epoch": 0.0472, "grad_norm": 10.87695598602295, "learning_rate": 9.566746987951808e-06, "loss": 0.3332, "step": 5900 }, { "epoch": 0.048, "grad_norm": 9.280797004699707, "learning_rate": 9.558714859437752e-06, "loss": 0.3414, "step": 6000 }, { "epoch": 0.0488, "grad_norm": 13.637160301208496, "learning_rate": 9.550682730923696e-06, "loss": 0.3275, "step": 6100 }, { "epoch": 0.0496, "grad_norm": 10.094959259033203, "learning_rate": 9.542650602409638e-06, "loss": 0.3152, "step": 6200 }, { "epoch": 0.0504, "grad_norm": 6.261775016784668, "learning_rate": 9.534618473895582e-06, "loss": 0.3236, "step": 6300 }, { "epoch": 0.0512, "grad_norm": 7.221704006195068, "learning_rate": 9.526586345381526e-06, "loss": 0.3461, "step": 6400 }, { "epoch": 0.052, "grad_norm": 10.923733711242676, "learning_rate": 9.51855421686747e-06, "loss": 0.3249, "step": 6500 }, { "epoch": 0.0528, "grad_norm": 5.432595729827881, "learning_rate": 9.510522088353414e-06, "loss": 0.3556, "step": 6600 }, { "epoch": 0.0536, "grad_norm": 15.021200180053711, "learning_rate": 9.502489959839358e-06, "loss": 0.3208, "step": 6700 }, { "epoch": 0.0544, "grad_norm": 6.641680717468262, "learning_rate": 9.494457831325302e-06, "loss": 0.3565, "step": 6800 }, { "epoch": 0.0552, "grad_norm": 15.715243339538574, "learning_rate": 9.486425702811246e-06, "loss": 0.3123, "step": 6900 }, { "epoch": 0.056, "grad_norm": 9.304129600524902, "learning_rate": 9.47839357429719e-06, "loss": 0.312, "step": 7000 }, { "epoch": 0.0568, "grad_norm": 10.780037879943848, "learning_rate": 9.470361445783133e-06, "loss": 0.3445, "step": 7100 }, { "epoch": 0.0576, "grad_norm": 11.77072811126709, "learning_rate": 9.462329317269077e-06, "loss": 0.3293, "step": 7200 }, { "epoch": 0.0584, "grad_norm": 11.29255485534668, "learning_rate": 9.454297188755021e-06, "loss": 0.3045, "step": 7300 }, { "epoch": 0.0592, "grad_norm": 9.00106143951416, "learning_rate": 9.446265060240965e-06, "loss": 0.3132, "step": 7400 }, { "epoch": 0.06, "grad_norm": 8.445959091186523, "learning_rate": 9.438232931726909e-06, "loss": 0.3327, "step": 7500 }, { "epoch": 0.0608, "grad_norm": 13.996682167053223, "learning_rate": 9.430200803212853e-06, "loss": 0.3496, "step": 7600 }, { "epoch": 0.0616, "grad_norm": 6.321784019470215, "learning_rate": 9.422168674698797e-06, "loss": 0.2928, "step": 7700 }, { "epoch": 0.0624, "grad_norm": 9.260029792785645, "learning_rate": 9.414136546184741e-06, "loss": 0.2978, "step": 7800 }, { "epoch": 0.0632, "grad_norm": 14.37282657623291, "learning_rate": 9.406104417670685e-06, "loss": 0.3177, "step": 7900 }, { "epoch": 0.064, "grad_norm": 13.508281707763672, "learning_rate": 9.398072289156627e-06, "loss": 0.3357, "step": 8000 }, { "epoch": 0.064, "eval_test1_cer": 0.07946822018074538, "eval_test1_cer_norm": 0.05569188657046126, "eval_test1_loss": 0.2466106414794922, "eval_test1_runtime": 3431.7176, "eval_test1_samples_per_second": 0.728, "eval_test1_steps_per_second": 0.182, "eval_test1_wer": 0.21889851016064607, "eval_test1_wer_norm": 0.15733948975715245, "step": 8000 }, { "epoch": 0.064, "eval_test2_cer": 0.21952346287378205, "eval_test2_cer_norm": 0.16622927641772545, "eval_test2_loss": 0.410643607378006, "eval_test2_runtime": 3658.9711, "eval_test2_samples_per_second": 0.683, "eval_test2_steps_per_second": 0.171, "eval_test2_wer": 0.39210917830167086, "eval_test2_wer_norm": 0.32348727939491173, "step": 8000 }, { "epoch": 0.0648, "grad_norm": 7.349660873413086, "learning_rate": 9.390040160642571e-06, "loss": 0.3311, "step": 8100 }, { "epoch": 0.0656, "grad_norm": 6.557274341583252, "learning_rate": 9.382008032128515e-06, "loss": 0.318, "step": 8200 }, { "epoch": 0.0664, "grad_norm": 20.285615921020508, "learning_rate": 9.373975903614459e-06, "loss": 0.3116, "step": 8300 }, { "epoch": 0.0672, "grad_norm": 7.684230327606201, "learning_rate": 9.365943775100403e-06, "loss": 0.3232, "step": 8400 }, { "epoch": 0.068, "grad_norm": 8.374768257141113, "learning_rate": 9.357911646586347e-06, "loss": 0.3328, "step": 8500 }, { "epoch": 0.0688, "grad_norm": 6.924304008483887, "learning_rate": 9.34987951807229e-06, "loss": 0.2923, "step": 8600 }, { "epoch": 0.0696, "grad_norm": 5.943380832672119, "learning_rate": 9.341847389558234e-06, "loss": 0.3167, "step": 8700 }, { "epoch": 0.0704, "grad_norm": 24.188907623291016, "learning_rate": 9.333815261044178e-06, "loss": 0.3521, "step": 8800 }, { "epoch": 0.0712, "grad_norm": 13.702215194702148, "learning_rate": 9.325783132530122e-06, "loss": 0.2937, "step": 8900 }, { "epoch": 0.072, "grad_norm": 10.463766098022461, "learning_rate": 9.317751004016065e-06, "loss": 0.3317, "step": 9000 }, { "epoch": 0.0728, "grad_norm": 5.035882472991943, "learning_rate": 9.309718875502008e-06, "loss": 0.3048, "step": 9100 }, { "epoch": 0.0736, "grad_norm": 11.715450286865234, "learning_rate": 9.301686746987952e-06, "loss": 0.3287, "step": 9200 }, { "epoch": 0.0744, "grad_norm": 13.449250221252441, "learning_rate": 9.293654618473896e-06, "loss": 0.3345, "step": 9300 }, { "epoch": 0.0752, "grad_norm": 8.16373348236084, "learning_rate": 9.28562248995984e-06, "loss": 0.3233, "step": 9400 }, { "epoch": 0.076, "grad_norm": 6.236880302429199, "learning_rate": 9.277590361445784e-06, "loss": 0.3131, "step": 9500 }, { "epoch": 0.0768, "grad_norm": 12.164971351623535, "learning_rate": 9.269558232931728e-06, "loss": 0.3281, "step": 9600 }, { "epoch": 0.0776, "grad_norm": 15.774728775024414, "learning_rate": 9.261526104417672e-06, "loss": 0.315, "step": 9700 }, { "epoch": 0.0784, "grad_norm": 16.81951332092285, "learning_rate": 9.253493975903616e-06, "loss": 0.3491, "step": 9800 }, { "epoch": 0.0792, "grad_norm": 35.960697174072266, "learning_rate": 9.245461847389558e-06, "loss": 0.3258, "step": 9900 }, { "epoch": 0.08, "grad_norm": 9.389575958251953, "learning_rate": 9.237429718875502e-06, "loss": 0.3082, "step": 10000 }, { "epoch": 0.0808, "grad_norm": 5.77083683013916, "learning_rate": 9.229477911646586e-06, "loss": 0.316, "step": 10100 }, { "epoch": 0.0816, "grad_norm": 7.730663776397705, "learning_rate": 9.22144578313253e-06, "loss": 0.3538, "step": 10200 }, { "epoch": 0.0824, "grad_norm": 2.672037363052368, "learning_rate": 9.213413654618474e-06, "loss": 0.3089, "step": 10300 }, { "epoch": 0.0832, "grad_norm": 10.987016677856445, "learning_rate": 9.205381526104418e-06, "loss": 0.3032, "step": 10400 }, { "epoch": 0.084, "grad_norm": 5.871443271636963, "learning_rate": 9.197349397590362e-06, "loss": 0.3423, "step": 10500 }, { "epoch": 0.0848, "grad_norm": 8.393383026123047, "learning_rate": 9.189317269076306e-06, "loss": 0.2872, "step": 10600 }, { "epoch": 0.0856, "grad_norm": 8.698946952819824, "learning_rate": 9.18128514056225e-06, "loss": 0.3028, "step": 10700 }, { "epoch": 0.0864, "grad_norm": 2.9550344944000244, "learning_rate": 9.173253012048194e-06, "loss": 0.3262, "step": 10800 }, { "epoch": 0.0872, "grad_norm": 6.455613136291504, "learning_rate": 9.165220883534138e-06, "loss": 0.3273, "step": 10900 }, { "epoch": 0.088, "grad_norm": 11.550117492675781, "learning_rate": 9.157269076305222e-06, "loss": 0.3307, "step": 11000 }, { "epoch": 0.0888, "grad_norm": 9.657747268676758, "learning_rate": 9.149236947791166e-06, "loss": 0.3227, "step": 11100 }, { "epoch": 0.0896, "grad_norm": 9.803390502929688, "learning_rate": 9.14120481927711e-06, "loss": 0.3102, "step": 11200 }, { "epoch": 0.0904, "grad_norm": 12.699715614318848, "learning_rate": 9.133172690763052e-06, "loss": 0.3353, "step": 11300 }, { "epoch": 0.0912, "grad_norm": 9.973093032836914, "learning_rate": 9.125140562248996e-06, "loss": 0.3053, "step": 11400 }, { "epoch": 0.092, "grad_norm": 12.822247505187988, "learning_rate": 9.11710843373494e-06, "loss": 0.3088, "step": 11500 }, { "epoch": 0.0928, "grad_norm": 13.785049438476562, "learning_rate": 9.109076305220884e-06, "loss": 0.3004, "step": 11600 }, { "epoch": 0.0936, "grad_norm": 11.756546020507812, "learning_rate": 9.101044176706828e-06, "loss": 0.304, "step": 11700 }, { "epoch": 0.0944, "grad_norm": 12.297760009765625, "learning_rate": 9.093012048192772e-06, "loss": 0.3029, "step": 11800 }, { "epoch": 0.0952, "grad_norm": 7.838765621185303, "learning_rate": 9.084979919678716e-06, "loss": 0.3046, "step": 11900 }, { "epoch": 0.096, "grad_norm": 6.533823013305664, "learning_rate": 9.07694779116466e-06, "loss": 0.3065, "step": 12000 }, { "epoch": 0.096, "eval_test1_cer": 0.10149656434386436, "eval_test1_cer_norm": 0.06880207457920139, "eval_test1_loss": 0.23942527174949646, "eval_test1_runtime": 3496.8354, "eval_test1_samples_per_second": 0.715, "eval_test1_steps_per_second": 0.179, "eval_test1_wer": 0.23831598588880143, "eval_test1_wer_norm": 0.1713375610041205, "step": 12000 }, { "epoch": 0.096, "eval_test2_cer": 0.2408444394669056, "eval_test2_cer_norm": 0.178324488689185, "eval_test2_loss": 0.3968297839164734, "eval_test2_runtime": 3795.2628, "eval_test2_samples_per_second": 0.659, "eval_test2_steps_per_second": 0.165, "eval_test2_wer": 0.4381437399862669, "eval_test2_wer_norm": 0.36356864542745815, "step": 12000 }, { "epoch": 0.0968, "grad_norm": 6.5714826583862305, "learning_rate": 9.068915662650603e-06, "loss": 0.3177, "step": 12100 }, { "epoch": 0.0976, "grad_norm": 10.972187995910645, "learning_rate": 9.060883534136546e-06, "loss": 0.3032, "step": 12200 }, { "epoch": 0.0984, "grad_norm": 12.001550674438477, "learning_rate": 9.05285140562249e-06, "loss": 0.3009, "step": 12300 }, { "epoch": 0.0992, "grad_norm": 10.76689338684082, "learning_rate": 9.044819277108433e-06, "loss": 0.304, "step": 12400 }, { "epoch": 0.1, "grad_norm": 6.645873069763184, "learning_rate": 9.036787148594377e-06, "loss": 0.2801, "step": 12500 }, { "epoch": 0.1008, "grad_norm": 7.068800926208496, "learning_rate": 9.028755020080321e-06, "loss": 0.3198, "step": 12600 }, { "epoch": 0.1016, "grad_norm": 7.24527645111084, "learning_rate": 9.020722891566265e-06, "loss": 0.3236, "step": 12700 }, { "epoch": 0.1024, "grad_norm": 8.540508270263672, "learning_rate": 9.01269076305221e-06, "loss": 0.3132, "step": 12800 }, { "epoch": 0.1032, "grad_norm": 10.710851669311523, "learning_rate": 9.004658634538153e-06, "loss": 0.3034, "step": 12900 }, { "epoch": 0.104, "grad_norm": 7.53853702545166, "learning_rate": 8.996626506024097e-06, "loss": 0.2957, "step": 13000 }, { "epoch": 0.1048, "grad_norm": 8.962101936340332, "learning_rate": 8.988594377510041e-06, "loss": 0.312, "step": 13100 }, { "epoch": 0.1056, "grad_norm": 7.6881561279296875, "learning_rate": 8.980642570281125e-06, "loss": 0.3304, "step": 13200 }, { "epoch": 0.1064, "grad_norm": 3.7053704261779785, "learning_rate": 8.97261044176707e-06, "loss": 0.2913, "step": 13300 }, { "epoch": 0.1072, "grad_norm": 5.796387195587158, "learning_rate": 8.964578313253013e-06, "loss": 0.3162, "step": 13400 }, { "epoch": 0.108, "grad_norm": 14.237732887268066, "learning_rate": 8.956546184738957e-06, "loss": 0.3096, "step": 13500 }, { "epoch": 0.1088, "grad_norm": 9.336588859558105, "learning_rate": 8.948514056224901e-06, "loss": 0.3018, "step": 13600 }, { "epoch": 0.1096, "grad_norm": 16.10430335998535, "learning_rate": 8.940481927710845e-06, "loss": 0.3149, "step": 13700 }, { "epoch": 0.1104, "grad_norm": 70.292236328125, "learning_rate": 8.932449799196787e-06, "loss": 0.2721, "step": 13800 }, { "epoch": 0.1112, "grad_norm": 5.2695770263671875, "learning_rate": 8.924417670682731e-06, "loss": 0.294, "step": 13900 }, { "epoch": 0.112, "grad_norm": 7.514453411102295, "learning_rate": 8.916385542168675e-06, "loss": 0.3363, "step": 14000 }, { "epoch": 0.1128, "grad_norm": 6.603350639343262, "learning_rate": 8.908353413654619e-06, "loss": 0.3214, "step": 14100 }, { "epoch": 0.1136, "grad_norm": 12.648730278015137, "learning_rate": 8.900321285140563e-06, "loss": 0.313, "step": 14200 }, { "epoch": 0.1144, "grad_norm": 19.52114486694336, "learning_rate": 8.892289156626507e-06, "loss": 0.2778, "step": 14300 }, { "epoch": 0.1152, "grad_norm": 9.065464973449707, "learning_rate": 8.88425702811245e-06, "loss": 0.3182, "step": 14400 }, { "epoch": 0.116, "grad_norm": 6.192761421203613, "learning_rate": 8.876224899598395e-06, "loss": 0.3268, "step": 14500 }, { "epoch": 0.1168, "grad_norm": 6.336472034454346, "learning_rate": 8.868192771084338e-06, "loss": 0.3125, "step": 14600 }, { "epoch": 0.1176, "grad_norm": 7.2869343757629395, "learning_rate": 8.86016064257028e-06, "loss": 0.2938, "step": 14700 }, { "epoch": 0.1184, "grad_norm": 8.914804458618164, "learning_rate": 8.852128514056225e-06, "loss": 0.2831, "step": 14800 }, { "epoch": 0.1192, "grad_norm": 8.745502471923828, "learning_rate": 8.844096385542168e-06, "loss": 0.306, "step": 14900 }, { "epoch": 0.12, "grad_norm": 8.596148490905762, "learning_rate": 8.836064257028112e-06, "loss": 0.2907, "step": 15000 }, { "epoch": 0.1208, "grad_norm": 6.889795303344727, "learning_rate": 8.828032128514056e-06, "loss": 0.2806, "step": 15100 }, { "epoch": 0.1216, "grad_norm": 9.58425521850586, "learning_rate": 8.82e-06, "loss": 0.2853, "step": 15200 }, { "epoch": 0.1224, "grad_norm": 5.967700004577637, "learning_rate": 8.811967871485944e-06, "loss": 0.3109, "step": 15300 }, { "epoch": 0.1232, "grad_norm": 10.997330665588379, "learning_rate": 8.80401606425703e-06, "loss": 0.3129, "step": 15400 }, { "epoch": 0.124, "grad_norm": 8.48721981048584, "learning_rate": 8.795983935742972e-06, "loss": 0.3227, "step": 15500 }, { "epoch": 0.1248, "grad_norm": 9.547022819519043, "learning_rate": 8.787951807228916e-06, "loss": 0.282, "step": 15600 }, { "epoch": 0.1256, "grad_norm": 12.319640159606934, "learning_rate": 8.77991967871486e-06, "loss": 0.3019, "step": 15700 }, { "epoch": 0.1264, "grad_norm": 9.99170970916748, "learning_rate": 8.771887550200804e-06, "loss": 0.2872, "step": 15800 }, { "epoch": 0.1272, "grad_norm": 9.307220458984375, "learning_rate": 8.763855421686748e-06, "loss": 0.2955, "step": 15900 }, { "epoch": 0.128, "grad_norm": 6.70652961730957, "learning_rate": 8.755823293172692e-06, "loss": 0.2813, "step": 16000 }, { "epoch": 0.128, "eval_test1_cer": 0.11614478303084622, "eval_test1_cer_norm": 0.08919730112613153, "eval_test1_loss": 0.2319698929786682, "eval_test1_runtime": 2494.875, "eval_test1_samples_per_second": 1.002, "eval_test1_steps_per_second": 0.251, "eval_test1_wer": 0.2696580075220852, "eval_test1_wer_norm": 0.2076331862415617, "step": 16000 }, { "epoch": 0.128, "eval_test2_cer": 0.25404113935864414, "eval_test2_cer_norm": 0.1936541292221878, "eval_test2_loss": 0.38276800513267517, "eval_test2_runtime": 2638.5507, "eval_test2_samples_per_second": 0.947, "eval_test2_steps_per_second": 0.237, "eval_test2_wer": 0.46260585946440835, "eval_test2_wer_norm": 0.3951409580563832, "step": 16000 }, { "epoch": 0.1288, "grad_norm": 9.956653594970703, "learning_rate": 8.747791164658636e-06, "loss": 0.2785, "step": 16100 }, { "epoch": 0.1296, "grad_norm": 10.152132987976074, "learning_rate": 8.73975903614458e-06, "loss": 0.2918, "step": 16200 }, { "epoch": 0.1304, "grad_norm": 10.379541397094727, "learning_rate": 8.731726907630524e-06, "loss": 0.2879, "step": 16300 }, { "epoch": 0.1312, "grad_norm": 5.655648708343506, "learning_rate": 8.723694779116466e-06, "loss": 0.2935, "step": 16400 }, { "epoch": 0.132, "grad_norm": 9.877130508422852, "learning_rate": 8.71566265060241e-06, "loss": 0.3331, "step": 16500 }, { "epoch": 0.1328, "grad_norm": 8.812068939208984, "learning_rate": 8.707630522088354e-06, "loss": 0.2902, "step": 16600 }, { "epoch": 0.1336, "grad_norm": 8.972885131835938, "learning_rate": 8.699598393574298e-06, "loss": 0.3017, "step": 16700 }, { "epoch": 0.1344, "grad_norm": 6.562330722808838, "learning_rate": 8.691566265060242e-06, "loss": 0.2866, "step": 16800 }, { "epoch": 0.1352, "grad_norm": 10.704404830932617, "learning_rate": 8.683534136546186e-06, "loss": 0.3078, "step": 16900 }, { "epoch": 0.136, "grad_norm": 9.468811988830566, "learning_rate": 8.67550200803213e-06, "loss": 0.2906, "step": 17000 }, { "epoch": 0.1368, "grad_norm": 5.418923854827881, "learning_rate": 8.667469879518073e-06, "loss": 0.2918, "step": 17100 }, { "epoch": 0.1376, "grad_norm": 10.071795463562012, "learning_rate": 8.659437751004017e-06, "loss": 0.3149, "step": 17200 }, { "epoch": 0.1384, "grad_norm": 10.977407455444336, "learning_rate": 8.65140562248996e-06, "loss": 0.3091, "step": 17300 }, { "epoch": 0.1392, "grad_norm": 7.745774745941162, "learning_rate": 8.643373493975904e-06, "loss": 0.2843, "step": 17400 }, { "epoch": 0.14, "grad_norm": 5.645944595336914, "learning_rate": 8.635341365461847e-06, "loss": 0.2824, "step": 17500 }, { "epoch": 0.1408, "grad_norm": 11.466683387756348, "learning_rate": 8.627309236947791e-06, "loss": 0.3228, "step": 17600 }, { "epoch": 0.1416, "grad_norm": 7.580329418182373, "learning_rate": 8.619277108433735e-06, "loss": 0.2967, "step": 17700 }, { "epoch": 0.1424, "grad_norm": 11.873896598815918, "learning_rate": 8.61124497991968e-06, "loss": 0.3073, "step": 17800 }, { "epoch": 0.1432, "grad_norm": 6.72385835647583, "learning_rate": 8.603212851405623e-06, "loss": 0.3105, "step": 17900 }, { "epoch": 0.144, "grad_norm": 11.72990608215332, "learning_rate": 8.595180722891567e-06, "loss": 0.3141, "step": 18000 }, { "epoch": 0.1448, "grad_norm": 16.776378631591797, "learning_rate": 8.587148594377511e-06, "loss": 0.2964, "step": 18100 }, { "epoch": 0.1456, "grad_norm": 9.208036422729492, "learning_rate": 8.579116465863455e-06, "loss": 0.2818, "step": 18200 }, { "epoch": 0.1464, "grad_norm": 10.971381187438965, "learning_rate": 8.571084337349397e-06, "loss": 0.2694, "step": 18300 }, { "epoch": 0.1472, "grad_norm": 10.638858795166016, "learning_rate": 8.563052208835341e-06, "loss": 0.2927, "step": 18400 }, { "epoch": 0.148, "grad_norm": 6.251519203186035, "learning_rate": 8.555020080321285e-06, "loss": 0.2835, "step": 18500 }, { "epoch": 0.1488, "grad_norm": 10.34622859954834, "learning_rate": 8.546987951807229e-06, "loss": 0.298, "step": 18600 }, { "epoch": 0.1496, "grad_norm": 9.303108215332031, "learning_rate": 8.538955823293173e-06, "loss": 0.2856, "step": 18700 }, { "epoch": 0.1504, "grad_norm": 8.3678617477417, "learning_rate": 8.530923694779117e-06, "loss": 0.2782, "step": 18800 }, { "epoch": 0.1512, "grad_norm": 11.090498924255371, "learning_rate": 8.52289156626506e-06, "loss": 0.2863, "step": 18900 }, { "epoch": 0.152, "grad_norm": 5.836663722991943, "learning_rate": 8.514859437751005e-06, "loss": 0.2693, "step": 19000 }, { "epoch": 0.1528, "grad_norm": 6.694278717041016, "learning_rate": 8.506827309236948e-06, "loss": 0.2817, "step": 19100 }, { "epoch": 0.1536, "grad_norm": 5.725604057312012, "learning_rate": 8.498795180722892e-06, "loss": 0.265, "step": 19200 }, { "epoch": 0.1544, "grad_norm": 8.070332527160645, "learning_rate": 8.490763052208836e-06, "loss": 0.302, "step": 19300 }, { "epoch": 0.1552, "grad_norm": 7.241143226623535, "learning_rate": 8.48281124497992e-06, "loss": 0.2898, "step": 19400 }, { "epoch": 0.156, "grad_norm": 8.699810981750488, "learning_rate": 8.474779116465865e-06, "loss": 0.2868, "step": 19500 }, { "epoch": 0.1568, "grad_norm": 4.879644870758057, "learning_rate": 8.466746987951808e-06, "loss": 0.2917, "step": 19600 }, { "epoch": 0.1576, "grad_norm": 6.502651691436768, "learning_rate": 8.458714859437752e-06, "loss": 0.2888, "step": 19700 }, { "epoch": 0.1584, "grad_norm": 5.7493791580200195, "learning_rate": 8.450682730923695e-06, "loss": 0.2936, "step": 19800 }, { "epoch": 0.1592, "grad_norm": 10.4688720703125, "learning_rate": 8.442650602409639e-06, "loss": 0.2806, "step": 19900 }, { "epoch": 0.16, "grad_norm": 7.386699676513672, "learning_rate": 8.434618473895582e-06, "loss": 0.3055, "step": 20000 }, { "epoch": 0.16, "eval_test1_cer": 0.09210452610351781, "eval_test1_cer_norm": 0.07215885898143924, "eval_test1_loss": 0.22505834698677063, "eval_test1_runtime": 2739.9634, "eval_test1_samples_per_second": 0.912, "eval_test1_steps_per_second": 0.228, "eval_test1_wer": 0.23679990670281933, "eval_test1_wer_norm": 0.17437680820596743, "step": 20000 }, { "epoch": 0.16, "eval_test2_cer": 0.15537387538731473, "eval_test2_cer_norm": 0.12383792996591261, "eval_test2_loss": 0.3772575259208679, "eval_test2_runtime": 3715.4173, "eval_test2_samples_per_second": 0.673, "eval_test2_steps_per_second": 0.168, "eval_test2_wer": 0.31795033188372623, "eval_test2_wer_norm": 0.24954159981663993, "step": 20000 }, { "epoch": 0.1608, "grad_norm": 9.69093132019043, "learning_rate": 8.426586345381526e-06, "loss": 0.2777, "step": 20100 }, { "epoch": 0.1616, "grad_norm": 8.120789527893066, "learning_rate": 8.41855421686747e-06, "loss": 0.2859, "step": 20200 }, { "epoch": 0.1624, "grad_norm": 7.084224700927734, "learning_rate": 8.410522088353414e-06, "loss": 0.3035, "step": 20300 }, { "epoch": 0.1632, "grad_norm": 10.893829345703125, "learning_rate": 8.402489959839358e-06, "loss": 0.2792, "step": 20400 }, { "epoch": 0.164, "grad_norm": 36.892173767089844, "learning_rate": 8.394457831325302e-06, "loss": 0.2835, "step": 20500 }, { "epoch": 0.1648, "grad_norm": 8.867621421813965, "learning_rate": 8.386425702811246e-06, "loss": 0.2752, "step": 20600 }, { "epoch": 0.1656, "grad_norm": 5.385016918182373, "learning_rate": 8.378393574297188e-06, "loss": 0.2763, "step": 20700 }, { "epoch": 0.1664, "grad_norm": 6.431478023529053, "learning_rate": 8.370361445783132e-06, "loss": 0.2874, "step": 20800 }, { "epoch": 0.1672, "grad_norm": 7.890101432800293, "learning_rate": 8.362329317269076e-06, "loss": 0.2737, "step": 20900 }, { "epoch": 0.168, "grad_norm": 5.053267955780029, "learning_rate": 8.35429718875502e-06, "loss": 0.2885, "step": 21000 }, { "epoch": 0.1688, "grad_norm": 7.141018390655518, "learning_rate": 8.346265060240964e-06, "loss": 0.2735, "step": 21100 }, { "epoch": 0.1696, "grad_norm": 12.165709495544434, "learning_rate": 8.338232931726908e-06, "loss": 0.3086, "step": 21200 }, { "epoch": 0.1704, "grad_norm": 5.507659912109375, "learning_rate": 8.330200803212852e-06, "loss": 0.2773, "step": 21300 }, { "epoch": 0.1712, "grad_norm": 6.527307987213135, "learning_rate": 8.322248995983936e-06, "loss": 0.2584, "step": 21400 }, { "epoch": 0.172, "grad_norm": 8.494378089904785, "learning_rate": 8.31421686746988e-06, "loss": 0.3005, "step": 21500 }, { "epoch": 0.1728, "grad_norm": 6.213225364685059, "learning_rate": 8.306184738955824e-06, "loss": 0.2708, "step": 21600 }, { "epoch": 0.1736, "grad_norm": 10.444897651672363, "learning_rate": 8.298152610441768e-06, "loss": 0.2817, "step": 21700 }, { "epoch": 0.1744, "grad_norm": 6.7094950675964355, "learning_rate": 8.290120481927712e-06, "loss": 0.2555, "step": 21800 }, { "epoch": 0.1752, "grad_norm": 7.899540901184082, "learning_rate": 8.282088353413656e-06, "loss": 0.2902, "step": 21900 }, { "epoch": 0.176, "grad_norm": 7.049533367156982, "learning_rate": 8.2740562248996e-06, "loss": 0.2648, "step": 22000 }, { "epoch": 0.1768, "grad_norm": 7.662154674530029, "learning_rate": 8.266024096385543e-06, "loss": 0.2984, "step": 22100 }, { "epoch": 0.1776, "grad_norm": 5.362339019775391, "learning_rate": 8.257991967871487e-06, "loss": 0.2533, "step": 22200 }, { "epoch": 0.1784, "grad_norm": 11.844931602478027, "learning_rate": 8.249959839357431e-06, "loss": 0.2967, "step": 22300 }, { "epoch": 0.1792, "grad_norm": 8.356042861938477, "learning_rate": 8.241927710843374e-06, "loss": 0.2817, "step": 22400 }, { "epoch": 0.18, "grad_norm": 6.400118827819824, "learning_rate": 8.233895582329317e-06, "loss": 0.2975, "step": 22500 }, { "epoch": 0.1808, "grad_norm": 7.8221611976623535, "learning_rate": 8.225863453815261e-06, "loss": 0.2928, "step": 22600 }, { "epoch": 0.1816, "grad_norm": 9.997267723083496, "learning_rate": 8.217831325301205e-06, "loss": 0.2846, "step": 22700 }, { "epoch": 0.1824, "grad_norm": 3.741245746612549, "learning_rate": 8.20979919678715e-06, "loss": 0.2712, "step": 22800 }, { "epoch": 0.1832, "grad_norm": 6.534188270568848, "learning_rate": 8.201767068273093e-06, "loss": 0.2879, "step": 22900 }, { "epoch": 0.184, "grad_norm": 5.730849266052246, "learning_rate": 8.193734939759037e-06, "loss": 0.2722, "step": 23000 }, { "epoch": 0.1848, "grad_norm": 4.018589973449707, "learning_rate": 8.185702811244981e-06, "loss": 0.2979, "step": 23100 }, { "epoch": 0.1856, "grad_norm": 7.68761682510376, "learning_rate": 8.177670682730925e-06, "loss": 0.2919, "step": 23200 }, { "epoch": 0.1864, "grad_norm": 9.94321060180664, "learning_rate": 8.169638554216867e-06, "loss": 0.2731, "step": 23300 }, { "epoch": 0.1872, "grad_norm": 2.9645307064056396, "learning_rate": 8.161686746987953e-06, "loss": 0.2698, "step": 23400 }, { "epoch": 0.188, "grad_norm": 4.740218639373779, "learning_rate": 8.153654618473897e-06, "loss": 0.2731, "step": 23500 }, { "epoch": 0.1888, "grad_norm": 5.427436828613281, "learning_rate": 8.145622489959841e-06, "loss": 0.269, "step": 23600 }, { "epoch": 0.1896, "grad_norm": 13.593464851379395, "learning_rate": 8.137590361445785e-06, "loss": 0.263, "step": 23700 }, { "epoch": 0.1904, "grad_norm": 7.22516393661499, "learning_rate": 8.129558232931729e-06, "loss": 0.3125, "step": 23800 }, { "epoch": 0.1912, "grad_norm": 14.015786170959473, "learning_rate": 8.121526104417673e-06, "loss": 0.2832, "step": 23900 }, { "epoch": 0.192, "grad_norm": 8.892059326171875, "learning_rate": 8.113493975903615e-06, "loss": 0.3009, "step": 24000 }, { "epoch": 0.192, "eval_test1_cer": 0.07456214056314886, "eval_test1_cer_norm": 0.05662352630441569, "eval_test1_loss": 0.22622939944267273, "eval_test1_runtime": 3568.0234, "eval_test1_samples_per_second": 0.701, "eval_test1_steps_per_second": 0.175, "eval_test1_wer": 0.19636140995364296, "eval_test1_wer_norm": 0.13466202986644846, "step": 24000 }, { "epoch": 0.192, "eval_test2_cer": 0.17353100384514877, "eval_test2_cer_norm": 0.14121571893399443, "eval_test2_loss": 0.37227049469947815, "eval_test2_runtime": 3757.9926, "eval_test2_samples_per_second": 0.665, "eval_test2_steps_per_second": 0.166, "eval_test2_wer": 0.3350022888532845, "eval_test2_wer_norm": 0.2660726564290626, "step": 24000 }, { "epoch": 0.1928, "grad_norm": 6.917489051818848, "learning_rate": 8.105461847389559e-06, "loss": 0.2745, "step": 24100 }, { "epoch": 0.1936, "grad_norm": 9.797795295715332, "learning_rate": 8.097429718875503e-06, "loss": 0.2792, "step": 24200 }, { "epoch": 0.1944, "grad_norm": 7.5331315994262695, "learning_rate": 8.089397590361447e-06, "loss": 0.2631, "step": 24300 }, { "epoch": 0.1952, "grad_norm": 28.057600021362305, "learning_rate": 8.08136546184739e-06, "loss": 0.2752, "step": 24400 }, { "epoch": 0.196, "grad_norm": 6.589796543121338, "learning_rate": 8.073413654618475e-06, "loss": 0.2601, "step": 24500 }, { "epoch": 0.1968, "grad_norm": 7.585195541381836, "learning_rate": 8.065381526104419e-06, "loss": 0.2845, "step": 24600 }, { "epoch": 0.1976, "grad_norm": 12.819061279296875, "learning_rate": 8.057349397590361e-06, "loss": 0.2807, "step": 24700 }, { "epoch": 0.1984, "grad_norm": 6.550510883331299, "learning_rate": 8.049317269076305e-06, "loss": 0.2641, "step": 24800 }, { "epoch": 0.1992, "grad_norm": 4.5952653884887695, "learning_rate": 8.041285140562249e-06, "loss": 0.2773, "step": 24900 }, { "epoch": 0.2, "grad_norm": 7.8463134765625, "learning_rate": 8.033253012048193e-06, "loss": 0.299, "step": 25000 }, { "epoch": 0.2008, "grad_norm": 8.69621753692627, "learning_rate": 8.025220883534137e-06, "loss": 0.2568, "step": 25100 }, { "epoch": 0.2016, "grad_norm": 19.935726165771484, "learning_rate": 8.01718875502008e-06, "loss": 0.2875, "step": 25200 }, { "epoch": 0.2024, "grad_norm": 7.2960662841796875, "learning_rate": 8.009156626506025e-06, "loss": 0.2757, "step": 25300 }, { "epoch": 0.2032, "grad_norm": 14.16852855682373, "learning_rate": 8.001124497991969e-06, "loss": 0.2733, "step": 25400 }, { "epoch": 0.204, "grad_norm": 10.322851181030273, "learning_rate": 7.993092369477912e-06, "loss": 0.2732, "step": 25500 }, { "epoch": 0.2048, "grad_norm": 6.632780075073242, "learning_rate": 7.985060240963856e-06, "loss": 0.2705, "step": 25600 }, { "epoch": 0.2056, "grad_norm": 9.688406944274902, "learning_rate": 7.9770281124498e-06, "loss": 0.3111, "step": 25700 }, { "epoch": 0.2064, "grad_norm": 11.941987037658691, "learning_rate": 7.968995983935744e-06, "loss": 0.2688, "step": 25800 }, { "epoch": 0.2072, "grad_norm": 6.580658912658691, "learning_rate": 7.960963855421688e-06, "loss": 0.2842, "step": 25900 }, { "epoch": 0.208, "grad_norm": 9.130199432373047, "learning_rate": 7.95293172690763e-06, "loss": 0.2789, "step": 26000 }, { "epoch": 0.2088, "grad_norm": 7.189250469207764, "learning_rate": 7.944899598393574e-06, "loss": 0.2687, "step": 26100 }, { "epoch": 0.2096, "grad_norm": 6.851436614990234, "learning_rate": 7.936867469879518e-06, "loss": 0.3035, "step": 26200 }, { "epoch": 0.2104, "grad_norm": 5.916522979736328, "learning_rate": 7.928835341365462e-06, "loss": 0.2895, "step": 26300 }, { "epoch": 0.2112, "grad_norm": 9.012747764587402, "learning_rate": 7.920803212851406e-06, "loss": 0.258, "step": 26400 }, { "epoch": 0.212, "grad_norm": 4.732649803161621, "learning_rate": 7.91277108433735e-06, "loss": 0.2707, "step": 26500 }, { "epoch": 0.2128, "grad_norm": 7.508853912353516, "learning_rate": 7.904738955823294e-06, "loss": 0.2985, "step": 26600 }, { "epoch": 0.2136, "grad_norm": 7.832762241363525, "learning_rate": 7.896706827309238e-06, "loss": 0.2799, "step": 26700 }, { "epoch": 0.2144, "grad_norm": 11.426980972290039, "learning_rate": 7.888674698795182e-06, "loss": 0.2877, "step": 26800 }, { "epoch": 0.2152, "grad_norm": 9.61413288116455, "learning_rate": 7.880642570281126e-06, "loss": 0.2869, "step": 26900 }, { "epoch": 0.216, "grad_norm": 11.701996803283691, "learning_rate": 7.87261044176707e-06, "loss": 0.2607, "step": 27000 }, { "epoch": 0.2168, "grad_norm": 7.202447891235352, "learning_rate": 7.864578313253013e-06, "loss": 0.2763, "step": 27100 }, { "epoch": 0.2176, "grad_norm": 9.715481758117676, "learning_rate": 7.856546184738957e-06, "loss": 0.2871, "step": 27200 }, { "epoch": 0.2184, "grad_norm": 8.603879928588867, "learning_rate": 7.848514056224901e-06, "loss": 0.2876, "step": 27300 }, { "epoch": 0.2192, "grad_norm": 7.729073524475098, "learning_rate": 7.840481927710844e-06, "loss": 0.25, "step": 27400 }, { "epoch": 0.22, "grad_norm": 4.843418598175049, "learning_rate": 7.832449799196787e-06, "loss": 0.2913, "step": 27500 }, { "epoch": 0.2208, "grad_norm": 13.062045097351074, "learning_rate": 7.824417670682731e-06, "loss": 0.2676, "step": 27600 }, { "epoch": 0.2216, "grad_norm": 9.212933540344238, "learning_rate": 7.816385542168675e-06, "loss": 0.2814, "step": 27700 }, { "epoch": 0.2224, "grad_norm": 8.371397972106934, "learning_rate": 7.80835341365462e-06, "loss": 0.2556, "step": 27800 }, { "epoch": 0.2232, "grad_norm": 11.034435272216797, "learning_rate": 7.800321285140563e-06, "loss": 0.2656, "step": 27900 }, { "epoch": 0.224, "grad_norm": 7.741024494171143, "learning_rate": 7.792289156626507e-06, "loss": 0.2481, "step": 28000 }, { "epoch": 0.224, "eval_test1_cer": 0.08188158189558593, "eval_test1_cer_norm": 0.06053257137368838, "eval_test1_loss": 0.21281211078166962, "eval_test1_runtime": 3606.8392, "eval_test1_samples_per_second": 0.693, "eval_test1_steps_per_second": 0.173, "eval_test1_wer": 0.20149275489081314, "eval_test1_wer_norm": 0.13837341827639615, "step": 28000 }, { "epoch": 0.224, "eval_test2_cer": 0.22203867547691045, "eval_test2_cer_norm": 0.17609234583204214, "eval_test2_loss": 0.35916781425476074, "eval_test2_runtime": 3907.2621, "eval_test2_samples_per_second": 0.64, "eval_test2_steps_per_second": 0.16, "eval_test2_wer": 0.3924525062943465, "eval_test2_wer_norm": 0.32526358010543205, "step": 28000 }, { "epoch": 0.2248, "grad_norm": 8.185860633850098, "learning_rate": 7.784257028112451e-06, "loss": 0.2649, "step": 28100 }, { "epoch": 0.2256, "grad_norm": 9.275001525878906, "learning_rate": 7.776224899598395e-06, "loss": 0.2835, "step": 28200 }, { "epoch": 0.2264, "grad_norm": 11.974363327026367, "learning_rate": 7.768192771084337e-06, "loss": 0.279, "step": 28300 }, { "epoch": 0.2272, "grad_norm": 8.101808547973633, "learning_rate": 7.760160642570281e-06, "loss": 0.2934, "step": 28400 }, { "epoch": 0.228, "grad_norm": 6.571002006530762, "learning_rate": 7.752128514056225e-06, "loss": 0.2728, "step": 28500 }, { "epoch": 0.2288, "grad_norm": 5.396939277648926, "learning_rate": 7.744096385542169e-06, "loss": 0.2606, "step": 28600 }, { "epoch": 0.2296, "grad_norm": 11.369711875915527, "learning_rate": 7.736144578313253e-06, "loss": 0.2621, "step": 28700 }, { "epoch": 0.2304, "grad_norm": 11.154069900512695, "learning_rate": 7.728112449799197e-06, "loss": 0.2668, "step": 28800 }, { "epoch": 0.2312, "grad_norm": 9.739912033081055, "learning_rate": 7.720080321285141e-06, "loss": 0.2448, "step": 28900 }, { "epoch": 0.232, "grad_norm": 7.0749125480651855, "learning_rate": 7.712048192771085e-06, "loss": 0.2571, "step": 29000 }, { "epoch": 0.2328, "grad_norm": 11.714834213256836, "learning_rate": 7.704016064257029e-06, "loss": 0.2687, "step": 29100 }, { "epoch": 0.2336, "grad_norm": 7.279887676239014, "learning_rate": 7.695983935742973e-06, "loss": 0.2606, "step": 29200 }, { "epoch": 0.2344, "grad_norm": 13.882575035095215, "learning_rate": 7.687951807228917e-06, "loss": 0.2906, "step": 29300 }, { "epoch": 0.2352, "grad_norm": 11.140130996704102, "learning_rate": 7.67991967871486e-06, "loss": 0.2795, "step": 29400 }, { "epoch": 0.236, "grad_norm": 8.242719650268555, "learning_rate": 7.671887550200805e-06, "loss": 0.2531, "step": 29500 }, { "epoch": 0.2368, "grad_norm": 3.9837567806243896, "learning_rate": 7.663855421686748e-06, "loss": 0.2684, "step": 29600 }, { "epoch": 0.2376, "grad_norm": 4.7640275955200195, "learning_rate": 7.655823293172692e-06, "loss": 0.2812, "step": 29700 }, { "epoch": 0.2384, "grad_norm": 5.323102951049805, "learning_rate": 7.647791164658636e-06, "loss": 0.2742, "step": 29800 }, { "epoch": 0.2392, "grad_norm": 5.6695966720581055, "learning_rate": 7.63975903614458e-06, "loss": 0.2662, "step": 29900 }, { "epoch": 0.24, "grad_norm": 11.021814346313477, "learning_rate": 7.631726907630522e-06, "loss": 0.2591, "step": 30000 }, { "epoch": 0.2408, "grad_norm": 7.066900253295898, "learning_rate": 7.623694779116466e-06, "loss": 0.2799, "step": 30100 }, { "epoch": 0.2416, "grad_norm": 7.006994247436523, "learning_rate": 7.61566265060241e-06, "loss": 0.2763, "step": 30200 }, { "epoch": 0.2424, "grad_norm": 6.179232597351074, "learning_rate": 7.607630522088354e-06, "loss": 0.2607, "step": 30300 }, { "epoch": 0.2432, "grad_norm": 11.01894760131836, "learning_rate": 7.599598393574298e-06, "loss": 0.2892, "step": 30400 }, { "epoch": 0.244, "grad_norm": 8.360103607177734, "learning_rate": 7.591566265060242e-06, "loss": 0.2682, "step": 30500 }, { "epoch": 0.2448, "grad_norm": 12.027982711791992, "learning_rate": 7.583614457831326e-06, "loss": 0.2795, "step": 30600 }, { "epoch": 0.2456, "grad_norm": 9.705676078796387, "learning_rate": 7.5755823293172694e-06, "loss": 0.2604, "step": 30700 }, { "epoch": 0.2464, "grad_norm": 78.23721313476562, "learning_rate": 7.567550200803213e-06, "loss": 0.2744, "step": 30800 }, { "epoch": 0.2472, "grad_norm": 7.072327613830566, "learning_rate": 7.559518072289157e-06, "loss": 0.2518, "step": 30900 }, { "epoch": 0.248, "grad_norm": 6.551907062530518, "learning_rate": 7.551485943775101e-06, "loss": 0.252, "step": 31000 }, { "epoch": 0.2488, "grad_norm": 7.861015796661377, "learning_rate": 7.543453815261045e-06, "loss": 0.2616, "step": 31100 }, { "epoch": 0.2496, "grad_norm": 13.113914489746094, "learning_rate": 7.535421686746989e-06, "loss": 0.2929, "step": 31200 }, { "epoch": 0.2504, "grad_norm": 10.613142967224121, "learning_rate": 7.527389558232933e-06, "loss": 0.2603, "step": 31300 }, { "epoch": 0.2512, "grad_norm": 9.165702819824219, "learning_rate": 7.519357429718877e-06, "loss": 0.2633, "step": 31400 }, { "epoch": 0.252, "grad_norm": 5.82572078704834, "learning_rate": 7.511325301204821e-06, "loss": 0.2553, "step": 31500 }, { "epoch": 0.2528, "grad_norm": 4.581362247467041, "learning_rate": 7.503293172690763e-06, "loss": 0.2735, "step": 31600 }, { "epoch": 0.2536, "grad_norm": 6.59524393081665, "learning_rate": 7.495261044176707e-06, "loss": 0.2667, "step": 31700 }, { "epoch": 0.2544, "grad_norm": 10.484724044799805, "learning_rate": 7.487228915662651e-06, "loss": 0.2679, "step": 31800 }, { "epoch": 0.2552, "grad_norm": 8.551301002502441, "learning_rate": 7.479196787148595e-06, "loss": 0.2538, "step": 31900 }, { "epoch": 0.256, "grad_norm": 6.701406002044678, "learning_rate": 7.471164658634539e-06, "loss": 0.2507, "step": 32000 }, { "epoch": 0.256, "eval_test1_cer": 0.08450033609679587, "eval_test1_cer_norm": 0.058093019905395345, "eval_test1_loss": 0.20838095247745514, "eval_test1_runtime": 3620.5796, "eval_test1_samples_per_second": 0.69, "eval_test1_steps_per_second": 0.173, "eval_test1_wer": 0.20155106562873554, "eval_test1_wer_norm": 0.13854875946111808, "step": 32000 }, { "epoch": 0.256, "eval_test2_cer": 0.17676018217792214, "eval_test2_cer_norm": 0.13646575766966224, "eval_test2_loss": 0.35614562034606934, "eval_test2_runtime": 3787.2574, "eval_test2_samples_per_second": 0.66, "eval_test2_steps_per_second": 0.165, "eval_test2_wer": 0.3315690089265278, "eval_test2_wer_norm": 0.26286385514554206, "step": 32000 }, { "epoch": 0.2568, "grad_norm": 9.485516548156738, "learning_rate": 7.463132530120483e-06, "loss": 0.2773, "step": 32100 }, { "epoch": 0.2576, "grad_norm": 7.148492336273193, "learning_rate": 7.4551004016064265e-06, "loss": 0.2609, "step": 32200 }, { "epoch": 0.2584, "grad_norm": 7.948647499084473, "learning_rate": 7.4470682730923705e-06, "loss": 0.2901, "step": 32300 }, { "epoch": 0.2592, "grad_norm": 11.446524620056152, "learning_rate": 7.439036144578314e-06, "loss": 0.2531, "step": 32400 }, { "epoch": 0.26, "grad_norm": 7.954312324523926, "learning_rate": 7.4310040160642574e-06, "loss": 0.2474, "step": 32500 }, { "epoch": 0.2608, "grad_norm": 6.527101039886475, "learning_rate": 7.422971887550201e-06, "loss": 0.2713, "step": 32600 }, { "epoch": 0.2616, "grad_norm": 4.255068302154541, "learning_rate": 7.414939759036145e-06, "loss": 0.2774, "step": 32700 }, { "epoch": 0.2624, "grad_norm": 12.29005241394043, "learning_rate": 7.406907630522089e-06, "loss": 0.2869, "step": 32800 }, { "epoch": 0.2632, "grad_norm": 7.255998611450195, "learning_rate": 7.398875502008033e-06, "loss": 0.2776, "step": 32900 }, { "epoch": 0.264, "grad_norm": 5.739976406097412, "learning_rate": 7.390843373493977e-06, "loss": 0.2658, "step": 33000 }, { "epoch": 0.2648, "grad_norm": 9.824767112731934, "learning_rate": 7.38281124497992e-06, "loss": 0.2741, "step": 33100 }, { "epoch": 0.2656, "grad_norm": 5.075085163116455, "learning_rate": 7.374779116465864e-06, "loss": 0.2626, "step": 33200 }, { "epoch": 0.2664, "grad_norm": 11.58659839630127, "learning_rate": 7.366746987951808e-06, "loss": 0.2834, "step": 33300 }, { "epoch": 0.2672, "grad_norm": 3.7526087760925293, "learning_rate": 7.358795180722892e-06, "loss": 0.2896, "step": 33400 }, { "epoch": 0.268, "grad_norm": 5.333492755889893, "learning_rate": 7.350763052208836e-06, "loss": 0.2575, "step": 33500 }, { "epoch": 0.2688, "grad_norm": 8.13162612915039, "learning_rate": 7.34273092369478e-06, "loss": 0.2513, "step": 33600 }, { "epoch": 0.2696, "grad_norm": 8.09466552734375, "learning_rate": 7.334698795180723e-06, "loss": 0.3005, "step": 33700 }, { "epoch": 0.2704, "grad_norm": 4.5985331535339355, "learning_rate": 7.326666666666667e-06, "loss": 0.3217, "step": 33800 }, { "epoch": 0.2712, "grad_norm": 6.758331775665283, "learning_rate": 7.318634538152611e-06, "loss": 0.3054, "step": 33900 }, { "epoch": 0.272, "grad_norm": 9.720072746276855, "learning_rate": 7.310602409638555e-06, "loss": 0.2554, "step": 34000 }, { "epoch": 0.2728, "grad_norm": 10.393074989318848, "learning_rate": 7.302570281124498e-06, "loss": 0.2348, "step": 34100 }, { "epoch": 0.2736, "grad_norm": 5.730389595031738, "learning_rate": 7.294538152610442e-06, "loss": 0.264, "step": 34200 }, { "epoch": 0.2744, "grad_norm": 9.347960472106934, "learning_rate": 7.286506024096386e-06, "loss": 0.3121, "step": 34300 }, { "epoch": 0.2752, "grad_norm": 5.9382171630859375, "learning_rate": 7.27847389558233e-06, "loss": 0.256, "step": 34400 }, { "epoch": 0.276, "grad_norm": 13.730400085449219, "learning_rate": 7.270441767068274e-06, "loss": 0.2474, "step": 34500 }, { "epoch": 0.2768, "grad_norm": 5.6664347648620605, "learning_rate": 7.262409638554218e-06, "loss": 0.2642, "step": 34600 }, { "epoch": 0.2776, "grad_norm": 4.812320232391357, "learning_rate": 7.2543775100401615e-06, "loss": 0.2697, "step": 34700 }, { "epoch": 0.2784, "grad_norm": 6.524253845214844, "learning_rate": 7.2463453815261055e-06, "loss": 0.2511, "step": 34800 }, { "epoch": 0.2792, "grad_norm": 10.69779109954834, "learning_rate": 7.238313253012049e-06, "loss": 0.2455, "step": 34900 }, { "epoch": 0.28, "grad_norm": 6.162754535675049, "learning_rate": 7.230281124497992e-06, "loss": 0.3028, "step": 35000 }, { "epoch": 0.2808, "grad_norm": 10.023324966430664, "learning_rate": 7.2222489959839355e-06, "loss": 0.2618, "step": 35100 }, { "epoch": 0.2816, "grad_norm": 7.126629829406738, "learning_rate": 7.2142168674698794e-06, "loss": 0.2482, "step": 35200 }, { "epoch": 0.2824, "grad_norm": 5.309641361236572, "learning_rate": 7.206184738955823e-06, "loss": 0.2641, "step": 35300 }, { "epoch": 0.2832, "grad_norm": 8.043412208557129, "learning_rate": 7.198152610441767e-06, "loss": 0.2779, "step": 35400 }, { "epoch": 0.284, "grad_norm": 6.982008934020996, "learning_rate": 7.190120481927711e-06, "loss": 0.2519, "step": 35500 }, { "epoch": 0.2848, "grad_norm": 14.100213050842285, "learning_rate": 7.182088353413655e-06, "loss": 0.2621, "step": 35600 }, { "epoch": 0.2856, "grad_norm": 7.013679027557373, "learning_rate": 7.174056224899599e-06, "loss": 0.2511, "step": 35700 }, { "epoch": 0.2864, "grad_norm": 12.395895004272461, "learning_rate": 7.166024096385543e-06, "loss": 0.2574, "step": 35800 }, { "epoch": 0.2872, "grad_norm": 5.958752155303955, "learning_rate": 7.157991967871486e-06, "loss": 0.2376, "step": 35900 }, { "epoch": 0.288, "grad_norm": 7.98759126663208, "learning_rate": 7.14995983935743e-06, "loss": 0.2558, "step": 36000 }, { "epoch": 0.288, "eval_test1_cer": 0.09376167002763462, "eval_test1_cer_norm": 0.06997382764664922, "eval_test1_loss": 0.2042306363582611, "eval_test1_runtime": 3524.6047, "eval_test1_samples_per_second": 0.709, "eval_test1_steps_per_second": 0.177, "eval_test1_wer": 0.2142628064958162, "eval_test1_wer_norm": 0.15251760717729917, "step": 36000 }, { "epoch": 0.288, "eval_test2_cer": 0.18322787172882368, "eval_test2_cer_norm": 0.1433607065385807, "eval_test2_loss": 0.34622320532798767, "eval_test2_runtime": 2531.2967, "eval_test2_samples_per_second": 0.988, "eval_test2_steps_per_second": 0.247, "eval_test2_wer": 0.3355745021744106, "eval_test2_wer_norm": 0.2676197570479028, "step": 36000 }, { "epoch": 0.2888, "grad_norm": 7.333773612976074, "learning_rate": 7.141927710843374e-06, "loss": 0.2581, "step": 36100 }, { "epoch": 0.2896, "grad_norm": 6.082370758056641, "learning_rate": 7.133895582329318e-06, "loss": 0.2444, "step": 36200 }, { "epoch": 0.2904, "grad_norm": 7.414235591888428, "learning_rate": 7.125863453815262e-06, "loss": 0.2501, "step": 36300 }, { "epoch": 0.2912, "grad_norm": 7.523865699768066, "learning_rate": 7.117831325301206e-06, "loss": 0.2732, "step": 36400 }, { "epoch": 0.292, "grad_norm": 4.851339817047119, "learning_rate": 7.1097991967871496e-06, "loss": 0.2674, "step": 36500 }, { "epoch": 0.2928, "grad_norm": 14.28159236907959, "learning_rate": 7.1017670682730935e-06, "loss": 0.2652, "step": 36600 }, { "epoch": 0.2936, "grad_norm": 9.865965843200684, "learning_rate": 7.093734939759037e-06, "loss": 0.2586, "step": 36700 }, { "epoch": 0.2944, "grad_norm": 8.199728965759277, "learning_rate": 7.085702811244981e-06, "loss": 0.2606, "step": 36800 }, { "epoch": 0.2952, "grad_norm": 6.126343727111816, "learning_rate": 7.0776706827309235e-06, "loss": 0.2244, "step": 36900 }, { "epoch": 0.296, "grad_norm": 12.142101287841797, "learning_rate": 7.0696385542168675e-06, "loss": 0.2737, "step": 37000 }, { "epoch": 0.2968, "grad_norm": 18.582122802734375, "learning_rate": 7.061606425702811e-06, "loss": 0.2617, "step": 37100 }, { "epoch": 0.2976, "grad_norm": 6.676758766174316, "learning_rate": 7.053574297188755e-06, "loss": 0.2679, "step": 37200 }, { "epoch": 0.2984, "grad_norm": 11.17313003540039, "learning_rate": 7.045542168674699e-06, "loss": 0.2896, "step": 37300 }, { "epoch": 0.2992, "grad_norm": 6.7832159996032715, "learning_rate": 7.037510040160643e-06, "loss": 0.2645, "step": 37400 }, { "epoch": 0.3, "grad_norm": 7.81158971786499, "learning_rate": 7.029558232931728e-06, "loss": 0.2486, "step": 37500 }, { "epoch": 0.3008, "grad_norm": 5.17408561706543, "learning_rate": 7.0215261044176705e-06, "loss": 0.274, "step": 37600 }, { "epoch": 0.3016, "grad_norm": 4.426403045654297, "learning_rate": 7.0134939759036144e-06, "loss": 0.2574, "step": 37700 }, { "epoch": 0.3024, "grad_norm": 4.935957908630371, "learning_rate": 7.005461847389558e-06, "loss": 0.239, "step": 37800 }, { "epoch": 0.3032, "grad_norm": 5.82489013671875, "learning_rate": 6.997429718875502e-06, "loss": 0.2549, "step": 37900 }, { "epoch": 0.304, "grad_norm": 5.054333209991455, "learning_rate": 6.989397590361446e-06, "loss": 0.2675, "step": 38000 }, { "epoch": 0.3048, "grad_norm": 9.367691040039062, "learning_rate": 6.98136546184739e-06, "loss": 0.2699, "step": 38100 }, { "epoch": 0.3056, "grad_norm": 11.023392677307129, "learning_rate": 6.973333333333334e-06, "loss": 0.2483, "step": 38200 }, { "epoch": 0.3064, "grad_norm": 12.55837345123291, "learning_rate": 6.965301204819278e-06, "loss": 0.2557, "step": 38300 }, { "epoch": 0.3072, "grad_norm": 5.479390621185303, "learning_rate": 6.957269076305222e-06, "loss": 0.2625, "step": 38400 }, { "epoch": 0.308, "grad_norm": 11.671854972839355, "learning_rate": 6.949236947791165e-06, "loss": 0.2608, "step": 38500 }, { "epoch": 0.3088, "grad_norm": 5.811091423034668, "learning_rate": 6.941204819277109e-06, "loss": 0.2671, "step": 38600 }, { "epoch": 0.3096, "grad_norm": 5.157212734222412, "learning_rate": 6.933172690763053e-06, "loss": 0.2701, "step": 38700 }, { "epoch": 0.3104, "grad_norm": 6.742706775665283, "learning_rate": 6.925140562248997e-06, "loss": 0.264, "step": 38800 }, { "epoch": 0.3112, "grad_norm": 10.843968391418457, "learning_rate": 6.917108433734941e-06, "loss": 0.2476, "step": 38900 }, { "epoch": 0.312, "grad_norm": 5.495096206665039, "learning_rate": 6.9090763052208846e-06, "loss": 0.2674, "step": 39000 }, { "epoch": 0.3128, "grad_norm": 11.919147491455078, "learning_rate": 6.901124497991969e-06, "loss": 0.2397, "step": 39100 }, { "epoch": 0.3136, "grad_norm": 9.006637573242188, "learning_rate": 6.893092369477912e-06, "loss": 0.2502, "step": 39200 }, { "epoch": 0.3144, "grad_norm": 4.003146171569824, "learning_rate": 6.885060240963856e-06, "loss": 0.2697, "step": 39300 }, { "epoch": 0.3152, "grad_norm": 10.066797256469727, "learning_rate": 6.8770281124498e-06, "loss": 0.2295, "step": 39400 }, { "epoch": 0.316, "grad_norm": 8.48736572265625, "learning_rate": 6.868995983935744e-06, "loss": 0.2566, "step": 39500 }, { "epoch": 0.3168, "grad_norm": 6.740108966827393, "learning_rate": 6.860963855421688e-06, "loss": 0.2529, "step": 39600 }, { "epoch": 0.3176, "grad_norm": 7.446850299835205, "learning_rate": 6.8529317269076315e-06, "loss": 0.2571, "step": 39700 }, { "epoch": 0.3184, "grad_norm": 4.642506122589111, "learning_rate": 6.8448995983935755e-06, "loss": 0.3017, "step": 39800 }, { "epoch": 0.3192, "grad_norm": 5.234320163726807, "learning_rate": 6.836867469879519e-06, "loss": 0.2668, "step": 39900 }, { "epoch": 0.32, "grad_norm": 7.142499923706055, "learning_rate": 6.828835341365463e-06, "loss": 0.2566, "step": 40000 }, { "epoch": 0.32, "eval_test1_cer": 0.06914724774068265, "eval_test1_cer_norm": 0.04478113669652076, "eval_test1_loss": 0.19836583733558655, "eval_test1_runtime": 2432.2319, "eval_test1_samples_per_second": 1.028, "eval_test1_steps_per_second": 0.257, "eval_test1_wer": 0.17665238053587567, "eval_test1_wer_norm": 0.11657266430930185, "step": 40000 }, { "epoch": 0.32, "eval_test2_cer": 0.15002146563631613, "eval_test2_cer_norm": 0.10912806011775643, "eval_test2_loss": 0.33799564838409424, "eval_test2_runtime": 2481.3184, "eval_test2_samples_per_second": 1.008, "eval_test2_steps_per_second": 0.252, "eval_test2_wer": 0.28587777523460745, "eval_test2_wer_norm": 0.2201466880586752, "step": 40000 }, { "epoch": 0.3208, "grad_norm": 6.141145706176758, "learning_rate": 6.8208032128514055e-06, "loss": 0.2562, "step": 40100 }, { "epoch": 0.3216, "grad_norm": 5.139159679412842, "learning_rate": 6.8127710843373495e-06, "loss": 0.2703, "step": 40200 }, { "epoch": 0.3224, "grad_norm": 7.440242767333984, "learning_rate": 6.804738955823293e-06, "loss": 0.2821, "step": 40300 }, { "epoch": 0.3232, "grad_norm": 6.795882225036621, "learning_rate": 6.796706827309237e-06, "loss": 0.2452, "step": 40400 }, { "epoch": 0.324, "grad_norm": 7.417604446411133, "learning_rate": 6.788674698795181e-06, "loss": 0.2521, "step": 40500 }, { "epoch": 0.3248, "grad_norm": 6.449469089508057, "learning_rate": 6.780642570281125e-06, "loss": 0.2813, "step": 40600 }, { "epoch": 0.3256, "grad_norm": 10.74730110168457, "learning_rate": 6.772610441767069e-06, "loss": 0.2551, "step": 40700 }, { "epoch": 0.3264, "grad_norm": 3.9926106929779053, "learning_rate": 6.764578313253013e-06, "loss": 0.242, "step": 40800 }, { "epoch": 0.3272, "grad_norm": 3.6253511905670166, "learning_rate": 6.756546184738957e-06, "loss": 0.2359, "step": 40900 }, { "epoch": 0.328, "grad_norm": 7.051224231719971, "learning_rate": 6.7485140562249e-06, "loss": 0.2618, "step": 41000 }, { "epoch": 0.3288, "grad_norm": 9.776261329650879, "learning_rate": 6.740481927710844e-06, "loss": 0.2649, "step": 41100 }, { "epoch": 0.3296, "grad_norm": 5.289026260375977, "learning_rate": 6.732449799196788e-06, "loss": 0.2446, "step": 41200 }, { "epoch": 0.3304, "grad_norm": 9.757101058959961, "learning_rate": 6.724417670682732e-06, "loss": 0.2605, "step": 41300 }, { "epoch": 0.3312, "grad_norm": 7.856915473937988, "learning_rate": 6.716385542168675e-06, "loss": 0.2566, "step": 41400 }, { "epoch": 0.332, "grad_norm": 4.344404697418213, "learning_rate": 6.708353413654619e-06, "loss": 0.2482, "step": 41500 }, { "epoch": 0.3328, "grad_norm": 7.893800735473633, "learning_rate": 6.700321285140563e-06, "loss": 0.2555, "step": 41600 }, { "epoch": 0.3336, "grad_norm": 10.901208877563477, "learning_rate": 6.6922891566265066e-06, "loss": 0.2455, "step": 41700 }, { "epoch": 0.3344, "grad_norm": 8.60644817352295, "learning_rate": 6.6842570281124505e-06, "loss": 0.242, "step": 41800 }, { "epoch": 0.3352, "grad_norm": 10.857059478759766, "learning_rate": 6.6762248995983936e-06, "loss": 0.2611, "step": 41900 }, { "epoch": 0.336, "grad_norm": 4.682589054107666, "learning_rate": 6.6681927710843375e-06, "loss": 0.2365, "step": 42000 }, { "epoch": 0.3368, "grad_norm": 10.836357116699219, "learning_rate": 6.660160642570281e-06, "loss": 0.2477, "step": 42100 }, { "epoch": 0.3376, "grad_norm": 4.692202568054199, "learning_rate": 6.652128514056225e-06, "loss": 0.2494, "step": 42200 }, { "epoch": 0.3384, "grad_norm": 10.428234100341797, "learning_rate": 6.644096385542169e-06, "loss": 0.2605, "step": 42300 }, { "epoch": 0.3392, "grad_norm": 4.078035831451416, "learning_rate": 6.636064257028113e-06, "loss": 0.2358, "step": 42400 }, { "epoch": 0.34, "grad_norm": 7.381161689758301, "learning_rate": 6.628032128514057e-06, "loss": 0.2371, "step": 42500 }, { "epoch": 0.3408, "grad_norm": 2.6584036350250244, "learning_rate": 6.620000000000001e-06, "loss": 0.2795, "step": 42600 }, { "epoch": 0.3416, "grad_norm": 10.325010299682617, "learning_rate": 6.611967871485945e-06, "loss": 0.2476, "step": 42700 }, { "epoch": 0.3424, "grad_norm": 6.986746788024902, "learning_rate": 6.603935742971887e-06, "loss": 0.2354, "step": 42800 }, { "epoch": 0.3432, "grad_norm": 6.040923118591309, "learning_rate": 6.595903614457831e-06, "loss": 0.2748, "step": 42900 }, { "epoch": 0.344, "grad_norm": 4.043920993804932, "learning_rate": 6.587871485943775e-06, "loss": 0.2579, "step": 43000 }, { "epoch": 0.3448, "grad_norm": 7.066967964172363, "learning_rate": 6.57991967871486e-06, "loss": 0.2558, "step": 43100 }, { "epoch": 0.3456, "grad_norm": 7.57224702835083, "learning_rate": 6.571887550200804e-06, "loss": 0.2542, "step": 43200 }, { "epoch": 0.3464, "grad_norm": 11.219292640686035, "learning_rate": 6.563855421686748e-06, "loss": 0.2688, "step": 43300 }, { "epoch": 0.3472, "grad_norm": 9.798463821411133, "learning_rate": 6.555823293172692e-06, "loss": 0.2555, "step": 43400 }, { "epoch": 0.348, "grad_norm": 6.011776924133301, "learning_rate": 6.547791164658636e-06, "loss": 0.2517, "step": 43500 }, { "epoch": 0.3488, "grad_norm": 10.116564750671387, "learning_rate": 6.539759036144578e-06, "loss": 0.2585, "step": 43600 }, { "epoch": 0.3496, "grad_norm": 8.014334678649902, "learning_rate": 6.531807228915663e-06, "loss": 0.2386, "step": 43700 }, { "epoch": 0.3504, "grad_norm": 6.700802803039551, "learning_rate": 6.523775100401607e-06, "loss": 0.2412, "step": 43800 }, { "epoch": 0.3512, "grad_norm": 5.972689151763916, "learning_rate": 6.515742971887551e-06, "loss": 0.2461, "step": 43900 }, { "epoch": 0.352, "grad_norm": 6.2880167961120605, "learning_rate": 6.507710843373495e-06, "loss": 0.2535, "step": 44000 }, { "epoch": 0.352, "eval_test1_cer": 0.07431940398834865, "eval_test1_cer_norm": 0.051710807501140536, "eval_test1_loss": 0.19776684045791626, "eval_test1_runtime": 2427.0653, "eval_test1_samples_per_second": 1.03, "eval_test1_steps_per_second": 0.258, "eval_test1_wer": 0.17638998221522492, "eval_test1_wer_norm": 0.11625120547064496, "step": 44000 }, { "epoch": 0.352, "eval_test2_cer": 0.1173377011236794, "eval_test2_cer_norm": 0.09236519987604587, "eval_test2_loss": 0.3366641104221344, "eval_test2_runtime": 2441.4105, "eval_test2_samples_per_second": 1.024, "eval_test2_steps_per_second": 0.256, "eval_test2_wer": 0.2464522774090181, "eval_test2_wer_norm": 0.1820135228054091, "step": 44000 }, { "epoch": 0.3528, "grad_norm": 13.718138694763184, "learning_rate": 6.499678714859439e-06, "loss": 0.2509, "step": 44100 }, { "epoch": 0.3536, "grad_norm": 8.281696319580078, "learning_rate": 6.491646586345383e-06, "loss": 0.2489, "step": 44200 }, { "epoch": 0.3544, "grad_norm": 4.6960225105285645, "learning_rate": 6.483614457831325e-06, "loss": 0.2693, "step": 44300 }, { "epoch": 0.3552, "grad_norm": 6.965161323547363, "learning_rate": 6.475582329317269e-06, "loss": 0.2242, "step": 44400 }, { "epoch": 0.356, "grad_norm": 4.1582722663879395, "learning_rate": 6.467550200803213e-06, "loss": 0.2551, "step": 44500 }, { "epoch": 0.3568, "grad_norm": 6.540956020355225, "learning_rate": 6.459518072289157e-06, "loss": 0.2504, "step": 44600 }, { "epoch": 0.3576, "grad_norm": 8.521894454956055, "learning_rate": 6.451485943775101e-06, "loss": 0.2578, "step": 44700 }, { "epoch": 0.3584, "grad_norm": 9.609855651855469, "learning_rate": 6.443453815261045e-06, "loss": 0.2727, "step": 44800 }, { "epoch": 0.3592, "grad_norm": 7.131938457489014, "learning_rate": 6.4354216867469885e-06, "loss": 0.2519, "step": 44900 }, { "epoch": 0.36, "grad_norm": 3.9930307865142822, "learning_rate": 6.4273895582329325e-06, "loss": 0.246, "step": 45000 }, { "epoch": 0.3608, "grad_norm": 12.197285652160645, "learning_rate": 6.419357429718876e-06, "loss": 0.2592, "step": 45100 }, { "epoch": 0.3616, "grad_norm": 11.38996410369873, "learning_rate": 6.4113253012048195e-06, "loss": 0.2645, "step": 45200 }, { "epoch": 0.3624, "grad_norm": 9.129228591918945, "learning_rate": 6.403293172690763e-06, "loss": 0.254, "step": 45300 }, { "epoch": 0.3632, "grad_norm": 4.596703052520752, "learning_rate": 6.395261044176707e-06, "loss": 0.2555, "step": 45400 }, { "epoch": 0.364, "grad_norm": 4.702436923980713, "learning_rate": 6.387228915662651e-06, "loss": 0.2617, "step": 45500 }, { "epoch": 0.3648, "grad_norm": 6.283945083618164, "learning_rate": 6.379196787148595e-06, "loss": 0.2546, "step": 45600 }, { "epoch": 0.3656, "grad_norm": 8.981857299804688, "learning_rate": 6.371164658634539e-06, "loss": 0.2353, "step": 45700 }, { "epoch": 0.3664, "grad_norm": 7.928671360015869, "learning_rate": 6.363132530120483e-06, "loss": 0.2657, "step": 45800 }, { "epoch": 0.3672, "grad_norm": 7.3669114112854, "learning_rate": 6.355100401606427e-06, "loss": 0.2486, "step": 45900 }, { "epoch": 0.368, "grad_norm": 6.917601108551025, "learning_rate": 6.347068273092371e-06, "loss": 0.2524, "step": 46000 }, { "epoch": 0.3688, "grad_norm": 4.770781517028809, "learning_rate": 6.339036144578313e-06, "loss": 0.2434, "step": 46100 }, { "epoch": 0.3696, "grad_norm": 12.580122947692871, "learning_rate": 6.331004016064257e-06, "loss": 0.2463, "step": 46200 }, { "epoch": 0.3704, "grad_norm": 6.896516799926758, "learning_rate": 6.322971887550201e-06, "loss": 0.2365, "step": 46300 }, { "epoch": 0.3712, "grad_norm": 7.098085880279541, "learning_rate": 6.314939759036145e-06, "loss": 0.2412, "step": 46400 }, { "epoch": 0.372, "grad_norm": 5.216635704040527, "learning_rate": 6.306907630522089e-06, "loss": 0.2209, "step": 46500 }, { "epoch": 0.3728, "grad_norm": 7.694733619689941, "learning_rate": 6.298875502008033e-06, "loss": 0.2256, "step": 46600 }, { "epoch": 0.3736, "grad_norm": 37.76133346557617, "learning_rate": 6.290923694779118e-06, "loss": 0.24, "step": 46700 }, { "epoch": 0.3744, "grad_norm": 4.706324100494385, "learning_rate": 6.28289156626506e-06, "loss": 0.2224, "step": 46800 }, { "epoch": 0.3752, "grad_norm": 9.157281875610352, "learning_rate": 6.274859437751004e-06, "loss": 0.2609, "step": 46900 }, { "epoch": 0.376, "grad_norm": 6.885075092315674, "learning_rate": 6.266827309236948e-06, "loss": 0.2711, "step": 47000 }, { "epoch": 0.3768, "grad_norm": 5.564688205718994, "learning_rate": 6.258795180722892e-06, "loss": 0.2634, "step": 47100 }, { "epoch": 0.3776, "grad_norm": 2.6855292320251465, "learning_rate": 6.250763052208836e-06, "loss": 0.2509, "step": 47200 }, { "epoch": 0.3784, "grad_norm": 9.584918975830078, "learning_rate": 6.24273092369478e-06, "loss": 0.2586, "step": 47300 }, { "epoch": 0.3792, "grad_norm": 9.060691833496094, "learning_rate": 6.2346987951807236e-06, "loss": 0.2658, "step": 47400 }, { "epoch": 0.38, "grad_norm": 3.5146710872650146, "learning_rate": 6.2266666666666675e-06, "loss": 0.2441, "step": 47500 }, { "epoch": 0.3808, "grad_norm": 6.001715183258057, "learning_rate": 6.218634538152611e-06, "loss": 0.2403, "step": 47600 }, { "epoch": 0.3816, "grad_norm": 3.3255221843719482, "learning_rate": 6.2106024096385545e-06, "loss": 0.237, "step": 47700 }, { "epoch": 0.3824, "grad_norm": 9.07324504852295, "learning_rate": 6.202570281124498e-06, "loss": 0.2522, "step": 47800 }, { "epoch": 0.3832, "grad_norm": 10.312811851501465, "learning_rate": 6.194538152610442e-06, "loss": 0.2356, "step": 47900 }, { "epoch": 0.384, "grad_norm": 8.543242454528809, "learning_rate": 6.186506024096386e-06, "loss": 0.25, "step": 48000 }, { "epoch": 0.384, "eval_test1_cer": 0.07607457614459631, "eval_test1_cer_norm": 0.056354599370903063, "eval_test1_loss": 0.19436757266521454, "eval_test1_runtime": 2446.1615, "eval_test1_samples_per_second": 1.022, "eval_test1_steps_per_second": 0.256, "eval_test1_wer": 0.1832123385521444, "eval_test1_wer_norm": 0.12332329992109646, "step": 48000 }, { "epoch": 0.384, "eval_test2_cer": 0.12698323813790272, "eval_test2_cer_norm": 0.10085799504183453, "eval_test2_loss": 0.3315908908843994, "eval_test2_runtime": 2495.6856, "eval_test2_samples_per_second": 1.002, "eval_test2_steps_per_second": 0.25, "eval_test2_wer": 0.25469214923323413, "eval_test2_wer_norm": 0.1900641760256704, "step": 48000 }, { "epoch": 0.3848, "grad_norm": 7.8291215896606445, "learning_rate": 6.17847389558233e-06, "loss": 0.2452, "step": 48100 }, { "epoch": 0.3856, "grad_norm": 4.892631530761719, "learning_rate": 6.170441767068274e-06, "loss": 0.2568, "step": 48200 }, { "epoch": 0.3864, "grad_norm": 9.617656707763672, "learning_rate": 6.162409638554218e-06, "loss": 0.2197, "step": 48300 }, { "epoch": 0.3872, "grad_norm": 9.659303665161133, "learning_rate": 6.154377510040162e-06, "loss": 0.2392, "step": 48400 }, { "epoch": 0.388, "grad_norm": 6.675602912902832, "learning_rate": 6.146345381526105e-06, "loss": 0.2209, "step": 48500 }, { "epoch": 0.3888, "grad_norm": 5.2548298835754395, "learning_rate": 6.138313253012048e-06, "loss": 0.2642, "step": 48600 }, { "epoch": 0.3896, "grad_norm": 5.428624629974365, "learning_rate": 6.130281124497992e-06, "loss": 0.2139, "step": 48700 }, { "epoch": 0.3904, "grad_norm": 10.75515079498291, "learning_rate": 6.122248995983936e-06, "loss": 0.2238, "step": 48800 }, { "epoch": 0.3912, "grad_norm": 9.192462921142578, "learning_rate": 6.11421686746988e-06, "loss": 0.2422, "step": 48900 }, { "epoch": 0.392, "grad_norm": 5.55458927154541, "learning_rate": 6.106184738955824e-06, "loss": 0.2559, "step": 49000 }, { "epoch": 0.3928, "grad_norm": 10.406610488891602, "learning_rate": 6.098152610441768e-06, "loss": 0.2474, "step": 49100 }, { "epoch": 0.3936, "grad_norm": 6.274507999420166, "learning_rate": 6.0901204819277116e-06, "loss": 0.2361, "step": 49200 }, { "epoch": 0.3944, "grad_norm": 10.830498695373535, "learning_rate": 6.0820883534136555e-06, "loss": 0.2409, "step": 49300 }, { "epoch": 0.3952, "grad_norm": 11.912911415100098, "learning_rate": 6.074056224899599e-06, "loss": 0.2467, "step": 49400 }, { "epoch": 0.396, "grad_norm": 8.484502792358398, "learning_rate": 6.0660240963855425e-06, "loss": 0.2411, "step": 49500 }, { "epoch": 0.3968, "grad_norm": 9.443007469177246, "learning_rate": 6.057991967871486e-06, "loss": 0.2449, "step": 49600 }, { "epoch": 0.3976, "grad_norm": 10.93896770477295, "learning_rate": 6.04995983935743e-06, "loss": 0.239, "step": 49700 }, { "epoch": 0.3984, "grad_norm": 7.4916768074035645, "learning_rate": 6.041927710843373e-06, "loss": 0.2349, "step": 49800 }, { "epoch": 0.3992, "grad_norm": 7.075978755950928, "learning_rate": 6.033895582329317e-06, "loss": 0.2418, "step": 49900 }, { "epoch": 0.4, "grad_norm": 5.759922981262207, "learning_rate": 6.025863453815261e-06, "loss": 0.2536, "step": 50000 }, { "epoch": 0.4008, "grad_norm": 10.531188011169434, "learning_rate": 6.017831325301205e-06, "loss": 0.2162, "step": 50100 }, { "epoch": 0.4016, "grad_norm": 7.113009452819824, "learning_rate": 6.009799196787149e-06, "loss": 0.2373, "step": 50200 }, { "epoch": 0.4024, "grad_norm": 8.859579086303711, "learning_rate": 6.001767068273093e-06, "loss": 0.2327, "step": 50300 }, { "epoch": 0.4032, "grad_norm": 5.7207512855529785, "learning_rate": 5.993734939759036e-06, "loss": 0.2605, "step": 50400 }, { "epoch": 0.404, "grad_norm": 5.358285427093506, "learning_rate": 5.98570281124498e-06, "loss": 0.2442, "step": 50500 }, { "epoch": 0.4048, "grad_norm": 9.501456260681152, "learning_rate": 5.977670682730924e-06, "loss": 0.2333, "step": 50600 }, { "epoch": 0.4056, "grad_norm": 9.682687759399414, "learning_rate": 5.969638554216868e-06, "loss": 0.2404, "step": 50700 }, { "epoch": 0.4064, "grad_norm": 6.1207380294799805, "learning_rate": 5.961686746987952e-06, "loss": 0.2401, "step": 50800 }, { "epoch": 0.4072, "grad_norm": 3.812893867492676, "learning_rate": 5.953654618473896e-06, "loss": 0.2411, "step": 50900 }, { "epoch": 0.408, "grad_norm": 8.557660102844238, "learning_rate": 5.94562248995984e-06, "loss": 0.2486, "step": 51000 }, { "epoch": 0.4088, "grad_norm": 8.239738464355469, "learning_rate": 5.937590361445784e-06, "loss": 0.219, "step": 51100 }, { "epoch": 0.4096, "grad_norm": 8.825906753540039, "learning_rate": 5.929558232931727e-06, "loss": 0.2418, "step": 51200 }, { "epoch": 0.4104, "grad_norm": 9.038442611694336, "learning_rate": 5.921526104417671e-06, "loss": 0.2495, "step": 51300 }, { "epoch": 0.4112, "grad_norm": 10.421882629394531, "learning_rate": 5.913493975903615e-06, "loss": 0.2294, "step": 51400 }, { "epoch": 0.412, "grad_norm": 5.004858493804932, "learning_rate": 5.905461847389559e-06, "loss": 0.2553, "step": 51500 }, { "epoch": 0.4128, "grad_norm": 5.850473880767822, "learning_rate": 5.897429718875503e-06, "loss": 0.2443, "step": 51600 }, { "epoch": 0.4136, "grad_norm": 5.483931064605713, "learning_rate": 5.8893975903614466e-06, "loss": 0.2849, "step": 51700 }, { "epoch": 0.4144, "grad_norm": 9.20142650604248, "learning_rate": 5.881445783132531e-06, "loss": 0.2617, "step": 51800 }, { "epoch": 0.4152, "grad_norm": 5.675454139709473, "learning_rate": 5.873413654618474e-06, "loss": 0.2666, "step": 51900 }, { "epoch": 0.416, "grad_norm": 7.959702968597412, "learning_rate": 5.865381526104418e-06, "loss": 0.2286, "step": 52000 }, { "epoch": 0.416, "eval_test1_cer": 0.06314885353648518, "eval_test1_cer_norm": 0.04443057122962038, "eval_test1_loss": 0.1907189041376114, "eval_test1_runtime": 2490.0845, "eval_test1_samples_per_second": 1.004, "eval_test1_steps_per_second": 0.251, "eval_test1_wer": 0.18128808420070555, "eval_test1_wer_norm": 0.12083929980420234, "step": 52000 }, { "epoch": 0.416, "eval_test2_cer": 0.1296011124799343, "eval_test2_cer_norm": 0.09485396653238302, "eval_test2_loss": 0.3308376371860504, "eval_test2_runtime": 3341.4359, "eval_test2_samples_per_second": 0.748, "eval_test2_steps_per_second": 0.187, "eval_test2_wer": 0.2722304875257496, "eval_test2_wer_norm": 0.2069390327756131, "step": 52000 }, { "epoch": 0.4168, "grad_norm": 13.770788192749023, "learning_rate": 5.857349397590362e-06, "loss": 0.2467, "step": 52100 }, { "epoch": 0.4176, "grad_norm": 5.342738151550293, "learning_rate": 5.849317269076306e-06, "loss": 0.233, "step": 52200 }, { "epoch": 0.4184, "grad_norm": 6.316587448120117, "learning_rate": 5.84128514056225e-06, "loss": 0.2513, "step": 52300 }, { "epoch": 0.4192, "grad_norm": 6.252966403961182, "learning_rate": 5.8332530120481936e-06, "loss": 0.2495, "step": 52400 }, { "epoch": 0.42, "grad_norm": 8.304986953735352, "learning_rate": 5.8252208835341375e-06, "loss": 0.2573, "step": 52500 }, { "epoch": 0.4208, "grad_norm": 8.028290748596191, "learning_rate": 5.817188755020081e-06, "loss": 0.2393, "step": 52600 }, { "epoch": 0.4216, "grad_norm": 10.630465507507324, "learning_rate": 5.809156626506025e-06, "loss": 0.2453, "step": 52700 }, { "epoch": 0.4224, "grad_norm": 9.712656021118164, "learning_rate": 5.8011244979919675e-06, "loss": 0.2553, "step": 52800 }, { "epoch": 0.4232, "grad_norm": 9.605881690979004, "learning_rate": 5.7930923694779115e-06, "loss": 0.2393, "step": 52900 }, { "epoch": 0.424, "grad_norm": 10.953594207763672, "learning_rate": 5.785060240963855e-06, "loss": 0.2484, "step": 53000 }, { "epoch": 0.4248, "grad_norm": 4.043101787567139, "learning_rate": 5.777028112449799e-06, "loss": 0.2146, "step": 53100 }, { "epoch": 0.4256, "grad_norm": 8.587929725646973, "learning_rate": 5.768995983935743e-06, "loss": 0.2555, "step": 53200 }, { "epoch": 0.4264, "grad_norm": 6.47918701171875, "learning_rate": 5.760963855421687e-06, "loss": 0.2277, "step": 53300 }, { "epoch": 0.4272, "grad_norm": 6.032763957977295, "learning_rate": 5.752931726907631e-06, "loss": 0.24, "step": 53400 }, { "epoch": 0.428, "grad_norm": 4.708799362182617, "learning_rate": 5.744899598393575e-06, "loss": 0.2262, "step": 53500 }, { "epoch": 0.4288, "grad_norm": 7.626307964324951, "learning_rate": 5.736867469879519e-06, "loss": 0.2344, "step": 53600 }, { "epoch": 0.4296, "grad_norm": 5.346840858459473, "learning_rate": 5.728835341365462e-06, "loss": 0.2071, "step": 53700 }, { "epoch": 0.4304, "grad_norm": 6.508141040802002, "learning_rate": 5.720803212851406e-06, "loss": 0.2275, "step": 53800 }, { "epoch": 0.4312, "grad_norm": 7.617433071136475, "learning_rate": 5.71277108433735e-06, "loss": 0.2359, "step": 53900 }, { "epoch": 0.432, "grad_norm": 5.544378757476807, "learning_rate": 5.704738955823294e-06, "loss": 0.2491, "step": 54000 }, { "epoch": 0.4328, "grad_norm": 5.247679233551025, "learning_rate": 5.696706827309238e-06, "loss": 0.2353, "step": 54100 }, { "epoch": 0.4336, "grad_norm": 9.137139320373535, "learning_rate": 5.688674698795182e-06, "loss": 0.2397, "step": 54200 }, { "epoch": 0.4344, "grad_norm": 6.74301815032959, "learning_rate": 5.6806425702811255e-06, "loss": 0.2602, "step": 54300 }, { "epoch": 0.4352, "grad_norm": 6.35469913482666, "learning_rate": 5.672610441767069e-06, "loss": 0.2515, "step": 54400 }, { "epoch": 0.436, "grad_norm": 10.135574340820312, "learning_rate": 5.664578313253013e-06, "loss": 0.2219, "step": 54500 }, { "epoch": 0.4368, "grad_norm": 5.893723011016846, "learning_rate": 5.6565461847389556e-06, "loss": 0.2569, "step": 54600 }, { "epoch": 0.4376, "grad_norm": 14.050215721130371, "learning_rate": 5.6485140562248995e-06, "loss": 0.2906, "step": 54700 }, { "epoch": 0.4384, "grad_norm": 6.908878326416016, "learning_rate": 5.640481927710843e-06, "loss": 0.2444, "step": 54800 }, { "epoch": 0.4392, "grad_norm": 7.57582950592041, "learning_rate": 5.632449799196787e-06, "loss": 0.2467, "step": 54900 }, { "epoch": 0.44, "grad_norm": 11.190139770507812, "learning_rate": 5.624417670682731e-06, "loss": 0.2593, "step": 55000 }, { "epoch": 0.4408, "grad_norm": 12.42068099975586, "learning_rate": 5.616385542168675e-06, "loss": 0.2453, "step": 55100 }, { "epoch": 0.4416, "grad_norm": 10.345560073852539, "learning_rate": 5.608353413654619e-06, "loss": 0.2411, "step": 55200 }, { "epoch": 0.4424, "grad_norm": 7.91195011138916, "learning_rate": 5.600321285140563e-06, "loss": 0.2414, "step": 55300 }, { "epoch": 0.4432, "grad_norm": 6.820638179779053, "learning_rate": 5.592289156626507e-06, "loss": 0.2363, "step": 55400 }, { "epoch": 0.444, "grad_norm": 4.268233299255371, "learning_rate": 5.58425702811245e-06, "loss": 0.2325, "step": 55500 }, { "epoch": 0.4448, "grad_norm": 8.063896179199219, "learning_rate": 5.576224899598394e-06, "loss": 0.2607, "step": 55600 }, { "epoch": 0.4456, "grad_norm": 3.5322296619415283, "learning_rate": 5.568192771084338e-06, "loss": 0.2346, "step": 55700 }, { "epoch": 0.4464, "grad_norm": 9.244215965270996, "learning_rate": 5.560160642570282e-06, "loss": 0.2155, "step": 55800 }, { "epoch": 0.4472, "grad_norm": 10.747200965881348, "learning_rate": 5.552208835341366e-06, "loss": 0.2351, "step": 55900 }, { "epoch": 0.448, "grad_norm": 14.072000503540039, "learning_rate": 5.54417670682731e-06, "loss": 0.2413, "step": 56000 }, { "epoch": 0.448, "eval_test1_cer": 0.0663977892299649, "eval_test1_cer_norm": 0.0478834009652556, "eval_test1_loss": 0.1882741004228592, "eval_test1_runtime": 3418.3969, "eval_test1_samples_per_second": 0.731, "eval_test1_steps_per_second": 0.183, "eval_test1_wer": 0.16968424735415027, "eval_test1_wer_norm": 0.1093544522049154, "step": 56000 }, { "epoch": 0.448, "eval_test2_cer": 0.13036174263635344, "eval_test2_cer_norm": 0.09973950263402541, "eval_test2_loss": 0.3249567449092865, "eval_test2_runtime": 3504.8089, "eval_test2_samples_per_second": 0.713, "eval_test2_steps_per_second": 0.178, "eval_test2_wer": 0.25629434653238725, "eval_test2_wer_norm": 0.18966307586523035, "step": 56000 }, { "epoch": 0.4488, "grad_norm": 8.576178550720215, "learning_rate": 5.536144578313254e-06, "loss": 0.2404, "step": 56100 }, { "epoch": 0.4496, "grad_norm": 6.107906818389893, "learning_rate": 5.528112449799197e-06, "loss": 0.2217, "step": 56200 }, { "epoch": 0.4504, "grad_norm": 5.073305606842041, "learning_rate": 5.520080321285141e-06, "loss": 0.229, "step": 56300 }, { "epoch": 0.4512, "grad_norm": 5.073732376098633, "learning_rate": 5.512048192771085e-06, "loss": 0.2404, "step": 56400 }, { "epoch": 0.452, "grad_norm": 6.29484748840332, "learning_rate": 5.504016064257029e-06, "loss": 0.2283, "step": 56500 }, { "epoch": 0.4528, "grad_norm": 13.772027015686035, "learning_rate": 5.495983935742973e-06, "loss": 0.2433, "step": 56600 }, { "epoch": 0.4536, "grad_norm": 6.613615989685059, "learning_rate": 5.487951807228917e-06, "loss": 0.2634, "step": 56700 }, { "epoch": 0.4544, "grad_norm": 5.8526082038879395, "learning_rate": 5.480000000000001e-06, "loss": 0.2277, "step": 56800 }, { "epoch": 0.4552, "grad_norm": 6.082682132720947, "learning_rate": 5.471967871485945e-06, "loss": 0.2372, "step": 56900 }, { "epoch": 0.456, "grad_norm": 4.50082540512085, "learning_rate": 5.463935742971888e-06, "loss": 0.2578, "step": 57000 }, { "epoch": 0.4568, "grad_norm": 5.06675910949707, "learning_rate": 5.455903614457832e-06, "loss": 0.2033, "step": 57100 }, { "epoch": 0.4576, "grad_norm": 3.3439102172851562, "learning_rate": 5.447871485943776e-06, "loss": 0.2395, "step": 57200 }, { "epoch": 0.4584, "grad_norm": 12.235926628112793, "learning_rate": 5.43983935742972e-06, "loss": 0.2335, "step": 57300 }, { "epoch": 0.4592, "grad_norm": 6.169180393218994, "learning_rate": 5.431807228915663e-06, "loss": 0.2432, "step": 57400 }, { "epoch": 0.46, "grad_norm": 3.7379937171936035, "learning_rate": 5.423775100401607e-06, "loss": 0.2377, "step": 57500 }, { "epoch": 0.4608, "grad_norm": 6.2007060050964355, "learning_rate": 5.4157429718875506e-06, "loss": 0.2475, "step": 57600 }, { "epoch": 0.4616, "grad_norm": 11.921826362609863, "learning_rate": 5.4077108433734945e-06, "loss": 0.2359, "step": 57700 }, { "epoch": 0.4624, "grad_norm": 4.8144612312316895, "learning_rate": 5.399678714859438e-06, "loss": 0.2222, "step": 57800 }, { "epoch": 0.4632, "grad_norm": 7.309458255767822, "learning_rate": 5.3916465863453815e-06, "loss": 0.2315, "step": 57900 }, { "epoch": 0.464, "grad_norm": 6.312740325927734, "learning_rate": 5.383614457831325e-06, "loss": 0.2557, "step": 58000 }, { "epoch": 0.4648, "grad_norm": 7.315298557281494, "learning_rate": 5.375582329317269e-06, "loss": 0.2673, "step": 58100 }, { "epoch": 0.4656, "grad_norm": 9.65597152709961, "learning_rate": 5.367550200803213e-06, "loss": 0.2417, "step": 58200 }, { "epoch": 0.4664, "grad_norm": 5.868946552276611, "learning_rate": 5.359518072289157e-06, "loss": 0.2289, "step": 58300 }, { "epoch": 0.4672, "grad_norm": 7.116505146026611, "learning_rate": 5.351485943775101e-06, "loss": 0.2238, "step": 58400 }, { "epoch": 0.468, "grad_norm": 4.75971794128418, "learning_rate": 5.343453815261045e-06, "loss": 0.2466, "step": 58500 }, { "epoch": 0.4688, "grad_norm": 5.3648762702941895, "learning_rate": 5.335421686746989e-06, "loss": 0.2539, "step": 58600 }, { "epoch": 0.4696, "grad_norm": 2.8886609077453613, "learning_rate": 5.327389558232933e-06, "loss": 0.2386, "step": 58700 }, { "epoch": 0.4704, "grad_norm": 19.657976150512695, "learning_rate": 5.319357429718875e-06, "loss": 0.2257, "step": 58800 }, { "epoch": 0.4712, "grad_norm": 6.010655879974365, "learning_rate": 5.311325301204819e-06, "loss": 0.2402, "step": 58900 }, { "epoch": 0.472, "grad_norm": 11.614595413208008, "learning_rate": 5.303293172690763e-06, "loss": 0.2422, "step": 59000 }, { "epoch": 0.4728, "grad_norm": 9.773826599121094, "learning_rate": 5.295261044176707e-06, "loss": 0.2224, "step": 59100 }, { "epoch": 0.4736, "grad_norm": 3.5956804752349854, "learning_rate": 5.287228915662651e-06, "loss": 0.2287, "step": 59200 }, { "epoch": 0.4744, "grad_norm": 5.884477615356445, "learning_rate": 5.279196787148595e-06, "loss": 0.2284, "step": 59300 }, { "epoch": 0.4752, "grad_norm": 6.785783290863037, "learning_rate": 5.271164658634539e-06, "loss": 0.2486, "step": 59400 }, { "epoch": 0.476, "grad_norm": 5.1648054122924805, "learning_rate": 5.2631325301204825e-06, "loss": 0.2352, "step": 59500 }, { "epoch": 0.4768, "grad_norm": 7.0386643409729, "learning_rate": 5.255180722891566e-06, "loss": 0.2489, "step": 59600 }, { "epoch": 0.4776, "grad_norm": 8.528487205505371, "learning_rate": 5.24714859437751e-06, "loss": 0.2434, "step": 59700 }, { "epoch": 0.4784, "grad_norm": 10.057316780090332, "learning_rate": 5.239116465863454e-06, "loss": 0.232, "step": 59800 }, { "epoch": 0.4792, "grad_norm": 5.452738285064697, "learning_rate": 5.231084337349398e-06, "loss": 0.2138, "step": 59900 }, { "epoch": 0.48, "grad_norm": 12.630691528320312, "learning_rate": 5.223052208835342e-06, "loss": 0.2406, "step": 60000 }, { "epoch": 0.48, "eval_test1_cer": 0.047455000373440887, "eval_test1_cer_norm": 0.03161812375441208, "eval_test1_loss": 0.18266192078590393, "eval_test1_runtime": 3355.1591, "eval_test1_samples_per_second": 0.745, "eval_test1_steps_per_second": 0.186, "eval_test1_wer": 0.1444065424647949, "eval_test1_wer_norm": 0.08430988632046524, "step": 60000 }, { "epoch": 0.48, "eval_test2_cer": 0.11545245828200246, "eval_test2_cer_norm": 0.0919003718624109, "eval_test2_loss": 0.32027342915534973, "eval_test2_runtime": 3441.9039, "eval_test2_samples_per_second": 0.726, "eval_test2_steps_per_second": 0.182, "eval_test2_wer": 0.24284733348592355, "eval_test2_wer_norm": 0.17731492092596837, "step": 60000 }, { "epoch": 0.4808, "grad_norm": 1.9925609827041626, "learning_rate": 5.2150200803212856e-06, "loss": 0.2353, "step": 60100 }, { "epoch": 0.4816, "grad_norm": 5.7619476318359375, "learning_rate": 5.2069879518072295e-06, "loss": 0.2124, "step": 60200 }, { "epoch": 0.4824, "grad_norm": 6.040704727172852, "learning_rate": 5.198955823293173e-06, "loss": 0.2304, "step": 60300 }, { "epoch": 0.4832, "grad_norm": 14.028429985046387, "learning_rate": 5.1909236947791165e-06, "loss": 0.2549, "step": 60400 }, { "epoch": 0.484, "grad_norm": 14.17192268371582, "learning_rate": 5.18289156626506e-06, "loss": 0.2215, "step": 60500 }, { "epoch": 0.4848, "grad_norm": 8.68266773223877, "learning_rate": 5.174859437751004e-06, "loss": 0.2144, "step": 60600 }, { "epoch": 0.4856, "grad_norm": 7.074737548828125, "learning_rate": 5.166827309236948e-06, "loss": 0.2262, "step": 60700 }, { "epoch": 0.4864, "grad_norm": 7.095781326293945, "learning_rate": 5.158795180722892e-06, "loss": 0.2509, "step": 60800 }, { "epoch": 0.4872, "grad_norm": 14.873151779174805, "learning_rate": 5.150763052208836e-06, "loss": 0.2417, "step": 60900 }, { "epoch": 0.488, "grad_norm": 7.113455295562744, "learning_rate": 5.14273092369478e-06, "loss": 0.2232, "step": 61000 }, { "epoch": 0.4888, "grad_norm": 4.443511009216309, "learning_rate": 5.134698795180724e-06, "loss": 0.2333, "step": 61100 }, { "epoch": 0.4896, "grad_norm": 11.819351196289062, "learning_rate": 5.126666666666668e-06, "loss": 0.2252, "step": 61200 }, { "epoch": 0.4904, "grad_norm": 9.132615089416504, "learning_rate": 5.11863453815261e-06, "loss": 0.2469, "step": 61300 }, { "epoch": 0.4912, "grad_norm": 5.749680042266846, "learning_rate": 5.110602409638554e-06, "loss": 0.2322, "step": 61400 }, { "epoch": 0.492, "grad_norm": 5.154579162597656, "learning_rate": 5.102570281124498e-06, "loss": 0.2293, "step": 61500 }, { "epoch": 0.4928, "grad_norm": 4.018552780151367, "learning_rate": 5.094538152610442e-06, "loss": 0.2087, "step": 61600 }, { "epoch": 0.4936, "grad_norm": 12.570889472961426, "learning_rate": 5.086506024096386e-06, "loss": 0.2374, "step": 61700 }, { "epoch": 0.4944, "grad_norm": 4.278166770935059, "learning_rate": 5.07847389558233e-06, "loss": 0.2298, "step": 61800 }, { "epoch": 1.000696, "grad_norm": 4.555546760559082, "learning_rate": 5.070441767068274e-06, "loss": 0.2077, "step": 61900 }, { "epoch": 1.001496, "grad_norm": 15.487360954284668, "learning_rate": 5.0624096385542175e-06, "loss": 0.2044, "step": 62000 }, { "epoch": 1.002296, "grad_norm": 4.297917366027832, "learning_rate": 5.0543775100401614e-06, "loss": 0.1977, "step": 62100 }, { "epoch": 1.003096, "grad_norm": 8.327834129333496, "learning_rate": 5.0463453815261045e-06, "loss": 0.1749, "step": 62200 }, { "epoch": 1.003896, "grad_norm": 8.811240196228027, "learning_rate": 5.038313253012048e-06, "loss": 0.1868, "step": 62300 }, { "epoch": 1.004696, "grad_norm": 5.721954822540283, "learning_rate": 5.030281124497992e-06, "loss": 0.195, "step": 62400 }, { "epoch": 1.005496, "grad_norm": 7.047698020935059, "learning_rate": 5.022248995983936e-06, "loss": 0.1605, "step": 62500 }, { "epoch": 1.006296, "grad_norm": 6.009364128112793, "learning_rate": 5.0142971887550206e-06, "loss": 0.176, "step": 62600 }, { "epoch": 1.007096, "grad_norm": 5.495716571807861, "learning_rate": 5.0062650602409645e-06, "loss": 0.1991, "step": 62700 }, { "epoch": 1.007896, "grad_norm": 2.433659076690674, "learning_rate": 4.9982329317269076e-06, "loss": 0.1832, "step": 62800 }, { "epoch": 1.008696, "grad_norm": 6.076290607452393, "learning_rate": 4.9902008032128515e-06, "loss": 0.1807, "step": 62900 }, { "epoch": 1.009496, "grad_norm": 5.725612640380859, "learning_rate": 4.982168674698795e-06, "loss": 0.1627, "step": 63000 }, { "epoch": 1.010296, "grad_norm": 4.852512836456299, "learning_rate": 4.974136546184739e-06, "loss": 0.1692, "step": 63100 }, { "epoch": 1.011096, "grad_norm": 6.572232246398926, "learning_rate": 4.966104417670683e-06, "loss": 0.1616, "step": 63200 }, { "epoch": 1.011896, "grad_norm": 9.408519744873047, "learning_rate": 4.958072289156627e-06, "loss": 0.1566, "step": 63300 }, { "epoch": 1.012696, "grad_norm": 4.744565963745117, "learning_rate": 4.950040160642571e-06, "loss": 0.1519, "step": 63400 }, { "epoch": 1.013496, "grad_norm": 13.262666702270508, "learning_rate": 4.942008032128515e-06, "loss": 0.1804, "step": 63500 }, { "epoch": 1.014296, "grad_norm": 3.7456908226013184, "learning_rate": 4.933975903614458e-06, "loss": 0.1703, "step": 63600 }, { "epoch": 1.015096, "grad_norm": 6.4614152908325195, "learning_rate": 4.925943775100402e-06, "loss": 0.1796, "step": 63700 }, { "epoch": 1.015896, "grad_norm": 4.488025665283203, "learning_rate": 4.917911646586346e-06, "loss": 0.1678, "step": 63800 }, { "epoch": 1.016696, "grad_norm": 5.777276992797852, "learning_rate": 4.90987951807229e-06, "loss": 0.1839, "step": 63900 }, { "epoch": 1.017496, "grad_norm": 4.160243034362793, "learning_rate": 4.901847389558234e-06, "loss": 0.1627, "step": 64000 }, { "epoch": 1.017496, "eval_test1_cer": 0.07008084995145268, "eval_test1_cer_norm": 0.050106850433404564, "eval_test1_loss": 0.18447566032409668, "eval_test1_runtime": 3416.4242, "eval_test1_samples_per_second": 0.732, "eval_test1_steps_per_second": 0.183, "eval_test1_wer": 0.17367853290183388, "eval_test1_wer_norm": 0.11408866419240772, "step": 64000 }, { "epoch": 1.017496, "eval_test2_cer": 0.14694627991189757, "eval_test2_cer_norm": 0.11825515184381778, "eval_test2_loss": 0.3210515081882477, "eval_test2_runtime": 3532.7138, "eval_test2_samples_per_second": 0.708, "eval_test2_steps_per_second": 0.177, "eval_test2_wer": 0.2882810711833371, "eval_test2_wer_norm": 0.22352738941095576, "step": 64000 }, { "epoch": 1.018296, "grad_norm": 5.366184234619141, "learning_rate": 4.893815261044177e-06, "loss": 0.1663, "step": 64100 }, { "epoch": 1.019096, "grad_norm": 3.319096326828003, "learning_rate": 4.885783132530121e-06, "loss": 0.1597, "step": 64200 }, { "epoch": 1.019896, "grad_norm": 2.7266533374786377, "learning_rate": 4.877751004016065e-06, "loss": 0.1636, "step": 64300 }, { "epoch": 1.020696, "grad_norm": 5.649815082550049, "learning_rate": 4.869718875502009e-06, "loss": 0.1719, "step": 64400 }, { "epoch": 1.021496, "grad_norm": 9.476170539855957, "learning_rate": 4.861686746987952e-06, "loss": 0.1612, "step": 64500 }, { "epoch": 1.022296, "grad_norm": 6.443653106689453, "learning_rate": 4.853654618473896e-06, "loss": 0.1589, "step": 64600 }, { "epoch": 1.023096, "grad_norm": 4.278164863586426, "learning_rate": 4.8456224899598395e-06, "loss": 0.1575, "step": 64700 }, { "epoch": 1.023896, "grad_norm": 7.513009071350098, "learning_rate": 4.837590361445783e-06, "loss": 0.1678, "step": 64800 }, { "epoch": 1.024696, "grad_norm": 7.260671615600586, "learning_rate": 4.829558232931727e-06, "loss": 0.1605, "step": 64900 }, { "epoch": 1.025496, "grad_norm": 5.455900192260742, "learning_rate": 4.821526104417671e-06, "loss": 0.1555, "step": 65000 }, { "epoch": 1.026296, "grad_norm": 11.086517333984375, "learning_rate": 4.813493975903615e-06, "loss": 0.165, "step": 65100 }, { "epoch": 1.027096, "grad_norm": 4.682340145111084, "learning_rate": 4.805461847389558e-06, "loss": 0.1791, "step": 65200 }, { "epoch": 1.027896, "grad_norm": 8.538371086120605, "learning_rate": 4.797429718875502e-06, "loss": 0.1656, "step": 65300 }, { "epoch": 1.028696, "grad_norm": 6.110259532928467, "learning_rate": 4.789397590361446e-06, "loss": 0.1564, "step": 65400 }, { "epoch": 1.029496, "grad_norm": 6.267505168914795, "learning_rate": 4.78136546184739e-06, "loss": 0.1624, "step": 65500 }, { "epoch": 1.030296, "grad_norm": 7.841426372528076, "learning_rate": 4.773333333333334e-06, "loss": 0.1497, "step": 65600 }, { "epoch": 1.031096, "grad_norm": 3.335782051086426, "learning_rate": 4.765301204819278e-06, "loss": 0.1684, "step": 65700 }, { "epoch": 1.031896, "grad_norm": 6.293437480926514, "learning_rate": 4.757269076305222e-06, "loss": 0.1687, "step": 65800 }, { "epoch": 1.032696, "grad_norm": 9.889056205749512, "learning_rate": 4.749236947791165e-06, "loss": 0.1605, "step": 65900 }, { "epoch": 1.033496, "grad_norm": 7.943349838256836, "learning_rate": 4.741204819277109e-06, "loss": 0.1781, "step": 66000 }, { "epoch": 1.034296, "grad_norm": 9.100912094116211, "learning_rate": 4.733172690763053e-06, "loss": 0.1789, "step": 66100 }, { "epoch": 1.035096, "grad_norm": 5.020230293273926, "learning_rate": 4.725140562248997e-06, "loss": 0.1679, "step": 66200 }, { "epoch": 1.035896, "grad_norm": 3.7011711597442627, "learning_rate": 4.71710843373494e-06, "loss": 0.1617, "step": 66300 }, { "epoch": 1.036696, "grad_norm": 13.44151782989502, "learning_rate": 4.709076305220884e-06, "loss": 0.1717, "step": 66400 }, { "epoch": 1.037496, "grad_norm": 5.3160319328308105, "learning_rate": 4.7010441767068275e-06, "loss": 0.18, "step": 66500 }, { "epoch": 1.0382959999999999, "grad_norm": 9.991889953613281, "learning_rate": 4.693092369477912e-06, "loss": 0.1698, "step": 66600 }, { "epoch": 1.039096, "grad_norm": 5.639313697814941, "learning_rate": 4.685060240963856e-06, "loss": 0.1595, "step": 66700 }, { "epoch": 1.039896, "grad_norm": 4.916014194488525, "learning_rate": 4.6770281124498e-06, "loss": 0.1703, "step": 66800 }, { "epoch": 1.040696, "grad_norm": 5.738508701324463, "learning_rate": 4.668995983935744e-06, "loss": 0.1864, "step": 66900 }, { "epoch": 1.041496, "grad_norm": 8.484038352966309, "learning_rate": 4.6609638554216875e-06, "loss": 0.1587, "step": 67000 }, { "epoch": 1.042296, "grad_norm": 6.696768283843994, "learning_rate": 4.652931726907631e-06, "loss": 0.1601, "step": 67100 }, { "epoch": 1.043096, "grad_norm": 3.6157071590423584, "learning_rate": 4.6448995983935745e-06, "loss": 0.1657, "step": 67200 }, { "epoch": 1.043896, "grad_norm": 6.275000095367432, "learning_rate": 4.6368674698795184e-06, "loss": 0.1559, "step": 67300 }, { "epoch": 1.044696, "grad_norm": 11.618382453918457, "learning_rate": 4.628835341365462e-06, "loss": 0.1636, "step": 67400 }, { "epoch": 1.045496, "grad_norm": 3.8046157360076904, "learning_rate": 4.620803212851405e-06, "loss": 0.165, "step": 67500 }, { "epoch": 1.046296, "grad_norm": 7.296670913696289, "learning_rate": 4.612771084337349e-06, "loss": 0.1692, "step": 67600 }, { "epoch": 1.047096, "grad_norm": 5.143111228942871, "learning_rate": 4.604738955823293e-06, "loss": 0.1567, "step": 67700 }, { "epoch": 1.047896, "grad_norm": 6.818264961242676, "learning_rate": 4.596706827309237e-06, "loss": 0.1569, "step": 67800 }, { "epoch": 1.048696, "grad_norm": 3.8615856170654297, "learning_rate": 4.588674698795181e-06, "loss": 0.1646, "step": 67900 }, { "epoch": 1.049496, "grad_norm": 4.350166320800781, "learning_rate": 4.580642570281125e-06, "loss": 0.1562, "step": 68000 }, { "epoch": 1.049496, "eval_test1_cer": 0.04518167899021585, "eval_test1_cer_norm": 0.02964919441976613, "eval_test1_loss": 0.18461571633815765, "eval_test1_runtime": 3355.9439, "eval_test1_samples_per_second": 0.745, "eval_test1_steps_per_second": 0.186, "eval_test1_wer": 0.1442899209889501, "eval_test1_wer_norm": 0.08173821561121014, "step": 68000 }, { "epoch": 1.049496, "eval_test2_cer": 0.08435995072236532, "eval_test2_cer_norm": 0.06550201425472575, "eval_test2_loss": 0.3253004252910614, "eval_test2_runtime": 3365.7371, "eval_test2_samples_per_second": 0.743, "eval_test2_steps_per_second": 0.186, "eval_test2_wer": 0.19904440375371937, "eval_test2_wer_norm": 0.13302200320880128, "step": 68000 }, { "epoch": 1.050296, "grad_norm": 7.118165016174316, "learning_rate": 4.572610441767069e-06, "loss": 0.1473, "step": 68100 }, { "epoch": 1.051096, "grad_norm": 7.220821857452393, "learning_rate": 4.564578313253013e-06, "loss": 0.1617, "step": 68200 }, { "epoch": 1.051896, "grad_norm": 7.148895740509033, "learning_rate": 4.556546184738957e-06, "loss": 0.1629, "step": 68300 }, { "epoch": 1.052696, "grad_norm": 3.8178248405456543, "learning_rate": 4.5485140562249e-06, "loss": 0.181, "step": 68400 }, { "epoch": 1.053496, "grad_norm": 6.640100479125977, "learning_rate": 4.540481927710844e-06, "loss": 0.1545, "step": 68500 }, { "epoch": 1.054296, "grad_norm": 6.102357387542725, "learning_rate": 4.532530120481928e-06, "loss": 0.1716, "step": 68600 }, { "epoch": 1.055096, "grad_norm": 5.749415397644043, "learning_rate": 4.524497991967872e-06, "loss": 0.1419, "step": 68700 }, { "epoch": 1.055896, "grad_norm": 5.71680212020874, "learning_rate": 4.516465863453816e-06, "loss": 0.1487, "step": 68800 }, { "epoch": 1.056696, "grad_norm": 5.209336757659912, "learning_rate": 4.50843373493976e-06, "loss": 0.1722, "step": 68900 }, { "epoch": 1.057496, "grad_norm": 6.00562047958374, "learning_rate": 4.500401606425703e-06, "loss": 0.1659, "step": 69000 }, { "epoch": 1.058296, "grad_norm": 4.806443214416504, "learning_rate": 4.492369477911647e-06, "loss": 0.1499, "step": 69100 }, { "epoch": 1.059096, "grad_norm": 7.675013065338135, "learning_rate": 4.484337349397591e-06, "loss": 0.1624, "step": 69200 }, { "epoch": 1.059896, "grad_norm": 5.567787170410156, "learning_rate": 4.476305220883535e-06, "loss": 0.1572, "step": 69300 }, { "epoch": 1.060696, "grad_norm": 20.12483787536621, "learning_rate": 4.468273092369479e-06, "loss": 0.169, "step": 69400 }, { "epoch": 1.061496, "grad_norm": 11.066375732421875, "learning_rate": 4.4602409638554225e-06, "loss": 0.1618, "step": 69500 }, { "epoch": 1.062296, "grad_norm": 6.3260579109191895, "learning_rate": 4.452208835341366e-06, "loss": 0.1465, "step": 69600 }, { "epoch": 1.063096, "grad_norm": 5.906326770782471, "learning_rate": 4.4441767068273095e-06, "loss": 0.1532, "step": 69700 }, { "epoch": 1.063896, "grad_norm": 4.177192211151123, "learning_rate": 4.4361445783132534e-06, "loss": 0.1624, "step": 69800 }, { "epoch": 1.064696, "grad_norm": 4.535795211791992, "learning_rate": 4.428112449799197e-06, "loss": 0.1637, "step": 69900 }, { "epoch": 1.065496, "grad_norm": 8.556774139404297, "learning_rate": 4.420080321285141e-06, "loss": 0.1512, "step": 70000 }, { "epoch": 1.066296, "grad_norm": 5.175086975097656, "learning_rate": 4.412048192771084e-06, "loss": 0.1487, "step": 70100 }, { "epoch": 1.067096, "grad_norm": 5.598880767822266, "learning_rate": 4.404016064257028e-06, "loss": 0.1604, "step": 70200 }, { "epoch": 1.067896, "grad_norm": 4.312199115753174, "learning_rate": 4.395983935742972e-06, "loss": 0.1752, "step": 70300 }, { "epoch": 1.068696, "grad_norm": 4.8355255126953125, "learning_rate": 4.387951807228916e-06, "loss": 0.1375, "step": 70400 }, { "epoch": 1.069496, "grad_norm": 0.6319503784179688, "learning_rate": 4.379919678714859e-06, "loss": 0.1518, "step": 70500 }, { "epoch": 1.070296, "grad_norm": 7.824057579040527, "learning_rate": 4.371887550200803e-06, "loss": 0.171, "step": 70600 }, { "epoch": 1.071096, "grad_norm": 6.540349960327148, "learning_rate": 4.363855421686747e-06, "loss": 0.146, "step": 70700 }, { "epoch": 1.071896, "grad_norm": 11.684809684753418, "learning_rate": 4.355823293172691e-06, "loss": 0.1504, "step": 70800 }, { "epoch": 1.072696, "grad_norm": 9.533754348754883, "learning_rate": 4.347791164658635e-06, "loss": 0.1548, "step": 70900 }, { "epoch": 1.073496, "grad_norm": 6.979349136352539, "learning_rate": 4.339759036144579e-06, "loss": 0.1608, "step": 71000 }, { "epoch": 1.074296, "grad_norm": 6.119946002960205, "learning_rate": 4.331807228915663e-06, "loss": 0.1642, "step": 71100 }, { "epoch": 1.075096, "grad_norm": 5.234997272491455, "learning_rate": 4.323775100401606e-06, "loss": 0.159, "step": 71200 }, { "epoch": 1.075896, "grad_norm": 11.375402450561523, "learning_rate": 4.31574297188755e-06, "loss": 0.1598, "step": 71300 }, { "epoch": 1.076696, "grad_norm": 6.117208003997803, "learning_rate": 4.307710843373494e-06, "loss": 0.1581, "step": 71400 }, { "epoch": 1.077496, "grad_norm": 5.760785102844238, "learning_rate": 4.299678714859438e-06, "loss": 0.1581, "step": 71500 }, { "epoch": 1.078296, "grad_norm": 12.158953666687012, "learning_rate": 4.291646586345382e-06, "loss": 0.1645, "step": 71600 }, { "epoch": 1.079096, "grad_norm": 3.9681832790374756, "learning_rate": 4.283614457831326e-06, "loss": 0.1664, "step": 71700 }, { "epoch": 1.079896, "grad_norm": 4.026464939117432, "learning_rate": 4.27558232931727e-06, "loss": 0.1478, "step": 71800 }, { "epoch": 1.080696, "grad_norm": 9.842916488647461, "learning_rate": 4.267550200803214e-06, "loss": 0.1516, "step": 71900 }, { "epoch": 1.081496, "grad_norm": 6.5888776779174805, "learning_rate": 4.2595180722891575e-06, "loss": 0.1743, "step": 72000 }, { "epoch": 1.081496, "eval_test1_cer": 0.056235529165733066, "eval_test1_cer_norm": 0.03523423055682282, "eval_test1_loss": 0.18351316452026367, "eval_test1_runtime": 3389.512, "eval_test1_samples_per_second": 0.738, "eval_test1_steps_per_second": 0.184, "eval_test1_wer": 0.15283244409458002, "eval_test1_wer_norm": 0.09255092200239633, "step": 72000 }, { "epoch": 1.081496, "eval_test2_cer": 0.10279239929816703, "eval_test2_cer_norm": 0.0767353579175705, "eval_test2_loss": 0.32321926951408386, "eval_test2_runtime": 3400.9588, "eval_test2_samples_per_second": 0.735, "eval_test2_steps_per_second": 0.184, "eval_test2_wer": 0.21498054474708173, "eval_test2_wer_norm": 0.14843570937428374, "step": 72000 }, { "epoch": 1.082296, "grad_norm": 11.681933403015137, "learning_rate": 4.251485943775101e-06, "loss": 0.1564, "step": 72100 }, { "epoch": 1.083096, "grad_norm": 5.162091255187988, "learning_rate": 4.2434538152610445e-06, "loss": 0.1469, "step": 72200 }, { "epoch": 1.083896, "grad_norm": 7.147501468658447, "learning_rate": 4.2354216867469884e-06, "loss": 0.1663, "step": 72300 }, { "epoch": 1.084696, "grad_norm": 7.40344762802124, "learning_rate": 4.227389558232932e-06, "loss": 0.1284, "step": 72400 }, { "epoch": 1.085496, "grad_norm": 3.5983896255493164, "learning_rate": 4.219357429718876e-06, "loss": 0.1558, "step": 72500 }, { "epoch": 1.086296, "grad_norm": 4.397582054138184, "learning_rate": 4.211325301204819e-06, "loss": 0.1599, "step": 72600 }, { "epoch": 1.087096, "grad_norm": 2.2897703647613525, "learning_rate": 4.203293172690763e-06, "loss": 0.1611, "step": 72700 }, { "epoch": 1.087896, "grad_norm": 4.20655632019043, "learning_rate": 4.195261044176707e-06, "loss": 0.1597, "step": 72800 }, { "epoch": 1.088696, "grad_norm": 4.474186420440674, "learning_rate": 4.187228915662651e-06, "loss": 0.145, "step": 72900 }, { "epoch": 1.089496, "grad_norm": 4.5594401359558105, "learning_rate": 4.179196787148594e-06, "loss": 0.1545, "step": 73000 }, { "epoch": 1.090296, "grad_norm": 3.502408266067505, "learning_rate": 4.171164658634538e-06, "loss": 0.1648, "step": 73100 }, { "epoch": 1.091096, "grad_norm": 2.6792593002319336, "learning_rate": 4.163132530120482e-06, "loss": 0.1491, "step": 73200 }, { "epoch": 1.091896, "grad_norm": 6.028800010681152, "learning_rate": 4.155100401606426e-06, "loss": 0.1544, "step": 73300 }, { "epoch": 1.0926960000000001, "grad_norm": 5.256026268005371, "learning_rate": 4.14706827309237e-06, "loss": 0.1526, "step": 73400 }, { "epoch": 1.093496, "grad_norm": 5.323427677154541, "learning_rate": 4.139036144578314e-06, "loss": 0.1412, "step": 73500 }, { "epoch": 1.094296, "grad_norm": 3.4750475883483887, "learning_rate": 4.131004016064257e-06, "loss": 0.1372, "step": 73600 }, { "epoch": 1.095096, "grad_norm": 2.8323254585266113, "learning_rate": 4.122971887550201e-06, "loss": 0.156, "step": 73700 }, { "epoch": 1.095896, "grad_norm": 11.332598686218262, "learning_rate": 4.114939759036145e-06, "loss": 0.1443, "step": 73800 }, { "epoch": 1.0966960000000001, "grad_norm": 3.0766139030456543, "learning_rate": 4.106907630522089e-06, "loss": 0.1513, "step": 73900 }, { "epoch": 1.097496, "grad_norm": 8.13890266418457, "learning_rate": 4.0988755020080325e-06, "loss": 0.1441, "step": 74000 }, { "epoch": 1.098296, "grad_norm": 6.174729824066162, "learning_rate": 4.0908433734939765e-06, "loss": 0.1465, "step": 74100 }, { "epoch": 1.099096, "grad_norm": 7.782393455505371, "learning_rate": 4.08281124497992e-06, "loss": 0.1525, "step": 74200 }, { "epoch": 1.099896, "grad_norm": 4.48836088180542, "learning_rate": 4.074779116465864e-06, "loss": 0.1419, "step": 74300 }, { "epoch": 1.100696, "grad_norm": 9.527304649353027, "learning_rate": 4.066746987951807e-06, "loss": 0.1471, "step": 74400 }, { "epoch": 1.101496, "grad_norm": 5.195642471313477, "learning_rate": 4.058714859437751e-06, "loss": 0.1454, "step": 74500 }, { "epoch": 1.102296, "grad_norm": 4.529500961303711, "learning_rate": 4.050682730923695e-06, "loss": 0.1582, "step": 74600 }, { "epoch": 1.103096, "grad_norm": 4.841000080108643, "learning_rate": 4.042650602409639e-06, "loss": 0.1521, "step": 74700 }, { "epoch": 1.103896, "grad_norm": 4.943587303161621, "learning_rate": 4.034618473895583e-06, "loss": 0.148, "step": 74800 }, { "epoch": 1.104696, "grad_norm": 7.331528186798096, "learning_rate": 4.026586345381526e-06, "loss": 0.1469, "step": 74900 }, { "epoch": 1.105496, "grad_norm": 8.253586769104004, "learning_rate": 4.01855421686747e-06, "loss": 0.1736, "step": 75000 }, { "epoch": 1.106296, "grad_norm": 6.348284721374512, "learning_rate": 4.010602409638554e-06, "loss": 0.1398, "step": 75100 }, { "epoch": 1.107096, "grad_norm": 2.285827398300171, "learning_rate": 4.002570281124498e-06, "loss": 0.146, "step": 75200 }, { "epoch": 1.107896, "grad_norm": 7.164559364318848, "learning_rate": 3.994538152610442e-06, "loss": 0.1564, "step": 75300 }, { "epoch": 1.108696, "grad_norm": 2.311056137084961, "learning_rate": 3.986506024096386e-06, "loss": 0.1509, "step": 75400 }, { "epoch": 1.109496, "grad_norm": 3.1986663341522217, "learning_rate": 3.97847389558233e-06, "loss": 0.1521, "step": 75500 }, { "epoch": 1.110296, "grad_norm": 10.752972602844238, "learning_rate": 3.970441767068273e-06, "loss": 0.1321, "step": 75600 }, { "epoch": 1.111096, "grad_norm": 5.435462474822998, "learning_rate": 3.962409638554217e-06, "loss": 0.1413, "step": 75700 }, { "epoch": 1.111896, "grad_norm": 3.7016944885253906, "learning_rate": 3.954457831325301e-06, "loss": 0.1661, "step": 75800 }, { "epoch": 1.112696, "grad_norm": 9.687053680419922, "learning_rate": 3.946425702811245e-06, "loss": 0.1516, "step": 75900 }, { "epoch": 1.113496, "grad_norm": 2.699572801589966, "learning_rate": 3.938393574297189e-06, "loss": 0.153, "step": 76000 }, { "epoch": 1.113496, "eval_test1_cer": 0.07078571962058407, "eval_test1_cer_norm": 0.04619780536413187, "eval_test1_loss": 0.18133017420768738, "eval_test1_runtime": 3430.9909, "eval_test1_samples_per_second": 0.729, "eval_test1_steps_per_second": 0.182, "eval_test1_wer": 0.16875127554739205, "eval_test1_wer_norm": 0.11002659341301615, "step": 76000 }, { "epoch": 1.113496, "eval_test2_cer": 0.13182233919438532, "eval_test2_cer_norm": 0.09897931515339324, "eval_test2_loss": 0.32256972789764404, "eval_test2_runtime": 3529.0223, "eval_test2_samples_per_second": 0.708, "eval_test2_steps_per_second": 0.177, "eval_test2_wer": 0.26012817578393227, "eval_test2_wer_norm": 0.19547902819161128, "step": 76000 }, { "epoch": 1.114296, "grad_norm": 4.699991703033447, "learning_rate": 3.930361445783133e-06, "loss": 0.1306, "step": 76100 }, { "epoch": 1.115096, "grad_norm": 5.847876071929932, "learning_rate": 3.922329317269077e-06, "loss": 0.159, "step": 76200 }, { "epoch": 1.115896, "grad_norm": 14.771410942077637, "learning_rate": 3.91429718875502e-06, "loss": 0.1593, "step": 76300 }, { "epoch": 1.116696, "grad_norm": 3.190328598022461, "learning_rate": 3.906265060240964e-06, "loss": 0.1513, "step": 76400 }, { "epoch": 1.117496, "grad_norm": 3.2370731830596924, "learning_rate": 3.898232931726908e-06, "loss": 0.1524, "step": 76500 }, { "epoch": 1.118296, "grad_norm": 3.932393789291382, "learning_rate": 3.890200803212852e-06, "loss": 0.129, "step": 76600 }, { "epoch": 1.119096, "grad_norm": 3.998230457305908, "learning_rate": 3.882168674698796e-06, "loss": 0.1534, "step": 76700 }, { "epoch": 1.119896, "grad_norm": 5.405791282653809, "learning_rate": 3.874136546184739e-06, "loss": 0.1455, "step": 76800 }, { "epoch": 1.120696, "grad_norm": 5.66414737701416, "learning_rate": 3.866104417670683e-06, "loss": 0.1321, "step": 76900 }, { "epoch": 1.121496, "grad_norm": 2.428980827331543, "learning_rate": 3.858072289156627e-06, "loss": 0.1251, "step": 77000 }, { "epoch": 1.122296, "grad_norm": 8.633134841918945, "learning_rate": 3.850040160642571e-06, "loss": 0.1581, "step": 77100 }, { "epoch": 1.123096, "grad_norm": 6.84693717956543, "learning_rate": 3.8420080321285145e-06, "loss": 0.1516, "step": 77200 }, { "epoch": 1.123896, "grad_norm": 1.603908896446228, "learning_rate": 3.8339759036144584e-06, "loss": 0.1614, "step": 77300 }, { "epoch": 1.124696, "grad_norm": 12.588580131530762, "learning_rate": 3.8259437751004015e-06, "loss": 0.1423, "step": 77400 }, { "epoch": 1.125496, "grad_norm": 8.18181324005127, "learning_rate": 3.8179116465863454e-06, "loss": 0.1409, "step": 77500 }, { "epoch": 1.126296, "grad_norm": 4.0206217765808105, "learning_rate": 3.8098795180722898e-06, "loss": 0.1407, "step": 77600 }, { "epoch": 1.1270959999999999, "grad_norm": 3.093519449234009, "learning_rate": 3.8018473895582333e-06, "loss": 0.128, "step": 77700 }, { "epoch": 1.127896, "grad_norm": 5.9990925788879395, "learning_rate": 3.7938152610441768e-06, "loss": 0.138, "step": 77800 }, { "epoch": 1.128696, "grad_norm": 5.512988567352295, "learning_rate": 3.7857831325301207e-06, "loss": 0.1366, "step": 77900 }, { "epoch": 1.129496, "grad_norm": 2.798251152038574, "learning_rate": 3.7777510040160646e-06, "loss": 0.141, "step": 78000 }, { "epoch": 1.130296, "grad_norm": 4.432553768157959, "learning_rate": 3.769718875502008e-06, "loss": 0.1415, "step": 78100 }, { "epoch": 1.1310959999999999, "grad_norm": 4.363057613372803, "learning_rate": 3.761686746987952e-06, "loss": 0.1516, "step": 78200 }, { "epoch": 1.131896, "grad_norm": 8.875186920166016, "learning_rate": 3.753654618473896e-06, "loss": 0.1578, "step": 78300 }, { "epoch": 1.132696, "grad_norm": 5.948533535003662, "learning_rate": 3.74562248995984e-06, "loss": 0.1436, "step": 78400 }, { "epoch": 1.133496, "grad_norm": 3.6585094928741455, "learning_rate": 3.737590361445784e-06, "loss": 0.1452, "step": 78500 }, { "epoch": 1.134296, "grad_norm": 4.998322486877441, "learning_rate": 3.729558232931727e-06, "loss": 0.1395, "step": 78600 }, { "epoch": 1.1350959999999999, "grad_norm": 2.1381640434265137, "learning_rate": 3.7215261044176708e-06, "loss": 0.1386, "step": 78700 }, { "epoch": 1.135896, "grad_norm": 3.784778594970703, "learning_rate": 3.7134939759036147e-06, "loss": 0.1494, "step": 78800 }, { "epoch": 1.136696, "grad_norm": 12.97223949432373, "learning_rate": 3.7054618473895586e-06, "loss": 0.1389, "step": 78900 }, { "epoch": 1.137496, "grad_norm": 2.718700647354126, "learning_rate": 3.697429718875502e-06, "loss": 0.1488, "step": 79000 }, { "epoch": 1.138296, "grad_norm": 5.164200782775879, "learning_rate": 3.689397590361446e-06, "loss": 0.1554, "step": 79100 }, { "epoch": 1.1390959999999999, "grad_norm": 5.673985004425049, "learning_rate": 3.68136546184739e-06, "loss": 0.136, "step": 79200 }, { "epoch": 1.139896, "grad_norm": 2.9805924892425537, "learning_rate": 3.673333333333334e-06, "loss": 0.1394, "step": 79300 }, { "epoch": 1.140696, "grad_norm": 7.971452713012695, "learning_rate": 3.665301204819278e-06, "loss": 0.1515, "step": 79400 }, { "epoch": 1.141496, "grad_norm": 3.0903499126434326, "learning_rate": 3.657269076305221e-06, "loss": 0.1378, "step": 79500 }, { "epoch": 1.142296, "grad_norm": 4.741939067840576, "learning_rate": 3.649236947791165e-06, "loss": 0.1472, "step": 79600 }, { "epoch": 1.143096, "grad_norm": 5.633648872375488, "learning_rate": 3.6412048192771087e-06, "loss": 0.1573, "step": 79700 }, { "epoch": 1.143896, "grad_norm": 3.337674379348755, "learning_rate": 3.633253012048193e-06, "loss": 0.1531, "step": 79800 }, { "epoch": 1.144696, "grad_norm": 15.155500411987305, "learning_rate": 3.625220883534137e-06, "loss": 0.1438, "step": 79900 }, { "epoch": 1.145496, "grad_norm": 7.186131477355957, "learning_rate": 3.617188755020081e-06, "loss": 0.1315, "step": 80000 }, { "epoch": 1.145496, "eval_test1_cer": 0.050181118828889384, "eval_test1_cer_norm": 0.03300117655533412, "eval_test1_loss": 0.17834880948066711, "eval_test1_runtime": 3408.2771, "eval_test1_samples_per_second": 0.734, "eval_test1_steps_per_second": 0.183, "eval_test1_wer": 0.1458643109128546, "eval_test1_wer_norm": 0.08547882755194483, "step": 80000 }, { "epoch": 1.145496, "eval_test2_cer": 0.10706219434800462, "eval_test2_cer_norm": 0.08262802138208862, "eval_test2_loss": 0.3213089108467102, "eval_test2_runtime": 3477.8704, "eval_test2_samples_per_second": 0.719, "eval_test2_steps_per_second": 0.18, "eval_test2_wer": 0.23108834973678188, "eval_test2_wer_norm": 0.16551111620444647, "step": 80000 }, { "epoch": 1.146296, "grad_norm": 5.269469738006592, "learning_rate": 3.6091566265060248e-06, "loss": 0.1244, "step": 80100 }, { "epoch": 1.147096, "grad_norm": 5.666225433349609, "learning_rate": 3.6012048192771087e-06, "loss": 0.13, "step": 80200 }, { "epoch": 1.147896, "grad_norm": 5.9655375480651855, "learning_rate": 3.5931726907630526e-06, "loss": 0.1464, "step": 80300 }, { "epoch": 1.148696, "grad_norm": 2.792882204055786, "learning_rate": 3.5851405622489965e-06, "loss": 0.1482, "step": 80400 }, { "epoch": 1.149496, "grad_norm": 2.3720922470092773, "learning_rate": 3.57710843373494e-06, "loss": 0.1375, "step": 80500 }, { "epoch": 1.150296, "grad_norm": 3.8564910888671875, "learning_rate": 3.569076305220884e-06, "loss": 0.1291, "step": 80600 }, { "epoch": 1.151096, "grad_norm": 5.648325443267822, "learning_rate": 3.561044176706828e-06, "loss": 0.1341, "step": 80700 }, { "epoch": 1.151896, "grad_norm": 8.060567855834961, "learning_rate": 3.5530120481927718e-06, "loss": 0.1285, "step": 80800 }, { "epoch": 1.152696, "grad_norm": 2.7208454608917236, "learning_rate": 3.544979919678715e-06, "loss": 0.1413, "step": 80900 }, { "epoch": 1.153496, "grad_norm": 5.355711460113525, "learning_rate": 3.5369477911646588e-06, "loss": 0.1259, "step": 81000 }, { "epoch": 1.154296, "grad_norm": 7.103275775909424, "learning_rate": 3.5289156626506027e-06, "loss": 0.149, "step": 81100 }, { "epoch": 1.155096, "grad_norm": 3.506634473800659, "learning_rate": 3.5208835341365466e-06, "loss": 0.1414, "step": 81200 }, { "epoch": 1.155896, "grad_norm": 5.049018383026123, "learning_rate": 3.5128514056224905e-06, "loss": 0.1404, "step": 81300 }, { "epoch": 1.156696, "grad_norm": 4.586015701293945, "learning_rate": 3.504819277108434e-06, "loss": 0.1446, "step": 81400 }, { "epoch": 1.157496, "grad_norm": 4.436155796051025, "learning_rate": 3.496787148594378e-06, "loss": 0.1406, "step": 81500 }, { "epoch": 1.158296, "grad_norm": 2.5560381412506104, "learning_rate": 3.4887550200803214e-06, "loss": 0.1384, "step": 81600 }, { "epoch": 1.159096, "grad_norm": 5.3852057456970215, "learning_rate": 3.4807228915662654e-06, "loss": 0.1412, "step": 81700 }, { "epoch": 1.159896, "grad_norm": 5.565433502197266, "learning_rate": 3.472690763052209e-06, "loss": 0.1441, "step": 81800 }, { "epoch": 1.160696, "grad_norm": 10.139933586120605, "learning_rate": 3.4646586345381528e-06, "loss": 0.1387, "step": 81900 }, { "epoch": 1.161496, "grad_norm": 10.769366264343262, "learning_rate": 3.4566265060240967e-06, "loss": 0.1424, "step": 82000 }, { "epoch": 1.162296, "grad_norm": 7.583619594573975, "learning_rate": 3.4485943775100406e-06, "loss": 0.1476, "step": 82100 }, { "epoch": 1.163096, "grad_norm": 3.2200429439544678, "learning_rate": 3.4405622489959845e-06, "loss": 0.1391, "step": 82200 }, { "epoch": 1.163896, "grad_norm": 4.72890567779541, "learning_rate": 3.4325301204819276e-06, "loss": 0.1371, "step": 82300 }, { "epoch": 1.164696, "grad_norm": 3.9485466480255127, "learning_rate": 3.4244979919678715e-06, "loss": 0.1349, "step": 82400 }, { "epoch": 1.165496, "grad_norm": 7.35235071182251, "learning_rate": 3.4164658634538154e-06, "loss": 0.1419, "step": 82500 }, { "epoch": 1.166296, "grad_norm": 7.841335773468018, "learning_rate": 3.4084337349397594e-06, "loss": 0.1459, "step": 82600 }, { "epoch": 1.167096, "grad_norm": 3.344604015350342, "learning_rate": 3.400401606425703e-06, "loss": 0.1198, "step": 82700 }, { "epoch": 1.167896, "grad_norm": 4.77374792098999, "learning_rate": 3.3923694779116468e-06, "loss": 0.1531, "step": 82800 }, { "epoch": 1.168696, "grad_norm": 6.152327537536621, "learning_rate": 3.3843373493975907e-06, "loss": 0.1303, "step": 82900 }, { "epoch": 1.169496, "grad_norm": 5.056685924530029, "learning_rate": 3.3763052208835346e-06, "loss": 0.1479, "step": 83000 }, { "epoch": 1.170296, "grad_norm": 7.288972854614258, "learning_rate": 3.3682730923694785e-06, "loss": 0.1502, "step": 83100 }, { "epoch": 1.171096, "grad_norm": 6.630878448486328, "learning_rate": 3.3602409638554216e-06, "loss": 0.1435, "step": 83200 }, { "epoch": 1.171896, "grad_norm": 6.784937381744385, "learning_rate": 3.3522088353413655e-06, "loss": 0.1445, "step": 83300 }, { "epoch": 1.172696, "grad_norm": 5.813895225524902, "learning_rate": 3.3441767068273095e-06, "loss": 0.1362, "step": 83400 }, { "epoch": 1.173496, "grad_norm": 6.4298415184021, "learning_rate": 3.3361445783132534e-06, "loss": 0.138, "step": 83500 }, { "epoch": 1.174296, "grad_norm": 3.369779586791992, "learning_rate": 3.328112449799197e-06, "loss": 0.1251, "step": 83600 }, { "epoch": 1.175096, "grad_norm": 2.4179258346557617, "learning_rate": 3.320080321285141e-06, "loss": 0.1491, "step": 83700 }, { "epoch": 1.175896, "grad_norm": 6.803851127624512, "learning_rate": 3.3120481927710847e-06, "loss": 0.1209, "step": 83800 }, { "epoch": 1.176696, "grad_norm": 5.81317138671875, "learning_rate": 3.3040160642570286e-06, "loss": 0.1463, "step": 83900 }, { "epoch": 1.177496, "grad_norm": 3.342420816421509, "learning_rate": 3.2959839357429726e-06, "loss": 0.1108, "step": 84000 }, { "epoch": 1.177496, "eval_test1_cer": 0.04321177832549108, "eval_test1_cer_norm": 0.028405407352270273, "eval_test1_loss": 0.17878110706806183, "eval_test1_runtime": 3382.137, "eval_test1_samples_per_second": 0.739, "eval_test1_steps_per_second": 0.185, "eval_test1_wer": 0.13557246566955306, "eval_test1_wer_norm": 0.07484146234548059, "step": 84000 }, { "epoch": 1.177496, "eval_test2_cer": 0.10907343114197185, "eval_test2_cer_norm": 0.08453091106290672, "eval_test2_loss": 0.32212311029434204, "eval_test2_runtime": 3480.5629, "eval_test2_samples_per_second": 0.718, "eval_test2_steps_per_second": 0.18, "eval_test2_wer": 0.2316891737239643, "eval_test2_wer_norm": 0.16588356635342655, "step": 84000 }, { "epoch": 1.178296, "grad_norm": 5.4935503005981445, "learning_rate": 3.2879518072289156e-06, "loss": 0.1458, "step": 84100 }, { "epoch": 1.179096, "grad_norm": 7.605450630187988, "learning_rate": 3.2799196787148595e-06, "loss": 0.1377, "step": 84200 }, { "epoch": 1.179896, "grad_norm": 6.349878311157227, "learning_rate": 3.2719678714859443e-06, "loss": 0.1365, "step": 84300 }, { "epoch": 1.180696, "grad_norm": 6.173891544342041, "learning_rate": 3.2639357429718878e-06, "loss": 0.1394, "step": 84400 }, { "epoch": 1.181496, "grad_norm": 1.916458249092102, "learning_rate": 3.2559036144578317e-06, "loss": 0.1343, "step": 84500 }, { "epoch": 1.182296, "grad_norm": 4.605401039123535, "learning_rate": 3.2478714859437756e-06, "loss": 0.1438, "step": 84600 }, { "epoch": 1.183096, "grad_norm": 10.40539264678955, "learning_rate": 3.2398393574297195e-06, "loss": 0.1426, "step": 84700 }, { "epoch": 1.183896, "grad_norm": 5.316925525665283, "learning_rate": 3.2318072289156626e-06, "loss": 0.1312, "step": 84800 }, { "epoch": 1.184696, "grad_norm": 3.3205056190490723, "learning_rate": 3.2237751004016065e-06, "loss": 0.1339, "step": 84900 }, { "epoch": 1.185496, "grad_norm": 4.223670959472656, "learning_rate": 3.2157429718875504e-06, "loss": 0.1477, "step": 85000 }, { "epoch": 1.186296, "grad_norm": 2.418142318725586, "learning_rate": 3.2077108433734944e-06, "loss": 0.13, "step": 85100 }, { "epoch": 1.187096, "grad_norm": 5.908360958099365, "learning_rate": 3.1996787148594383e-06, "loss": 0.1351, "step": 85200 }, { "epoch": 1.187896, "grad_norm": 3.79148268699646, "learning_rate": 3.1916465863453818e-06, "loss": 0.1193, "step": 85300 }, { "epoch": 1.188696, "grad_norm": 4.751871109008789, "learning_rate": 3.1836144578313257e-06, "loss": 0.1357, "step": 85400 }, { "epoch": 1.189496, "grad_norm": 5.901039123535156, "learning_rate": 3.175582329317269e-06, "loss": 0.1214, "step": 85500 }, { "epoch": 1.190296, "grad_norm": 4.6331305503845215, "learning_rate": 3.167550200803213e-06, "loss": 0.1593, "step": 85600 }, { "epoch": 1.191096, "grad_norm": 5.742949962615967, "learning_rate": 3.1595180722891566e-06, "loss": 0.1321, "step": 85700 }, { "epoch": 1.191896, "grad_norm": 3.329071283340454, "learning_rate": 3.1514859437751005e-06, "loss": 0.1514, "step": 85800 }, { "epoch": 1.192696, "grad_norm": 6.989814758300781, "learning_rate": 3.1434538152610445e-06, "loss": 0.135, "step": 85900 }, { "epoch": 1.1934960000000001, "grad_norm": 4.263644218444824, "learning_rate": 3.1354216867469884e-06, "loss": 0.1364, "step": 86000 }, { "epoch": 1.194296, "grad_norm": 3.34621524810791, "learning_rate": 3.1273895582329323e-06, "loss": 0.1215, "step": 86100 }, { "epoch": 1.195096, "grad_norm": 5.672510623931885, "learning_rate": 3.1193574297188754e-06, "loss": 0.1323, "step": 86200 }, { "epoch": 1.195896, "grad_norm": 7.123562812805176, "learning_rate": 3.1113253012048193e-06, "loss": 0.128, "step": 86300 }, { "epoch": 1.196696, "grad_norm": 5.052427768707275, "learning_rate": 3.1033734939759036e-06, "loss": 0.1343, "step": 86400 }, { "epoch": 1.1974960000000001, "grad_norm": 9.78446102142334, "learning_rate": 3.0953413654618475e-06, "loss": 0.1354, "step": 86500 }, { "epoch": 1.198296, "grad_norm": 3.7782175540924072, "learning_rate": 3.0873092369477914e-06, "loss": 0.1369, "step": 86600 }, { "epoch": 1.199096, "grad_norm": 3.037858247756958, "learning_rate": 3.0792771084337354e-06, "loss": 0.1368, "step": 86700 }, { "epoch": 1.199896, "grad_norm": 6.603598117828369, "learning_rate": 3.0712449799196793e-06, "loss": 0.1313, "step": 86800 }, { "epoch": 1.200696, "grad_norm": 5.845118522644043, "learning_rate": 3.0632128514056224e-06, "loss": 0.1381, "step": 86900 }, { "epoch": 1.2014960000000001, "grad_norm": 5.331429958343506, "learning_rate": 3.0551807228915663e-06, "loss": 0.1394, "step": 87000 }, { "epoch": 1.202296, "grad_norm": 2.769038677215576, "learning_rate": 3.04714859437751e-06, "loss": 0.138, "step": 87100 }, { "epoch": 1.203096, "grad_norm": 4.4109320640563965, "learning_rate": 3.039116465863454e-06, "loss": 0.1355, "step": 87200 }, { "epoch": 1.203896, "grad_norm": 5.107141494750977, "learning_rate": 3.031084337349398e-06, "loss": 0.1242, "step": 87300 }, { "epoch": 1.204696, "grad_norm": 3.513662815093994, "learning_rate": 3.0230522088353415e-06, "loss": 0.1341, "step": 87400 }, { "epoch": 1.2054960000000001, "grad_norm": 5.542943000793457, "learning_rate": 3.0150200803212855e-06, "loss": 0.1582, "step": 87500 }, { "epoch": 1.206296, "grad_norm": 6.642317771911621, "learning_rate": 3.0069879518072294e-06, "loss": 0.1328, "step": 87600 }, { "epoch": 1.207096, "grad_norm": 4.840209007263184, "learning_rate": 2.9989558232931733e-06, "loss": 0.1442, "step": 87700 }, { "epoch": 1.207896, "grad_norm": 8.443116188049316, "learning_rate": 2.9909236947791164e-06, "loss": 0.1301, "step": 87800 }, { "epoch": 1.208696, "grad_norm": 5.006475925445557, "learning_rate": 2.9828915662650603e-06, "loss": 0.137, "step": 87900 }, { "epoch": 1.209496, "grad_norm": 16.58333969116211, "learning_rate": 2.974859437751004e-06, "loss": 0.141, "step": 88000 }, { "epoch": 1.209496, "eval_test1_cer": 0.05272518485323773, "eval_test1_cer_norm": 0.035959372823972914, "eval_test1_loss": 0.1771300584077835, "eval_test1_runtime": 3405.3955, "eval_test1_samples_per_second": 0.734, "eval_test1_steps_per_second": 0.184, "eval_test1_wer": 0.15084987900521882, "eval_test1_wer_norm": 0.0903883807241591, "step": 88000 }, { "epoch": 1.209496, "eval_test2_cer": 0.10232108858774779, "eval_test2_cer_norm": 0.08082681282925318, "eval_test2_loss": 0.31750166416168213, "eval_test2_runtime": 3462.1713, "eval_test2_samples_per_second": 0.722, "eval_test2_steps_per_second": 0.181, "eval_test2_wer": 0.2193579766536965, "eval_test2_wer_norm": 0.15279051111620445, "step": 88000 }, { "epoch": 1.210296, "grad_norm": 7.229742527008057, "learning_rate": 2.966827309236948e-06, "loss": 0.1565, "step": 88100 }, { "epoch": 1.211096, "grad_norm": 4.775784015655518, "learning_rate": 2.958795180722892e-06, "loss": 0.1191, "step": 88200 }, { "epoch": 1.211896, "grad_norm": 5.914107799530029, "learning_rate": 2.9507630522088355e-06, "loss": 0.1272, "step": 88300 }, { "epoch": 1.212696, "grad_norm": 3.42290997505188, "learning_rate": 2.9427309236947795e-06, "loss": 0.1533, "step": 88400 }, { "epoch": 1.213496, "grad_norm": 5.825013637542725, "learning_rate": 2.9347791164658634e-06, "loss": 0.1337, "step": 88500 }, { "epoch": 1.214296, "grad_norm": 2.929975748062134, "learning_rate": 2.9267469879518073e-06, "loss": 0.1363, "step": 88600 }, { "epoch": 1.215096, "grad_norm": 4.544336318969727, "learning_rate": 2.918714859437751e-06, "loss": 0.1416, "step": 88700 }, { "epoch": 1.215896, "grad_norm": 6.057349681854248, "learning_rate": 2.910682730923695e-06, "loss": 0.1298, "step": 88800 }, { "epoch": 1.216696, "grad_norm": 4.51986026763916, "learning_rate": 2.902650602409639e-06, "loss": 0.1333, "step": 88900 }, { "epoch": 1.217496, "grad_norm": 4.948803424835205, "learning_rate": 2.8946184738955825e-06, "loss": 0.134, "step": 89000 }, { "epoch": 1.218296, "grad_norm": 4.863702774047852, "learning_rate": 2.8865863453815264e-06, "loss": 0.1378, "step": 89100 }, { "epoch": 1.219096, "grad_norm": 4.049374580383301, "learning_rate": 2.8785542168674704e-06, "loss": 0.125, "step": 89200 }, { "epoch": 1.219896, "grad_norm": 3.5720324516296387, "learning_rate": 2.870522088353414e-06, "loss": 0.1395, "step": 89300 }, { "epoch": 1.220696, "grad_norm": 1.931492567062378, "learning_rate": 2.8624899598393574e-06, "loss": 0.131, "step": 89400 }, { "epoch": 1.221496, "grad_norm": 6.574014663696289, "learning_rate": 2.8544578313253013e-06, "loss": 0.132, "step": 89500 }, { "epoch": 1.222296, "grad_norm": 7.461375713348389, "learning_rate": 2.846425702811245e-06, "loss": 0.1289, "step": 89600 }, { "epoch": 1.223096, "grad_norm": 5.367936611175537, "learning_rate": 2.838393574297189e-06, "loss": 0.1261, "step": 89700 }, { "epoch": 1.223896, "grad_norm": 5.853065013885498, "learning_rate": 2.830361445783133e-06, "loss": 0.1161, "step": 89800 }, { "epoch": 1.224696, "grad_norm": 4.45815896987915, "learning_rate": 2.8223293172690765e-06, "loss": 0.1285, "step": 89900 }, { "epoch": 1.225496, "grad_norm": 5.519855499267578, "learning_rate": 2.81429718875502e-06, "loss": 0.1464, "step": 90000 }, { "epoch": 1.226296, "grad_norm": 3.830179214477539, "learning_rate": 2.806265060240964e-06, "loss": 0.1278, "step": 90100 }, { "epoch": 1.227096, "grad_norm": 3.485785722732544, "learning_rate": 2.798232931726908e-06, "loss": 0.1599, "step": 90200 }, { "epoch": 1.2278959999999999, "grad_norm": 5.015017509460449, "learning_rate": 2.7902008032128514e-06, "loss": 0.1213, "step": 90300 }, { "epoch": 1.228696, "grad_norm": 5.003856658935547, "learning_rate": 2.7821686746987953e-06, "loss": 0.1268, "step": 90400 }, { "epoch": 1.229496, "grad_norm": 8.828622817993164, "learning_rate": 2.77421686746988e-06, "loss": 0.128, "step": 90500 }, { "epoch": 1.230296, "grad_norm": 5.4841790199279785, "learning_rate": 2.766184738955823e-06, "loss": 0.1267, "step": 90600 }, { "epoch": 1.231096, "grad_norm": 1.1707990169525146, "learning_rate": 2.758152610441767e-06, "loss": 0.1173, "step": 90700 }, { "epoch": 1.2318959999999999, "grad_norm": 4.860354423522949, "learning_rate": 2.750120481927711e-06, "loss": 0.1248, "step": 90800 }, { "epoch": 1.232696, "grad_norm": 4.709814071655273, "learning_rate": 2.742088353413655e-06, "loss": 0.1288, "step": 90900 }, { "epoch": 1.233496, "grad_norm": 5.921780109405518, "learning_rate": 2.7340562248995988e-06, "loss": 0.1165, "step": 91000 }, { "epoch": 1.234296, "grad_norm": 31.39832305908203, "learning_rate": 2.7260240963855423e-06, "loss": 0.1362, "step": 91100 }, { "epoch": 1.235096, "grad_norm": 9.929832458496094, "learning_rate": 2.717991967871486e-06, "loss": 0.1356, "step": 91200 }, { "epoch": 1.2358959999999999, "grad_norm": 6.635560035705566, "learning_rate": 2.70995983935743e-06, "loss": 0.1242, "step": 91300 }, { "epoch": 1.236696, "grad_norm": 3.8171298503875732, "learning_rate": 2.701927710843374e-06, "loss": 0.1322, "step": 91400 }, { "epoch": 1.237496, "grad_norm": 5.8909430503845215, "learning_rate": 2.693895582329317e-06, "loss": 0.1404, "step": 91500 }, { "epoch": 1.238296, "grad_norm": 12.169204711914062, "learning_rate": 2.685863453815261e-06, "loss": 0.1312, "step": 91600 }, { "epoch": 1.239096, "grad_norm": 2.663769483566284, "learning_rate": 2.677831325301205e-06, "loss": 0.1266, "step": 91700 }, { "epoch": 1.2398959999999999, "grad_norm": 10.018877983093262, "learning_rate": 2.669799196787149e-06, "loss": 0.1279, "step": 91800 }, { "epoch": 1.240696, "grad_norm": 3.6418004035949707, "learning_rate": 2.661767068273093e-06, "loss": 0.1233, "step": 91900 }, { "epoch": 1.241496, "grad_norm": 6.836270332336426, "learning_rate": 2.6537349397590363e-06, "loss": 0.137, "step": 92000 }, { "epoch": 1.241496, "eval_test1_cer": 0.04675013070430951, "eval_test1_cer_norm": 0.031027444954018296, "eval_test1_loss": 0.17643441259860992, "eval_test1_runtime": 3383.8187, "eval_test1_samples_per_second": 0.739, "eval_test1_steps_per_second": 0.185, "eval_test1_wer": 0.1424531327443949, "eval_test1_wer_norm": 0.08281948625032877, "step": 92000 }, { "epoch": 1.241496, "eval_test2_cer": 0.10391234554074738, "eval_test2_cer_norm": 0.07987294700960644, "eval_test2_loss": 0.31574827432632446, "eval_test2_runtime": 3451.2496, "eval_test2_samples_per_second": 0.724, "eval_test2_steps_per_second": 0.181, "eval_test2_wer": 0.22485122453650722, "eval_test2_wer_norm": 0.1593513637405455, "step": 92000 }, { "epoch": 1.242296, "grad_norm": 6.066310405731201, "learning_rate": 2.64570281124498e-06, "loss": 0.1255, "step": 92100 }, { "epoch": 1.243096, "grad_norm": 4.294830799102783, "learning_rate": 2.637670682730924e-06, "loss": 0.1399, "step": 92200 }, { "epoch": 1.243896, "grad_norm": 6.00723934173584, "learning_rate": 2.629638554216868e-06, "loss": 0.1446, "step": 92300 }, { "epoch": 1.244696, "grad_norm": 10.248518943786621, "learning_rate": 2.621606425702811e-06, "loss": 0.1317, "step": 92400 }, { "epoch": 1.245496, "grad_norm": 2.9732255935668945, "learning_rate": 2.613654618473896e-06, "loss": 0.1329, "step": 92500 }, { "epoch": 1.246296, "grad_norm": 5.735791206359863, "learning_rate": 2.6056224899598398e-06, "loss": 0.1246, "step": 92600 }, { "epoch": 1.247096, "grad_norm": 4.58491849899292, "learning_rate": 2.5975903614457833e-06, "loss": 0.118, "step": 92700 }, { "epoch": 1.247896, "grad_norm": 6.321929931640625, "learning_rate": 2.589558232931727e-06, "loss": 0.1317, "step": 92800 }, { "epoch": 1.248696, "grad_norm": 2.8514037132263184, "learning_rate": 2.581526104417671e-06, "loss": 0.124, "step": 92900 }, { "epoch": 1.249496, "grad_norm": 6.127628803253174, "learning_rate": 2.573493975903615e-06, "loss": 0.1429, "step": 93000 }, { "epoch": 1.250296, "grad_norm": 5.147157192230225, "learning_rate": 2.565461847389558e-06, "loss": 0.1294, "step": 93100 }, { "epoch": 1.251096, "grad_norm": 1.7546809911727905, "learning_rate": 2.557429718875502e-06, "loss": 0.1228, "step": 93200 }, { "epoch": 1.251896, "grad_norm": 5.040311813354492, "learning_rate": 2.549397590361446e-06, "loss": 0.1272, "step": 93300 }, { "epoch": 1.252696, "grad_norm": 6.703260898590088, "learning_rate": 2.54136546184739e-06, "loss": 0.1312, "step": 93400 }, { "epoch": 1.253496, "grad_norm": 4.4106974601745605, "learning_rate": 2.5333333333333338e-06, "loss": 0.1209, "step": 93500 }, { "epoch": 1.254296, "grad_norm": 4.892759799957275, "learning_rate": 2.5253012048192773e-06, "loss": 0.1273, "step": 93600 }, { "epoch": 1.255096, "grad_norm": 3.4537353515625, "learning_rate": 2.517269076305221e-06, "loss": 0.1292, "step": 93700 }, { "epoch": 1.255896, "grad_norm": 6.59584379196167, "learning_rate": 2.5092369477911647e-06, "loss": 0.1194, "step": 93800 }, { "epoch": 1.256696, "grad_norm": 7.888189792633057, "learning_rate": 2.5012048192771086e-06, "loss": 0.1292, "step": 93900 }, { "epoch": 1.257496, "grad_norm": 3.4533464908599854, "learning_rate": 2.4931726907630525e-06, "loss": 0.1271, "step": 94000 }, { "epoch": 1.258296, "grad_norm": 1.2868270874023438, "learning_rate": 2.4851405622489965e-06, "loss": 0.1416, "step": 94100 }, { "epoch": 1.259096, "grad_norm": 4.686305999755859, "learning_rate": 2.47710843373494e-06, "loss": 0.135, "step": 94200 }, { "epoch": 1.259896, "grad_norm": 2.333994150161743, "learning_rate": 2.469076305220884e-06, "loss": 0.1143, "step": 94300 }, { "epoch": 1.260696, "grad_norm": 24.702146530151367, "learning_rate": 2.4610441767068274e-06, "loss": 0.122, "step": 94400 }, { "epoch": 1.261496, "grad_norm": 2.577688694000244, "learning_rate": 2.4530120481927713e-06, "loss": 0.1323, "step": 94500 }, { "epoch": 1.262296, "grad_norm": 4.011758804321289, "learning_rate": 2.4449799196787148e-06, "loss": 0.1375, "step": 94600 }, { "epoch": 1.263096, "grad_norm": 3.8181052207946777, "learning_rate": 2.4369477911646587e-06, "loss": 0.1364, "step": 94700 }, { "epoch": 1.263896, "grad_norm": 5.570916652679443, "learning_rate": 2.4289959839357434e-06, "loss": 0.1334, "step": 94800 }, { "epoch": 1.264696, "grad_norm": 5.255520820617676, "learning_rate": 2.420963855421687e-06, "loss": 0.1346, "step": 94900 }, { "epoch": 1.265496, "grad_norm": 6.02402400970459, "learning_rate": 2.412931726907631e-06, "loss": 0.1299, "step": 95000 }, { "epoch": 1.266296, "grad_norm": 5.61665153503418, "learning_rate": 2.4048995983935744e-06, "loss": 0.1332, "step": 95100 }, { "epoch": 1.267096, "grad_norm": 3.135876417160034, "learning_rate": 2.3968674698795183e-06, "loss": 0.1466, "step": 95200 }, { "epoch": 1.267896, "grad_norm": 3.9527804851531982, "learning_rate": 2.3888353413654618e-06, "loss": 0.118, "step": 95300 }, { "epoch": 1.268696, "grad_norm": 8.5547513961792, "learning_rate": 2.3808032128514057e-06, "loss": 0.1289, "step": 95400 }, { "epoch": 1.269496, "grad_norm": 4.350017070770264, "learning_rate": 2.3727710843373496e-06, "loss": 0.1485, "step": 95500 }, { "epoch": 1.270296, "grad_norm": 7.225109100341797, "learning_rate": 2.3647389558232935e-06, "loss": 0.1724, "step": 95600 }, { "epoch": 1.271096, "grad_norm": 3.152327060699463, "learning_rate": 2.356706827309237e-06, "loss": 0.151, "step": 95700 }, { "epoch": 1.271896, "grad_norm": 9.343049049377441, "learning_rate": 2.348674698795181e-06, "loss": 0.1219, "step": 95800 }, { "epoch": 1.272696, "grad_norm": 3.7528295516967773, "learning_rate": 2.340642570281125e-06, "loss": 0.1011, "step": 95900 }, { "epoch": 1.273496, "grad_norm": 1.7038061618804932, "learning_rate": 2.3326104417670684e-06, "loss": 0.1287, "step": 96000 }, { "epoch": 1.273496, "eval_test1_cer": 0.04843528269474942, "eval_test1_cer_norm": 0.03259298388839532, "eval_test1_loss": 0.17615145444869995, "eval_test1_runtime": 3390.194, "eval_test1_samples_per_second": 0.737, "eval_test1_steps_per_second": 0.184, "eval_test1_wer": 0.14195749147205458, "eval_test1_wer_norm": 0.08346240392764254, "step": 96000 }, { "epoch": 1.273496, "eval_test2_cer": 0.10040784709000634, "eval_test2_cer_norm": 0.07324914781530834, "eval_test2_loss": 0.31477001309394836, "eval_test2_runtime": 3429.6171, "eval_test2_samples_per_second": 0.729, "eval_test2_steps_per_second": 0.182, "eval_test2_wer": 0.21283474479285877, "eval_test2_wer_norm": 0.14789135915654367, "step": 96000 }, { "epoch": 1.274296, "grad_norm": 4.975541591644287, "learning_rate": 2.3245783132530123e-06, "loss": 0.1545, "step": 96100 }, { "epoch": 1.275096, "grad_norm": 2.7354822158813477, "learning_rate": 2.316546184738956e-06, "loss": 0.1379, "step": 96200 }, { "epoch": 1.275896, "grad_norm": 4.287178993225098, "learning_rate": 2.3085140562248997e-06, "loss": 0.1109, "step": 96300 }, { "epoch": 1.276696, "grad_norm": 7.494585990905762, "learning_rate": 2.3004819277108436e-06, "loss": 0.1361, "step": 96400 }, { "epoch": 1.277496, "grad_norm": 5.967809677124023, "learning_rate": 2.292449799196787e-06, "loss": 0.1282, "step": 96500 }, { "epoch": 1.278296, "grad_norm": 6.256405830383301, "learning_rate": 2.284417670682731e-06, "loss": 0.1254, "step": 96600 }, { "epoch": 1.279096, "grad_norm": 4.7275214195251465, "learning_rate": 2.276385542168675e-06, "loss": 0.1125, "step": 96700 }, { "epoch": 1.279896, "grad_norm": 6.460155010223389, "learning_rate": 2.2685140562248997e-06, "loss": 0.1531, "step": 96800 }, { "epoch": 1.280696, "grad_norm": 7.814698696136475, "learning_rate": 2.2604819277108436e-06, "loss": 0.1353, "step": 96900 }, { "epoch": 1.281496, "grad_norm": 2.7123336791992188, "learning_rate": 2.252449799196787e-06, "loss": 0.1223, "step": 97000 }, { "epoch": 1.282296, "grad_norm": 4.504587650299072, "learning_rate": 2.244417670682731e-06, "loss": 0.1358, "step": 97100 }, { "epoch": 1.283096, "grad_norm": 7.585097789764404, "learning_rate": 2.236385542168675e-06, "loss": 0.1378, "step": 97200 }, { "epoch": 1.283896, "grad_norm": 3.129997491836548, "learning_rate": 2.228353413654619e-06, "loss": 0.1234, "step": 97300 }, { "epoch": 1.284696, "grad_norm": 6.103450775146484, "learning_rate": 2.2203212851405628e-06, "loss": 0.1188, "step": 97400 }, { "epoch": 1.285496, "grad_norm": 5.75684928894043, "learning_rate": 2.2122891566265063e-06, "loss": 0.118, "step": 97500 }, { "epoch": 1.286296, "grad_norm": 4.268872261047363, "learning_rate": 2.20425702811245e-06, "loss": 0.1242, "step": 97600 }, { "epoch": 1.287096, "grad_norm": 10.057229042053223, "learning_rate": 2.1962248995983937e-06, "loss": 0.107, "step": 97700 }, { "epoch": 1.287896, "grad_norm": 6.790497303009033, "learning_rate": 2.1881927710843376e-06, "loss": 0.1169, "step": 97800 }, { "epoch": 1.288696, "grad_norm": 7.399913311004639, "learning_rate": 2.180160642570281e-06, "loss": 0.1389, "step": 97900 }, { "epoch": 1.289496, "grad_norm": 8.95304012298584, "learning_rate": 2.172128514056225e-06, "loss": 0.1119, "step": 98000 }, { "epoch": 1.290296, "grad_norm": 3.782831907272339, "learning_rate": 2.164096385542169e-06, "loss": 0.1158, "step": 98100 }, { "epoch": 1.291096, "grad_norm": 9.19373893737793, "learning_rate": 2.1560642570281124e-06, "loss": 0.1373, "step": 98200 }, { "epoch": 1.291896, "grad_norm": 8.121448516845703, "learning_rate": 2.1480321285140563e-06, "loss": 0.1379, "step": 98300 }, { "epoch": 1.292696, "grad_norm": 5.722080707550049, "learning_rate": 2.1400000000000003e-06, "loss": 0.1171, "step": 98400 }, { "epoch": 1.293496, "grad_norm": 4.1129255294799805, "learning_rate": 2.131967871485944e-06, "loss": 0.1302, "step": 98500 }, { "epoch": 1.2942960000000001, "grad_norm": 3.102550983428955, "learning_rate": 2.1239357429718877e-06, "loss": 0.1256, "step": 98600 }, { "epoch": 1.295096, "grad_norm": 5.705405235290527, "learning_rate": 2.1159036144578316e-06, "loss": 0.1103, "step": 98700 }, { "epoch": 1.295896, "grad_norm": 5.043685436248779, "learning_rate": 2.107871485943775e-06, "loss": 0.1207, "step": 98800 }, { "epoch": 1.296696, "grad_norm": 5.753846645355225, "learning_rate": 2.099839357429719e-06, "loss": 0.1195, "step": 98900 }, { "epoch": 1.297496, "grad_norm": 5.277960300445557, "learning_rate": 2.091807228915663e-06, "loss": 0.1326, "step": 99000 }, { "epoch": 1.2982960000000001, "grad_norm": 4.641193866729736, "learning_rate": 2.0837751004016064e-06, "loss": 0.1409, "step": 99100 }, { "epoch": 1.299096, "grad_norm": 6.307822227478027, "learning_rate": 2.0757429718875504e-06, "loss": 0.1271, "step": 99200 }, { "epoch": 1.299896, "grad_norm": 4.50252628326416, "learning_rate": 2.0677108433734943e-06, "loss": 0.1242, "step": 99300 }, { "epoch": 1.300696, "grad_norm": 6.755404949188232, "learning_rate": 2.059678714859438e-06, "loss": 0.1349, "step": 99400 }, { "epoch": 1.301496, "grad_norm": 4.805443286895752, "learning_rate": 2.0516465863453817e-06, "loss": 0.1227, "step": 99500 }, { "epoch": 1.3022960000000001, "grad_norm": 3.4150850772857666, "learning_rate": 2.0436144578313256e-06, "loss": 0.1147, "step": 99600 }, { "epoch": 1.303096, "grad_norm": 6.213440895080566, "learning_rate": 2.035582329317269e-06, "loss": 0.1236, "step": 99700 }, { "epoch": 1.303896, "grad_norm": 6.957757472991943, "learning_rate": 2.027550200803213e-06, "loss": 0.1323, "step": 99800 }, { "epoch": 1.304696, "grad_norm": 7.19875955581665, "learning_rate": 2.019518072289157e-06, "loss": 0.1283, "step": 99900 }, { "epoch": 1.305496, "grad_norm": 2.5074002742767334, "learning_rate": 2.0114859437751004e-06, "loss": 0.1128, "step": 100000 }, { "epoch": 1.305496, "eval_test1_cer": 0.0477444170587796, "eval_test1_cer_norm": 0.03308281508872188, "eval_test1_loss": 0.17668992280960083, "eval_test1_runtime": 3382.0305, "eval_test1_samples_per_second": 0.739, "eval_test1_steps_per_second": 0.185, "eval_test1_wer": 0.1387795562552844, "eval_test1_wer_norm": 0.07951722727139893, "step": 100000 }, { "epoch": 1.305496, "eval_test2_cer": 0.10429499384029567, "eval_test2_cer_norm": 0.0824295010845987, "eval_test2_loss": 0.3178161084651947, "eval_test2_runtime": 3460.99, "eval_test2_samples_per_second": 0.722, "eval_test2_steps_per_second": 0.181, "eval_test2_wer": 0.2208743419546807, "eval_test2_wer_norm": 0.15585606234242494, "step": 100000 }, { "epoch": 1.3062960000000001, "grad_norm": 4.352635383605957, "learning_rate": 2.0034538152610444e-06, "loss": 0.1269, "step": 100100 }, { "epoch": 1.307096, "grad_norm": 6.455359935760498, "learning_rate": 1.995421686746988e-06, "loss": 0.1228, "step": 100200 }, { "epoch": 1.307896, "grad_norm": 4.56587028503418, "learning_rate": 1.9873895582329318e-06, "loss": 0.1302, "step": 100300 }, { "epoch": 1.308696, "grad_norm": 6.275040149688721, "learning_rate": 1.9793574297188757e-06, "loss": 0.1316, "step": 100400 }, { "epoch": 1.309496, "grad_norm": 2.4755728244781494, "learning_rate": 1.9713253012048196e-06, "loss": 0.1324, "step": 100500 }, { "epoch": 1.3102960000000001, "grad_norm": 5.687671184539795, "learning_rate": 1.963293172690763e-06, "loss": 0.1296, "step": 100600 }, { "epoch": 1.311096, "grad_norm": 3.915098190307617, "learning_rate": 1.955261044176707e-06, "loss": 0.1198, "step": 100700 }, { "epoch": 1.311896, "grad_norm": 5.507267475128174, "learning_rate": 1.947228915662651e-06, "loss": 0.1308, "step": 100800 }, { "epoch": 1.3126959999999999, "grad_norm": 6.799343109130859, "learning_rate": 1.939277108433735e-06, "loss": 0.1106, "step": 100900 }, { "epoch": 1.313496, "grad_norm": 2.4747636318206787, "learning_rate": 1.9312449799196788e-06, "loss": 0.1198, "step": 101000 }, { "epoch": 1.3142960000000001, "grad_norm": 1.4364855289459229, "learning_rate": 1.9232128514056227e-06, "loss": 0.1251, "step": 101100 }, { "epoch": 1.315096, "grad_norm": 3.4942729473114014, "learning_rate": 1.9151807228915666e-06, "loss": 0.105, "step": 101200 }, { "epoch": 1.315896, "grad_norm": 7.960775852203369, "learning_rate": 1.9071485943775103e-06, "loss": 0.121, "step": 101300 }, { "epoch": 1.3166959999999999, "grad_norm": 4.901790618896484, "learning_rate": 1.899116465863454e-06, "loss": 0.1248, "step": 101400 }, { "epoch": 1.317496, "grad_norm": 3.4517650604248047, "learning_rate": 1.891084337349398e-06, "loss": 0.1101, "step": 101500 }, { "epoch": 1.3182960000000001, "grad_norm": 2.9589340686798096, "learning_rate": 1.8830522088353414e-06, "loss": 0.1569, "step": 101600 }, { "epoch": 1.319096, "grad_norm": 4.821226119995117, "learning_rate": 1.8750200803212854e-06, "loss": 0.1248, "step": 101700 }, { "epoch": 1.319896, "grad_norm": 5.956510543823242, "learning_rate": 1.866987951807229e-06, "loss": 0.129, "step": 101800 }, { "epoch": 1.3206959999999999, "grad_norm": 8.236087799072266, "learning_rate": 1.8589558232931728e-06, "loss": 0.1288, "step": 101900 }, { "epoch": 1.321496, "grad_norm": 7.885124206542969, "learning_rate": 1.8509236947791165e-06, "loss": 0.133, "step": 102000 }, { "epoch": 1.3222960000000001, "grad_norm": 4.618575096130371, "learning_rate": 1.8428915662650604e-06, "loss": 0.131, "step": 102100 }, { "epoch": 1.323096, "grad_norm": 3.3860082626342773, "learning_rate": 1.8348594377510043e-06, "loss": 0.1275, "step": 102200 }, { "epoch": 1.323896, "grad_norm": 0.5851134061813354, "learning_rate": 1.8268273092369478e-06, "loss": 0.1163, "step": 102300 }, { "epoch": 1.3246959999999999, "grad_norm": 8.403120040893555, "learning_rate": 1.8187951807228917e-06, "loss": 0.142, "step": 102400 }, { "epoch": 1.325496, "grad_norm": 4.019391059875488, "learning_rate": 1.8107630522088354e-06, "loss": 0.1234, "step": 102500 }, { "epoch": 1.326296, "grad_norm": 7.64513635635376, "learning_rate": 1.8028112449799198e-06, "loss": 0.1202, "step": 102600 }, { "epoch": 1.327096, "grad_norm": 4.8131489753723145, "learning_rate": 1.7947791164658637e-06, "loss": 0.1169, "step": 102700 }, { "epoch": 1.327896, "grad_norm": 7.043356895446777, "learning_rate": 1.7867469879518074e-06, "loss": 0.1209, "step": 102800 }, { "epoch": 1.3286959999999999, "grad_norm": 4.337855815887451, "learning_rate": 1.7787148594377513e-06, "loss": 0.1365, "step": 102900 }, { "epoch": 1.329496, "grad_norm": 6.657166481018066, "learning_rate": 1.7706827309236948e-06, "loss": 0.1256, "step": 103000 }, { "epoch": 1.330296, "grad_norm": 4.905508041381836, "learning_rate": 1.7626506024096387e-06, "loss": 0.1118, "step": 103100 }, { "epoch": 1.331096, "grad_norm": 5.122402667999268, "learning_rate": 1.7546184738955824e-06, "loss": 0.1276, "step": 103200 }, { "epoch": 1.331896, "grad_norm": 4.78582239151001, "learning_rate": 1.7465863453815264e-06, "loss": 0.1194, "step": 103300 }, { "epoch": 1.3326959999999999, "grad_norm": 5.943844318389893, "learning_rate": 1.7385542168674698e-06, "loss": 0.1206, "step": 103400 }, { "epoch": 1.333496, "grad_norm": 8.206818580627441, "learning_rate": 1.7305220883534138e-06, "loss": 0.1151, "step": 103500 }, { "epoch": 1.334296, "grad_norm": 5.556063175201416, "learning_rate": 1.7224899598393577e-06, "loss": 0.1114, "step": 103600 }, { "epoch": 1.335096, "grad_norm": 7.587924957275391, "learning_rate": 1.7144578313253014e-06, "loss": 0.1207, "step": 103700 }, { "epoch": 1.335896, "grad_norm": 7.417220592498779, "learning_rate": 1.706425702811245e-06, "loss": 0.1163, "step": 103800 }, { "epoch": 1.3366959999999999, "grad_norm": 7.316643238067627, "learning_rate": 1.6983935742971888e-06, "loss": 0.1157, "step": 103900 }, { "epoch": 1.337496, "grad_norm": 1.7829804420471191, "learning_rate": 1.6903614457831327e-06, "loss": 0.1155, "step": 104000 }, { "epoch": 1.337496, "eval_test1_cer": 0.045685824184031665, "eval_test1_cer_norm": 0.03030710495353807, "eval_test1_loss": 0.17226466536521912, "eval_test1_runtime": 3385.0143, "eval_test1_samples_per_second": 0.739, "eval_test1_steps_per_second": 0.185, "eval_test1_wer": 0.13615557304877693, "eval_test1_wer_norm": 0.07755925070867062, "step": 104000 }, { "epoch": 1.337496, "eval_test2_cer": 0.10362769253742488, "eval_test2_cer_norm": 0.08312674310505114, "eval_test2_loss": 0.31396326422691345, "eval_test2_runtime": 3448.444, "eval_test2_samples_per_second": 0.725, "eval_test2_steps_per_second": 0.181, "eval_test2_wer": 0.21904325932707713, "eval_test2_wer_norm": 0.15513981205592484, "step": 104000 }, { "epoch": 1.338296, "grad_norm": 3.8256428241729736, "learning_rate": 1.6823293172690762e-06, "loss": 0.1251, "step": 104100 }, { "epoch": 1.339096, "grad_norm": 4.764732837677002, "learning_rate": 1.6742971887550201e-06, "loss": 0.1183, "step": 104200 }, { "epoch": 1.339896, "grad_norm": 3.5565683841705322, "learning_rate": 1.666265060240964e-06, "loss": 0.1152, "step": 104300 }, { "epoch": 1.3406959999999999, "grad_norm": 5.896035671234131, "learning_rate": 1.6582329317269078e-06, "loss": 0.136, "step": 104400 }, { "epoch": 1.341496, "grad_norm": 6.5424628257751465, "learning_rate": 1.6502008032128517e-06, "loss": 0.128, "step": 104500 }, { "epoch": 1.342296, "grad_norm": 5.04475736618042, "learning_rate": 1.6421686746987952e-06, "loss": 0.1149, "step": 104600 }, { "epoch": 1.343096, "grad_norm": 5.732762813568115, "learning_rate": 1.6341365461847391e-06, "loss": 0.131, "step": 104700 }, { "epoch": 1.343896, "grad_norm": 5.360403060913086, "learning_rate": 1.6261044176706828e-06, "loss": 0.1283, "step": 104800 }, { "epoch": 1.344696, "grad_norm": 6.674147129058838, "learning_rate": 1.6180722891566267e-06, "loss": 0.1357, "step": 104900 }, { "epoch": 1.345496, "grad_norm": 3.1230456829071045, "learning_rate": 1.6100401606425702e-06, "loss": 0.1237, "step": 105000 }, { "epoch": 1.346296, "grad_norm": 6.3545756340026855, "learning_rate": 1.6020080321285142e-06, "loss": 0.1248, "step": 105100 }, { "epoch": 1.347096, "grad_norm": 9.923174858093262, "learning_rate": 1.593975903614458e-06, "loss": 0.1382, "step": 105200 }, { "epoch": 1.347896, "grad_norm": 3.6945223808288574, "learning_rate": 1.5859437751004018e-06, "loss": 0.1244, "step": 105300 }, { "epoch": 1.348696, "grad_norm": 8.398360252380371, "learning_rate": 1.5779116465863457e-06, "loss": 0.123, "step": 105400 }, { "epoch": 1.349496, "grad_norm": 1.4128965139389038, "learning_rate": 1.5698795180722892e-06, "loss": 0.1175, "step": 105500 }, { "epoch": 1.350296, "grad_norm": 11.629409790039062, "learning_rate": 1.5618473895582331e-06, "loss": 0.1117, "step": 105600 }, { "epoch": 1.351096, "grad_norm": 4.094301700592041, "learning_rate": 1.5538152610441768e-06, "loss": 0.1193, "step": 105700 }, { "epoch": 1.351896, "grad_norm": 4.269906044006348, "learning_rate": 1.5457831325301205e-06, "loss": 0.1256, "step": 105800 }, { "epoch": 1.352696, "grad_norm": 5.27727746963501, "learning_rate": 1.5377510040160642e-06, "loss": 0.1224, "step": 105900 }, { "epoch": 1.353496, "grad_norm": 4.353626728057861, "learning_rate": 1.5297188755020082e-06, "loss": 0.1196, "step": 106000 }, { "epoch": 1.354296, "grad_norm": 3.226297616958618, "learning_rate": 1.521686746987952e-06, "loss": 0.1364, "step": 106100 }, { "epoch": 1.355096, "grad_norm": 0.7694222331047058, "learning_rate": 1.5136546184738956e-06, "loss": 0.103, "step": 106200 }, { "epoch": 1.355896, "grad_norm": 3.6190733909606934, "learning_rate": 1.5056224899598395e-06, "loss": 0.1235, "step": 106300 }, { "epoch": 1.356696, "grad_norm": 3.509673833847046, "learning_rate": 1.4975903614457832e-06, "loss": 0.121, "step": 106400 }, { "epoch": 1.357496, "grad_norm": 8.607564926147461, "learning_rate": 1.4895582329317271e-06, "loss": 0.1318, "step": 106500 }, { "epoch": 1.358296, "grad_norm": 3.327017307281494, "learning_rate": 1.4815261044176706e-06, "loss": 0.1283, "step": 106600 }, { "epoch": 1.359096, "grad_norm": 8.573492050170898, "learning_rate": 1.4735742971887552e-06, "loss": 0.1222, "step": 106700 }, { "epoch": 1.359896, "grad_norm": 4.089103698730469, "learning_rate": 1.465542168674699e-06, "loss": 0.1257, "step": 106800 }, { "epoch": 1.360696, "grad_norm": 3.942911386489868, "learning_rate": 1.4575100401606426e-06, "loss": 0.1205, "step": 106900 }, { "epoch": 1.361496, "grad_norm": 6.010968208312988, "learning_rate": 1.4494779116465865e-06, "loss": 0.1295, "step": 107000 }, { "epoch": 1.362296, "grad_norm": 6.7945756912231445, "learning_rate": 1.4414457831325302e-06, "loss": 0.1246, "step": 107100 }, { "epoch": 1.363096, "grad_norm": 2.255643129348755, "learning_rate": 1.4334136546184741e-06, "loss": 0.1254, "step": 107200 }, { "epoch": 1.363896, "grad_norm": 4.047929763793945, "learning_rate": 1.4253815261044176e-06, "loss": 0.1197, "step": 107300 }, { "epoch": 1.364696, "grad_norm": 4.005350112915039, "learning_rate": 1.4173493975903615e-06, "loss": 0.1261, "step": 107400 }, { "epoch": 1.365496, "grad_norm": 5.783274173736572, "learning_rate": 1.4093172690763055e-06, "loss": 0.1157, "step": 107500 }, { "epoch": 1.366296, "grad_norm": 5.45566463470459, "learning_rate": 1.4012851405622492e-06, "loss": 0.125, "step": 107600 }, { "epoch": 1.367096, "grad_norm": 4.851735591888428, "learning_rate": 1.3932530120481929e-06, "loss": 0.1242, "step": 107700 }, { "epoch": 1.367896, "grad_norm": 5.266098976135254, "learning_rate": 1.3852208835341366e-06, "loss": 0.1267, "step": 107800 }, { "epoch": 1.368696, "grad_norm": 4.1477742195129395, "learning_rate": 1.3771887550200805e-06, "loss": 0.1158, "step": 107900 }, { "epoch": 1.369496, "grad_norm": 12.537534713745117, "learning_rate": 1.369156626506024e-06, "loss": 0.1114, "step": 108000 }, { "epoch": 1.369496, "eval_test1_cer": 0.041395922025543354, "eval_test1_cer_norm": 0.027075179484716785, "eval_test1_loss": 0.1729966700077057, "eval_test1_runtime": 3379.841, "eval_test1_samples_per_second": 0.74, "eval_test1_steps_per_second": 0.185, "eval_test1_wer": 0.12840024490509927, "eval_test1_wer_norm": 0.06993190917326632, "step": 108000 }, { "epoch": 1.369496, "eval_test2_cer": 0.1033523724194572, "eval_test2_cer_norm": 0.077180818097304, "eval_test2_loss": 0.3105059862136841, "eval_test2_runtime": 3449.5535, "eval_test2_samples_per_second": 0.725, "eval_test2_steps_per_second": 0.181, "eval_test2_wer": 0.2153238727397574, "eval_test2_wer_norm": 0.15038391015356406, "step": 108000 }, { "epoch": 1.370296, "grad_norm": 2.571197986602783, "learning_rate": 1.361124497991968e-06, "loss": 0.1133, "step": 108100 }, { "epoch": 1.371096, "grad_norm": 2.839953660964966, "learning_rate": 1.3531726907630524e-06, "loss": 0.1225, "step": 108200 }, { "epoch": 1.371896, "grad_norm": 2.108365535736084, "learning_rate": 1.345140562248996e-06, "loss": 0.1111, "step": 108300 }, { "epoch": 1.372696, "grad_norm": 4.538883686065674, "learning_rate": 1.3371084337349399e-06, "loss": 0.0988, "step": 108400 }, { "epoch": 1.373496, "grad_norm": 5.8930277824401855, "learning_rate": 1.3290763052208836e-06, "loss": 0.1124, "step": 108500 }, { "epoch": 1.374296, "grad_norm": 3.4291086196899414, "learning_rate": 1.3210441767068275e-06, "loss": 0.0998, "step": 108600 }, { "epoch": 1.375096, "grad_norm": 3.634300947189331, "learning_rate": 1.313012048192771e-06, "loss": 0.123, "step": 108700 }, { "epoch": 1.375896, "grad_norm": 10.641935348510742, "learning_rate": 1.304979919678715e-06, "loss": 0.1331, "step": 108800 }, { "epoch": 1.376696, "grad_norm": 3.8561360836029053, "learning_rate": 1.2969477911646588e-06, "loss": 0.1386, "step": 108900 }, { "epoch": 1.377496, "grad_norm": 5.578998565673828, "learning_rate": 1.2889156626506025e-06, "loss": 0.1252, "step": 109000 }, { "epoch": 1.378296, "grad_norm": 6.097825050354004, "learning_rate": 1.2808835341365464e-06, "loss": 0.1226, "step": 109100 }, { "epoch": 1.379096, "grad_norm": 7.930078983306885, "learning_rate": 1.27285140562249e-06, "loss": 0.1324, "step": 109200 }, { "epoch": 1.379896, "grad_norm": 4.191389083862305, "learning_rate": 1.2648192771084339e-06, "loss": 0.1265, "step": 109300 }, { "epoch": 1.380696, "grad_norm": 4.194836139678955, "learning_rate": 1.2567871485943776e-06, "loss": 0.1131, "step": 109400 }, { "epoch": 1.381496, "grad_norm": 0.9715979695320129, "learning_rate": 1.2487550200803215e-06, "loss": 0.1149, "step": 109500 }, { "epoch": 1.382296, "grad_norm": 2.0711517333984375, "learning_rate": 1.2407228915662652e-06, "loss": 0.1196, "step": 109600 }, { "epoch": 1.383096, "grad_norm": 3.178311824798584, "learning_rate": 1.232690763052209e-06, "loss": 0.1116, "step": 109700 }, { "epoch": 1.383896, "grad_norm": 4.997854232788086, "learning_rate": 1.2246586345381526e-06, "loss": 0.1215, "step": 109800 }, { "epoch": 1.384696, "grad_norm": 4.431860446929932, "learning_rate": 1.2166265060240963e-06, "loss": 0.1129, "step": 109900 }, { "epoch": 1.385496, "grad_norm": 4.320796966552734, "learning_rate": 1.2085943775100402e-06, "loss": 0.1375, "step": 110000 }, { "epoch": 1.386296, "grad_norm": 7.0986552238464355, "learning_rate": 1.2005622489959842e-06, "loss": 0.1051, "step": 110100 }, { "epoch": 1.387096, "grad_norm": 2.893299102783203, "learning_rate": 1.1925301204819279e-06, "loss": 0.1133, "step": 110200 }, { "epoch": 1.387896, "grad_norm": 1.8434147834777832, "learning_rate": 1.1844979919678716e-06, "loss": 0.1099, "step": 110300 }, { "epoch": 1.388696, "grad_norm": 3.1360116004943848, "learning_rate": 1.1764658634538153e-06, "loss": 0.1276, "step": 110400 }, { "epoch": 1.389496, "grad_norm": 2.8671581745147705, "learning_rate": 1.1684337349397592e-06, "loss": 0.1113, "step": 110500 }, { "epoch": 1.390296, "grad_norm": 8.354147911071777, "learning_rate": 1.160401606425703e-06, "loss": 0.1085, "step": 110600 }, { "epoch": 1.391096, "grad_norm": 14.839973449707031, "learning_rate": 1.1523694779116466e-06, "loss": 0.1096, "step": 110700 }, { "epoch": 1.391896, "grad_norm": 3.917928695678711, "learning_rate": 1.1443373493975903e-06, "loss": 0.1219, "step": 110800 }, { "epoch": 1.392696, "grad_norm": 6.117668151855469, "learning_rate": 1.1363052208835343e-06, "loss": 0.1115, "step": 110900 }, { "epoch": 1.393496, "grad_norm": 1.9348158836364746, "learning_rate": 1.128273092369478e-06, "loss": 0.1219, "step": 111000 }, { "epoch": 1.394296, "grad_norm": 5.231213569641113, "learning_rate": 1.1202409638554219e-06, "loss": 0.1178, "step": 111100 }, { "epoch": 1.3950960000000001, "grad_norm": 5.860185623168945, "learning_rate": 1.1122088353413656e-06, "loss": 0.1136, "step": 111200 }, { "epoch": 1.395896, "grad_norm": 4.386104106903076, "learning_rate": 1.10425702811245e-06, "loss": 0.1159, "step": 111300 }, { "epoch": 1.396696, "grad_norm": 1.6194918155670166, "learning_rate": 1.0962248995983938e-06, "loss": 0.1187, "step": 111400 }, { "epoch": 1.397496, "grad_norm": 5.940235137939453, "learning_rate": 1.0881927710843375e-06, "loss": 0.1157, "step": 111500 }, { "epoch": 1.398296, "grad_norm": 4.942705154418945, "learning_rate": 1.0801606425702812e-06, "loss": 0.1005, "step": 111600 }, { "epoch": 1.3990960000000001, "grad_norm": 5.09475564956665, "learning_rate": 1.072128514056225e-06, "loss": 0.1204, "step": 111700 }, { "epoch": 1.399896, "grad_norm": 5.27365255355835, "learning_rate": 1.0640963855421687e-06, "loss": 0.127, "step": 111800 }, { "epoch": 1.400696, "grad_norm": 6.454215049743652, "learning_rate": 1.0560642570281126e-06, "loss": 0.1025, "step": 111900 }, { "epoch": 1.401496, "grad_norm": 5.676671028137207, "learning_rate": 1.0480321285140563e-06, "loss": 0.1141, "step": 112000 }, { "epoch": 1.401496, "eval_test1_cer": 0.04145660616924341, "eval_test1_cer_norm": 0.02695992508463995, "eval_test1_loss": 0.17103232443332672, "eval_test1_runtime": 2490.4053, "eval_test1_samples_per_second": 1.004, "eval_test1_steps_per_second": 0.251, "eval_test1_wer": 0.12912912912912913, "eval_test1_wer_norm": 0.07016569741956223, "step": 112000 }, { "epoch": 1.401496, "eval_test2_cer": 0.08579254862433269, "eval_test2_cer_norm": 0.06814572358227455, "eval_test2_loss": 0.3084600865840912, "eval_test2_runtime": 2471.9908, "eval_test2_samples_per_second": 1.011, "eval_test2_steps_per_second": 0.253, "eval_test2_wer": 0.19641222247653925, "eval_test2_wer_norm": 0.13070135228054092, "step": 112000 }, { "epoch": 1.402296, "grad_norm": 2.858332633972168, "learning_rate": 1.04e-06, "loss": 0.1044, "step": 112100 }, { "epoch": 1.4030960000000001, "grad_norm": 5.488587379455566, "learning_rate": 1.0319678714859437e-06, "loss": 0.1366, "step": 112200 }, { "epoch": 1.403896, "grad_norm": 13.426318168640137, "learning_rate": 1.0239357429718876e-06, "loss": 0.1186, "step": 112300 }, { "epoch": 1.404696, "grad_norm": 4.668258190155029, "learning_rate": 1.0159036144578315e-06, "loss": 0.1056, "step": 112400 }, { "epoch": 1.405496, "grad_norm": 3.497905969619751, "learning_rate": 1.0078714859437753e-06, "loss": 0.1168, "step": 112500 }, { "epoch": 1.406296, "grad_norm": 5.060064315795898, "learning_rate": 9.99839357429719e-07, "loss": 0.1167, "step": 112600 }, { "epoch": 1.4070960000000001, "grad_norm": 1.985062837600708, "learning_rate": 9.918072289156627e-07, "loss": 0.1234, "step": 112700 }, { "epoch": 1.407896, "grad_norm": 3.2145867347717285, "learning_rate": 9.837751004016064e-07, "loss": 0.1231, "step": 112800 }, { "epoch": 1.408696, "grad_norm": 6.194746971130371, "learning_rate": 9.757429718875503e-07, "loss": 0.1038, "step": 112900 }, { "epoch": 1.409496, "grad_norm": 4.408815860748291, "learning_rate": 9.67710843373494e-07, "loss": 0.1157, "step": 113000 }, { "epoch": 1.410296, "grad_norm": 4.587121486663818, "learning_rate": 9.59678714859438e-07, "loss": 0.1163, "step": 113100 }, { "epoch": 1.4110960000000001, "grad_norm": 4.01561975479126, "learning_rate": 9.516465863453816e-07, "loss": 0.1077, "step": 113200 }, { "epoch": 1.411896, "grad_norm": 4.253355026245117, "learning_rate": 9.436144578313254e-07, "loss": 0.1209, "step": 113300 }, { "epoch": 1.412696, "grad_norm": 1.5479423999786377, "learning_rate": 9.355823293172692e-07, "loss": 0.1378, "step": 113400 }, { "epoch": 1.4134959999999999, "grad_norm": 4.244668960571289, "learning_rate": 9.27550200803213e-07, "loss": 0.1381, "step": 113500 }, { "epoch": 1.414296, "grad_norm": 4.061355113983154, "learning_rate": 9.195180722891567e-07, "loss": 0.1283, "step": 113600 }, { "epoch": 1.4150960000000001, "grad_norm": 8.999344825744629, "learning_rate": 9.114859437751005e-07, "loss": 0.1291, "step": 113700 }, { "epoch": 1.415896, "grad_norm": 3.263817310333252, "learning_rate": 9.034538152610442e-07, "loss": 0.119, "step": 113800 }, { "epoch": 1.416696, "grad_norm": 5.394161224365234, "learning_rate": 8.954216867469879e-07, "loss": 0.1218, "step": 113900 }, { "epoch": 1.4174959999999999, "grad_norm": 11.564488410949707, "learning_rate": 8.873895582329318e-07, "loss": 0.1222, "step": 114000 }, { "epoch": 1.418296, "grad_norm": 3.848464250564575, "learning_rate": 8.793574297188756e-07, "loss": 0.1175, "step": 114100 }, { "epoch": 1.4190960000000001, "grad_norm": 3.4740781784057617, "learning_rate": 8.713253012048194e-07, "loss": 0.1267, "step": 114200 }, { "epoch": 1.419896, "grad_norm": 3.503969192504883, "learning_rate": 8.632931726907632e-07, "loss": 0.1333, "step": 114300 }, { "epoch": 1.420696, "grad_norm": 3.658344268798828, "learning_rate": 8.552610441767069e-07, "loss": 0.1137, "step": 114400 }, { "epoch": 1.4214959999999999, "grad_norm": 5.1479973793029785, "learning_rate": 8.472289156626507e-07, "loss": 0.1254, "step": 114500 }, { "epoch": 1.422296, "grad_norm": 5.20347261428833, "learning_rate": 8.391967871485944e-07, "loss": 0.1167, "step": 114600 }, { "epoch": 1.4230960000000001, "grad_norm": 5.197832107543945, "learning_rate": 8.311646586345381e-07, "loss": 0.1297, "step": 114700 }, { "epoch": 1.423896, "grad_norm": 2.3857178688049316, "learning_rate": 8.23132530120482e-07, "loss": 0.1173, "step": 114800 }, { "epoch": 1.424696, "grad_norm": 5.162271976470947, "learning_rate": 8.151004016064258e-07, "loss": 0.1089, "step": 114900 }, { "epoch": 1.4254959999999999, "grad_norm": 4.035723686218262, "learning_rate": 8.070682730923695e-07, "loss": 0.1186, "step": 115000 }, { "epoch": 1.426296, "grad_norm": 7.1610283851623535, "learning_rate": 7.991164658634539e-07, "loss": 0.1196, "step": 115100 }, { "epoch": 1.427096, "grad_norm": 6.167757987976074, "learning_rate": 7.910843373493976e-07, "loss": 0.112, "step": 115200 }, { "epoch": 1.427896, "grad_norm": 8.109100341796875, "learning_rate": 7.830522088353415e-07, "loss": 0.1092, "step": 115300 }, { "epoch": 1.428696, "grad_norm": 2.2968735694885254, "learning_rate": 7.750200803212853e-07, "loss": 0.1058, "step": 115400 }, { "epoch": 1.4294959999999999, "grad_norm": 4.020580768585205, "learning_rate": 7.66987951807229e-07, "loss": 0.1089, "step": 115500 }, { "epoch": 1.430296, "grad_norm": 1.7853455543518066, "learning_rate": 7.589558232931728e-07, "loss": 0.1077, "step": 115600 }, { "epoch": 1.431096, "grad_norm": 2.6213812828063965, "learning_rate": 7.509236947791165e-07, "loss": 0.114, "step": 115700 }, { "epoch": 1.431896, "grad_norm": 7.169987678527832, "learning_rate": 7.428915662650602e-07, "loss": 0.1247, "step": 115800 }, { "epoch": 1.432696, "grad_norm": 5.280975818634033, "learning_rate": 7.348594377510041e-07, "loss": 0.1117, "step": 115900 }, { "epoch": 1.4334959999999999, "grad_norm": 7.396920204162598, "learning_rate": 7.268273092369478e-07, "loss": 0.1218, "step": 116000 }, { "epoch": 1.4334959999999999, "eval_test1_cer": 0.04053700799163493, "eval_test1_cer_norm": 0.02625399188416933, "eval_test1_loss": 0.16978052258491516, "eval_test1_runtime": 2459.5813, "eval_test1_samples_per_second": 1.016, "eval_test1_steps_per_second": 0.254, "eval_test1_wer": 0.12650514592262166, "eval_test1_wer_norm": 0.06905520324965662, "step": 116000 }, { "epoch": 1.4334959999999999, "eval_test2_cer": 0.0906269832381379, "eval_test2_cer_norm": 0.07077006507592191, "eval_test2_loss": 0.30657845735549927, "eval_test2_runtime": 2479.2829, "eval_test2_samples_per_second": 1.008, "eval_test2_steps_per_second": 0.252, "eval_test2_wer": 0.20224879835202564, "eval_test2_wer_norm": 0.13654595461838184, "step": 116000 }, { "epoch": 1.434296, "grad_norm": 7.435978412628174, "learning_rate": 7.187951807228916e-07, "loss": 0.1312, "step": 116100 }, { "epoch": 1.435096, "grad_norm": 6.736993789672852, "learning_rate": 7.107630522088355e-07, "loss": 0.1245, "step": 116200 }, { "epoch": 1.435896, "grad_norm": 3.6509387493133545, "learning_rate": 7.027309236947792e-07, "loss": 0.1041, "step": 116300 }, { "epoch": 1.436696, "grad_norm": 5.3854851722717285, "learning_rate": 6.94698795180723e-07, "loss": 0.126, "step": 116400 }, { "epoch": 1.4374959999999999, "grad_norm": 5.283120155334473, "learning_rate": 6.866666666666667e-07, "loss": 0.1615, "step": 116500 }, { "epoch": 1.438296, "grad_norm": 3.9473323822021484, "learning_rate": 6.786345381526105e-07, "loss": 0.1218, "step": 116600 }, { "epoch": 1.439096, "grad_norm": 11.495280265808105, "learning_rate": 6.706024096385542e-07, "loss": 0.1165, "step": 116700 }, { "epoch": 1.439896, "grad_norm": 6.106124401092529, "learning_rate": 6.62570281124498e-07, "loss": 0.1291, "step": 116800 }, { "epoch": 1.440696, "grad_norm": 3.252533435821533, "learning_rate": 6.545381526104418e-07, "loss": 0.1302, "step": 116900 }, { "epoch": 1.4414959999999999, "grad_norm": 4.417702674865723, "learning_rate": 6.465060240963857e-07, "loss": 0.1185, "step": 117000 }, { "epoch": 1.442296, "grad_norm": 7.446925163269043, "learning_rate": 6.384738955823294e-07, "loss": 0.1181, "step": 117100 }, { "epoch": 1.443096, "grad_norm": 4.945169448852539, "learning_rate": 6.304417670682732e-07, "loss": 0.1146, "step": 117200 }, { "epoch": 1.443896, "grad_norm": 4.255674362182617, "learning_rate": 6.224096385542169e-07, "loss": 0.1149, "step": 117300 }, { "epoch": 1.444696, "grad_norm": 7.243592739105225, "learning_rate": 6.143775100401607e-07, "loss": 0.1266, "step": 117400 }, { "epoch": 1.445496, "grad_norm": 4.7151265144348145, "learning_rate": 6.063453815261044e-07, "loss": 0.1228, "step": 117500 }, { "epoch": 1.446296, "grad_norm": 3.492438793182373, "learning_rate": 5.983132530120483e-07, "loss": 0.107, "step": 117600 }, { "epoch": 1.447096, "grad_norm": 5.758606910705566, "learning_rate": 5.902811244979921e-07, "loss": 0.1104, "step": 117700 }, { "epoch": 1.447896, "grad_norm": 5.847075462341309, "learning_rate": 5.822489959839358e-07, "loss": 0.1234, "step": 117800 }, { "epoch": 1.448696, "grad_norm": 4.088864803314209, "learning_rate": 5.742168674698796e-07, "loss": 0.1247, "step": 117900 }, { "epoch": 1.449496, "grad_norm": 2.7524282932281494, "learning_rate": 5.661847389558233e-07, "loss": 0.0971, "step": 118000 }, { "epoch": 1.450296, "grad_norm": 6.989465236663818, "learning_rate": 5.581526104417671e-07, "loss": 0.126, "step": 118100 }, { "epoch": 1.451096, "grad_norm": 9.567557334899902, "learning_rate": 5.501204819277109e-07, "loss": 0.1191, "step": 118200 }, { "epoch": 1.451896, "grad_norm": 10.12435531616211, "learning_rate": 5.420883534136546e-07, "loss": 0.1096, "step": 118300 }, { "epoch": 1.452696, "grad_norm": 1.7563780546188354, "learning_rate": 5.340562248995985e-07, "loss": 0.1197, "step": 118400 }, { "epoch": 1.453496, "grad_norm": 5.218570232391357, "learning_rate": 5.260240963855423e-07, "loss": 0.1207, "step": 118500 }, { "epoch": 1.454296, "grad_norm": 4.665302753448486, "learning_rate": 5.17991967871486e-07, "loss": 0.115, "step": 118600 }, { "epoch": 1.455096, "grad_norm": 5.446451663970947, "learning_rate": 5.099598393574298e-07, "loss": 0.119, "step": 118700 }, { "epoch": 1.455896, "grad_norm": 4.688202381134033, "learning_rate": 5.020080321285141e-07, "loss": 0.1369, "step": 118800 }, { "epoch": 1.456696, "grad_norm": 3.5095245838165283, "learning_rate": 4.939759036144578e-07, "loss": 0.1094, "step": 118900 }, { "epoch": 1.457496, "grad_norm": 3.919813394546509, "learning_rate": 4.859437751004017e-07, "loss": 0.1113, "step": 119000 }, { "epoch": 1.458296, "grad_norm": 3.1427431106567383, "learning_rate": 4.779116465863454e-07, "loss": 0.1127, "step": 119100 }, { "epoch": 1.459096, "grad_norm": 5.548600673675537, "learning_rate": 4.698795180722892e-07, "loss": 0.1175, "step": 119200 }, { "epoch": 1.459896, "grad_norm": 3.4401326179504395, "learning_rate": 4.6184738955823296e-07, "loss": 0.1118, "step": 119300 }, { "epoch": 1.460696, "grad_norm": 0.7513042688369751, "learning_rate": 4.538152610441767e-07, "loss": 0.1227, "step": 119400 }, { "epoch": 1.461496, "grad_norm": 4.836828708648682, "learning_rate": 4.4578313253012054e-07, "loss": 0.1208, "step": 119500 }, { "epoch": 1.462296, "grad_norm": 9.92773723602295, "learning_rate": 4.377510040160643e-07, "loss": 0.1021, "step": 119600 }, { "epoch": 1.463096, "grad_norm": 4.6646575927734375, "learning_rate": 4.2971887550200806e-07, "loss": 0.1142, "step": 119700 }, { "epoch": 1.463896, "grad_norm": 0.4946214258670807, "learning_rate": 4.216867469879518e-07, "loss": 0.1334, "step": 119800 }, { "epoch": 1.464696, "grad_norm": 7.3409247398376465, "learning_rate": 4.1365461847389564e-07, "loss": 0.1387, "step": 119900 }, { "epoch": 1.465496, "grad_norm": 2.3244385719299316, "learning_rate": 4.056224899598394e-07, "loss": 0.1216, "step": 120000 }, { "epoch": 1.465496, "eval_test1_cer": 0.04472888191799238, "eval_test1_cer_norm": 0.029865296419910196, "eval_test1_loss": 0.16931886970996857, "eval_test1_runtime": 2461.9854, "eval_test1_samples_per_second": 1.015, "eval_test1_steps_per_second": 0.254, "eval_test1_wer": 0.13493104755240679, "eval_test1_wer_norm": 0.076477980069552, "step": 120000 }, { "epoch": 1.465496, "eval_test2_cer": 0.09805129353791019, "eval_test2_cer_norm": 0.07766985590331578, "eval_test2_loss": 0.3047462999820709, "eval_test2_runtime": 2488.9087, "eval_test2_samples_per_second": 1.004, "eval_test2_steps_per_second": 0.251, "eval_test2_wer": 0.21137560082398718, "eval_test2_wer_norm": 0.1462296584918634, "step": 120000 }, { "epoch": 1.466296, "grad_norm": 5.934924602508545, "learning_rate": 3.9759036144578316e-07, "loss": 0.1166, "step": 120100 }, { "epoch": 1.467096, "grad_norm": 6.291118621826172, "learning_rate": 3.895582329317269e-07, "loss": 0.1144, "step": 120200 }, { "epoch": 1.467896, "grad_norm": 7.283012866973877, "learning_rate": 3.8152610441767073e-07, "loss": 0.1258, "step": 120300 }, { "epoch": 1.468696, "grad_norm": 2.082486391067505, "learning_rate": 3.734939759036145e-07, "loss": 0.1209, "step": 120400 }, { "epoch": 1.469496, "grad_norm": 7.8560967445373535, "learning_rate": 3.6546184738955826e-07, "loss": 0.115, "step": 120500 }, { "epoch": 1.470296, "grad_norm": 6.016510963439941, "learning_rate": 3.57429718875502e-07, "loss": 0.1082, "step": 120600 }, { "epoch": 1.471096, "grad_norm": 2.1153485774993896, "learning_rate": 3.4939759036144583e-07, "loss": 0.1286, "step": 120700 }, { "epoch": 1.471896, "grad_norm": 5.02449369430542, "learning_rate": 3.413654618473896e-07, "loss": 0.111, "step": 120800 }, { "epoch": 1.472696, "grad_norm": 2.6848514080047607, "learning_rate": 3.3333333333333335e-07, "loss": 0.1161, "step": 120900 }, { "epoch": 1.473496, "grad_norm": 3.306321620941162, "learning_rate": 3.253012048192771e-07, "loss": 0.1164, "step": 121000 }, { "epoch": 1.474296, "grad_norm": 5.202853202819824, "learning_rate": 3.1726907630522093e-07, "loss": 0.1148, "step": 121100 }, { "epoch": 1.475096, "grad_norm": 6.273738861083984, "learning_rate": 3.092369477911647e-07, "loss": 0.1257, "step": 121200 }, { "epoch": 1.475896, "grad_norm": 1.6317389011383057, "learning_rate": 3.01285140562249e-07, "loss": 0.1169, "step": 121300 }, { "epoch": 1.476696, "grad_norm": 3.1588313579559326, "learning_rate": 2.932530120481928e-07, "loss": 0.1243, "step": 121400 }, { "epoch": 1.477496, "grad_norm": 3.1574482917785645, "learning_rate": 2.852208835341366e-07, "loss": 0.1254, "step": 121500 }, { "epoch": 1.478296, "grad_norm": 3.313225507736206, "learning_rate": 2.7718875502008034e-07, "loss": 0.1196, "step": 121600 }, { "epoch": 1.479096, "grad_norm": 3.645125389099121, "learning_rate": 2.691566265060241e-07, "loss": 0.0956, "step": 121700 }, { "epoch": 1.479896, "grad_norm": 0.8086249232292175, "learning_rate": 2.611244979919679e-07, "loss": 0.1171, "step": 121800 }, { "epoch": 1.480696, "grad_norm": 4.813057899475098, "learning_rate": 2.530923694779117e-07, "loss": 0.1254, "step": 121900 }, { "epoch": 1.481496, "grad_norm": 3.0106194019317627, "learning_rate": 2.4506024096385544e-07, "loss": 0.109, "step": 122000 }, { "epoch": 1.482296, "grad_norm": 3.1917545795440674, "learning_rate": 2.370281124497992e-07, "loss": 0.1082, "step": 122100 }, { "epoch": 1.483096, "grad_norm": 8.249537467956543, "learning_rate": 2.28995983935743e-07, "loss": 0.1334, "step": 122200 }, { "epoch": 1.483896, "grad_norm": 2.7427356243133545, "learning_rate": 2.2096385542168677e-07, "loss": 0.1209, "step": 122300 }, { "epoch": 1.484696, "grad_norm": 2.4920859336853027, "learning_rate": 2.1293172690763056e-07, "loss": 0.1041, "step": 122400 }, { "epoch": 1.485496, "grad_norm": 5.013920307159424, "learning_rate": 2.0489959839357432e-07, "loss": 0.1114, "step": 122500 }, { "epoch": 1.486296, "grad_norm": 0.5811383128166199, "learning_rate": 1.968674698795181e-07, "loss": 0.1228, "step": 122600 }, { "epoch": 1.487096, "grad_norm": 5.773433685302734, "learning_rate": 1.8883534136546187e-07, "loss": 0.1284, "step": 122700 }, { "epoch": 1.487896, "grad_norm": 3.8643128871917725, "learning_rate": 1.8080321285140566e-07, "loss": 0.1159, "step": 122800 }, { "epoch": 1.488696, "grad_norm": 5.357062816619873, "learning_rate": 1.7277108433734942e-07, "loss": 0.1225, "step": 122900 }, { "epoch": 1.489496, "grad_norm": 4.911788463592529, "learning_rate": 1.647389558232932e-07, "loss": 0.1116, "step": 123000 }, { "epoch": 1.490296, "grad_norm": 4.041905403137207, "learning_rate": 1.5670682730923697e-07, "loss": 0.1236, "step": 123100 }, { "epoch": 1.491096, "grad_norm": 5.671024799346924, "learning_rate": 1.4867469879518073e-07, "loss": 0.1168, "step": 123200 }, { "epoch": 1.491896, "grad_norm": 8.380229949951172, "learning_rate": 1.4064257028112452e-07, "loss": 0.1192, "step": 123300 }, { "epoch": 1.492696, "grad_norm": 8.371071815490723, "learning_rate": 1.3261044176706828e-07, "loss": 0.102, "step": 123400 }, { "epoch": 1.493496, "grad_norm": 4.045628547668457, "learning_rate": 1.2457831325301207e-07, "loss": 0.1177, "step": 123500 }, { "epoch": 1.494296, "grad_norm": 7.542957782745361, "learning_rate": 1.1654618473895584e-07, "loss": 0.108, "step": 123600 }, { "epoch": 2.000592, "grad_norm": 5.552263259887695, "learning_rate": 1.0851405622489961e-07, "loss": 0.1093, "step": 123700 }, { "epoch": 2.001392, "grad_norm": 4.822888374328613, "learning_rate": 1.0048192771084339e-07, "loss": 0.1042, "step": 123800 }, { "epoch": 2.002192, "grad_norm": 0.32907435297966003, "learning_rate": 9.244979919678716e-08, "loss": 0.1014, "step": 123900 }, { "epoch": 2.002992, "grad_norm": 5.526859760284424, "learning_rate": 8.441767068273094e-08, "loss": 0.0819, "step": 124000 }, { "epoch": 2.002992, "eval_test1_cer": 0.04037829561580402, "eval_test1_cer_norm": 0.026181957884121306, "eval_test1_loss": 0.1684691458940506, "eval_test1_runtime": 2457.6805, "eval_test1_samples_per_second": 1.017, "eval_test1_steps_per_second": 0.254, "eval_test1_wer": 0.1262135922330097, "eval_test1_wer_norm": 0.06817849732604693, "step": 124000 }, { "epoch": 2.002992, "eval_test2_cer": 0.09239089857020197, "eval_test2_cer_norm": 0.07192245119305857, "eval_test2_loss": 0.30393916368484497, "eval_test2_runtime": 2485.4178, "eval_test2_samples_per_second": 1.006, "eval_test2_steps_per_second": 0.251, "eval_test2_wer": 0.20482375829709315, "eval_test2_wer_norm": 0.13981205592482238, "step": 124000 } ], "logging_steps": 100, "max_steps": 125000, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 4000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.062139105004749e+20, "train_batch_size": 4, "trial_name": null, "trial_params": null }