|
{ |
|
"best_metric": 6.049679487179487, |
|
"best_model_checkpoint": "./exp/whisper-small-taiwanese-asr-v2/checkpoint-7000", |
|
"epoch": 22.675736961451246, |
|
"eval_steps": 1000, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05668934240362812, |
|
"grad_norm": 39.12731170654297, |
|
"learning_rate": 4.2000000000000006e-07, |
|
"loss": 3.459, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.11337868480725624, |
|
"grad_norm": 27.20041847229004, |
|
"learning_rate": 9.200000000000001e-07, |
|
"loss": 3.0795, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.17006802721088435, |
|
"grad_norm": 23.331146240234375, |
|
"learning_rate": 1.42e-06, |
|
"loss": 2.4576, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.22675736961451248, |
|
"grad_norm": 18.951128005981445, |
|
"learning_rate": 1.9200000000000003e-06, |
|
"loss": 1.8809, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2834467120181406, |
|
"grad_norm": 18.83504295349121, |
|
"learning_rate": 2.42e-06, |
|
"loss": 1.3831, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.3401360544217687, |
|
"grad_norm": 16.38509750366211, |
|
"learning_rate": 2.92e-06, |
|
"loss": 1.049, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3968253968253968, |
|
"grad_norm": 17.442344665527344, |
|
"learning_rate": 3.4200000000000007e-06, |
|
"loss": 0.8571, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.45351473922902497, |
|
"grad_norm": 15.140103340148926, |
|
"learning_rate": 3.920000000000001e-06, |
|
"loss": 0.6812, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5102040816326531, |
|
"grad_norm": 16.342971801757812, |
|
"learning_rate": 4.42e-06, |
|
"loss": 0.5756, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5668934240362812, |
|
"grad_norm": 18.02117347717285, |
|
"learning_rate": 4.92e-06, |
|
"loss": 0.5135, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6235827664399093, |
|
"grad_norm": 19.521883010864258, |
|
"learning_rate": 5.420000000000001e-06, |
|
"loss": 0.4338, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.6802721088435374, |
|
"grad_norm": 14.71985149383545, |
|
"learning_rate": 5.92e-06, |
|
"loss": 0.3755, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7369614512471655, |
|
"grad_norm": 6.9398722648620605, |
|
"learning_rate": 6.42e-06, |
|
"loss": 0.3662, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.7936507936507936, |
|
"grad_norm": 8.429058074951172, |
|
"learning_rate": 6.92e-06, |
|
"loss": 0.3196, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8503401360544217, |
|
"grad_norm": 7.899880409240723, |
|
"learning_rate": 7.420000000000001e-06, |
|
"loss": 0.3186, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.9070294784580499, |
|
"grad_norm": 9.854070663452148, |
|
"learning_rate": 7.92e-06, |
|
"loss": 0.2849, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.963718820861678, |
|
"grad_norm": 9.135157585144043, |
|
"learning_rate": 8.42e-06, |
|
"loss": 0.3387, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.0204081632653061, |
|
"grad_norm": 7.550724983215332, |
|
"learning_rate": 8.920000000000001e-06, |
|
"loss": 0.2661, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.0770975056689343, |
|
"grad_norm": 6.407596111297607, |
|
"learning_rate": 9.42e-06, |
|
"loss": 0.1618, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.1337868480725624, |
|
"grad_norm": 8.470088958740234, |
|
"learning_rate": 9.920000000000002e-06, |
|
"loss": 0.2531, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.1904761904761905, |
|
"grad_norm": 14.254158020019531, |
|
"learning_rate": 9.977894736842106e-06, |
|
"loss": 0.2415, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.2471655328798186, |
|
"grad_norm": 5.5717339515686035, |
|
"learning_rate": 9.951578947368423e-06, |
|
"loss": 0.1863, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.3038548752834467, |
|
"grad_norm": 9.035225868225098, |
|
"learning_rate": 9.925263157894738e-06, |
|
"loss": 0.1772, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.3605442176870748, |
|
"grad_norm": 12.706698417663574, |
|
"learning_rate": 9.898947368421054e-06, |
|
"loss": 0.1589, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.417233560090703, |
|
"grad_norm": 8.393030166625977, |
|
"learning_rate": 9.87263157894737e-06, |
|
"loss": 0.172, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.473922902494331, |
|
"grad_norm": 5.842218399047852, |
|
"learning_rate": 9.846315789473684e-06, |
|
"loss": 0.1656, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.5306122448979593, |
|
"grad_norm": 9.525617599487305, |
|
"learning_rate": 9.820000000000001e-06, |
|
"loss": 0.186, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.5873015873015874, |
|
"grad_norm": 19.124101638793945, |
|
"learning_rate": 9.793684210526316e-06, |
|
"loss": 0.1943, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.6439909297052155, |
|
"grad_norm": 9.63739013671875, |
|
"learning_rate": 9.767368421052632e-06, |
|
"loss": 0.1514, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.7006802721088436, |
|
"grad_norm": 14.47154712677002, |
|
"learning_rate": 9.741052631578947e-06, |
|
"loss": 0.142, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.7573696145124718, |
|
"grad_norm": 6.553714275360107, |
|
"learning_rate": 9.714736842105264e-06, |
|
"loss": 0.1563, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.8140589569160999, |
|
"grad_norm": 6.272864818572998, |
|
"learning_rate": 9.68842105263158e-06, |
|
"loss": 0.1454, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.870748299319728, |
|
"grad_norm": 6.724349021911621, |
|
"learning_rate": 9.662105263157896e-06, |
|
"loss": 0.3289, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.927437641723356, |
|
"grad_norm": 6.75054931640625, |
|
"learning_rate": 9.635789473684212e-06, |
|
"loss": 0.1455, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.9841269841269842, |
|
"grad_norm": 7.212646961212158, |
|
"learning_rate": 9.609473684210527e-06, |
|
"loss": 0.2098, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 2.0408163265306123, |
|
"grad_norm": 4.189349174499512, |
|
"learning_rate": 9.583157894736842e-06, |
|
"loss": 0.1016, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.0975056689342404, |
|
"grad_norm": 9.265031814575195, |
|
"learning_rate": 9.556842105263159e-06, |
|
"loss": 0.1116, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 2.1541950113378685, |
|
"grad_norm": 4.884438991546631, |
|
"learning_rate": 9.530526315789474e-06, |
|
"loss": 0.1209, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.2108843537414966, |
|
"grad_norm": 4.258934497833252, |
|
"learning_rate": 9.50421052631579e-06, |
|
"loss": 0.1331, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 2.2675736961451247, |
|
"grad_norm": 5.421435832977295, |
|
"learning_rate": 9.477894736842106e-06, |
|
"loss": 0.083, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.2675736961451247, |
|
"eval_loss": 0.1953069269657135, |
|
"eval_runtime": 158.2725, |
|
"eval_samples_per_second": 2.957, |
|
"eval_steps_per_second": 0.493, |
|
"eval_wer": 8.173076923076923, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.324263038548753, |
|
"grad_norm": 15.091805458068848, |
|
"learning_rate": 9.451578947368422e-06, |
|
"loss": 0.0908, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 2.380952380952381, |
|
"grad_norm": 9.11117172241211, |
|
"learning_rate": 9.425263157894737e-06, |
|
"loss": 0.1528, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.437641723356009, |
|
"grad_norm": 8.174238204956055, |
|
"learning_rate": 9.398947368421052e-06, |
|
"loss": 0.059, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 2.494331065759637, |
|
"grad_norm": 12.375757217407227, |
|
"learning_rate": 9.372631578947369e-06, |
|
"loss": 0.0773, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.5510204081632653, |
|
"grad_norm": 8.518278121948242, |
|
"learning_rate": 9.346315789473684e-06, |
|
"loss": 0.0714, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 2.6077097505668934, |
|
"grad_norm": 7.095608234405518, |
|
"learning_rate": 9.32e-06, |
|
"loss": 0.0778, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.6643990929705215, |
|
"grad_norm": 5.508459568023682, |
|
"learning_rate": 9.293684210526317e-06, |
|
"loss": 0.0696, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 2.7210884353741496, |
|
"grad_norm": 1.7915226221084595, |
|
"learning_rate": 9.267368421052632e-06, |
|
"loss": 0.0504, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"grad_norm": 4.419779300689697, |
|
"learning_rate": 9.241052631578949e-06, |
|
"loss": 0.0667, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 2.834467120181406, |
|
"grad_norm": 4.396119594573975, |
|
"learning_rate": 9.214736842105264e-06, |
|
"loss": 0.1037, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.891156462585034, |
|
"grad_norm": 4.604401111602783, |
|
"learning_rate": 9.18842105263158e-06, |
|
"loss": 0.1276, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 2.947845804988662, |
|
"grad_norm": 6.504410743713379, |
|
"learning_rate": 9.162105263157895e-06, |
|
"loss": 0.0683, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.00453514739229, |
|
"grad_norm": 3.4603614807128906, |
|
"learning_rate": 9.13578947368421e-06, |
|
"loss": 0.0548, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 3.061224489795918, |
|
"grad_norm": 8.05783748626709, |
|
"learning_rate": 9.109473684210527e-06, |
|
"loss": 0.0423, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 3.1179138321995463, |
|
"grad_norm": 1.4570063352584839, |
|
"learning_rate": 9.083157894736842e-06, |
|
"loss": 0.0388, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 3.1746031746031744, |
|
"grad_norm": 2.493945360183716, |
|
"learning_rate": 9.056842105263159e-06, |
|
"loss": 0.0704, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.2312925170068025, |
|
"grad_norm": 3.1329710483551025, |
|
"learning_rate": 9.030526315789474e-06, |
|
"loss": 0.0418, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 3.287981859410431, |
|
"grad_norm": 0.9819092154502869, |
|
"learning_rate": 9.00421052631579e-06, |
|
"loss": 0.0572, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 3.3446712018140587, |
|
"grad_norm": 3.2691588401794434, |
|
"learning_rate": 8.977894736842107e-06, |
|
"loss": 0.0391, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 3.4013605442176873, |
|
"grad_norm": 0.9585368037223816, |
|
"learning_rate": 8.951578947368422e-06, |
|
"loss": 0.0468, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.458049886621315, |
|
"grad_norm": 5.486790657043457, |
|
"learning_rate": 8.925263157894739e-06, |
|
"loss": 0.0423, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 3.5147392290249435, |
|
"grad_norm": 7.715363502502441, |
|
"learning_rate": 8.898947368421054e-06, |
|
"loss": 0.119, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 3.571428571428571, |
|
"grad_norm": 0.7970800995826721, |
|
"learning_rate": 8.872631578947369e-06, |
|
"loss": 0.0264, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 3.6281179138321997, |
|
"grad_norm": 9.640973091125488, |
|
"learning_rate": 8.846315789473685e-06, |
|
"loss": 0.0305, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.6848072562358274, |
|
"grad_norm": 5.621988296508789, |
|
"learning_rate": 8.82e-06, |
|
"loss": 0.0329, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 3.741496598639456, |
|
"grad_norm": 2.357621908187866, |
|
"learning_rate": 8.793684210526317e-06, |
|
"loss": 0.0657, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 3.798185941043084, |
|
"grad_norm": 3.6409835815429688, |
|
"learning_rate": 8.767368421052632e-06, |
|
"loss": 0.0311, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 3.854875283446712, |
|
"grad_norm": 2.0802805423736572, |
|
"learning_rate": 8.741052631578949e-06, |
|
"loss": 0.0763, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.9115646258503403, |
|
"grad_norm": 14.157549858093262, |
|
"learning_rate": 8.714736842105264e-06, |
|
"loss": 0.029, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 3.9682539682539684, |
|
"grad_norm": 3.303739309310913, |
|
"learning_rate": 8.688421052631579e-06, |
|
"loss": 0.0429, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 4.024943310657596, |
|
"grad_norm": 3.3203935623168945, |
|
"learning_rate": 8.662105263157895e-06, |
|
"loss": 0.0212, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 4.081632653061225, |
|
"grad_norm": 0.7093961834907532, |
|
"learning_rate": 8.63578947368421e-06, |
|
"loss": 0.012, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.138321995464852, |
|
"grad_norm": 2.5663654804229736, |
|
"learning_rate": 8.609473684210527e-06, |
|
"loss": 0.0232, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 4.195011337868481, |
|
"grad_norm": 27.419864654541016, |
|
"learning_rate": 8.583157894736843e-06, |
|
"loss": 0.0192, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 4.2517006802721085, |
|
"grad_norm": 0.45838263630867004, |
|
"learning_rate": 8.556842105263158e-06, |
|
"loss": 0.0577, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 4.308390022675737, |
|
"grad_norm": 1.192967176437378, |
|
"learning_rate": 8.530526315789475e-06, |
|
"loss": 0.0622, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 4.365079365079365, |
|
"grad_norm": 5.143068790435791, |
|
"learning_rate": 8.50421052631579e-06, |
|
"loss": 0.0217, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 4.421768707482993, |
|
"grad_norm": 3.8326940536499023, |
|
"learning_rate": 8.477894736842107e-06, |
|
"loss": 0.0212, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 4.478458049886621, |
|
"grad_norm": 1.7538135051727295, |
|
"learning_rate": 8.451578947368422e-06, |
|
"loss": 0.0499, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 4.535147392290249, |
|
"grad_norm": 0.5768720507621765, |
|
"learning_rate": 8.425263157894737e-06, |
|
"loss": 0.0444, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.535147392290249, |
|
"eval_loss": 0.19730134308338165, |
|
"eval_runtime": 158.9638, |
|
"eval_samples_per_second": 2.944, |
|
"eval_steps_per_second": 0.491, |
|
"eval_wer": 6.944444444444445, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.591836734693878, |
|
"grad_norm": 4.980081558227539, |
|
"learning_rate": 8.398947368421053e-06, |
|
"loss": 0.0205, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 4.648526077097506, |
|
"grad_norm": 2.384981155395508, |
|
"learning_rate": 8.372631578947368e-06, |
|
"loss": 0.027, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 4.705215419501133, |
|
"grad_norm": 1.0443973541259766, |
|
"learning_rate": 8.346315789473685e-06, |
|
"loss": 0.0537, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 4.761904761904762, |
|
"grad_norm": 1.0461288690567017, |
|
"learning_rate": 8.32e-06, |
|
"loss": 0.0146, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 4.81859410430839, |
|
"grad_norm": 6.465195655822754, |
|
"learning_rate": 8.293684210526317e-06, |
|
"loss": 0.0349, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 4.875283446712018, |
|
"grad_norm": 4.054196834564209, |
|
"learning_rate": 8.267368421052632e-06, |
|
"loss": 0.0186, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 4.931972789115647, |
|
"grad_norm": 1.7794383764266968, |
|
"learning_rate": 8.241052631578948e-06, |
|
"loss": 0.0224, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 4.988662131519274, |
|
"grad_norm": 5.188144683837891, |
|
"learning_rate": 8.214736842105265e-06, |
|
"loss": 0.047, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 5.045351473922903, |
|
"grad_norm": 0.08940722048282623, |
|
"learning_rate": 8.18842105263158e-06, |
|
"loss": 0.0186, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 5.1020408163265305, |
|
"grad_norm": 3.670670747756958, |
|
"learning_rate": 8.162105263157895e-06, |
|
"loss": 0.0183, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 5.158730158730159, |
|
"grad_norm": 2.3486833572387695, |
|
"learning_rate": 8.135789473684212e-06, |
|
"loss": 0.015, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 5.215419501133787, |
|
"grad_norm": 4.572961807250977, |
|
"learning_rate": 8.109473684210527e-06, |
|
"loss": 0.0239, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 5.272108843537415, |
|
"grad_norm": 1.114425539970398, |
|
"learning_rate": 8.083157894736843e-06, |
|
"loss": 0.0396, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 5.328798185941043, |
|
"grad_norm": 0.35853487253189087, |
|
"learning_rate": 8.056842105263158e-06, |
|
"loss": 0.0185, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 5.3854875283446715, |
|
"grad_norm": 5.747114181518555, |
|
"learning_rate": 8.030526315789475e-06, |
|
"loss": 0.0136, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 5.442176870748299, |
|
"grad_norm": 0.33728256821632385, |
|
"learning_rate": 8.00421052631579e-06, |
|
"loss": 0.0366, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 5.498866213151928, |
|
"grad_norm": 13.636420249938965, |
|
"learning_rate": 7.977894736842105e-06, |
|
"loss": 0.0441, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 5.555555555555555, |
|
"grad_norm": 2.6928389072418213, |
|
"learning_rate": 7.951578947368421e-06, |
|
"loss": 0.014, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 5.612244897959184, |
|
"grad_norm": 0.18423891067504883, |
|
"learning_rate": 7.925263157894736e-06, |
|
"loss": 0.007, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 5.668934240362812, |
|
"grad_norm": 9.745643615722656, |
|
"learning_rate": 7.898947368421053e-06, |
|
"loss": 0.0065, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.72562358276644, |
|
"grad_norm": 1.0876612663269043, |
|
"learning_rate": 7.87263157894737e-06, |
|
"loss": 0.0464, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 5.782312925170068, |
|
"grad_norm": 3.2040324211120605, |
|
"learning_rate": 7.846315789473685e-06, |
|
"loss": 0.0085, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 5.839002267573696, |
|
"grad_norm": 0.07621826976537704, |
|
"learning_rate": 7.820000000000001e-06, |
|
"loss": 0.0417, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 5.895691609977324, |
|
"grad_norm": 0.11689532548189163, |
|
"learning_rate": 7.793684210526316e-06, |
|
"loss": 0.0113, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 5.9523809523809526, |
|
"grad_norm": 1.2615737915039062, |
|
"learning_rate": 7.767368421052633e-06, |
|
"loss": 0.017, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 6.00907029478458, |
|
"grad_norm": 6.465045928955078, |
|
"learning_rate": 7.741052631578948e-06, |
|
"loss": 0.0134, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 6.065759637188209, |
|
"grad_norm": 0.6422730088233948, |
|
"learning_rate": 7.714736842105263e-06, |
|
"loss": 0.0407, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 6.122448979591836, |
|
"grad_norm": 0.07282353192567825, |
|
"learning_rate": 7.68842105263158e-06, |
|
"loss": 0.0066, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 6.179138321995465, |
|
"grad_norm": 0.13422216475009918, |
|
"learning_rate": 7.662105263157895e-06, |
|
"loss": 0.0345, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 6.235827664399093, |
|
"grad_norm": 4.29299783706665, |
|
"learning_rate": 7.635789473684211e-06, |
|
"loss": 0.0047, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 6.292517006802721, |
|
"grad_norm": 0.05262218415737152, |
|
"learning_rate": 7.609473684210526e-06, |
|
"loss": 0.0023, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 6.349206349206349, |
|
"grad_norm": 6.577340602874756, |
|
"learning_rate": 7.583157894736842e-06, |
|
"loss": 0.005, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 6.405895691609977, |
|
"grad_norm": 0.2815414071083069, |
|
"learning_rate": 7.556842105263158e-06, |
|
"loss": 0.0155, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 6.462585034013605, |
|
"grad_norm": 0.05367182940244675, |
|
"learning_rate": 7.5305263157894745e-06, |
|
"loss": 0.04, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 6.519274376417234, |
|
"grad_norm": 0.14756247401237488, |
|
"learning_rate": 7.50421052631579e-06, |
|
"loss": 0.0049, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 6.575963718820862, |
|
"grad_norm": 2.2667605876922607, |
|
"learning_rate": 7.477894736842106e-06, |
|
"loss": 0.0028, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 6.63265306122449, |
|
"grad_norm": 0.13220186531543732, |
|
"learning_rate": 7.451578947368422e-06, |
|
"loss": 0.0114, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 6.6893424036281175, |
|
"grad_norm": 2.413485288619995, |
|
"learning_rate": 7.425263157894738e-06, |
|
"loss": 0.0028, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 6.746031746031746, |
|
"grad_norm": 1.9141589403152466, |
|
"learning_rate": 7.398947368421054e-06, |
|
"loss": 0.0079, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 6.802721088435375, |
|
"grad_norm": 5.837501525878906, |
|
"learning_rate": 7.3726315789473694e-06, |
|
"loss": 0.0294, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.802721088435375, |
|
"eval_loss": 0.19840003550052643, |
|
"eval_runtime": 156.6444, |
|
"eval_samples_per_second": 2.988, |
|
"eval_steps_per_second": 0.498, |
|
"eval_wer": 6.517094017094018, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.859410430839002, |
|
"grad_norm": 0.0996256172657013, |
|
"learning_rate": 7.346315789473684e-06, |
|
"loss": 0.0027, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 6.91609977324263, |
|
"grad_norm": 0.028051115572452545, |
|
"learning_rate": 7.32e-06, |
|
"loss": 0.008, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 6.9727891156462585, |
|
"grad_norm": 0.14238622784614563, |
|
"learning_rate": 7.293684210526316e-06, |
|
"loss": 0.0263, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 7.029478458049887, |
|
"grad_norm": 0.112076535820961, |
|
"learning_rate": 7.267368421052632e-06, |
|
"loss": 0.0192, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 7.086167800453515, |
|
"grad_norm": 0.14218856394290924, |
|
"learning_rate": 7.241052631578948e-06, |
|
"loss": 0.0099, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 7.142857142857143, |
|
"grad_norm": 0.08735861629247665, |
|
"learning_rate": 7.2147368421052635e-06, |
|
"loss": 0.0013, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 7.199546485260771, |
|
"grad_norm": 0.07858515530824661, |
|
"learning_rate": 7.18842105263158e-06, |
|
"loss": 0.043, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 7.2562358276643995, |
|
"grad_norm": 0.04395943507552147, |
|
"learning_rate": 7.162105263157896e-06, |
|
"loss": 0.0196, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 7.312925170068027, |
|
"grad_norm": 0.13118867576122284, |
|
"learning_rate": 7.135789473684212e-06, |
|
"loss": 0.0032, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 7.369614512471656, |
|
"grad_norm": 0.5723868608474731, |
|
"learning_rate": 7.109473684210528e-06, |
|
"loss": 0.0193, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 7.426303854875283, |
|
"grad_norm": 0.17946386337280273, |
|
"learning_rate": 7.08421052631579e-06, |
|
"loss": 0.0066, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 7.482993197278912, |
|
"grad_norm": 0.31329983472824097, |
|
"learning_rate": 7.057894736842106e-06, |
|
"loss": 0.004, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 7.5396825396825395, |
|
"grad_norm": 0.08119330555200577, |
|
"learning_rate": 7.031578947368422e-06, |
|
"loss": 0.0079, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 7.596371882086168, |
|
"grad_norm": 0.2911977767944336, |
|
"learning_rate": 7.005263157894738e-06, |
|
"loss": 0.0053, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 7.653061224489796, |
|
"grad_norm": 0.024690864607691765, |
|
"learning_rate": 6.9789473684210525e-06, |
|
"loss": 0.0054, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 7.709750566893424, |
|
"grad_norm": 0.09139782190322876, |
|
"learning_rate": 6.953684210526316e-06, |
|
"loss": 0.0043, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 7.766439909297052, |
|
"grad_norm": 0.3346998393535614, |
|
"learning_rate": 6.9273684210526326e-06, |
|
"loss": 0.0063, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 7.8231292517006805, |
|
"grad_norm": 2.5169262886047363, |
|
"learning_rate": 6.901052631578948e-06, |
|
"loss": 0.0036, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 7.879818594104308, |
|
"grad_norm": 0.05118921771645546, |
|
"learning_rate": 6.874736842105264e-06, |
|
"loss": 0.0152, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 7.936507936507937, |
|
"grad_norm": 0.141609787940979, |
|
"learning_rate": 6.84842105263158e-06, |
|
"loss": 0.0029, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 7.993197278911564, |
|
"grad_norm": 0.059452034533023834, |
|
"learning_rate": 6.822105263157896e-06, |
|
"loss": 0.0084, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 8.049886621315192, |
|
"grad_norm": 9.327384948730469, |
|
"learning_rate": 6.795789473684211e-06, |
|
"loss": 0.0056, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 8.106575963718821, |
|
"grad_norm": 0.031346723437309265, |
|
"learning_rate": 6.769473684210527e-06, |
|
"loss": 0.0095, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 8.16326530612245, |
|
"grad_norm": 0.03407048434019089, |
|
"learning_rate": 6.7431578947368425e-06, |
|
"loss": 0.031, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 8.219954648526077, |
|
"grad_norm": 0.023022985085844994, |
|
"learning_rate": 6.716842105263158e-06, |
|
"loss": 0.0061, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 8.276643990929704, |
|
"grad_norm": 0.1935151070356369, |
|
"learning_rate": 6.690526315789474e-06, |
|
"loss": 0.0049, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 8.333333333333334, |
|
"grad_norm": 1.2079312801361084, |
|
"learning_rate": 6.66421052631579e-06, |
|
"loss": 0.0107, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 8.390022675736962, |
|
"grad_norm": 0.20077985525131226, |
|
"learning_rate": 6.637894736842106e-06, |
|
"loss": 0.0021, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 8.44671201814059, |
|
"grad_norm": 18.450538635253906, |
|
"learning_rate": 6.611578947368421e-06, |
|
"loss": 0.0159, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 8.503401360544217, |
|
"grad_norm": 0.03869379311800003, |
|
"learning_rate": 6.585263157894738e-06, |
|
"loss": 0.0052, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 8.560090702947846, |
|
"grad_norm": 0.026166923344135284, |
|
"learning_rate": 6.558947368421054e-06, |
|
"loss": 0.0007, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 8.616780045351474, |
|
"grad_norm": 0.02891625091433525, |
|
"learning_rate": 6.532631578947369e-06, |
|
"loss": 0.0041, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 8.673469387755102, |
|
"grad_norm": 0.08599188923835754, |
|
"learning_rate": 6.506315789473685e-06, |
|
"loss": 0.0009, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 8.73015873015873, |
|
"grad_norm": 0.017651915550231934, |
|
"learning_rate": 6.480000000000001e-06, |
|
"loss": 0.0211, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 8.786848072562359, |
|
"grad_norm": 14.024144172668457, |
|
"learning_rate": 6.4536842105263165e-06, |
|
"loss": 0.021, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 8.843537414965986, |
|
"grad_norm": 0.057578567415475845, |
|
"learning_rate": 6.427368421052632e-06, |
|
"loss": 0.0021, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 8.900226757369614, |
|
"grad_norm": 0.04060237482190132, |
|
"learning_rate": 6.401052631578948e-06, |
|
"loss": 0.0025, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 8.956916099773242, |
|
"grad_norm": 0.04346761852502823, |
|
"learning_rate": 6.374736842105264e-06, |
|
"loss": 0.0055, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 9.013605442176871, |
|
"grad_norm": 0.014886971563100815, |
|
"learning_rate": 6.348421052631579e-06, |
|
"loss": 0.0067, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 9.070294784580499, |
|
"grad_norm": 2.309483289718628, |
|
"learning_rate": 6.322105263157895e-06, |
|
"loss": 0.0334, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 9.070294784580499, |
|
"eval_loss": 0.20987384021282196, |
|
"eval_runtime": 157.5775, |
|
"eval_samples_per_second": 2.97, |
|
"eval_steps_per_second": 0.495, |
|
"eval_wer": 6.303418803418803, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 9.126984126984127, |
|
"grad_norm": 0.2992580235004425, |
|
"learning_rate": 6.2957894736842105e-06, |
|
"loss": 0.0048, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 9.183673469387756, |
|
"grad_norm": 0.1327856183052063, |
|
"learning_rate": 6.269473684210526e-06, |
|
"loss": 0.0003, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 9.240362811791384, |
|
"grad_norm": 0.034492090344429016, |
|
"learning_rate": 6.243157894736842e-06, |
|
"loss": 0.0007, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 9.297052154195011, |
|
"grad_norm": 0.021785929799079895, |
|
"learning_rate": 6.216842105263159e-06, |
|
"loss": 0.0013, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 9.353741496598639, |
|
"grad_norm": 0.17917415499687195, |
|
"learning_rate": 6.190526315789475e-06, |
|
"loss": 0.0186, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 9.410430839002268, |
|
"grad_norm": 0.04263261333107948, |
|
"learning_rate": 6.1642105263157905e-06, |
|
"loss": 0.0011, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 9.467120181405896, |
|
"grad_norm": 0.06537426263093948, |
|
"learning_rate": 6.137894736842106e-06, |
|
"loss": 0.0007, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 9.523809523809524, |
|
"grad_norm": 0.05984114482998848, |
|
"learning_rate": 6.111578947368422e-06, |
|
"loss": 0.0031, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 9.580498866213151, |
|
"grad_norm": 0.0186479389667511, |
|
"learning_rate": 6.085263157894737e-06, |
|
"loss": 0.0067, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 9.63718820861678, |
|
"grad_norm": 0.01856757327914238, |
|
"learning_rate": 6.058947368421053e-06, |
|
"loss": 0.0012, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 9.693877551020408, |
|
"grad_norm": 0.01589180715382099, |
|
"learning_rate": 6.032631578947369e-06, |
|
"loss": 0.0033, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 9.750566893424036, |
|
"grad_norm": 0.013659857213497162, |
|
"learning_rate": 6.0063157894736845e-06, |
|
"loss": 0.0136, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 9.807256235827664, |
|
"grad_norm": 0.07311205565929413, |
|
"learning_rate": 5.98e-06, |
|
"loss": 0.0114, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 9.863945578231293, |
|
"grad_norm": 0.00827726535499096, |
|
"learning_rate": 5.953684210526316e-06, |
|
"loss": 0.0006, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 9.920634920634921, |
|
"grad_norm": 0.02168506383895874, |
|
"learning_rate": 5.927368421052632e-06, |
|
"loss": 0.0141, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 9.977324263038549, |
|
"grad_norm": 0.09996296465396881, |
|
"learning_rate": 5.901052631578947e-06, |
|
"loss": 0.0022, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 10.034013605442176, |
|
"grad_norm": 0.016953065991401672, |
|
"learning_rate": 5.8747368421052645e-06, |
|
"loss": 0.0024, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 10.090702947845806, |
|
"grad_norm": 0.0506548210978508, |
|
"learning_rate": 5.84842105263158e-06, |
|
"loss": 0.0049, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 10.147392290249433, |
|
"grad_norm": 0.017473401501774788, |
|
"learning_rate": 5.822105263157895e-06, |
|
"loss": 0.0009, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 10.204081632653061, |
|
"grad_norm": 0.016834545880556107, |
|
"learning_rate": 5.795789473684211e-06, |
|
"loss": 0.0016, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 10.260770975056689, |
|
"grad_norm": 0.07962112873792648, |
|
"learning_rate": 5.769473684210527e-06, |
|
"loss": 0.0005, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 10.317460317460318, |
|
"grad_norm": 0.020868808031082153, |
|
"learning_rate": 5.743157894736843e-06, |
|
"loss": 0.0162, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 10.374149659863946, |
|
"grad_norm": 0.0351821593940258, |
|
"learning_rate": 5.7168421052631585e-06, |
|
"loss": 0.0217, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 10.430839002267573, |
|
"grad_norm": 0.036179013550281525, |
|
"learning_rate": 5.690526315789474e-06, |
|
"loss": 0.0022, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 10.487528344671201, |
|
"grad_norm": 0.04735976830124855, |
|
"learning_rate": 5.66421052631579e-06, |
|
"loss": 0.0061, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 10.54421768707483, |
|
"grad_norm": 0.015306883491575718, |
|
"learning_rate": 5.637894736842105e-06, |
|
"loss": 0.0009, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 10.600907029478458, |
|
"grad_norm": 0.015260276384651661, |
|
"learning_rate": 5.611578947368421e-06, |
|
"loss": 0.0067, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 10.657596371882086, |
|
"grad_norm": 0.029503723606467247, |
|
"learning_rate": 5.585263157894737e-06, |
|
"loss": 0.0006, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 10.714285714285714, |
|
"grad_norm": 0.017955463379621506, |
|
"learning_rate": 5.558947368421053e-06, |
|
"loss": 0.0005, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 10.770975056689343, |
|
"grad_norm": 0.11942701041698456, |
|
"learning_rate": 5.532631578947368e-06, |
|
"loss": 0.003, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 10.82766439909297, |
|
"grad_norm": 0.02495400980114937, |
|
"learning_rate": 5.506315789473685e-06, |
|
"loss": 0.0017, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 10.884353741496598, |
|
"grad_norm": 0.1065245270729065, |
|
"learning_rate": 5.480000000000001e-06, |
|
"loss": 0.0047, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 10.941043083900226, |
|
"grad_norm": 0.10682205855846405, |
|
"learning_rate": 5.453684210526317e-06, |
|
"loss": 0.0045, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 10.997732426303855, |
|
"grad_norm": 0.016077643260359764, |
|
"learning_rate": 5.4273684210526325e-06, |
|
"loss": 0.0007, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 11.054421768707483, |
|
"grad_norm": 0.020081788301467896, |
|
"learning_rate": 5.401052631578948e-06, |
|
"loss": 0.0004, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 11.11111111111111, |
|
"grad_norm": 0.017382116988301277, |
|
"learning_rate": 5.374736842105263e-06, |
|
"loss": 0.0012, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 11.167800453514738, |
|
"grad_norm": 0.5074162483215332, |
|
"learning_rate": 5.348421052631579e-06, |
|
"loss": 0.0038, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 11.224489795918368, |
|
"grad_norm": 0.011525845155119896, |
|
"learning_rate": 5.322105263157895e-06, |
|
"loss": 0.0011, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 11.281179138321995, |
|
"grad_norm": 0.016790462657809258, |
|
"learning_rate": 5.295789473684211e-06, |
|
"loss": 0.0013, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 11.337868480725623, |
|
"grad_norm": 2.887037754058838, |
|
"learning_rate": 5.269473684210527e-06, |
|
"loss": 0.0011, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 11.337868480725623, |
|
"eval_loss": 0.2228717803955078, |
|
"eval_runtime": 177.4582, |
|
"eval_samples_per_second": 2.637, |
|
"eval_steps_per_second": 0.44, |
|
"eval_wer": 6.3835470085470085, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 11.39455782312925, |
|
"grad_norm": 0.02366207167506218, |
|
"learning_rate": 5.243157894736842e-06, |
|
"loss": 0.0002, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 11.45124716553288, |
|
"grad_norm": 1.1653227806091309, |
|
"learning_rate": 5.216842105263158e-06, |
|
"loss": 0.0052, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 11.507936507936508, |
|
"grad_norm": 0.2194329798221588, |
|
"learning_rate": 5.190526315789474e-06, |
|
"loss": 0.0014, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 11.564625850340136, |
|
"grad_norm": 0.03989162668585777, |
|
"learning_rate": 5.164210526315791e-06, |
|
"loss": 0.0006, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 11.621315192743765, |
|
"grad_norm": 0.02930096909403801, |
|
"learning_rate": 5.1378947368421065e-06, |
|
"loss": 0.003, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 11.678004535147393, |
|
"grad_norm": 0.011350632645189762, |
|
"learning_rate": 5.1115789473684215e-06, |
|
"loss": 0.0039, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 11.73469387755102, |
|
"grad_norm": 0.010673941113054752, |
|
"learning_rate": 5.085263157894737e-06, |
|
"loss": 0.0004, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 11.791383219954648, |
|
"grad_norm": 0.05096409469842911, |
|
"learning_rate": 5.058947368421053e-06, |
|
"loss": 0.0048, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 11.848072562358277, |
|
"grad_norm": 0.011835623532533646, |
|
"learning_rate": 5.032631578947369e-06, |
|
"loss": 0.0093, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 11.904761904761905, |
|
"grad_norm": 0.013359226286411285, |
|
"learning_rate": 5.006315789473685e-06, |
|
"loss": 0.0062, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 11.961451247165533, |
|
"grad_norm": 0.03088083118200302, |
|
"learning_rate": 4.980000000000001e-06, |
|
"loss": 0.0002, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 12.01814058956916, |
|
"grad_norm": 0.02855735644698143, |
|
"learning_rate": 4.953684210526316e-06, |
|
"loss": 0.0002, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 12.07482993197279, |
|
"grad_norm": 0.04174978658556938, |
|
"learning_rate": 4.927368421052631e-06, |
|
"loss": 0.0002, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 12.131519274376418, |
|
"grad_norm": 0.013824643567204475, |
|
"learning_rate": 4.901052631578947e-06, |
|
"loss": 0.0002, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 12.188208616780045, |
|
"grad_norm": 0.010323552414774895, |
|
"learning_rate": 4.874736842105264e-06, |
|
"loss": 0.0002, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 12.244897959183673, |
|
"grad_norm": 7.9211883544921875, |
|
"learning_rate": 4.84842105263158e-06, |
|
"loss": 0.0041, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 12.301587301587302, |
|
"grad_norm": 0.011090376414358616, |
|
"learning_rate": 4.8221052631578955e-06, |
|
"loss": 0.0046, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 12.35827664399093, |
|
"grad_norm": 0.993200957775116, |
|
"learning_rate": 4.7957894736842105e-06, |
|
"loss": 0.0077, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 12.414965986394558, |
|
"grad_norm": 0.5387348532676697, |
|
"learning_rate": 4.769473684210526e-06, |
|
"loss": 0.0014, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 12.471655328798185, |
|
"grad_norm": 0.030508503317832947, |
|
"learning_rate": 4.743157894736842e-06, |
|
"loss": 0.0029, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 12.528344671201815, |
|
"grad_norm": 0.015367632731795311, |
|
"learning_rate": 4.716842105263159e-06, |
|
"loss": 0.002, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 12.585034013605442, |
|
"grad_norm": 0.01937568373978138, |
|
"learning_rate": 4.690526315789475e-06, |
|
"loss": 0.0016, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 12.64172335600907, |
|
"grad_norm": 0.06024911627173424, |
|
"learning_rate": 4.6642105263157896e-06, |
|
"loss": 0.0016, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 12.698412698412698, |
|
"grad_norm": 0.005301471799612045, |
|
"learning_rate": 4.637894736842105e-06, |
|
"loss": 0.0014, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 12.755102040816327, |
|
"grad_norm": 0.03389279916882515, |
|
"learning_rate": 4.611578947368421e-06, |
|
"loss": 0.0017, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 12.811791383219955, |
|
"grad_norm": 0.13554659485816956, |
|
"learning_rate": 4.585263157894737e-06, |
|
"loss": 0.0105, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 12.868480725623582, |
|
"grad_norm": 0.018741684034466743, |
|
"learning_rate": 4.558947368421053e-06, |
|
"loss": 0.004, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 12.92517006802721, |
|
"grad_norm": 0.1560622751712799, |
|
"learning_rate": 4.532631578947369e-06, |
|
"loss": 0.0005, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 12.98185941043084, |
|
"grad_norm": 0.1737220734357834, |
|
"learning_rate": 4.5063157894736845e-06, |
|
"loss": 0.0019, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 13.038548752834467, |
|
"grad_norm": 0.02937311679124832, |
|
"learning_rate": 4.48e-06, |
|
"loss": 0.002, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 13.095238095238095, |
|
"grad_norm": 0.006793774198740721, |
|
"learning_rate": 4.453684210526316e-06, |
|
"loss": 0.0005, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 13.151927437641723, |
|
"grad_norm": 0.014649259857833385, |
|
"learning_rate": 4.427368421052632e-06, |
|
"loss": 0.0025, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 13.208616780045352, |
|
"grad_norm": 0.0212300606071949, |
|
"learning_rate": 4.401052631578948e-06, |
|
"loss": 0.0014, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 13.26530612244898, |
|
"grad_norm": 0.013082647696137428, |
|
"learning_rate": 4.374736842105264e-06, |
|
"loss": 0.0039, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 13.321995464852607, |
|
"grad_norm": 0.05612126737833023, |
|
"learning_rate": 4.348421052631579e-06, |
|
"loss": 0.0007, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 13.378684807256235, |
|
"grad_norm": 0.37419337034225464, |
|
"learning_rate": 4.322105263157895e-06, |
|
"loss": 0.0012, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 13.435374149659864, |
|
"grad_norm": 0.03296487405896187, |
|
"learning_rate": 4.295789473684211e-06, |
|
"loss": 0.0043, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 13.492063492063492, |
|
"grad_norm": 0.009840169921517372, |
|
"learning_rate": 4.269473684210527e-06, |
|
"loss": 0.0039, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 13.54875283446712, |
|
"grad_norm": 0.015135574154555798, |
|
"learning_rate": 4.243157894736843e-06, |
|
"loss": 0.0014, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 13.60544217687075, |
|
"grad_norm": 0.022306112572550774, |
|
"learning_rate": 4.2168421052631585e-06, |
|
"loss": 0.0001, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 13.60544217687075, |
|
"eval_loss": 0.22002862393856049, |
|
"eval_runtime": 147.8825, |
|
"eval_samples_per_second": 3.165, |
|
"eval_steps_per_second": 0.527, |
|
"eval_wer": 6.209935897435898, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 13.662131519274377, |
|
"grad_norm": 0.0066378237679600716, |
|
"learning_rate": 4.1905263157894735e-06, |
|
"loss": 0.0007, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 13.718820861678005, |
|
"grad_norm": 0.010166754946112633, |
|
"learning_rate": 4.16421052631579e-06, |
|
"loss": 0.0008, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 13.775510204081632, |
|
"grad_norm": 0.0136796273291111, |
|
"learning_rate": 4.137894736842106e-06, |
|
"loss": 0.0009, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 13.83219954648526, |
|
"grad_norm": 0.017127549275755882, |
|
"learning_rate": 4.111578947368422e-06, |
|
"loss": 0.0015, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 13.88888888888889, |
|
"grad_norm": 0.024442024528980255, |
|
"learning_rate": 4.085263157894737e-06, |
|
"loss": 0.0032, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 13.945578231292517, |
|
"grad_norm": 0.17017020285129547, |
|
"learning_rate": 4.0589473684210526e-06, |
|
"loss": 0.0015, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 14.002267573696145, |
|
"grad_norm": 0.00828185211867094, |
|
"learning_rate": 4.032631578947368e-06, |
|
"loss": 0.0026, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 14.058956916099774, |
|
"grad_norm": 0.007860764861106873, |
|
"learning_rate": 4.006315789473684e-06, |
|
"loss": 0.0034, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 14.115646258503402, |
|
"grad_norm": 0.011614521034061909, |
|
"learning_rate": 3.980000000000001e-06, |
|
"loss": 0.0044, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 14.17233560090703, |
|
"grad_norm": 0.018276942893862724, |
|
"learning_rate": 3.953684210526316e-06, |
|
"loss": 0.0004, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 14.229024943310657, |
|
"grad_norm": 0.012453128583729267, |
|
"learning_rate": 3.927368421052632e-06, |
|
"loss": 0.0002, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 14.285714285714286, |
|
"grad_norm": 0.015684612095355988, |
|
"learning_rate": 3.9010526315789475e-06, |
|
"loss": 0.0003, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 14.342403628117914, |
|
"grad_norm": 0.006171511020511389, |
|
"learning_rate": 3.874736842105263e-06, |
|
"loss": 0.0001, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 14.399092970521542, |
|
"grad_norm": 0.006511132698506117, |
|
"learning_rate": 3.848421052631579e-06, |
|
"loss": 0.0004, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 14.45578231292517, |
|
"grad_norm": 0.050404928624629974, |
|
"learning_rate": 3.822105263157895e-06, |
|
"loss": 0.0002, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 14.512471655328799, |
|
"grad_norm": 10.834954261779785, |
|
"learning_rate": 3.795789473684211e-06, |
|
"loss": 0.0012, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 14.569160997732427, |
|
"grad_norm": 0.13081848621368408, |
|
"learning_rate": 3.7694736842105266e-06, |
|
"loss": 0.0016, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 14.625850340136054, |
|
"grad_norm": 0.011676596477627754, |
|
"learning_rate": 3.7431578947368424e-06, |
|
"loss": 0.0044, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 14.682539682539682, |
|
"grad_norm": 0.014040385372936726, |
|
"learning_rate": 3.716842105263158e-06, |
|
"loss": 0.0001, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 14.739229024943311, |
|
"grad_norm": 0.011808693408966064, |
|
"learning_rate": 3.690526315789474e-06, |
|
"loss": 0.0001, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 14.795918367346939, |
|
"grad_norm": 0.012787124142050743, |
|
"learning_rate": 3.6642105263157894e-06, |
|
"loss": 0.0045, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 14.852607709750567, |
|
"grad_norm": 0.005284798797219992, |
|
"learning_rate": 3.6378947368421057e-06, |
|
"loss": 0.0001, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 14.909297052154194, |
|
"grad_norm": 0.036842990666627884, |
|
"learning_rate": 3.6115789473684215e-06, |
|
"loss": 0.0004, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 14.965986394557824, |
|
"grad_norm": 0.024015046656131744, |
|
"learning_rate": 3.5852631578947373e-06, |
|
"loss": 0.0003, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 15.022675736961451, |
|
"grad_norm": 0.012714927084743977, |
|
"learning_rate": 3.558947368421053e-06, |
|
"loss": 0.0025, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 15.079365079365079, |
|
"grad_norm": 0.017115842550992966, |
|
"learning_rate": 3.5326315789473685e-06, |
|
"loss": 0.0012, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 15.136054421768707, |
|
"grad_norm": 0.008900342509150505, |
|
"learning_rate": 3.5063157894736843e-06, |
|
"loss": 0.0001, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 15.192743764172336, |
|
"grad_norm": 0.007803457789123058, |
|
"learning_rate": 3.48e-06, |
|
"loss": 0.0004, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 15.249433106575964, |
|
"grad_norm": 0.013770255260169506, |
|
"learning_rate": 3.4536842105263164e-06, |
|
"loss": 0.0011, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 15.306122448979592, |
|
"grad_norm": 0.06877677142620087, |
|
"learning_rate": 3.427368421052632e-06, |
|
"loss": 0.0018, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 15.36281179138322, |
|
"grad_norm": 0.011991630308330059, |
|
"learning_rate": 3.4010526315789476e-06, |
|
"loss": 0.0001, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 15.419501133786849, |
|
"grad_norm": 0.013049086555838585, |
|
"learning_rate": 3.3747368421052634e-06, |
|
"loss": 0.0002, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 15.476190476190476, |
|
"grad_norm": 0.012979848310351372, |
|
"learning_rate": 3.3484210526315792e-06, |
|
"loss": 0.0002, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 15.532879818594104, |
|
"grad_norm": 0.006456771399825811, |
|
"learning_rate": 3.3221052631578946e-06, |
|
"loss": 0.0001, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 15.589569160997732, |
|
"grad_norm": 0.012274966575205326, |
|
"learning_rate": 3.2957894736842104e-06, |
|
"loss": 0.0001, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 15.646258503401361, |
|
"grad_norm": 0.010561280883848667, |
|
"learning_rate": 3.2694736842105267e-06, |
|
"loss": 0.0004, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 15.702947845804989, |
|
"grad_norm": 0.009232975542545319, |
|
"learning_rate": 3.2431578947368425e-06, |
|
"loss": 0.0002, |
|
"step": 6925 |
|
}, |
|
{ |
|
"epoch": 15.759637188208616, |
|
"grad_norm": 0.015166404657065868, |
|
"learning_rate": 3.2168421052631583e-06, |
|
"loss": 0.0001, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 15.816326530612244, |
|
"grad_norm": 0.0094530014321208, |
|
"learning_rate": 3.1905263157894737e-06, |
|
"loss": 0.0001, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 15.873015873015873, |
|
"grad_norm": 0.017429711297154427, |
|
"learning_rate": 3.1642105263157895e-06, |
|
"loss": 0.0001, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 15.873015873015873, |
|
"eval_loss": 0.2297230064868927, |
|
"eval_runtime": 188.711, |
|
"eval_samples_per_second": 2.48, |
|
"eval_steps_per_second": 0.413, |
|
"eval_wer": 6.049679487179487, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 15.929705215419501, |
|
"grad_norm": 0.005318532232195139, |
|
"learning_rate": 3.1378947368421054e-06, |
|
"loss": 0.001, |
|
"step": 7025 |
|
}, |
|
{ |
|
"epoch": 15.986394557823129, |
|
"grad_norm": 0.011030340567231178, |
|
"learning_rate": 3.111578947368421e-06, |
|
"loss": 0.0001, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 16.04308390022676, |
|
"grad_norm": 0.004018599167466164, |
|
"learning_rate": 3.0852631578947374e-06, |
|
"loss": 0.0001, |
|
"step": 7075 |
|
}, |
|
{ |
|
"epoch": 16.099773242630384, |
|
"grad_norm": 0.006292372010648251, |
|
"learning_rate": 3.058947368421053e-06, |
|
"loss": 0.0063, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 16.156462585034014, |
|
"grad_norm": 0.008721155114471912, |
|
"learning_rate": 3.0326315789473686e-06, |
|
"loss": 0.0001, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 16.213151927437643, |
|
"grad_norm": 0.007462701760232449, |
|
"learning_rate": 3.0063157894736844e-06, |
|
"loss": 0.0013, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 16.26984126984127, |
|
"grad_norm": 0.0060442672111094, |
|
"learning_rate": 2.9800000000000003e-06, |
|
"loss": 0.0002, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 16.3265306122449, |
|
"grad_norm": 0.007179939653724432, |
|
"learning_rate": 2.9536842105263157e-06, |
|
"loss": 0.0001, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 16.383219954648528, |
|
"grad_norm": 0.006970668211579323, |
|
"learning_rate": 2.927368421052632e-06, |
|
"loss": 0.0025, |
|
"step": 7225 |
|
}, |
|
{ |
|
"epoch": 16.439909297052154, |
|
"grad_norm": 0.0061109112575650215, |
|
"learning_rate": 2.9010526315789477e-06, |
|
"loss": 0.0003, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 16.496598639455783, |
|
"grad_norm": 0.02214565873146057, |
|
"learning_rate": 2.8747368421052635e-06, |
|
"loss": 0.0001, |
|
"step": 7275 |
|
}, |
|
{ |
|
"epoch": 16.55328798185941, |
|
"grad_norm": 0.009677527472376823, |
|
"learning_rate": 2.8484210526315794e-06, |
|
"loss": 0.0001, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 16.60997732426304, |
|
"grad_norm": 3.2391059398651123, |
|
"learning_rate": 2.8221052631578948e-06, |
|
"loss": 0.0013, |
|
"step": 7325 |
|
}, |
|
{ |
|
"epoch": 16.666666666666668, |
|
"grad_norm": 0.005941161885857582, |
|
"learning_rate": 2.7957894736842106e-06, |
|
"loss": 0.0001, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 16.723356009070294, |
|
"grad_norm": 0.005041074473410845, |
|
"learning_rate": 2.7694736842105264e-06, |
|
"loss": 0.0001, |
|
"step": 7375 |
|
}, |
|
{ |
|
"epoch": 16.780045351473923, |
|
"grad_norm": 0.010110282339155674, |
|
"learning_rate": 2.7431578947368426e-06, |
|
"loss": 0.0001, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 16.836734693877553, |
|
"grad_norm": 0.0052981507033109665, |
|
"learning_rate": 2.7168421052631585e-06, |
|
"loss": 0.0001, |
|
"step": 7425 |
|
}, |
|
{ |
|
"epoch": 16.89342403628118, |
|
"grad_norm": 0.007062564603984356, |
|
"learning_rate": 2.690526315789474e-06, |
|
"loss": 0.0004, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 16.950113378684808, |
|
"grad_norm": 0.005766382906585932, |
|
"learning_rate": 2.6642105263157897e-06, |
|
"loss": 0.0001, |
|
"step": 7475 |
|
}, |
|
{ |
|
"epoch": 17.006802721088434, |
|
"grad_norm": 8.800177574157715, |
|
"learning_rate": 2.6378947368421055e-06, |
|
"loss": 0.0022, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 17.063492063492063, |
|
"grad_norm": 0.003864010563120246, |
|
"learning_rate": 2.6115789473684213e-06, |
|
"loss": 0.0001, |
|
"step": 7525 |
|
}, |
|
{ |
|
"epoch": 17.120181405895693, |
|
"grad_norm": 0.003043045522645116, |
|
"learning_rate": 2.5852631578947367e-06, |
|
"loss": 0.0001, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 17.17687074829932, |
|
"grad_norm": 0.0026129058096557856, |
|
"learning_rate": 2.558947368421053e-06, |
|
"loss": 0.0001, |
|
"step": 7575 |
|
}, |
|
{ |
|
"epoch": 17.233560090702948, |
|
"grad_norm": 0.003994261380285025, |
|
"learning_rate": 2.5326315789473688e-06, |
|
"loss": 0.0001, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 17.290249433106577, |
|
"grad_norm": 0.006509356200695038, |
|
"learning_rate": 2.5063157894736846e-06, |
|
"loss": 0.0001, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 17.346938775510203, |
|
"grad_norm": 0.006231856532394886, |
|
"learning_rate": 2.4800000000000004e-06, |
|
"loss": 0.0001, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 17.403628117913833, |
|
"grad_norm": 0.00826491229236126, |
|
"learning_rate": 2.453684210526316e-06, |
|
"loss": 0.0027, |
|
"step": 7675 |
|
}, |
|
{ |
|
"epoch": 17.46031746031746, |
|
"grad_norm": 0.007504597306251526, |
|
"learning_rate": 2.427368421052632e-06, |
|
"loss": 0.0001, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 17.517006802721088, |
|
"grad_norm": 0.0033706706017255783, |
|
"learning_rate": 2.4010526315789474e-06, |
|
"loss": 0.0001, |
|
"step": 7725 |
|
}, |
|
{ |
|
"epoch": 17.573696145124718, |
|
"grad_norm": 0.004383792169392109, |
|
"learning_rate": 2.3747368421052632e-06, |
|
"loss": 0.0018, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 17.630385487528343, |
|
"grad_norm": 0.004007370211184025, |
|
"learning_rate": 2.348421052631579e-06, |
|
"loss": 0.0001, |
|
"step": 7775 |
|
}, |
|
{ |
|
"epoch": 17.687074829931973, |
|
"grad_norm": 0.006295809056609869, |
|
"learning_rate": 2.322105263157895e-06, |
|
"loss": 0.0001, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 17.743764172335602, |
|
"grad_norm": 0.006831544451415539, |
|
"learning_rate": 2.2957894736842107e-06, |
|
"loss": 0.0003, |
|
"step": 7825 |
|
}, |
|
{ |
|
"epoch": 17.800453514739228, |
|
"grad_norm": 0.0033715348690748215, |
|
"learning_rate": 2.2694736842105265e-06, |
|
"loss": 0.0001, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 17.857142857142858, |
|
"grad_norm": 0.006168752908706665, |
|
"learning_rate": 2.2431578947368423e-06, |
|
"loss": 0.0012, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 17.913832199546484, |
|
"grad_norm": 0.006377949379384518, |
|
"learning_rate": 2.216842105263158e-06, |
|
"loss": 0.0052, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 17.970521541950113, |
|
"grad_norm": 1.9607151746749878, |
|
"learning_rate": 2.190526315789474e-06, |
|
"loss": 0.0004, |
|
"step": 7925 |
|
}, |
|
{ |
|
"epoch": 18.027210884353742, |
|
"grad_norm": 0.0046304683201014996, |
|
"learning_rate": 2.16421052631579e-06, |
|
"loss": 0.001, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 18.08390022675737, |
|
"grad_norm": 0.008269163779914379, |
|
"learning_rate": 2.1378947368421056e-06, |
|
"loss": 0.0001, |
|
"step": 7975 |
|
}, |
|
{ |
|
"epoch": 18.140589569160998, |
|
"grad_norm": 0.0044001140631735325, |
|
"learning_rate": 2.111578947368421e-06, |
|
"loss": 0.0001, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 18.140589569160998, |
|
"eval_loss": 0.2317376732826233, |
|
"eval_runtime": 158.0944, |
|
"eval_samples_per_second": 2.96, |
|
"eval_steps_per_second": 0.493, |
|
"eval_wer": 6.076388888888888, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 18.197278911564627, |
|
"grad_norm": 0.006642814259976149, |
|
"learning_rate": 2.085263157894737e-06, |
|
"loss": 0.0001, |
|
"step": 8025 |
|
}, |
|
{ |
|
"epoch": 18.253968253968253, |
|
"grad_norm": 0.00599477905780077, |
|
"learning_rate": 2.058947368421053e-06, |
|
"loss": 0.0001, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 18.310657596371883, |
|
"grad_norm": 0.0045234388671815395, |
|
"learning_rate": 2.0326315789473685e-06, |
|
"loss": 0.0001, |
|
"step": 8075 |
|
}, |
|
{ |
|
"epoch": 18.367346938775512, |
|
"grad_norm": 0.002616587560623884, |
|
"learning_rate": 2.0063157894736843e-06, |
|
"loss": 0.0001, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 18.424036281179138, |
|
"grad_norm": 0.0050145648419857025, |
|
"learning_rate": 1.98e-06, |
|
"loss": 0.0001, |
|
"step": 8125 |
|
}, |
|
{ |
|
"epoch": 18.480725623582767, |
|
"grad_norm": 0.0029045080300420523, |
|
"learning_rate": 1.953684210526316e-06, |
|
"loss": 0.0002, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 18.537414965986393, |
|
"grad_norm": 0.0041219014674425125, |
|
"learning_rate": 1.9273684210526317e-06, |
|
"loss": 0.0009, |
|
"step": 8175 |
|
}, |
|
{ |
|
"epoch": 18.594104308390023, |
|
"grad_norm": 0.006918082479387522, |
|
"learning_rate": 1.9010526315789476e-06, |
|
"loss": 0.0001, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 18.650793650793652, |
|
"grad_norm": 0.0045529440976679325, |
|
"learning_rate": 1.8747368421052634e-06, |
|
"loss": 0.0005, |
|
"step": 8225 |
|
}, |
|
{ |
|
"epoch": 18.707482993197278, |
|
"grad_norm": 0.004559030756354332, |
|
"learning_rate": 1.848421052631579e-06, |
|
"loss": 0.0001, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 18.764172335600907, |
|
"grad_norm": 0.003188680624589324, |
|
"learning_rate": 1.8221052631578948e-06, |
|
"loss": 0.0001, |
|
"step": 8275 |
|
}, |
|
{ |
|
"epoch": 18.820861678004537, |
|
"grad_norm": 0.0051582190208137035, |
|
"learning_rate": 1.7957894736842108e-06, |
|
"loss": 0.0001, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 18.877551020408163, |
|
"grad_norm": 0.003925441298633814, |
|
"learning_rate": 1.7694736842105264e-06, |
|
"loss": 0.0001, |
|
"step": 8325 |
|
}, |
|
{ |
|
"epoch": 18.934240362811792, |
|
"grad_norm": 0.005711190402507782, |
|
"learning_rate": 1.7431578947368423e-06, |
|
"loss": 0.0, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 18.990929705215418, |
|
"grad_norm": 0.006396492477506399, |
|
"learning_rate": 1.716842105263158e-06, |
|
"loss": 0.0002, |
|
"step": 8375 |
|
}, |
|
{ |
|
"epoch": 19.047619047619047, |
|
"grad_norm": 0.0038605357985943556, |
|
"learning_rate": 1.6905263157894739e-06, |
|
"loss": 0.0001, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 19.104308390022677, |
|
"grad_norm": 0.003054672619327903, |
|
"learning_rate": 1.6642105263157895e-06, |
|
"loss": 0.0, |
|
"step": 8425 |
|
}, |
|
{ |
|
"epoch": 19.160997732426303, |
|
"grad_norm": 0.0045293658040463924, |
|
"learning_rate": 1.6378947368421053e-06, |
|
"loss": 0.0, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 19.217687074829932, |
|
"grad_norm": 0.005090142600238323, |
|
"learning_rate": 1.6115789473684211e-06, |
|
"loss": 0.0001, |
|
"step": 8475 |
|
}, |
|
{ |
|
"epoch": 19.27437641723356, |
|
"grad_norm": 0.003649334190413356, |
|
"learning_rate": 1.585263157894737e-06, |
|
"loss": 0.0, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 19.331065759637188, |
|
"grad_norm": 0.014431001618504524, |
|
"learning_rate": 1.5589473684210526e-06, |
|
"loss": 0.0001, |
|
"step": 8525 |
|
}, |
|
{ |
|
"epoch": 19.387755102040817, |
|
"grad_norm": 0.005074130836874247, |
|
"learning_rate": 1.5326315789473686e-06, |
|
"loss": 0.0, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 19.444444444444443, |
|
"grad_norm": 0.0036257512401789427, |
|
"learning_rate": 1.5063157894736844e-06, |
|
"loss": 0.0001, |
|
"step": 8575 |
|
}, |
|
{ |
|
"epoch": 19.501133786848072, |
|
"grad_norm": 0.0060266111977398396, |
|
"learning_rate": 1.48e-06, |
|
"loss": 0.0001, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 19.5578231292517, |
|
"grad_norm": 0.004775646608322859, |
|
"learning_rate": 1.453684210526316e-06, |
|
"loss": 0.0, |
|
"step": 8625 |
|
}, |
|
{ |
|
"epoch": 19.614512471655328, |
|
"grad_norm": 0.006195446942001581, |
|
"learning_rate": 1.4273684210526317e-06, |
|
"loss": 0.0003, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 19.671201814058957, |
|
"grad_norm": 0.0044461763463914394, |
|
"learning_rate": 1.4010526315789475e-06, |
|
"loss": 0.0001, |
|
"step": 8675 |
|
}, |
|
{ |
|
"epoch": 19.727891156462587, |
|
"grad_norm": 0.0022899750620126724, |
|
"learning_rate": 1.374736842105263e-06, |
|
"loss": 0.0, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 19.784580498866212, |
|
"grad_norm": 0.005077675450593233, |
|
"learning_rate": 1.3484210526315791e-06, |
|
"loss": 0.0001, |
|
"step": 8725 |
|
}, |
|
{ |
|
"epoch": 19.841269841269842, |
|
"grad_norm": 0.005482817534357309, |
|
"learning_rate": 1.322105263157895e-06, |
|
"loss": 0.0, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 19.897959183673468, |
|
"grad_norm": 0.00202305824495852, |
|
"learning_rate": 1.2957894736842105e-06, |
|
"loss": 0.0001, |
|
"step": 8775 |
|
}, |
|
{ |
|
"epoch": 19.954648526077097, |
|
"grad_norm": 0.005788388196378946, |
|
"learning_rate": 1.2694736842105266e-06, |
|
"loss": 0.0001, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 20.011337868480727, |
|
"grad_norm": 0.004344166722148657, |
|
"learning_rate": 1.2431578947368422e-06, |
|
"loss": 0.0014, |
|
"step": 8825 |
|
}, |
|
{ |
|
"epoch": 20.068027210884352, |
|
"grad_norm": 0.004923074971884489, |
|
"learning_rate": 1.216842105263158e-06, |
|
"loss": 0.0, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 20.124716553287982, |
|
"grad_norm": 0.0024910017382353544, |
|
"learning_rate": 1.1905263157894738e-06, |
|
"loss": 0.0, |
|
"step": 8875 |
|
}, |
|
{ |
|
"epoch": 20.18140589569161, |
|
"grad_norm": 0.0026297103613615036, |
|
"learning_rate": 1.1642105263157896e-06, |
|
"loss": 0.0, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 20.238095238095237, |
|
"grad_norm": 0.005076109431684017, |
|
"learning_rate": 1.1378947368421052e-06, |
|
"loss": 0.0001, |
|
"step": 8925 |
|
}, |
|
{ |
|
"epoch": 20.294784580498867, |
|
"grad_norm": 0.003189537674188614, |
|
"learning_rate": 1.1115789473684213e-06, |
|
"loss": 0.0, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 20.351473922902493, |
|
"grad_norm": 0.0030386645812541246, |
|
"learning_rate": 1.0852631578947369e-06, |
|
"loss": 0.0, |
|
"step": 8975 |
|
}, |
|
{ |
|
"epoch": 20.408163265306122, |
|
"grad_norm": 0.001944896299391985, |
|
"learning_rate": 1.0589473684210527e-06, |
|
"loss": 0.0001, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 20.408163265306122, |
|
"eval_loss": 0.23750941455364227, |
|
"eval_runtime": 160.3655, |
|
"eval_samples_per_second": 2.918, |
|
"eval_steps_per_second": 0.486, |
|
"eval_wer": 6.396901709401709, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 20.46485260770975, |
|
"grad_norm": 0.0040268674492836, |
|
"learning_rate": 1.0326315789473685e-06, |
|
"loss": 0.0, |
|
"step": 9025 |
|
}, |
|
{ |
|
"epoch": 20.521541950113377, |
|
"grad_norm": 2.8923232555389404, |
|
"learning_rate": 1.0063157894736843e-06, |
|
"loss": 0.0002, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 20.578231292517007, |
|
"grad_norm": 0.002543982584029436, |
|
"learning_rate": 9.800000000000001e-07, |
|
"loss": 0.0, |
|
"step": 9075 |
|
}, |
|
{ |
|
"epoch": 20.634920634920636, |
|
"grad_norm": 0.002120724180713296, |
|
"learning_rate": 9.536842105263158e-07, |
|
"loss": 0.0002, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 20.691609977324262, |
|
"grad_norm": 0.0036805281415581703, |
|
"learning_rate": 9.273684210526317e-07, |
|
"loss": 0.0, |
|
"step": 9125 |
|
}, |
|
{ |
|
"epoch": 20.74829931972789, |
|
"grad_norm": 0.002348339883610606, |
|
"learning_rate": 9.010526315789474e-07, |
|
"loss": 0.0001, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 20.80498866213152, |
|
"grad_norm": 0.0036750957369804382, |
|
"learning_rate": 8.747368421052632e-07, |
|
"loss": 0.0, |
|
"step": 9175 |
|
}, |
|
{ |
|
"epoch": 20.861678004535147, |
|
"grad_norm": 0.004974485840648413, |
|
"learning_rate": 8.48421052631579e-07, |
|
"loss": 0.0, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 20.918367346938776, |
|
"grad_norm": 0.004362870939075947, |
|
"learning_rate": 8.221052631578947e-07, |
|
"loss": 0.0001, |
|
"step": 9225 |
|
}, |
|
{ |
|
"epoch": 20.975056689342402, |
|
"grad_norm": 0.002504055853933096, |
|
"learning_rate": 7.957894736842107e-07, |
|
"loss": 0.0001, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 21.03174603174603, |
|
"grad_norm": 0.004000427667051554, |
|
"learning_rate": 7.694736842105263e-07, |
|
"loss": 0.0001, |
|
"step": 9275 |
|
}, |
|
{ |
|
"epoch": 21.08843537414966, |
|
"grad_norm": 0.003886349266394973, |
|
"learning_rate": 7.431578947368422e-07, |
|
"loss": 0.0, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 21.145124716553287, |
|
"grad_norm": 0.003481630701571703, |
|
"learning_rate": 7.16842105263158e-07, |
|
"loss": 0.0, |
|
"step": 9325 |
|
}, |
|
{ |
|
"epoch": 21.201814058956916, |
|
"grad_norm": 0.01767110824584961, |
|
"learning_rate": 6.905263157894737e-07, |
|
"loss": 0.0, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 21.258503401360546, |
|
"grad_norm": 0.005323050078004599, |
|
"learning_rate": 6.642105263157895e-07, |
|
"loss": 0.0, |
|
"step": 9375 |
|
}, |
|
{ |
|
"epoch": 21.31519274376417, |
|
"grad_norm": 0.05196991562843323, |
|
"learning_rate": 6.378947368421053e-07, |
|
"loss": 0.0001, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 21.3718820861678, |
|
"grad_norm": 0.003023393452167511, |
|
"learning_rate": 6.115789473684211e-07, |
|
"loss": 0.0, |
|
"step": 9425 |
|
}, |
|
{ |
|
"epoch": 21.428571428571427, |
|
"grad_norm": 0.0037847934290766716, |
|
"learning_rate": 5.852631578947369e-07, |
|
"loss": 0.0, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 21.485260770975056, |
|
"grad_norm": 0.0039050974883139133, |
|
"learning_rate": 5.589473684210526e-07, |
|
"loss": 0.0001, |
|
"step": 9475 |
|
}, |
|
{ |
|
"epoch": 21.541950113378686, |
|
"grad_norm": 0.0036291517317295074, |
|
"learning_rate": 5.326315789473684e-07, |
|
"loss": 0.0001, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 21.598639455782312, |
|
"grad_norm": 0.003725625341758132, |
|
"learning_rate": 5.063157894736842e-07, |
|
"loss": 0.0001, |
|
"step": 9525 |
|
}, |
|
{ |
|
"epoch": 21.65532879818594, |
|
"grad_norm": 0.0034233913756906986, |
|
"learning_rate": 4.800000000000001e-07, |
|
"loss": 0.0, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 21.71201814058957, |
|
"grad_norm": 0.005571336485445499, |
|
"learning_rate": 4.5368421052631583e-07, |
|
"loss": 0.0, |
|
"step": 9575 |
|
}, |
|
{ |
|
"epoch": 21.768707482993197, |
|
"grad_norm": 0.0034070161636918783, |
|
"learning_rate": 4.273684210526316e-07, |
|
"loss": 0.0001, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 21.825396825396826, |
|
"grad_norm": 0.0027184481732547283, |
|
"learning_rate": 4.0105263157894736e-07, |
|
"loss": 0.0, |
|
"step": 9625 |
|
}, |
|
{ |
|
"epoch": 21.882086167800452, |
|
"grad_norm": 0.001929171965457499, |
|
"learning_rate": 3.7473684210526323e-07, |
|
"loss": 0.0, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 21.93877551020408, |
|
"grad_norm": 0.003997990861535072, |
|
"learning_rate": 3.48421052631579e-07, |
|
"loss": 0.0, |
|
"step": 9675 |
|
}, |
|
{ |
|
"epoch": 21.99546485260771, |
|
"grad_norm": 0.0038647083565592766, |
|
"learning_rate": 3.2210526315789476e-07, |
|
"loss": 0.0, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 22.052154195011337, |
|
"grad_norm": 0.00401474442332983, |
|
"learning_rate": 2.9578947368421053e-07, |
|
"loss": 0.0, |
|
"step": 9725 |
|
}, |
|
{ |
|
"epoch": 22.108843537414966, |
|
"grad_norm": 0.0030088857747614384, |
|
"learning_rate": 2.6947368421052635e-07, |
|
"loss": 0.0001, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 22.165532879818596, |
|
"grad_norm": 0.003003130666911602, |
|
"learning_rate": 2.431578947368421e-07, |
|
"loss": 0.0, |
|
"step": 9775 |
|
}, |
|
{ |
|
"epoch": 22.22222222222222, |
|
"grad_norm": 0.004516700282692909, |
|
"learning_rate": 2.168421052631579e-07, |
|
"loss": 0.0, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 22.27891156462585, |
|
"grad_norm": 0.002650737063959241, |
|
"learning_rate": 1.9052631578947372e-07, |
|
"loss": 0.0, |
|
"step": 9825 |
|
}, |
|
{ |
|
"epoch": 22.335600907029477, |
|
"grad_norm": 0.00888384971767664, |
|
"learning_rate": 1.642105263157895e-07, |
|
"loss": 0.0, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 22.392290249433106, |
|
"grad_norm": 0.0020597188267856836, |
|
"learning_rate": 1.3789473684210528e-07, |
|
"loss": 0.0, |
|
"step": 9875 |
|
}, |
|
{ |
|
"epoch": 22.448979591836736, |
|
"grad_norm": 0.004189135041087866, |
|
"learning_rate": 1.1157894736842106e-07, |
|
"loss": 0.0, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 22.50566893424036, |
|
"grad_norm": 0.003128908108919859, |
|
"learning_rate": 8.526315789473685e-08, |
|
"loss": 0.0, |
|
"step": 9925 |
|
}, |
|
{ |
|
"epoch": 22.56235827664399, |
|
"grad_norm": 0.0031592377927154303, |
|
"learning_rate": 5.8947368421052637e-08, |
|
"loss": 0.0, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 22.61904761904762, |
|
"grad_norm": 0.004137367941439152, |
|
"learning_rate": 3.263157894736842e-08, |
|
"loss": 0.0, |
|
"step": 9975 |
|
}, |
|
{ |
|
"epoch": 22.675736961451246, |
|
"grad_norm": 0.0022842560429126024, |
|
"learning_rate": 6.315789473684211e-09, |
|
"loss": 0.0, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 22.675736961451246, |
|
"eval_loss": 0.23789168894290924, |
|
"eval_runtime": 182.0655, |
|
"eval_samples_per_second": 2.571, |
|
"eval_steps_per_second": 0.428, |
|
"eval_wer": 6.490384615384616, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 22.675736961451246, |
|
"step": 10000, |
|
"total_flos": 1.73151240192e+19, |
|
"train_loss": 0.06552563527043676, |
|
"train_runtime": 7527.9049, |
|
"train_samples_per_second": 7.97, |
|
"train_steps_per_second": 1.328 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 10000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 23, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.73151240192e+19, |
|
"train_batch_size": 6, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|