{
"best_metric": 6.049679487179487,
"best_model_checkpoint": "./exp/whisper-small-taiwanese-asr-v2/checkpoint-7000",
"epoch": 22.675736961451246,
"eval_steps": 1000,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05668934240362812,
"grad_norm": 39.12731170654297,
"learning_rate": 4.2000000000000006e-07,
"loss": 3.459,
"step": 25
},
{
"epoch": 0.11337868480725624,
"grad_norm": 27.20041847229004,
"learning_rate": 9.200000000000001e-07,
"loss": 3.0795,
"step": 50
},
{
"epoch": 0.17006802721088435,
"grad_norm": 23.331146240234375,
"learning_rate": 1.42e-06,
"loss": 2.4576,
"step": 75
},
{
"epoch": 0.22675736961451248,
"grad_norm": 18.951128005981445,
"learning_rate": 1.9200000000000003e-06,
"loss": 1.8809,
"step": 100
},
{
"epoch": 0.2834467120181406,
"grad_norm": 18.83504295349121,
"learning_rate": 2.42e-06,
"loss": 1.3831,
"step": 125
},
{
"epoch": 0.3401360544217687,
"grad_norm": 16.38509750366211,
"learning_rate": 2.92e-06,
"loss": 1.049,
"step": 150
},
{
"epoch": 0.3968253968253968,
"grad_norm": 17.442344665527344,
"learning_rate": 3.4200000000000007e-06,
"loss": 0.8571,
"step": 175
},
{
"epoch": 0.45351473922902497,
"grad_norm": 15.140103340148926,
"learning_rate": 3.920000000000001e-06,
"loss": 0.6812,
"step": 200
},
{
"epoch": 0.5102040816326531,
"grad_norm": 16.342971801757812,
"learning_rate": 4.42e-06,
"loss": 0.5756,
"step": 225
},
{
"epoch": 0.5668934240362812,
"grad_norm": 18.02117347717285,
"learning_rate": 4.92e-06,
"loss": 0.5135,
"step": 250
},
{
"epoch": 0.6235827664399093,
"grad_norm": 19.521883010864258,
"learning_rate": 5.420000000000001e-06,
"loss": 0.4338,
"step": 275
},
{
"epoch": 0.6802721088435374,
"grad_norm": 14.71985149383545,
"learning_rate": 5.92e-06,
"loss": 0.3755,
"step": 300
},
{
"epoch": 0.7369614512471655,
"grad_norm": 6.9398722648620605,
"learning_rate": 6.42e-06,
"loss": 0.3662,
"step": 325
},
{
"epoch": 0.7936507936507936,
"grad_norm": 8.429058074951172,
"learning_rate": 6.92e-06,
"loss": 0.3196,
"step": 350
},
{
"epoch": 0.8503401360544217,
"grad_norm": 7.899880409240723,
"learning_rate": 7.420000000000001e-06,
"loss": 0.3186,
"step": 375
},
{
"epoch": 0.9070294784580499,
"grad_norm": 9.854070663452148,
"learning_rate": 7.92e-06,
"loss": 0.2849,
"step": 400
},
{
"epoch": 0.963718820861678,
"grad_norm": 9.135157585144043,
"learning_rate": 8.42e-06,
"loss": 0.3387,
"step": 425
},
{
"epoch": 1.0204081632653061,
"grad_norm": 7.550724983215332,
"learning_rate": 8.920000000000001e-06,
"loss": 0.2661,
"step": 450
},
{
"epoch": 1.0770975056689343,
"grad_norm": 6.407596111297607,
"learning_rate": 9.42e-06,
"loss": 0.1618,
"step": 475
},
{
"epoch": 1.1337868480725624,
"grad_norm": 8.470088958740234,
"learning_rate": 9.920000000000002e-06,
"loss": 0.2531,
"step": 500
},
{
"epoch": 1.1904761904761905,
"grad_norm": 14.254158020019531,
"learning_rate": 9.977894736842106e-06,
"loss": 0.2415,
"step": 525
},
{
"epoch": 1.2471655328798186,
"grad_norm": 5.5717339515686035,
"learning_rate": 9.951578947368423e-06,
"loss": 0.1863,
"step": 550
},
{
"epoch": 1.3038548752834467,
"grad_norm": 9.035225868225098,
"learning_rate": 9.925263157894738e-06,
"loss": 0.1772,
"step": 575
},
{
"epoch": 1.3605442176870748,
"grad_norm": 12.706698417663574,
"learning_rate": 9.898947368421054e-06,
"loss": 0.1589,
"step": 600
},
{
"epoch": 1.417233560090703,
"grad_norm": 8.393030166625977,
"learning_rate": 9.87263157894737e-06,
"loss": 0.172,
"step": 625
},
{
"epoch": 1.473922902494331,
"grad_norm": 5.842218399047852,
"learning_rate": 9.846315789473684e-06,
"loss": 0.1656,
"step": 650
},
{
"epoch": 1.5306122448979593,
"grad_norm": 9.525617599487305,
"learning_rate": 9.820000000000001e-06,
"loss": 0.186,
"step": 675
},
{
"epoch": 1.5873015873015874,
"grad_norm": 19.124101638793945,
"learning_rate": 9.793684210526316e-06,
"loss": 0.1943,
"step": 700
},
{
"epoch": 1.6439909297052155,
"grad_norm": 9.63739013671875,
"learning_rate": 9.767368421052632e-06,
"loss": 0.1514,
"step": 725
},
{
"epoch": 1.7006802721088436,
"grad_norm": 14.47154712677002,
"learning_rate": 9.741052631578947e-06,
"loss": 0.142,
"step": 750
},
{
"epoch": 1.7573696145124718,
"grad_norm": 6.553714275360107,
"learning_rate": 9.714736842105264e-06,
"loss": 0.1563,
"step": 775
},
{
"epoch": 1.8140589569160999,
"grad_norm": 6.272864818572998,
"learning_rate": 9.68842105263158e-06,
"loss": 0.1454,
"step": 800
},
{
"epoch": 1.870748299319728,
"grad_norm": 6.724349021911621,
"learning_rate": 9.662105263157896e-06,
"loss": 0.3289,
"step": 825
},
{
"epoch": 1.927437641723356,
"grad_norm": 6.75054931640625,
"learning_rate": 9.635789473684212e-06,
"loss": 0.1455,
"step": 850
},
{
"epoch": 1.9841269841269842,
"grad_norm": 7.212646961212158,
"learning_rate": 9.609473684210527e-06,
"loss": 0.2098,
"step": 875
},
{
"epoch": 2.0408163265306123,
"grad_norm": 4.189349174499512,
"learning_rate": 9.583157894736842e-06,
"loss": 0.1016,
"step": 900
},
{
"epoch": 2.0975056689342404,
"grad_norm": 9.265031814575195,
"learning_rate": 9.556842105263159e-06,
"loss": 0.1116,
"step": 925
},
{
"epoch": 2.1541950113378685,
"grad_norm": 4.884438991546631,
"learning_rate": 9.530526315789474e-06,
"loss": 0.1209,
"step": 950
},
{
"epoch": 2.2108843537414966,
"grad_norm": 4.258934497833252,
"learning_rate": 9.50421052631579e-06,
"loss": 0.1331,
"step": 975
},
{
"epoch": 2.2675736961451247,
"grad_norm": 5.421435832977295,
"learning_rate": 9.477894736842106e-06,
"loss": 0.083,
"step": 1000
},
{
"epoch": 2.2675736961451247,
"eval_loss": 0.1953069269657135,
"eval_runtime": 158.2725,
"eval_samples_per_second": 2.957,
"eval_steps_per_second": 0.493,
"eval_wer": 8.173076923076923,
"step": 1000
},
{
"epoch": 2.324263038548753,
"grad_norm": 15.091805458068848,
"learning_rate": 9.451578947368422e-06,
"loss": 0.0908,
"step": 1025
},
{
"epoch": 2.380952380952381,
"grad_norm": 9.11117172241211,
"learning_rate": 9.425263157894737e-06,
"loss": 0.1528,
"step": 1050
},
{
"epoch": 2.437641723356009,
"grad_norm": 8.174238204956055,
"learning_rate": 9.398947368421052e-06,
"loss": 0.059,
"step": 1075
},
{
"epoch": 2.494331065759637,
"grad_norm": 12.375757217407227,
"learning_rate": 9.372631578947369e-06,
"loss": 0.0773,
"step": 1100
},
{
"epoch": 2.5510204081632653,
"grad_norm": 8.518278121948242,
"learning_rate": 9.346315789473684e-06,
"loss": 0.0714,
"step": 1125
},
{
"epoch": 2.6077097505668934,
"grad_norm": 7.095608234405518,
"learning_rate": 9.32e-06,
"loss": 0.0778,
"step": 1150
},
{
"epoch": 2.6643990929705215,
"grad_norm": 5.508459568023682,
"learning_rate": 9.293684210526317e-06,
"loss": 0.0696,
"step": 1175
},
{
"epoch": 2.7210884353741496,
"grad_norm": 1.7915226221084595,
"learning_rate": 9.267368421052632e-06,
"loss": 0.0504,
"step": 1200
},
{
"epoch": 2.7777777777777777,
"grad_norm": 4.419779300689697,
"learning_rate": 9.241052631578949e-06,
"loss": 0.0667,
"step": 1225
},
{
"epoch": 2.834467120181406,
"grad_norm": 4.396119594573975,
"learning_rate": 9.214736842105264e-06,
"loss": 0.1037,
"step": 1250
},
{
"epoch": 2.891156462585034,
"grad_norm": 4.604401111602783,
"learning_rate": 9.18842105263158e-06,
"loss": 0.1276,
"step": 1275
},
{
"epoch": 2.947845804988662,
"grad_norm": 6.504410743713379,
"learning_rate": 9.162105263157895e-06,
"loss": 0.0683,
"step": 1300
},
{
"epoch": 3.00453514739229,
"grad_norm": 3.4603614807128906,
"learning_rate": 9.13578947368421e-06,
"loss": 0.0548,
"step": 1325
},
{
"epoch": 3.061224489795918,
"grad_norm": 8.05783748626709,
"learning_rate": 9.109473684210527e-06,
"loss": 0.0423,
"step": 1350
},
{
"epoch": 3.1179138321995463,
"grad_norm": 1.4570063352584839,
"learning_rate": 9.083157894736842e-06,
"loss": 0.0388,
"step": 1375
},
{
"epoch": 3.1746031746031744,
"grad_norm": 2.493945360183716,
"learning_rate": 9.056842105263159e-06,
"loss": 0.0704,
"step": 1400
},
{
"epoch": 3.2312925170068025,
"grad_norm": 3.1329710483551025,
"learning_rate": 9.030526315789474e-06,
"loss": 0.0418,
"step": 1425
},
{
"epoch": 3.287981859410431,
"grad_norm": 0.9819092154502869,
"learning_rate": 9.00421052631579e-06,
"loss": 0.0572,
"step": 1450
},
{
"epoch": 3.3446712018140587,
"grad_norm": 3.2691588401794434,
"learning_rate": 8.977894736842107e-06,
"loss": 0.0391,
"step": 1475
},
{
"epoch": 3.4013605442176873,
"grad_norm": 0.9585368037223816,
"learning_rate": 8.951578947368422e-06,
"loss": 0.0468,
"step": 1500
},
{
"epoch": 3.458049886621315,
"grad_norm": 5.486790657043457,
"learning_rate": 8.925263157894739e-06,
"loss": 0.0423,
"step": 1525
},
{
"epoch": 3.5147392290249435,
"grad_norm": 7.715363502502441,
"learning_rate": 8.898947368421054e-06,
"loss": 0.119,
"step": 1550
},
{
"epoch": 3.571428571428571,
"grad_norm": 0.7970800995826721,
"learning_rate": 8.872631578947369e-06,
"loss": 0.0264,
"step": 1575
},
{
"epoch": 3.6281179138321997,
"grad_norm": 9.640973091125488,
"learning_rate": 8.846315789473685e-06,
"loss": 0.0305,
"step": 1600
},
{
"epoch": 3.6848072562358274,
"grad_norm": 5.621988296508789,
"learning_rate": 8.82e-06,
"loss": 0.0329,
"step": 1625
},
{
"epoch": 3.741496598639456,
"grad_norm": 2.357621908187866,
"learning_rate": 8.793684210526317e-06,
"loss": 0.0657,
"step": 1650
},
{
"epoch": 3.798185941043084,
"grad_norm": 3.6409835815429688,
"learning_rate": 8.767368421052632e-06,
"loss": 0.0311,
"step": 1675
},
{
"epoch": 3.854875283446712,
"grad_norm": 2.0802805423736572,
"learning_rate": 8.741052631578949e-06,
"loss": 0.0763,
"step": 1700
},
{
"epoch": 3.9115646258503403,
"grad_norm": 14.157549858093262,
"learning_rate": 8.714736842105264e-06,
"loss": 0.029,
"step": 1725
},
{
"epoch": 3.9682539682539684,
"grad_norm": 3.303739309310913,
"learning_rate": 8.688421052631579e-06,
"loss": 0.0429,
"step": 1750
},
{
"epoch": 4.024943310657596,
"grad_norm": 3.3203935623168945,
"learning_rate": 8.662105263157895e-06,
"loss": 0.0212,
"step": 1775
},
{
"epoch": 4.081632653061225,
"grad_norm": 0.7093961834907532,
"learning_rate": 8.63578947368421e-06,
"loss": 0.012,
"step": 1800
},
{
"epoch": 4.138321995464852,
"grad_norm": 2.5663654804229736,
"learning_rate": 8.609473684210527e-06,
"loss": 0.0232,
"step": 1825
},
{
"epoch": 4.195011337868481,
"grad_norm": 27.419864654541016,
"learning_rate": 8.583157894736843e-06,
"loss": 0.0192,
"step": 1850
},
{
"epoch": 4.2517006802721085,
"grad_norm": 0.45838263630867004,
"learning_rate": 8.556842105263158e-06,
"loss": 0.0577,
"step": 1875
},
{
"epoch": 4.308390022675737,
"grad_norm": 1.192967176437378,
"learning_rate": 8.530526315789475e-06,
"loss": 0.0622,
"step": 1900
},
{
"epoch": 4.365079365079365,
"grad_norm": 5.143068790435791,
"learning_rate": 8.50421052631579e-06,
"loss": 0.0217,
"step": 1925
},
{
"epoch": 4.421768707482993,
"grad_norm": 3.8326940536499023,
"learning_rate": 8.477894736842107e-06,
"loss": 0.0212,
"step": 1950
},
{
"epoch": 4.478458049886621,
"grad_norm": 1.7538135051727295,
"learning_rate": 8.451578947368422e-06,
"loss": 0.0499,
"step": 1975
},
{
"epoch": 4.535147392290249,
"grad_norm": 0.5768720507621765,
"learning_rate": 8.425263157894737e-06,
"loss": 0.0444,
"step": 2000
},
{
"epoch": 4.535147392290249,
"eval_loss": 0.19730134308338165,
"eval_runtime": 158.9638,
"eval_samples_per_second": 2.944,
"eval_steps_per_second": 0.491,
"eval_wer": 6.944444444444445,
"step": 2000
},
{
"epoch": 4.591836734693878,
"grad_norm": 4.980081558227539,
"learning_rate": 8.398947368421053e-06,
"loss": 0.0205,
"step": 2025
},
{
"epoch": 4.648526077097506,
"grad_norm": 2.384981155395508,
"learning_rate": 8.372631578947368e-06,
"loss": 0.027,
"step": 2050
},
{
"epoch": 4.705215419501133,
"grad_norm": 1.0443973541259766,
"learning_rate": 8.346315789473685e-06,
"loss": 0.0537,
"step": 2075
},
{
"epoch": 4.761904761904762,
"grad_norm": 1.0461288690567017,
"learning_rate": 8.32e-06,
"loss": 0.0146,
"step": 2100
},
{
"epoch": 4.81859410430839,
"grad_norm": 6.465195655822754,
"learning_rate": 8.293684210526317e-06,
"loss": 0.0349,
"step": 2125
},
{
"epoch": 4.875283446712018,
"grad_norm": 4.054196834564209,
"learning_rate": 8.267368421052632e-06,
"loss": 0.0186,
"step": 2150
},
{
"epoch": 4.931972789115647,
"grad_norm": 1.7794383764266968,
"learning_rate": 8.241052631578948e-06,
"loss": 0.0224,
"step": 2175
},
{
"epoch": 4.988662131519274,
"grad_norm": 5.188144683837891,
"learning_rate": 8.214736842105265e-06,
"loss": 0.047,
"step": 2200
},
{
"epoch": 5.045351473922903,
"grad_norm": 0.08940722048282623,
"learning_rate": 8.18842105263158e-06,
"loss": 0.0186,
"step": 2225
},
{
"epoch": 5.1020408163265305,
"grad_norm": 3.670670747756958,
"learning_rate": 8.162105263157895e-06,
"loss": 0.0183,
"step": 2250
},
{
"epoch": 5.158730158730159,
"grad_norm": 2.3486833572387695,
"learning_rate": 8.135789473684212e-06,
"loss": 0.015,
"step": 2275
},
{
"epoch": 5.215419501133787,
"grad_norm": 4.572961807250977,
"learning_rate": 8.109473684210527e-06,
"loss": 0.0239,
"step": 2300
},
{
"epoch": 5.272108843537415,
"grad_norm": 1.114425539970398,
"learning_rate": 8.083157894736843e-06,
"loss": 0.0396,
"step": 2325
},
{
"epoch": 5.328798185941043,
"grad_norm": 0.35853487253189087,
"learning_rate": 8.056842105263158e-06,
"loss": 0.0185,
"step": 2350
},
{
"epoch": 5.3854875283446715,
"grad_norm": 5.747114181518555,
"learning_rate": 8.030526315789475e-06,
"loss": 0.0136,
"step": 2375
},
{
"epoch": 5.442176870748299,
"grad_norm": 0.33728256821632385,
"learning_rate": 8.00421052631579e-06,
"loss": 0.0366,
"step": 2400
},
{
"epoch": 5.498866213151928,
"grad_norm": 13.636420249938965,
"learning_rate": 7.977894736842105e-06,
"loss": 0.0441,
"step": 2425
},
{
"epoch": 5.555555555555555,
"grad_norm": 2.6928389072418213,
"learning_rate": 7.951578947368421e-06,
"loss": 0.014,
"step": 2450
},
{
"epoch": 5.612244897959184,
"grad_norm": 0.18423891067504883,
"learning_rate": 7.925263157894736e-06,
"loss": 0.007,
"step": 2475
},
{
"epoch": 5.668934240362812,
"grad_norm": 9.745643615722656,
"learning_rate": 7.898947368421053e-06,
"loss": 0.0065,
"step": 2500
},
{
"epoch": 5.72562358276644,
"grad_norm": 1.0876612663269043,
"learning_rate": 7.87263157894737e-06,
"loss": 0.0464,
"step": 2525
},
{
"epoch": 5.782312925170068,
"grad_norm": 3.2040324211120605,
"learning_rate": 7.846315789473685e-06,
"loss": 0.0085,
"step": 2550
},
{
"epoch": 5.839002267573696,
"grad_norm": 0.07621826976537704,
"learning_rate": 7.820000000000001e-06,
"loss": 0.0417,
"step": 2575
},
{
"epoch": 5.895691609977324,
"grad_norm": 0.11689532548189163,
"learning_rate": 7.793684210526316e-06,
"loss": 0.0113,
"step": 2600
},
{
"epoch": 5.9523809523809526,
"grad_norm": 1.2615737915039062,
"learning_rate": 7.767368421052633e-06,
"loss": 0.017,
"step": 2625
},
{
"epoch": 6.00907029478458,
"grad_norm": 6.465045928955078,
"learning_rate": 7.741052631578948e-06,
"loss": 0.0134,
"step": 2650
},
{
"epoch": 6.065759637188209,
"grad_norm": 0.6422730088233948,
"learning_rate": 7.714736842105263e-06,
"loss": 0.0407,
"step": 2675
},
{
"epoch": 6.122448979591836,
"grad_norm": 0.07282353192567825,
"learning_rate": 7.68842105263158e-06,
"loss": 0.0066,
"step": 2700
},
{
"epoch": 6.179138321995465,
"grad_norm": 0.13422216475009918,
"learning_rate": 7.662105263157895e-06,
"loss": 0.0345,
"step": 2725
},
{
"epoch": 6.235827664399093,
"grad_norm": 4.29299783706665,
"learning_rate": 7.635789473684211e-06,
"loss": 0.0047,
"step": 2750
},
{
"epoch": 6.292517006802721,
"grad_norm": 0.05262218415737152,
"learning_rate": 7.609473684210526e-06,
"loss": 0.0023,
"step": 2775
},
{
"epoch": 6.349206349206349,
"grad_norm": 6.577340602874756,
"learning_rate": 7.583157894736842e-06,
"loss": 0.005,
"step": 2800
},
{
"epoch": 6.405895691609977,
"grad_norm": 0.2815414071083069,
"learning_rate": 7.556842105263158e-06,
"loss": 0.0155,
"step": 2825
},
{
"epoch": 6.462585034013605,
"grad_norm": 0.05367182940244675,
"learning_rate": 7.5305263157894745e-06,
"loss": 0.04,
"step": 2850
},
{
"epoch": 6.519274376417234,
"grad_norm": 0.14756247401237488,
"learning_rate": 7.50421052631579e-06,
"loss": 0.0049,
"step": 2875
},
{
"epoch": 6.575963718820862,
"grad_norm": 2.2667605876922607,
"learning_rate": 7.477894736842106e-06,
"loss": 0.0028,
"step": 2900
},
{
"epoch": 6.63265306122449,
"grad_norm": 0.13220186531543732,
"learning_rate": 7.451578947368422e-06,
"loss": 0.0114,
"step": 2925
},
{
"epoch": 6.6893424036281175,
"grad_norm": 2.413485288619995,
"learning_rate": 7.425263157894738e-06,
"loss": 0.0028,
"step": 2950
},
{
"epoch": 6.746031746031746,
"grad_norm": 1.9141589403152466,
"learning_rate": 7.398947368421054e-06,
"loss": 0.0079,
"step": 2975
},
{
"epoch": 6.802721088435375,
"grad_norm": 5.837501525878906,
"learning_rate": 7.3726315789473694e-06,
"loss": 0.0294,
"step": 3000
},
{
"epoch": 6.802721088435375,
"eval_loss": 0.19840003550052643,
"eval_runtime": 156.6444,
"eval_samples_per_second": 2.988,
"eval_steps_per_second": 0.498,
"eval_wer": 6.517094017094018,
"step": 3000
},
{
"epoch": 6.859410430839002,
"grad_norm": 0.0996256172657013,
"learning_rate": 7.346315789473684e-06,
"loss": 0.0027,
"step": 3025
},
{
"epoch": 6.91609977324263,
"grad_norm": 0.028051115572452545,
"learning_rate": 7.32e-06,
"loss": 0.008,
"step": 3050
},
{
"epoch": 6.9727891156462585,
"grad_norm": 0.14238622784614563,
"learning_rate": 7.293684210526316e-06,
"loss": 0.0263,
"step": 3075
},
{
"epoch": 7.029478458049887,
"grad_norm": 0.112076535820961,
"learning_rate": 7.267368421052632e-06,
"loss": 0.0192,
"step": 3100
},
{
"epoch": 7.086167800453515,
"grad_norm": 0.14218856394290924,
"learning_rate": 7.241052631578948e-06,
"loss": 0.0099,
"step": 3125
},
{
"epoch": 7.142857142857143,
"grad_norm": 0.08735861629247665,
"learning_rate": 7.2147368421052635e-06,
"loss": 0.0013,
"step": 3150
},
{
"epoch": 7.199546485260771,
"grad_norm": 0.07858515530824661,
"learning_rate": 7.18842105263158e-06,
"loss": 0.043,
"step": 3175
},
{
"epoch": 7.2562358276643995,
"grad_norm": 0.04395943507552147,
"learning_rate": 7.162105263157896e-06,
"loss": 0.0196,
"step": 3200
},
{
"epoch": 7.312925170068027,
"grad_norm": 0.13118867576122284,
"learning_rate": 7.135789473684212e-06,
"loss": 0.0032,
"step": 3225
},
{
"epoch": 7.369614512471656,
"grad_norm": 0.5723868608474731,
"learning_rate": 7.109473684210528e-06,
"loss": 0.0193,
"step": 3250
},
{
"epoch": 7.426303854875283,
"grad_norm": 0.17946386337280273,
"learning_rate": 7.08421052631579e-06,
"loss": 0.0066,
"step": 3275
},
{
"epoch": 7.482993197278912,
"grad_norm": 0.31329983472824097,
"learning_rate": 7.057894736842106e-06,
"loss": 0.004,
"step": 3300
},
{
"epoch": 7.5396825396825395,
"grad_norm": 0.08119330555200577,
"learning_rate": 7.031578947368422e-06,
"loss": 0.0079,
"step": 3325
},
{
"epoch": 7.596371882086168,
"grad_norm": 0.2911977767944336,
"learning_rate": 7.005263157894738e-06,
"loss": 0.0053,
"step": 3350
},
{
"epoch": 7.653061224489796,
"grad_norm": 0.024690864607691765,
"learning_rate": 6.9789473684210525e-06,
"loss": 0.0054,
"step": 3375
},
{
"epoch": 7.709750566893424,
"grad_norm": 0.09139782190322876,
"learning_rate": 6.953684210526316e-06,
"loss": 0.0043,
"step": 3400
},
{
"epoch": 7.766439909297052,
"grad_norm": 0.3346998393535614,
"learning_rate": 6.9273684210526326e-06,
"loss": 0.0063,
"step": 3425
},
{
"epoch": 7.8231292517006805,
"grad_norm": 2.5169262886047363,
"learning_rate": 6.901052631578948e-06,
"loss": 0.0036,
"step": 3450
},
{
"epoch": 7.879818594104308,
"grad_norm": 0.05118921771645546,
"learning_rate": 6.874736842105264e-06,
"loss": 0.0152,
"step": 3475
},
{
"epoch": 7.936507936507937,
"grad_norm": 0.141609787940979,
"learning_rate": 6.84842105263158e-06,
"loss": 0.0029,
"step": 3500
},
{
"epoch": 7.993197278911564,
"grad_norm": 0.059452034533023834,
"learning_rate": 6.822105263157896e-06,
"loss": 0.0084,
"step": 3525
},
{
"epoch": 8.049886621315192,
"grad_norm": 9.327384948730469,
"learning_rate": 6.795789473684211e-06,
"loss": 0.0056,
"step": 3550
},
{
"epoch": 8.106575963718821,
"grad_norm": 0.031346723437309265,
"learning_rate": 6.769473684210527e-06,
"loss": 0.0095,
"step": 3575
},
{
"epoch": 8.16326530612245,
"grad_norm": 0.03407048434019089,
"learning_rate": 6.7431578947368425e-06,
"loss": 0.031,
"step": 3600
},
{
"epoch": 8.219954648526077,
"grad_norm": 0.023022985085844994,
"learning_rate": 6.716842105263158e-06,
"loss": 0.0061,
"step": 3625
},
{
"epoch": 8.276643990929704,
"grad_norm": 0.1935151070356369,
"learning_rate": 6.690526315789474e-06,
"loss": 0.0049,
"step": 3650
},
{
"epoch": 8.333333333333334,
"grad_norm": 1.2079312801361084,
"learning_rate": 6.66421052631579e-06,
"loss": 0.0107,
"step": 3675
},
{
"epoch": 8.390022675736962,
"grad_norm": 0.20077985525131226,
"learning_rate": 6.637894736842106e-06,
"loss": 0.0021,
"step": 3700
},
{
"epoch": 8.44671201814059,
"grad_norm": 18.450538635253906,
"learning_rate": 6.611578947368421e-06,
"loss": 0.0159,
"step": 3725
},
{
"epoch": 8.503401360544217,
"grad_norm": 0.03869379311800003,
"learning_rate": 6.585263157894738e-06,
"loss": 0.0052,
"step": 3750
},
{
"epoch": 8.560090702947846,
"grad_norm": 0.026166923344135284,
"learning_rate": 6.558947368421054e-06,
"loss": 0.0007,
"step": 3775
},
{
"epoch": 8.616780045351474,
"grad_norm": 0.02891625091433525,
"learning_rate": 6.532631578947369e-06,
"loss": 0.0041,
"step": 3800
},
{
"epoch": 8.673469387755102,
"grad_norm": 0.08599188923835754,
"learning_rate": 6.506315789473685e-06,
"loss": 0.0009,
"step": 3825
},
{
"epoch": 8.73015873015873,
"grad_norm": 0.017651915550231934,
"learning_rate": 6.480000000000001e-06,
"loss": 0.0211,
"step": 3850
},
{
"epoch": 8.786848072562359,
"grad_norm": 14.024144172668457,
"learning_rate": 6.4536842105263165e-06,
"loss": 0.021,
"step": 3875
},
{
"epoch": 8.843537414965986,
"grad_norm": 0.057578567415475845,
"learning_rate": 6.427368421052632e-06,
"loss": 0.0021,
"step": 3900
},
{
"epoch": 8.900226757369614,
"grad_norm": 0.04060237482190132,
"learning_rate": 6.401052631578948e-06,
"loss": 0.0025,
"step": 3925
},
{
"epoch": 8.956916099773242,
"grad_norm": 0.04346761852502823,
"learning_rate": 6.374736842105264e-06,
"loss": 0.0055,
"step": 3950
},
{
"epoch": 9.013605442176871,
"grad_norm": 0.014886971563100815,
"learning_rate": 6.348421052631579e-06,
"loss": 0.0067,
"step": 3975
},
{
"epoch": 9.070294784580499,
"grad_norm": 2.309483289718628,
"learning_rate": 6.322105263157895e-06,
"loss": 0.0334,
"step": 4000
},
{
"epoch": 9.070294784580499,
"eval_loss": 0.20987384021282196,
"eval_runtime": 157.5775,
"eval_samples_per_second": 2.97,
"eval_steps_per_second": 0.495,
"eval_wer": 6.303418803418803,
"step": 4000
},
{
"epoch": 9.126984126984127,
"grad_norm": 0.2992580235004425,
"learning_rate": 6.2957894736842105e-06,
"loss": 0.0048,
"step": 4025
},
{
"epoch": 9.183673469387756,
"grad_norm": 0.1327856183052063,
"learning_rate": 6.269473684210526e-06,
"loss": 0.0003,
"step": 4050
},
{
"epoch": 9.240362811791384,
"grad_norm": 0.034492090344429016,
"learning_rate": 6.243157894736842e-06,
"loss": 0.0007,
"step": 4075
},
{
"epoch": 9.297052154195011,
"grad_norm": 0.021785929799079895,
"learning_rate": 6.216842105263159e-06,
"loss": 0.0013,
"step": 4100
},
{
"epoch": 9.353741496598639,
"grad_norm": 0.17917415499687195,
"learning_rate": 6.190526315789475e-06,
"loss": 0.0186,
"step": 4125
},
{
"epoch": 9.410430839002268,
"grad_norm": 0.04263261333107948,
"learning_rate": 6.1642105263157905e-06,
"loss": 0.0011,
"step": 4150
},
{
"epoch": 9.467120181405896,
"grad_norm": 0.06537426263093948,
"learning_rate": 6.137894736842106e-06,
"loss": 0.0007,
"step": 4175
},
{
"epoch": 9.523809523809524,
"grad_norm": 0.05984114482998848,
"learning_rate": 6.111578947368422e-06,
"loss": 0.0031,
"step": 4200
},
{
"epoch": 9.580498866213151,
"grad_norm": 0.0186479389667511,
"learning_rate": 6.085263157894737e-06,
"loss": 0.0067,
"step": 4225
},
{
"epoch": 9.63718820861678,
"grad_norm": 0.01856757327914238,
"learning_rate": 6.058947368421053e-06,
"loss": 0.0012,
"step": 4250
},
{
"epoch": 9.693877551020408,
"grad_norm": 0.01589180715382099,
"learning_rate": 6.032631578947369e-06,
"loss": 0.0033,
"step": 4275
},
{
"epoch": 9.750566893424036,
"grad_norm": 0.013659857213497162,
"learning_rate": 6.0063157894736845e-06,
"loss": 0.0136,
"step": 4300
},
{
"epoch": 9.807256235827664,
"grad_norm": 0.07311205565929413,
"learning_rate": 5.98e-06,
"loss": 0.0114,
"step": 4325
},
{
"epoch": 9.863945578231293,
"grad_norm": 0.00827726535499096,
"learning_rate": 5.953684210526316e-06,
"loss": 0.0006,
"step": 4350
},
{
"epoch": 9.920634920634921,
"grad_norm": 0.02168506383895874,
"learning_rate": 5.927368421052632e-06,
"loss": 0.0141,
"step": 4375
},
{
"epoch": 9.977324263038549,
"grad_norm": 0.09996296465396881,
"learning_rate": 5.901052631578947e-06,
"loss": 0.0022,
"step": 4400
},
{
"epoch": 10.034013605442176,
"grad_norm": 0.016953065991401672,
"learning_rate": 5.8747368421052645e-06,
"loss": 0.0024,
"step": 4425
},
{
"epoch": 10.090702947845806,
"grad_norm": 0.0506548210978508,
"learning_rate": 5.84842105263158e-06,
"loss": 0.0049,
"step": 4450
},
{
"epoch": 10.147392290249433,
"grad_norm": 0.017473401501774788,
"learning_rate": 5.822105263157895e-06,
"loss": 0.0009,
"step": 4475
},
{
"epoch": 10.204081632653061,
"grad_norm": 0.016834545880556107,
"learning_rate": 5.795789473684211e-06,
"loss": 0.0016,
"step": 4500
},
{
"epoch": 10.260770975056689,
"grad_norm": 0.07962112873792648,
"learning_rate": 5.769473684210527e-06,
"loss": 0.0005,
"step": 4525
},
{
"epoch": 10.317460317460318,
"grad_norm": 0.020868808031082153,
"learning_rate": 5.743157894736843e-06,
"loss": 0.0162,
"step": 4550
},
{
"epoch": 10.374149659863946,
"grad_norm": 0.0351821593940258,
"learning_rate": 5.7168421052631585e-06,
"loss": 0.0217,
"step": 4575
},
{
"epoch": 10.430839002267573,
"grad_norm": 0.036179013550281525,
"learning_rate": 5.690526315789474e-06,
"loss": 0.0022,
"step": 4600
},
{
"epoch": 10.487528344671201,
"grad_norm": 0.04735976830124855,
"learning_rate": 5.66421052631579e-06,
"loss": 0.0061,
"step": 4625
},
{
"epoch": 10.54421768707483,
"grad_norm": 0.015306883491575718,
"learning_rate": 5.637894736842105e-06,
"loss": 0.0009,
"step": 4650
},
{
"epoch": 10.600907029478458,
"grad_norm": 0.015260276384651661,
"learning_rate": 5.611578947368421e-06,
"loss": 0.0067,
"step": 4675
},
{
"epoch": 10.657596371882086,
"grad_norm": 0.029503723606467247,
"learning_rate": 5.585263157894737e-06,
"loss": 0.0006,
"step": 4700
},
{
"epoch": 10.714285714285714,
"grad_norm": 0.017955463379621506,
"learning_rate": 5.558947368421053e-06,
"loss": 0.0005,
"step": 4725
},
{
"epoch": 10.770975056689343,
"grad_norm": 0.11942701041698456,
"learning_rate": 5.532631578947368e-06,
"loss": 0.003,
"step": 4750
},
{
"epoch": 10.82766439909297,
"grad_norm": 0.02495400980114937,
"learning_rate": 5.506315789473685e-06,
"loss": 0.0017,
"step": 4775
},
{
"epoch": 10.884353741496598,
"grad_norm": 0.1065245270729065,
"learning_rate": 5.480000000000001e-06,
"loss": 0.0047,
"step": 4800
},
{
"epoch": 10.941043083900226,
"grad_norm": 0.10682205855846405,
"learning_rate": 5.453684210526317e-06,
"loss": 0.0045,
"step": 4825
},
{
"epoch": 10.997732426303855,
"grad_norm": 0.016077643260359764,
"learning_rate": 5.4273684210526325e-06,
"loss": 0.0007,
"step": 4850
},
{
"epoch": 11.054421768707483,
"grad_norm": 0.020081788301467896,
"learning_rate": 5.401052631578948e-06,
"loss": 0.0004,
"step": 4875
},
{
"epoch": 11.11111111111111,
"grad_norm": 0.017382116988301277,
"learning_rate": 5.374736842105263e-06,
"loss": 0.0012,
"step": 4900
},
{
"epoch": 11.167800453514738,
"grad_norm": 0.5074162483215332,
"learning_rate": 5.348421052631579e-06,
"loss": 0.0038,
"step": 4925
},
{
"epoch": 11.224489795918368,
"grad_norm": 0.011525845155119896,
"learning_rate": 5.322105263157895e-06,
"loss": 0.0011,
"step": 4950
},
{
"epoch": 11.281179138321995,
"grad_norm": 0.016790462657809258,
"learning_rate": 5.295789473684211e-06,
"loss": 0.0013,
"step": 4975
},
{
"epoch": 11.337868480725623,
"grad_norm": 2.887037754058838,
"learning_rate": 5.269473684210527e-06,
"loss": 0.0011,
"step": 5000
},
{
"epoch": 11.337868480725623,
"eval_loss": 0.2228717803955078,
"eval_runtime": 177.4582,
"eval_samples_per_second": 2.637,
"eval_steps_per_second": 0.44,
"eval_wer": 6.3835470085470085,
"step": 5000
},
{
"epoch": 11.39455782312925,
"grad_norm": 0.02366207167506218,
"learning_rate": 5.243157894736842e-06,
"loss": 0.0002,
"step": 5025
},
{
"epoch": 11.45124716553288,
"grad_norm": 1.1653227806091309,
"learning_rate": 5.216842105263158e-06,
"loss": 0.0052,
"step": 5050
},
{
"epoch": 11.507936507936508,
"grad_norm": 0.2194329798221588,
"learning_rate": 5.190526315789474e-06,
"loss": 0.0014,
"step": 5075
},
{
"epoch": 11.564625850340136,
"grad_norm": 0.03989162668585777,
"learning_rate": 5.164210526315791e-06,
"loss": 0.0006,
"step": 5100
},
{
"epoch": 11.621315192743765,
"grad_norm": 0.02930096909403801,
"learning_rate": 5.1378947368421065e-06,
"loss": 0.003,
"step": 5125
},
{
"epoch": 11.678004535147393,
"grad_norm": 0.011350632645189762,
"learning_rate": 5.1115789473684215e-06,
"loss": 0.0039,
"step": 5150
},
{
"epoch": 11.73469387755102,
"grad_norm": 0.010673941113054752,
"learning_rate": 5.085263157894737e-06,
"loss": 0.0004,
"step": 5175
},
{
"epoch": 11.791383219954648,
"grad_norm": 0.05096409469842911,
"learning_rate": 5.058947368421053e-06,
"loss": 0.0048,
"step": 5200
},
{
"epoch": 11.848072562358277,
"grad_norm": 0.011835623532533646,
"learning_rate": 5.032631578947369e-06,
"loss": 0.0093,
"step": 5225
},
{
"epoch": 11.904761904761905,
"grad_norm": 0.013359226286411285,
"learning_rate": 5.006315789473685e-06,
"loss": 0.0062,
"step": 5250
},
{
"epoch": 11.961451247165533,
"grad_norm": 0.03088083118200302,
"learning_rate": 4.980000000000001e-06,
"loss": 0.0002,
"step": 5275
},
{
"epoch": 12.01814058956916,
"grad_norm": 0.02855735644698143,
"learning_rate": 4.953684210526316e-06,
"loss": 0.0002,
"step": 5300
},
{
"epoch": 12.07482993197279,
"grad_norm": 0.04174978658556938,
"learning_rate": 4.927368421052631e-06,
"loss": 0.0002,
"step": 5325
},
{
"epoch": 12.131519274376418,
"grad_norm": 0.013824643567204475,
"learning_rate": 4.901052631578947e-06,
"loss": 0.0002,
"step": 5350
},
{
"epoch": 12.188208616780045,
"grad_norm": 0.010323552414774895,
"learning_rate": 4.874736842105264e-06,
"loss": 0.0002,
"step": 5375
},
{
"epoch": 12.244897959183673,
"grad_norm": 7.9211883544921875,
"learning_rate": 4.84842105263158e-06,
"loss": 0.0041,
"step": 5400
},
{
"epoch": 12.301587301587302,
"grad_norm": 0.011090376414358616,
"learning_rate": 4.8221052631578955e-06,
"loss": 0.0046,
"step": 5425
},
{
"epoch": 12.35827664399093,
"grad_norm": 0.993200957775116,
"learning_rate": 4.7957894736842105e-06,
"loss": 0.0077,
"step": 5450
},
{
"epoch": 12.414965986394558,
"grad_norm": 0.5387348532676697,
"learning_rate": 4.769473684210526e-06,
"loss": 0.0014,
"step": 5475
},
{
"epoch": 12.471655328798185,
"grad_norm": 0.030508503317832947,
"learning_rate": 4.743157894736842e-06,
"loss": 0.0029,
"step": 5500
},
{
"epoch": 12.528344671201815,
"grad_norm": 0.015367632731795311,
"learning_rate": 4.716842105263159e-06,
"loss": 0.002,
"step": 5525
},
{
"epoch": 12.585034013605442,
"grad_norm": 0.01937568373978138,
"learning_rate": 4.690526315789475e-06,
"loss": 0.0016,
"step": 5550
},
{
"epoch": 12.64172335600907,
"grad_norm": 0.06024911627173424,
"learning_rate": 4.6642105263157896e-06,
"loss": 0.0016,
"step": 5575
},
{
"epoch": 12.698412698412698,
"grad_norm": 0.005301471799612045,
"learning_rate": 4.637894736842105e-06,
"loss": 0.0014,
"step": 5600
},
{
"epoch": 12.755102040816327,
"grad_norm": 0.03389279916882515,
"learning_rate": 4.611578947368421e-06,
"loss": 0.0017,
"step": 5625
},
{
"epoch": 12.811791383219955,
"grad_norm": 0.13554659485816956,
"learning_rate": 4.585263157894737e-06,
"loss": 0.0105,
"step": 5650
},
{
"epoch": 12.868480725623582,
"grad_norm": 0.018741684034466743,
"learning_rate": 4.558947368421053e-06,
"loss": 0.004,
"step": 5675
},
{
"epoch": 12.92517006802721,
"grad_norm": 0.1560622751712799,
"learning_rate": 4.532631578947369e-06,
"loss": 0.0005,
"step": 5700
},
{
"epoch": 12.98185941043084,
"grad_norm": 0.1737220734357834,
"learning_rate": 4.5063157894736845e-06,
"loss": 0.0019,
"step": 5725
},
{
"epoch": 13.038548752834467,
"grad_norm": 0.02937311679124832,
"learning_rate": 4.48e-06,
"loss": 0.002,
"step": 5750
},
{
"epoch": 13.095238095238095,
"grad_norm": 0.006793774198740721,
"learning_rate": 4.453684210526316e-06,
"loss": 0.0005,
"step": 5775
},
{
"epoch": 13.151927437641723,
"grad_norm": 0.014649259857833385,
"learning_rate": 4.427368421052632e-06,
"loss": 0.0025,
"step": 5800
},
{
"epoch": 13.208616780045352,
"grad_norm": 0.0212300606071949,
"learning_rate": 4.401052631578948e-06,
"loss": 0.0014,
"step": 5825
},
{
"epoch": 13.26530612244898,
"grad_norm": 0.013082647696137428,
"learning_rate": 4.374736842105264e-06,
"loss": 0.0039,
"step": 5850
},
{
"epoch": 13.321995464852607,
"grad_norm": 0.05612126737833023,
"learning_rate": 4.348421052631579e-06,
"loss": 0.0007,
"step": 5875
},
{
"epoch": 13.378684807256235,
"grad_norm": 0.37419337034225464,
"learning_rate": 4.322105263157895e-06,
"loss": 0.0012,
"step": 5900
},
{
"epoch": 13.435374149659864,
"grad_norm": 0.03296487405896187,
"learning_rate": 4.295789473684211e-06,
"loss": 0.0043,
"step": 5925
},
{
"epoch": 13.492063492063492,
"grad_norm": 0.009840169921517372,
"learning_rate": 4.269473684210527e-06,
"loss": 0.0039,
"step": 5950
},
{
"epoch": 13.54875283446712,
"grad_norm": 0.015135574154555798,
"learning_rate": 4.243157894736843e-06,
"loss": 0.0014,
"step": 5975
},
{
"epoch": 13.60544217687075,
"grad_norm": 0.022306112572550774,
"learning_rate": 4.2168421052631585e-06,
"loss": 0.0001,
"step": 6000
},
{
"epoch": 13.60544217687075,
"eval_loss": 0.22002862393856049,
"eval_runtime": 147.8825,
"eval_samples_per_second": 3.165,
"eval_steps_per_second": 0.527,
"eval_wer": 6.209935897435898,
"step": 6000
},
{
"epoch": 13.662131519274377,
"grad_norm": 0.0066378237679600716,
"learning_rate": 4.1905263157894735e-06,
"loss": 0.0007,
"step": 6025
},
{
"epoch": 13.718820861678005,
"grad_norm": 0.010166754946112633,
"learning_rate": 4.16421052631579e-06,
"loss": 0.0008,
"step": 6050
},
{
"epoch": 13.775510204081632,
"grad_norm": 0.0136796273291111,
"learning_rate": 4.137894736842106e-06,
"loss": 0.0009,
"step": 6075
},
{
"epoch": 13.83219954648526,
"grad_norm": 0.017127549275755882,
"learning_rate": 4.111578947368422e-06,
"loss": 0.0015,
"step": 6100
},
{
"epoch": 13.88888888888889,
"grad_norm": 0.024442024528980255,
"learning_rate": 4.085263157894737e-06,
"loss": 0.0032,
"step": 6125
},
{
"epoch": 13.945578231292517,
"grad_norm": 0.17017020285129547,
"learning_rate": 4.0589473684210526e-06,
"loss": 0.0015,
"step": 6150
},
{
"epoch": 14.002267573696145,
"grad_norm": 0.00828185211867094,
"learning_rate": 4.032631578947368e-06,
"loss": 0.0026,
"step": 6175
},
{
"epoch": 14.058956916099774,
"grad_norm": 0.007860764861106873,
"learning_rate": 4.006315789473684e-06,
"loss": 0.0034,
"step": 6200
},
{
"epoch": 14.115646258503402,
"grad_norm": 0.011614521034061909,
"learning_rate": 3.980000000000001e-06,
"loss": 0.0044,
"step": 6225
},
{
"epoch": 14.17233560090703,
"grad_norm": 0.018276942893862724,
"learning_rate": 3.953684210526316e-06,
"loss": 0.0004,
"step": 6250
},
{
"epoch": 14.229024943310657,
"grad_norm": 0.012453128583729267,
"learning_rate": 3.927368421052632e-06,
"loss": 0.0002,
"step": 6275
},
{
"epoch": 14.285714285714286,
"grad_norm": 0.015684612095355988,
"learning_rate": 3.9010526315789475e-06,
"loss": 0.0003,
"step": 6300
},
{
"epoch": 14.342403628117914,
"grad_norm": 0.006171511020511389,
"learning_rate": 3.874736842105263e-06,
"loss": 0.0001,
"step": 6325
},
{
"epoch": 14.399092970521542,
"grad_norm": 0.006511132698506117,
"learning_rate": 3.848421052631579e-06,
"loss": 0.0004,
"step": 6350
},
{
"epoch": 14.45578231292517,
"grad_norm": 0.050404928624629974,
"learning_rate": 3.822105263157895e-06,
"loss": 0.0002,
"step": 6375
},
{
"epoch": 14.512471655328799,
"grad_norm": 10.834954261779785,
"learning_rate": 3.795789473684211e-06,
"loss": 0.0012,
"step": 6400
},
{
"epoch": 14.569160997732427,
"grad_norm": 0.13081848621368408,
"learning_rate": 3.7694736842105266e-06,
"loss": 0.0016,
"step": 6425
},
{
"epoch": 14.625850340136054,
"grad_norm": 0.011676596477627754,
"learning_rate": 3.7431578947368424e-06,
"loss": 0.0044,
"step": 6450
},
{
"epoch": 14.682539682539682,
"grad_norm": 0.014040385372936726,
"learning_rate": 3.716842105263158e-06,
"loss": 0.0001,
"step": 6475
},
{
"epoch": 14.739229024943311,
"grad_norm": 0.011808693408966064,
"learning_rate": 3.690526315789474e-06,
"loss": 0.0001,
"step": 6500
},
{
"epoch": 14.795918367346939,
"grad_norm": 0.012787124142050743,
"learning_rate": 3.6642105263157894e-06,
"loss": 0.0045,
"step": 6525
},
{
"epoch": 14.852607709750567,
"grad_norm": 0.005284798797219992,
"learning_rate": 3.6378947368421057e-06,
"loss": 0.0001,
"step": 6550
},
{
"epoch": 14.909297052154194,
"grad_norm": 0.036842990666627884,
"learning_rate": 3.6115789473684215e-06,
"loss": 0.0004,
"step": 6575
},
{
"epoch": 14.965986394557824,
"grad_norm": 0.024015046656131744,
"learning_rate": 3.5852631578947373e-06,
"loss": 0.0003,
"step": 6600
},
{
"epoch": 15.022675736961451,
"grad_norm": 0.012714927084743977,
"learning_rate": 3.558947368421053e-06,
"loss": 0.0025,
"step": 6625
},
{
"epoch": 15.079365079365079,
"grad_norm": 0.017115842550992966,
"learning_rate": 3.5326315789473685e-06,
"loss": 0.0012,
"step": 6650
},
{
"epoch": 15.136054421768707,
"grad_norm": 0.008900342509150505,
"learning_rate": 3.5063157894736843e-06,
"loss": 0.0001,
"step": 6675
},
{
"epoch": 15.192743764172336,
"grad_norm": 0.007803457789123058,
"learning_rate": 3.48e-06,
"loss": 0.0004,
"step": 6700
},
{
"epoch": 15.249433106575964,
"grad_norm": 0.013770255260169506,
"learning_rate": 3.4536842105263164e-06,
"loss": 0.0011,
"step": 6725
},
{
"epoch": 15.306122448979592,
"grad_norm": 0.06877677142620087,
"learning_rate": 3.427368421052632e-06,
"loss": 0.0018,
"step": 6750
},
{
"epoch": 15.36281179138322,
"grad_norm": 0.011991630308330059,
"learning_rate": 3.4010526315789476e-06,
"loss": 0.0001,
"step": 6775
},
{
"epoch": 15.419501133786849,
"grad_norm": 0.013049086555838585,
"learning_rate": 3.3747368421052634e-06,
"loss": 0.0002,
"step": 6800
},
{
"epoch": 15.476190476190476,
"grad_norm": 0.012979848310351372,
"learning_rate": 3.3484210526315792e-06,
"loss": 0.0002,
"step": 6825
},
{
"epoch": 15.532879818594104,
"grad_norm": 0.006456771399825811,
"learning_rate": 3.3221052631578946e-06,
"loss": 0.0001,
"step": 6850
},
{
"epoch": 15.589569160997732,
"grad_norm": 0.012274966575205326,
"learning_rate": 3.2957894736842104e-06,
"loss": 0.0001,
"step": 6875
},
{
"epoch": 15.646258503401361,
"grad_norm": 0.010561280883848667,
"learning_rate": 3.2694736842105267e-06,
"loss": 0.0004,
"step": 6900
},
{
"epoch": 15.702947845804989,
"grad_norm": 0.009232975542545319,
"learning_rate": 3.2431578947368425e-06,
"loss": 0.0002,
"step": 6925
},
{
"epoch": 15.759637188208616,
"grad_norm": 0.015166404657065868,
"learning_rate": 3.2168421052631583e-06,
"loss": 0.0001,
"step": 6950
},
{
"epoch": 15.816326530612244,
"grad_norm": 0.0094530014321208,
"learning_rate": 3.1905263157894737e-06,
"loss": 0.0001,
"step": 6975
},
{
"epoch": 15.873015873015873,
"grad_norm": 0.017429711297154427,
"learning_rate": 3.1642105263157895e-06,
"loss": 0.0001,
"step": 7000
},
{
"epoch": 15.873015873015873,
"eval_loss": 0.2297230064868927,
"eval_runtime": 188.711,
"eval_samples_per_second": 2.48,
"eval_steps_per_second": 0.413,
"eval_wer": 6.049679487179487,
"step": 7000
},
{
"epoch": 15.929705215419501,
"grad_norm": 0.005318532232195139,
"learning_rate": 3.1378947368421054e-06,
"loss": 0.001,
"step": 7025
},
{
"epoch": 15.986394557823129,
"grad_norm": 0.011030340567231178,
"learning_rate": 3.111578947368421e-06,
"loss": 0.0001,
"step": 7050
},
{
"epoch": 16.04308390022676,
"grad_norm": 0.004018599167466164,
"learning_rate": 3.0852631578947374e-06,
"loss": 0.0001,
"step": 7075
},
{
"epoch": 16.099773242630384,
"grad_norm": 0.006292372010648251,
"learning_rate": 3.058947368421053e-06,
"loss": 0.0063,
"step": 7100
},
{
"epoch": 16.156462585034014,
"grad_norm": 0.008721155114471912,
"learning_rate": 3.0326315789473686e-06,
"loss": 0.0001,
"step": 7125
},
{
"epoch": 16.213151927437643,
"grad_norm": 0.007462701760232449,
"learning_rate": 3.0063157894736844e-06,
"loss": 0.0013,
"step": 7150
},
{
"epoch": 16.26984126984127,
"grad_norm": 0.0060442672111094,
"learning_rate": 2.9800000000000003e-06,
"loss": 0.0002,
"step": 7175
},
{
"epoch": 16.3265306122449,
"grad_norm": 0.007179939653724432,
"learning_rate": 2.9536842105263157e-06,
"loss": 0.0001,
"step": 7200
},
{
"epoch": 16.383219954648528,
"grad_norm": 0.006970668211579323,
"learning_rate": 2.927368421052632e-06,
"loss": 0.0025,
"step": 7225
},
{
"epoch": 16.439909297052154,
"grad_norm": 0.0061109112575650215,
"learning_rate": 2.9010526315789477e-06,
"loss": 0.0003,
"step": 7250
},
{
"epoch": 16.496598639455783,
"grad_norm": 0.02214565873146057,
"learning_rate": 2.8747368421052635e-06,
"loss": 0.0001,
"step": 7275
},
{
"epoch": 16.55328798185941,
"grad_norm": 0.009677527472376823,
"learning_rate": 2.8484210526315794e-06,
"loss": 0.0001,
"step": 7300
},
{
"epoch": 16.60997732426304,
"grad_norm": 3.2391059398651123,
"learning_rate": 2.8221052631578948e-06,
"loss": 0.0013,
"step": 7325
},
{
"epoch": 16.666666666666668,
"grad_norm": 0.005941161885857582,
"learning_rate": 2.7957894736842106e-06,
"loss": 0.0001,
"step": 7350
},
{
"epoch": 16.723356009070294,
"grad_norm": 0.005041074473410845,
"learning_rate": 2.7694736842105264e-06,
"loss": 0.0001,
"step": 7375
},
{
"epoch": 16.780045351473923,
"grad_norm": 0.010110282339155674,
"learning_rate": 2.7431578947368426e-06,
"loss": 0.0001,
"step": 7400
},
{
"epoch": 16.836734693877553,
"grad_norm": 0.0052981507033109665,
"learning_rate": 2.7168421052631585e-06,
"loss": 0.0001,
"step": 7425
},
{
"epoch": 16.89342403628118,
"grad_norm": 0.007062564603984356,
"learning_rate": 2.690526315789474e-06,
"loss": 0.0004,
"step": 7450
},
{
"epoch": 16.950113378684808,
"grad_norm": 0.005766382906585932,
"learning_rate": 2.6642105263157897e-06,
"loss": 0.0001,
"step": 7475
},
{
"epoch": 17.006802721088434,
"grad_norm": 8.800177574157715,
"learning_rate": 2.6378947368421055e-06,
"loss": 0.0022,
"step": 7500
},
{
"epoch": 17.063492063492063,
"grad_norm": 0.003864010563120246,
"learning_rate": 2.6115789473684213e-06,
"loss": 0.0001,
"step": 7525
},
{
"epoch": 17.120181405895693,
"grad_norm": 0.003043045522645116,
"learning_rate": 2.5852631578947367e-06,
"loss": 0.0001,
"step": 7550
},
{
"epoch": 17.17687074829932,
"grad_norm": 0.0026129058096557856,
"learning_rate": 2.558947368421053e-06,
"loss": 0.0001,
"step": 7575
},
{
"epoch": 17.233560090702948,
"grad_norm": 0.003994261380285025,
"learning_rate": 2.5326315789473688e-06,
"loss": 0.0001,
"step": 7600
},
{
"epoch": 17.290249433106577,
"grad_norm": 0.006509356200695038,
"learning_rate": 2.5063157894736846e-06,
"loss": 0.0001,
"step": 7625
},
{
"epoch": 17.346938775510203,
"grad_norm": 0.006231856532394886,
"learning_rate": 2.4800000000000004e-06,
"loss": 0.0001,
"step": 7650
},
{
"epoch": 17.403628117913833,
"grad_norm": 0.00826491229236126,
"learning_rate": 2.453684210526316e-06,
"loss": 0.0027,
"step": 7675
},
{
"epoch": 17.46031746031746,
"grad_norm": 0.007504597306251526,
"learning_rate": 2.427368421052632e-06,
"loss": 0.0001,
"step": 7700
},
{
"epoch": 17.517006802721088,
"grad_norm": 0.0033706706017255783,
"learning_rate": 2.4010526315789474e-06,
"loss": 0.0001,
"step": 7725
},
{
"epoch": 17.573696145124718,
"grad_norm": 0.004383792169392109,
"learning_rate": 2.3747368421052632e-06,
"loss": 0.0018,
"step": 7750
},
{
"epoch": 17.630385487528343,
"grad_norm": 0.004007370211184025,
"learning_rate": 2.348421052631579e-06,
"loss": 0.0001,
"step": 7775
},
{
"epoch": 17.687074829931973,
"grad_norm": 0.006295809056609869,
"learning_rate": 2.322105263157895e-06,
"loss": 0.0001,
"step": 7800
},
{
"epoch": 17.743764172335602,
"grad_norm": 0.006831544451415539,
"learning_rate": 2.2957894736842107e-06,
"loss": 0.0003,
"step": 7825
},
{
"epoch": 17.800453514739228,
"grad_norm": 0.0033715348690748215,
"learning_rate": 2.2694736842105265e-06,
"loss": 0.0001,
"step": 7850
},
{
"epoch": 17.857142857142858,
"grad_norm": 0.006168752908706665,
"learning_rate": 2.2431578947368423e-06,
"loss": 0.0012,
"step": 7875
},
{
"epoch": 17.913832199546484,
"grad_norm": 0.006377949379384518,
"learning_rate": 2.216842105263158e-06,
"loss": 0.0052,
"step": 7900
},
{
"epoch": 17.970521541950113,
"grad_norm": 1.9607151746749878,
"learning_rate": 2.190526315789474e-06,
"loss": 0.0004,
"step": 7925
},
{
"epoch": 18.027210884353742,
"grad_norm": 0.0046304683201014996,
"learning_rate": 2.16421052631579e-06,
"loss": 0.001,
"step": 7950
},
{
"epoch": 18.08390022675737,
"grad_norm": 0.008269163779914379,
"learning_rate": 2.1378947368421056e-06,
"loss": 0.0001,
"step": 7975
},
{
"epoch": 18.140589569160998,
"grad_norm": 0.0044001140631735325,
"learning_rate": 2.111578947368421e-06,
"loss": 0.0001,
"step": 8000
},
{
"epoch": 18.140589569160998,
"eval_loss": 0.2317376732826233,
"eval_runtime": 158.0944,
"eval_samples_per_second": 2.96,
"eval_steps_per_second": 0.493,
"eval_wer": 6.076388888888888,
"step": 8000
},
{
"epoch": 18.197278911564627,
"grad_norm": 0.006642814259976149,
"learning_rate": 2.085263157894737e-06,
"loss": 0.0001,
"step": 8025
},
{
"epoch": 18.253968253968253,
"grad_norm": 0.00599477905780077,
"learning_rate": 2.058947368421053e-06,
"loss": 0.0001,
"step": 8050
},
{
"epoch": 18.310657596371883,
"grad_norm": 0.0045234388671815395,
"learning_rate": 2.0326315789473685e-06,
"loss": 0.0001,
"step": 8075
},
{
"epoch": 18.367346938775512,
"grad_norm": 0.002616587560623884,
"learning_rate": 2.0063157894736843e-06,
"loss": 0.0001,
"step": 8100
},
{
"epoch": 18.424036281179138,
"grad_norm": 0.0050145648419857025,
"learning_rate": 1.98e-06,
"loss": 0.0001,
"step": 8125
},
{
"epoch": 18.480725623582767,
"grad_norm": 0.0029045080300420523,
"learning_rate": 1.953684210526316e-06,
"loss": 0.0002,
"step": 8150
},
{
"epoch": 18.537414965986393,
"grad_norm": 0.0041219014674425125,
"learning_rate": 1.9273684210526317e-06,
"loss": 0.0009,
"step": 8175
},
{
"epoch": 18.594104308390023,
"grad_norm": 0.006918082479387522,
"learning_rate": 1.9010526315789476e-06,
"loss": 0.0001,
"step": 8200
},
{
"epoch": 18.650793650793652,
"grad_norm": 0.0045529440976679325,
"learning_rate": 1.8747368421052634e-06,
"loss": 0.0005,
"step": 8225
},
{
"epoch": 18.707482993197278,
"grad_norm": 0.004559030756354332,
"learning_rate": 1.848421052631579e-06,
"loss": 0.0001,
"step": 8250
},
{
"epoch": 18.764172335600907,
"grad_norm": 0.003188680624589324,
"learning_rate": 1.8221052631578948e-06,
"loss": 0.0001,
"step": 8275
},
{
"epoch": 18.820861678004537,
"grad_norm": 0.0051582190208137035,
"learning_rate": 1.7957894736842108e-06,
"loss": 0.0001,
"step": 8300
},
{
"epoch": 18.877551020408163,
"grad_norm": 0.003925441298633814,
"learning_rate": 1.7694736842105264e-06,
"loss": 0.0001,
"step": 8325
},
{
"epoch": 18.934240362811792,
"grad_norm": 0.005711190402507782,
"learning_rate": 1.7431578947368423e-06,
"loss": 0.0,
"step": 8350
},
{
"epoch": 18.990929705215418,
"grad_norm": 0.006396492477506399,
"learning_rate": 1.716842105263158e-06,
"loss": 0.0002,
"step": 8375
},
{
"epoch": 19.047619047619047,
"grad_norm": 0.0038605357985943556,
"learning_rate": 1.6905263157894739e-06,
"loss": 0.0001,
"step": 8400
},
{
"epoch": 19.104308390022677,
"grad_norm": 0.003054672619327903,
"learning_rate": 1.6642105263157895e-06,
"loss": 0.0,
"step": 8425
},
{
"epoch": 19.160997732426303,
"grad_norm": 0.0045293658040463924,
"learning_rate": 1.6378947368421053e-06,
"loss": 0.0,
"step": 8450
},
{
"epoch": 19.217687074829932,
"grad_norm": 0.005090142600238323,
"learning_rate": 1.6115789473684211e-06,
"loss": 0.0001,
"step": 8475
},
{
"epoch": 19.27437641723356,
"grad_norm": 0.003649334190413356,
"learning_rate": 1.585263157894737e-06,
"loss": 0.0,
"step": 8500
},
{
"epoch": 19.331065759637188,
"grad_norm": 0.014431001618504524,
"learning_rate": 1.5589473684210526e-06,
"loss": 0.0001,
"step": 8525
},
{
"epoch": 19.387755102040817,
"grad_norm": 0.005074130836874247,
"learning_rate": 1.5326315789473686e-06,
"loss": 0.0,
"step": 8550
},
{
"epoch": 19.444444444444443,
"grad_norm": 0.0036257512401789427,
"learning_rate": 1.5063157894736844e-06,
"loss": 0.0001,
"step": 8575
},
{
"epoch": 19.501133786848072,
"grad_norm": 0.0060266111977398396,
"learning_rate": 1.48e-06,
"loss": 0.0001,
"step": 8600
},
{
"epoch": 19.5578231292517,
"grad_norm": 0.004775646608322859,
"learning_rate": 1.453684210526316e-06,
"loss": 0.0,
"step": 8625
},
{
"epoch": 19.614512471655328,
"grad_norm": 0.006195446942001581,
"learning_rate": 1.4273684210526317e-06,
"loss": 0.0003,
"step": 8650
},
{
"epoch": 19.671201814058957,
"grad_norm": 0.0044461763463914394,
"learning_rate": 1.4010526315789475e-06,
"loss": 0.0001,
"step": 8675
},
{
"epoch": 19.727891156462587,
"grad_norm": 0.0022899750620126724,
"learning_rate": 1.374736842105263e-06,
"loss": 0.0,
"step": 8700
},
{
"epoch": 19.784580498866212,
"grad_norm": 0.005077675450593233,
"learning_rate": 1.3484210526315791e-06,
"loss": 0.0001,
"step": 8725
},
{
"epoch": 19.841269841269842,
"grad_norm": 0.005482817534357309,
"learning_rate": 1.322105263157895e-06,
"loss": 0.0,
"step": 8750
},
{
"epoch": 19.897959183673468,
"grad_norm": 0.00202305824495852,
"learning_rate": 1.2957894736842105e-06,
"loss": 0.0001,
"step": 8775
},
{
"epoch": 19.954648526077097,
"grad_norm": 0.005788388196378946,
"learning_rate": 1.2694736842105266e-06,
"loss": 0.0001,
"step": 8800
},
{
"epoch": 20.011337868480727,
"grad_norm": 0.004344166722148657,
"learning_rate": 1.2431578947368422e-06,
"loss": 0.0014,
"step": 8825
},
{
"epoch": 20.068027210884352,
"grad_norm": 0.004923074971884489,
"learning_rate": 1.216842105263158e-06,
"loss": 0.0,
"step": 8850
},
{
"epoch": 20.124716553287982,
"grad_norm": 0.0024910017382353544,
"learning_rate": 1.1905263157894738e-06,
"loss": 0.0,
"step": 8875
},
{
"epoch": 20.18140589569161,
"grad_norm": 0.0026297103613615036,
"learning_rate": 1.1642105263157896e-06,
"loss": 0.0,
"step": 8900
},
{
"epoch": 20.238095238095237,
"grad_norm": 0.005076109431684017,
"learning_rate": 1.1378947368421052e-06,
"loss": 0.0001,
"step": 8925
},
{
"epoch": 20.294784580498867,
"grad_norm": 0.003189537674188614,
"learning_rate": 1.1115789473684213e-06,
"loss": 0.0,
"step": 8950
},
{
"epoch": 20.351473922902493,
"grad_norm": 0.0030386645812541246,
"learning_rate": 1.0852631578947369e-06,
"loss": 0.0,
"step": 8975
},
{
"epoch": 20.408163265306122,
"grad_norm": 0.001944896299391985,
"learning_rate": 1.0589473684210527e-06,
"loss": 0.0001,
"step": 9000
},
{
"epoch": 20.408163265306122,
"eval_loss": 0.23750941455364227,
"eval_runtime": 160.3655,
"eval_samples_per_second": 2.918,
"eval_steps_per_second": 0.486,
"eval_wer": 6.396901709401709,
"step": 9000
},
{
"epoch": 20.46485260770975,
"grad_norm": 0.0040268674492836,
"learning_rate": 1.0326315789473685e-06,
"loss": 0.0,
"step": 9025
},
{
"epoch": 20.521541950113377,
"grad_norm": 2.8923232555389404,
"learning_rate": 1.0063157894736843e-06,
"loss": 0.0002,
"step": 9050
},
{
"epoch": 20.578231292517007,
"grad_norm": 0.002543982584029436,
"learning_rate": 9.800000000000001e-07,
"loss": 0.0,
"step": 9075
},
{
"epoch": 20.634920634920636,
"grad_norm": 0.002120724180713296,
"learning_rate": 9.536842105263158e-07,
"loss": 0.0002,
"step": 9100
},
{
"epoch": 20.691609977324262,
"grad_norm": 0.0036805281415581703,
"learning_rate": 9.273684210526317e-07,
"loss": 0.0,
"step": 9125
},
{
"epoch": 20.74829931972789,
"grad_norm": 0.002348339883610606,
"learning_rate": 9.010526315789474e-07,
"loss": 0.0001,
"step": 9150
},
{
"epoch": 20.80498866213152,
"grad_norm": 0.0036750957369804382,
"learning_rate": 8.747368421052632e-07,
"loss": 0.0,
"step": 9175
},
{
"epoch": 20.861678004535147,
"grad_norm": 0.004974485840648413,
"learning_rate": 8.48421052631579e-07,
"loss": 0.0,
"step": 9200
},
{
"epoch": 20.918367346938776,
"grad_norm": 0.004362870939075947,
"learning_rate": 8.221052631578947e-07,
"loss": 0.0001,
"step": 9225
},
{
"epoch": 20.975056689342402,
"grad_norm": 0.002504055853933096,
"learning_rate": 7.957894736842107e-07,
"loss": 0.0001,
"step": 9250
},
{
"epoch": 21.03174603174603,
"grad_norm": 0.004000427667051554,
"learning_rate": 7.694736842105263e-07,
"loss": 0.0001,
"step": 9275
},
{
"epoch": 21.08843537414966,
"grad_norm": 0.003886349266394973,
"learning_rate": 7.431578947368422e-07,
"loss": 0.0,
"step": 9300
},
{
"epoch": 21.145124716553287,
"grad_norm": 0.003481630701571703,
"learning_rate": 7.16842105263158e-07,
"loss": 0.0,
"step": 9325
},
{
"epoch": 21.201814058956916,
"grad_norm": 0.01767110824584961,
"learning_rate": 6.905263157894737e-07,
"loss": 0.0,
"step": 9350
},
{
"epoch": 21.258503401360546,
"grad_norm": 0.005323050078004599,
"learning_rate": 6.642105263157895e-07,
"loss": 0.0,
"step": 9375
},
{
"epoch": 21.31519274376417,
"grad_norm": 0.05196991562843323,
"learning_rate": 6.378947368421053e-07,
"loss": 0.0001,
"step": 9400
},
{
"epoch": 21.3718820861678,
"grad_norm": 0.003023393452167511,
"learning_rate": 6.115789473684211e-07,
"loss": 0.0,
"step": 9425
},
{
"epoch": 21.428571428571427,
"grad_norm": 0.0037847934290766716,
"learning_rate": 5.852631578947369e-07,
"loss": 0.0,
"step": 9450
},
{
"epoch": 21.485260770975056,
"grad_norm": 0.0039050974883139133,
"learning_rate": 5.589473684210526e-07,
"loss": 0.0001,
"step": 9475
},
{
"epoch": 21.541950113378686,
"grad_norm": 0.0036291517317295074,
"learning_rate": 5.326315789473684e-07,
"loss": 0.0001,
"step": 9500
},
{
"epoch": 21.598639455782312,
"grad_norm": 0.003725625341758132,
"learning_rate": 5.063157894736842e-07,
"loss": 0.0001,
"step": 9525
},
{
"epoch": 21.65532879818594,
"grad_norm": 0.0034233913756906986,
"learning_rate": 4.800000000000001e-07,
"loss": 0.0,
"step": 9550
},
{
"epoch": 21.71201814058957,
"grad_norm": 0.005571336485445499,
"learning_rate": 4.5368421052631583e-07,
"loss": 0.0,
"step": 9575
},
{
"epoch": 21.768707482993197,
"grad_norm": 0.0034070161636918783,
"learning_rate": 4.273684210526316e-07,
"loss": 0.0001,
"step": 9600
},
{
"epoch": 21.825396825396826,
"grad_norm": 0.0027184481732547283,
"learning_rate": 4.0105263157894736e-07,
"loss": 0.0,
"step": 9625
},
{
"epoch": 21.882086167800452,
"grad_norm": 0.001929171965457499,
"learning_rate": 3.7473684210526323e-07,
"loss": 0.0,
"step": 9650
},
{
"epoch": 21.93877551020408,
"grad_norm": 0.003997990861535072,
"learning_rate": 3.48421052631579e-07,
"loss": 0.0,
"step": 9675
},
{
"epoch": 21.99546485260771,
"grad_norm": 0.0038647083565592766,
"learning_rate": 3.2210526315789476e-07,
"loss": 0.0,
"step": 9700
},
{
"epoch": 22.052154195011337,
"grad_norm": 0.00401474442332983,
"learning_rate": 2.9578947368421053e-07,
"loss": 0.0,
"step": 9725
},
{
"epoch": 22.108843537414966,
"grad_norm": 0.0030088857747614384,
"learning_rate": 2.6947368421052635e-07,
"loss": 0.0001,
"step": 9750
},
{
"epoch": 22.165532879818596,
"grad_norm": 0.003003130666911602,
"learning_rate": 2.431578947368421e-07,
"loss": 0.0,
"step": 9775
},
{
"epoch": 22.22222222222222,
"grad_norm": 0.004516700282692909,
"learning_rate": 2.168421052631579e-07,
"loss": 0.0,
"step": 9800
},
{
"epoch": 22.27891156462585,
"grad_norm": 0.002650737063959241,
"learning_rate": 1.9052631578947372e-07,
"loss": 0.0,
"step": 9825
},
{
"epoch": 22.335600907029477,
"grad_norm": 0.00888384971767664,
"learning_rate": 1.642105263157895e-07,
"loss": 0.0,
"step": 9850
},
{
"epoch": 22.392290249433106,
"grad_norm": 0.0020597188267856836,
"learning_rate": 1.3789473684210528e-07,
"loss": 0.0,
"step": 9875
},
{
"epoch": 22.448979591836736,
"grad_norm": 0.004189135041087866,
"learning_rate": 1.1157894736842106e-07,
"loss": 0.0,
"step": 9900
},
{
"epoch": 22.50566893424036,
"grad_norm": 0.003128908108919859,
"learning_rate": 8.526315789473685e-08,
"loss": 0.0,
"step": 9925
},
{
"epoch": 22.56235827664399,
"grad_norm": 0.0031592377927154303,
"learning_rate": 5.8947368421052637e-08,
"loss": 0.0,
"step": 9950
},
{
"epoch": 22.61904761904762,
"grad_norm": 0.004137367941439152,
"learning_rate": 3.263157894736842e-08,
"loss": 0.0,
"step": 9975
},
{
"epoch": 22.675736961451246,
"grad_norm": 0.0022842560429126024,
"learning_rate": 6.315789473684211e-09,
"loss": 0.0,
"step": 10000
},
{
"epoch": 22.675736961451246,
"eval_loss": 0.23789168894290924,
"eval_runtime": 182.0655,
"eval_samples_per_second": 2.571,
"eval_steps_per_second": 0.428,
"eval_wer": 6.490384615384616,
"step": 10000
},
{
"epoch": 22.675736961451246,
"step": 10000,
"total_flos": 1.73151240192e+19,
"train_loss": 0.06552563527043676,
"train_runtime": 7527.9049,
"train_samples_per_second": 7.97,
"train_steps_per_second": 1.328
}
],
"logging_steps": 25,
"max_steps": 10000,
"num_input_tokens_seen": 0,
"num_train_epochs": 23,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.73151240192e+19,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}