aradia-ctc-distilhubert-ft / trainer_state.json
Abdulwahab Sahyoun
End of training
87a7862
raw
history blame
18.7 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 29.997830802603037,
"global_step": 6900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.43,
"eval_loss": 4.412876605987549,
"eval_runtime": 167.4094,
"eval_samples_per_second": 29.264,
"eval_steps_per_second": 0.92,
"eval_wer": 1.0,
"step": 100
},
{
"epoch": 0.87,
"eval_loss": 3.592686653137207,
"eval_runtime": 162.2391,
"eval_samples_per_second": 30.196,
"eval_steps_per_second": 0.949,
"eval_wer": 1.0,
"step": 200
},
{
"epoch": 1.3,
"eval_loss": 3.3779923915863037,
"eval_runtime": 162.9178,
"eval_samples_per_second": 30.07,
"eval_steps_per_second": 0.945,
"eval_wer": 1.0,
"step": 300
},
{
"epoch": 1.74,
"eval_loss": 3.0830302238464355,
"eval_runtime": 165.7412,
"eval_samples_per_second": 29.558,
"eval_steps_per_second": 0.929,
"eval_wer": 1.0,
"step": 400
},
{
"epoch": 2.17,
"learning_rate": 0.00029759999999999997,
"loss": 5.3551,
"step": 500
},
{
"epoch": 2.17,
"eval_loss": 2.6277503967285156,
"eval_runtime": 162.7347,
"eval_samples_per_second": 30.104,
"eval_steps_per_second": 0.946,
"eval_wer": 0.999864842947505,
"step": 500
},
{
"epoch": 2.61,
"eval_loss": 1.8358852863311768,
"eval_runtime": 165.3543,
"eval_samples_per_second": 29.627,
"eval_steps_per_second": 0.931,
"eval_wer": 0.999972968589501,
"step": 600
},
{
"epoch": 3.04,
"eval_loss": 1.7877882719039917,
"eval_runtime": 161.7494,
"eval_samples_per_second": 30.288,
"eval_steps_per_second": 0.952,
"eval_wer": 0.9914040114613181,
"step": 700
},
{
"epoch": 3.48,
"eval_loss": 1.5219440460205078,
"eval_runtime": 163.3701,
"eval_samples_per_second": 29.987,
"eval_steps_per_second": 0.943,
"eval_wer": 0.9875114883494621,
"step": 800
},
{
"epoch": 3.91,
"eval_loss": 1.4348260164260864,
"eval_runtime": 163.9354,
"eval_samples_per_second": 29.884,
"eval_steps_per_second": 0.939,
"eval_wer": 0.9879439909174461,
"step": 900
},
{
"epoch": 4.35,
"learning_rate": 0.00027675,
"loss": 1.7199,
"step": 1000
},
{
"epoch": 4.35,
"eval_loss": 1.4353642463684082,
"eval_runtime": 167.0923,
"eval_samples_per_second": 29.319,
"eval_steps_per_second": 0.922,
"eval_wer": 0.9644266637833162,
"step": 1000
},
{
"epoch": 4.78,
"eval_loss": 1.5210459232330322,
"eval_runtime": 174.6297,
"eval_samples_per_second": 28.054,
"eval_steps_per_second": 0.882,
"eval_wer": 0.9518840893117803,
"step": 1100
},
{
"epoch": 5.22,
"eval_loss": 1.3606946468353271,
"eval_runtime": 164.1085,
"eval_samples_per_second": 29.852,
"eval_steps_per_second": 0.938,
"eval_wer": 0.9474779694004433,
"step": 1200
},
{
"epoch": 5.65,
"eval_loss": 1.383901834487915,
"eval_runtime": 163.0064,
"eval_samples_per_second": 30.054,
"eval_steps_per_second": 0.945,
"eval_wer": 0.9343136724874304,
"step": 1300
},
{
"epoch": 6.09,
"eval_loss": 1.2805912494659424,
"eval_runtime": 161.8354,
"eval_samples_per_second": 30.271,
"eval_steps_per_second": 0.952,
"eval_wer": 0.8944423420014056,
"step": 1400
},
{
"epoch": 6.52,
"learning_rate": 0.00025331249999999996,
"loss": 1.2342,
"step": 1500
},
{
"epoch": 6.52,
"eval_loss": 1.3035786151885986,
"eval_runtime": 161.6796,
"eval_samples_per_second": 30.301,
"eval_steps_per_second": 0.953,
"eval_wer": 0.9011191003946586,
"step": 1500
},
{
"epoch": 6.95,
"eval_loss": 1.3704484701156616,
"eval_runtime": 162.7459,
"eval_samples_per_second": 30.102,
"eval_steps_per_second": 0.946,
"eval_wer": 0.9072011677569336,
"step": 1600
},
{
"epoch": 7.39,
"eval_loss": 1.298081636428833,
"eval_runtime": 160.2526,
"eval_samples_per_second": 30.57,
"eval_steps_per_second": 0.961,
"eval_wer": 0.8890630913121047,
"step": 1700
},
{
"epoch": 7.82,
"eval_loss": 1.278607964515686,
"eval_runtime": 160.6135,
"eval_samples_per_second": 30.502,
"eval_steps_per_second": 0.959,
"eval_wer": 0.8733308104016868,
"step": 1800
},
{
"epoch": 8.26,
"eval_loss": 1.2897309064865112,
"eval_runtime": 160.1151,
"eval_samples_per_second": 30.597,
"eval_steps_per_second": 0.962,
"eval_wer": 0.8866843271881927,
"step": 1900
},
{
"epoch": 8.69,
"learning_rate": 0.00022987499999999996,
"loss": 0.9831,
"step": 2000
},
{
"epoch": 8.69,
"eval_loss": 1.4436370134353638,
"eval_runtime": 161.3545,
"eval_samples_per_second": 30.362,
"eval_steps_per_second": 0.954,
"eval_wer": 0.8779531815970157,
"step": 2000
},
{
"epoch": 9.13,
"eval_loss": 1.3680068254470825,
"eval_runtime": 162.428,
"eval_samples_per_second": 30.161,
"eval_steps_per_second": 0.948,
"eval_wer": 0.8872519868086717,
"step": 2100
},
{
"epoch": 9.56,
"eval_loss": 1.347055435180664,
"eval_runtime": 168.1105,
"eval_samples_per_second": 29.142,
"eval_steps_per_second": 0.916,
"eval_wer": 0.8692490674163378,
"step": 2200
},
{
"epoch": 10.0,
"eval_loss": 1.3724839687347412,
"eval_runtime": 161.7991,
"eval_samples_per_second": 30.278,
"eval_steps_per_second": 0.952,
"eval_wer": 0.8729253392442018,
"step": 2300
},
{
"epoch": 10.43,
"eval_loss": 1.4439136981964111,
"eval_runtime": 164.3202,
"eval_samples_per_second": 29.814,
"eval_steps_per_second": 0.937,
"eval_wer": 0.8770881764610478,
"step": 2400
},
{
"epoch": 10.87,
"learning_rate": 0.00020643749999999997,
"loss": 0.8071,
"step": 2500
},
{
"epoch": 10.87,
"eval_loss": 1.5114314556121826,
"eval_runtime": 160.915,
"eval_samples_per_second": 30.445,
"eval_steps_per_second": 0.957,
"eval_wer": 0.8928474887819646,
"step": 2500
},
{
"epoch": 11.3,
"eval_loss": 1.6155508756637573,
"eval_runtime": 162.3974,
"eval_samples_per_second": 30.167,
"eval_steps_per_second": 0.948,
"eval_wer": 0.8958479753473536,
"step": 2600
},
{
"epoch": 11.74,
"eval_loss": 1.4381186962127686,
"eval_runtime": 160.766,
"eval_samples_per_second": 30.473,
"eval_steps_per_second": 0.958,
"eval_wer": 0.8748716008001297,
"step": 2700
},
{
"epoch": 12.17,
"eval_loss": 1.508760929107666,
"eval_runtime": 161.5608,
"eval_samples_per_second": 30.323,
"eval_steps_per_second": 0.953,
"eval_wer": 0.8716818943612478,
"step": 2800
},
{
"epoch": 12.61,
"eval_loss": 1.5485790967941284,
"eval_runtime": 163.3707,
"eval_samples_per_second": 29.987,
"eval_steps_per_second": 0.943,
"eval_wer": 0.8812510136778937,
"step": 2900
},
{
"epoch": 13.04,
"learning_rate": 0.00018299999999999998,
"loss": 0.6321,
"step": 3000
},
{
"epoch": 13.04,
"eval_loss": 1.4535671472549438,
"eval_runtime": 162.8299,
"eval_samples_per_second": 30.087,
"eval_steps_per_second": 0.946,
"eval_wer": 0.8884143374601287,
"step": 3000
},
{
"epoch": 13.48,
"eval_loss": 1.4679176807403564,
"eval_runtime": 163.238,
"eval_samples_per_second": 30.011,
"eval_steps_per_second": 0.943,
"eval_wer": 0.8947126561063956,
"step": 3100
},
{
"epoch": 13.91,
"eval_loss": 1.5627696514129639,
"eval_runtime": 163.36,
"eval_samples_per_second": 29.989,
"eval_steps_per_second": 0.943,
"eval_wer": 0.9117424447207655,
"step": 3200
},
{
"epoch": 14.35,
"eval_loss": 1.58307683467865,
"eval_runtime": 166.5717,
"eval_samples_per_second": 29.411,
"eval_steps_per_second": 0.925,
"eval_wer": 0.8716008001297507,
"step": 3300
},
{
"epoch": 14.78,
"eval_loss": 1.6733046770095825,
"eval_runtime": 162.8481,
"eval_samples_per_second": 30.083,
"eval_steps_per_second": 0.946,
"eval_wer": 0.8701951667838028,
"step": 3400
},
{
"epoch": 15.22,
"learning_rate": 0.00015956249999999998,
"loss": 0.4998,
"step": 3500
},
{
"epoch": 15.22,
"eval_loss": 1.82253897190094,
"eval_runtime": 166.0482,
"eval_samples_per_second": 29.503,
"eval_steps_per_second": 0.927,
"eval_wer": 0.8664648321349409,
"step": 3500
},
{
"epoch": 15.65,
"eval_loss": 1.8557839393615723,
"eval_runtime": 164.7566,
"eval_samples_per_second": 29.735,
"eval_steps_per_second": 0.935,
"eval_wer": 0.8732226847596908,
"step": 3600
},
{
"epoch": 16.09,
"eval_loss": 1.7512831687927246,
"eval_runtime": 164.2229,
"eval_samples_per_second": 29.831,
"eval_steps_per_second": 0.938,
"eval_wer": 0.8765745796615667,
"step": 3700
},
{
"epoch": 16.52,
"eval_loss": 1.8561654090881348,
"eval_runtime": 163.759,
"eval_samples_per_second": 29.916,
"eval_steps_per_second": 0.94,
"eval_wer": 0.8753041033681137,
"step": 3800
},
{
"epoch": 16.95,
"eval_loss": 1.9017548561096191,
"eval_runtime": 163.9631,
"eval_samples_per_second": 29.879,
"eval_steps_per_second": 0.939,
"eval_wer": 0.8703573552467968,
"step": 3900
},
{
"epoch": 17.39,
"learning_rate": 0.000136125,
"loss": 0.4421,
"step": 4000
},
{
"epoch": 17.39,
"eval_loss": 1.9341310262680054,
"eval_runtime": 162.0015,
"eval_samples_per_second": 30.24,
"eval_steps_per_second": 0.951,
"eval_wer": 0.8788992809644808,
"step": 4000
},
{
"epoch": 17.82,
"eval_loss": 1.9581764936447144,
"eval_runtime": 163.9276,
"eval_samples_per_second": 29.885,
"eval_steps_per_second": 0.939,
"eval_wer": 0.8781424014705087,
"step": 4100
},
{
"epoch": 18.26,
"eval_loss": 1.8863332271575928,
"eval_runtime": 162.6969,
"eval_samples_per_second": 30.111,
"eval_steps_per_second": 0.947,
"eval_wer": 0.8820889874033627,
"step": 4200
},
{
"epoch": 18.69,
"eval_loss": 1.9366161823272705,
"eval_runtime": 163.4934,
"eval_samples_per_second": 29.965,
"eval_steps_per_second": 0.942,
"eval_wer": 0.8846569714007677,
"step": 4300
},
{
"epoch": 19.13,
"eval_loss": 2.1901650428771973,
"eval_runtime": 166.8301,
"eval_samples_per_second": 29.365,
"eval_steps_per_second": 0.923,
"eval_wer": 0.8721414283397307,
"step": 4400
},
{
"epoch": 19.56,
"learning_rate": 0.00011268749999999998,
"loss": 0.3712,
"step": 4500
},
{
"epoch": 19.56,
"eval_loss": 2.164060592651367,
"eval_runtime": 162.9234,
"eval_samples_per_second": 30.069,
"eval_steps_per_second": 0.945,
"eval_wer": 0.8670054603449208,
"step": 4500
},
{
"epoch": 20.0,
"eval_loss": 2.163888931274414,
"eval_runtime": 164.0497,
"eval_samples_per_second": 29.863,
"eval_steps_per_second": 0.939,
"eval_wer": 0.8776017732605287,
"step": 4600
},
{
"epoch": 20.43,
"eval_loss": 2.269531726837158,
"eval_runtime": 161.5615,
"eval_samples_per_second": 30.323,
"eval_steps_per_second": 0.953,
"eval_wer": 0.9029842677190896,
"step": 4700
},
{
"epoch": 20.87,
"eval_loss": 2.1909375190734863,
"eval_runtime": 159.4363,
"eval_samples_per_second": 30.727,
"eval_steps_per_second": 0.966,
"eval_wer": 0.8936584310969347,
"step": 4800
},
{
"epoch": 21.3,
"eval_loss": 2.160627603530884,
"eval_runtime": 160.8614,
"eval_samples_per_second": 30.455,
"eval_steps_per_second": 0.957,
"eval_wer": 0.8959290695788507,
"step": 4900
},
{
"epoch": 21.74,
"learning_rate": 8.924999999999999e-05,
"loss": 0.3067,
"step": 5000
},
{
"epoch": 21.74,
"eval_loss": 2.1755802631378174,
"eval_runtime": 160.8247,
"eval_samples_per_second": 30.462,
"eval_steps_per_second": 0.958,
"eval_wer": 0.8943342163594097,
"step": 5000
},
{
"epoch": 22.17,
"eval_loss": 2.409205913543701,
"eval_runtime": 160.8564,
"eval_samples_per_second": 30.456,
"eval_steps_per_second": 0.957,
"eval_wer": 0.8772503649240417,
"step": 5100
},
{
"epoch": 22.61,
"eval_loss": 2.499131202697754,
"eval_runtime": 163.2327,
"eval_samples_per_second": 30.012,
"eval_steps_per_second": 0.943,
"eval_wer": 0.8721143969292318,
"step": 5200
},
{
"epoch": 23.04,
"eval_loss": 2.3339521884918213,
"eval_runtime": 160.7999,
"eval_samples_per_second": 30.466,
"eval_steps_per_second": 0.958,
"eval_wer": 0.8910363842785316,
"step": 5300
},
{
"epoch": 23.48,
"eval_loss": 2.3566715717315674,
"eval_runtime": 161.7409,
"eval_samples_per_second": 30.289,
"eval_steps_per_second": 0.952,
"eval_wer": 0.8946045304643996,
"step": 5400
},
{
"epoch": 23.91,
"learning_rate": 6.58125e-05,
"loss": 0.2764,
"step": 5500
},
{
"epoch": 23.91,
"eval_loss": 2.3214945793151855,
"eval_runtime": 165.7293,
"eval_samples_per_second": 29.56,
"eval_steps_per_second": 0.929,
"eval_wer": 0.8897118451640806,
"step": 5500
},
{
"epoch": 24.35,
"eval_loss": 2.482433795928955,
"eval_runtime": 158.7566,
"eval_samples_per_second": 30.859,
"eval_steps_per_second": 0.97,
"eval_wer": 0.9002270638481916,
"step": 5600
},
{
"epoch": 24.78,
"eval_loss": 2.4584639072418213,
"eval_runtime": 160.826,
"eval_samples_per_second": 30.461,
"eval_steps_per_second": 0.958,
"eval_wer": 0.8962534465048386,
"step": 5700
},
{
"epoch": 25.22,
"eval_loss": 2.580402135848999,
"eval_runtime": 161.3761,
"eval_samples_per_second": 30.358,
"eval_steps_per_second": 0.954,
"eval_wer": 0.8878737092501486,
"step": 5800
},
{
"epoch": 25.65,
"eval_loss": 2.5814249515533447,
"eval_runtime": 161.7763,
"eval_samples_per_second": 30.283,
"eval_steps_per_second": 0.952,
"eval_wer": 0.8902795047845596,
"step": 5900
},
{
"epoch": 26.09,
"learning_rate": 4.237499999999999e-05,
"loss": 0.2593,
"step": 6000
},
{
"epoch": 26.09,
"eval_loss": 2.5374372005462646,
"eval_runtime": 161.3184,
"eval_samples_per_second": 30.369,
"eval_steps_per_second": 0.955,
"eval_wer": 0.8867654214196897,
"step": 6000
},
{
"epoch": 26.52,
"eval_loss": 2.5346157550811768,
"eval_runtime": 161.3913,
"eval_samples_per_second": 30.355,
"eval_steps_per_second": 0.954,
"eval_wer": 0.8922257663404877,
"step": 6100
},
{
"epoch": 26.95,
"eval_loss": 2.546508312225342,
"eval_runtime": 161.2827,
"eval_samples_per_second": 30.375,
"eval_steps_per_second": 0.955,
"eval_wer": 0.8873060496296696,
"step": 6200
},
{
"epoch": 27.39,
"eval_loss": 2.6001508235931396,
"eval_runtime": 161.5241,
"eval_samples_per_second": 30.33,
"eval_steps_per_second": 0.953,
"eval_wer": 0.8919013894144997,
"step": 6300
},
{
"epoch": 27.82,
"eval_loss": 2.610161304473877,
"eval_runtime": 159.1491,
"eval_samples_per_second": 30.782,
"eval_steps_per_second": 0.968,
"eval_wer": 0.8927663945504677,
"step": 6400
},
{
"epoch": 28.26,
"learning_rate": 1.89375e-05,
"loss": 0.227,
"step": 6500
},
{
"epoch": 28.26,
"eval_loss": 2.692467451095581,
"eval_runtime": 161.7243,
"eval_samples_per_second": 30.292,
"eval_steps_per_second": 0.952,
"eval_wer": 0.8914418554360166,
"step": 6500
},
{
"epoch": 28.69,
"eval_loss": 2.6981399059295654,
"eval_runtime": 164.1482,
"eval_samples_per_second": 29.845,
"eval_steps_per_second": 0.938,
"eval_wer": 0.8913337297940207,
"step": 6600
},
{
"epoch": 29.13,
"eval_loss": 2.687201499938965,
"eval_runtime": 161.4856,
"eval_samples_per_second": 30.337,
"eval_steps_per_second": 0.954,
"eval_wer": 0.8890630913121047,
"step": 6700
},
{
"epoch": 29.56,
"eval_loss": 2.7014529705047607,
"eval_runtime": 161.2779,
"eval_samples_per_second": 30.376,
"eval_steps_per_second": 0.955,
"eval_wer": 0.8896577823430827,
"step": 6800
},
{
"epoch": 30.0,
"eval_loss": 2.711408853530884,
"eval_runtime": 161.2987,
"eval_samples_per_second": 30.372,
"eval_steps_per_second": 0.955,
"eval_wer": 0.8907931015840407,
"step": 6900
},
{
"epoch": 30.0,
"step": 6900,
"total_flos": 4.4600972514253696e+18,
"train_loss": 0.9629449507118999,
"train_runtime": 26035.0137,
"train_samples_per_second": 16.973,
"train_steps_per_second": 0.265
}
],
"max_steps": 6900,
"num_train_epochs": 30,
"total_flos": 4.4600972514253696e+18,
"trial_name": null,
"trial_params": null
}