enko_xlsr_13p_run1 / trainer_state.json
yesj1234's picture
Upload folder using huggingface_hub
99d6d7b
raw
history blame contribute delete
No virus
21.9 kB
{
"best_metric": 0.3040466010570526,
"best_model_checkpoint": "./enko_xlsr_13p_run1/checkpoint-77020",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 77020,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"learning_rate": 0.0003,
"loss": 4.7797,
"step": 500
},
{
"epoch": 0.13,
"learning_rate": 0.00029803972817564033,
"loss": 1.2377,
"step": 1000
},
{
"epoch": 0.19,
"learning_rate": 0.0002960794563512807,
"loss": 0.9452,
"step": 1500
},
{
"epoch": 0.26,
"learning_rate": 0.000294119184526921,
"loss": 0.8622,
"step": 2000
},
{
"epoch": 0.32,
"learning_rate": 0.0002921589127025614,
"loss": 0.82,
"step": 2500
},
{
"epoch": 0.39,
"learning_rate": 0.0002901986408782018,
"loss": 0.7742,
"step": 3000
},
{
"epoch": 0.45,
"learning_rate": 0.00028823836905384213,
"loss": 0.75,
"step": 3500
},
{
"epoch": 0.52,
"learning_rate": 0.0002862780972294825,
"loss": 0.7459,
"step": 4000
},
{
"epoch": 0.58,
"learning_rate": 0.00028431782540512285,
"loss": 0.7282,
"step": 4500
},
{
"epoch": 0.65,
"learning_rate": 0.00028235755358076316,
"loss": 0.7104,
"step": 5000
},
{
"epoch": 0.71,
"learning_rate": 0.0002803972817564035,
"loss": 0.692,
"step": 5500
},
{
"epoch": 0.78,
"learning_rate": 0.0002784370099320439,
"loss": 0.6954,
"step": 6000
},
{
"epoch": 0.84,
"learning_rate": 0.00027647673810768424,
"loss": 0.6844,
"step": 6500
},
{
"epoch": 0.91,
"learning_rate": 0.0002745164662833246,
"loss": 0.6779,
"step": 7000
},
{
"epoch": 0.97,
"learning_rate": 0.00027255619445896496,
"loss": 0.6595,
"step": 7500
},
{
"epoch": 1.0,
"eval_loss": 0.44951537251472473,
"eval_runtime": 11.7967,
"eval_samples_per_second": 60.271,
"eval_steps_per_second": 7.544,
"eval_wer": 0.297432521395655,
"step": 7702
},
{
"epoch": 1.04,
"learning_rate": 0.0002705959226346053,
"loss": 0.6426,
"step": 8000
},
{
"epoch": 1.1,
"learning_rate": 0.0002686356508102457,
"loss": 0.6215,
"step": 8500
},
{
"epoch": 1.17,
"learning_rate": 0.00026667537898588604,
"loss": 0.6209,
"step": 9000
},
{
"epoch": 1.23,
"learning_rate": 0.0002647151071615264,
"loss": 0.6134,
"step": 9500
},
{
"epoch": 1.3,
"learning_rate": 0.00026275483533716676,
"loss": 0.6127,
"step": 10000
},
{
"epoch": 1.36,
"learning_rate": 0.00026079456351280707,
"loss": 0.5987,
"step": 10500
},
{
"epoch": 1.43,
"learning_rate": 0.0002588342916884474,
"loss": 0.6028,
"step": 11000
},
{
"epoch": 1.49,
"learning_rate": 0.0002568740198640878,
"loss": 0.5893,
"step": 11500
},
{
"epoch": 1.56,
"learning_rate": 0.00025491374803972815,
"loss": 0.59,
"step": 12000
},
{
"epoch": 1.62,
"learning_rate": 0.0002529534762153685,
"loss": 0.594,
"step": 12500
},
{
"epoch": 1.69,
"learning_rate": 0.00025099320439100887,
"loss": 0.5903,
"step": 13000
},
{
"epoch": 1.75,
"learning_rate": 0.0002490329325666492,
"loss": 0.5779,
"step": 13500
},
{
"epoch": 1.82,
"learning_rate": 0.0002470726607422896,
"loss": 0.582,
"step": 14000
},
{
"epoch": 1.88,
"learning_rate": 0.00024511238891792995,
"loss": 0.5781,
"step": 14500
},
{
"epoch": 1.95,
"learning_rate": 0.0002431521170935703,
"loss": 0.5717,
"step": 15000
},
{
"epoch": 2.0,
"eval_loss": 0.39816993474960327,
"eval_runtime": 11.9909,
"eval_samples_per_second": 59.295,
"eval_steps_per_second": 7.422,
"eval_wer": 0.25622119815668204,
"step": 15404
},
{
"epoch": 2.01,
"learning_rate": 0.00024119184526921064,
"loss": 0.5632,
"step": 15500
},
{
"epoch": 2.08,
"learning_rate": 0.000239231573444851,
"loss": 0.5298,
"step": 16000
},
{
"epoch": 2.14,
"learning_rate": 0.00023727130162049136,
"loss": 0.5391,
"step": 16500
},
{
"epoch": 2.21,
"learning_rate": 0.00023531102979613172,
"loss": 0.5274,
"step": 17000
},
{
"epoch": 2.27,
"learning_rate": 0.00023335075797177208,
"loss": 0.5368,
"step": 17500
},
{
"epoch": 2.34,
"learning_rate": 0.00023139048614741244,
"loss": 0.5191,
"step": 18000
},
{
"epoch": 2.4,
"learning_rate": 0.00022943021432305277,
"loss": 0.523,
"step": 18500
},
{
"epoch": 2.47,
"learning_rate": 0.00022746994249869313,
"loss": 0.529,
"step": 19000
},
{
"epoch": 2.53,
"learning_rate": 0.0002255096706743335,
"loss": 0.5293,
"step": 19500
},
{
"epoch": 2.6,
"learning_rate": 0.00022354939884997385,
"loss": 0.5091,
"step": 20000
},
{
"epoch": 2.66,
"learning_rate": 0.0002215891270256142,
"loss": 0.5258,
"step": 20500
},
{
"epoch": 2.73,
"learning_rate": 0.00021962885520125454,
"loss": 0.5107,
"step": 21000
},
{
"epoch": 2.79,
"learning_rate": 0.0002176685833768949,
"loss": 0.5197,
"step": 21500
},
{
"epoch": 2.86,
"learning_rate": 0.00021570831155253526,
"loss": 0.5162,
"step": 22000
},
{
"epoch": 2.92,
"learning_rate": 0.00021374803972817562,
"loss": 0.5162,
"step": 22500
},
{
"epoch": 2.99,
"learning_rate": 0.00021178776790381598,
"loss": 0.5134,
"step": 23000
},
{
"epoch": 3.0,
"eval_loss": 0.3769395649433136,
"eval_runtime": 12.4234,
"eval_samples_per_second": 57.231,
"eval_steps_per_second": 7.164,
"eval_wer": 0.23647136273864383,
"step": 23106
},
{
"epoch": 3.05,
"learning_rate": 0.00020982749607945634,
"loss": 0.4797,
"step": 23500
},
{
"epoch": 3.12,
"learning_rate": 0.00020786722425509668,
"loss": 0.4777,
"step": 24000
},
{
"epoch": 3.18,
"learning_rate": 0.00020590695243073704,
"loss": 0.4729,
"step": 24500
},
{
"epoch": 3.25,
"learning_rate": 0.0002039466806063774,
"loss": 0.4749,
"step": 25000
},
{
"epoch": 3.31,
"learning_rate": 0.00020198640878201776,
"loss": 0.4803,
"step": 25500
},
{
"epoch": 3.38,
"learning_rate": 0.00020002613695765812,
"loss": 0.4738,
"step": 26000
},
{
"epoch": 3.44,
"learning_rate": 0.00019806586513329845,
"loss": 0.4782,
"step": 26500
},
{
"epoch": 3.51,
"learning_rate": 0.0001961055933089388,
"loss": 0.4994,
"step": 27000
},
{
"epoch": 3.57,
"learning_rate": 0.00019414532148457917,
"loss": 0.4676,
"step": 27500
},
{
"epoch": 3.64,
"learning_rate": 0.00019218504966021953,
"loss": 0.457,
"step": 28000
},
{
"epoch": 3.7,
"learning_rate": 0.0001902247778358599,
"loss": 0.469,
"step": 28500
},
{
"epoch": 3.77,
"learning_rate": 0.00018826450601150025,
"loss": 0.4573,
"step": 29000
},
{
"epoch": 3.83,
"learning_rate": 0.00018630423418714058,
"loss": 0.4672,
"step": 29500
},
{
"epoch": 3.9,
"learning_rate": 0.00018434396236278094,
"loss": 0.4736,
"step": 30000
},
{
"epoch": 3.96,
"learning_rate": 0.0001823836905384213,
"loss": 0.467,
"step": 30500
},
{
"epoch": 4.0,
"eval_loss": 0.34988921880722046,
"eval_runtime": 12.3506,
"eval_samples_per_second": 57.568,
"eval_steps_per_second": 7.206,
"eval_wer": 0.22027649769585253,
"step": 30808
},
{
"epoch": 4.02,
"learning_rate": 0.00018042341871406166,
"loss": 0.4541,
"step": 31000
},
{
"epoch": 4.09,
"learning_rate": 0.00017846314688970202,
"loss": 0.4273,
"step": 31500
},
{
"epoch": 4.15,
"learning_rate": 0.00017650287506534236,
"loss": 0.4334,
"step": 32000
},
{
"epoch": 4.22,
"learning_rate": 0.00017454260324098272,
"loss": 0.4203,
"step": 32500
},
{
"epoch": 4.28,
"learning_rate": 0.00017258233141662308,
"loss": 0.4331,
"step": 33000
},
{
"epoch": 4.35,
"learning_rate": 0.00017062205959226344,
"loss": 0.4306,
"step": 33500
},
{
"epoch": 4.41,
"learning_rate": 0.00016866178776790382,
"loss": 0.4251,
"step": 34000
},
{
"epoch": 4.48,
"learning_rate": 0.00016670151594354418,
"loss": 0.4231,
"step": 34500
},
{
"epoch": 4.54,
"learning_rate": 0.0001647412441191845,
"loss": 0.4256,
"step": 35000
},
{
"epoch": 4.61,
"learning_rate": 0.00016278097229482488,
"loss": 0.4332,
"step": 35500
},
{
"epoch": 4.67,
"learning_rate": 0.00016082070047046524,
"loss": 0.4238,
"step": 36000
},
{
"epoch": 4.74,
"learning_rate": 0.0001588604286461056,
"loss": 0.4251,
"step": 36500
},
{
"epoch": 4.8,
"learning_rate": 0.00015690015682174596,
"loss": 0.425,
"step": 37000
},
{
"epoch": 4.87,
"learning_rate": 0.0001549398849973863,
"loss": 0.4208,
"step": 37500
},
{
"epoch": 4.93,
"learning_rate": 0.00015297961317302665,
"loss": 0.4245,
"step": 38000
},
{
"epoch": 5.0,
"learning_rate": 0.000151019341348667,
"loss": 0.4156,
"step": 38500
},
{
"epoch": 5.0,
"eval_loss": 0.3391418159008026,
"eval_runtime": 12.5768,
"eval_samples_per_second": 56.533,
"eval_steps_per_second": 7.077,
"eval_wer": 0.21158657011191573,
"step": 38510
},
{
"epoch": 5.06,
"learning_rate": 0.00014905906952430737,
"loss": 0.3845,
"step": 39000
},
{
"epoch": 5.13,
"learning_rate": 0.0001470987976999477,
"loss": 0.393,
"step": 39500
},
{
"epoch": 5.19,
"learning_rate": 0.00014513852587558806,
"loss": 0.3938,
"step": 40000
},
{
"epoch": 5.26,
"learning_rate": 0.00014317825405122842,
"loss": 0.3902,
"step": 40500
},
{
"epoch": 5.32,
"learning_rate": 0.00014121798222686878,
"loss": 0.3817,
"step": 41000
},
{
"epoch": 5.39,
"learning_rate": 0.00013925771040250914,
"loss": 0.3937,
"step": 41500
},
{
"epoch": 5.45,
"learning_rate": 0.0001372974385781495,
"loss": 0.3821,
"step": 42000
},
{
"epoch": 5.52,
"learning_rate": 0.00013533716675378984,
"loss": 0.3903,
"step": 42500
},
{
"epoch": 5.58,
"learning_rate": 0.0001333768949294302,
"loss": 0.381,
"step": 43000
},
{
"epoch": 5.65,
"learning_rate": 0.00013141662310507056,
"loss": 0.3866,
"step": 43500
},
{
"epoch": 5.71,
"learning_rate": 0.00012945635128071092,
"loss": 0.3816,
"step": 44000
},
{
"epoch": 5.78,
"learning_rate": 0.00012749607945635128,
"loss": 0.3845,
"step": 44500
},
{
"epoch": 5.84,
"learning_rate": 0.0001255358076319916,
"loss": 0.3777,
"step": 45000
},
{
"epoch": 5.91,
"learning_rate": 0.00012357553580763197,
"loss": 0.3777,
"step": 45500
},
{
"epoch": 5.97,
"learning_rate": 0.00012161526398327234,
"loss": 0.379,
"step": 46000
},
{
"epoch": 6.0,
"eval_loss": 0.3327275514602661,
"eval_runtime": 12.31,
"eval_samples_per_second": 57.758,
"eval_steps_per_second": 7.23,
"eval_wer": 0.1998683344305464,
"step": 46212
},
{
"epoch": 6.04,
"learning_rate": 0.00011965499215891269,
"loss": 0.3657,
"step": 46500
},
{
"epoch": 6.1,
"learning_rate": 0.00011769472033455305,
"loss": 0.3527,
"step": 47000
},
{
"epoch": 6.17,
"learning_rate": 0.00011573444851019341,
"loss": 0.3505,
"step": 47500
},
{
"epoch": 6.23,
"learning_rate": 0.00011377417668583375,
"loss": 0.3472,
"step": 48000
},
{
"epoch": 6.3,
"learning_rate": 0.00011181390486147411,
"loss": 0.3497,
"step": 48500
},
{
"epoch": 6.36,
"learning_rate": 0.00010985363303711446,
"loss": 0.349,
"step": 49000
},
{
"epoch": 6.43,
"learning_rate": 0.00010789336121275482,
"loss": 0.3545,
"step": 49500
},
{
"epoch": 6.49,
"learning_rate": 0.00010593308938839518,
"loss": 0.3453,
"step": 50000
},
{
"epoch": 6.56,
"learning_rate": 0.00010397281756403553,
"loss": 0.3501,
"step": 50500
},
{
"epoch": 6.62,
"learning_rate": 0.00010201254573967589,
"loss": 0.3473,
"step": 51000
},
{
"epoch": 6.69,
"learning_rate": 0.00010005227391531626,
"loss": 0.3462,
"step": 51500
},
{
"epoch": 6.75,
"learning_rate": 9.80920020909566e-05,
"loss": 0.3385,
"step": 52000
},
{
"epoch": 6.82,
"learning_rate": 9.613173026659697e-05,
"loss": 0.3459,
"step": 52500
},
{
"epoch": 6.88,
"learning_rate": 9.417145844223731e-05,
"loss": 0.3441,
"step": 53000
},
{
"epoch": 6.95,
"learning_rate": 9.221118661787767e-05,
"loss": 0.3475,
"step": 53500
},
{
"epoch": 7.0,
"eval_loss": 0.31270119547843933,
"eval_runtime": 12.4561,
"eval_samples_per_second": 57.08,
"eval_steps_per_second": 7.145,
"eval_wer": 0.1947333772218565,
"step": 53914
},
{
"epoch": 7.01,
"learning_rate": 9.025091479351803e-05,
"loss": 0.3379,
"step": 54000
},
{
"epoch": 7.08,
"learning_rate": 8.829064296915838e-05,
"loss": 0.3159,
"step": 54500
},
{
"epoch": 7.14,
"learning_rate": 8.633037114479874e-05,
"loss": 0.3254,
"step": 55000
},
{
"epoch": 7.21,
"learning_rate": 8.43700993204391e-05,
"loss": 0.3195,
"step": 55500
},
{
"epoch": 7.27,
"learning_rate": 8.240982749607945e-05,
"loss": 0.3225,
"step": 56000
},
{
"epoch": 7.34,
"learning_rate": 8.044955567171981e-05,
"loss": 0.3128,
"step": 56500
},
{
"epoch": 7.4,
"learning_rate": 7.848928384736017e-05,
"loss": 0.318,
"step": 57000
},
{
"epoch": 7.47,
"learning_rate": 7.652901202300051e-05,
"loss": 0.3165,
"step": 57500
},
{
"epoch": 7.53,
"learning_rate": 7.456874019864087e-05,
"loss": 0.3191,
"step": 58000
},
{
"epoch": 7.6,
"learning_rate": 7.260846837428123e-05,
"loss": 0.3169,
"step": 58500
},
{
"epoch": 7.66,
"learning_rate": 7.064819654992158e-05,
"loss": 0.3118,
"step": 59000
},
{
"epoch": 7.73,
"learning_rate": 6.868792472556194e-05,
"loss": 0.3192,
"step": 59500
},
{
"epoch": 7.79,
"learning_rate": 6.67276529012023e-05,
"loss": 0.3055,
"step": 60000
},
{
"epoch": 7.86,
"learning_rate": 6.476738107684265e-05,
"loss": 0.3134,
"step": 60500
},
{
"epoch": 7.92,
"learning_rate": 6.2807109252483e-05,
"loss": 0.3081,
"step": 61000
},
{
"epoch": 7.98,
"learning_rate": 6.084683742812336e-05,
"loss": 0.3105,
"step": 61500
},
{
"epoch": 8.0,
"eval_loss": 0.30813026428222656,
"eval_runtime": 12.2002,
"eval_samples_per_second": 58.278,
"eval_steps_per_second": 7.295,
"eval_wer": 0.1814351547070441,
"step": 61616
},
{
"epoch": 8.05,
"learning_rate": 5.888656560376372e-05,
"loss": 0.2986,
"step": 62000
},
{
"epoch": 8.11,
"learning_rate": 5.692629377940407e-05,
"loss": 0.2908,
"step": 62500
},
{
"epoch": 8.18,
"learning_rate": 5.4966021955044426e-05,
"loss": 0.2835,
"step": 63000
},
{
"epoch": 8.24,
"learning_rate": 5.3005750130684786e-05,
"loss": 0.2904,
"step": 63500
},
{
"epoch": 8.31,
"learning_rate": 5.104547830632514e-05,
"loss": 0.289,
"step": 64000
},
{
"epoch": 8.37,
"learning_rate": 4.908520648196549e-05,
"loss": 0.2905,
"step": 64500
},
{
"epoch": 8.44,
"learning_rate": 4.7124934657605846e-05,
"loss": 0.288,
"step": 65000
},
{
"epoch": 8.5,
"learning_rate": 4.516466283324621e-05,
"loss": 0.2813,
"step": 65500
},
{
"epoch": 8.57,
"learning_rate": 4.3204391008886566e-05,
"loss": 0.2831,
"step": 66000
},
{
"epoch": 8.63,
"learning_rate": 4.124411918452692e-05,
"loss": 0.2914,
"step": 66500
},
{
"epoch": 8.7,
"learning_rate": 3.928384736016727e-05,
"loss": 0.2807,
"step": 67000
},
{
"epoch": 8.76,
"learning_rate": 3.7323575535807626e-05,
"loss": 0.2833,
"step": 67500
},
{
"epoch": 8.83,
"learning_rate": 3.5363303711447986e-05,
"loss": 0.2825,
"step": 68000
},
{
"epoch": 8.89,
"learning_rate": 3.340303188708834e-05,
"loss": 0.2824,
"step": 68500
},
{
"epoch": 8.96,
"learning_rate": 3.14427600627287e-05,
"loss": 0.281,
"step": 69000
},
{
"epoch": 9.0,
"eval_loss": 0.3067900538444519,
"eval_runtime": 12.6252,
"eval_samples_per_second": 56.316,
"eval_steps_per_second": 7.049,
"eval_wer": 0.17419354838709677,
"step": 69318
},
{
"epoch": 9.02,
"learning_rate": 2.948248823836905e-05,
"loss": 0.2725,
"step": 69500
},
{
"epoch": 9.09,
"learning_rate": 2.7522216414009406e-05,
"loss": 0.2696,
"step": 70000
},
{
"epoch": 9.15,
"learning_rate": 2.5561944589649766e-05,
"loss": 0.2713,
"step": 70500
},
{
"epoch": 9.22,
"learning_rate": 2.360167276529012e-05,
"loss": 0.2634,
"step": 71000
},
{
"epoch": 9.28,
"learning_rate": 2.1641400940930475e-05,
"loss": 0.2635,
"step": 71500
},
{
"epoch": 9.35,
"learning_rate": 1.968112911657083e-05,
"loss": 0.2658,
"step": 72000
},
{
"epoch": 9.41,
"learning_rate": 1.7720857292211185e-05,
"loss": 0.2648,
"step": 72500
},
{
"epoch": 9.48,
"learning_rate": 1.576058546785154e-05,
"loss": 0.273,
"step": 73000
},
{
"epoch": 9.54,
"learning_rate": 1.3800313643491895e-05,
"loss": 0.2601,
"step": 73500
},
{
"epoch": 9.61,
"learning_rate": 1.1840041819132252e-05,
"loss": 0.2598,
"step": 74000
},
{
"epoch": 9.67,
"learning_rate": 9.879769994772608e-06,
"loss": 0.2698,
"step": 74500
},
{
"epoch": 9.74,
"learning_rate": 7.919498170412963e-06,
"loss": 0.2574,
"step": 75000
},
{
"epoch": 9.8,
"learning_rate": 5.959226346053318e-06,
"loss": 0.2646,
"step": 75500
},
{
"epoch": 9.87,
"learning_rate": 3.998954521693675e-06,
"loss": 0.2584,
"step": 76000
},
{
"epoch": 9.93,
"learning_rate": 2.0386826973340303e-06,
"loss": 0.2623,
"step": 76500
},
{
"epoch": 10.0,
"learning_rate": 7.841087297438577e-08,
"loss": 0.2584,
"step": 77000
},
{
"epoch": 10.0,
"eval_loss": 0.3040466010570526,
"eval_runtime": 12.4236,
"eval_samples_per_second": 57.23,
"eval_steps_per_second": 7.164,
"eval_wer": 0.17129690585911783,
"step": 77020
},
{
"epoch": 10.0,
"step": 77020,
"total_flos": 1.409382714117005e+20,
"train_loss": 0.4655882824058503,
"train_runtime": 41869.3879,
"train_samples_per_second": 29.431,
"train_steps_per_second": 1.84
}
],
"logging_steps": 500,
"max_steps": 77020,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 1.409382714117005e+20,
"trial_name": null,
"trial_params": null
}