SentencePieceBPE-PubMed-FR / trainer_state.json
qanastek's picture
Upload 41 files
4b74869
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 18.0,
"global_step": 94050,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5e-09,
"loss": 10.4865,
"step": 1
},
{
"epoch": 0.1,
"learning_rate": 2.5e-06,
"loss": 9.4439,
"step": 500
},
{
"epoch": 0.19,
"learning_rate": 5e-06,
"loss": 7.6179,
"step": 1000
},
{
"epoch": 0.29,
"learning_rate": 7.5e-06,
"loss": 6.3619,
"step": 1500
},
{
"epoch": 0.38,
"learning_rate": 1e-05,
"loss": 6.0809,
"step": 2000
},
{
"epoch": 0.48,
"learning_rate": 1.25e-05,
"loss": 5.957,
"step": 2500
},
{
"epoch": 0.57,
"learning_rate": 1.5e-05,
"loss": 5.8758,
"step": 3000
},
{
"epoch": 0.67,
"learning_rate": 1.75e-05,
"loss": 5.8102,
"step": 3500
},
{
"epoch": 0.77,
"learning_rate": 2e-05,
"loss": 5.7625,
"step": 4000
},
{
"epoch": 0.86,
"learning_rate": 2.25e-05,
"loss": 5.7195,
"step": 4500
},
{
"epoch": 0.96,
"learning_rate": 2.5e-05,
"loss": 5.6801,
"step": 5000
},
{
"epoch": 1.05,
"learning_rate": 2.7500000000000004e-05,
"loss": 5.6449,
"step": 5500
},
{
"epoch": 1.15,
"learning_rate": 3e-05,
"loss": 5.6134,
"step": 6000
},
{
"epoch": 1.24,
"learning_rate": 3.2500000000000004e-05,
"loss": 5.591,
"step": 6500
},
{
"epoch": 1.34,
"learning_rate": 3.5e-05,
"loss": 5.5683,
"step": 7000
},
{
"epoch": 1.44,
"learning_rate": 3.7500000000000003e-05,
"loss": 5.5419,
"step": 7500
},
{
"epoch": 1.53,
"learning_rate": 4e-05,
"loss": 5.5231,
"step": 8000
},
{
"epoch": 1.63,
"learning_rate": 4.25e-05,
"loss": 5.506,
"step": 8500
},
{
"epoch": 1.72,
"learning_rate": 4.4995000000000005e-05,
"loss": 5.4871,
"step": 9000
},
{
"epoch": 1.82,
"learning_rate": 4.7495e-05,
"loss": 5.4763,
"step": 9500
},
{
"epoch": 1.91,
"learning_rate": 4.9995000000000005e-05,
"loss": 5.4615,
"step": 10000
},
{
"epoch": 2.01,
"learning_rate": 4.998800480769231e-05,
"loss": 5.4468,
"step": 10500
},
{
"epoch": 2.11,
"learning_rate": 4.997600961538462e-05,
"loss": 5.4341,
"step": 11000
},
{
"epoch": 2.2,
"learning_rate": 4.996399038461539e-05,
"loss": 5.4224,
"step": 11500
},
{
"epoch": 2.3,
"learning_rate": 4.995197115384615e-05,
"loss": 5.4099,
"step": 12000
},
{
"epoch": 2.39,
"learning_rate": 4.993995192307693e-05,
"loss": 5.3978,
"step": 12500
},
{
"epoch": 2.49,
"learning_rate": 4.992795673076923e-05,
"loss": 5.3897,
"step": 13000
},
{
"epoch": 2.58,
"learning_rate": 4.991593750000001e-05,
"loss": 5.3836,
"step": 13500
},
{
"epoch": 2.68,
"learning_rate": 4.990391826923077e-05,
"loss": 5.3737,
"step": 14000
},
{
"epoch": 2.78,
"learning_rate": 4.989189903846154e-05,
"loss": 5.3668,
"step": 14500
},
{
"epoch": 2.87,
"learning_rate": 4.987990384615385e-05,
"loss": 5.3597,
"step": 15000
},
{
"epoch": 2.97,
"learning_rate": 4.986788461538462e-05,
"loss": 5.3485,
"step": 15500
},
{
"epoch": 3.06,
"learning_rate": 4.9855889423076926e-05,
"loss": 5.3413,
"step": 16000
},
{
"epoch": 3.16,
"learning_rate": 4.9843870192307694e-05,
"loss": 5.338,
"step": 16500
},
{
"epoch": 3.25,
"learning_rate": 4.983185096153846e-05,
"loss": 5.3304,
"step": 17000
},
{
"epoch": 3.35,
"learning_rate": 4.981983173076924e-05,
"loss": 5.3258,
"step": 17500
},
{
"epoch": 3.44,
"learning_rate": 4.98078125e-05,
"loss": 5.317,
"step": 18000
},
{
"epoch": 3.54,
"learning_rate": 4.9795793269230774e-05,
"loss": 5.3134,
"step": 18500
},
{
"epoch": 3.64,
"learning_rate": 4.978377403846154e-05,
"loss": 5.3097,
"step": 19000
},
{
"epoch": 3.73,
"learning_rate": 4.977175480769231e-05,
"loss": 5.3019,
"step": 19500
},
{
"epoch": 3.83,
"learning_rate": 4.9759759615384614e-05,
"loss": 5.2985,
"step": 20000
},
{
"epoch": 3.92,
"learning_rate": 4.974774038461539e-05,
"loss": 5.2942,
"step": 20500
},
{
"epoch": 4.02,
"learning_rate": 4.973572115384616e-05,
"loss": 5.2893,
"step": 21000
},
{
"epoch": 4.11,
"learning_rate": 4.9723701923076925e-05,
"loss": 5.2843,
"step": 21500
},
{
"epoch": 4.21,
"learning_rate": 4.971170673076923e-05,
"loss": 5.2784,
"step": 22000
},
{
"epoch": 4.31,
"learning_rate": 4.9699687500000004e-05,
"loss": 5.2732,
"step": 22500
},
{
"epoch": 4.4,
"learning_rate": 4.968766826923077e-05,
"loss": 5.2701,
"step": 23000
},
{
"epoch": 4.5,
"learning_rate": 4.967564903846154e-05,
"loss": 5.2677,
"step": 23500
},
{
"epoch": 4.59,
"learning_rate": 4.9663653846153844e-05,
"loss": 5.2644,
"step": 24000
},
{
"epoch": 4.69,
"learning_rate": 4.965163461538462e-05,
"loss": 5.2562,
"step": 24500
},
{
"epoch": 4.78,
"learning_rate": 4.963963942307693e-05,
"loss": 5.2557,
"step": 25000
},
{
"epoch": 4.88,
"learning_rate": 4.96276201923077e-05,
"loss": 5.2529,
"step": 25500
},
{
"epoch": 4.98,
"learning_rate": 4.9615600961538466e-05,
"loss": 5.2504,
"step": 26000
},
{
"epoch": 5.07,
"learning_rate": 4.9603581730769234e-05,
"loss": 5.1431,
"step": 26500
},
{
"epoch": 5.17,
"learning_rate": 4.95915625e-05,
"loss": 4.8144,
"step": 27000
},
{
"epoch": 5.26,
"learning_rate": 4.957954326923077e-05,
"loss": 4.5379,
"step": 27500
},
{
"epoch": 5.36,
"learning_rate": 4.9567524038461545e-05,
"loss": 4.282,
"step": 28000
},
{
"epoch": 5.45,
"learning_rate": 4.955550480769231e-05,
"loss": 3.7642,
"step": 28500
},
{
"epoch": 5.55,
"learning_rate": 4.9543485576923075e-05,
"loss": 3.0854,
"step": 29000
},
{
"epoch": 5.65,
"learning_rate": 4.9531490384615385e-05,
"loss": 2.6674,
"step": 29500
},
{
"epoch": 5.74,
"learning_rate": 4.951947115384616e-05,
"loss": 2.2254,
"step": 30000
},
{
"epoch": 5.84,
"learning_rate": 4.950745192307693e-05,
"loss": 1.9446,
"step": 30500
},
{
"epoch": 5.93,
"learning_rate": 4.94954326923077e-05,
"loss": 1.7693,
"step": 31000
},
{
"epoch": 6.03,
"learning_rate": 4.94834375e-05,
"loss": 1.6527,
"step": 31500
},
{
"epoch": 6.12,
"learning_rate": 4.9471418269230775e-05,
"loss": 1.5696,
"step": 32000
},
{
"epoch": 6.22,
"learning_rate": 4.9459399038461544e-05,
"loss": 1.5054,
"step": 32500
},
{
"epoch": 6.32,
"learning_rate": 4.944737980769231e-05,
"loss": 1.4447,
"step": 33000
},
{
"epoch": 6.41,
"learning_rate": 4.9435384615384616e-05,
"loss": 1.3901,
"step": 33500
},
{
"epoch": 6.51,
"learning_rate": 4.942336538461539e-05,
"loss": 1.3332,
"step": 34000
},
{
"epoch": 6.6,
"learning_rate": 4.941134615384615e-05,
"loss": 1.2692,
"step": 34500
},
{
"epoch": 6.7,
"learning_rate": 4.939932692307693e-05,
"loss": 1.217,
"step": 35000
},
{
"epoch": 6.79,
"learning_rate": 4.938733173076923e-05,
"loss": 1.178,
"step": 35500
},
{
"epoch": 6.89,
"learning_rate": 4.9375312500000006e-05,
"loss": 1.143,
"step": 36000
},
{
"epoch": 6.99,
"learning_rate": 4.936329326923077e-05,
"loss": 1.1109,
"step": 36500
},
{
"epoch": 7.08,
"learning_rate": 4.935129807692308e-05,
"loss": 1.0859,
"step": 37000
},
{
"epoch": 7.18,
"learning_rate": 4.9339278846153846e-05,
"loss": 1.0619,
"step": 37500
},
{
"epoch": 7.27,
"learning_rate": 4.932725961538462e-05,
"loss": 1.0387,
"step": 38000
},
{
"epoch": 7.37,
"learning_rate": 4.931524038461538e-05,
"loss": 1.0205,
"step": 38500
},
{
"epoch": 7.46,
"learning_rate": 4.930322115384616e-05,
"loss": 1.0017,
"step": 39000
},
{
"epoch": 7.56,
"learning_rate": 4.929122596153846e-05,
"loss": 0.9856,
"step": 39500
},
{
"epoch": 7.66,
"learning_rate": 4.9279206730769236e-05,
"loss": 0.9707,
"step": 40000
},
{
"epoch": 7.75,
"learning_rate": 4.92671875e-05,
"loss": 0.9574,
"step": 40500
},
{
"epoch": 7.85,
"learning_rate": 4.925516826923077e-05,
"loss": 0.9455,
"step": 41000
},
{
"epoch": 7.94,
"learning_rate": 4.924314903846154e-05,
"loss": 0.9323,
"step": 41500
},
{
"epoch": 8.04,
"learning_rate": 4.923112980769231e-05,
"loss": 0.9199,
"step": 42000
},
{
"epoch": 8.13,
"learning_rate": 4.921913461538461e-05,
"loss": 0.9113,
"step": 42500
},
{
"epoch": 8.23,
"learning_rate": 4.920713942307692e-05,
"loss": 0.9012,
"step": 43000
},
{
"epoch": 8.33,
"learning_rate": 4.919512019230769e-05,
"loss": 0.8939,
"step": 43500
},
{
"epoch": 8.42,
"learning_rate": 4.9183100961538466e-05,
"loss": 0.8851,
"step": 44000
},
{
"epoch": 8.52,
"learning_rate": 4.917108173076923e-05,
"loss": 0.8745,
"step": 44500
},
{
"epoch": 8.61,
"learning_rate": 4.91590625e-05,
"loss": 0.8651,
"step": 45000
},
{
"epoch": 8.71,
"learning_rate": 4.914704326923077e-05,
"loss": 0.8578,
"step": 45500
},
{
"epoch": 8.8,
"learning_rate": 4.913502403846154e-05,
"loss": 0.8519,
"step": 46000
},
{
"epoch": 8.9,
"learning_rate": 4.912300480769231e-05,
"loss": 0.8457,
"step": 46500
},
{
"epoch": 9.0,
"learning_rate": 4.911098557692308e-05,
"loss": 0.8389,
"step": 47000
},
{
"epoch": 9.09,
"learning_rate": 4.9098990384615386e-05,
"loss": 0.8305,
"step": 47500
},
{
"epoch": 9.19,
"learning_rate": 4.9086971153846154e-05,
"loss": 0.8233,
"step": 48000
},
{
"epoch": 9.28,
"learning_rate": 4.907495192307692e-05,
"loss": 0.8189,
"step": 48500
},
{
"epoch": 9.38,
"learning_rate": 4.90629326923077e-05,
"loss": 0.8129,
"step": 49000
},
{
"epoch": 9.47,
"learning_rate": 4.905093750000001e-05,
"loss": 0.8076,
"step": 49500
},
{
"epoch": 9.57,
"learning_rate": 4.903894230769231e-05,
"loss": 0.8019,
"step": 50000
},
{
"epoch": 9.67,
"learning_rate": 4.902692307692308e-05,
"loss": 0.7962,
"step": 50500
},
{
"epoch": 9.76,
"learning_rate": 4.901490384615385e-05,
"loss": 0.7904,
"step": 51000
},
{
"epoch": 9.86,
"learning_rate": 4.900288461538462e-05,
"loss": 0.7879,
"step": 51500
},
{
"epoch": 9.95,
"learning_rate": 4.8990865384615384e-05,
"loss": 0.7811,
"step": 52000
},
{
"epoch": 10.05,
"learning_rate": 4.897884615384616e-05,
"loss": 0.7781,
"step": 52500
},
{
"epoch": 10.14,
"learning_rate": 4.896682692307693e-05,
"loss": 0.7724,
"step": 53000
},
{
"epoch": 10.24,
"learning_rate": 4.8954807692307695e-05,
"loss": 0.7682,
"step": 53500
},
{
"epoch": 10.33,
"learning_rate": 4.89428125e-05,
"loss": 0.7637,
"step": 54000
},
{
"epoch": 10.43,
"learning_rate": 4.893081730769231e-05,
"loss": 0.7592,
"step": 54500
},
{
"epoch": 10.53,
"learning_rate": 4.891879807692308e-05,
"loss": 0.7541,
"step": 55000
},
{
"epoch": 10.62,
"learning_rate": 4.890677884615385e-05,
"loss": 0.75,
"step": 55500
},
{
"epoch": 10.72,
"learning_rate": 4.8894759615384614e-05,
"loss": 0.749,
"step": 56000
},
{
"epoch": 10.81,
"learning_rate": 4.888274038461539e-05,
"loss": 0.7434,
"step": 56500
},
{
"epoch": 10.91,
"learning_rate": 4.887074519230769e-05,
"loss": 0.7407,
"step": 57000
},
{
"epoch": 11.0,
"learning_rate": 4.885872596153847e-05,
"loss": 0.7381,
"step": 57500
},
{
"epoch": 11.1,
"learning_rate": 4.884670673076923e-05,
"loss": 0.7344,
"step": 58000
},
{
"epoch": 11.2,
"learning_rate": 4.8834687500000004e-05,
"loss": 0.7282,
"step": 58500
},
{
"epoch": 11.29,
"learning_rate": 4.882269230769231e-05,
"loss": 0.7272,
"step": 59000
},
{
"epoch": 11.39,
"learning_rate": 4.881067307692308e-05,
"loss": 0.7236,
"step": 59500
},
{
"epoch": 11.48,
"learning_rate": 4.8798653846153845e-05,
"loss": 0.7196,
"step": 60000
},
{
"epoch": 11.58,
"learning_rate": 4.878663461538462e-05,
"loss": 0.7164,
"step": 60500
},
{
"epoch": 11.67,
"learning_rate": 4.877461538461539e-05,
"loss": 0.7129,
"step": 61000
},
{
"epoch": 11.77,
"learning_rate": 4.87626201923077e-05,
"loss": 0.71,
"step": 61500
},
{
"epoch": 11.87,
"learning_rate": 4.875060096153846e-05,
"loss": 0.7088,
"step": 62000
},
{
"epoch": 11.96,
"learning_rate": 4.8738581730769235e-05,
"loss": 0.7057,
"step": 62500
},
{
"epoch": 12.06,
"learning_rate": 4.87265625e-05,
"loss": 0.7022,
"step": 63000
},
{
"epoch": 12.15,
"learning_rate": 4.8714567307692313e-05,
"loss": 0.6977,
"step": 63500
},
{
"epoch": 12.25,
"learning_rate": 4.8702548076923075e-05,
"loss": 0.6988,
"step": 64000
},
{
"epoch": 12.34,
"learning_rate": 4.869052884615385e-05,
"loss": 0.6943,
"step": 64500
},
{
"epoch": 12.44,
"learning_rate": 4.867850961538462e-05,
"loss": 0.6919,
"step": 65000
},
{
"epoch": 12.54,
"learning_rate": 4.8666490384615386e-05,
"loss": 0.6888,
"step": 65500
},
{
"epoch": 12.63,
"learning_rate": 4.865449519230769e-05,
"loss": 0.686,
"step": 66000
},
{
"epoch": 12.73,
"learning_rate": 4.8642475961538465e-05,
"loss": 0.6843,
"step": 66500
},
{
"epoch": 12.82,
"learning_rate": 4.863045673076923e-05,
"loss": 0.681,
"step": 67000
},
{
"epoch": 12.92,
"learning_rate": 4.86184375e-05,
"loss": 0.68,
"step": 67500
},
{
"epoch": 13.01,
"learning_rate": 4.860644230769231e-05,
"loss": 0.6775,
"step": 68000
},
{
"epoch": 13.11,
"learning_rate": 4.859442307692308e-05,
"loss": 0.6745,
"step": 68500
},
{
"epoch": 13.21,
"learning_rate": 4.858240384615385e-05,
"loss": 0.6726,
"step": 69000
},
{
"epoch": 13.3,
"learning_rate": 4.8570384615384616e-05,
"loss": 0.6716,
"step": 69500
},
{
"epoch": 13.4,
"learning_rate": 4.855836538461539e-05,
"loss": 0.6691,
"step": 70000
},
{
"epoch": 13.49,
"learning_rate": 4.8546370192307695e-05,
"loss": 0.6665,
"step": 70500
},
{
"epoch": 13.59,
"learning_rate": 4.853435096153846e-05,
"loss": 0.6625,
"step": 71000
},
{
"epoch": 13.68,
"learning_rate": 4.852233173076923e-05,
"loss": 0.6609,
"step": 71500
},
{
"epoch": 13.78,
"learning_rate": 4.8510312500000006e-05,
"loss": 0.66,
"step": 72000
},
{
"epoch": 13.88,
"learning_rate": 4.8498293269230774e-05,
"loss": 0.6566,
"step": 72500
},
{
"epoch": 13.97,
"learning_rate": 4.848627403846154e-05,
"loss": 0.6561,
"step": 73000
},
{
"epoch": 14.07,
"learning_rate": 4.8474278846153847e-05,
"loss": 0.6536,
"step": 73500
},
{
"epoch": 14.16,
"learning_rate": 4.846225961538462e-05,
"loss": 0.6509,
"step": 74000
},
{
"epoch": 14.26,
"learning_rate": 4.845024038461539e-05,
"loss": 0.6509,
"step": 74500
},
{
"epoch": 14.35,
"learning_rate": 4.843822115384616e-05,
"loss": 0.6471,
"step": 75000
},
{
"epoch": 14.45,
"learning_rate": 4.8426201923076926e-05,
"loss": 0.6459,
"step": 75500
},
{
"epoch": 14.55,
"learning_rate": 4.8414206730769237e-05,
"loss": 0.6452,
"step": 76000
},
{
"epoch": 14.64,
"learning_rate": 4.840221153846154e-05,
"loss": 0.6425,
"step": 76500
},
{
"epoch": 14.74,
"learning_rate": 4.839019230769231e-05,
"loss": 0.6406,
"step": 77000
},
{
"epoch": 14.83,
"learning_rate": 4.837817307692308e-05,
"loss": 0.6393,
"step": 77500
},
{
"epoch": 14.93,
"learning_rate": 4.836615384615385e-05,
"loss": 0.6362,
"step": 78000
},
{
"epoch": 15.02,
"learning_rate": 4.835413461538461e-05,
"loss": 0.6368,
"step": 78500
},
{
"epoch": 15.12,
"learning_rate": 4.834211538461539e-05,
"loss": 0.634,
"step": 79000
},
{
"epoch": 15.22,
"learning_rate": 4.8330096153846156e-05,
"loss": 0.6324,
"step": 79500
},
{
"epoch": 15.31,
"learning_rate": 4.831810096153847e-05,
"loss": 0.6287,
"step": 80000
},
{
"epoch": 15.41,
"learning_rate": 4.830608173076923e-05,
"loss": 0.6286,
"step": 80500
},
{
"epoch": 15.5,
"learning_rate": 4.82940625e-05,
"loss": 0.6267,
"step": 81000
},
{
"epoch": 15.6,
"learning_rate": 4.828204326923077e-05,
"loss": 0.6257,
"step": 81500
},
{
"epoch": 15.69,
"learning_rate": 4.827002403846154e-05,
"loss": 0.6245,
"step": 82000
},
{
"epoch": 15.79,
"learning_rate": 4.825800480769231e-05,
"loss": 0.6216,
"step": 82500
},
{
"epoch": 15.89,
"learning_rate": 4.824598557692308e-05,
"loss": 0.6208,
"step": 83000
},
{
"epoch": 15.98,
"learning_rate": 4.8233966346153844e-05,
"loss": 0.621,
"step": 83500
},
{
"epoch": 16.08,
"learning_rate": 4.822194711538462e-05,
"loss": 0.617,
"step": 84000
},
{
"epoch": 16.17,
"learning_rate": 4.820995192307692e-05,
"loss": 0.6158,
"step": 84500
},
{
"epoch": 16.27,
"learning_rate": 4.819795673076923e-05,
"loss": 0.617,
"step": 85000
},
{
"epoch": 16.36,
"learning_rate": 4.81859375e-05,
"loss": 0.6149,
"step": 85500
},
{
"epoch": 16.46,
"learning_rate": 4.817391826923077e-05,
"loss": 0.6128,
"step": 86000
},
{
"epoch": 16.56,
"learning_rate": 4.816189903846154e-05,
"loss": 0.6119,
"step": 86500
},
{
"epoch": 16.65,
"learning_rate": 4.814987980769231e-05,
"loss": 0.6104,
"step": 87000
},
{
"epoch": 16.75,
"learning_rate": 4.8137860576923074e-05,
"loss": 0.6082,
"step": 87500
},
{
"epoch": 16.84,
"learning_rate": 4.812584134615385e-05,
"loss": 0.6077,
"step": 88000
},
{
"epoch": 16.94,
"learning_rate": 4.811382211538462e-05,
"loss": 0.6066,
"step": 88500
},
{
"epoch": 17.03,
"learning_rate": 4.810182692307693e-05,
"loss": 0.6051,
"step": 89000
},
{
"epoch": 17.13,
"learning_rate": 4.808980769230769e-05,
"loss": 0.6035,
"step": 89500
},
{
"epoch": 17.22,
"learning_rate": 4.8077788461538464e-05,
"loss": 0.6032,
"step": 90000
},
{
"epoch": 17.32,
"learning_rate": 4.806576923076923e-05,
"loss": 0.6001,
"step": 90500
},
{
"epoch": 17.42,
"learning_rate": 4.805375e-05,
"loss": 0.598,
"step": 91000
},
{
"epoch": 17.51,
"learning_rate": 4.8041754807692304e-05,
"loss": 0.5996,
"step": 91500
},
{
"epoch": 17.61,
"learning_rate": 4.802973557692308e-05,
"loss": 0.5987,
"step": 92000
},
{
"epoch": 17.7,
"learning_rate": 4.801771634615385e-05,
"loss": 0.5973,
"step": 92500
},
{
"epoch": 17.8,
"learning_rate": 4.800572115384616e-05,
"loss": 0.5957,
"step": 93000
},
{
"epoch": 17.89,
"learning_rate": 4.7993701923076926e-05,
"loss": 0.5938,
"step": 93500
},
{
"epoch": 17.99,
"learning_rate": 4.7981682692307694e-05,
"loss": 0.593,
"step": 94000
}
],
"max_steps": 2090000,
"num_train_epochs": 400,
"total_flos": 2.5346531711380357e+19,
"trial_name": null,
"trial_params": null
}