gulbert-ft-ita / trainer_state.json
mrovera's picture
model commit
57b1160
{
"best_metric": 0.8538806684223458,
"best_model_checkpoint": "output/ipzs-sg-bert_xxl-bs-16/checkpoint-114021",
"epoch": 9.0,
"global_step": 114021,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"learning_rate": 3.505111768884679e-05,
"loss": 0.0964,
"step": 500
},
{
"epoch": 0.08,
"learning_rate": 3.491223537769359e-05,
"loss": 0.0133,
"step": 1000
},
{
"epoch": 0.12,
"learning_rate": 3.4773353066540374e-05,
"loss": 0.0109,
"step": 1500
},
{
"epoch": 0.16,
"learning_rate": 3.463447075538717e-05,
"loss": 0.0102,
"step": 2000
},
{
"epoch": 0.2,
"learning_rate": 3.4495588444233954e-05,
"loss": 0.0099,
"step": 2500
},
{
"epoch": 0.24,
"learning_rate": 3.435670613308075e-05,
"loss": 0.0098,
"step": 3000
},
{
"epoch": 0.28,
"learning_rate": 3.421782382192754e-05,
"loss": 0.0096,
"step": 3500
},
{
"epoch": 0.32,
"learning_rate": 3.407894151077433e-05,
"loss": 0.0088,
"step": 4000
},
{
"epoch": 0.36,
"learning_rate": 3.394005919962113e-05,
"loss": 0.0074,
"step": 4500
},
{
"epoch": 0.39,
"learning_rate": 3.3801176888467914e-05,
"loss": 0.0064,
"step": 5000
},
{
"epoch": 0.43,
"learning_rate": 3.36622945773147e-05,
"loss": 0.0057,
"step": 5500
},
{
"epoch": 0.47,
"learning_rate": 3.35234122661615e-05,
"loss": 0.005,
"step": 6000
},
{
"epoch": 0.51,
"learning_rate": 3.338452995500829e-05,
"loss": 0.0048,
"step": 6500
},
{
"epoch": 0.55,
"learning_rate": 3.324564764385508e-05,
"loss": 0.0042,
"step": 7000
},
{
"epoch": 0.59,
"learning_rate": 3.310676533270187e-05,
"loss": 0.0039,
"step": 7500
},
{
"epoch": 0.63,
"learning_rate": 3.296788302154867e-05,
"loss": 0.0038,
"step": 8000
},
{
"epoch": 0.67,
"learning_rate": 3.2829000710395454e-05,
"loss": 0.0036,
"step": 8500
},
{
"epoch": 0.71,
"learning_rate": 3.269011839924224e-05,
"loss": 0.0036,
"step": 9000
},
{
"epoch": 0.75,
"learning_rate": 3.255123608808904e-05,
"loss": 0.0033,
"step": 9500
},
{
"epoch": 0.79,
"learning_rate": 3.241235377693583e-05,
"loss": 0.0033,
"step": 10000
},
{
"epoch": 0.83,
"learning_rate": 3.227347146578262e-05,
"loss": 0.0031,
"step": 10500
},
{
"epoch": 0.87,
"learning_rate": 3.2134589154629414e-05,
"loss": 0.003,
"step": 11000
},
{
"epoch": 0.91,
"learning_rate": 3.19957068434762e-05,
"loss": 0.0029,
"step": 11500
},
{
"epoch": 0.95,
"learning_rate": 3.1856824532322994e-05,
"loss": 0.0029,
"step": 12000
},
{
"epoch": 0.99,
"learning_rate": 3.171794222116978e-05,
"loss": 0.0028,
"step": 12500
},
{
"epoch": 1.0,
"eval_accuracy": 0.6213869893484506,
"eval_f1": 0.7871180162568165,
"eval_f1_macro": 0.09915671829069983,
"eval_f1_weighted": 0.7187367346051451,
"eval_loss": 0.00270162639208138,
"eval_p": 0.8552003089493689,
"eval_r": 0.7290764165655865,
"eval_roc_auc": 0.8644347845537721,
"eval_runtime": 154.8297,
"eval_samples_per_second": 508.133,
"eval_steps_per_second": 31.764,
"step": 12669
},
{
"epoch": 1.03,
"learning_rate": 3.157905991001658e-05,
"loss": 0.0027,
"step": 13000
},
{
"epoch": 1.07,
"learning_rate": 3.144017759886337e-05,
"loss": 0.0026,
"step": 13500
},
{
"epoch": 1.11,
"learning_rate": 3.1301295287710154e-05,
"loss": 0.0026,
"step": 14000
},
{
"epoch": 1.14,
"learning_rate": 3.1162412976556954e-05,
"loss": 0.0026,
"step": 14500
},
{
"epoch": 1.18,
"learning_rate": 3.102353066540374e-05,
"loss": 0.0025,
"step": 15000
},
{
"epoch": 1.22,
"learning_rate": 3.0884648354250534e-05,
"loss": 0.0025,
"step": 15500
},
{
"epoch": 1.26,
"learning_rate": 3.074576604309732e-05,
"loss": 0.0025,
"step": 16000
},
{
"epoch": 1.3,
"learning_rate": 3.0606883731944114e-05,
"loss": 0.0025,
"step": 16500
},
{
"epoch": 1.34,
"learning_rate": 3.0468001420790908e-05,
"loss": 0.0024,
"step": 17000
},
{
"epoch": 1.38,
"learning_rate": 3.0329119109637698e-05,
"loss": 0.0023,
"step": 17500
},
{
"epoch": 1.42,
"learning_rate": 3.019023679848449e-05,
"loss": 0.0023,
"step": 18000
},
{
"epoch": 1.46,
"learning_rate": 3.005135448733128e-05,
"loss": 0.0022,
"step": 18500
},
{
"epoch": 1.5,
"learning_rate": 2.991247217617807e-05,
"loss": 0.0024,
"step": 19000
},
{
"epoch": 1.54,
"learning_rate": 2.9773589865024864e-05,
"loss": 0.0023,
"step": 19500
},
{
"epoch": 1.58,
"learning_rate": 2.9634707553871654e-05,
"loss": 0.0023,
"step": 20000
},
{
"epoch": 1.62,
"learning_rate": 2.9495825242718448e-05,
"loss": 0.0022,
"step": 20500
},
{
"epoch": 1.66,
"learning_rate": 2.9356942931565238e-05,
"loss": 0.0022,
"step": 21000
},
{
"epoch": 1.7,
"learning_rate": 2.921806062041203e-05,
"loss": 0.0022,
"step": 21500
},
{
"epoch": 1.74,
"learning_rate": 2.907917830925882e-05,
"loss": 0.0021,
"step": 22000
},
{
"epoch": 1.78,
"learning_rate": 2.894029599810561e-05,
"loss": 0.0021,
"step": 22500
},
{
"epoch": 1.82,
"learning_rate": 2.8801413686952405e-05,
"loss": 0.0021,
"step": 23000
},
{
"epoch": 1.85,
"learning_rate": 2.8662531375799195e-05,
"loss": 0.0021,
"step": 23500
},
{
"epoch": 1.89,
"learning_rate": 2.8523649064645988e-05,
"loss": 0.0021,
"step": 24000
},
{
"epoch": 1.93,
"learning_rate": 2.8384766753492778e-05,
"loss": 0.0021,
"step": 24500
},
{
"epoch": 1.97,
"learning_rate": 2.8245884442339568e-05,
"loss": 0.0021,
"step": 25000
},
{
"epoch": 2.0,
"eval_accuracy": 0.6822711442153697,
"eval_f1": 0.8278927653585252,
"eval_f1_macro": 0.17721735370294905,
"eval_f1_weighted": 0.7911965688289363,
"eval_loss": 0.002050888491794467,
"eval_p": 0.8518841031375748,
"eval_r": 0.8052157338416219,
"eval_roc_auc": 0.9024905715784995,
"eval_runtime": 150.3062,
"eval_samples_per_second": 523.425,
"eval_steps_per_second": 32.72,
"step": 25338
},
{
"epoch": 2.01,
"learning_rate": 2.810700213118636e-05,
"loss": 0.002,
"step": 25500
},
{
"epoch": 2.05,
"learning_rate": 2.796811982003315e-05,
"loss": 0.002,
"step": 26000
},
{
"epoch": 2.09,
"learning_rate": 2.7829237508879945e-05,
"loss": 0.0019,
"step": 26500
},
{
"epoch": 2.13,
"learning_rate": 2.7690355197726735e-05,
"loss": 0.0019,
"step": 27000
},
{
"epoch": 2.17,
"learning_rate": 2.7551472886573525e-05,
"loss": 0.0018,
"step": 27500
},
{
"epoch": 2.21,
"learning_rate": 2.7412590575420318e-05,
"loss": 0.0019,
"step": 28000
},
{
"epoch": 2.25,
"learning_rate": 2.7273708264267108e-05,
"loss": 0.0019,
"step": 28500
},
{
"epoch": 2.29,
"learning_rate": 2.71348259531139e-05,
"loss": 0.0018,
"step": 29000
},
{
"epoch": 2.33,
"learning_rate": 2.699594364196069e-05,
"loss": 0.0019,
"step": 29500
},
{
"epoch": 2.37,
"learning_rate": 2.685706133080748e-05,
"loss": 0.0018,
"step": 30000
},
{
"epoch": 2.41,
"learning_rate": 2.6718179019654275e-05,
"loss": 0.0017,
"step": 30500
},
{
"epoch": 2.45,
"learning_rate": 2.6579296708501065e-05,
"loss": 0.0018,
"step": 31000
},
{
"epoch": 2.49,
"learning_rate": 2.644041439734786e-05,
"loss": 0.0018,
"step": 31500
},
{
"epoch": 2.53,
"learning_rate": 2.630153208619465e-05,
"loss": 0.0018,
"step": 32000
},
{
"epoch": 2.57,
"learning_rate": 2.616264977504144e-05,
"loss": 0.0018,
"step": 32500
},
{
"epoch": 2.6,
"learning_rate": 2.6023767463888232e-05,
"loss": 0.0018,
"step": 33000
},
{
"epoch": 2.64,
"learning_rate": 2.5884885152735022e-05,
"loss": 0.0017,
"step": 33500
},
{
"epoch": 2.68,
"learning_rate": 2.5746002841581815e-05,
"loss": 0.0017,
"step": 34000
},
{
"epoch": 2.72,
"learning_rate": 2.5607120530428605e-05,
"loss": 0.0017,
"step": 34500
},
{
"epoch": 2.76,
"learning_rate": 2.5468238219275395e-05,
"loss": 0.0017,
"step": 35000
},
{
"epoch": 2.8,
"learning_rate": 2.532935590812219e-05,
"loss": 0.0018,
"step": 35500
},
{
"epoch": 2.84,
"learning_rate": 2.519047359696898e-05,
"loss": 0.0018,
"step": 36000
},
{
"epoch": 2.88,
"learning_rate": 2.5051591285815772e-05,
"loss": 0.0017,
"step": 36500
},
{
"epoch": 2.92,
"learning_rate": 2.4912708974662562e-05,
"loss": 0.0018,
"step": 37000
},
{
"epoch": 2.96,
"learning_rate": 2.4773826663509355e-05,
"loss": 0.0018,
"step": 37500
},
{
"epoch": 3.0,
"learning_rate": 2.4634944352356145e-05,
"loss": 0.0018,
"step": 38000
},
{
"epoch": 3.0,
"eval_accuracy": 0.6974095635152655,
"eval_f1": 0.8369747009165567,
"eval_f1_macro": 0.21887278841705338,
"eval_f1_weighted": 0.8081134454474668,
"eval_loss": 0.001851799781434238,
"eval_p": 0.8528991177184794,
"eval_r": 0.8216340322301161,
"eval_roc_auc": 0.910698290784354,
"eval_runtime": 150.2603,
"eval_samples_per_second": 523.585,
"eval_steps_per_second": 32.73,
"step": 38007
},
{
"epoch": 3.04,
"learning_rate": 2.4496062041202935e-05,
"loss": 0.0016,
"step": 38500
},
{
"epoch": 3.08,
"learning_rate": 2.435717973004973e-05,
"loss": 0.0015,
"step": 39000
},
{
"epoch": 3.12,
"learning_rate": 2.421829741889652e-05,
"loss": 0.0016,
"step": 39500
},
{
"epoch": 3.16,
"learning_rate": 2.4079415107743312e-05,
"loss": 0.0016,
"step": 40000
},
{
"epoch": 3.2,
"learning_rate": 2.3940532796590102e-05,
"loss": 0.0015,
"step": 40500
},
{
"epoch": 3.24,
"learning_rate": 2.3801650485436892e-05,
"loss": 0.0016,
"step": 41000
},
{
"epoch": 3.28,
"learning_rate": 2.3662768174283686e-05,
"loss": 0.0015,
"step": 41500
},
{
"epoch": 3.32,
"learning_rate": 2.3523885863130476e-05,
"loss": 0.0015,
"step": 42000
},
{
"epoch": 3.35,
"learning_rate": 2.338500355197727e-05,
"loss": 0.0015,
"step": 42500
},
{
"epoch": 3.39,
"learning_rate": 2.324612124082406e-05,
"loss": 0.0016,
"step": 43000
},
{
"epoch": 3.43,
"learning_rate": 2.310723892967085e-05,
"loss": 0.0016,
"step": 43500
},
{
"epoch": 3.47,
"learning_rate": 2.2968356618517642e-05,
"loss": 0.0016,
"step": 44000
},
{
"epoch": 3.51,
"learning_rate": 2.2829474307364432e-05,
"loss": 0.0015,
"step": 44500
},
{
"epoch": 3.55,
"learning_rate": 2.2690591996211226e-05,
"loss": 0.0015,
"step": 45000
},
{
"epoch": 3.59,
"learning_rate": 2.2551709685058016e-05,
"loss": 0.0016,
"step": 45500
},
{
"epoch": 3.63,
"learning_rate": 2.2412827373904806e-05,
"loss": 0.0015,
"step": 46000
},
{
"epoch": 3.67,
"learning_rate": 2.22739450627516e-05,
"loss": 0.0015,
"step": 46500
},
{
"epoch": 3.71,
"learning_rate": 2.213506275159839e-05,
"loss": 0.0015,
"step": 47000
},
{
"epoch": 3.75,
"learning_rate": 2.1996180440445182e-05,
"loss": 0.0015,
"step": 47500
},
{
"epoch": 3.79,
"learning_rate": 2.1857298129291972e-05,
"loss": 0.0015,
"step": 48000
},
{
"epoch": 3.83,
"learning_rate": 2.1718415818138762e-05,
"loss": 0.0015,
"step": 48500
},
{
"epoch": 3.87,
"learning_rate": 2.1579533506985556e-05,
"loss": 0.0015,
"step": 49000
},
{
"epoch": 3.91,
"learning_rate": 2.1440651195832346e-05,
"loss": 0.0015,
"step": 49500
},
{
"epoch": 3.95,
"learning_rate": 2.130176888467914e-05,
"loss": 0.0015,
"step": 50000
},
{
"epoch": 3.99,
"learning_rate": 2.116288657352593e-05,
"loss": 0.0015,
"step": 50500
},
{
"epoch": 4.0,
"eval_accuracy": 0.710870173119455,
"eval_f1": 0.8427627441351563,
"eval_f1_macro": 0.276326750740236,
"eval_f1_weighted": 0.8243063205078164,
"eval_loss": 0.0018125491915270686,
"eval_p": 0.8380480784058393,
"eval_r": 0.8475307572344482,
"eval_roc_auc": 0.923628157802133,
"eval_runtime": 150.1179,
"eval_samples_per_second": 524.082,
"eval_steps_per_second": 32.761,
"step": 50676
},
{
"epoch": 4.03,
"learning_rate": 2.1024004262372723e-05,
"loss": 0.0014,
"step": 51000
},
{
"epoch": 4.07,
"learning_rate": 2.0885121951219513e-05,
"loss": 0.0014,
"step": 51500
},
{
"epoch": 4.1,
"learning_rate": 2.0746239640066303e-05,
"loss": 0.0013,
"step": 52000
},
{
"epoch": 4.14,
"learning_rate": 2.0607357328913096e-05,
"loss": 0.0013,
"step": 52500
},
{
"epoch": 4.18,
"learning_rate": 2.0468475017759886e-05,
"loss": 0.0013,
"step": 53000
},
{
"epoch": 4.22,
"learning_rate": 2.032959270660668e-05,
"loss": 0.0013,
"step": 53500
},
{
"epoch": 4.26,
"learning_rate": 2.019071039545347e-05,
"loss": 0.0013,
"step": 54000
},
{
"epoch": 4.3,
"learning_rate": 2.005182808430026e-05,
"loss": 0.0013,
"step": 54500
},
{
"epoch": 4.34,
"learning_rate": 1.9912945773147053e-05,
"loss": 0.0013,
"step": 55000
},
{
"epoch": 4.38,
"learning_rate": 1.9774063461993843e-05,
"loss": 0.0013,
"step": 55500
},
{
"epoch": 4.42,
"learning_rate": 1.9635181150840636e-05,
"loss": 0.0014,
"step": 56000
},
{
"epoch": 4.46,
"learning_rate": 1.9496298839687426e-05,
"loss": 0.0013,
"step": 56500
},
{
"epoch": 4.5,
"learning_rate": 1.9357416528534216e-05,
"loss": 0.0014,
"step": 57000
},
{
"epoch": 4.54,
"learning_rate": 1.921853421738101e-05,
"loss": 0.0014,
"step": 57500
},
{
"epoch": 4.58,
"learning_rate": 1.90796519062278e-05,
"loss": 0.0013,
"step": 58000
},
{
"epoch": 4.62,
"learning_rate": 1.8940769595074593e-05,
"loss": 0.0013,
"step": 58500
},
{
"epoch": 4.66,
"learning_rate": 1.8801887283921383e-05,
"loss": 0.0013,
"step": 59000
},
{
"epoch": 4.7,
"learning_rate": 1.8663004972768173e-05,
"loss": 0.0013,
"step": 59500
},
{
"epoch": 4.74,
"learning_rate": 1.8524122661614966e-05,
"loss": 0.0014,
"step": 60000
},
{
"epoch": 4.78,
"learning_rate": 1.8385240350461756e-05,
"loss": 0.0013,
"step": 60500
},
{
"epoch": 4.81,
"learning_rate": 1.824635803930855e-05,
"loss": 0.0014,
"step": 61000
},
{
"epoch": 4.85,
"learning_rate": 1.810747572815534e-05,
"loss": 0.0013,
"step": 61500
},
{
"epoch": 4.89,
"learning_rate": 1.796859341700213e-05,
"loss": 0.0014,
"step": 62000
},
{
"epoch": 4.93,
"learning_rate": 1.7829711105848923e-05,
"loss": 0.0013,
"step": 62500
},
{
"epoch": 4.97,
"learning_rate": 1.7690828794695713e-05,
"loss": 0.0013,
"step": 63000
},
{
"epoch": 5.0,
"eval_accuracy": 0.71883976917406,
"eval_f1": 0.8474025833437978,
"eval_f1_macro": 0.28796797014277603,
"eval_f1_weighted": 0.8288548463559504,
"eval_loss": 0.0016964372480288148,
"eval_p": 0.8470321413422668,
"eval_r": 0.8477733495061515,
"eval_roc_auc": 0.9237584040683807,
"eval_runtime": 151.5541,
"eval_samples_per_second": 519.115,
"eval_steps_per_second": 32.45,
"step": 63345
},
{
"epoch": 5.01,
"learning_rate": 1.7551946483542503e-05,
"loss": 0.0013,
"step": 63500
},
{
"epoch": 5.05,
"learning_rate": 1.7413064172389297e-05,
"loss": 0.0012,
"step": 64000
},
{
"epoch": 5.09,
"learning_rate": 1.7274181861236087e-05,
"loss": 0.0012,
"step": 64500
},
{
"epoch": 5.13,
"learning_rate": 1.713529955008288e-05,
"loss": 0.0011,
"step": 65000
},
{
"epoch": 5.17,
"learning_rate": 1.699641723892967e-05,
"loss": 0.0012,
"step": 65500
},
{
"epoch": 5.21,
"learning_rate": 1.685753492777646e-05,
"loss": 0.0012,
"step": 66000
},
{
"epoch": 5.25,
"learning_rate": 1.6718652616623253e-05,
"loss": 0.0012,
"step": 66500
},
{
"epoch": 5.29,
"learning_rate": 1.6579770305470043e-05,
"loss": 0.0012,
"step": 67000
},
{
"epoch": 5.33,
"learning_rate": 1.6440887994316837e-05,
"loss": 0.0012,
"step": 67500
},
{
"epoch": 5.37,
"learning_rate": 1.6302005683163627e-05,
"loss": 0.0012,
"step": 68000
},
{
"epoch": 5.41,
"learning_rate": 1.616312337201042e-05,
"loss": 0.0012,
"step": 68500
},
{
"epoch": 5.45,
"learning_rate": 1.602424106085721e-05,
"loss": 0.0012,
"step": 69000
},
{
"epoch": 5.49,
"learning_rate": 1.5885358749704e-05,
"loss": 0.0012,
"step": 69500
},
{
"epoch": 5.53,
"learning_rate": 1.5746476438550794e-05,
"loss": 0.0011,
"step": 70000
},
{
"epoch": 5.56,
"learning_rate": 1.5607594127397584e-05,
"loss": 0.0012,
"step": 70500
},
{
"epoch": 5.6,
"learning_rate": 1.5468711816244377e-05,
"loss": 0.0012,
"step": 71000
},
{
"epoch": 5.64,
"learning_rate": 1.5329829505091167e-05,
"loss": 0.0012,
"step": 71500
},
{
"epoch": 5.68,
"learning_rate": 1.5190947193937959e-05,
"loss": 0.0012,
"step": 72000
},
{
"epoch": 5.72,
"learning_rate": 1.505206488278475e-05,
"loss": 0.0012,
"step": 72500
},
{
"epoch": 5.76,
"learning_rate": 1.4913182571631542e-05,
"loss": 0.0012,
"step": 73000
},
{
"epoch": 5.8,
"learning_rate": 1.4774300260478334e-05,
"loss": 0.0012,
"step": 73500
},
{
"epoch": 5.84,
"learning_rate": 1.4635417949325124e-05,
"loss": 0.0012,
"step": 74000
},
{
"epoch": 5.88,
"learning_rate": 1.4496535638171915e-05,
"loss": 0.0012,
"step": 74500
},
{
"epoch": 5.92,
"learning_rate": 1.4357653327018707e-05,
"loss": 0.0012,
"step": 75000
},
{
"epoch": 5.96,
"learning_rate": 1.4218771015865499e-05,
"loss": 0.0011,
"step": 75500
},
{
"epoch": 6.0,
"learning_rate": 1.407988870471229e-05,
"loss": 0.0012,
"step": 76000
},
{
"epoch": 6.0,
"eval_accuracy": 0.7229326079772225,
"eval_f1": 0.8492761531099651,
"eval_f1_macro": 0.3068715201994391,
"eval_f1_weighted": 0.8317541964664328,
"eval_loss": 0.0016757699195295572,
"eval_p": 0.8489857055039438,
"eval_r": 0.8495667995148155,
"eval_roc_auc": 0.9246567913434838,
"eval_runtime": 151.532,
"eval_samples_per_second": 519.191,
"eval_steps_per_second": 32.455,
"step": 76014
},
{
"epoch": 6.04,
"learning_rate": 1.3941006393559082e-05,
"loss": 0.001,
"step": 76500
},
{
"epoch": 6.08,
"learning_rate": 1.3802124082405872e-05,
"loss": 0.001,
"step": 77000
},
{
"epoch": 6.12,
"learning_rate": 1.3663241771252664e-05,
"loss": 0.001,
"step": 77500
},
{
"epoch": 6.16,
"learning_rate": 1.3524359460099456e-05,
"loss": 0.001,
"step": 78000
},
{
"epoch": 6.2,
"learning_rate": 1.3385477148946247e-05,
"loss": 0.0011,
"step": 78500
},
{
"epoch": 6.24,
"learning_rate": 1.3246594837793039e-05,
"loss": 0.0011,
"step": 79000
},
{
"epoch": 6.28,
"learning_rate": 1.3107712526639829e-05,
"loss": 0.0011,
"step": 79500
},
{
"epoch": 6.31,
"learning_rate": 1.296883021548662e-05,
"loss": 0.001,
"step": 80000
},
{
"epoch": 6.35,
"learning_rate": 1.2829947904333412e-05,
"loss": 0.0011,
"step": 80500
},
{
"epoch": 6.39,
"learning_rate": 1.2691065593180204e-05,
"loss": 0.001,
"step": 81000
},
{
"epoch": 6.43,
"learning_rate": 1.2552183282026996e-05,
"loss": 0.001,
"step": 81500
},
{
"epoch": 6.47,
"learning_rate": 1.2413300970873786e-05,
"loss": 0.0011,
"step": 82000
},
{
"epoch": 6.51,
"learning_rate": 1.2274418659720577e-05,
"loss": 0.001,
"step": 82500
},
{
"epoch": 6.55,
"learning_rate": 1.213553634856737e-05,
"loss": 0.0011,
"step": 83000
},
{
"epoch": 6.59,
"learning_rate": 1.1996654037414161e-05,
"loss": 0.001,
"step": 83500
},
{
"epoch": 6.63,
"learning_rate": 1.1857771726260953e-05,
"loss": 0.001,
"step": 84000
},
{
"epoch": 6.67,
"learning_rate": 1.1718889415107744e-05,
"loss": 0.001,
"step": 84500
},
{
"epoch": 6.71,
"learning_rate": 1.1580007103954534e-05,
"loss": 0.0011,
"step": 85000
},
{
"epoch": 6.75,
"learning_rate": 1.1441124792801326e-05,
"loss": 0.0011,
"step": 85500
},
{
"epoch": 6.79,
"learning_rate": 1.1302242481648118e-05,
"loss": 0.001,
"step": 86000
},
{
"epoch": 6.83,
"learning_rate": 1.116336017049491e-05,
"loss": 0.001,
"step": 86500
},
{
"epoch": 6.87,
"learning_rate": 1.1024477859341701e-05,
"loss": 0.0011,
"step": 87000
},
{
"epoch": 6.91,
"learning_rate": 1.0885595548188491e-05,
"loss": 0.001,
"step": 87500
},
{
"epoch": 6.95,
"learning_rate": 1.0746713237035283e-05,
"loss": 0.0011,
"step": 88000
},
{
"epoch": 6.99,
"learning_rate": 1.0607830925882074e-05,
"loss": 0.001,
"step": 88500
},
{
"epoch": 7.0,
"eval_accuracy": 0.7276609807560313,
"eval_f1": 0.851625034501794,
"eval_f1_macro": 0.33216946866780483,
"eval_f1_weighted": 0.8360047793004518,
"eval_loss": 0.0016766807530075312,
"eval_p": 0.8478514752859547,
"eval_r": 0.8554323340842142,
"eval_roc_auc": 0.9275875551926686,
"eval_runtime": 149.9874,
"eval_samples_per_second": 524.538,
"eval_steps_per_second": 32.789,
"step": 88683
},
{
"epoch": 7.03,
"learning_rate": 1.0468948614728866e-05,
"loss": 0.001,
"step": 89000
},
{
"epoch": 7.06,
"learning_rate": 1.0330066303575658e-05,
"loss": 0.0009,
"step": 89500
},
{
"epoch": 7.1,
"learning_rate": 1.019118399242245e-05,
"loss": 0.0009,
"step": 90000
},
{
"epoch": 7.14,
"learning_rate": 1.005230168126924e-05,
"loss": 0.0009,
"step": 90500
},
{
"epoch": 7.18,
"learning_rate": 9.913419370116031e-06,
"loss": 0.0009,
"step": 91000
},
{
"epoch": 7.22,
"learning_rate": 9.774537058962823e-06,
"loss": 0.0009,
"step": 91500
},
{
"epoch": 7.26,
"learning_rate": 9.635654747809615e-06,
"loss": 0.0009,
"step": 92000
},
{
"epoch": 7.3,
"learning_rate": 9.496772436656406e-06,
"loss": 0.0009,
"step": 92500
},
{
"epoch": 7.34,
"learning_rate": 9.357890125503196e-06,
"loss": 0.0009,
"step": 93000
},
{
"epoch": 7.38,
"learning_rate": 9.219007814349988e-06,
"loss": 0.0009,
"step": 93500
},
{
"epoch": 7.42,
"learning_rate": 9.08012550319678e-06,
"loss": 0.0009,
"step": 94000
},
{
"epoch": 7.46,
"learning_rate": 8.941243192043571e-06,
"loss": 0.0009,
"step": 94500
},
{
"epoch": 7.5,
"learning_rate": 8.802360880890363e-06,
"loss": 0.0009,
"step": 95000
},
{
"epoch": 7.54,
"learning_rate": 8.663478569737153e-06,
"loss": 0.0009,
"step": 95500
},
{
"epoch": 7.58,
"learning_rate": 8.524596258583945e-06,
"loss": 0.0009,
"step": 96000
},
{
"epoch": 7.62,
"learning_rate": 8.385713947430736e-06,
"loss": 0.0009,
"step": 96500
},
{
"epoch": 7.66,
"learning_rate": 8.246831636277528e-06,
"loss": 0.0009,
"step": 97000
},
{
"epoch": 7.7,
"learning_rate": 8.107949325124318e-06,
"loss": 0.0009,
"step": 97500
},
{
"epoch": 7.74,
"learning_rate": 7.96906701397111e-06,
"loss": 0.0009,
"step": 98000
},
{
"epoch": 7.77,
"learning_rate": 7.830184702817902e-06,
"loss": 0.0009,
"step": 98500
},
{
"epoch": 7.81,
"learning_rate": 7.691302391664693e-06,
"loss": 0.001,
"step": 99000
},
{
"epoch": 7.85,
"learning_rate": 7.552420080511485e-06,
"loss": 0.001,
"step": 99500
},
{
"epoch": 7.89,
"learning_rate": 7.413537769358277e-06,
"loss": 0.0009,
"step": 100000
},
{
"epoch": 7.93,
"learning_rate": 7.2746554582050675e-06,
"loss": 0.001,
"step": 100500
},
{
"epoch": 7.97,
"learning_rate": 7.135773147051859e-06,
"loss": 0.0009,
"step": 101000
},
{
"epoch": 8.0,
"eval_accuracy": 0.7294913186058927,
"eval_f1": 0.8529003679860734,
"eval_f1_macro": 0.34351140192299934,
"eval_f1_weighted": 0.8387988374835376,
"eval_loss": 0.001688135787844658,
"eval_p": 0.8483892536904006,
"eval_r": 0.8574597123548778,
"eval_roc_auc": 0.928601476610379,
"eval_runtime": 152.9938,
"eval_samples_per_second": 514.23,
"eval_steps_per_second": 32.145,
"step": 101352
},
{
"epoch": 8.01,
"learning_rate": 6.99689083589865e-06,
"loss": 0.0009,
"step": 101500
},
{
"epoch": 8.05,
"learning_rate": 6.858008524745442e-06,
"loss": 0.0008,
"step": 102000
},
{
"epoch": 8.09,
"learning_rate": 6.7191262135922334e-06,
"loss": 0.0008,
"step": 102500
},
{
"epoch": 8.13,
"learning_rate": 6.580243902439024e-06,
"loss": 0.0008,
"step": 103000
},
{
"epoch": 8.17,
"learning_rate": 6.441361591285816e-06,
"loss": 0.0008,
"step": 103500
},
{
"epoch": 8.21,
"learning_rate": 6.302479280132608e-06,
"loss": 0.0009,
"step": 104000
},
{
"epoch": 8.25,
"learning_rate": 6.1635969689793985e-06,
"loss": 0.0008,
"step": 104500
},
{
"epoch": 8.29,
"learning_rate": 6.02471465782619e-06,
"loss": 0.0009,
"step": 105000
},
{
"epoch": 8.33,
"learning_rate": 5.885832346672981e-06,
"loss": 0.0008,
"step": 105500
},
{
"epoch": 8.37,
"learning_rate": 5.746950035519773e-06,
"loss": 0.0008,
"step": 106000
},
{
"epoch": 8.41,
"learning_rate": 5.6080677243665645e-06,
"loss": 0.0008,
"step": 106500
},
{
"epoch": 8.45,
"learning_rate": 5.469185413213355e-06,
"loss": 0.0008,
"step": 107000
},
{
"epoch": 8.49,
"learning_rate": 5.330303102060147e-06,
"loss": 0.0008,
"step": 107500
},
{
"epoch": 8.52,
"learning_rate": 5.191420790906939e-06,
"loss": 0.0009,
"step": 108000
},
{
"epoch": 8.56,
"learning_rate": 5.0525384797537296e-06,
"loss": 0.0008,
"step": 108500
},
{
"epoch": 8.6,
"learning_rate": 4.913656168600521e-06,
"loss": 0.0009,
"step": 109000
},
{
"epoch": 8.64,
"learning_rate": 4.774773857447313e-06,
"loss": 0.0008,
"step": 109500
},
{
"epoch": 8.68,
"learning_rate": 4.635891546294104e-06,
"loss": 0.0008,
"step": 110000
},
{
"epoch": 8.72,
"learning_rate": 4.4970092351408955e-06,
"loss": 0.0009,
"step": 110500
},
{
"epoch": 8.76,
"learning_rate": 4.358126923987686e-06,
"loss": 0.0008,
"step": 111000
},
{
"epoch": 8.8,
"learning_rate": 4.219244612834478e-06,
"loss": 0.0008,
"step": 111500
},
{
"epoch": 8.84,
"learning_rate": 4.080362301681269e-06,
"loss": 0.0008,
"step": 112000
},
{
"epoch": 8.88,
"learning_rate": 3.941479990528061e-06,
"loss": 0.0009,
"step": 112500
},
{
"epoch": 8.92,
"learning_rate": 3.8025976793748523e-06,
"loss": 0.0008,
"step": 113000
},
{
"epoch": 8.96,
"learning_rate": 3.6637153682216436e-06,
"loss": 0.0008,
"step": 113500
},
{
"epoch": 9.0,
"learning_rate": 3.524833057068435e-06,
"loss": 0.0008,
"step": 114000
},
{
"epoch": 9.0,
"eval_accuracy": 0.7333426544983095,
"eval_f1": 0.8538806684223458,
"eval_f1_macro": 0.35458058741266574,
"eval_f1_weighted": 0.8401736902228668,
"eval_loss": 0.0016993152676150203,
"eval_p": 0.8495998764892057,
"eval_r": 0.8582048171893952,
"eval_roc_auc": 0.9289751251101112,
"eval_runtime": 150.295,
"eval_samples_per_second": 523.464,
"eval_steps_per_second": 32.722,
"step": 114021
}
],
"max_steps": 126690,
"num_train_epochs": 10,
"total_flos": 7.245026294480845e+16,
"trial_name": null,
"trial_params": null
}