|
{ |
|
"best_metric": 0.8538806684223458, |
|
"best_model_checkpoint": "output/ipzs-sg-bert_xxl-bs-16/checkpoint-114021", |
|
"epoch": 9.0, |
|
"global_step": 114021, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.505111768884679e-05, |
|
"loss": 0.0964, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.491223537769359e-05, |
|
"loss": 0.0133, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.4773353066540374e-05, |
|
"loss": 0.0109, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.463447075538717e-05, |
|
"loss": 0.0102, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.4495588444233954e-05, |
|
"loss": 0.0099, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.435670613308075e-05, |
|
"loss": 0.0098, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.421782382192754e-05, |
|
"loss": 0.0096, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.407894151077433e-05, |
|
"loss": 0.0088, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.394005919962113e-05, |
|
"loss": 0.0074, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.3801176888467914e-05, |
|
"loss": 0.0064, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.36622945773147e-05, |
|
"loss": 0.0057, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.35234122661615e-05, |
|
"loss": 0.005, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 3.338452995500829e-05, |
|
"loss": 0.0048, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 3.324564764385508e-05, |
|
"loss": 0.0042, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.310676533270187e-05, |
|
"loss": 0.0039, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.296788302154867e-05, |
|
"loss": 0.0038, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.2829000710395454e-05, |
|
"loss": 0.0036, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.269011839924224e-05, |
|
"loss": 0.0036, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.255123608808904e-05, |
|
"loss": 0.0033, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.241235377693583e-05, |
|
"loss": 0.0033, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.227347146578262e-05, |
|
"loss": 0.0031, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.2134589154629414e-05, |
|
"loss": 0.003, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.19957068434762e-05, |
|
"loss": 0.0029, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.1856824532322994e-05, |
|
"loss": 0.0029, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.171794222116978e-05, |
|
"loss": 0.0028, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6213869893484506, |
|
"eval_f1": 0.7871180162568165, |
|
"eval_f1_macro": 0.09915671829069983, |
|
"eval_f1_weighted": 0.7187367346051451, |
|
"eval_loss": 0.00270162639208138, |
|
"eval_p": 0.8552003089493689, |
|
"eval_r": 0.7290764165655865, |
|
"eval_roc_auc": 0.8644347845537721, |
|
"eval_runtime": 154.8297, |
|
"eval_samples_per_second": 508.133, |
|
"eval_steps_per_second": 31.764, |
|
"step": 12669 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.157905991001658e-05, |
|
"loss": 0.0027, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.144017759886337e-05, |
|
"loss": 0.0026, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.1301295287710154e-05, |
|
"loss": 0.0026, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.1162412976556954e-05, |
|
"loss": 0.0026, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.102353066540374e-05, |
|
"loss": 0.0025, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.0884648354250534e-05, |
|
"loss": 0.0025, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.074576604309732e-05, |
|
"loss": 0.0025, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.0606883731944114e-05, |
|
"loss": 0.0025, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.0468001420790908e-05, |
|
"loss": 0.0024, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3.0329119109637698e-05, |
|
"loss": 0.0023, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 3.019023679848449e-05, |
|
"loss": 0.0023, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.005135448733128e-05, |
|
"loss": 0.0022, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.991247217617807e-05, |
|
"loss": 0.0024, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.9773589865024864e-05, |
|
"loss": 0.0023, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.9634707553871654e-05, |
|
"loss": 0.0023, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.9495825242718448e-05, |
|
"loss": 0.0022, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.9356942931565238e-05, |
|
"loss": 0.0022, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.921806062041203e-05, |
|
"loss": 0.0022, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.907917830925882e-05, |
|
"loss": 0.0021, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.894029599810561e-05, |
|
"loss": 0.0021, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.8801413686952405e-05, |
|
"loss": 0.0021, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.8662531375799195e-05, |
|
"loss": 0.0021, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.8523649064645988e-05, |
|
"loss": 0.0021, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 2.8384766753492778e-05, |
|
"loss": 0.0021, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.8245884442339568e-05, |
|
"loss": 0.0021, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6822711442153697, |
|
"eval_f1": 0.8278927653585252, |
|
"eval_f1_macro": 0.17721735370294905, |
|
"eval_f1_weighted": 0.7911965688289363, |
|
"eval_loss": 0.002050888491794467, |
|
"eval_p": 0.8518841031375748, |
|
"eval_r": 0.8052157338416219, |
|
"eval_roc_auc": 0.9024905715784995, |
|
"eval_runtime": 150.3062, |
|
"eval_samples_per_second": 523.425, |
|
"eval_steps_per_second": 32.72, |
|
"step": 25338 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 2.810700213118636e-05, |
|
"loss": 0.002, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 2.796811982003315e-05, |
|
"loss": 0.002, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 2.7829237508879945e-05, |
|
"loss": 0.0019, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.7690355197726735e-05, |
|
"loss": 0.0019, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 2.7551472886573525e-05, |
|
"loss": 0.0018, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 2.7412590575420318e-05, |
|
"loss": 0.0019, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 2.7273708264267108e-05, |
|
"loss": 0.0019, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.71348259531139e-05, |
|
"loss": 0.0018, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.699594364196069e-05, |
|
"loss": 0.0019, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.685706133080748e-05, |
|
"loss": 0.0018, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 2.6718179019654275e-05, |
|
"loss": 0.0017, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 2.6579296708501065e-05, |
|
"loss": 0.0018, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 2.644041439734786e-05, |
|
"loss": 0.0018, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 2.630153208619465e-05, |
|
"loss": 0.0018, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.616264977504144e-05, |
|
"loss": 0.0018, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.6023767463888232e-05, |
|
"loss": 0.0018, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.5884885152735022e-05, |
|
"loss": 0.0017, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.5746002841581815e-05, |
|
"loss": 0.0017, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.5607120530428605e-05, |
|
"loss": 0.0017, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.5468238219275395e-05, |
|
"loss": 0.0017, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.532935590812219e-05, |
|
"loss": 0.0018, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.519047359696898e-05, |
|
"loss": 0.0018, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.5051591285815772e-05, |
|
"loss": 0.0017, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 2.4912708974662562e-05, |
|
"loss": 0.0018, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.4773826663509355e-05, |
|
"loss": 0.0018, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 2.4634944352356145e-05, |
|
"loss": 0.0018, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6974095635152655, |
|
"eval_f1": 0.8369747009165567, |
|
"eval_f1_macro": 0.21887278841705338, |
|
"eval_f1_weighted": 0.8081134454474668, |
|
"eval_loss": 0.001851799781434238, |
|
"eval_p": 0.8528991177184794, |
|
"eval_r": 0.8216340322301161, |
|
"eval_roc_auc": 0.910698290784354, |
|
"eval_runtime": 150.2603, |
|
"eval_samples_per_second": 523.585, |
|
"eval_steps_per_second": 32.73, |
|
"step": 38007 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 2.4496062041202935e-05, |
|
"loss": 0.0016, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 2.435717973004973e-05, |
|
"loss": 0.0015, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 2.421829741889652e-05, |
|
"loss": 0.0016, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 2.4079415107743312e-05, |
|
"loss": 0.0016, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 2.3940532796590102e-05, |
|
"loss": 0.0015, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 2.3801650485436892e-05, |
|
"loss": 0.0016, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 2.3662768174283686e-05, |
|
"loss": 0.0015, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 2.3523885863130476e-05, |
|
"loss": 0.0015, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 2.338500355197727e-05, |
|
"loss": 0.0015, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 2.324612124082406e-05, |
|
"loss": 0.0016, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 2.310723892967085e-05, |
|
"loss": 0.0016, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 2.2968356618517642e-05, |
|
"loss": 0.0016, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 2.2829474307364432e-05, |
|
"loss": 0.0015, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 2.2690591996211226e-05, |
|
"loss": 0.0015, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 2.2551709685058016e-05, |
|
"loss": 0.0016, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 2.2412827373904806e-05, |
|
"loss": 0.0015, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 2.22739450627516e-05, |
|
"loss": 0.0015, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 2.213506275159839e-05, |
|
"loss": 0.0015, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 2.1996180440445182e-05, |
|
"loss": 0.0015, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 2.1857298129291972e-05, |
|
"loss": 0.0015, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 2.1718415818138762e-05, |
|
"loss": 0.0015, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 2.1579533506985556e-05, |
|
"loss": 0.0015, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 2.1440651195832346e-05, |
|
"loss": 0.0015, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 2.130176888467914e-05, |
|
"loss": 0.0015, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 2.116288657352593e-05, |
|
"loss": 0.0015, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.710870173119455, |
|
"eval_f1": 0.8427627441351563, |
|
"eval_f1_macro": 0.276326750740236, |
|
"eval_f1_weighted": 0.8243063205078164, |
|
"eval_loss": 0.0018125491915270686, |
|
"eval_p": 0.8380480784058393, |
|
"eval_r": 0.8475307572344482, |
|
"eval_roc_auc": 0.923628157802133, |
|
"eval_runtime": 150.1179, |
|
"eval_samples_per_second": 524.082, |
|
"eval_steps_per_second": 32.761, |
|
"step": 50676 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 2.1024004262372723e-05, |
|
"loss": 0.0014, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 2.0885121951219513e-05, |
|
"loss": 0.0014, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 2.0746239640066303e-05, |
|
"loss": 0.0013, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 2.0607357328913096e-05, |
|
"loss": 0.0013, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 2.0468475017759886e-05, |
|
"loss": 0.0013, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 2.032959270660668e-05, |
|
"loss": 0.0013, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 2.019071039545347e-05, |
|
"loss": 0.0013, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 2.005182808430026e-05, |
|
"loss": 0.0013, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 1.9912945773147053e-05, |
|
"loss": 0.0013, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 1.9774063461993843e-05, |
|
"loss": 0.0013, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 1.9635181150840636e-05, |
|
"loss": 0.0014, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 1.9496298839687426e-05, |
|
"loss": 0.0013, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 1.9357416528534216e-05, |
|
"loss": 0.0014, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 1.921853421738101e-05, |
|
"loss": 0.0014, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 1.90796519062278e-05, |
|
"loss": 0.0013, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 1.8940769595074593e-05, |
|
"loss": 0.0013, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 1.8801887283921383e-05, |
|
"loss": 0.0013, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 1.8663004972768173e-05, |
|
"loss": 0.0013, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 1.8524122661614966e-05, |
|
"loss": 0.0014, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 1.8385240350461756e-05, |
|
"loss": 0.0013, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 1.824635803930855e-05, |
|
"loss": 0.0014, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 1.810747572815534e-05, |
|
"loss": 0.0013, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 1.796859341700213e-05, |
|
"loss": 0.0014, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 1.7829711105848923e-05, |
|
"loss": 0.0013, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 1.7690828794695713e-05, |
|
"loss": 0.0013, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.71883976917406, |
|
"eval_f1": 0.8474025833437978, |
|
"eval_f1_macro": 0.28796797014277603, |
|
"eval_f1_weighted": 0.8288548463559504, |
|
"eval_loss": 0.0016964372480288148, |
|
"eval_p": 0.8470321413422668, |
|
"eval_r": 0.8477733495061515, |
|
"eval_roc_auc": 0.9237584040683807, |
|
"eval_runtime": 151.5541, |
|
"eval_samples_per_second": 519.115, |
|
"eval_steps_per_second": 32.45, |
|
"step": 63345 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 1.7551946483542503e-05, |
|
"loss": 0.0013, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 1.7413064172389297e-05, |
|
"loss": 0.0012, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 1.7274181861236087e-05, |
|
"loss": 0.0012, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 1.713529955008288e-05, |
|
"loss": 0.0011, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 1.699641723892967e-05, |
|
"loss": 0.0012, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 1.685753492777646e-05, |
|
"loss": 0.0012, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 1.6718652616623253e-05, |
|
"loss": 0.0012, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 1.6579770305470043e-05, |
|
"loss": 0.0012, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 1.6440887994316837e-05, |
|
"loss": 0.0012, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 1.6302005683163627e-05, |
|
"loss": 0.0012, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 1.616312337201042e-05, |
|
"loss": 0.0012, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 1.602424106085721e-05, |
|
"loss": 0.0012, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 1.5885358749704e-05, |
|
"loss": 0.0012, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 1.5746476438550794e-05, |
|
"loss": 0.0011, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 1.5607594127397584e-05, |
|
"loss": 0.0012, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 1.5468711816244377e-05, |
|
"loss": 0.0012, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 1.5329829505091167e-05, |
|
"loss": 0.0012, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 1.5190947193937959e-05, |
|
"loss": 0.0012, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 1.505206488278475e-05, |
|
"loss": 0.0012, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 1.4913182571631542e-05, |
|
"loss": 0.0012, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 1.4774300260478334e-05, |
|
"loss": 0.0012, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 1.4635417949325124e-05, |
|
"loss": 0.0012, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 1.4496535638171915e-05, |
|
"loss": 0.0012, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 1.4357653327018707e-05, |
|
"loss": 0.0012, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 1.4218771015865499e-05, |
|
"loss": 0.0011, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.407988870471229e-05, |
|
"loss": 0.0012, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7229326079772225, |
|
"eval_f1": 0.8492761531099651, |
|
"eval_f1_macro": 0.3068715201994391, |
|
"eval_f1_weighted": 0.8317541964664328, |
|
"eval_loss": 0.0016757699195295572, |
|
"eval_p": 0.8489857055039438, |
|
"eval_r": 0.8495667995148155, |
|
"eval_roc_auc": 0.9246567913434838, |
|
"eval_runtime": 151.532, |
|
"eval_samples_per_second": 519.191, |
|
"eval_steps_per_second": 32.455, |
|
"step": 76014 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 1.3941006393559082e-05, |
|
"loss": 0.001, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 1.3802124082405872e-05, |
|
"loss": 0.001, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 1.3663241771252664e-05, |
|
"loss": 0.001, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 1.3524359460099456e-05, |
|
"loss": 0.001, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 1.3385477148946247e-05, |
|
"loss": 0.0011, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 1.3246594837793039e-05, |
|
"loss": 0.0011, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 1.3107712526639829e-05, |
|
"loss": 0.0011, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 1.296883021548662e-05, |
|
"loss": 0.001, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 1.2829947904333412e-05, |
|
"loss": 0.0011, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 1.2691065593180204e-05, |
|
"loss": 0.001, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 1.2552183282026996e-05, |
|
"loss": 0.001, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 1.2413300970873786e-05, |
|
"loss": 0.0011, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 1.2274418659720577e-05, |
|
"loss": 0.001, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 1.213553634856737e-05, |
|
"loss": 0.0011, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 1.1996654037414161e-05, |
|
"loss": 0.001, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 1.1857771726260953e-05, |
|
"loss": 0.001, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 1.1718889415107744e-05, |
|
"loss": 0.001, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"learning_rate": 1.1580007103954534e-05, |
|
"loss": 0.0011, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 1.1441124792801326e-05, |
|
"loss": 0.0011, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"learning_rate": 1.1302242481648118e-05, |
|
"loss": 0.001, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 1.116336017049491e-05, |
|
"loss": 0.001, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 1.1024477859341701e-05, |
|
"loss": 0.0011, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 1.0885595548188491e-05, |
|
"loss": 0.001, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 1.0746713237035283e-05, |
|
"loss": 0.0011, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 1.0607830925882074e-05, |
|
"loss": 0.001, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7276609807560313, |
|
"eval_f1": 0.851625034501794, |
|
"eval_f1_macro": 0.33216946866780483, |
|
"eval_f1_weighted": 0.8360047793004518, |
|
"eval_loss": 0.0016766807530075312, |
|
"eval_p": 0.8478514752859547, |
|
"eval_r": 0.8554323340842142, |
|
"eval_roc_auc": 0.9275875551926686, |
|
"eval_runtime": 149.9874, |
|
"eval_samples_per_second": 524.538, |
|
"eval_steps_per_second": 32.789, |
|
"step": 88683 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 1.0468948614728866e-05, |
|
"loss": 0.001, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 1.0330066303575658e-05, |
|
"loss": 0.0009, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 1.019118399242245e-05, |
|
"loss": 0.0009, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 1.005230168126924e-05, |
|
"loss": 0.0009, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 9.913419370116031e-06, |
|
"loss": 0.0009, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 9.774537058962823e-06, |
|
"loss": 0.0009, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 9.635654747809615e-06, |
|
"loss": 0.0009, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 9.496772436656406e-06, |
|
"loss": 0.0009, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 9.357890125503196e-06, |
|
"loss": 0.0009, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 9.219007814349988e-06, |
|
"loss": 0.0009, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 9.08012550319678e-06, |
|
"loss": 0.0009, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 8.941243192043571e-06, |
|
"loss": 0.0009, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 8.802360880890363e-06, |
|
"loss": 0.0009, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 8.663478569737153e-06, |
|
"loss": 0.0009, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 8.524596258583945e-06, |
|
"loss": 0.0009, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 8.385713947430736e-06, |
|
"loss": 0.0009, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 8.246831636277528e-06, |
|
"loss": 0.0009, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 8.107949325124318e-06, |
|
"loss": 0.0009, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 7.96906701397111e-06, |
|
"loss": 0.0009, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 7.830184702817902e-06, |
|
"loss": 0.0009, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 7.691302391664693e-06, |
|
"loss": 0.001, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 7.552420080511485e-06, |
|
"loss": 0.001, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"learning_rate": 7.413537769358277e-06, |
|
"loss": 0.0009, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 7.2746554582050675e-06, |
|
"loss": 0.001, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 7.135773147051859e-06, |
|
"loss": 0.0009, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7294913186058927, |
|
"eval_f1": 0.8529003679860734, |
|
"eval_f1_macro": 0.34351140192299934, |
|
"eval_f1_weighted": 0.8387988374835376, |
|
"eval_loss": 0.001688135787844658, |
|
"eval_p": 0.8483892536904006, |
|
"eval_r": 0.8574597123548778, |
|
"eval_roc_auc": 0.928601476610379, |
|
"eval_runtime": 152.9938, |
|
"eval_samples_per_second": 514.23, |
|
"eval_steps_per_second": 32.145, |
|
"step": 101352 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 6.99689083589865e-06, |
|
"loss": 0.0009, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 6.858008524745442e-06, |
|
"loss": 0.0008, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 6.7191262135922334e-06, |
|
"loss": 0.0008, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"learning_rate": 6.580243902439024e-06, |
|
"loss": 0.0008, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 6.441361591285816e-06, |
|
"loss": 0.0008, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 6.302479280132608e-06, |
|
"loss": 0.0009, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 6.1635969689793985e-06, |
|
"loss": 0.0008, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 6.02471465782619e-06, |
|
"loss": 0.0009, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 5.885832346672981e-06, |
|
"loss": 0.0008, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 5.746950035519773e-06, |
|
"loss": 0.0008, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 5.6080677243665645e-06, |
|
"loss": 0.0008, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 5.469185413213355e-06, |
|
"loss": 0.0008, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 5.330303102060147e-06, |
|
"loss": 0.0008, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 5.191420790906939e-06, |
|
"loss": 0.0009, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 5.0525384797537296e-06, |
|
"loss": 0.0008, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 4.913656168600521e-06, |
|
"loss": 0.0009, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 4.774773857447313e-06, |
|
"loss": 0.0008, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"learning_rate": 4.635891546294104e-06, |
|
"loss": 0.0008, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 4.4970092351408955e-06, |
|
"loss": 0.0009, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 4.358126923987686e-06, |
|
"loss": 0.0008, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 4.219244612834478e-06, |
|
"loss": 0.0008, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 4.080362301681269e-06, |
|
"loss": 0.0008, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 3.941479990528061e-06, |
|
"loss": 0.0009, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"learning_rate": 3.8025976793748523e-06, |
|
"loss": 0.0008, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 3.6637153682216436e-06, |
|
"loss": 0.0008, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 3.524833057068435e-06, |
|
"loss": 0.0008, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7333426544983095, |
|
"eval_f1": 0.8538806684223458, |
|
"eval_f1_macro": 0.35458058741266574, |
|
"eval_f1_weighted": 0.8401736902228668, |
|
"eval_loss": 0.0016993152676150203, |
|
"eval_p": 0.8495998764892057, |
|
"eval_r": 0.8582048171893952, |
|
"eval_roc_auc": 0.9289751251101112, |
|
"eval_runtime": 150.295, |
|
"eval_samples_per_second": 523.464, |
|
"eval_steps_per_second": 32.722, |
|
"step": 114021 |
|
} |
|
], |
|
"max_steps": 126690, |
|
"num_train_epochs": 10, |
|
"total_flos": 7.245026294480845e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|