|
{ |
|
"best_metric": 0.12031927704811096, |
|
"best_model_checkpoint": "Joseph-large-2024_09_16-batch-size32_epochs150_freeze/checkpoint-22113", |
|
"epoch": 91.0, |
|
"eval_steps": 500, |
|
"global_step": 24843, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.21656271656271656, |
|
"eval_f1_macro": 0.5384503258991854, |
|
"eval_f1_micro": 0.7477812526413659, |
|
"eval_loss": 0.17758780717849731, |
|
"eval_roc_auc": 0.8364480125638629, |
|
"eval_runtime": 594.1704, |
|
"eval_samples_per_second": 4.857, |
|
"eval_steps_per_second": 0.153, |
|
"learning_rate": 0.001, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.8315018315018317, |
|
"grad_norm": 0.2863590717315674, |
|
"learning_rate": 0.001, |
|
"loss": 0.2726, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.24532224532224534, |
|
"eval_f1_macro": 0.5760774961516321, |
|
"eval_f1_micro": 0.7697450182129848, |
|
"eval_loss": 0.153945192694664, |
|
"eval_roc_auc": 0.8447603712499155, |
|
"eval_runtime": 583.9253, |
|
"eval_samples_per_second": 4.942, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 0.001, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.2515592515592516, |
|
"eval_f1_macro": 0.6098114408992151, |
|
"eval_f1_micro": 0.7744839226208509, |
|
"eval_loss": 0.14735348522663116, |
|
"eval_roc_auc": 0.8447204921454395, |
|
"eval_runtime": 592.1092, |
|
"eval_samples_per_second": 4.874, |
|
"eval_steps_per_second": 0.154, |
|
"learning_rate": 0.001, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 3.663003663003663, |
|
"grad_norm": 0.24709707498550415, |
|
"learning_rate": 0.001, |
|
"loss": 0.1701, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.25363825363825365, |
|
"eval_f1_macro": 0.6213514572326843, |
|
"eval_f1_micro": 0.7738915615654661, |
|
"eval_loss": 0.14645476639270782, |
|
"eval_roc_auc": 0.8440185501551167, |
|
"eval_runtime": 589.7047, |
|
"eval_samples_per_second": 4.894, |
|
"eval_steps_per_second": 0.154, |
|
"learning_rate": 0.001, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.25017325017325015, |
|
"eval_f1_macro": 0.6353051230272125, |
|
"eval_f1_micro": 0.78146492434663, |
|
"eval_loss": 0.14515458047389984, |
|
"eval_roc_auc": 0.850253359073442, |
|
"eval_runtime": 596.3244, |
|
"eval_samples_per_second": 4.84, |
|
"eval_steps_per_second": 0.153, |
|
"learning_rate": 0.001, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 5.4945054945054945, |
|
"grad_norm": 0.24183644354343414, |
|
"learning_rate": 0.001, |
|
"loss": 0.1622, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.2577962577962578, |
|
"eval_f1_macro": 0.6141782571643486, |
|
"eval_f1_micro": 0.781259480778399, |
|
"eval_loss": 0.1445809006690979, |
|
"eval_roc_auc": 0.8479418733548026, |
|
"eval_runtime": 590.8762, |
|
"eval_samples_per_second": 4.884, |
|
"eval_steps_per_second": 0.154, |
|
"learning_rate": 0.001, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.26195426195426197, |
|
"eval_f1_macro": 0.6232727577909734, |
|
"eval_f1_micro": 0.7800943800943801, |
|
"eval_loss": 0.14445114135742188, |
|
"eval_roc_auc": 0.8500464116049401, |
|
"eval_runtime": 601.2858, |
|
"eval_samples_per_second": 4.8, |
|
"eval_steps_per_second": 0.151, |
|
"learning_rate": 0.001, |
|
"step": 1911 |
|
}, |
|
{ |
|
"epoch": 7.326007326007326, |
|
"grad_norm": 0.2069859653711319, |
|
"learning_rate": 0.001, |
|
"loss": 0.159, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.25848925848925847, |
|
"eval_f1_macro": 0.6339480584029394, |
|
"eval_f1_micro": 0.7879197465681098, |
|
"eval_loss": 0.14366209506988525, |
|
"eval_roc_auc": 0.8585186757078976, |
|
"eval_runtime": 584.2361, |
|
"eval_samples_per_second": 4.94, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 0.001, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.2577962577962578, |
|
"eval_f1_macro": 0.6442804243684905, |
|
"eval_f1_micro": 0.785476860138072, |
|
"eval_loss": 0.1447097659111023, |
|
"eval_roc_auc": 0.8547549628224266, |
|
"eval_runtime": 597.754, |
|
"eval_samples_per_second": 4.828, |
|
"eval_steps_per_second": 0.152, |
|
"learning_rate": 0.001, |
|
"step": 2457 |
|
}, |
|
{ |
|
"epoch": 9.157509157509157, |
|
"grad_norm": 0.2012098729610443, |
|
"learning_rate": 0.001, |
|
"loss": 0.1563, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.2442827442827443, |
|
"eval_f1_macro": 0.6149084687726756, |
|
"eval_f1_micro": 0.7683399403144626, |
|
"eval_loss": 0.1538563072681427, |
|
"eval_roc_auc": 0.8340795854397406, |
|
"eval_runtime": 582.8083, |
|
"eval_samples_per_second": 4.952, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 0.001, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 10.989010989010989, |
|
"grad_norm": 0.1713365614414215, |
|
"learning_rate": 0.001, |
|
"loss": 0.1558, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.26334026334026334, |
|
"eval_f1_macro": 0.6334773464226039, |
|
"eval_f1_micro": 0.7896514859952961, |
|
"eval_loss": 0.1389196366071701, |
|
"eval_roc_auc": 0.8560665869710553, |
|
"eval_runtime": 581.0367, |
|
"eval_samples_per_second": 4.967, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.001, |
|
"step": 3003 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.26403326403326405, |
|
"eval_f1_macro": 0.6406158966836866, |
|
"eval_f1_micro": 0.7908438442264407, |
|
"eval_loss": 0.1395249217748642, |
|
"eval_roc_auc": 0.8586128939486779, |
|
"eval_runtime": 577.8577, |
|
"eval_samples_per_second": 4.994, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.001, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 12.820512820512821, |
|
"grad_norm": 0.1511967033147812, |
|
"learning_rate": 0.001, |
|
"loss": 0.155, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.26507276507276506, |
|
"eval_f1_macro": 0.6557265830014797, |
|
"eval_f1_micro": 0.7893533497260687, |
|
"eval_loss": 0.1390257179737091, |
|
"eval_roc_auc": 0.8535165777960444, |
|
"eval_runtime": 583.6052, |
|
"eval_samples_per_second": 4.945, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 0.001, |
|
"step": 3549 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.2623007623007623, |
|
"eval_f1_macro": 0.640540413256037, |
|
"eval_f1_micro": 0.787792943600309, |
|
"eval_loss": 0.13910652697086334, |
|
"eval_roc_auc": 0.8540478028799876, |
|
"eval_runtime": 575.9469, |
|
"eval_samples_per_second": 5.011, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 0.001, |
|
"step": 3822 |
|
}, |
|
{ |
|
"epoch": 14.652014652014651, |
|
"grad_norm": 0.17730183899402618, |
|
"learning_rate": 0.001, |
|
"loss": 0.154, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.253984753984754, |
|
"eval_f1_macro": 0.6406412255611948, |
|
"eval_f1_micro": 0.7885381419454319, |
|
"eval_loss": 0.13990363478660583, |
|
"eval_roc_auc": 0.8550214137243141, |
|
"eval_runtime": 589.3337, |
|
"eval_samples_per_second": 4.897, |
|
"eval_steps_per_second": 0.154, |
|
"learning_rate": 0.001, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.2668052668052668, |
|
"eval_f1_macro": 0.6374513053376879, |
|
"eval_f1_micro": 0.7847859161051945, |
|
"eval_loss": 0.13938209414482117, |
|
"eval_roc_auc": 0.8490304535245456, |
|
"eval_runtime": 577.6041, |
|
"eval_samples_per_second": 4.997, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 0.001, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 16.483516483516482, |
|
"grad_norm": 0.16627363860607147, |
|
"learning_rate": 0.001, |
|
"loss": 0.1527, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.24185724185724186, |
|
"eval_f1_macro": 0.6424904129432089, |
|
"eval_f1_micro": 0.7857319587628866, |
|
"eval_loss": 0.15936270356178284, |
|
"eval_roc_auc": 0.8640092624686252, |
|
"eval_runtime": 582.0471, |
|
"eval_samples_per_second": 4.958, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 0.001, |
|
"step": 4641 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.27546777546777546, |
|
"eval_f1_macro": 0.6768028620378452, |
|
"eval_f1_micro": 0.8036556603773585, |
|
"eval_loss": 0.13188092410564423, |
|
"eval_roc_auc": 0.8679473273890709, |
|
"eval_runtime": 578.2193, |
|
"eval_samples_per_second": 4.991, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 4914 |
|
}, |
|
{ |
|
"epoch": 18.315018315018314, |
|
"grad_norm": 0.17489519715309143, |
|
"learning_rate": 0.0001, |
|
"loss": 0.149, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.27893277893277896, |
|
"eval_f1_macro": 0.6715138701269487, |
|
"eval_f1_micro": 0.8038422649140546, |
|
"eval_loss": 0.13244545459747314, |
|
"eval_roc_auc": 0.8679532160291196, |
|
"eval_runtime": 573.231, |
|
"eval_samples_per_second": 5.035, |
|
"eval_steps_per_second": 0.159, |
|
"learning_rate": 0.0001, |
|
"step": 5187 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.27893277893277896, |
|
"eval_f1_macro": 0.6733647561041333, |
|
"eval_f1_micro": 0.8066104665720725, |
|
"eval_loss": 0.1306440383195877, |
|
"eval_roc_auc": 0.8722063701403293, |
|
"eval_runtime": 585.8615, |
|
"eval_samples_per_second": 4.926, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 0.0001, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 20.146520146520146, |
|
"grad_norm": 0.15949666500091553, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1412, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.2817047817047817, |
|
"eval_f1_macro": 0.6728395801753237, |
|
"eval_f1_micro": 0.8037271837637748, |
|
"eval_loss": 0.1302667111158371, |
|
"eval_roc_auc": 0.8650841471907833, |
|
"eval_runtime": 573.2529, |
|
"eval_samples_per_second": 5.034, |
|
"eval_steps_per_second": 0.159, |
|
"learning_rate": 0.0001, |
|
"step": 5733 |
|
}, |
|
{ |
|
"epoch": 21.978021978021978, |
|
"grad_norm": 0.1902228742837906, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1385, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.2841302841302841, |
|
"eval_f1_macro": 0.6735047356746011, |
|
"eval_f1_micro": 0.8074214632089395, |
|
"eval_loss": 0.12870918214321136, |
|
"eval_roc_auc": 0.869659164440877, |
|
"eval_runtime": 581.2149, |
|
"eval_samples_per_second": 4.965, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 6006 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.2841302841302841, |
|
"eval_f1_macro": 0.678520497542563, |
|
"eval_f1_micro": 0.8058198574902932, |
|
"eval_loss": 0.1287251114845276, |
|
"eval_roc_auc": 0.8654212081182214, |
|
"eval_runtime": 576.7801, |
|
"eval_samples_per_second": 5.004, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 0.0001, |
|
"step": 6279 |
|
}, |
|
{ |
|
"epoch": 23.80952380952381, |
|
"grad_norm": 0.20738890767097473, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1377, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.2869022869022869, |
|
"eval_f1_macro": 0.6840871439155845, |
|
"eval_f1_micro": 0.8057504997660669, |
|
"eval_loss": 0.1279863715171814, |
|
"eval_roc_auc": 0.8662783338824032, |
|
"eval_runtime": 580.6409, |
|
"eval_samples_per_second": 4.97, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.28586278586278585, |
|
"eval_f1_macro": 0.6787317976982782, |
|
"eval_f1_micro": 0.8074392712550608, |
|
"eval_loss": 0.127402663230896, |
|
"eval_roc_auc": 0.8696325247161917, |
|
"eval_runtime": 571.2341, |
|
"eval_samples_per_second": 5.052, |
|
"eval_steps_per_second": 0.159, |
|
"learning_rate": 0.0001, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 25.641025641025642, |
|
"grad_norm": 0.18043966591358185, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1361, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.28586278586278585, |
|
"eval_f1_macro": 0.6740298841901063, |
|
"eval_f1_micro": 0.8063818050664064, |
|
"eval_loss": 0.12828372418880463, |
|
"eval_roc_auc": 0.8672987185031534, |
|
"eval_runtime": 580.3575, |
|
"eval_samples_per_second": 4.973, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 7098 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.2882882882882883, |
|
"eval_f1_macro": 0.68897744745899, |
|
"eval_f1_micro": 0.8110456615281781, |
|
"eval_loss": 0.12681305408477783, |
|
"eval_roc_auc": 0.8743537519356053, |
|
"eval_runtime": 580.6984, |
|
"eval_samples_per_second": 4.97, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 7371 |
|
}, |
|
{ |
|
"epoch": 27.47252747252747, |
|
"grad_norm": 0.17303021252155304, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1354, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.28932778932778935, |
|
"eval_f1_macro": 0.6812786729949134, |
|
"eval_f1_micro": 0.8099940913311386, |
|
"eval_loss": 0.12666279077529907, |
|
"eval_roc_auc": 0.8708112319303257, |
|
"eval_runtime": 576.4341, |
|
"eval_samples_per_second": 5.007, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 0.0001, |
|
"step": 7644 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.29175329175329173, |
|
"eval_f1_macro": 0.6881122302734826, |
|
"eval_f1_micro": 0.8081058020477816, |
|
"eval_loss": 0.12675043940544128, |
|
"eval_roc_auc": 0.8666940799181009, |
|
"eval_runtime": 578.6537, |
|
"eval_samples_per_second": 4.987, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 7917 |
|
}, |
|
{ |
|
"epoch": 29.304029304029303, |
|
"grad_norm": 0.17676015198230743, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1339, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.2927927927927928, |
|
"eval_f1_macro": 0.6872571297964245, |
|
"eval_f1_micro": 0.8108657880239013, |
|
"eval_loss": 0.12635387480258942, |
|
"eval_roc_auc": 0.8701037366439205, |
|
"eval_runtime": 582.8932, |
|
"eval_samples_per_second": 4.951, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 0.0001, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.29140679140679143, |
|
"eval_f1_macro": 0.6823767206574823, |
|
"eval_f1_micro": 0.8089332139965051, |
|
"eval_loss": 0.1258317530155182, |
|
"eval_roc_auc": 0.867421427567313, |
|
"eval_runtime": 578.6563, |
|
"eval_samples_per_second": 4.987, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 8463 |
|
}, |
|
{ |
|
"epoch": 31.135531135531135, |
|
"grad_norm": 0.19538064301013947, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1332, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.29313929313929316, |
|
"eval_f1_macro": 0.6924178674344362, |
|
"eval_f1_micro": 0.8112645318336341, |
|
"eval_loss": 0.1260402798652649, |
|
"eval_roc_auc": 0.8731307927316745, |
|
"eval_runtime": 588.1542, |
|
"eval_samples_per_second": 4.907, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 0.0001, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 32.967032967032964, |
|
"grad_norm": 0.20809406042099, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1321, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.2910602910602911, |
|
"eval_f1_macro": 0.6959916792345996, |
|
"eval_f1_micro": 0.8133097762073027, |
|
"eval_loss": 0.1250443458557129, |
|
"eval_roc_auc": 0.8735809182308003, |
|
"eval_runtime": 593.1758, |
|
"eval_samples_per_second": 4.865, |
|
"eval_steps_per_second": 0.153, |
|
"learning_rate": 0.0001, |
|
"step": 9009 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.29417879417879417, |
|
"eval_f1_macro": 0.6891130310994343, |
|
"eval_f1_micro": 0.8116187492060803, |
|
"eval_loss": 0.12511762976646423, |
|
"eval_roc_auc": 0.8707582878249215, |
|
"eval_runtime": 596.9449, |
|
"eval_samples_per_second": 4.835, |
|
"eval_steps_per_second": 0.152, |
|
"learning_rate": 0.0001, |
|
"step": 9282 |
|
}, |
|
{ |
|
"epoch": 34.798534798534796, |
|
"grad_norm": 0.23537498712539673, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1309, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.2955647955647956, |
|
"eval_f1_macro": 0.6945448365895581, |
|
"eval_f1_micro": 0.8124288545048274, |
|
"eval_loss": 0.12488266825675964, |
|
"eval_roc_auc": 0.8724487031041583, |
|
"eval_runtime": 587.4735, |
|
"eval_samples_per_second": 4.913, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 0.0001, |
|
"step": 9555 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.29417879417879417, |
|
"eval_f1_macro": 0.6971439978031583, |
|
"eval_f1_micro": 0.8115410842141152, |
|
"eval_loss": 0.1252983808517456, |
|
"eval_roc_auc": 0.8688457809436633, |
|
"eval_runtime": 588.3939, |
|
"eval_samples_per_second": 4.905, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 0.0001, |
|
"step": 9828 |
|
}, |
|
{ |
|
"epoch": 36.63003663003663, |
|
"grad_norm": 0.2208484709262848, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1305, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.29521829521829523, |
|
"eval_f1_macro": 0.6961006786941204, |
|
"eval_f1_micro": 0.8116249469664828, |
|
"eval_loss": 0.12479764968156815, |
|
"eval_roc_auc": 0.870187076326433, |
|
"eval_runtime": 588.2172, |
|
"eval_samples_per_second": 4.906, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 0.0001, |
|
"step": 10101 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.3004158004158004, |
|
"eval_f1_macro": 0.6991177533793484, |
|
"eval_f1_micro": 0.8129930394431555, |
|
"eval_loss": 0.12497606873512268, |
|
"eval_roc_auc": 0.8725859392689393, |
|
"eval_runtime": 593.6086, |
|
"eval_samples_per_second": 4.862, |
|
"eval_steps_per_second": 0.153, |
|
"learning_rate": 0.0001, |
|
"step": 10374 |
|
}, |
|
{ |
|
"epoch": 38.46153846153846, |
|
"grad_norm": 0.27702799439430237, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1285, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.29521829521829523, |
|
"eval_f1_macro": 0.6970545191351545, |
|
"eval_f1_micro": 0.8141541282874172, |
|
"eval_loss": 0.1252022236585617, |
|
"eval_roc_auc": 0.8767588388621269, |
|
"eval_runtime": 586.6264, |
|
"eval_samples_per_second": 4.92, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 0.0001, |
|
"step": 10647 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.2955647955647956, |
|
"eval_f1_macro": 0.7070171403235663, |
|
"eval_f1_micro": 0.816655585106383, |
|
"eval_loss": 0.12485132366418839, |
|
"eval_roc_auc": 0.8790201839509206, |
|
"eval_runtime": 586.1423, |
|
"eval_samples_per_second": 4.924, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 0.0001, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 40.29304029304029, |
|
"grad_norm": 0.24468237161636353, |
|
"learning_rate": 0.0001, |
|
"loss": 0.129, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.28967428967428965, |
|
"eval_f1_macro": 0.6961881266838973, |
|
"eval_f1_micro": 0.8103573101656658, |
|
"eval_loss": 0.12500154972076416, |
|
"eval_roc_auc": 0.8684120872988876, |
|
"eval_runtime": 589.4017, |
|
"eval_samples_per_second": 4.896, |
|
"eval_steps_per_second": 0.154, |
|
"learning_rate": 0.0001, |
|
"step": 11193 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.3038808038808039, |
|
"eval_f1_macro": 0.7064304960359926, |
|
"eval_f1_micro": 0.816535301022975, |
|
"eval_loss": 0.12350151687860489, |
|
"eval_roc_auc": 0.8763375931853813, |
|
"eval_runtime": 591.1388, |
|
"eval_samples_per_second": 4.882, |
|
"eval_steps_per_second": 0.154, |
|
"learning_rate": 0.0001, |
|
"step": 11466 |
|
}, |
|
{ |
|
"epoch": 42.124542124542124, |
|
"grad_norm": 0.29772019386291504, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1277, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.2955647955647956, |
|
"eval_f1_macro": 0.7047254887418923, |
|
"eval_f1_micro": 0.8150093808630394, |
|
"eval_loss": 0.12367021292448044, |
|
"eval_roc_auc": 0.8771339814503815, |
|
"eval_runtime": 597.1526, |
|
"eval_samples_per_second": 4.833, |
|
"eval_steps_per_second": 0.152, |
|
"learning_rate": 0.0001, |
|
"step": 11739 |
|
}, |
|
{ |
|
"epoch": 43.956043956043956, |
|
"grad_norm": 0.32455483078956604, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1279, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.30076230076230076, |
|
"eval_f1_macro": 0.705396366545505, |
|
"eval_f1_micro": 0.8170209225905745, |
|
"eval_loss": 0.12371324002742767, |
|
"eval_roc_auc": 0.8789390307110687, |
|
"eval_runtime": 618.592, |
|
"eval_samples_per_second": 4.665, |
|
"eval_steps_per_second": 0.147, |
|
"learning_rate": 0.0001, |
|
"step": 12012 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.30145530145530147, |
|
"eval_f1_macro": 0.7058009223379548, |
|
"eval_f1_micro": 0.8163231034048448, |
|
"eval_loss": 0.12333343178033829, |
|
"eval_roc_auc": 0.8758137724962146, |
|
"eval_runtime": 593.5671, |
|
"eval_samples_per_second": 4.862, |
|
"eval_steps_per_second": 0.153, |
|
"learning_rate": 0.0001, |
|
"step": 12285 |
|
}, |
|
{ |
|
"epoch": 45.78754578754579, |
|
"grad_norm": 0.2913854718208313, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1264, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.30076230076230076, |
|
"eval_f1_macro": 0.6992655670184796, |
|
"eval_f1_micro": 0.8158692722371967, |
|
"eval_loss": 0.12297776341438293, |
|
"eval_roc_auc": 0.8746400552002995, |
|
"eval_runtime": 597.9766, |
|
"eval_samples_per_second": 4.826, |
|
"eval_steps_per_second": 0.152, |
|
"learning_rate": 0.0001, |
|
"step": 12558 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.29902979902979904, |
|
"eval_f1_macro": 0.7026416067016249, |
|
"eval_f1_micro": 0.8135392426486143, |
|
"eval_loss": 0.12366960942745209, |
|
"eval_roc_auc": 0.8720258880176504, |
|
"eval_runtime": 608.7312, |
|
"eval_samples_per_second": 4.741, |
|
"eval_steps_per_second": 0.149, |
|
"learning_rate": 0.0001, |
|
"step": 12831 |
|
}, |
|
{ |
|
"epoch": 47.61904761904762, |
|
"grad_norm": 0.2678842842578888, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1267, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.30180180180180183, |
|
"eval_f1_macro": 0.7044430417074125, |
|
"eval_f1_micro": 0.8169049621530698, |
|
"eval_loss": 0.12326876819133759, |
|
"eval_roc_auc": 0.8756520830494087, |
|
"eval_runtime": 581.2988, |
|
"eval_samples_per_second": 4.965, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.30214830214830213, |
|
"eval_f1_macro": 0.705026725915288, |
|
"eval_f1_micro": 0.8161126713333613, |
|
"eval_loss": 0.12315386533737183, |
|
"eval_roc_auc": 0.8761665945900701, |
|
"eval_runtime": 586.6485, |
|
"eval_samples_per_second": 4.919, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 0.0001, |
|
"step": 13377 |
|
}, |
|
{ |
|
"epoch": 49.45054945054945, |
|
"grad_norm": 0.31903648376464844, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1249, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.30145530145530147, |
|
"eval_f1_macro": 0.7085649491291086, |
|
"eval_f1_micro": 0.8179686845851126, |
|
"eval_loss": 0.12265044450759888, |
|
"eval_roc_auc": 0.877547259354487, |
|
"eval_runtime": 581.1034, |
|
"eval_samples_per_second": 4.966, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.30214830214830213, |
|
"eval_f1_macro": 0.710831288086539, |
|
"eval_f1_micro": 0.8190420609445996, |
|
"eval_loss": 0.12310674786567688, |
|
"eval_roc_auc": 0.87941579593488, |
|
"eval_runtime": 606.1273, |
|
"eval_samples_per_second": 4.761, |
|
"eval_steps_per_second": 0.15, |
|
"learning_rate": 0.0001, |
|
"step": 13923 |
|
}, |
|
{ |
|
"epoch": 51.282051282051285, |
|
"grad_norm": 0.26118701696395874, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1243, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.30214830214830213, |
|
"eval_f1_macro": 0.704117146056294, |
|
"eval_f1_micro": 0.816390260370511, |
|
"eval_loss": 0.12280686944723129, |
|
"eval_roc_auc": 0.8743457841629654, |
|
"eval_runtime": 611.1334, |
|
"eval_samples_per_second": 4.722, |
|
"eval_steps_per_second": 0.149, |
|
"learning_rate": 0.0001, |
|
"step": 14196 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.3038808038808039, |
|
"eval_f1_macro": 0.7080185810697228, |
|
"eval_f1_micro": 0.8189015751312609, |
|
"eval_loss": 0.1225290596485138, |
|
"eval_roc_auc": 0.8794495583654101, |
|
"eval_runtime": 586.3165, |
|
"eval_samples_per_second": 4.922, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 0.0001, |
|
"step": 14469 |
|
}, |
|
{ |
|
"epoch": 53.11355311355312, |
|
"grad_norm": 0.24176084995269775, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1248, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.30180180180180183, |
|
"eval_f1_macro": 0.7053875588266636, |
|
"eval_f1_micro": 0.8162527837304089, |
|
"eval_loss": 0.12376156449317932, |
|
"eval_roc_auc": 0.8755284752634194, |
|
"eval_runtime": 590.1707, |
|
"eval_samples_per_second": 4.89, |
|
"eval_steps_per_second": 0.154, |
|
"learning_rate": 0.0001, |
|
"step": 14742 |
|
}, |
|
{ |
|
"epoch": 54.94505494505494, |
|
"grad_norm": 0.3652552366256714, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1233, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.30284130284130284, |
|
"eval_f1_macro": 0.7092508494713976, |
|
"eval_f1_micro": 0.818075117370892, |
|
"eval_loss": 0.12211860716342926, |
|
"eval_roc_auc": 0.8772347053641084, |
|
"eval_runtime": 591.2857, |
|
"eval_samples_per_second": 4.881, |
|
"eval_steps_per_second": 0.154, |
|
"learning_rate": 0.0001, |
|
"step": 15015 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.3049203049203049, |
|
"eval_f1_macro": 0.7091508009521661, |
|
"eval_f1_micro": 0.818769689935334, |
|
"eval_loss": 0.12255053967237473, |
|
"eval_roc_auc": 0.8809376807503978, |
|
"eval_runtime": 619.809, |
|
"eval_samples_per_second": 4.656, |
|
"eval_steps_per_second": 0.147, |
|
"learning_rate": 0.0001, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 56.776556776556774, |
|
"grad_norm": 0.2984638214111328, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1237, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.3052668052668053, |
|
"eval_f1_macro": 0.7056269081565454, |
|
"eval_f1_micro": 0.8183564389510606, |
|
"eval_loss": 0.12233822792768478, |
|
"eval_roc_auc": 0.8785286099375562, |
|
"eval_runtime": 613.5916, |
|
"eval_samples_per_second": 4.703, |
|
"eval_steps_per_second": 0.148, |
|
"learning_rate": 0.0001, |
|
"step": 15561 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.30284130284130284, |
|
"eval_f1_macro": 0.7093876090831799, |
|
"eval_f1_micro": 0.8179678964618875, |
|
"eval_loss": 0.12230789661407471, |
|
"eval_roc_auc": 0.8764581158068805, |
|
"eval_runtime": 607.5944, |
|
"eval_samples_per_second": 4.75, |
|
"eval_steps_per_second": 0.15, |
|
"learning_rate": 0.0001, |
|
"step": 15834 |
|
}, |
|
{ |
|
"epoch": 58.608058608058606, |
|
"grad_norm": 0.2825208604335785, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1234, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.30734580734580735, |
|
"eval_f1_macro": 0.7102428483836337, |
|
"eval_f1_micro": 0.8198051269184126, |
|
"eval_loss": 0.12226579338312149, |
|
"eval_roc_auc": 0.8788947787212716, |
|
"eval_runtime": 579.6776, |
|
"eval_samples_per_second": 4.979, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 16107 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.29799029799029797, |
|
"eval_f1_macro": 0.7068409531794828, |
|
"eval_f1_micro": 0.8173416232565955, |
|
"eval_loss": 0.1236739531159401, |
|
"eval_roc_auc": 0.8761882980515712, |
|
"eval_runtime": 577.7976, |
|
"eval_samples_per_second": 4.995, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 16380 |
|
}, |
|
{ |
|
"epoch": 60.43956043956044, |
|
"grad_norm": 0.313997745513916, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1232, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.305959805959806, |
|
"eval_f1_macro": 0.7139384635953806, |
|
"eval_f1_micro": 0.8201011747982775, |
|
"eval_loss": 0.12236195057630539, |
|
"eval_roc_auc": 0.8791326851447451, |
|
"eval_runtime": 576.7269, |
|
"eval_samples_per_second": 5.004, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 0.0001, |
|
"step": 16653 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.30284130284130284, |
|
"eval_f1_macro": 0.7188990083298508, |
|
"eval_f1_micro": 0.8209334277030684, |
|
"eval_loss": 0.12215279042720795, |
|
"eval_roc_auc": 0.8808361117698756, |
|
"eval_runtime": 585.2414, |
|
"eval_samples_per_second": 4.931, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 1e-05, |
|
"step": 16926 |
|
}, |
|
{ |
|
"epoch": 62.27106227106227, |
|
"grad_norm": 0.322051465511322, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1204, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.3097713097713098, |
|
"eval_f1_macro": 0.7190866276619315, |
|
"eval_f1_micro": 0.820752746564184, |
|
"eval_loss": 0.12084941565990448, |
|
"eval_roc_auc": 0.8797410795822204, |
|
"eval_runtime": 586.6749, |
|
"eval_samples_per_second": 4.919, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 1e-05, |
|
"step": 17199 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.3108108108108108, |
|
"eval_f1_macro": 0.7187730185556146, |
|
"eval_f1_micro": 0.8218151540383014, |
|
"eval_loss": 0.12093428522348404, |
|
"eval_roc_auc": 0.8813373302757117, |
|
"eval_runtime": 589.1584, |
|
"eval_samples_per_second": 4.899, |
|
"eval_steps_per_second": 0.154, |
|
"learning_rate": 1e-05, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 64.1025641025641, |
|
"grad_norm": 0.32896944880485535, |
|
"learning_rate": 1e-05, |
|
"loss": 0.12, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.30803880803880807, |
|
"eval_f1_macro": 0.7186584702198188, |
|
"eval_f1_micro": 0.8209837715435904, |
|
"eval_loss": 0.12085793167352676, |
|
"eval_roc_auc": 0.8787241154699269, |
|
"eval_runtime": 587.8337, |
|
"eval_samples_per_second": 4.91, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 1e-05, |
|
"step": 17745 |
|
}, |
|
{ |
|
"epoch": 65.93406593406593, |
|
"grad_norm": 0.3432726263999939, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1187, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.3135828135828136, |
|
"eval_f1_macro": 0.7185770967712465, |
|
"eval_f1_micro": 0.8215507887488523, |
|
"eval_loss": 0.12076118588447571, |
|
"eval_roc_auc": 0.8804684272314324, |
|
"eval_runtime": 588.6607, |
|
"eval_samples_per_second": 4.903, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 1e-05, |
|
"step": 18018 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.31115731115731116, |
|
"eval_f1_macro": 0.7239469969506999, |
|
"eval_f1_micro": 0.8232429532417151, |
|
"eval_loss": 0.1210499182343483, |
|
"eval_roc_auc": 0.8847649346106092, |
|
"eval_runtime": 591.9527, |
|
"eval_samples_per_second": 4.875, |
|
"eval_steps_per_second": 0.154, |
|
"learning_rate": 1e-05, |
|
"step": 18291 |
|
}, |
|
{ |
|
"epoch": 67.76556776556777, |
|
"grad_norm": 0.3457956612110138, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1179, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.3125433125433125, |
|
"eval_f1_macro": 0.720063006101889, |
|
"eval_f1_micro": 0.8211584808443447, |
|
"eval_loss": 0.1208076998591423, |
|
"eval_roc_auc": 0.8815457934989414, |
|
"eval_runtime": 585.5944, |
|
"eval_samples_per_second": 4.928, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 1e-05, |
|
"step": 18564 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.31011781011781014, |
|
"eval_f1_macro": 0.7197984794848579, |
|
"eval_f1_micro": 0.821014765549839, |
|
"eval_loss": 0.12105683237314224, |
|
"eval_roc_auc": 0.8794876936812611, |
|
"eval_runtime": 586.2959, |
|
"eval_samples_per_second": 4.922, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 1e-05, |
|
"step": 18837 |
|
}, |
|
{ |
|
"epoch": 69.59706959706959, |
|
"grad_norm": 0.2593284845352173, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1177, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.31115731115731116, |
|
"eval_f1_macro": 0.719699492247552, |
|
"eval_f1_micro": 0.821309285237141, |
|
"eval_loss": 0.12111356854438782, |
|
"eval_roc_auc": 0.8802103867562204, |
|
"eval_runtime": 592.4319, |
|
"eval_samples_per_second": 4.871, |
|
"eval_steps_per_second": 0.154, |
|
"learning_rate": 1e-05, |
|
"step": 19110 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.31115731115731116, |
|
"eval_f1_macro": 0.7163966165871272, |
|
"eval_f1_micro": 0.8206033106461642, |
|
"eval_loss": 0.12063230574131012, |
|
"eval_roc_auc": 0.8779552656514418, |
|
"eval_runtime": 588.5502, |
|
"eval_samples_per_second": 4.904, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 1e-05, |
|
"step": 19383 |
|
}, |
|
{ |
|
"epoch": 71.42857142857143, |
|
"grad_norm": 0.3827510476112366, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1179, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.3128898128898129, |
|
"eval_f1_macro": 0.7171818163524962, |
|
"eval_f1_micro": 0.8206118081490495, |
|
"eval_loss": 0.12075439840555191, |
|
"eval_roc_auc": 0.8783204820582929, |
|
"eval_runtime": 584.4453, |
|
"eval_samples_per_second": 4.938, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 1e-05, |
|
"step": 19656 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.31323631323631324, |
|
"eval_f1_macro": 0.7214307826544399, |
|
"eval_f1_micro": 0.8217462106977327, |
|
"eval_loss": 0.12078637629747391, |
|
"eval_roc_auc": 0.8804145307491638, |
|
"eval_runtime": 588.6768, |
|
"eval_samples_per_second": 4.903, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 1e-05, |
|
"step": 19929 |
|
}, |
|
{ |
|
"epoch": 73.26007326007326, |
|
"grad_norm": 0.29221299290657043, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1177, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.3108108108108108, |
|
"eval_f1_macro": 0.715483654869702, |
|
"eval_f1_micro": 0.8200794388574326, |
|
"eval_loss": 0.12086880952119827, |
|
"eval_roc_auc": 0.8759783190736136, |
|
"eval_runtime": 595.2281, |
|
"eval_samples_per_second": 4.849, |
|
"eval_steps_per_second": 0.153, |
|
"learning_rate": 1e-05, |
|
"step": 20202 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.3153153153153153, |
|
"eval_f1_macro": 0.7151281975948514, |
|
"eval_f1_micro": 0.8207404925448148, |
|
"eval_loss": 0.12054955214262009, |
|
"eval_roc_auc": 0.8789508616462007, |
|
"eval_runtime": 588.2243, |
|
"eval_samples_per_second": 4.906, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 1e-05, |
|
"step": 20475 |
|
}, |
|
{ |
|
"epoch": 75.0915750915751, |
|
"grad_norm": 0.3488374352455139, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1171, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.31566181566181567, |
|
"eval_f1_macro": 0.722403613960237, |
|
"eval_f1_micro": 0.8221261740503699, |
|
"eval_loss": 0.12033110857009888, |
|
"eval_roc_auc": 0.8820333836259857, |
|
"eval_runtime": 591.3419, |
|
"eval_samples_per_second": 4.88, |
|
"eval_steps_per_second": 0.154, |
|
"learning_rate": 1e-05, |
|
"step": 20748 |
|
}, |
|
{ |
|
"epoch": 76.92307692307692, |
|
"grad_norm": 0.37617847323417664, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1171, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.3135828135828136, |
|
"eval_f1_macro": 0.7234417953998725, |
|
"eval_f1_micro": 0.8231996372480317, |
|
"eval_loss": 0.12079885601997375, |
|
"eval_roc_auc": 0.8850904999468814, |
|
"eval_runtime": 598.1869, |
|
"eval_samples_per_second": 4.825, |
|
"eval_steps_per_second": 0.152, |
|
"learning_rate": 1e-05, |
|
"step": 21021 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.3115038115038115, |
|
"eval_f1_macro": 0.7233107692667189, |
|
"eval_f1_micro": 0.8230326613403982, |
|
"eval_loss": 0.12099317461252213, |
|
"eval_roc_auc": 0.8837231818723246, |
|
"eval_runtime": 585.8629, |
|
"eval_samples_per_second": 4.926, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 1e-05, |
|
"step": 21294 |
|
}, |
|
{ |
|
"epoch": 78.75457875457876, |
|
"grad_norm": 0.36047452688217163, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1168, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.31011781011781014, |
|
"eval_f1_macro": 0.7172980311198172, |
|
"eval_f1_micro": 0.8202369947054374, |
|
"eval_loss": 0.12051720172166824, |
|
"eval_roc_auc": 0.8777043478820749, |
|
"eval_runtime": 595.4358, |
|
"eval_samples_per_second": 4.847, |
|
"eval_steps_per_second": 0.153, |
|
"learning_rate": 1e-05, |
|
"step": 21567 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.31185031185031187, |
|
"eval_f1_macro": 0.7248558336823359, |
|
"eval_f1_micro": 0.8231793006530544, |
|
"eval_loss": 0.12073608487844467, |
|
"eval_roc_auc": 0.8842671366132298, |
|
"eval_runtime": 604.2206, |
|
"eval_samples_per_second": 4.776, |
|
"eval_steps_per_second": 0.151, |
|
"learning_rate": 1e-05, |
|
"step": 21840 |
|
}, |
|
{ |
|
"epoch": 80.58608058608058, |
|
"grad_norm": 0.35180962085723877, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1171, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.3128898128898129, |
|
"eval_f1_macro": 0.7212996450160633, |
|
"eval_f1_micro": 0.822080253872813, |
|
"eval_loss": 0.12031927704811096, |
|
"eval_roc_auc": 0.8805991577232666, |
|
"eval_runtime": 597.4583, |
|
"eval_samples_per_second": 4.83, |
|
"eval_steps_per_second": 0.152, |
|
"learning_rate": 1e-05, |
|
"step": 22113 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.3142758142758143, |
|
"eval_f1_macro": 0.7178066335813648, |
|
"eval_f1_micro": 0.8215302193202746, |
|
"eval_loss": 0.1204884946346283, |
|
"eval_roc_auc": 0.8795852787754633, |
|
"eval_runtime": 583.2227, |
|
"eval_samples_per_second": 4.948, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 1e-05, |
|
"step": 22386 |
|
}, |
|
{ |
|
"epoch": 82.41758241758242, |
|
"grad_norm": 0.3334418535232544, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.1157, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.31115731115731116, |
|
"eval_f1_macro": 0.7113142483409282, |
|
"eval_f1_micro": 0.8179971218149497, |
|
"eval_loss": 0.12136666476726532, |
|
"eval_roc_auc": 0.8743461195069316, |
|
"eval_runtime": 580.12, |
|
"eval_samples_per_second": 4.975, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 22659 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.3115038115038115, |
|
"eval_f1_macro": 0.7250649377579587, |
|
"eval_f1_micro": 0.8234267187629895, |
|
"eval_loss": 0.12041348963975906, |
|
"eval_roc_auc": 0.8827056007271329, |
|
"eval_runtime": 581.6672, |
|
"eval_samples_per_second": 4.962, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 22932 |
|
}, |
|
{ |
|
"epoch": 84.24908424908425, |
|
"grad_norm": 0.3593423068523407, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.1169, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.31323631323631324, |
|
"eval_f1_macro": 0.7213085414821642, |
|
"eval_f1_micro": 0.8229879338226147, |
|
"eval_loss": 0.12035409361124039, |
|
"eval_roc_auc": 0.8831987441502598, |
|
"eval_runtime": 579.5593, |
|
"eval_samples_per_second": 4.98, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 23205 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.3076923076923077, |
|
"eval_f1_macro": 0.7218120076279698, |
|
"eval_f1_micro": 0.8196243388446962, |
|
"eval_loss": 0.12250283360481262, |
|
"eval_roc_auc": 0.8800111790264615, |
|
"eval_runtime": 583.2647, |
|
"eval_samples_per_second": 4.948, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 23478 |
|
}, |
|
{ |
|
"epoch": 86.08058608058609, |
|
"grad_norm": 0.3458651602268219, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.1157, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.3090783090783091, |
|
"eval_f1_macro": 0.7151954083158903, |
|
"eval_f1_micro": 0.8203968852047224, |
|
"eval_loss": 0.12075748294591904, |
|
"eval_roc_auc": 0.8788858049167326, |
|
"eval_runtime": 580.273, |
|
"eval_samples_per_second": 4.974, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 23751 |
|
}, |
|
{ |
|
"epoch": 87.91208791208791, |
|
"grad_norm": 0.2947898507118225, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.1156, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.30838530838530837, |
|
"eval_f1_macro": 0.7168335672232342, |
|
"eval_f1_micro": 0.8215440749647566, |
|
"eval_loss": 0.12086642533540726, |
|
"eval_roc_auc": 0.8824075738373759, |
|
"eval_runtime": 581.7056, |
|
"eval_samples_per_second": 4.961, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 24024 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.3163548163548164, |
|
"eval_f1_macro": 0.733984551040518, |
|
"eval_f1_micro": 0.8244650323850127, |
|
"eval_loss": 0.12105640023946762, |
|
"eval_roc_auc": 0.8874893628893881, |
|
"eval_runtime": 592.213, |
|
"eval_samples_per_second": 4.873, |
|
"eval_steps_per_second": 0.154, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 24297 |
|
}, |
|
{ |
|
"epoch": 89.74358974358974, |
|
"grad_norm": 0.27441146969795227, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.1157, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.31185031185031187, |
|
"eval_f1_macro": 0.7245620055819162, |
|
"eval_f1_micro": 0.8232248520710059, |
|
"eval_loss": 0.12090421468019485, |
|
"eval_roc_auc": 0.8860665789228, |
|
"eval_runtime": 602.5705, |
|
"eval_samples_per_second": 4.789, |
|
"eval_steps_per_second": 0.151, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 24570 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.3115038115038115, |
|
"eval_f1_macro": 0.7163143946337084, |
|
"eval_f1_micro": 0.8200938495056143, |
|
"eval_loss": 0.12043782323598862, |
|
"eval_roc_auc": 0.8784951828509925, |
|
"eval_runtime": 602.4626, |
|
"eval_samples_per_second": 4.79, |
|
"eval_steps_per_second": 0.151, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 24843 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 24843, |
|
"total_flos": 1.1760993126572918e+21, |
|
"train_loss": 0.13338198287712655, |
|
"train_runtime": 215448.2605, |
|
"train_samples_per_second": 6.068, |
|
"train_steps_per_second": 0.19 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 40950, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 150, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 10, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1760993126572918e+21, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|