|
{ |
|
"best_metric": 0.8048906048906048, |
|
"best_model_checkpoint": "final_models/glue_models/structroberta_s2_50ep//finetune/qqp/checkpoint-10600", |
|
"epoch": 3.3114323258869907, |
|
"global_step": 12600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.6895384788513184, |
|
"eval_f1": 0.6217489805165383, |
|
"eval_loss": 0.5790485143661499, |
|
"eval_mcc": 0.3590350743412485, |
|
"eval_runtime": 56.9694, |
|
"eval_samples_per_second": 471.99, |
|
"eval_steps_per_second": 59.014, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.7031499743461609, |
|
"eval_f1": 0.659238387978142, |
|
"eval_loss": 0.5603744983673096, |
|
"eval_mcc": 0.397609114853472, |
|
"eval_runtime": 57.0635, |
|
"eval_samples_per_second": 471.212, |
|
"eval_steps_per_second": 58.917, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.934296977660973e-05, |
|
"loss": 0.5879, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.7312655448913574, |
|
"eval_f1": 0.6582482027998486, |
|
"eval_loss": 0.5294312834739685, |
|
"eval_mcc": 0.4414063833899318, |
|
"eval_runtime": 57.2359, |
|
"eval_samples_per_second": 469.792, |
|
"eval_steps_per_second": 58.739, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.7384432554244995, |
|
"eval_f1": 0.6671399498319845, |
|
"eval_loss": 0.5185254216194153, |
|
"eval_mcc": 0.4564598592164868, |
|
"eval_runtime": 57.1234, |
|
"eval_samples_per_second": 470.718, |
|
"eval_steps_per_second": 58.855, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.868593955321945e-05, |
|
"loss": 0.5309, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.7485216856002808, |
|
"eval_f1": 0.6855761182925696, |
|
"eval_loss": 0.5134466886520386, |
|
"eval_mcc": 0.4786827802251824, |
|
"eval_runtime": 57.2501, |
|
"eval_samples_per_second": 469.676, |
|
"eval_steps_per_second": 58.725, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.752352237701416, |
|
"eval_f1": 0.7012427654897034, |
|
"eval_loss": 0.4966827630996704, |
|
"eval_mcc": 0.48998691711625764, |
|
"eval_runtime": 57.1252, |
|
"eval_samples_per_second": 470.703, |
|
"eval_steps_per_second": 58.853, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.7582654356956482, |
|
"eval_f1": 0.6868677136525676, |
|
"eval_loss": 0.4926142990589142, |
|
"eval_mcc": 0.4977363897273405, |
|
"eval_runtime": 56.8921, |
|
"eval_samples_per_second": 472.631, |
|
"eval_steps_per_second": 59.094, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.8028909329829176e-05, |
|
"loss": 0.5081, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.7646249532699585, |
|
"eval_f1": 0.7162774017124669, |
|
"eval_loss": 0.4827042818069458, |
|
"eval_mcc": 0.5153659006018214, |
|
"eval_runtime": 57.181, |
|
"eval_samples_per_second": 470.244, |
|
"eval_steps_per_second": 58.796, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.7632861137390137, |
|
"eval_f1": 0.6841347823929335, |
|
"eval_loss": 0.48687198758125305, |
|
"eval_mcc": 0.5088622782048654, |
|
"eval_runtime": 57.1217, |
|
"eval_samples_per_second": 470.732, |
|
"eval_steps_per_second": 58.857, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.73718791064389e-05, |
|
"loss": 0.4924, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.7514225244522095, |
|
"eval_f1": 0.6440136344269279, |
|
"eval_loss": 0.49411216378211975, |
|
"eval_mcc": 0.4884785053093981, |
|
"eval_runtime": 57.12, |
|
"eval_samples_per_second": 470.746, |
|
"eval_steps_per_second": 58.859, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.7751868963241577, |
|
"eval_f1": 0.7179056418871623, |
|
"eval_loss": 0.4720868468284607, |
|
"eval_mcc": 0.5341846799936681, |
|
"eval_runtime": 57.0874, |
|
"eval_samples_per_second": 471.015, |
|
"eval_steps_per_second": 58.892, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_accuracy": 0.7680836319923401, |
|
"eval_f1": 0.689070602313522, |
|
"eval_loss": 0.48496729135513306, |
|
"eval_mcc": 0.5194278885822263, |
|
"eval_runtime": 56.9884, |
|
"eval_samples_per_second": 471.833, |
|
"eval_steps_per_second": 58.994, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.6714848883048626e-05, |
|
"loss": 0.475, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.7773066759109497, |
|
"eval_f1": 0.7529091359247337, |
|
"eval_loss": 0.4599149525165558, |
|
"eval_mcc": 0.5560435147097561, |
|
"eval_runtime": 57.3093, |
|
"eval_samples_per_second": 469.191, |
|
"eval_steps_per_second": 58.664, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.7876827120780945, |
|
"eval_f1": 0.7471320370288347, |
|
"eval_loss": 0.45423364639282227, |
|
"eval_mcc": 0.5641587819264187, |
|
"eval_runtime": 57.0781, |
|
"eval_samples_per_second": 471.091, |
|
"eval_steps_per_second": 58.902, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.605781865965835e-05, |
|
"loss": 0.4584, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.7906206846237183, |
|
"eval_f1": 0.7403136531365313, |
|
"eval_loss": 0.44478940963745117, |
|
"eval_mcc": 0.5668900307355025, |
|
"eval_runtime": 57.1687, |
|
"eval_samples_per_second": 470.345, |
|
"eval_steps_per_second": 58.808, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.789616584777832, |
|
"eval_f1": 0.7573248680880271, |
|
"eval_loss": 0.4422828257083893, |
|
"eval_mcc": 0.5727700186824436, |
|
"eval_runtime": 57.2015, |
|
"eval_samples_per_second": 470.075, |
|
"eval_steps_per_second": 58.775, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.7930380702018738, |
|
"eval_f1": 0.7571035747021082, |
|
"eval_loss": 0.4391527771949768, |
|
"eval_mcc": 0.5770275603454963, |
|
"eval_runtime": 57.1341, |
|
"eval_samples_per_second": 470.63, |
|
"eval_steps_per_second": 58.844, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.540078843626807e-05, |
|
"loss": 0.4479, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.7989140748977661, |
|
"eval_f1": 0.7556599936734602, |
|
"eval_loss": 0.43771788477897644, |
|
"eval_mcc": 0.585340244040819, |
|
"eval_runtime": 57.2391, |
|
"eval_samples_per_second": 469.766, |
|
"eval_steps_per_second": 58.736, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7988768815994263, |
|
"eval_f1": 0.7547614728822782, |
|
"eval_loss": 0.42634981870651245, |
|
"eval_mcc": 0.5850092755942151, |
|
"eval_runtime": 57.0922, |
|
"eval_samples_per_second": 470.975, |
|
"eval_steps_per_second": 58.887, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.474375821287779e-05, |
|
"loss": 0.4171, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_accuracy": 0.8009223341941833, |
|
"eval_f1": 0.7616545705507815, |
|
"eval_loss": 0.4461493492126465, |
|
"eval_mcc": 0.5907844940997748, |
|
"eval_runtime": 57.2068, |
|
"eval_samples_per_second": 470.031, |
|
"eval_steps_per_second": 58.769, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_accuracy": 0.7885008454322815, |
|
"eval_f1": 0.7156642167891606, |
|
"eval_loss": 0.46828022599220276, |
|
"eval_mcc": 0.5637818475370291, |
|
"eval_runtime": 57.3281, |
|
"eval_samples_per_second": 469.037, |
|
"eval_steps_per_second": 58.645, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_accuracy": 0.8006619811058044, |
|
"eval_f1": 0.768546506606788, |
|
"eval_loss": 0.45249348878860474, |
|
"eval_mcc": 0.5941976311906684, |
|
"eval_runtime": 57.3016, |
|
"eval_samples_per_second": 469.254, |
|
"eval_steps_per_second": 58.672, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 4.408672798948752e-05, |
|
"loss": 0.3771, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_accuracy": 0.8068727254867554, |
|
"eval_f1": 0.7759223300970873, |
|
"eval_loss": 0.4169043004512787, |
|
"eval_mcc": 0.606982696864278, |
|
"eval_runtime": 57.1406, |
|
"eval_samples_per_second": 470.576, |
|
"eval_steps_per_second": 58.837, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_accuracy": 0.7954925894737244, |
|
"eval_f1": 0.7291533270945181, |
|
"eval_loss": 0.4451744556427002, |
|
"eval_mcc": 0.5777518921950682, |
|
"eval_runtime": 57.2245, |
|
"eval_samples_per_second": 469.886, |
|
"eval_steps_per_second": 58.751, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 4.342969776609724e-05, |
|
"loss": 0.3773, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_accuracy": 0.8078768253326416, |
|
"eval_f1": 0.7645396536007292, |
|
"eval_loss": 0.42197084426879883, |
|
"eval_mcc": 0.603319424026119, |
|
"eval_runtime": 57.0783, |
|
"eval_samples_per_second": 471.09, |
|
"eval_steps_per_second": 58.902, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_accuracy": 0.8099594712257385, |
|
"eval_f1": 0.7777294475859068, |
|
"eval_loss": 0.4131057560443878, |
|
"eval_mcc": 0.6121005762340846, |
|
"eval_runtime": 57.038, |
|
"eval_samples_per_second": 471.423, |
|
"eval_steps_per_second": 58.943, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_accuracy": 0.8107776641845703, |
|
"eval_f1": 0.7651186409380483, |
|
"eval_loss": 0.4410916864871979, |
|
"eval_mcc": 0.6088060459447059, |
|
"eval_runtime": 57.1661, |
|
"eval_samples_per_second": 470.366, |
|
"eval_steps_per_second": 58.811, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.2772667542706967e-05, |
|
"loss": 0.3716, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_accuracy": 0.8099594712257385, |
|
"eval_f1": 0.7631188577786018, |
|
"eval_loss": 0.43267035484313965, |
|
"eval_mcc": 0.6069920811878712, |
|
"eval_runtime": 56.9593, |
|
"eval_samples_per_second": 472.074, |
|
"eval_steps_per_second": 59.025, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_accuracy": 0.8123767971992493, |
|
"eval_f1": 0.7751481927173864, |
|
"eval_loss": 0.4137977659702301, |
|
"eval_mcc": 0.6142423355676122, |
|
"eval_runtime": 57.2611, |
|
"eval_samples_per_second": 469.586, |
|
"eval_steps_per_second": 58.713, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.211563731931669e-05, |
|
"loss": 0.3652, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_accuracy": 0.811670184135437, |
|
"eval_f1": 0.762119503945885, |
|
"eval_loss": 0.4430968165397644, |
|
"eval_mcc": 0.610427625756327, |
|
"eval_runtime": 57.15, |
|
"eval_samples_per_second": 470.499, |
|
"eval_steps_per_second": 58.828, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_accuracy": 0.811149537563324, |
|
"eval_f1": 0.773667320377964, |
|
"eval_loss": 0.42289894819259644, |
|
"eval_mcc": 0.6117143787328192, |
|
"eval_runtime": 57.3569, |
|
"eval_samples_per_second": 468.801, |
|
"eval_steps_per_second": 58.615, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_accuracy": 0.813975989818573, |
|
"eval_f1": 0.772325898953118, |
|
"eval_loss": 0.4201619327068329, |
|
"eval_mcc": 0.6160239371091856, |
|
"eval_runtime": 57.1429, |
|
"eval_samples_per_second": 470.557, |
|
"eval_steps_per_second": 58.835, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 4.1458607095926416e-05, |
|
"loss": 0.3651, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_accuracy": 0.8141990900039673, |
|
"eval_f1": 0.787512759441987, |
|
"eval_loss": 0.42058825492858887, |
|
"eval_mcc": 0.6243481982422204, |
|
"eval_runtime": 57.0624, |
|
"eval_samples_per_second": 471.221, |
|
"eval_steps_per_second": 58.918, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_accuracy": 0.8160214424133301, |
|
"eval_f1": 0.7705366668212812, |
|
"eval_loss": 0.4051779508590698, |
|
"eval_mcc": 0.6196198122308723, |
|
"eval_runtime": 57.3622, |
|
"eval_samples_per_second": 468.758, |
|
"eval_steps_per_second": 58.61, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 4.080157687253614e-05, |
|
"loss": 0.3559, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_accuracy": 0.8187363147735596, |
|
"eval_f1": 0.778816482120167, |
|
"eval_loss": 0.4070768654346466, |
|
"eval_mcc": 0.6260354328376837, |
|
"eval_runtime": 57.2066, |
|
"eval_samples_per_second": 470.033, |
|
"eval_steps_per_second": 58.769, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_accuracy": 0.8202982544898987, |
|
"eval_f1": 0.7892900750043608, |
|
"eval_loss": 0.40111517906188965, |
|
"eval_mcc": 0.6328935271313376, |
|
"eval_runtime": 57.2191, |
|
"eval_samples_per_second": 469.93, |
|
"eval_steps_per_second": 58.757, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.8220833539962769, |
|
"eval_f1": 0.7814926463871381, |
|
"eval_loss": 0.39430883526802063, |
|
"eval_mcc": 0.6326604221713003, |
|
"eval_runtime": 57.1848, |
|
"eval_samples_per_second": 470.213, |
|
"eval_steps_per_second": 58.792, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 4.0144546649145865e-05, |
|
"loss": 0.3619, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8156495094299316, |
|
"eval_f1": 0.7624478842191019, |
|
"eval_loss": 0.41225436329841614, |
|
"eval_mcc": 0.619232101302901, |
|
"eval_runtime": 57.0196, |
|
"eval_samples_per_second": 471.575, |
|
"eval_steps_per_second": 58.962, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_accuracy": 0.8209304809570312, |
|
"eval_f1": 0.7901320664254892, |
|
"eval_loss": 0.4239133894443512, |
|
"eval_mcc": 0.6342483559433012, |
|
"eval_runtime": 57.2383, |
|
"eval_samples_per_second": 469.773, |
|
"eval_steps_per_second": 58.737, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 3.948751642575559e-05, |
|
"loss": 0.2886, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_accuracy": 0.821525514125824, |
|
"eval_f1": 0.790097537506014, |
|
"eval_loss": 0.4144342243671417, |
|
"eval_mcc": 0.6350233886842492, |
|
"eval_runtime": 57.1057, |
|
"eval_samples_per_second": 470.864, |
|
"eval_steps_per_second": 58.873, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_accuracy": 0.8241288065910339, |
|
"eval_f1": 0.7847617313731737, |
|
"eval_loss": 0.40638136863708496, |
|
"eval_mcc": 0.6370477000221836, |
|
"eval_runtime": 57.2228, |
|
"eval_samples_per_second": 469.9, |
|
"eval_steps_per_second": 58.753, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_accuracy": 0.8260998725891113, |
|
"eval_f1": 0.7945337903154934, |
|
"eval_loss": 0.42089590430259705, |
|
"eval_mcc": 0.6438486028810766, |
|
"eval_runtime": 57.1363, |
|
"eval_samples_per_second": 470.612, |
|
"eval_steps_per_second": 58.842, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 3.8830486202365314e-05, |
|
"loss": 0.2766, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_accuracy": 0.8188478350639343, |
|
"eval_f1": 0.7956709593523219, |
|
"eval_loss": 0.43640926480293274, |
|
"eval_mcc": 0.63650625614698, |
|
"eval_runtime": 56.9978, |
|
"eval_samples_per_second": 471.755, |
|
"eval_steps_per_second": 58.985, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_accuracy": 0.8274387121200562, |
|
"eval_f1": 0.7945811935540995, |
|
"eval_loss": 0.405335932970047, |
|
"eval_mcc": 0.6458161739050261, |
|
"eval_runtime": 57.0558, |
|
"eval_samples_per_second": 471.275, |
|
"eval_steps_per_second": 58.925, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 3.817345597897504e-05, |
|
"loss": 0.2797, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_accuracy": 0.8264718055725098, |
|
"eval_f1": 0.7906684611933603, |
|
"eval_loss": 0.4281841516494751, |
|
"eval_mcc": 0.6427212326252381, |
|
"eval_runtime": 57.0173, |
|
"eval_samples_per_second": 471.593, |
|
"eval_steps_per_second": 58.965, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"eval_accuracy": 0.8209676742553711, |
|
"eval_f1": 0.775926270713089, |
|
"eval_loss": 0.4169626533985138, |
|
"eval_mcc": 0.6299012875382383, |
|
"eval_runtime": 57.2133, |
|
"eval_samples_per_second": 469.978, |
|
"eval_steps_per_second": 58.763, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_accuracy": 0.8285172581672668, |
|
"eval_f1": 0.7999826486791307, |
|
"eval_loss": 0.40043607354164124, |
|
"eval_mcc": 0.6503868538586679, |
|
"eval_runtime": 57.1097, |
|
"eval_samples_per_second": 470.831, |
|
"eval_steps_per_second": 58.869, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.7516425755584764e-05, |
|
"loss": 0.2866, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"eval_accuracy": 0.8278850317001343, |
|
"eval_f1": 0.7914375844975214, |
|
"eval_loss": 0.4065154492855072, |
|
"eval_mcc": 0.6453385200418826, |
|
"eval_runtime": 57.1041, |
|
"eval_samples_per_second": 470.877, |
|
"eval_steps_per_second": 58.875, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_accuracy": 0.8289635181427002, |
|
"eval_f1": 0.791702522759183, |
|
"eval_loss": 0.39962634444236755, |
|
"eval_mcc": 0.6472846692602012, |
|
"eval_runtime": 57.1663, |
|
"eval_samples_per_second": 470.364, |
|
"eval_steps_per_second": 58.811, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.685939553219449e-05, |
|
"loss": 0.2838, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_accuracy": 0.8313064575195312, |
|
"eval_f1": 0.7954914337240757, |
|
"eval_loss": 0.41669055819511414, |
|
"eval_mcc": 0.6523724926921574, |
|
"eval_runtime": 57.0331, |
|
"eval_samples_per_second": 471.463, |
|
"eval_steps_per_second": 58.948, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_accuracy": 0.8318271636962891, |
|
"eval_f1": 0.7962328767123288, |
|
"eval_loss": 0.428965300321579, |
|
"eval_mcc": 0.6534785544952427, |
|
"eval_runtime": 57.2333, |
|
"eval_samples_per_second": 469.814, |
|
"eval_steps_per_second": 58.742, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_accuracy": 0.8308601975440979, |
|
"eval_f1": 0.7983863817714337, |
|
"eval_loss": 0.4066237211227417, |
|
"eval_mcc": 0.6527156031218291, |
|
"eval_runtime": 57.2722, |
|
"eval_samples_per_second": 469.495, |
|
"eval_steps_per_second": 58.702, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 3.6202365308804206e-05, |
|
"loss": 0.2807, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_accuracy": 0.8308601975440979, |
|
"eval_f1": 0.8048906048906048, |
|
"eval_loss": 0.3991719186306, |
|
"eval_mcc": 0.6567986450071405, |
|
"eval_runtime": 57.1392, |
|
"eval_samples_per_second": 470.588, |
|
"eval_steps_per_second": 58.839, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_accuracy": 0.830302357673645, |
|
"eval_f1": 0.7989779285431076, |
|
"eval_loss": 0.41573354601860046, |
|
"eval_mcc": 0.6521800844709269, |
|
"eval_runtime": 57.1598, |
|
"eval_samples_per_second": 470.418, |
|
"eval_steps_per_second": 58.818, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 3.554533508541393e-05, |
|
"loss": 0.2854, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_accuracy": 0.827996551990509, |
|
"eval_f1": 0.7814375502102926, |
|
"eval_loss": 0.4512079060077667, |
|
"eval_mcc": 0.6447879159418899, |
|
"eval_runtime": 57.062, |
|
"eval_samples_per_second": 471.224, |
|
"eval_steps_per_second": 58.918, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"eval_accuracy": 0.8250957727432251, |
|
"eval_f1": 0.7756095233551219, |
|
"eval_loss": 0.426098495721817, |
|
"eval_mcc": 0.6390143935892307, |
|
"eval_runtime": 57.1498, |
|
"eval_samples_per_second": 470.5, |
|
"eval_steps_per_second": 58.828, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8325709104537964, |
|
"eval_f1": 0.798478066248881, |
|
"eval_loss": 0.4026671350002289, |
|
"eval_mcc": 0.655443685564215, |
|
"eval_runtime": 57.2209, |
|
"eval_samples_per_second": 469.916, |
|
"eval_steps_per_second": 58.755, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 3.4888304862023655e-05, |
|
"loss": 0.2705, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"eval_accuracy": 0.8301535844802856, |
|
"eval_f1": 0.7850317721816898, |
|
"eval_loss": 0.5190560817718506, |
|
"eval_mcc": 0.6492185236914582, |
|
"eval_runtime": 57.1491, |
|
"eval_samples_per_second": 470.506, |
|
"eval_steps_per_second": 58.829, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_accuracy": 0.8269924521446228, |
|
"eval_f1": 0.8008220585716732, |
|
"eval_loss": 0.4870125353336334, |
|
"eval_mcc": 0.6492371751318124, |
|
"eval_runtime": 57.0005, |
|
"eval_samples_per_second": 471.733, |
|
"eval_steps_per_second": 58.982, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 3.423127463863337e-05, |
|
"loss": 0.2011, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"eval_accuracy": 0.830599844455719, |
|
"eval_f1": 0.7876357872161873, |
|
"eval_loss": 0.47779834270477295, |
|
"eval_mcc": 0.6500219154781351, |
|
"eval_runtime": 57.4254, |
|
"eval_samples_per_second": 468.243, |
|
"eval_steps_per_second": 58.546, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_accuracy": 0.8338726162910461, |
|
"eval_f1": 0.7996771155657204, |
|
"eval_loss": 0.43881040811538696, |
|
"eval_mcc": 0.6579979480473281, |
|
"eval_runtime": 57.1566, |
|
"eval_samples_per_second": 470.445, |
|
"eval_steps_per_second": 58.821, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"eval_accuracy": 0.834430456161499, |
|
"eval_f1": 0.795854732208364, |
|
"eval_loss": 0.4549373984336853, |
|
"eval_mcc": 0.658150442576307, |
|
"eval_runtime": 57.0319, |
|
"eval_samples_per_second": 471.473, |
|
"eval_steps_per_second": 58.949, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 3.35742444152431e-05, |
|
"loss": 0.2077, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"eval_accuracy": 0.830599844455719, |
|
"eval_f1": 0.79477359765713, |
|
"eval_loss": 0.45785439014434814, |
|
"eval_mcc": 0.6509541866022467, |
|
"eval_runtime": 57.1908, |
|
"eval_samples_per_second": 470.163, |
|
"eval_steps_per_second": 58.786, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"step": 12600, |
|
"total_flos": 7.386343728182784e+16, |
|
"train_loss": 0.36493192960345555, |
|
"train_runtime": 7909.6164, |
|
"train_samples_per_second": 307.851, |
|
"train_steps_per_second": 4.811 |
|
} |
|
], |
|
"max_steps": 38050, |
|
"num_train_epochs": 10, |
|
"total_flos": 7.386343728182784e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|