{ "best_metric": 0.8048906048906048, "best_model_checkpoint": "final_models/glue_models/structroberta_s2_50ep//finetune/qqp/checkpoint-10600", "epoch": 3.3114323258869907, "global_step": 12600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "eval_accuracy": 0.6895384788513184, "eval_f1": 0.6217489805165383, "eval_loss": 0.5790485143661499, "eval_mcc": 0.3590350743412485, "eval_runtime": 56.9694, "eval_samples_per_second": 471.99, "eval_steps_per_second": 59.014, "step": 200 }, { "epoch": 0.11, "eval_accuracy": 0.7031499743461609, "eval_f1": 0.659238387978142, "eval_loss": 0.5603744983673096, "eval_mcc": 0.397609114853472, "eval_runtime": 57.0635, "eval_samples_per_second": 471.212, "eval_steps_per_second": 58.917, "step": 400 }, { "epoch": 0.13, "learning_rate": 4.934296977660973e-05, "loss": 0.5879, "step": 500 }, { "epoch": 0.16, "eval_accuracy": 0.7312655448913574, "eval_f1": 0.6582482027998486, "eval_loss": 0.5294312834739685, "eval_mcc": 0.4414063833899318, "eval_runtime": 57.2359, "eval_samples_per_second": 469.792, "eval_steps_per_second": 58.739, "step": 600 }, { "epoch": 0.21, "eval_accuracy": 0.7384432554244995, "eval_f1": 0.6671399498319845, "eval_loss": 0.5185254216194153, "eval_mcc": 0.4564598592164868, "eval_runtime": 57.1234, "eval_samples_per_second": 470.718, "eval_steps_per_second": 58.855, "step": 800 }, { "epoch": 0.26, "learning_rate": 4.868593955321945e-05, "loss": 0.5309, "step": 1000 }, { "epoch": 0.26, "eval_accuracy": 0.7485216856002808, "eval_f1": 0.6855761182925696, "eval_loss": 0.5134466886520386, "eval_mcc": 0.4786827802251824, "eval_runtime": 57.2501, "eval_samples_per_second": 469.676, "eval_steps_per_second": 58.725, "step": 1000 }, { "epoch": 0.32, "eval_accuracy": 0.752352237701416, "eval_f1": 0.7012427654897034, "eval_loss": 0.4966827630996704, "eval_mcc": 0.48998691711625764, "eval_runtime": 57.1252, "eval_samples_per_second": 470.703, "eval_steps_per_second": 58.853, "step": 1200 }, { "epoch": 0.37, "eval_accuracy": 0.7582654356956482, "eval_f1": 0.6868677136525676, "eval_loss": 0.4926142990589142, "eval_mcc": 0.4977363897273405, "eval_runtime": 56.8921, "eval_samples_per_second": 472.631, "eval_steps_per_second": 59.094, "step": 1400 }, { "epoch": 0.39, "learning_rate": 4.8028909329829176e-05, "loss": 0.5081, "step": 1500 }, { "epoch": 0.42, "eval_accuracy": 0.7646249532699585, "eval_f1": 0.7162774017124669, "eval_loss": 0.4827042818069458, "eval_mcc": 0.5153659006018214, "eval_runtime": 57.181, "eval_samples_per_second": 470.244, "eval_steps_per_second": 58.796, "step": 1600 }, { "epoch": 0.47, "eval_accuracy": 0.7632861137390137, "eval_f1": 0.6841347823929335, "eval_loss": 0.48687198758125305, "eval_mcc": 0.5088622782048654, "eval_runtime": 57.1217, "eval_samples_per_second": 470.732, "eval_steps_per_second": 58.857, "step": 1800 }, { "epoch": 0.53, "learning_rate": 4.73718791064389e-05, "loss": 0.4924, "step": 2000 }, { "epoch": 0.53, "eval_accuracy": 0.7514225244522095, "eval_f1": 0.6440136344269279, "eval_loss": 0.49411216378211975, "eval_mcc": 0.4884785053093981, "eval_runtime": 57.12, "eval_samples_per_second": 470.746, "eval_steps_per_second": 58.859, "step": 2000 }, { "epoch": 0.58, "eval_accuracy": 0.7751868963241577, "eval_f1": 0.7179056418871623, "eval_loss": 0.4720868468284607, "eval_mcc": 0.5341846799936681, "eval_runtime": 57.0874, "eval_samples_per_second": 471.015, "eval_steps_per_second": 58.892, "step": 2200 }, { "epoch": 0.63, "eval_accuracy": 0.7680836319923401, "eval_f1": 0.689070602313522, "eval_loss": 0.48496729135513306, "eval_mcc": 0.5194278885822263, "eval_runtime": 56.9884, "eval_samples_per_second": 471.833, "eval_steps_per_second": 58.994, "step": 2400 }, { "epoch": 0.66, "learning_rate": 4.6714848883048626e-05, "loss": 0.475, "step": 2500 }, { "epoch": 0.68, "eval_accuracy": 0.7773066759109497, "eval_f1": 0.7529091359247337, "eval_loss": 0.4599149525165558, "eval_mcc": 0.5560435147097561, "eval_runtime": 57.3093, "eval_samples_per_second": 469.191, "eval_steps_per_second": 58.664, "step": 2600 }, { "epoch": 0.74, "eval_accuracy": 0.7876827120780945, "eval_f1": 0.7471320370288347, "eval_loss": 0.45423364639282227, "eval_mcc": 0.5641587819264187, "eval_runtime": 57.0781, "eval_samples_per_second": 471.091, "eval_steps_per_second": 58.902, "step": 2800 }, { "epoch": 0.79, "learning_rate": 4.605781865965835e-05, "loss": 0.4584, "step": 3000 }, { "epoch": 0.79, "eval_accuracy": 0.7906206846237183, "eval_f1": 0.7403136531365313, "eval_loss": 0.44478940963745117, "eval_mcc": 0.5668900307355025, "eval_runtime": 57.1687, "eval_samples_per_second": 470.345, "eval_steps_per_second": 58.808, "step": 3000 }, { "epoch": 0.84, "eval_accuracy": 0.789616584777832, "eval_f1": 0.7573248680880271, "eval_loss": 0.4422828257083893, "eval_mcc": 0.5727700186824436, "eval_runtime": 57.2015, "eval_samples_per_second": 470.075, "eval_steps_per_second": 58.775, "step": 3200 }, { "epoch": 0.89, "eval_accuracy": 0.7930380702018738, "eval_f1": 0.7571035747021082, "eval_loss": 0.4391527771949768, "eval_mcc": 0.5770275603454963, "eval_runtime": 57.1341, "eval_samples_per_second": 470.63, "eval_steps_per_second": 58.844, "step": 3400 }, { "epoch": 0.92, "learning_rate": 4.540078843626807e-05, "loss": 0.4479, "step": 3500 }, { "epoch": 0.95, "eval_accuracy": 0.7989140748977661, "eval_f1": 0.7556599936734602, "eval_loss": 0.43771788477897644, "eval_mcc": 0.585340244040819, "eval_runtime": 57.2391, "eval_samples_per_second": 469.766, "eval_steps_per_second": 58.736, "step": 3600 }, { "epoch": 1.0, "eval_accuracy": 0.7988768815994263, "eval_f1": 0.7547614728822782, "eval_loss": 0.42634981870651245, "eval_mcc": 0.5850092755942151, "eval_runtime": 57.0922, "eval_samples_per_second": 470.975, "eval_steps_per_second": 58.887, "step": 3800 }, { "epoch": 1.05, "learning_rate": 4.474375821287779e-05, "loss": 0.4171, "step": 4000 }, { "epoch": 1.05, "eval_accuracy": 0.8009223341941833, "eval_f1": 0.7616545705507815, "eval_loss": 0.4461493492126465, "eval_mcc": 0.5907844940997748, "eval_runtime": 57.2068, "eval_samples_per_second": 470.031, "eval_steps_per_second": 58.769, "step": 4000 }, { "epoch": 1.1, "eval_accuracy": 0.7885008454322815, "eval_f1": 0.7156642167891606, "eval_loss": 0.46828022599220276, "eval_mcc": 0.5637818475370291, "eval_runtime": 57.3281, "eval_samples_per_second": 469.037, "eval_steps_per_second": 58.645, "step": 4200 }, { "epoch": 1.16, "eval_accuracy": 0.8006619811058044, "eval_f1": 0.768546506606788, "eval_loss": 0.45249348878860474, "eval_mcc": 0.5941976311906684, "eval_runtime": 57.3016, "eval_samples_per_second": 469.254, "eval_steps_per_second": 58.672, "step": 4400 }, { "epoch": 1.18, "learning_rate": 4.408672798948752e-05, "loss": 0.3771, "step": 4500 }, { "epoch": 1.21, "eval_accuracy": 0.8068727254867554, "eval_f1": 0.7759223300970873, "eval_loss": 0.4169043004512787, "eval_mcc": 0.606982696864278, "eval_runtime": 57.1406, "eval_samples_per_second": 470.576, "eval_steps_per_second": 58.837, "step": 4600 }, { "epoch": 1.26, "eval_accuracy": 0.7954925894737244, "eval_f1": 0.7291533270945181, "eval_loss": 0.4451744556427002, "eval_mcc": 0.5777518921950682, "eval_runtime": 57.2245, "eval_samples_per_second": 469.886, "eval_steps_per_second": 58.751, "step": 4800 }, { "epoch": 1.31, "learning_rate": 4.342969776609724e-05, "loss": 0.3773, "step": 5000 }, { "epoch": 1.31, "eval_accuracy": 0.8078768253326416, "eval_f1": 0.7645396536007292, "eval_loss": 0.42197084426879883, "eval_mcc": 0.603319424026119, "eval_runtime": 57.0783, "eval_samples_per_second": 471.09, "eval_steps_per_second": 58.902, "step": 5000 }, { "epoch": 1.37, "eval_accuracy": 0.8099594712257385, "eval_f1": 0.7777294475859068, "eval_loss": 0.4131057560443878, "eval_mcc": 0.6121005762340846, "eval_runtime": 57.038, "eval_samples_per_second": 471.423, "eval_steps_per_second": 58.943, "step": 5200 }, { "epoch": 1.42, "eval_accuracy": 0.8107776641845703, "eval_f1": 0.7651186409380483, "eval_loss": 0.4410916864871979, "eval_mcc": 0.6088060459447059, "eval_runtime": 57.1661, "eval_samples_per_second": 470.366, "eval_steps_per_second": 58.811, "step": 5400 }, { "epoch": 1.45, "learning_rate": 4.2772667542706967e-05, "loss": 0.3716, "step": 5500 }, { "epoch": 1.47, "eval_accuracy": 0.8099594712257385, "eval_f1": 0.7631188577786018, "eval_loss": 0.43267035484313965, "eval_mcc": 0.6069920811878712, "eval_runtime": 56.9593, "eval_samples_per_second": 472.074, "eval_steps_per_second": 59.025, "step": 5600 }, { "epoch": 1.52, "eval_accuracy": 0.8123767971992493, "eval_f1": 0.7751481927173864, "eval_loss": 0.4137977659702301, "eval_mcc": 0.6142423355676122, "eval_runtime": 57.2611, "eval_samples_per_second": 469.586, "eval_steps_per_second": 58.713, "step": 5800 }, { "epoch": 1.58, "learning_rate": 4.211563731931669e-05, "loss": 0.3652, "step": 6000 }, { "epoch": 1.58, "eval_accuracy": 0.811670184135437, "eval_f1": 0.762119503945885, "eval_loss": 0.4430968165397644, "eval_mcc": 0.610427625756327, "eval_runtime": 57.15, "eval_samples_per_second": 470.499, "eval_steps_per_second": 58.828, "step": 6000 }, { "epoch": 1.63, "eval_accuracy": 0.811149537563324, "eval_f1": 0.773667320377964, "eval_loss": 0.42289894819259644, "eval_mcc": 0.6117143787328192, "eval_runtime": 57.3569, "eval_samples_per_second": 468.801, "eval_steps_per_second": 58.615, "step": 6200 }, { "epoch": 1.68, "eval_accuracy": 0.813975989818573, "eval_f1": 0.772325898953118, "eval_loss": 0.4201619327068329, "eval_mcc": 0.6160239371091856, "eval_runtime": 57.1429, "eval_samples_per_second": 470.557, "eval_steps_per_second": 58.835, "step": 6400 }, { "epoch": 1.71, "learning_rate": 4.1458607095926416e-05, "loss": 0.3651, "step": 6500 }, { "epoch": 1.73, "eval_accuracy": 0.8141990900039673, "eval_f1": 0.787512759441987, "eval_loss": 0.42058825492858887, "eval_mcc": 0.6243481982422204, "eval_runtime": 57.0624, "eval_samples_per_second": 471.221, "eval_steps_per_second": 58.918, "step": 6600 }, { "epoch": 1.79, "eval_accuracy": 0.8160214424133301, "eval_f1": 0.7705366668212812, "eval_loss": 0.4051779508590698, "eval_mcc": 0.6196198122308723, "eval_runtime": 57.3622, "eval_samples_per_second": 468.758, "eval_steps_per_second": 58.61, "step": 6800 }, { "epoch": 1.84, "learning_rate": 4.080157687253614e-05, "loss": 0.3559, "step": 7000 }, { "epoch": 1.84, "eval_accuracy": 0.8187363147735596, "eval_f1": 0.778816482120167, "eval_loss": 0.4070768654346466, "eval_mcc": 0.6260354328376837, "eval_runtime": 57.2066, "eval_samples_per_second": 470.033, "eval_steps_per_second": 58.769, "step": 7000 }, { "epoch": 1.89, "eval_accuracy": 0.8202982544898987, "eval_f1": 0.7892900750043608, "eval_loss": 0.40111517906188965, "eval_mcc": 0.6328935271313376, "eval_runtime": 57.2191, "eval_samples_per_second": 469.93, "eval_steps_per_second": 58.757, "step": 7200 }, { "epoch": 1.94, "eval_accuracy": 0.8220833539962769, "eval_f1": 0.7814926463871381, "eval_loss": 0.39430883526802063, "eval_mcc": 0.6326604221713003, "eval_runtime": 57.1848, "eval_samples_per_second": 470.213, "eval_steps_per_second": 58.792, "step": 7400 }, { "epoch": 1.97, "learning_rate": 4.0144546649145865e-05, "loss": 0.3619, "step": 7500 }, { "epoch": 2.0, "eval_accuracy": 0.8156495094299316, "eval_f1": 0.7624478842191019, "eval_loss": 0.41225436329841614, "eval_mcc": 0.619232101302901, "eval_runtime": 57.0196, "eval_samples_per_second": 471.575, "eval_steps_per_second": 58.962, "step": 7600 }, { "epoch": 2.05, "eval_accuracy": 0.8209304809570312, "eval_f1": 0.7901320664254892, "eval_loss": 0.4239133894443512, "eval_mcc": 0.6342483559433012, "eval_runtime": 57.2383, "eval_samples_per_second": 469.773, "eval_steps_per_second": 58.737, "step": 7800 }, { "epoch": 2.1, "learning_rate": 3.948751642575559e-05, "loss": 0.2886, "step": 8000 }, { "epoch": 2.1, "eval_accuracy": 0.821525514125824, "eval_f1": 0.790097537506014, "eval_loss": 0.4144342243671417, "eval_mcc": 0.6350233886842492, "eval_runtime": 57.1057, "eval_samples_per_second": 470.864, "eval_steps_per_second": 58.873, "step": 8000 }, { "epoch": 2.16, "eval_accuracy": 0.8241288065910339, "eval_f1": 0.7847617313731737, "eval_loss": 0.40638136863708496, "eval_mcc": 0.6370477000221836, "eval_runtime": 57.2228, "eval_samples_per_second": 469.9, "eval_steps_per_second": 58.753, "step": 8200 }, { "epoch": 2.21, "eval_accuracy": 0.8260998725891113, "eval_f1": 0.7945337903154934, "eval_loss": 0.42089590430259705, "eval_mcc": 0.6438486028810766, "eval_runtime": 57.1363, "eval_samples_per_second": 470.612, "eval_steps_per_second": 58.842, "step": 8400 }, { "epoch": 2.23, "learning_rate": 3.8830486202365314e-05, "loss": 0.2766, "step": 8500 }, { "epoch": 2.26, "eval_accuracy": 0.8188478350639343, "eval_f1": 0.7956709593523219, "eval_loss": 0.43640926480293274, "eval_mcc": 0.63650625614698, "eval_runtime": 56.9978, "eval_samples_per_second": 471.755, "eval_steps_per_second": 58.985, "step": 8600 }, { "epoch": 2.31, "eval_accuracy": 0.8274387121200562, "eval_f1": 0.7945811935540995, "eval_loss": 0.405335932970047, "eval_mcc": 0.6458161739050261, "eval_runtime": 57.0558, "eval_samples_per_second": 471.275, "eval_steps_per_second": 58.925, "step": 8800 }, { "epoch": 2.37, "learning_rate": 3.817345597897504e-05, "loss": 0.2797, "step": 9000 }, { "epoch": 2.37, "eval_accuracy": 0.8264718055725098, "eval_f1": 0.7906684611933603, "eval_loss": 0.4281841516494751, "eval_mcc": 0.6427212326252381, "eval_runtime": 57.0173, "eval_samples_per_second": 471.593, "eval_steps_per_second": 58.965, "step": 9000 }, { "epoch": 2.42, "eval_accuracy": 0.8209676742553711, "eval_f1": 0.775926270713089, "eval_loss": 0.4169626533985138, "eval_mcc": 0.6299012875382383, "eval_runtime": 57.2133, "eval_samples_per_second": 469.978, "eval_steps_per_second": 58.763, "step": 9200 }, { "epoch": 2.47, "eval_accuracy": 0.8285172581672668, "eval_f1": 0.7999826486791307, "eval_loss": 0.40043607354164124, "eval_mcc": 0.6503868538586679, "eval_runtime": 57.1097, "eval_samples_per_second": 470.831, "eval_steps_per_second": 58.869, "step": 9400 }, { "epoch": 2.5, "learning_rate": 3.7516425755584764e-05, "loss": 0.2866, "step": 9500 }, { "epoch": 2.52, "eval_accuracy": 0.8278850317001343, "eval_f1": 0.7914375844975214, "eval_loss": 0.4065154492855072, "eval_mcc": 0.6453385200418826, "eval_runtime": 57.1041, "eval_samples_per_second": 470.877, "eval_steps_per_second": 58.875, "step": 9600 }, { "epoch": 2.58, "eval_accuracy": 0.8289635181427002, "eval_f1": 0.791702522759183, "eval_loss": 0.39962634444236755, "eval_mcc": 0.6472846692602012, "eval_runtime": 57.1663, "eval_samples_per_second": 470.364, "eval_steps_per_second": 58.811, "step": 9800 }, { "epoch": 2.63, "learning_rate": 3.685939553219449e-05, "loss": 0.2838, "step": 10000 }, { "epoch": 2.63, "eval_accuracy": 0.8313064575195312, "eval_f1": 0.7954914337240757, "eval_loss": 0.41669055819511414, "eval_mcc": 0.6523724926921574, "eval_runtime": 57.0331, "eval_samples_per_second": 471.463, "eval_steps_per_second": 58.948, "step": 10000 }, { "epoch": 2.68, "eval_accuracy": 0.8318271636962891, "eval_f1": 0.7962328767123288, "eval_loss": 0.428965300321579, "eval_mcc": 0.6534785544952427, "eval_runtime": 57.2333, "eval_samples_per_second": 469.814, "eval_steps_per_second": 58.742, "step": 10200 }, { "epoch": 2.73, "eval_accuracy": 0.8308601975440979, "eval_f1": 0.7983863817714337, "eval_loss": 0.4066237211227417, "eval_mcc": 0.6527156031218291, "eval_runtime": 57.2722, "eval_samples_per_second": 469.495, "eval_steps_per_second": 58.702, "step": 10400 }, { "epoch": 2.76, "learning_rate": 3.6202365308804206e-05, "loss": 0.2807, "step": 10500 }, { "epoch": 2.79, "eval_accuracy": 0.8308601975440979, "eval_f1": 0.8048906048906048, "eval_loss": 0.3991719186306, "eval_mcc": 0.6567986450071405, "eval_runtime": 57.1392, "eval_samples_per_second": 470.588, "eval_steps_per_second": 58.839, "step": 10600 }, { "epoch": 2.84, "eval_accuracy": 0.830302357673645, "eval_f1": 0.7989779285431076, "eval_loss": 0.41573354601860046, "eval_mcc": 0.6521800844709269, "eval_runtime": 57.1598, "eval_samples_per_second": 470.418, "eval_steps_per_second": 58.818, "step": 10800 }, { "epoch": 2.89, "learning_rate": 3.554533508541393e-05, "loss": 0.2854, "step": 11000 }, { "epoch": 2.89, "eval_accuracy": 0.827996551990509, "eval_f1": 0.7814375502102926, "eval_loss": 0.4512079060077667, "eval_mcc": 0.6447879159418899, "eval_runtime": 57.062, "eval_samples_per_second": 471.224, "eval_steps_per_second": 58.918, "step": 11000 }, { "epoch": 2.94, "eval_accuracy": 0.8250957727432251, "eval_f1": 0.7756095233551219, "eval_loss": 0.426098495721817, "eval_mcc": 0.6390143935892307, "eval_runtime": 57.1498, "eval_samples_per_second": 470.5, "eval_steps_per_second": 58.828, "step": 11200 }, { "epoch": 3.0, "eval_accuracy": 0.8325709104537964, "eval_f1": 0.798478066248881, "eval_loss": 0.4026671350002289, "eval_mcc": 0.655443685564215, "eval_runtime": 57.2209, "eval_samples_per_second": 469.916, "eval_steps_per_second": 58.755, "step": 11400 }, { "epoch": 3.02, "learning_rate": 3.4888304862023655e-05, "loss": 0.2705, "step": 11500 }, { "epoch": 3.05, "eval_accuracy": 0.8301535844802856, "eval_f1": 0.7850317721816898, "eval_loss": 0.5190560817718506, "eval_mcc": 0.6492185236914582, "eval_runtime": 57.1491, "eval_samples_per_second": 470.506, "eval_steps_per_second": 58.829, "step": 11600 }, { "epoch": 3.1, "eval_accuracy": 0.8269924521446228, "eval_f1": 0.8008220585716732, "eval_loss": 0.4870125353336334, "eval_mcc": 0.6492371751318124, "eval_runtime": 57.0005, "eval_samples_per_second": 471.733, "eval_steps_per_second": 58.982, "step": 11800 }, { "epoch": 3.15, "learning_rate": 3.423127463863337e-05, "loss": 0.2011, "step": 12000 }, { "epoch": 3.15, "eval_accuracy": 0.830599844455719, "eval_f1": 0.7876357872161873, "eval_loss": 0.47779834270477295, "eval_mcc": 0.6500219154781351, "eval_runtime": 57.4254, "eval_samples_per_second": 468.243, "eval_steps_per_second": 58.546, "step": 12000 }, { "epoch": 3.21, "eval_accuracy": 0.8338726162910461, "eval_f1": 0.7996771155657204, "eval_loss": 0.43881040811538696, "eval_mcc": 0.6579979480473281, "eval_runtime": 57.1566, "eval_samples_per_second": 470.445, "eval_steps_per_second": 58.821, "step": 12200 }, { "epoch": 3.26, "eval_accuracy": 0.834430456161499, "eval_f1": 0.795854732208364, "eval_loss": 0.4549373984336853, "eval_mcc": 0.658150442576307, "eval_runtime": 57.0319, "eval_samples_per_second": 471.473, "eval_steps_per_second": 58.949, "step": 12400 }, { "epoch": 3.29, "learning_rate": 3.35742444152431e-05, "loss": 0.2077, "step": 12500 }, { "epoch": 3.31, "eval_accuracy": 0.830599844455719, "eval_f1": 0.79477359765713, "eval_loss": 0.45785439014434814, "eval_mcc": 0.6509541866022467, "eval_runtime": 57.1908, "eval_samples_per_second": 470.163, "eval_steps_per_second": 58.786, "step": 12600 }, { "epoch": 3.31, "step": 12600, "total_flos": 7.386343728182784e+16, "train_loss": 0.36493192960345555, "train_runtime": 7909.6164, "train_samples_per_second": 307.851, "train_steps_per_second": 4.811 } ], "max_steps": 38050, "num_train_epochs": 10, "total_flos": 7.386343728182784e+16, "trial_name": null, "trial_params": null }