{ "best_metric": 0.704805850982666, "best_model_checkpoint": "final_models/glue_models/structroberta_s2_50ep//finetune/mnli-mm/checkpoint-12600", "epoch": 3.596059113300493, "global_step": 14600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "eval_accuracy": 0.47517505288124084, "eval_loss": 1.0218451023101807, "eval_runtime": 13.3834, "eval_samples_per_second": 469.539, "eval_steps_per_second": 58.73, "step": 200 }, { "epoch": 0.1, "eval_accuracy": 0.5058879852294922, "eval_loss": 0.9849981069564819, "eval_runtime": 13.3966, "eval_samples_per_second": 469.075, "eval_steps_per_second": 58.672, "step": 400 }, { "epoch": 0.12, "learning_rate": 4.938423645320197e-05, "loss": 1.0369, "step": 500 }, { "epoch": 0.15, "eval_accuracy": 0.5035009384155273, "eval_loss": 0.977361261844635, "eval_runtime": 13.3479, "eval_samples_per_second": 470.784, "eval_steps_per_second": 58.886, "step": 600 }, { "epoch": 0.2, "eval_accuracy": 0.5544239282608032, "eval_loss": 0.9257498383522034, "eval_runtime": 13.332, "eval_samples_per_second": 471.347, "eval_steps_per_second": 58.956, "step": 800 }, { "epoch": 0.25, "learning_rate": 4.876847290640394e-05, "loss": 0.9552, "step": 1000 }, { "epoch": 0.25, "eval_accuracy": 0.5798854231834412, "eval_loss": 0.8948496580123901, "eval_runtime": 13.3129, "eval_samples_per_second": 472.022, "eval_steps_per_second": 59.04, "step": 1000 }, { "epoch": 0.3, "eval_accuracy": 0.5932527184486389, "eval_loss": 0.8739783763885498, "eval_runtime": 13.3024, "eval_samples_per_second": 472.396, "eval_steps_per_second": 59.087, "step": 1200 }, { "epoch": 0.34, "eval_accuracy": 0.5992997884750366, "eval_loss": 0.8715024590492249, "eval_runtime": 13.3455, "eval_samples_per_second": 470.872, "eval_steps_per_second": 58.896, "step": 1400 }, { "epoch": 0.37, "learning_rate": 4.8152709359605915e-05, "loss": 0.9196, "step": 1500 }, { "epoch": 0.39, "eval_accuracy": 0.6125079393386841, "eval_loss": 0.8552348613739014, "eval_runtime": 13.3292, "eval_samples_per_second": 471.446, "eval_steps_per_second": 58.968, "step": 1600 }, { "epoch": 0.44, "eval_accuracy": 0.6031190156936646, "eval_loss": 0.850425660610199, "eval_runtime": 13.3264, "eval_samples_per_second": 471.546, "eval_steps_per_second": 58.981, "step": 1800 }, { "epoch": 0.49, "learning_rate": 4.753694581280788e-05, "loss": 0.8826, "step": 2000 }, { "epoch": 0.49, "eval_accuracy": 0.6261935234069824, "eval_loss": 0.831270158290863, "eval_runtime": 13.3443, "eval_samples_per_second": 470.914, "eval_steps_per_second": 58.902, "step": 2000 }, { "epoch": 0.54, "eval_accuracy": 0.6239656209945679, "eval_loss": 0.8242226243019104, "eval_runtime": 13.3277, "eval_samples_per_second": 471.498, "eval_steps_per_second": 58.975, "step": 2200 }, { "epoch": 0.59, "eval_accuracy": 0.6328771710395813, "eval_loss": 0.811508059501648, "eval_runtime": 13.3485, "eval_samples_per_second": 470.764, "eval_steps_per_second": 58.883, "step": 2400 }, { "epoch": 0.62, "learning_rate": 4.6921182266009855e-05, "loss": 0.8583, "step": 2500 }, { "epoch": 0.64, "eval_accuracy": 0.6341502070426941, "eval_loss": 0.812808632850647, "eval_runtime": 13.2942, "eval_samples_per_second": 472.686, "eval_steps_per_second": 59.123, "step": 2600 }, { "epoch": 0.69, "eval_accuracy": 0.6338319778442383, "eval_loss": 0.8130684494972229, "eval_runtime": 13.3877, "eval_samples_per_second": 469.385, "eval_steps_per_second": 58.711, "step": 2800 }, { "epoch": 0.74, "learning_rate": 4.630541871921182e-05, "loss": 0.844, "step": 3000 }, { "epoch": 0.74, "eval_accuracy": 0.6233291029930115, "eval_loss": 0.8244798183441162, "eval_runtime": 13.2608, "eval_samples_per_second": 473.877, "eval_steps_per_second": 59.272, "step": 3000 }, { "epoch": 0.79, "eval_accuracy": 0.6532463431358337, "eval_loss": 0.7844635844230652, "eval_runtime": 13.2963, "eval_samples_per_second": 472.614, "eval_steps_per_second": 59.114, "step": 3200 }, { "epoch": 0.84, "eval_accuracy": 0.6473583579063416, "eval_loss": 0.8013091683387756, "eval_runtime": 13.2787, "eval_samples_per_second": 473.239, "eval_steps_per_second": 59.192, "step": 3400 }, { "epoch": 0.86, "learning_rate": 4.5689655172413794e-05, "loss": 0.8253, "step": 3500 }, { "epoch": 0.89, "eval_accuracy": 0.6537237167358398, "eval_loss": 0.7885627150535583, "eval_runtime": 13.2853, "eval_samples_per_second": 473.003, "eval_steps_per_second": 59.163, "step": 3600 }, { "epoch": 0.94, "eval_accuracy": 0.657224714756012, "eval_loss": 0.7786855101585388, "eval_runtime": 13.2824, "eval_samples_per_second": 473.108, "eval_steps_per_second": 59.176, "step": 3800 }, { "epoch": 0.99, "learning_rate": 4.507389162561577e-05, "loss": 0.8155, "step": 4000 }, { "epoch": 0.99, "eval_accuracy": 0.6702737212181091, "eval_loss": 0.7679744362831116, "eval_runtime": 13.3377, "eval_samples_per_second": 471.146, "eval_steps_per_second": 58.931, "step": 4000 }, { "epoch": 1.03, "eval_accuracy": 0.6697962880134583, "eval_loss": 0.7682982087135315, "eval_runtime": 13.2938, "eval_samples_per_second": 472.703, "eval_steps_per_second": 59.125, "step": 4200 }, { "epoch": 1.08, "eval_accuracy": 0.6580203771591187, "eval_loss": 0.7912976145744324, "eval_runtime": 13.3163, "eval_samples_per_second": 471.902, "eval_steps_per_second": 59.025, "step": 4400 }, { "epoch": 1.11, "learning_rate": 4.4458128078817734e-05, "loss": 0.7535, "step": 4500 }, { "epoch": 1.13, "eval_accuracy": 0.66343092918396, "eval_loss": 0.7666918039321899, "eval_runtime": 13.3587, "eval_samples_per_second": 470.404, "eval_steps_per_second": 58.838, "step": 4600 }, { "epoch": 1.18, "eval_accuracy": 0.6731381416320801, "eval_loss": 0.758724570274353, "eval_runtime": 13.3698, "eval_samples_per_second": 470.016, "eval_steps_per_second": 58.789, "step": 4800 }, { "epoch": 1.23, "learning_rate": 4.384236453201971e-05, "loss": 0.7414, "step": 5000 }, { "epoch": 1.23, "eval_accuracy": 0.6814131140708923, "eval_loss": 0.7651153802871704, "eval_runtime": 13.3862, "eval_samples_per_second": 469.437, "eval_steps_per_second": 58.717, "step": 5000 }, { "epoch": 1.28, "eval_accuracy": 0.676957368850708, "eval_loss": 0.7536196112632751, "eval_runtime": 13.3955, "eval_samples_per_second": 469.111, "eval_steps_per_second": 58.676, "step": 5200 }, { "epoch": 1.33, "eval_accuracy": 0.6669318675994873, "eval_loss": 0.7888858318328857, "eval_runtime": 13.397, "eval_samples_per_second": 469.06, "eval_steps_per_second": 58.67, "step": 5400 }, { "epoch": 1.35, "learning_rate": 4.3226600985221674e-05, "loss": 0.7301, "step": 5500 }, { "epoch": 1.38, "eval_accuracy": 0.6752068996429443, "eval_loss": 0.7618388533592224, "eval_runtime": 13.4029, "eval_samples_per_second": 468.852, "eval_steps_per_second": 58.644, "step": 5600 }, { "epoch": 1.43, "eval_accuracy": 0.6767982244491577, "eval_loss": 0.7687408328056335, "eval_runtime": 13.3939, "eval_samples_per_second": 469.17, "eval_steps_per_second": 58.684, "step": 5800 }, { "epoch": 1.48, "learning_rate": 4.261083743842365e-05, "loss": 0.7268, "step": 6000 }, { "epoch": 1.48, "eval_accuracy": 0.6807765960693359, "eval_loss": 0.736904501914978, "eval_runtime": 13.3486, "eval_samples_per_second": 470.762, "eval_steps_per_second": 58.883, "step": 6000 }, { "epoch": 1.53, "eval_accuracy": 0.684118390083313, "eval_loss": 0.7463005185127258, "eval_runtime": 13.3702, "eval_samples_per_second": 469.999, "eval_steps_per_second": 58.787, "step": 6200 }, { "epoch": 1.58, "eval_accuracy": 0.6817314028739929, "eval_loss": 0.7449538707733154, "eval_runtime": 13.3959, "eval_samples_per_second": 469.098, "eval_steps_per_second": 58.675, "step": 6400 }, { "epoch": 1.6, "learning_rate": 4.199507389162562e-05, "loss": 0.7356, "step": 6500 }, { "epoch": 1.63, "eval_accuracy": 0.6713876724243164, "eval_loss": 0.7528935670852661, "eval_runtime": 13.3896, "eval_samples_per_second": 469.319, "eval_steps_per_second": 58.702, "step": 6600 }, { "epoch": 1.67, "eval_accuracy": 0.6662953495979309, "eval_loss": 0.7754761576652527, "eval_runtime": 13.4001, "eval_samples_per_second": 468.953, "eval_steps_per_second": 58.656, "step": 6800 }, { "epoch": 1.72, "learning_rate": 4.1379310344827587e-05, "loss": 0.7266, "step": 7000 }, { "epoch": 1.72, "eval_accuracy": 0.6858688592910767, "eval_loss": 0.7322450280189514, "eval_runtime": 13.3919, "eval_samples_per_second": 469.241, "eval_steps_per_second": 58.692, "step": 7000 }, { "epoch": 1.77, "eval_accuracy": 0.6847549080848694, "eval_loss": 0.7321462035179138, "eval_runtime": 13.3912, "eval_samples_per_second": 469.265, "eval_steps_per_second": 58.695, "step": 7200 }, { "epoch": 1.82, "eval_accuracy": 0.684118390083313, "eval_loss": 0.7431775331497192, "eval_runtime": 13.297, "eval_samples_per_second": 472.587, "eval_steps_per_second": 59.111, "step": 7400 }, { "epoch": 1.85, "learning_rate": 4.076354679802955e-05, "loss": 0.727, "step": 7500 }, { "epoch": 1.87, "eval_accuracy": 0.6823679208755493, "eval_loss": 0.7466910481452942, "eval_runtime": 13.2854, "eval_samples_per_second": 473.001, "eval_steps_per_second": 59.163, "step": 7600 }, { "epoch": 1.92, "eval_accuracy": 0.6952577829360962, "eval_loss": 0.7211511731147766, "eval_runtime": 13.2945, "eval_samples_per_second": 472.678, "eval_steps_per_second": 59.122, "step": 7800 }, { "epoch": 1.97, "learning_rate": 4.014778325123153e-05, "loss": 0.7198, "step": 8000 }, { "epoch": 1.97, "eval_accuracy": 0.6871419548988342, "eval_loss": 0.7214533686637878, "eval_runtime": 13.2721, "eval_samples_per_second": 473.473, "eval_steps_per_second": 59.222, "step": 8000 }, { "epoch": 2.02, "eval_accuracy": 0.6879376173019409, "eval_loss": 0.7568030953407288, "eval_runtime": 13.3149, "eval_samples_per_second": 471.952, "eval_steps_per_second": 59.032, "step": 8200 }, { "epoch": 2.07, "eval_accuracy": 0.6838001012802124, "eval_loss": 0.7772753238677979, "eval_runtime": 13.2999, "eval_samples_per_second": 472.486, "eval_steps_per_second": 59.098, "step": 8400 }, { "epoch": 2.09, "learning_rate": 3.95320197044335e-05, "loss": 0.6354, "step": 8500 }, { "epoch": 2.12, "eval_accuracy": 0.6828452944755554, "eval_loss": 0.780381441116333, "eval_runtime": 13.3032, "eval_samples_per_second": 472.368, "eval_steps_per_second": 59.084, "step": 8600 }, { "epoch": 2.17, "eval_accuracy": 0.6904837489128113, "eval_loss": 0.7529885172843933, "eval_runtime": 13.3496, "eval_samples_per_second": 470.727, "eval_steps_per_second": 58.878, "step": 8800 }, { "epoch": 2.22, "learning_rate": 3.891625615763547e-05, "loss": 0.6095, "step": 9000 }, { "epoch": 2.22, "eval_accuracy": 0.682208776473999, "eval_loss": 0.7504951357841492, "eval_runtime": 13.3411, "eval_samples_per_second": 471.026, "eval_steps_per_second": 58.916, "step": 9000 }, { "epoch": 2.27, "eval_accuracy": 0.6930299401283264, "eval_loss": 0.7513622641563416, "eval_runtime": 13.4014, "eval_samples_per_second": 468.907, "eval_steps_per_second": 58.651, "step": 9200 }, { "epoch": 2.32, "eval_accuracy": 0.6855506300926208, "eval_loss": 0.7566074132919312, "eval_runtime": 13.3927, "eval_samples_per_second": 469.21, "eval_steps_per_second": 58.689, "step": 9400 }, { "epoch": 2.34, "learning_rate": 3.830049261083744e-05, "loss": 0.6186, "step": 9500 }, { "epoch": 2.36, "eval_accuracy": 0.6952577829360962, "eval_loss": 0.734920859336853, "eval_runtime": 13.4003, "eval_samples_per_second": 468.944, "eval_steps_per_second": 58.655, "step": 9600 }, { "epoch": 2.41, "eval_accuracy": 0.6968491673469543, "eval_loss": 0.7476750612258911, "eval_runtime": 13.3899, "eval_samples_per_second": 469.31, "eval_steps_per_second": 58.701, "step": 9800 }, { "epoch": 2.46, "learning_rate": 3.768472906403941e-05, "loss": 0.6154, "step": 10000 }, { "epoch": 2.46, "eval_accuracy": 0.6992361545562744, "eval_loss": 0.7323787212371826, "eval_runtime": 13.4055, "eval_samples_per_second": 468.764, "eval_steps_per_second": 58.633, "step": 10000 }, { "epoch": 2.51, "eval_accuracy": 0.6936664581298828, "eval_loss": 0.7456046938896179, "eval_runtime": 13.3722, "eval_samples_per_second": 469.932, "eval_steps_per_second": 58.779, "step": 10200 }, { "epoch": 2.56, "eval_accuracy": 0.6892107129096985, "eval_loss": 0.7425976991653442, "eval_runtime": 13.3595, "eval_samples_per_second": 470.377, "eval_steps_per_second": 58.835, "step": 10400 }, { "epoch": 2.59, "learning_rate": 3.7068965517241385e-05, "loss": 0.6239, "step": 10500 }, { "epoch": 2.61, "eval_accuracy": 0.700509250164032, "eval_loss": 0.736376941204071, "eval_runtime": 13.3381, "eval_samples_per_second": 471.133, "eval_steps_per_second": 58.929, "step": 10600 }, { "epoch": 2.66, "eval_accuracy": 0.6992361545562744, "eval_loss": 0.7204196453094482, "eval_runtime": 13.3897, "eval_samples_per_second": 469.316, "eval_steps_per_second": 58.702, "step": 10800 }, { "epoch": 2.71, "learning_rate": 3.645320197044335e-05, "loss": 0.6181, "step": 11000 }, { "epoch": 2.71, "eval_accuracy": 0.702896237373352, "eval_loss": 0.728840708732605, "eval_runtime": 13.3661, "eval_samples_per_second": 470.144, "eval_steps_per_second": 58.805, "step": 11000 }, { "epoch": 2.76, "eval_accuracy": 0.7022597193717957, "eval_loss": 0.7289478182792664, "eval_runtime": 13.3368, "eval_samples_per_second": 471.176, "eval_steps_per_second": 58.935, "step": 11200 }, { "epoch": 2.81, "eval_accuracy": 0.7017822861671448, "eval_loss": 0.7217112183570862, "eval_runtime": 13.3515, "eval_samples_per_second": 470.66, "eval_steps_per_second": 58.87, "step": 11400 }, { "epoch": 2.83, "learning_rate": 3.583743842364532e-05, "loss": 0.6225, "step": 11500 }, { "epoch": 2.86, "eval_accuracy": 0.6963717341423035, "eval_loss": 0.7319472432136536, "eval_runtime": 13.3549, "eval_samples_per_second": 470.538, "eval_steps_per_second": 58.855, "step": 11600 }, { "epoch": 2.91, "eval_accuracy": 0.6997135877609253, "eval_loss": 0.7350678443908691, "eval_runtime": 13.4093, "eval_samples_per_second": 468.63, "eval_steps_per_second": 58.616, "step": 11800 }, { "epoch": 2.96, "learning_rate": 3.522167487684729e-05, "loss": 0.6206, "step": 12000 }, { "epoch": 2.96, "eval_accuracy": 0.6992361545562744, "eval_loss": 0.7226902842521667, "eval_runtime": 13.4077, "eval_samples_per_second": 468.686, "eval_steps_per_second": 58.623, "step": 12000 }, { "epoch": 3.0, "eval_accuracy": 0.6949395537376404, "eval_loss": 0.7860310077667236, "eval_runtime": 13.3803, "eval_samples_per_second": 469.646, "eval_steps_per_second": 58.743, "step": 12200 }, { "epoch": 3.05, "eval_accuracy": 0.7022597193717957, "eval_loss": 0.7564778923988342, "eval_runtime": 13.3381, "eval_samples_per_second": 471.13, "eval_steps_per_second": 58.929, "step": 12400 }, { "epoch": 3.08, "learning_rate": 3.4605911330049265e-05, "loss": 0.5444, "step": 12500 }, { "epoch": 3.1, "eval_accuracy": 0.704805850982666, "eval_loss": 0.7792636156082153, "eval_runtime": 13.3882, "eval_samples_per_second": 469.368, "eval_steps_per_second": 58.708, "step": 12600 }, { "epoch": 3.15, "eval_accuracy": 0.7030553817749023, "eval_loss": 0.7826818823814392, "eval_runtime": 13.354, "eval_samples_per_second": 470.571, "eval_steps_per_second": 58.859, "step": 12800 }, { "epoch": 3.2, "learning_rate": 3.399014778325123e-05, "loss": 0.4956, "step": 13000 }, { "epoch": 3.2, "eval_accuracy": 0.7003501057624817, "eval_loss": 0.7699744701385498, "eval_runtime": 13.3655, "eval_samples_per_second": 470.166, "eval_steps_per_second": 58.808, "step": 13000 }, { "epoch": 3.25, "eval_accuracy": 0.6955760717391968, "eval_loss": 0.8116973638534546, "eval_runtime": 13.3615, "eval_samples_per_second": 470.305, "eval_steps_per_second": 58.826, "step": 13200 }, { "epoch": 3.3, "eval_accuracy": 0.6973265409469604, "eval_loss": 0.8015252351760864, "eval_runtime": 13.3418, "eval_samples_per_second": 471.002, "eval_steps_per_second": 58.913, "step": 13400 }, { "epoch": 3.33, "learning_rate": 3.3374384236453204e-05, "loss": 0.4992, "step": 13500 }, { "epoch": 3.35, "eval_accuracy": 0.6947804093360901, "eval_loss": 0.7988014221191406, "eval_runtime": 13.3581, "eval_samples_per_second": 470.428, "eval_steps_per_second": 58.841, "step": 13600 }, { "epoch": 3.4, "eval_accuracy": 0.6971673965454102, "eval_loss": 0.7811249494552612, "eval_runtime": 13.4127, "eval_samples_per_second": 468.511, "eval_steps_per_second": 58.601, "step": 13800 }, { "epoch": 3.45, "learning_rate": 3.275862068965517e-05, "loss": 0.5008, "step": 14000 }, { "epoch": 3.45, "eval_accuracy": 0.6958943605422974, "eval_loss": 0.7732043266296387, "eval_runtime": 13.3777, "eval_samples_per_second": 469.736, "eval_steps_per_second": 58.754, "step": 14000 }, { "epoch": 3.5, "eval_accuracy": 0.6947804093360901, "eval_loss": 0.8099920749664307, "eval_runtime": 13.4361, "eval_samples_per_second": 467.694, "eval_steps_per_second": 58.499, "step": 14200 }, { "epoch": 3.55, "eval_accuracy": 0.6917568445205688, "eval_loss": 0.7906026244163513, "eval_runtime": 13.3744, "eval_samples_per_second": 469.852, "eval_steps_per_second": 58.769, "step": 14400 }, { "epoch": 3.57, "learning_rate": 3.2142857142857144e-05, "loss": 0.5138, "step": 14500 }, { "epoch": 3.6, "eval_accuracy": 0.6957352161407471, "eval_loss": 0.8155683279037476, "eval_runtime": 13.328, "eval_samples_per_second": 471.489, "eval_steps_per_second": 58.974, "step": 14600 }, { "epoch": 3.6, "step": 14600, "total_flos": 8.557886095148544e+16, "train_loss": 0.7061373245552794, "train_runtime": 5975.4138, "train_samples_per_second": 434.748, "train_steps_per_second": 6.795 } ], "max_steps": 40600, "num_train_epochs": 10, "total_flos": 8.557886095148544e+16, "trial_name": null, "trial_params": null }