Omar
update
88cccb3
{
"best_metric": 0.704805850982666,
"best_model_checkpoint": "final_models/glue_models/structroberta_s2_50ep//finetune/mnli-mm/checkpoint-12600",
"epoch": 3.596059113300493,
"global_step": 14600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"eval_accuracy": 0.47517505288124084,
"eval_loss": 1.0218451023101807,
"eval_runtime": 13.3834,
"eval_samples_per_second": 469.539,
"eval_steps_per_second": 58.73,
"step": 200
},
{
"epoch": 0.1,
"eval_accuracy": 0.5058879852294922,
"eval_loss": 0.9849981069564819,
"eval_runtime": 13.3966,
"eval_samples_per_second": 469.075,
"eval_steps_per_second": 58.672,
"step": 400
},
{
"epoch": 0.12,
"learning_rate": 4.938423645320197e-05,
"loss": 1.0369,
"step": 500
},
{
"epoch": 0.15,
"eval_accuracy": 0.5035009384155273,
"eval_loss": 0.977361261844635,
"eval_runtime": 13.3479,
"eval_samples_per_second": 470.784,
"eval_steps_per_second": 58.886,
"step": 600
},
{
"epoch": 0.2,
"eval_accuracy": 0.5544239282608032,
"eval_loss": 0.9257498383522034,
"eval_runtime": 13.332,
"eval_samples_per_second": 471.347,
"eval_steps_per_second": 58.956,
"step": 800
},
{
"epoch": 0.25,
"learning_rate": 4.876847290640394e-05,
"loss": 0.9552,
"step": 1000
},
{
"epoch": 0.25,
"eval_accuracy": 0.5798854231834412,
"eval_loss": 0.8948496580123901,
"eval_runtime": 13.3129,
"eval_samples_per_second": 472.022,
"eval_steps_per_second": 59.04,
"step": 1000
},
{
"epoch": 0.3,
"eval_accuracy": 0.5932527184486389,
"eval_loss": 0.8739783763885498,
"eval_runtime": 13.3024,
"eval_samples_per_second": 472.396,
"eval_steps_per_second": 59.087,
"step": 1200
},
{
"epoch": 0.34,
"eval_accuracy": 0.5992997884750366,
"eval_loss": 0.8715024590492249,
"eval_runtime": 13.3455,
"eval_samples_per_second": 470.872,
"eval_steps_per_second": 58.896,
"step": 1400
},
{
"epoch": 0.37,
"learning_rate": 4.8152709359605915e-05,
"loss": 0.9196,
"step": 1500
},
{
"epoch": 0.39,
"eval_accuracy": 0.6125079393386841,
"eval_loss": 0.8552348613739014,
"eval_runtime": 13.3292,
"eval_samples_per_second": 471.446,
"eval_steps_per_second": 58.968,
"step": 1600
},
{
"epoch": 0.44,
"eval_accuracy": 0.6031190156936646,
"eval_loss": 0.850425660610199,
"eval_runtime": 13.3264,
"eval_samples_per_second": 471.546,
"eval_steps_per_second": 58.981,
"step": 1800
},
{
"epoch": 0.49,
"learning_rate": 4.753694581280788e-05,
"loss": 0.8826,
"step": 2000
},
{
"epoch": 0.49,
"eval_accuracy": 0.6261935234069824,
"eval_loss": 0.831270158290863,
"eval_runtime": 13.3443,
"eval_samples_per_second": 470.914,
"eval_steps_per_second": 58.902,
"step": 2000
},
{
"epoch": 0.54,
"eval_accuracy": 0.6239656209945679,
"eval_loss": 0.8242226243019104,
"eval_runtime": 13.3277,
"eval_samples_per_second": 471.498,
"eval_steps_per_second": 58.975,
"step": 2200
},
{
"epoch": 0.59,
"eval_accuracy": 0.6328771710395813,
"eval_loss": 0.811508059501648,
"eval_runtime": 13.3485,
"eval_samples_per_second": 470.764,
"eval_steps_per_second": 58.883,
"step": 2400
},
{
"epoch": 0.62,
"learning_rate": 4.6921182266009855e-05,
"loss": 0.8583,
"step": 2500
},
{
"epoch": 0.64,
"eval_accuracy": 0.6341502070426941,
"eval_loss": 0.812808632850647,
"eval_runtime": 13.2942,
"eval_samples_per_second": 472.686,
"eval_steps_per_second": 59.123,
"step": 2600
},
{
"epoch": 0.69,
"eval_accuracy": 0.6338319778442383,
"eval_loss": 0.8130684494972229,
"eval_runtime": 13.3877,
"eval_samples_per_second": 469.385,
"eval_steps_per_second": 58.711,
"step": 2800
},
{
"epoch": 0.74,
"learning_rate": 4.630541871921182e-05,
"loss": 0.844,
"step": 3000
},
{
"epoch": 0.74,
"eval_accuracy": 0.6233291029930115,
"eval_loss": 0.8244798183441162,
"eval_runtime": 13.2608,
"eval_samples_per_second": 473.877,
"eval_steps_per_second": 59.272,
"step": 3000
},
{
"epoch": 0.79,
"eval_accuracy": 0.6532463431358337,
"eval_loss": 0.7844635844230652,
"eval_runtime": 13.2963,
"eval_samples_per_second": 472.614,
"eval_steps_per_second": 59.114,
"step": 3200
},
{
"epoch": 0.84,
"eval_accuracy": 0.6473583579063416,
"eval_loss": 0.8013091683387756,
"eval_runtime": 13.2787,
"eval_samples_per_second": 473.239,
"eval_steps_per_second": 59.192,
"step": 3400
},
{
"epoch": 0.86,
"learning_rate": 4.5689655172413794e-05,
"loss": 0.8253,
"step": 3500
},
{
"epoch": 0.89,
"eval_accuracy": 0.6537237167358398,
"eval_loss": 0.7885627150535583,
"eval_runtime": 13.2853,
"eval_samples_per_second": 473.003,
"eval_steps_per_second": 59.163,
"step": 3600
},
{
"epoch": 0.94,
"eval_accuracy": 0.657224714756012,
"eval_loss": 0.7786855101585388,
"eval_runtime": 13.2824,
"eval_samples_per_second": 473.108,
"eval_steps_per_second": 59.176,
"step": 3800
},
{
"epoch": 0.99,
"learning_rate": 4.507389162561577e-05,
"loss": 0.8155,
"step": 4000
},
{
"epoch": 0.99,
"eval_accuracy": 0.6702737212181091,
"eval_loss": 0.7679744362831116,
"eval_runtime": 13.3377,
"eval_samples_per_second": 471.146,
"eval_steps_per_second": 58.931,
"step": 4000
},
{
"epoch": 1.03,
"eval_accuracy": 0.6697962880134583,
"eval_loss": 0.7682982087135315,
"eval_runtime": 13.2938,
"eval_samples_per_second": 472.703,
"eval_steps_per_second": 59.125,
"step": 4200
},
{
"epoch": 1.08,
"eval_accuracy": 0.6580203771591187,
"eval_loss": 0.7912976145744324,
"eval_runtime": 13.3163,
"eval_samples_per_second": 471.902,
"eval_steps_per_second": 59.025,
"step": 4400
},
{
"epoch": 1.11,
"learning_rate": 4.4458128078817734e-05,
"loss": 0.7535,
"step": 4500
},
{
"epoch": 1.13,
"eval_accuracy": 0.66343092918396,
"eval_loss": 0.7666918039321899,
"eval_runtime": 13.3587,
"eval_samples_per_second": 470.404,
"eval_steps_per_second": 58.838,
"step": 4600
},
{
"epoch": 1.18,
"eval_accuracy": 0.6731381416320801,
"eval_loss": 0.758724570274353,
"eval_runtime": 13.3698,
"eval_samples_per_second": 470.016,
"eval_steps_per_second": 58.789,
"step": 4800
},
{
"epoch": 1.23,
"learning_rate": 4.384236453201971e-05,
"loss": 0.7414,
"step": 5000
},
{
"epoch": 1.23,
"eval_accuracy": 0.6814131140708923,
"eval_loss": 0.7651153802871704,
"eval_runtime": 13.3862,
"eval_samples_per_second": 469.437,
"eval_steps_per_second": 58.717,
"step": 5000
},
{
"epoch": 1.28,
"eval_accuracy": 0.676957368850708,
"eval_loss": 0.7536196112632751,
"eval_runtime": 13.3955,
"eval_samples_per_second": 469.111,
"eval_steps_per_second": 58.676,
"step": 5200
},
{
"epoch": 1.33,
"eval_accuracy": 0.6669318675994873,
"eval_loss": 0.7888858318328857,
"eval_runtime": 13.397,
"eval_samples_per_second": 469.06,
"eval_steps_per_second": 58.67,
"step": 5400
},
{
"epoch": 1.35,
"learning_rate": 4.3226600985221674e-05,
"loss": 0.7301,
"step": 5500
},
{
"epoch": 1.38,
"eval_accuracy": 0.6752068996429443,
"eval_loss": 0.7618388533592224,
"eval_runtime": 13.4029,
"eval_samples_per_second": 468.852,
"eval_steps_per_second": 58.644,
"step": 5600
},
{
"epoch": 1.43,
"eval_accuracy": 0.6767982244491577,
"eval_loss": 0.7687408328056335,
"eval_runtime": 13.3939,
"eval_samples_per_second": 469.17,
"eval_steps_per_second": 58.684,
"step": 5800
},
{
"epoch": 1.48,
"learning_rate": 4.261083743842365e-05,
"loss": 0.7268,
"step": 6000
},
{
"epoch": 1.48,
"eval_accuracy": 0.6807765960693359,
"eval_loss": 0.736904501914978,
"eval_runtime": 13.3486,
"eval_samples_per_second": 470.762,
"eval_steps_per_second": 58.883,
"step": 6000
},
{
"epoch": 1.53,
"eval_accuracy": 0.684118390083313,
"eval_loss": 0.7463005185127258,
"eval_runtime": 13.3702,
"eval_samples_per_second": 469.999,
"eval_steps_per_second": 58.787,
"step": 6200
},
{
"epoch": 1.58,
"eval_accuracy": 0.6817314028739929,
"eval_loss": 0.7449538707733154,
"eval_runtime": 13.3959,
"eval_samples_per_second": 469.098,
"eval_steps_per_second": 58.675,
"step": 6400
},
{
"epoch": 1.6,
"learning_rate": 4.199507389162562e-05,
"loss": 0.7356,
"step": 6500
},
{
"epoch": 1.63,
"eval_accuracy": 0.6713876724243164,
"eval_loss": 0.7528935670852661,
"eval_runtime": 13.3896,
"eval_samples_per_second": 469.319,
"eval_steps_per_second": 58.702,
"step": 6600
},
{
"epoch": 1.67,
"eval_accuracy": 0.6662953495979309,
"eval_loss": 0.7754761576652527,
"eval_runtime": 13.4001,
"eval_samples_per_second": 468.953,
"eval_steps_per_second": 58.656,
"step": 6800
},
{
"epoch": 1.72,
"learning_rate": 4.1379310344827587e-05,
"loss": 0.7266,
"step": 7000
},
{
"epoch": 1.72,
"eval_accuracy": 0.6858688592910767,
"eval_loss": 0.7322450280189514,
"eval_runtime": 13.3919,
"eval_samples_per_second": 469.241,
"eval_steps_per_second": 58.692,
"step": 7000
},
{
"epoch": 1.77,
"eval_accuracy": 0.6847549080848694,
"eval_loss": 0.7321462035179138,
"eval_runtime": 13.3912,
"eval_samples_per_second": 469.265,
"eval_steps_per_second": 58.695,
"step": 7200
},
{
"epoch": 1.82,
"eval_accuracy": 0.684118390083313,
"eval_loss": 0.7431775331497192,
"eval_runtime": 13.297,
"eval_samples_per_second": 472.587,
"eval_steps_per_second": 59.111,
"step": 7400
},
{
"epoch": 1.85,
"learning_rate": 4.076354679802955e-05,
"loss": 0.727,
"step": 7500
},
{
"epoch": 1.87,
"eval_accuracy": 0.6823679208755493,
"eval_loss": 0.7466910481452942,
"eval_runtime": 13.2854,
"eval_samples_per_second": 473.001,
"eval_steps_per_second": 59.163,
"step": 7600
},
{
"epoch": 1.92,
"eval_accuracy": 0.6952577829360962,
"eval_loss": 0.7211511731147766,
"eval_runtime": 13.2945,
"eval_samples_per_second": 472.678,
"eval_steps_per_second": 59.122,
"step": 7800
},
{
"epoch": 1.97,
"learning_rate": 4.014778325123153e-05,
"loss": 0.7198,
"step": 8000
},
{
"epoch": 1.97,
"eval_accuracy": 0.6871419548988342,
"eval_loss": 0.7214533686637878,
"eval_runtime": 13.2721,
"eval_samples_per_second": 473.473,
"eval_steps_per_second": 59.222,
"step": 8000
},
{
"epoch": 2.02,
"eval_accuracy": 0.6879376173019409,
"eval_loss": 0.7568030953407288,
"eval_runtime": 13.3149,
"eval_samples_per_second": 471.952,
"eval_steps_per_second": 59.032,
"step": 8200
},
{
"epoch": 2.07,
"eval_accuracy": 0.6838001012802124,
"eval_loss": 0.7772753238677979,
"eval_runtime": 13.2999,
"eval_samples_per_second": 472.486,
"eval_steps_per_second": 59.098,
"step": 8400
},
{
"epoch": 2.09,
"learning_rate": 3.95320197044335e-05,
"loss": 0.6354,
"step": 8500
},
{
"epoch": 2.12,
"eval_accuracy": 0.6828452944755554,
"eval_loss": 0.780381441116333,
"eval_runtime": 13.3032,
"eval_samples_per_second": 472.368,
"eval_steps_per_second": 59.084,
"step": 8600
},
{
"epoch": 2.17,
"eval_accuracy": 0.6904837489128113,
"eval_loss": 0.7529885172843933,
"eval_runtime": 13.3496,
"eval_samples_per_second": 470.727,
"eval_steps_per_second": 58.878,
"step": 8800
},
{
"epoch": 2.22,
"learning_rate": 3.891625615763547e-05,
"loss": 0.6095,
"step": 9000
},
{
"epoch": 2.22,
"eval_accuracy": 0.682208776473999,
"eval_loss": 0.7504951357841492,
"eval_runtime": 13.3411,
"eval_samples_per_second": 471.026,
"eval_steps_per_second": 58.916,
"step": 9000
},
{
"epoch": 2.27,
"eval_accuracy": 0.6930299401283264,
"eval_loss": 0.7513622641563416,
"eval_runtime": 13.4014,
"eval_samples_per_second": 468.907,
"eval_steps_per_second": 58.651,
"step": 9200
},
{
"epoch": 2.32,
"eval_accuracy": 0.6855506300926208,
"eval_loss": 0.7566074132919312,
"eval_runtime": 13.3927,
"eval_samples_per_second": 469.21,
"eval_steps_per_second": 58.689,
"step": 9400
},
{
"epoch": 2.34,
"learning_rate": 3.830049261083744e-05,
"loss": 0.6186,
"step": 9500
},
{
"epoch": 2.36,
"eval_accuracy": 0.6952577829360962,
"eval_loss": 0.734920859336853,
"eval_runtime": 13.4003,
"eval_samples_per_second": 468.944,
"eval_steps_per_second": 58.655,
"step": 9600
},
{
"epoch": 2.41,
"eval_accuracy": 0.6968491673469543,
"eval_loss": 0.7476750612258911,
"eval_runtime": 13.3899,
"eval_samples_per_second": 469.31,
"eval_steps_per_second": 58.701,
"step": 9800
},
{
"epoch": 2.46,
"learning_rate": 3.768472906403941e-05,
"loss": 0.6154,
"step": 10000
},
{
"epoch": 2.46,
"eval_accuracy": 0.6992361545562744,
"eval_loss": 0.7323787212371826,
"eval_runtime": 13.4055,
"eval_samples_per_second": 468.764,
"eval_steps_per_second": 58.633,
"step": 10000
},
{
"epoch": 2.51,
"eval_accuracy": 0.6936664581298828,
"eval_loss": 0.7456046938896179,
"eval_runtime": 13.3722,
"eval_samples_per_second": 469.932,
"eval_steps_per_second": 58.779,
"step": 10200
},
{
"epoch": 2.56,
"eval_accuracy": 0.6892107129096985,
"eval_loss": 0.7425976991653442,
"eval_runtime": 13.3595,
"eval_samples_per_second": 470.377,
"eval_steps_per_second": 58.835,
"step": 10400
},
{
"epoch": 2.59,
"learning_rate": 3.7068965517241385e-05,
"loss": 0.6239,
"step": 10500
},
{
"epoch": 2.61,
"eval_accuracy": 0.700509250164032,
"eval_loss": 0.736376941204071,
"eval_runtime": 13.3381,
"eval_samples_per_second": 471.133,
"eval_steps_per_second": 58.929,
"step": 10600
},
{
"epoch": 2.66,
"eval_accuracy": 0.6992361545562744,
"eval_loss": 0.7204196453094482,
"eval_runtime": 13.3897,
"eval_samples_per_second": 469.316,
"eval_steps_per_second": 58.702,
"step": 10800
},
{
"epoch": 2.71,
"learning_rate": 3.645320197044335e-05,
"loss": 0.6181,
"step": 11000
},
{
"epoch": 2.71,
"eval_accuracy": 0.702896237373352,
"eval_loss": 0.728840708732605,
"eval_runtime": 13.3661,
"eval_samples_per_second": 470.144,
"eval_steps_per_second": 58.805,
"step": 11000
},
{
"epoch": 2.76,
"eval_accuracy": 0.7022597193717957,
"eval_loss": 0.7289478182792664,
"eval_runtime": 13.3368,
"eval_samples_per_second": 471.176,
"eval_steps_per_second": 58.935,
"step": 11200
},
{
"epoch": 2.81,
"eval_accuracy": 0.7017822861671448,
"eval_loss": 0.7217112183570862,
"eval_runtime": 13.3515,
"eval_samples_per_second": 470.66,
"eval_steps_per_second": 58.87,
"step": 11400
},
{
"epoch": 2.83,
"learning_rate": 3.583743842364532e-05,
"loss": 0.6225,
"step": 11500
},
{
"epoch": 2.86,
"eval_accuracy": 0.6963717341423035,
"eval_loss": 0.7319472432136536,
"eval_runtime": 13.3549,
"eval_samples_per_second": 470.538,
"eval_steps_per_second": 58.855,
"step": 11600
},
{
"epoch": 2.91,
"eval_accuracy": 0.6997135877609253,
"eval_loss": 0.7350678443908691,
"eval_runtime": 13.4093,
"eval_samples_per_second": 468.63,
"eval_steps_per_second": 58.616,
"step": 11800
},
{
"epoch": 2.96,
"learning_rate": 3.522167487684729e-05,
"loss": 0.6206,
"step": 12000
},
{
"epoch": 2.96,
"eval_accuracy": 0.6992361545562744,
"eval_loss": 0.7226902842521667,
"eval_runtime": 13.4077,
"eval_samples_per_second": 468.686,
"eval_steps_per_second": 58.623,
"step": 12000
},
{
"epoch": 3.0,
"eval_accuracy": 0.6949395537376404,
"eval_loss": 0.7860310077667236,
"eval_runtime": 13.3803,
"eval_samples_per_second": 469.646,
"eval_steps_per_second": 58.743,
"step": 12200
},
{
"epoch": 3.05,
"eval_accuracy": 0.7022597193717957,
"eval_loss": 0.7564778923988342,
"eval_runtime": 13.3381,
"eval_samples_per_second": 471.13,
"eval_steps_per_second": 58.929,
"step": 12400
},
{
"epoch": 3.08,
"learning_rate": 3.4605911330049265e-05,
"loss": 0.5444,
"step": 12500
},
{
"epoch": 3.1,
"eval_accuracy": 0.704805850982666,
"eval_loss": 0.7792636156082153,
"eval_runtime": 13.3882,
"eval_samples_per_second": 469.368,
"eval_steps_per_second": 58.708,
"step": 12600
},
{
"epoch": 3.15,
"eval_accuracy": 0.7030553817749023,
"eval_loss": 0.7826818823814392,
"eval_runtime": 13.354,
"eval_samples_per_second": 470.571,
"eval_steps_per_second": 58.859,
"step": 12800
},
{
"epoch": 3.2,
"learning_rate": 3.399014778325123e-05,
"loss": 0.4956,
"step": 13000
},
{
"epoch": 3.2,
"eval_accuracy": 0.7003501057624817,
"eval_loss": 0.7699744701385498,
"eval_runtime": 13.3655,
"eval_samples_per_second": 470.166,
"eval_steps_per_second": 58.808,
"step": 13000
},
{
"epoch": 3.25,
"eval_accuracy": 0.6955760717391968,
"eval_loss": 0.8116973638534546,
"eval_runtime": 13.3615,
"eval_samples_per_second": 470.305,
"eval_steps_per_second": 58.826,
"step": 13200
},
{
"epoch": 3.3,
"eval_accuracy": 0.6973265409469604,
"eval_loss": 0.8015252351760864,
"eval_runtime": 13.3418,
"eval_samples_per_second": 471.002,
"eval_steps_per_second": 58.913,
"step": 13400
},
{
"epoch": 3.33,
"learning_rate": 3.3374384236453204e-05,
"loss": 0.4992,
"step": 13500
},
{
"epoch": 3.35,
"eval_accuracy": 0.6947804093360901,
"eval_loss": 0.7988014221191406,
"eval_runtime": 13.3581,
"eval_samples_per_second": 470.428,
"eval_steps_per_second": 58.841,
"step": 13600
},
{
"epoch": 3.4,
"eval_accuracy": 0.6971673965454102,
"eval_loss": 0.7811249494552612,
"eval_runtime": 13.4127,
"eval_samples_per_second": 468.511,
"eval_steps_per_second": 58.601,
"step": 13800
},
{
"epoch": 3.45,
"learning_rate": 3.275862068965517e-05,
"loss": 0.5008,
"step": 14000
},
{
"epoch": 3.45,
"eval_accuracy": 0.6958943605422974,
"eval_loss": 0.7732043266296387,
"eval_runtime": 13.3777,
"eval_samples_per_second": 469.736,
"eval_steps_per_second": 58.754,
"step": 14000
},
{
"epoch": 3.5,
"eval_accuracy": 0.6947804093360901,
"eval_loss": 0.8099920749664307,
"eval_runtime": 13.4361,
"eval_samples_per_second": 467.694,
"eval_steps_per_second": 58.499,
"step": 14200
},
{
"epoch": 3.55,
"eval_accuracy": 0.6917568445205688,
"eval_loss": 0.7906026244163513,
"eval_runtime": 13.3744,
"eval_samples_per_second": 469.852,
"eval_steps_per_second": 58.769,
"step": 14400
},
{
"epoch": 3.57,
"learning_rate": 3.2142857142857144e-05,
"loss": 0.5138,
"step": 14500
},
{
"epoch": 3.6,
"eval_accuracy": 0.6957352161407471,
"eval_loss": 0.8155683279037476,
"eval_runtime": 13.328,
"eval_samples_per_second": 471.489,
"eval_steps_per_second": 58.974,
"step": 14600
},
{
"epoch": 3.6,
"step": 14600,
"total_flos": 8.557886095148544e+16,
"train_loss": 0.7061373245552794,
"train_runtime": 5975.4138,
"train_samples_per_second": 434.748,
"train_steps_per_second": 6.795
}
],
"max_steps": 40600,
"num_train_epochs": 10,
"total_flos": 8.557886095148544e+16,
"trial_name": null,
"trial_params": null
}