|
{ |
|
"best_metric": 0.7949723601341248, |
|
"best_model_checkpoint": "/jmain02/home/J2AD002/jxm09/hrk35-jxm09/hatemoji_r7_data/output/hs_data--prior_rounds--r7--5/deberta123/checkpoint-46574", |
|
"epoch": 3.0, |
|
"global_step": 69861, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9856858619258244e-05, |
|
"loss": 0.53, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9713717238516486e-05, |
|
"loss": 0.308, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9570575857774725e-05, |
|
"loss": 0.2211, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9427434477032967e-05, |
|
"loss": 0.1787, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.928429309629121e-05, |
|
"loss": 0.1482, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.914115171554945e-05, |
|
"loss": 0.1392, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.899801033480769e-05, |
|
"loss": 0.1246, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.8854868954065932e-05, |
|
"loss": 0.1191, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.8711727573324174e-05, |
|
"loss": 0.115, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.8568586192582416e-05, |
|
"loss": 0.1038, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.8425444811840655e-05, |
|
"loss": 0.098, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.8282303431098897e-05, |
|
"loss": 0.1044, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.813916205035714e-05, |
|
"loss": 0.1011, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.799602066961538e-05, |
|
"loss": 0.0935, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.785287928887362e-05, |
|
"loss": 0.0926, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.7709737908131862e-05, |
|
"loss": 0.095, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.7566596527390104e-05, |
|
"loss": 0.0863, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.7423455146648346e-05, |
|
"loss": 0.0886, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.7280313765906588e-05, |
|
"loss": 0.0876, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.713717238516483e-05, |
|
"loss": 0.089, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.699403100442307e-05, |
|
"loss": 0.0871, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.685088962368131e-05, |
|
"loss": 0.0874, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.6707748242939553e-05, |
|
"loss": 0.0802, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.6564606862197795e-05, |
|
"loss": 0.0826, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.6421465481456037e-05, |
|
"loss": 0.08, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.627832410071428e-05, |
|
"loss": 0.081, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.6135182719972518e-05, |
|
"loss": 0.0798, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.599204133923076e-05, |
|
"loss": 0.0813, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.5848899958489002e-05, |
|
"loss": 0.0779, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.5705758577747244e-05, |
|
"loss": 0.0769, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.5562617197005483e-05, |
|
"loss": 0.0817, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5419475816263725e-05, |
|
"loss": 0.0765, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.5276334435521967e-05, |
|
"loss": 0.0784, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.5133193054780208e-05, |
|
"loss": 0.0763, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.4990051674038448e-05, |
|
"loss": 0.0795, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.484691029329669e-05, |
|
"loss": 0.0758, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.4703768912554932e-05, |
|
"loss": 0.0748, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.4560627531813175e-05, |
|
"loss": 0.0741, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.4417486151071413e-05, |
|
"loss": 0.0726, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4274344770329655e-05, |
|
"loss": 0.0752, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.4131203389587897e-05, |
|
"loss": 0.0746, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.398806200884614e-05, |
|
"loss": 0.0733, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.3844920628104378e-05, |
|
"loss": 0.071, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.370177924736262e-05, |
|
"loss": 0.0745, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.3558637866620862e-05, |
|
"loss": 0.0751, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.3415496485879105e-05, |
|
"loss": 0.0722, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8151779770851135, |
|
"eval_loss": 0.8366696834564209, |
|
"eval_runtime": 23.897, |
|
"eval_samples_per_second": 196.301, |
|
"eval_steps_per_second": 3.097, |
|
"step": 23287 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.3272355105137345e-05, |
|
"loss": 0.0662, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.3129213724395587e-05, |
|
"loss": 0.0602, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.2986072343653827e-05, |
|
"loss": 0.0619, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.284293096291207e-05, |
|
"loss": 0.0583, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.269978958217031e-05, |
|
"loss": 0.0586, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.2556648201428552e-05, |
|
"loss": 0.0585, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.2413506820686794e-05, |
|
"loss": 0.059, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.2270365439945036e-05, |
|
"loss": 0.0575, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.2127224059203275e-05, |
|
"loss": 0.0596, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.1984082678461517e-05, |
|
"loss": 0.0595, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.184094129771976e-05, |
|
"loss": 0.0588, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.1697799916978001e-05, |
|
"loss": 0.0593, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.155465853623624e-05, |
|
"loss": 0.0612, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.1411517155494482e-05, |
|
"loss": 0.059, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.1268375774752724e-05, |
|
"loss": 0.0566, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.1125234394010966e-05, |
|
"loss": 0.0589, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.0982093013269207e-05, |
|
"loss": 0.0601, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.0838951632527447e-05, |
|
"loss": 0.0601, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.069581025178569e-05, |
|
"loss": 0.0595, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.0552668871043931e-05, |
|
"loss": 0.0603, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.0409527490302172e-05, |
|
"loss": 0.0571, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.0266386109560414e-05, |
|
"loss": 0.0587, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.0123244728818656e-05, |
|
"loss": 0.0565, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 9.980103348076896e-06, |
|
"loss": 0.0579, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 9.836961967335139e-06, |
|
"loss": 0.0619, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 9.693820586593379e-06, |
|
"loss": 0.0563, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 9.550679205851621e-06, |
|
"loss": 0.0548, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 9.407537825109861e-06, |
|
"loss": 0.0581, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.264396444368104e-06, |
|
"loss": 0.0568, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.121255063626344e-06, |
|
"loss": 0.055, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 8.978113682884586e-06, |
|
"loss": 0.0598, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 8.834972302142826e-06, |
|
"loss": 0.0597, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.691830921401069e-06, |
|
"loss": 0.058, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 8.548689540659309e-06, |
|
"loss": 0.0554, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 8.405548159917551e-06, |
|
"loss": 0.0563, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 8.262406779175792e-06, |
|
"loss": 0.0571, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 8.119265398434034e-06, |
|
"loss": 0.0579, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 7.976124017692276e-06, |
|
"loss": 0.0564, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 7.832982636950516e-06, |
|
"loss": 0.0577, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 7.689841256208758e-06, |
|
"loss": 0.0568, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 7.5466998754669995e-06, |
|
"loss": 0.0596, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 7.403558494725241e-06, |
|
"loss": 0.057, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 7.260417113983482e-06, |
|
"loss": 0.0579, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 7.117275733241723e-06, |
|
"loss": 0.0561, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 6.9741343524999645e-06, |
|
"loss": 0.0548, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 6.830992971758206e-06, |
|
"loss": 0.0571, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 6.687851591016448e-06, |
|
"loss": 0.055, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8311660885810852, |
|
"eval_loss": 0.7949723601341248, |
|
"eval_runtime": 27.7911, |
|
"eval_samples_per_second": 168.795, |
|
"eval_steps_per_second": 2.663, |
|
"step": 46574 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 6.544710210274688e-06, |
|
"loss": 0.0471, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 6.40156882953293e-06, |
|
"loss": 0.0435, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 6.258427448791171e-06, |
|
"loss": 0.043, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 6.115286068049413e-06, |
|
"loss": 0.0431, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 5.972144687307654e-06, |
|
"loss": 0.0433, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 5.8290033065658954e-06, |
|
"loss": 0.0442, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 5.685861925824137e-06, |
|
"loss": 0.0447, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 5.542720545082379e-06, |
|
"loss": 0.0454, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 5.399579164340619e-06, |
|
"loss": 0.0441, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 5.256437783598861e-06, |
|
"loss": 0.0441, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 5.113296402857102e-06, |
|
"loss": 0.0444, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 4.970155022115344e-06, |
|
"loss": 0.0435, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 4.827013641373585e-06, |
|
"loss": 0.0445, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 4.683872260631826e-06, |
|
"loss": 0.041, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 4.540730879890068e-06, |
|
"loss": 0.0461, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 4.397589499148309e-06, |
|
"loss": 0.0421, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 4.254448118406551e-06, |
|
"loss": 0.0423, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 4.111306737664792e-06, |
|
"loss": 0.0448, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3.9681653569230335e-06, |
|
"loss": 0.0447, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.825023976181275e-06, |
|
"loss": 0.0461, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 3.681882595439516e-06, |
|
"loss": 0.0444, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.5387412146977573e-06, |
|
"loss": 0.0431, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.3955998339559985e-06, |
|
"loss": 0.042, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.25245845321424e-06, |
|
"loss": 0.0415, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.1093170724724815e-06, |
|
"loss": 0.0433, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.9661756917307227e-06, |
|
"loss": 0.0422, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.823034310988964e-06, |
|
"loss": 0.0389, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.6798929302472052e-06, |
|
"loss": 0.0424, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.536751549505447e-06, |
|
"loss": 0.0397, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.393610168763688e-06, |
|
"loss": 0.044, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.2504687880219294e-06, |
|
"loss": 0.0443, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.1073274072801707e-06, |
|
"loss": 0.0414, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.9641860265384124e-06, |
|
"loss": 0.041, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.8210446457966536e-06, |
|
"loss": 0.0389, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.6779032650548949e-06, |
|
"loss": 0.0433, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 1.5347618843131361e-06, |
|
"loss": 0.0417, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 1.3916205035713776e-06, |
|
"loss": 0.0416, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.2484791228296189e-06, |
|
"loss": 0.0424, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.1053377420878603e-06, |
|
"loss": 0.0431, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 9.621963613461016e-07, |
|
"loss": 0.0417, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 8.19054980604343e-07, |
|
"loss": 0.0418, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 6.759135998625843e-07, |
|
"loss": 0.0399, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 5.327722191208257e-07, |
|
"loss": 0.0416, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 3.89630838379067e-07, |
|
"loss": 0.0424, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.464894576373084e-07, |
|
"loss": 0.0418, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.0334807689554974e-07, |
|
"loss": 0.0399, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8266894221305847, |
|
"eval_loss": 0.950989842414856, |
|
"eval_runtime": 23.2697, |
|
"eval_samples_per_second": 201.593, |
|
"eval_steps_per_second": 3.18, |
|
"step": 69861 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 69861, |
|
"total_flos": 1.9117803270808904e+18, |
|
"train_runtime": 54665.4444, |
|
"train_samples_per_second": 81.787, |
|
"train_steps_per_second": 1.278 |
|
} |
|
], |
|
"max_steps": 69861, |
|
"num_train_epochs": 3, |
|
"total_flos": 1.9117803270808904e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|