|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 75.0, |
|
"global_step": 187500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.990119762874309e-05, |
|
"loss": 7.6579, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.09562864791743528, |
|
"eval_loss": 7.127312660217285, |
|
"eval_runtime": 239.9352, |
|
"eval_samples_per_second": 20.839, |
|
"eval_steps_per_second": 2.605, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.980119522868549e-05, |
|
"loss": 7.0332, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.11961642364018389, |
|
"eval_loss": 6.915997505187988, |
|
"eval_runtime": 238.3965, |
|
"eval_samples_per_second": 20.973, |
|
"eval_steps_per_second": 2.622, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.970119282862789e-05, |
|
"loss": 6.8887, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.12904858139542238, |
|
"eval_loss": 6.821927547454834, |
|
"eval_runtime": 239.3066, |
|
"eval_samples_per_second": 20.894, |
|
"eval_steps_per_second": 2.612, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.960119042857029e-05, |
|
"loss": 6.8053, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.13103776288974772, |
|
"eval_loss": 6.758518695831299, |
|
"eval_runtime": 238.5002, |
|
"eval_samples_per_second": 20.964, |
|
"eval_steps_per_second": 2.621, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.950118802851269e-05, |
|
"loss": 6.7592, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.1364351071286491, |
|
"eval_loss": 6.682760715484619, |
|
"eval_runtime": 238.971, |
|
"eval_samples_per_second": 20.923, |
|
"eval_steps_per_second": 2.615, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.940118562845509e-05, |
|
"loss": 6.6764, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_accuracy": 0.13949271934969193, |
|
"eval_loss": 6.636216640472412, |
|
"eval_runtime": 238.2351, |
|
"eval_samples_per_second": 20.988, |
|
"eval_steps_per_second": 2.623, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.930118322839748e-05, |
|
"loss": 6.6333, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_accuracy": 0.1388095837001635, |
|
"eval_loss": 6.624228000640869, |
|
"eval_runtime": 238.5516, |
|
"eval_samples_per_second": 20.96, |
|
"eval_steps_per_second": 2.62, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.920118082833988e-05, |
|
"loss": 6.5896, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_accuracy": 0.1399531007383465, |
|
"eval_loss": 6.5691633224487305, |
|
"eval_runtime": 239.1652, |
|
"eval_samples_per_second": 20.906, |
|
"eval_steps_per_second": 2.613, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 4.910117842828228e-05, |
|
"loss": 6.5774, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_accuracy": 0.14199867302770844, |
|
"eval_loss": 6.539389610290527, |
|
"eval_runtime": 238.2931, |
|
"eval_samples_per_second": 20.983, |
|
"eval_steps_per_second": 2.623, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.9001176028224685e-05, |
|
"loss": 6.5486, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.14192549527631204, |
|
"eval_loss": 6.52717399597168, |
|
"eval_runtime": 238.4964, |
|
"eval_samples_per_second": 20.965, |
|
"eval_steps_per_second": 2.621, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 4.890117362816708e-05, |
|
"loss": 6.5136, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_accuracy": 0.14366604383395618, |
|
"eval_loss": 6.514117240905762, |
|
"eval_runtime": 238.2132, |
|
"eval_samples_per_second": 20.99, |
|
"eval_steps_per_second": 2.624, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 4.880117122810948e-05, |
|
"loss": 6.4988, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_accuracy": 0.14545551844518384, |
|
"eval_loss": 6.504667282104492, |
|
"eval_runtime": 238.3225, |
|
"eval_samples_per_second": 20.98, |
|
"eval_steps_per_second": 2.622, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.8701168828051874e-05, |
|
"loss": 6.4956, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_accuracy": 0.14478254828400722, |
|
"eval_loss": 6.484868049621582, |
|
"eval_runtime": 238.9391, |
|
"eval_samples_per_second": 20.926, |
|
"eval_steps_per_second": 2.616, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 4.8601166427994274e-05, |
|
"loss": 6.4848, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_accuracy": 0.14653028416523506, |
|
"eval_loss": 6.466109275817871, |
|
"eval_runtime": 238.6887, |
|
"eval_samples_per_second": 20.948, |
|
"eval_steps_per_second": 2.618, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.8501164027936675e-05, |
|
"loss": 6.4672, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.14503110201270616, |
|
"eval_loss": 6.4636101722717285, |
|
"eval_runtime": 238.5511, |
|
"eval_samples_per_second": 20.96, |
|
"eval_steps_per_second": 2.62, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 4.840116162787907e-05, |
|
"loss": 6.4398, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_accuracy": 0.14670060391437695, |
|
"eval_loss": 6.445173263549805, |
|
"eval_runtime": 238.6611, |
|
"eval_samples_per_second": 20.95, |
|
"eval_steps_per_second": 2.619, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 4.830115922782147e-05, |
|
"loss": 6.4398, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"eval_accuracy": 0.14309234861721049, |
|
"eval_loss": 6.446484565734863, |
|
"eval_runtime": 238.6523, |
|
"eval_samples_per_second": 20.951, |
|
"eval_steps_per_second": 2.619, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 4.8201156827763864e-05, |
|
"loss": 6.4143, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"eval_accuracy": 0.1470335800879734, |
|
"eval_loss": 6.419277191162109, |
|
"eval_runtime": 239.0519, |
|
"eval_samples_per_second": 20.916, |
|
"eval_steps_per_second": 2.614, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 4.810115442770627e-05, |
|
"loss": 6.417, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_accuracy": 0.14629917323469946, |
|
"eval_loss": 6.421440601348877, |
|
"eval_runtime": 238.9987, |
|
"eval_samples_per_second": 20.921, |
|
"eval_steps_per_second": 2.615, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.8001152027648666e-05, |
|
"loss": 6.3938, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.14721643768292184, |
|
"eval_loss": 6.402393817901611, |
|
"eval_runtime": 238.7319, |
|
"eval_samples_per_second": 20.944, |
|
"eval_steps_per_second": 2.618, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 4.7901149627591066e-05, |
|
"loss": 6.3577, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"eval_accuracy": 0.1467791629141584, |
|
"eval_loss": 6.405508041381836, |
|
"eval_runtime": 238.6576, |
|
"eval_samples_per_second": 20.951, |
|
"eval_steps_per_second": 2.619, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 4.780114722753346e-05, |
|
"loss": 6.3719, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"eval_accuracy": 0.14650020970045946, |
|
"eval_loss": 6.390870094299316, |
|
"eval_runtime": 238.7638, |
|
"eval_samples_per_second": 20.941, |
|
"eval_steps_per_second": 2.618, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 4.770114482747586e-05, |
|
"loss": 6.3545, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"eval_accuracy": 0.14796731042792302, |
|
"eval_loss": 6.386704921722412, |
|
"eval_runtime": 239.4196, |
|
"eval_samples_per_second": 20.884, |
|
"eval_steps_per_second": 2.61, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 4.760114242741826e-05, |
|
"loss": 6.3502, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"eval_accuracy": 0.14863574330478854, |
|
"eval_loss": 6.36898136138916, |
|
"eval_runtime": 239.0117, |
|
"eval_samples_per_second": 20.919, |
|
"eval_steps_per_second": 2.615, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.7501140027360656e-05, |
|
"loss": 6.3476, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.14774925794518562, |
|
"eval_loss": 6.374772071838379, |
|
"eval_runtime": 238.4542, |
|
"eval_samples_per_second": 20.968, |
|
"eval_steps_per_second": 2.621, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 4.740113762730306e-05, |
|
"loss": 6.3198, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"eval_accuracy": 0.14550738585638578, |
|
"eval_loss": 6.365925312042236, |
|
"eval_runtime": 239.1139, |
|
"eval_samples_per_second": 20.911, |
|
"eval_steps_per_second": 2.614, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 4.730113522724546e-05, |
|
"loss": 6.3145, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"eval_accuracy": 0.14902667786765839, |
|
"eval_loss": 6.340302467346191, |
|
"eval_runtime": 238.6079, |
|
"eval_samples_per_second": 20.955, |
|
"eval_steps_per_second": 2.619, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 4.720113282718786e-05, |
|
"loss": 6.3114, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"eval_accuracy": 0.1485525671599362, |
|
"eval_loss": 6.338578224182129, |
|
"eval_runtime": 239.046, |
|
"eval_samples_per_second": 20.916, |
|
"eval_steps_per_second": 2.615, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 4.710113042713025e-05, |
|
"loss": 6.3071, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"eval_accuracy": 0.14830638357038803, |
|
"eval_loss": 6.3231096267700195, |
|
"eval_runtime": 239.2069, |
|
"eval_samples_per_second": 20.902, |
|
"eval_steps_per_second": 2.613, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.7001128027072646e-05, |
|
"loss": 6.3148, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.15010454322169461, |
|
"eval_loss": 6.31591796875, |
|
"eval_runtime": 238.9169, |
|
"eval_samples_per_second": 20.928, |
|
"eval_steps_per_second": 2.616, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 4.6901125627015054e-05, |
|
"loss": 6.2703, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"eval_accuracy": 0.149010929341272, |
|
"eval_loss": 6.327798366546631, |
|
"eval_runtime": 239.0819, |
|
"eval_samples_per_second": 20.913, |
|
"eval_steps_per_second": 2.614, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 4.680112322695745e-05, |
|
"loss": 6.272, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_accuracy": 0.14926184852480373, |
|
"eval_loss": 6.314311504364014, |
|
"eval_runtime": 239.1891, |
|
"eval_samples_per_second": 20.904, |
|
"eval_steps_per_second": 2.613, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 4.670112082689985e-05, |
|
"loss": 6.2713, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"eval_accuracy": 0.149609282500592, |
|
"eval_loss": 6.310863971710205, |
|
"eval_runtime": 238.9461, |
|
"eval_samples_per_second": 20.925, |
|
"eval_steps_per_second": 2.616, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 4.660111842684224e-05, |
|
"loss": 6.2626, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"eval_accuracy": 0.14950698945650215, |
|
"eval_loss": 6.288058757781982, |
|
"eval_runtime": 238.8827, |
|
"eval_samples_per_second": 20.931, |
|
"eval_steps_per_second": 2.616, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.6501116026784644e-05, |
|
"loss": 6.2542, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.15001838460595662, |
|
"eval_loss": 6.285544395446777, |
|
"eval_runtime": 238.7799, |
|
"eval_samples_per_second": 20.94, |
|
"eval_steps_per_second": 2.617, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 4.6401113626727044e-05, |
|
"loss": 6.226, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"eval_accuracy": 0.15007755457193658, |
|
"eval_loss": 6.284069538116455, |
|
"eval_runtime": 238.9785, |
|
"eval_samples_per_second": 20.922, |
|
"eval_steps_per_second": 2.615, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 4.630111122666944e-05, |
|
"loss": 6.2225, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"eval_accuracy": 0.15004452640952928, |
|
"eval_loss": 6.269779682159424, |
|
"eval_runtime": 238.1129, |
|
"eval_samples_per_second": 20.998, |
|
"eval_steps_per_second": 2.625, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 4.620110882661184e-05, |
|
"loss": 6.2073, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"eval_accuracy": 0.1489249056948642, |
|
"eval_loss": 6.264680862426758, |
|
"eval_runtime": 239.8511, |
|
"eval_samples_per_second": 20.846, |
|
"eval_steps_per_second": 2.606, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 4.610110642655424e-05, |
|
"loss": 6.2133, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"eval_accuracy": 0.14988711325252577, |
|
"eval_loss": 6.258930683135986, |
|
"eval_runtime": 238.7682, |
|
"eval_samples_per_second": 20.941, |
|
"eval_steps_per_second": 2.618, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.600110402649664e-05, |
|
"loss": 6.2134, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.1510672200633593, |
|
"eval_loss": 6.249328136444092, |
|
"eval_runtime": 239.5811, |
|
"eval_samples_per_second": 20.87, |
|
"eval_steps_per_second": 2.609, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 4.5901101626439035e-05, |
|
"loss": 6.1887, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"eval_accuracy": 0.1502341927776338, |
|
"eval_loss": 6.242542743682861, |
|
"eval_runtime": 238.4196, |
|
"eval_samples_per_second": 20.971, |
|
"eval_steps_per_second": 2.621, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 4.5801099226381436e-05, |
|
"loss": 6.1822, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"eval_accuracy": 0.15187036444962188, |
|
"eval_loss": 6.232594966888428, |
|
"eval_runtime": 239.3713, |
|
"eval_samples_per_second": 20.888, |
|
"eval_steps_per_second": 2.611, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 4.5701096826323836e-05, |
|
"loss": 6.1752, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"eval_accuracy": 0.1495673710048999, |
|
"eval_loss": 6.239598274230957, |
|
"eval_runtime": 238.3388, |
|
"eval_samples_per_second": 20.979, |
|
"eval_steps_per_second": 2.622, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 4.560109442626623e-05, |
|
"loss": 6.1705, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"eval_accuracy": 0.15107269620915528, |
|
"eval_loss": 6.23054313659668, |
|
"eval_runtime": 239.3311, |
|
"eval_samples_per_second": 20.892, |
|
"eval_steps_per_second": 2.611, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.550109202620863e-05, |
|
"loss": 6.1737, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.15264056342116805, |
|
"eval_loss": 6.201099872589111, |
|
"eval_runtime": 238.7124, |
|
"eval_samples_per_second": 20.946, |
|
"eval_steps_per_second": 2.618, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 4.5401089626151025e-05, |
|
"loss": 6.1468, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"eval_accuracy": 0.15174466124507308, |
|
"eval_loss": 6.213028430938721, |
|
"eval_runtime": 239.0381, |
|
"eval_samples_per_second": 20.917, |
|
"eval_steps_per_second": 2.615, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 4.530108722609343e-05, |
|
"loss": 6.1346, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"eval_accuracy": 0.15341171411043722, |
|
"eval_loss": 6.193819522857666, |
|
"eval_runtime": 238.1389, |
|
"eval_samples_per_second": 20.996, |
|
"eval_steps_per_second": 2.625, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 4.520108482603583e-05, |
|
"loss": 6.126, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"eval_accuracy": 0.15403517970455113, |
|
"eval_loss": 6.188257217407227, |
|
"eval_runtime": 239.6406, |
|
"eval_samples_per_second": 20.865, |
|
"eval_steps_per_second": 2.608, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 4.510108242597823e-05, |
|
"loss": 6.1265, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"eval_accuracy": 0.1534727219928538, |
|
"eval_loss": 6.178363800048828, |
|
"eval_runtime": 238.9451, |
|
"eval_samples_per_second": 20.925, |
|
"eval_steps_per_second": 2.616, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4.500108002592062e-05, |
|
"loss": 6.1141, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.15413779661230084, |
|
"eval_loss": 6.167110919952393, |
|
"eval_runtime": 239.3956, |
|
"eval_samples_per_second": 20.886, |
|
"eval_steps_per_second": 2.611, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 4.490107762586302e-05, |
|
"loss": 6.0804, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"eval_accuracy": 0.15433432565732236, |
|
"eval_loss": 6.159796714782715, |
|
"eval_runtime": 238.255, |
|
"eval_samples_per_second": 20.986, |
|
"eval_steps_per_second": 2.623, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 4.480107522580542e-05, |
|
"loss": 6.0756, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"eval_accuracy": 0.15528530161534182, |
|
"eval_loss": 6.153306484222412, |
|
"eval_runtime": 239.5862, |
|
"eval_samples_per_second": 20.869, |
|
"eval_steps_per_second": 2.609, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"learning_rate": 4.470107282574782e-05, |
|
"loss": 6.0657, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"eval_accuracy": 0.1552249733339224, |
|
"eval_loss": 6.150942802429199, |
|
"eval_runtime": 238.6186, |
|
"eval_samples_per_second": 20.954, |
|
"eval_steps_per_second": 2.619, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 4.460107042569022e-05, |
|
"loss": 6.062, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"eval_accuracy": 0.1587447574438766, |
|
"eval_loss": 6.111793041229248, |
|
"eval_runtime": 240.3225, |
|
"eval_samples_per_second": 20.805, |
|
"eval_steps_per_second": 2.601, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 4.450106802563262e-05, |
|
"loss": 6.051, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.15917448452711824, |
|
"eval_loss": 6.102757930755615, |
|
"eval_runtime": 238.5988, |
|
"eval_samples_per_second": 20.956, |
|
"eval_steps_per_second": 2.619, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 4.440106562557502e-05, |
|
"loss": 5.9972, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"eval_accuracy": 0.1635643501862021, |
|
"eval_loss": 6.075224876403809, |
|
"eval_runtime": 239.5936, |
|
"eval_samples_per_second": 20.869, |
|
"eval_steps_per_second": 2.609, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"learning_rate": 4.4301063225517413e-05, |
|
"loss": 5.9992, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"eval_accuracy": 0.16585055162650203, |
|
"eval_loss": 6.051377296447754, |
|
"eval_runtime": 238.4053, |
|
"eval_samples_per_second": 20.973, |
|
"eval_steps_per_second": 2.622, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"learning_rate": 4.4201060825459814e-05, |
|
"loss": 5.9786, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"eval_accuracy": 0.16999095102797587, |
|
"eval_loss": 6.0217790603637695, |
|
"eval_runtime": 239.6751, |
|
"eval_samples_per_second": 20.862, |
|
"eval_steps_per_second": 2.608, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"learning_rate": 4.4101058425402215e-05, |
|
"loss": 5.9425, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"eval_accuracy": 0.1729126550711828, |
|
"eval_loss": 5.975002288818359, |
|
"eval_runtime": 238.6151, |
|
"eval_samples_per_second": 20.954, |
|
"eval_steps_per_second": 2.619, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 4.400105602534461e-05, |
|
"loss": 5.8962, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.17712782712782713, |
|
"eval_loss": 5.941808700561523, |
|
"eval_runtime": 239.9021, |
|
"eval_samples_per_second": 20.842, |
|
"eval_steps_per_second": 2.605, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"learning_rate": 4.390105362528701e-05, |
|
"loss": 5.8435, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"eval_accuracy": 0.1786500285756687, |
|
"eval_loss": 5.892694473266602, |
|
"eval_runtime": 238.1492, |
|
"eval_samples_per_second": 20.995, |
|
"eval_steps_per_second": 2.624, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 4.3801051225229404e-05, |
|
"loss": 5.8062, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"eval_accuracy": 0.1884558398438117, |
|
"eval_loss": 5.821302890777588, |
|
"eval_runtime": 238.0587, |
|
"eval_samples_per_second": 21.003, |
|
"eval_steps_per_second": 2.625, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 12.6, |
|
"learning_rate": 4.370104882517181e-05, |
|
"loss": 5.7915, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 12.6, |
|
"eval_accuracy": 0.194766964409799, |
|
"eval_loss": 5.766781330108643, |
|
"eval_runtime": 238.7498, |
|
"eval_samples_per_second": 20.942, |
|
"eval_steps_per_second": 2.618, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 4.3601046425114205e-05, |
|
"loss": 5.7237, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"eval_accuracy": 0.2000967307534251, |
|
"eval_loss": 5.707735538482666, |
|
"eval_runtime": 238.212, |
|
"eval_samples_per_second": 20.99, |
|
"eval_steps_per_second": 2.624, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 4.3501044025056606e-05, |
|
"loss": 5.6816, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.2074816899981318, |
|
"eval_loss": 5.6437764167785645, |
|
"eval_runtime": 238.3771, |
|
"eval_samples_per_second": 20.975, |
|
"eval_steps_per_second": 2.622, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 4.3401041624999e-05, |
|
"loss": 5.5966, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"eval_accuracy": 0.21245663568914436, |
|
"eval_loss": 5.579566478729248, |
|
"eval_runtime": 238.7052, |
|
"eval_samples_per_second": 20.946, |
|
"eval_steps_per_second": 2.618, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"learning_rate": 4.33010392249414e-05, |
|
"loss": 5.5319, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"eval_accuracy": 0.2188389812564588, |
|
"eval_loss": 5.523879528045654, |
|
"eval_runtime": 238.3135, |
|
"eval_samples_per_second": 20.981, |
|
"eval_steps_per_second": 2.623, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"learning_rate": 4.32010368248838e-05, |
|
"loss": 5.4744, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"eval_accuracy": 0.22693598869200868, |
|
"eval_loss": 5.4436564445495605, |
|
"eval_runtime": 239.2172, |
|
"eval_samples_per_second": 20.902, |
|
"eval_steps_per_second": 2.613, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 13.8, |
|
"learning_rate": 4.3101034424826196e-05, |
|
"loss": 5.4245, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 13.8, |
|
"eval_accuracy": 0.23376848720407853, |
|
"eval_loss": 5.3778486251831055, |
|
"eval_runtime": 237.8545, |
|
"eval_samples_per_second": 21.021, |
|
"eval_steps_per_second": 2.628, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 4.3001032024768597e-05, |
|
"loss": 5.3751, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.23909228078814623, |
|
"eval_loss": 5.325531482696533, |
|
"eval_runtime": 239.3036, |
|
"eval_samples_per_second": 20.894, |
|
"eval_steps_per_second": 2.612, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 14.2, |
|
"learning_rate": 4.2901029624711e-05, |
|
"loss": 5.2751, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 14.2, |
|
"eval_accuracy": 0.24685205362641527, |
|
"eval_loss": 5.252104759216309, |
|
"eval_runtime": 238.2439, |
|
"eval_samples_per_second": 20.987, |
|
"eval_steps_per_second": 2.623, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 4.28010272246534e-05, |
|
"loss": 5.2219, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"eval_accuracy": 0.2519453843197298, |
|
"eval_loss": 5.184392929077148, |
|
"eval_runtime": 238.9412, |
|
"eval_samples_per_second": 20.926, |
|
"eval_steps_per_second": 2.616, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 14.6, |
|
"learning_rate": 4.270102482459579e-05, |
|
"loss": 5.1513, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 14.6, |
|
"eval_accuracy": 0.25571065989847713, |
|
"eval_loss": 5.144058704376221, |
|
"eval_runtime": 239.1692, |
|
"eval_samples_per_second": 20.906, |
|
"eval_steps_per_second": 2.613, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"learning_rate": 4.2601022424538186e-05, |
|
"loss": 5.1198, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"eval_accuracy": 0.26201602768533194, |
|
"eval_loss": 5.08328914642334, |
|
"eval_runtime": 239.2967, |
|
"eval_samples_per_second": 20.895, |
|
"eval_steps_per_second": 2.612, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 4.2501020024480594e-05, |
|
"loss": 5.0865, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.26588186321967033, |
|
"eval_loss": 5.031526565551758, |
|
"eval_runtime": 239.4214, |
|
"eval_samples_per_second": 20.884, |
|
"eval_steps_per_second": 2.61, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"learning_rate": 4.240101762442299e-05, |
|
"loss": 5.0009, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"eval_accuracy": 0.27209091683735137, |
|
"eval_loss": 4.977504730224609, |
|
"eval_runtime": 238.005, |
|
"eval_samples_per_second": 21.008, |
|
"eval_steps_per_second": 2.626, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 15.4, |
|
"learning_rate": 4.230101522436539e-05, |
|
"loss": 4.9533, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 15.4, |
|
"eval_accuracy": 0.2761229329330592, |
|
"eval_loss": 4.92480993270874, |
|
"eval_runtime": 239.2788, |
|
"eval_samples_per_second": 20.896, |
|
"eval_steps_per_second": 2.612, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 15.6, |
|
"learning_rate": 4.220101282430778e-05, |
|
"loss": 4.8984, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 15.6, |
|
"eval_accuracy": 0.284984501086343, |
|
"eval_loss": 4.843219757080078, |
|
"eval_runtime": 238.6717, |
|
"eval_samples_per_second": 20.949, |
|
"eval_steps_per_second": 2.619, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 15.8, |
|
"learning_rate": 4.210101042425018e-05, |
|
"loss": 4.8225, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 15.8, |
|
"eval_accuracy": 0.2922410149325292, |
|
"eval_loss": 4.7489118576049805, |
|
"eval_runtime": 238.6056, |
|
"eval_samples_per_second": 20.955, |
|
"eval_steps_per_second": 2.619, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 4.2001008024192584e-05, |
|
"loss": 4.7467, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.3021739816363235, |
|
"eval_loss": 4.653831481933594, |
|
"eval_runtime": 238.1775, |
|
"eval_samples_per_second": 20.993, |
|
"eval_steps_per_second": 2.624, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 16.2, |
|
"learning_rate": 4.190100562413498e-05, |
|
"loss": 4.6515, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 16.2, |
|
"eval_accuracy": 0.3053980176966654, |
|
"eval_loss": 4.602191925048828, |
|
"eval_runtime": 238.6405, |
|
"eval_samples_per_second": 20.952, |
|
"eval_steps_per_second": 2.619, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 16.4, |
|
"learning_rate": 4.180100322407738e-05, |
|
"loss": 4.5913, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 16.4, |
|
"eval_accuracy": 0.31172282596673734, |
|
"eval_loss": 4.532454967498779, |
|
"eval_runtime": 238.4758, |
|
"eval_samples_per_second": 20.966, |
|
"eval_steps_per_second": 2.621, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 16.6, |
|
"learning_rate": 4.170100082401977e-05, |
|
"loss": 4.532, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 16.6, |
|
"eval_accuracy": 0.3172949714616906, |
|
"eval_loss": 4.486767292022705, |
|
"eval_runtime": 238.5426, |
|
"eval_samples_per_second": 20.961, |
|
"eval_steps_per_second": 2.62, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"learning_rate": 4.160099842396218e-05, |
|
"loss": 4.4654, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"eval_accuracy": 0.32319931360364396, |
|
"eval_loss": 4.4189629554748535, |
|
"eval_runtime": 238.5096, |
|
"eval_samples_per_second": 20.964, |
|
"eval_steps_per_second": 2.62, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 4.1500996023904575e-05, |
|
"loss": 4.4093, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.3282410682702511, |
|
"eval_loss": 4.374806880950928, |
|
"eval_runtime": 238.2081, |
|
"eval_samples_per_second": 20.99, |
|
"eval_steps_per_second": 2.624, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 17.2, |
|
"learning_rate": 4.1400993623846975e-05, |
|
"loss": 4.3464, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 17.2, |
|
"eval_accuracy": 0.33113666533184927, |
|
"eval_loss": 4.329699516296387, |
|
"eval_runtime": 238.6019, |
|
"eval_samples_per_second": 20.955, |
|
"eval_steps_per_second": 2.619, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 17.4, |
|
"learning_rate": 4.130099122378937e-05, |
|
"loss": 4.3109, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 17.4, |
|
"eval_accuracy": 0.33443005394851116, |
|
"eval_loss": 4.298059463500977, |
|
"eval_runtime": 239.4997, |
|
"eval_samples_per_second": 20.877, |
|
"eval_steps_per_second": 2.61, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 4.120098882373177e-05, |
|
"loss": 4.2486, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"eval_accuracy": 0.33968982981534496, |
|
"eval_loss": 4.248338222503662, |
|
"eval_runtime": 238.9263, |
|
"eval_samples_per_second": 20.927, |
|
"eval_steps_per_second": 2.616, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 17.8, |
|
"learning_rate": 4.110098642367417e-05, |
|
"loss": 4.2214, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 17.8, |
|
"eval_accuracy": 0.3430102865749281, |
|
"eval_loss": 4.211354732513428, |
|
"eval_runtime": 239.8092, |
|
"eval_samples_per_second": 20.85, |
|
"eval_steps_per_second": 2.606, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 4.1000984023616565e-05, |
|
"loss": 4.1774, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.3472969219503573, |
|
"eval_loss": 4.166539669036865, |
|
"eval_runtime": 238.982, |
|
"eval_samples_per_second": 20.922, |
|
"eval_steps_per_second": 2.615, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 18.2, |
|
"learning_rate": 4.0900981623558966e-05, |
|
"loss": 4.1424, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 18.2, |
|
"eval_accuracy": 0.34982222082376263, |
|
"eval_loss": 4.140753269195557, |
|
"eval_runtime": 239.4629, |
|
"eval_samples_per_second": 20.88, |
|
"eval_steps_per_second": 2.61, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"learning_rate": 4.0800979223501366e-05, |
|
"loss": 4.1067, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"eval_accuracy": 0.3526568981561805, |
|
"eval_loss": 4.114173412322998, |
|
"eval_runtime": 239.2864, |
|
"eval_samples_per_second": 20.895, |
|
"eval_steps_per_second": 2.612, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 18.6, |
|
"learning_rate": 4.070097682344377e-05, |
|
"loss": 4.0837, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 18.6, |
|
"eval_accuracy": 0.3564153178049743, |
|
"eval_loss": 4.076625823974609, |
|
"eval_runtime": 238.644, |
|
"eval_samples_per_second": 20.952, |
|
"eval_steps_per_second": 2.619, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"learning_rate": 4.060097442338616e-05, |
|
"loss": 4.0442, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"eval_accuracy": 0.35937041559006977, |
|
"eval_loss": 4.049180030822754, |
|
"eval_runtime": 238.9064, |
|
"eval_samples_per_second": 20.929, |
|
"eval_steps_per_second": 2.616, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 4.050097202332856e-05, |
|
"loss": 4.0248, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.3630438412177381, |
|
"eval_loss": 4.01298189163208, |
|
"eval_runtime": 239.1093, |
|
"eval_samples_per_second": 20.911, |
|
"eval_steps_per_second": 2.614, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 4.040096962327096e-05, |
|
"loss": 3.9663, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"eval_accuracy": 0.3619489778849462, |
|
"eval_loss": 4.020573616027832, |
|
"eval_runtime": 239.7356, |
|
"eval_samples_per_second": 20.856, |
|
"eval_steps_per_second": 2.607, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 19.4, |
|
"learning_rate": 4.030096722321336e-05, |
|
"loss": 3.9401, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 19.4, |
|
"eval_accuracy": 0.3671375300214891, |
|
"eval_loss": 3.970278024673462, |
|
"eval_runtime": 238.9689, |
|
"eval_samples_per_second": 20.923, |
|
"eval_steps_per_second": 2.615, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 19.6, |
|
"learning_rate": 4.020096482315576e-05, |
|
"loss": 3.9246, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 19.6, |
|
"eval_accuracy": 0.3694057226705796, |
|
"eval_loss": 3.955792188644409, |
|
"eval_runtime": 237.9835, |
|
"eval_samples_per_second": 21.01, |
|
"eval_steps_per_second": 2.626, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 19.8, |
|
"learning_rate": 4.010096242309815e-05, |
|
"loss": 3.9172, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 19.8, |
|
"eval_accuracy": 0.3719208481724609, |
|
"eval_loss": 3.9305856227874756, |
|
"eval_runtime": 239.3028, |
|
"eval_samples_per_second": 20.894, |
|
"eval_steps_per_second": 2.612, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 4.000096002304056e-05, |
|
"loss": 3.8908, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.3750794239951685, |
|
"eval_loss": 3.898867130279541, |
|
"eval_runtime": 239.4161, |
|
"eval_samples_per_second": 20.884, |
|
"eval_steps_per_second": 2.611, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 20.2, |
|
"learning_rate": 3.990095762298295e-05, |
|
"loss": 3.8366, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 20.2, |
|
"eval_accuracy": 0.37685135603364095, |
|
"eval_loss": 3.877788782119751, |
|
"eval_runtime": 239.074, |
|
"eval_samples_per_second": 20.914, |
|
"eval_steps_per_second": 2.614, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 20.4, |
|
"learning_rate": 3.9800955222925354e-05, |
|
"loss": 3.8279, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 20.4, |
|
"eval_accuracy": 0.37645562990796916, |
|
"eval_loss": 3.8702280521392822, |
|
"eval_runtime": 239.2592, |
|
"eval_samples_per_second": 20.898, |
|
"eval_steps_per_second": 2.612, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 20.6, |
|
"learning_rate": 3.970095282286775e-05, |
|
"loss": 3.7992, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 20.6, |
|
"eval_accuracy": 0.38063372045240457, |
|
"eval_loss": 3.827329397201538, |
|
"eval_runtime": 239.4288, |
|
"eval_samples_per_second": 20.883, |
|
"eval_steps_per_second": 2.61, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"learning_rate": 3.960095042281015e-05, |
|
"loss": 3.7949, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"eval_accuracy": 0.3803827148043566, |
|
"eval_loss": 3.829620599746704, |
|
"eval_runtime": 239.6513, |
|
"eval_samples_per_second": 20.864, |
|
"eval_steps_per_second": 2.608, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 3.950094802275255e-05, |
|
"loss": 3.7669, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.38461295677917423, |
|
"eval_loss": 3.788926601409912, |
|
"eval_runtime": 240.0632, |
|
"eval_samples_per_second": 20.828, |
|
"eval_steps_per_second": 2.603, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 21.2, |
|
"learning_rate": 3.9400945622694944e-05, |
|
"loss": 3.7173, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 21.2, |
|
"eval_accuracy": 0.3850090819647317, |
|
"eval_loss": 3.7976882457733154, |
|
"eval_runtime": 239.5627, |
|
"eval_samples_per_second": 20.871, |
|
"eval_steps_per_second": 2.609, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 21.4, |
|
"learning_rate": 3.9300943222637344e-05, |
|
"loss": 3.7073, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 21.4, |
|
"eval_accuracy": 0.38859488849548585, |
|
"eval_loss": 3.7608718872070312, |
|
"eval_runtime": 238.4323, |
|
"eval_samples_per_second": 20.97, |
|
"eval_steps_per_second": 2.621, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 21.6, |
|
"learning_rate": 3.9200940822579745e-05, |
|
"loss": 3.7031, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 21.6, |
|
"eval_accuracy": 0.39072964788576736, |
|
"eval_loss": 3.738675832748413, |
|
"eval_runtime": 239.1766, |
|
"eval_samples_per_second": 20.905, |
|
"eval_steps_per_second": 2.613, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 21.8, |
|
"learning_rate": 3.9100938422522146e-05, |
|
"loss": 3.678, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 21.8, |
|
"eval_accuracy": 0.3919720448368493, |
|
"eval_loss": 3.7181637287139893, |
|
"eval_runtime": 238.4998, |
|
"eval_samples_per_second": 20.964, |
|
"eval_steps_per_second": 2.621, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 3.900093602246454e-05, |
|
"loss": 3.6664, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.39393000929080213, |
|
"eval_loss": 3.6974339485168457, |
|
"eval_runtime": 239.9697, |
|
"eval_samples_per_second": 20.836, |
|
"eval_steps_per_second": 2.604, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 22.2, |
|
"learning_rate": 3.890093362240694e-05, |
|
"loss": 3.63, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 22.2, |
|
"eval_accuracy": 0.395838777829905, |
|
"eval_loss": 3.683786630630493, |
|
"eval_runtime": 238.6856, |
|
"eval_samples_per_second": 20.948, |
|
"eval_steps_per_second": 2.619, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"learning_rate": 3.880093122234934e-05, |
|
"loss": 3.6072, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"eval_accuracy": 0.39780745729365696, |
|
"eval_loss": 3.668595314025879, |
|
"eval_runtime": 238.9371, |
|
"eval_samples_per_second": 20.926, |
|
"eval_steps_per_second": 2.616, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 22.6, |
|
"learning_rate": 3.8700928822291736e-05, |
|
"loss": 3.5878, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 22.6, |
|
"eval_accuracy": 0.4004617930964775, |
|
"eval_loss": 3.6446495056152344, |
|
"eval_runtime": 238.383, |
|
"eval_samples_per_second": 20.975, |
|
"eval_steps_per_second": 2.622, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 22.8, |
|
"learning_rate": 3.8600926422234136e-05, |
|
"loss": 3.5748, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 22.8, |
|
"eval_accuracy": 0.4013079441994284, |
|
"eval_loss": 3.633631706237793, |
|
"eval_runtime": 239.4332, |
|
"eval_samples_per_second": 20.883, |
|
"eval_steps_per_second": 2.61, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 3.850092402217653e-05, |
|
"loss": 3.5668, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.4023029638942638, |
|
"eval_loss": 3.618192672729492, |
|
"eval_runtime": 240.1475, |
|
"eval_samples_per_second": 20.821, |
|
"eval_steps_per_second": 2.603, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 23.2, |
|
"learning_rate": 3.840092162211894e-05, |
|
"loss": 3.508, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 23.2, |
|
"eval_accuracy": 0.40511161946832847, |
|
"eval_loss": 3.5944156646728516, |
|
"eval_runtime": 240.7097, |
|
"eval_samples_per_second": 20.772, |
|
"eval_steps_per_second": 2.596, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 23.4, |
|
"learning_rate": 3.830091922206133e-05, |
|
"loss": 3.5188, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 23.4, |
|
"eval_accuracy": 0.4101088779063732, |
|
"eval_loss": 3.5640573501586914, |
|
"eval_runtime": 238.4535, |
|
"eval_samples_per_second": 20.968, |
|
"eval_steps_per_second": 2.621, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 23.6, |
|
"learning_rate": 3.820091682200373e-05, |
|
"loss": 3.4902, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 23.6, |
|
"eval_accuracy": 0.41012237850373057, |
|
"eval_loss": 3.5568885803222656, |
|
"eval_runtime": 239.3958, |
|
"eval_samples_per_second": 20.886, |
|
"eval_steps_per_second": 2.611, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 23.8, |
|
"learning_rate": 3.810091442194613e-05, |
|
"loss": 3.4667, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 23.8, |
|
"eval_accuracy": 0.41220216515450525, |
|
"eval_loss": 3.5408358573913574, |
|
"eval_runtime": 238.5286, |
|
"eval_samples_per_second": 20.962, |
|
"eval_steps_per_second": 2.62, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 3.800091202188853e-05, |
|
"loss": 3.4734, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.4146663257801724, |
|
"eval_loss": 3.5163965225219727, |
|
"eval_runtime": 240.0735, |
|
"eval_samples_per_second": 20.827, |
|
"eval_steps_per_second": 2.603, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 24.2, |
|
"learning_rate": 3.790090962183093e-05, |
|
"loss": 3.4178, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 24.2, |
|
"eval_accuracy": 0.4166579906298803, |
|
"eval_loss": 3.5096375942230225, |
|
"eval_runtime": 239.0893, |
|
"eval_samples_per_second": 20.913, |
|
"eval_steps_per_second": 2.614, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 24.4, |
|
"learning_rate": 3.780090722177332e-05, |
|
"loss": 3.4139, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 24.4, |
|
"eval_accuracy": 0.41929409690187325, |
|
"eval_loss": 3.489515542984009, |
|
"eval_runtime": 240.9991, |
|
"eval_samples_per_second": 20.747, |
|
"eval_steps_per_second": 2.593, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 24.6, |
|
"learning_rate": 3.770090482171572e-05, |
|
"loss": 3.4161, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 24.6, |
|
"eval_accuracy": 0.42010133427676133, |
|
"eval_loss": 3.475292682647705, |
|
"eval_runtime": 238.9304, |
|
"eval_samples_per_second": 20.927, |
|
"eval_steps_per_second": 2.616, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 24.8, |
|
"learning_rate": 3.7600902421658124e-05, |
|
"loss": 3.3962, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 24.8, |
|
"eval_accuracy": 0.4221688310052524, |
|
"eval_loss": 3.4501795768737793, |
|
"eval_runtime": 240.1731, |
|
"eval_samples_per_second": 20.818, |
|
"eval_steps_per_second": 2.602, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 3.750090002160052e-05, |
|
"loss": 3.3857, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.42396696829715874, |
|
"eval_loss": 3.4505577087402344, |
|
"eval_runtime": 239.537, |
|
"eval_samples_per_second": 20.874, |
|
"eval_steps_per_second": 2.609, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 25.2, |
|
"learning_rate": 3.740089762154292e-05, |
|
"loss": 3.3437, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 25.2, |
|
"eval_accuracy": 0.42821535707983566, |
|
"eval_loss": 3.4162397384643555, |
|
"eval_runtime": 239.3465, |
|
"eval_samples_per_second": 20.89, |
|
"eval_steps_per_second": 2.611, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 25.4, |
|
"learning_rate": 3.730089522148531e-05, |
|
"loss": 3.3282, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 25.4, |
|
"eval_accuracy": 0.42975245728349876, |
|
"eval_loss": 3.401306629180908, |
|
"eval_runtime": 238.9024, |
|
"eval_samples_per_second": 20.929, |
|
"eval_steps_per_second": 2.616, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"learning_rate": 3.720089282142772e-05, |
|
"loss": 3.3285, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"eval_accuracy": 0.4319420899272967, |
|
"eval_loss": 3.3905553817749023, |
|
"eval_runtime": 240.2665, |
|
"eval_samples_per_second": 20.81, |
|
"eval_steps_per_second": 2.601, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 25.8, |
|
"learning_rate": 3.7100890421370114e-05, |
|
"loss": 3.3161, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 25.8, |
|
"eval_accuracy": 0.4342018381086879, |
|
"eval_loss": 3.365351915359497, |
|
"eval_runtime": 239.2968, |
|
"eval_samples_per_second": 20.895, |
|
"eval_steps_per_second": 2.612, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 3.7000888021312515e-05, |
|
"loss": 3.3026, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.434839985763914, |
|
"eval_loss": 3.3654284477233887, |
|
"eval_runtime": 239.0653, |
|
"eval_samples_per_second": 20.915, |
|
"eval_steps_per_second": 2.614, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 26.2, |
|
"learning_rate": 3.690088562125491e-05, |
|
"loss": 3.2554, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 26.2, |
|
"eval_accuracy": 0.43755692455598844, |
|
"eval_loss": 3.345842123031616, |
|
"eval_runtime": 239.4412, |
|
"eval_samples_per_second": 20.882, |
|
"eval_steps_per_second": 2.61, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 26.4, |
|
"learning_rate": 3.680088322119731e-05, |
|
"loss": 3.2426, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 26.4, |
|
"eval_accuracy": 0.4402917592198749, |
|
"eval_loss": 3.334620237350464, |
|
"eval_runtime": 239.2321, |
|
"eval_samples_per_second": 20.9, |
|
"eval_steps_per_second": 2.613, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 26.6, |
|
"learning_rate": 3.670088082113971e-05, |
|
"loss": 3.2419, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 26.6, |
|
"eval_accuracy": 0.4414129103148642, |
|
"eval_loss": 3.3168656826019287, |
|
"eval_runtime": 238.7655, |
|
"eval_samples_per_second": 20.941, |
|
"eval_steps_per_second": 2.618, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 26.8, |
|
"learning_rate": 3.6600878421082105e-05, |
|
"loss": 3.2503, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 26.8, |
|
"eval_accuracy": 0.4437969829978306, |
|
"eval_loss": 3.299358606338501, |
|
"eval_runtime": 240.0903, |
|
"eval_samples_per_second": 20.825, |
|
"eval_steps_per_second": 2.603, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 3.6500876021024505e-05, |
|
"loss": 3.234, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.4456840349724567, |
|
"eval_loss": 3.290809392929077, |
|
"eval_runtime": 239.1298, |
|
"eval_samples_per_second": 20.909, |
|
"eval_steps_per_second": 2.614, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 27.2, |
|
"learning_rate": 3.6400873620966906e-05, |
|
"loss": 3.1794, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 27.2, |
|
"eval_accuracy": 0.4489755289834922, |
|
"eval_loss": 3.273737907409668, |
|
"eval_runtime": 239.1064, |
|
"eval_samples_per_second": 20.911, |
|
"eval_steps_per_second": 2.614, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 27.4, |
|
"learning_rate": 3.630087122090931e-05, |
|
"loss": 3.1827, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 27.4, |
|
"eval_accuracy": 0.44896031733539016, |
|
"eval_loss": 3.2674996852874756, |
|
"eval_runtime": 238.9338, |
|
"eval_samples_per_second": 20.926, |
|
"eval_steps_per_second": 2.616, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 27.6, |
|
"learning_rate": 3.62008688208517e-05, |
|
"loss": 3.1748, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 27.6, |
|
"eval_accuracy": 0.4508367080874434, |
|
"eval_loss": 3.253206491470337, |
|
"eval_runtime": 239.5701, |
|
"eval_samples_per_second": 20.871, |
|
"eval_steps_per_second": 2.609, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 27.8, |
|
"learning_rate": 3.61008664207941e-05, |
|
"loss": 3.1795, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 27.8, |
|
"eval_accuracy": 0.4520478699877418, |
|
"eval_loss": 3.2528598308563232, |
|
"eval_runtime": 238.6747, |
|
"eval_samples_per_second": 20.949, |
|
"eval_steps_per_second": 2.619, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 3.6000864020736496e-05, |
|
"loss": 3.1664, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.45429478372104043, |
|
"eval_loss": 3.2276272773742676, |
|
"eval_runtime": 239.9976, |
|
"eval_samples_per_second": 20.834, |
|
"eval_steps_per_second": 2.604, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 28.2, |
|
"learning_rate": 3.59008616206789e-05, |
|
"loss": 3.0937, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 28.2, |
|
"eval_accuracy": 0.45732565941802994, |
|
"eval_loss": 3.2136926651000977, |
|
"eval_runtime": 238.8553, |
|
"eval_samples_per_second": 20.933, |
|
"eval_steps_per_second": 2.617, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 28.4, |
|
"learning_rate": 3.58008592206213e-05, |
|
"loss": 3.1302, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 28.4, |
|
"eval_accuracy": 0.45828199538748304, |
|
"eval_loss": 3.2121808528900146, |
|
"eval_runtime": 240.2268, |
|
"eval_samples_per_second": 20.814, |
|
"eval_steps_per_second": 2.602, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 28.6, |
|
"learning_rate": 3.570085682056369e-05, |
|
"loss": 3.1121, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 28.6, |
|
"eval_accuracy": 0.45979415210572067, |
|
"eval_loss": 3.2013251781463623, |
|
"eval_runtime": 239.2805, |
|
"eval_samples_per_second": 20.896, |
|
"eval_steps_per_second": 2.612, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"learning_rate": 3.560085442050609e-05, |
|
"loss": 3.1117, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"eval_accuracy": 0.46116117510718196, |
|
"eval_loss": 3.1767895221710205, |
|
"eval_runtime": 240.4898, |
|
"eval_samples_per_second": 20.791, |
|
"eval_steps_per_second": 2.599, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 3.550085202044849e-05, |
|
"loss": 3.0967, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.46315271898609695, |
|
"eval_loss": 3.175037384033203, |
|
"eval_runtime": 239.5716, |
|
"eval_samples_per_second": 20.871, |
|
"eval_steps_per_second": 2.609, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 29.2, |
|
"learning_rate": 3.5400849620390894e-05, |
|
"loss": 3.0433, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 29.2, |
|
"eval_accuracy": 0.46477897932081197, |
|
"eval_loss": 3.1635282039642334, |
|
"eval_runtime": 239.7456, |
|
"eval_samples_per_second": 20.855, |
|
"eval_steps_per_second": 2.607, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 29.4, |
|
"learning_rate": 3.530084722033329e-05, |
|
"loss": 3.0561, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 29.4, |
|
"eval_accuracy": 0.46795523906408953, |
|
"eval_loss": 3.1412038803100586, |
|
"eval_runtime": 238.8196, |
|
"eval_samples_per_second": 20.936, |
|
"eval_steps_per_second": 2.617, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 29.6, |
|
"learning_rate": 3.520084482027569e-05, |
|
"loss": 3.0527, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 29.6, |
|
"eval_accuracy": 0.46849550206201507, |
|
"eval_loss": 3.1406843662261963, |
|
"eval_runtime": 239.7188, |
|
"eval_samples_per_second": 20.858, |
|
"eval_steps_per_second": 2.607, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 29.8, |
|
"learning_rate": 3.510084242021809e-05, |
|
"loss": 3.0449, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 29.8, |
|
"eval_accuracy": 0.4677367515550262, |
|
"eval_loss": 3.13971209526062, |
|
"eval_runtime": 239.4737, |
|
"eval_samples_per_second": 20.879, |
|
"eval_steps_per_second": 2.61, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 3.500084002016048e-05, |
|
"loss": 3.0446, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.47304644334291035, |
|
"eval_loss": 3.10322642326355, |
|
"eval_runtime": 238.8991, |
|
"eval_samples_per_second": 20.929, |
|
"eval_steps_per_second": 2.616, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 30.2, |
|
"learning_rate": 3.4900837620102884e-05, |
|
"loss": 3.0095, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 30.2, |
|
"eval_accuracy": 0.4724041544924169, |
|
"eval_loss": 3.103961706161499, |
|
"eval_runtime": 240.0302, |
|
"eval_samples_per_second": 20.831, |
|
"eval_steps_per_second": 2.604, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 30.4, |
|
"learning_rate": 3.480083522004528e-05, |
|
"loss": 2.9953, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 30.4, |
|
"eval_accuracy": 0.475397151249092, |
|
"eval_loss": 3.088655948638916, |
|
"eval_runtime": 238.9923, |
|
"eval_samples_per_second": 20.921, |
|
"eval_steps_per_second": 2.615, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 30.6, |
|
"learning_rate": 3.4700832819987686e-05, |
|
"loss": 2.9744, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 30.6, |
|
"eval_accuracy": 0.4752069375763294, |
|
"eval_loss": 3.0858309268951416, |
|
"eval_runtime": 239.2927, |
|
"eval_samples_per_second": 20.895, |
|
"eval_steps_per_second": 2.612, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 30.8, |
|
"learning_rate": 3.460083041993008e-05, |
|
"loss": 2.9941, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 30.8, |
|
"eval_accuracy": 0.4753717486767733, |
|
"eval_loss": 3.087771415710449, |
|
"eval_runtime": 239.4588, |
|
"eval_samples_per_second": 20.88, |
|
"eval_steps_per_second": 2.61, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 3.450082801987248e-05, |
|
"loss": 2.982, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.47787563198971167, |
|
"eval_loss": 3.0625386238098145, |
|
"eval_runtime": 239.7845, |
|
"eval_samples_per_second": 20.852, |
|
"eval_steps_per_second": 2.607, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 31.2, |
|
"learning_rate": 3.4400825619814875e-05, |
|
"loss": 2.9441, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 31.2, |
|
"eval_accuracy": 0.4781303466693604, |
|
"eval_loss": 3.0685055255889893, |
|
"eval_runtime": 239.71, |
|
"eval_samples_per_second": 20.859, |
|
"eval_steps_per_second": 2.607, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 31.4, |
|
"learning_rate": 3.4300823219757275e-05, |
|
"loss": 2.954, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 31.4, |
|
"eval_accuracy": 0.48178897637795276, |
|
"eval_loss": 3.048638105392456, |
|
"eval_runtime": 239.3662, |
|
"eval_samples_per_second": 20.888, |
|
"eval_steps_per_second": 2.611, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 31.6, |
|
"learning_rate": 3.4200820819699676e-05, |
|
"loss": 2.9435, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 31.6, |
|
"eval_accuracy": 0.4798843674965247, |
|
"eval_loss": 3.0472371578216553, |
|
"eval_runtime": 239.0072, |
|
"eval_samples_per_second": 20.92, |
|
"eval_steps_per_second": 2.615, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 31.8, |
|
"learning_rate": 3.410081841964207e-05, |
|
"loss": 2.9319, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 31.8, |
|
"eval_accuracy": 0.48184346093459357, |
|
"eval_loss": 3.0237534046173096, |
|
"eval_runtime": 239.7344, |
|
"eval_samples_per_second": 20.856, |
|
"eval_steps_per_second": 2.607, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 3.400081601958447e-05, |
|
"loss": 2.9331, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.48462323691055215, |
|
"eval_loss": 3.0231850147247314, |
|
"eval_runtime": 238.7423, |
|
"eval_samples_per_second": 20.943, |
|
"eval_steps_per_second": 2.618, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 32.2, |
|
"learning_rate": 3.390081361952687e-05, |
|
"loss": 2.9014, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 32.2, |
|
"eval_accuracy": 0.48449900049580463, |
|
"eval_loss": 3.018786907196045, |
|
"eval_runtime": 238.4212, |
|
"eval_samples_per_second": 20.971, |
|
"eval_steps_per_second": 2.621, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 32.4, |
|
"learning_rate": 3.380081121946927e-05, |
|
"loss": 2.8917, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 32.4, |
|
"eval_accuracy": 0.4857029500949358, |
|
"eval_loss": 3.0059878826141357, |
|
"eval_runtime": 239.3143, |
|
"eval_samples_per_second": 20.893, |
|
"eval_steps_per_second": 2.612, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 32.6, |
|
"learning_rate": 3.3700808819411667e-05, |
|
"loss": 2.8968, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 32.6, |
|
"eval_accuracy": 0.4856941143840495, |
|
"eval_loss": 3.0099101066589355, |
|
"eval_runtime": 238.6193, |
|
"eval_samples_per_second": 20.954, |
|
"eval_steps_per_second": 2.619, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 32.8, |
|
"learning_rate": 3.360080641935407e-05, |
|
"loss": 2.8961, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 32.8, |
|
"eval_accuracy": 0.4874124592319167, |
|
"eval_loss": 2.9975407123565674, |
|
"eval_runtime": 239.4937, |
|
"eval_samples_per_second": 20.877, |
|
"eval_steps_per_second": 2.61, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"learning_rate": 3.350080401929647e-05, |
|
"loss": 2.8857, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.488242813118945, |
|
"eval_loss": 2.9877071380615234, |
|
"eval_runtime": 238.7976, |
|
"eval_samples_per_second": 20.938, |
|
"eval_steps_per_second": 2.617, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 33.2, |
|
"learning_rate": 3.340080161923886e-05, |
|
"loss": 2.8631, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 33.2, |
|
"eval_accuracy": 0.49157931890808476, |
|
"eval_loss": 2.9715187549591064, |
|
"eval_runtime": 238.9736, |
|
"eval_samples_per_second": 20.923, |
|
"eval_steps_per_second": 2.615, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 33.4, |
|
"learning_rate": 3.330079921918126e-05, |
|
"loss": 2.8642, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 33.4, |
|
"eval_accuracy": 0.49055765035247234, |
|
"eval_loss": 2.9709725379943848, |
|
"eval_runtime": 239.361, |
|
"eval_samples_per_second": 20.889, |
|
"eval_steps_per_second": 2.611, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 33.6, |
|
"learning_rate": 3.320079681912366e-05, |
|
"loss": 2.859, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 33.6, |
|
"eval_accuracy": 0.4905766366229897, |
|
"eval_loss": 2.974759101867676, |
|
"eval_runtime": 239.3057, |
|
"eval_samples_per_second": 20.894, |
|
"eval_steps_per_second": 2.612, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 33.8, |
|
"learning_rate": 3.3100794419066064e-05, |
|
"loss": 2.8308, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 33.8, |
|
"eval_accuracy": 0.4920043569545519, |
|
"eval_loss": 2.9632480144500732, |
|
"eval_runtime": 239.1871, |
|
"eval_samples_per_second": 20.904, |
|
"eval_steps_per_second": 2.613, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 3.300079201900846e-05, |
|
"loss": 2.8486, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.4936041140507698, |
|
"eval_loss": 2.947859048843384, |
|
"eval_runtime": 239.9603, |
|
"eval_samples_per_second": 20.837, |
|
"eval_steps_per_second": 2.605, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 34.2, |
|
"learning_rate": 3.290078961895085e-05, |
|
"loss": 2.8018, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 34.2, |
|
"eval_accuracy": 0.49392006520647136, |
|
"eval_loss": 2.9504497051239014, |
|
"eval_runtime": 239.0602, |
|
"eval_samples_per_second": 20.915, |
|
"eval_steps_per_second": 2.614, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 34.4, |
|
"learning_rate": 3.280078721889325e-05, |
|
"loss": 2.8078, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 34.4, |
|
"eval_accuracy": 0.49590170397025385, |
|
"eval_loss": 2.939467668533325, |
|
"eval_runtime": 239.5778, |
|
"eval_samples_per_second": 20.87, |
|
"eval_steps_per_second": 2.609, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 34.6, |
|
"learning_rate": 3.2700784818835654e-05, |
|
"loss": 2.8194, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 34.6, |
|
"eval_accuracy": 0.49452836394919286, |
|
"eval_loss": 2.9382665157318115, |
|
"eval_runtime": 239.129, |
|
"eval_samples_per_second": 20.909, |
|
"eval_steps_per_second": 2.614, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 34.8, |
|
"learning_rate": 3.2600782418778055e-05, |
|
"loss": 2.8219, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 34.8, |
|
"eval_accuracy": 0.4967188432146814, |
|
"eval_loss": 2.9314422607421875, |
|
"eval_runtime": 239.8662, |
|
"eval_samples_per_second": 20.845, |
|
"eval_steps_per_second": 2.606, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 3.250078001872045e-05, |
|
"loss": 2.8107, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.4967919542148713, |
|
"eval_loss": 2.9249651432037354, |
|
"eval_runtime": 239.2643, |
|
"eval_samples_per_second": 20.897, |
|
"eval_steps_per_second": 2.612, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 35.2, |
|
"learning_rate": 3.240077761866285e-05, |
|
"loss": 2.7785, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 35.2, |
|
"eval_accuracy": 0.4987307763864494, |
|
"eval_loss": 2.918447256088257, |
|
"eval_runtime": 238.6175, |
|
"eval_samples_per_second": 20.954, |
|
"eval_steps_per_second": 2.619, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 35.4, |
|
"learning_rate": 3.230077521860525e-05, |
|
"loss": 2.7726, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 35.4, |
|
"eval_accuracy": 0.499268800358809, |
|
"eval_loss": 2.9100756645202637, |
|
"eval_runtime": 239.3281, |
|
"eval_samples_per_second": 20.892, |
|
"eval_steps_per_second": 2.611, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 35.6, |
|
"learning_rate": 3.2200772818547644e-05, |
|
"loss": 2.7508, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 35.6, |
|
"eval_accuracy": 0.4988590950922179, |
|
"eval_loss": 2.9117419719696045, |
|
"eval_runtime": 239.6738, |
|
"eval_samples_per_second": 20.862, |
|
"eval_steps_per_second": 2.608, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 35.8, |
|
"learning_rate": 3.2100770418490045e-05, |
|
"loss": 2.7862, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 35.8, |
|
"eval_accuracy": 0.5026493096646942, |
|
"eval_loss": 2.8842239379882812, |
|
"eval_runtime": 239.5089, |
|
"eval_samples_per_second": 20.876, |
|
"eval_steps_per_second": 2.61, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 3.200076801843244e-05, |
|
"loss": 2.7591, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.501588354174561, |
|
"eval_loss": 2.894536018371582, |
|
"eval_runtime": 238.9394, |
|
"eval_samples_per_second": 20.926, |
|
"eval_steps_per_second": 2.616, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 36.2, |
|
"learning_rate": 3.190076561837485e-05, |
|
"loss": 2.737, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 36.2, |
|
"eval_accuracy": 0.5023016269467401, |
|
"eval_loss": 2.872495651245117, |
|
"eval_runtime": 239.9672, |
|
"eval_samples_per_second": 20.836, |
|
"eval_steps_per_second": 2.605, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 36.4, |
|
"learning_rate": 3.180076321831724e-05, |
|
"loss": 2.7336, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 36.4, |
|
"eval_accuracy": 0.5033328195433141, |
|
"eval_loss": 2.881958484649658, |
|
"eval_runtime": 239.4309, |
|
"eval_samples_per_second": 20.883, |
|
"eval_steps_per_second": 2.61, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 36.6, |
|
"learning_rate": 3.170076081825964e-05, |
|
"loss": 2.7505, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 36.6, |
|
"eval_accuracy": 0.5042960942014569, |
|
"eval_loss": 2.8711090087890625, |
|
"eval_runtime": 239.0632, |
|
"eval_samples_per_second": 20.915, |
|
"eval_steps_per_second": 2.614, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 36.8, |
|
"learning_rate": 3.1600758418202036e-05, |
|
"loss": 2.7342, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 36.8, |
|
"eval_accuracy": 0.5062909670999637, |
|
"eval_loss": 2.8555378913879395, |
|
"eval_runtime": 238.9919, |
|
"eval_samples_per_second": 20.921, |
|
"eval_steps_per_second": 2.615, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 3.1500756018144436e-05, |
|
"loss": 2.7344, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.507249352454356, |
|
"eval_loss": 2.843308687210083, |
|
"eval_runtime": 239.4594, |
|
"eval_samples_per_second": 20.88, |
|
"eval_steps_per_second": 2.61, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 37.2, |
|
"learning_rate": 3.140075361808684e-05, |
|
"loss": 2.7074, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 37.2, |
|
"eval_accuracy": 0.5069092076602495, |
|
"eval_loss": 2.843485116958618, |
|
"eval_runtime": 238.9804, |
|
"eval_samples_per_second": 20.922, |
|
"eval_steps_per_second": 2.615, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 37.4, |
|
"learning_rate": 3.130075121802923e-05, |
|
"loss": 2.7078, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 37.4, |
|
"eval_accuracy": 0.5066123820683948, |
|
"eval_loss": 2.8465309143066406, |
|
"eval_runtime": 239.3346, |
|
"eval_samples_per_second": 20.891, |
|
"eval_steps_per_second": 2.611, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 37.6, |
|
"learning_rate": 3.120074881797163e-05, |
|
"loss": 2.708, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 37.6, |
|
"eval_accuracy": 0.5065022031997726, |
|
"eval_loss": 2.8465352058410645, |
|
"eval_runtime": 239.3543, |
|
"eval_samples_per_second": 20.89, |
|
"eval_steps_per_second": 2.611, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 37.8, |
|
"learning_rate": 3.110074641791403e-05, |
|
"loss": 2.6835, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 37.8, |
|
"eval_accuracy": 0.5073458075648071, |
|
"eval_loss": 2.8454036712646484, |
|
"eval_runtime": 238.5106, |
|
"eval_samples_per_second": 20.963, |
|
"eval_steps_per_second": 2.62, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 3.1000744017856434e-05, |
|
"loss": 2.7154, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.5081197929815703, |
|
"eval_loss": 2.8387258052825928, |
|
"eval_runtime": 239.2141, |
|
"eval_samples_per_second": 20.902, |
|
"eval_steps_per_second": 2.613, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 38.2, |
|
"learning_rate": 3.090074161779883e-05, |
|
"loss": 2.6777, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 38.2, |
|
"eval_accuracy": 0.5092756865772764, |
|
"eval_loss": 2.83198618888855, |
|
"eval_runtime": 239.2553, |
|
"eval_samples_per_second": 20.898, |
|
"eval_steps_per_second": 2.612, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 38.4, |
|
"learning_rate": 3.080073921774123e-05, |
|
"loss": 2.6722, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 38.4, |
|
"eval_accuracy": 0.5109568775574441, |
|
"eval_loss": 2.820936441421509, |
|
"eval_runtime": 239.1462, |
|
"eval_samples_per_second": 20.908, |
|
"eval_steps_per_second": 2.613, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 38.6, |
|
"learning_rate": 3.070073681768363e-05, |
|
"loss": 2.6708, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 38.6, |
|
"eval_accuracy": 0.5117181222012486, |
|
"eval_loss": 2.8265380859375, |
|
"eval_runtime": 239.0035, |
|
"eval_samples_per_second": 20.92, |
|
"eval_steps_per_second": 2.615, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 38.8, |
|
"learning_rate": 3.060073441762602e-05, |
|
"loss": 2.6511, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 38.8, |
|
"eval_accuracy": 0.5104035925348941, |
|
"eval_loss": 2.8190081119537354, |
|
"eval_runtime": 238.9835, |
|
"eval_samples_per_second": 20.922, |
|
"eval_steps_per_second": 2.615, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"learning_rate": 3.0500732017568424e-05, |
|
"loss": 2.6588, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.511878533029498, |
|
"eval_loss": 2.8074145317077637, |
|
"eval_runtime": 238.8839, |
|
"eval_samples_per_second": 20.931, |
|
"eval_steps_per_second": 2.616, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 39.2, |
|
"learning_rate": 3.040072961751082e-05, |
|
"loss": 2.6394, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 39.2, |
|
"eval_accuracy": 0.5138737260593822, |
|
"eval_loss": 2.804438591003418, |
|
"eval_runtime": 239.6863, |
|
"eval_samples_per_second": 20.861, |
|
"eval_steps_per_second": 2.608, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 39.4, |
|
"learning_rate": 3.0300727217453222e-05, |
|
"loss": 2.6258, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 39.4, |
|
"eval_accuracy": 0.5143354155135605, |
|
"eval_loss": 2.796783208847046, |
|
"eval_runtime": 239.0541, |
|
"eval_samples_per_second": 20.916, |
|
"eval_steps_per_second": 2.614, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 39.6, |
|
"learning_rate": 3.020072481739562e-05, |
|
"loss": 2.6519, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 39.6, |
|
"eval_accuracy": 0.512845071487533, |
|
"eval_loss": 2.802494525909424, |
|
"eval_runtime": 239.4204, |
|
"eval_samples_per_second": 20.884, |
|
"eval_steps_per_second": 2.61, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 39.8, |
|
"learning_rate": 3.0100722417338017e-05, |
|
"loss": 2.637, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 39.8, |
|
"eval_accuracy": 0.515363185645707, |
|
"eval_loss": 2.787698268890381, |
|
"eval_runtime": 239.9441, |
|
"eval_samples_per_second": 20.838, |
|
"eval_steps_per_second": 2.605, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 3.0000720017280414e-05, |
|
"loss": 2.641, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.5151119273466245, |
|
"eval_loss": 2.7949695587158203, |
|
"eval_runtime": 238.5726, |
|
"eval_samples_per_second": 20.958, |
|
"eval_steps_per_second": 2.62, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 40.2, |
|
"learning_rate": 2.9900717617222812e-05, |
|
"loss": 2.6178, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 40.2, |
|
"eval_accuracy": 0.5161491701558796, |
|
"eval_loss": 2.79156494140625, |
|
"eval_runtime": 239.0536, |
|
"eval_samples_per_second": 20.916, |
|
"eval_steps_per_second": 2.614, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 40.4, |
|
"learning_rate": 2.9800715217165216e-05, |
|
"loss": 2.6176, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 40.4, |
|
"eval_accuracy": 0.5171055870046763, |
|
"eval_loss": 2.77885103225708, |
|
"eval_runtime": 238.5263, |
|
"eval_samples_per_second": 20.962, |
|
"eval_steps_per_second": 2.62, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 40.6, |
|
"learning_rate": 2.9700712817107613e-05, |
|
"loss": 2.6063, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 40.6, |
|
"eval_accuracy": 0.5157570081079291, |
|
"eval_loss": 2.7819836139678955, |
|
"eval_runtime": 239.7277, |
|
"eval_samples_per_second": 20.857, |
|
"eval_steps_per_second": 2.607, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 40.8, |
|
"learning_rate": 2.960071041705001e-05, |
|
"loss": 2.6006, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 40.8, |
|
"eval_accuracy": 0.5179590168867658, |
|
"eval_loss": 2.7666218280792236, |
|
"eval_runtime": 239.777, |
|
"eval_samples_per_second": 20.853, |
|
"eval_steps_per_second": 2.607, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"learning_rate": 2.9500708016992408e-05, |
|
"loss": 2.5968, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.5197707375448762, |
|
"eval_loss": 2.7592687606811523, |
|
"eval_runtime": 240.0119, |
|
"eval_samples_per_second": 20.832, |
|
"eval_steps_per_second": 2.604, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 41.2, |
|
"learning_rate": 2.940070561693481e-05, |
|
"loss": 2.5773, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 41.2, |
|
"eval_accuracy": 0.5210189873417721, |
|
"eval_loss": 2.751286745071411, |
|
"eval_runtime": 238.9629, |
|
"eval_samples_per_second": 20.924, |
|
"eval_steps_per_second": 2.615, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 41.4, |
|
"learning_rate": 2.9300703216877206e-05, |
|
"loss": 2.6042, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 41.4, |
|
"eval_accuracy": 0.5182829688096432, |
|
"eval_loss": 2.756362199783325, |
|
"eval_runtime": 238.761, |
|
"eval_samples_per_second": 20.941, |
|
"eval_steps_per_second": 2.618, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 41.6, |
|
"learning_rate": 2.9200700816819604e-05, |
|
"loss": 2.5869, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 41.6, |
|
"eval_accuracy": 0.5194518990938655, |
|
"eval_loss": 2.7505102157592773, |
|
"eval_runtime": 239.377, |
|
"eval_samples_per_second": 20.888, |
|
"eval_steps_per_second": 2.611, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 41.8, |
|
"learning_rate": 2.9100698416762e-05, |
|
"loss": 2.5678, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 41.8, |
|
"eval_accuracy": 0.5229825642971183, |
|
"eval_loss": 2.7337899208068848, |
|
"eval_runtime": 239.8535, |
|
"eval_samples_per_second": 20.846, |
|
"eval_steps_per_second": 2.606, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"learning_rate": 2.9000696016704405e-05, |
|
"loss": 2.5901, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.5217317192506195, |
|
"eval_loss": 2.742098808288574, |
|
"eval_runtime": 239.5301, |
|
"eval_samples_per_second": 20.874, |
|
"eval_steps_per_second": 2.609, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 42.2, |
|
"learning_rate": 2.8900693616646803e-05, |
|
"loss": 2.5389, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 42.2, |
|
"eval_accuracy": 0.5203878282565099, |
|
"eval_loss": 2.750554323196411, |
|
"eval_runtime": 239.9053, |
|
"eval_samples_per_second": 20.842, |
|
"eval_steps_per_second": 2.605, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 42.4, |
|
"learning_rate": 2.88006912165892e-05, |
|
"loss": 2.542, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 42.4, |
|
"eval_accuracy": 0.5211733971231627, |
|
"eval_loss": 2.745476007461548, |
|
"eval_runtime": 239.1309, |
|
"eval_samples_per_second": 20.909, |
|
"eval_steps_per_second": 2.614, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 42.6, |
|
"learning_rate": 2.8700688816531594e-05, |
|
"loss": 2.5589, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 42.6, |
|
"eval_accuracy": 0.5228571609000146, |
|
"eval_loss": 2.731588363647461, |
|
"eval_runtime": 239.7976, |
|
"eval_samples_per_second": 20.851, |
|
"eval_steps_per_second": 2.606, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 42.8, |
|
"learning_rate": 2.8600686416473998e-05, |
|
"loss": 2.5723, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 42.8, |
|
"eval_accuracy": 0.5228206356006956, |
|
"eval_loss": 2.7301199436187744, |
|
"eval_runtime": 240.5491, |
|
"eval_samples_per_second": 20.786, |
|
"eval_steps_per_second": 2.598, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"learning_rate": 2.8500684016416396e-05, |
|
"loss": 2.5388, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.5227556727866768, |
|
"eval_loss": 2.7347464561462402, |
|
"eval_runtime": 239.7747, |
|
"eval_samples_per_second": 20.853, |
|
"eval_steps_per_second": 2.607, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 43.2, |
|
"learning_rate": 2.8400681616358793e-05, |
|
"loss": 2.5275, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 43.2, |
|
"eval_accuracy": 0.5226272952884897, |
|
"eval_loss": 2.7289857864379883, |
|
"eval_runtime": 239.6863, |
|
"eval_samples_per_second": 20.861, |
|
"eval_steps_per_second": 2.608, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 43.4, |
|
"learning_rate": 2.830067921630119e-05, |
|
"loss": 2.5344, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 43.4, |
|
"eval_accuracy": 0.523518819169507, |
|
"eval_loss": 2.724255084991455, |
|
"eval_runtime": 238.5606, |
|
"eval_samples_per_second": 20.959, |
|
"eval_steps_per_second": 2.62, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 43.6, |
|
"learning_rate": 2.8200676816243595e-05, |
|
"loss": 2.5322, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 43.6, |
|
"eval_accuracy": 0.5245920745920746, |
|
"eval_loss": 2.7146334648132324, |
|
"eval_runtime": 238.7987, |
|
"eval_samples_per_second": 20.938, |
|
"eval_steps_per_second": 2.617, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 43.8, |
|
"learning_rate": 2.810067441618599e-05, |
|
"loss": 2.5245, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 43.8, |
|
"eval_accuracy": 0.5248407200904217, |
|
"eval_loss": 2.7163941860198975, |
|
"eval_runtime": 238.7952, |
|
"eval_samples_per_second": 20.938, |
|
"eval_steps_per_second": 2.617, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"learning_rate": 2.8000672016128386e-05, |
|
"loss": 2.5264, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.5245739431165456, |
|
"eval_loss": 2.709345817565918, |
|
"eval_runtime": 240.3791, |
|
"eval_samples_per_second": 20.8, |
|
"eval_steps_per_second": 2.6, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 44.2, |
|
"learning_rate": 2.7900669616070783e-05, |
|
"loss": 2.5049, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 44.2, |
|
"eval_accuracy": 0.5277196208130921, |
|
"eval_loss": 2.7002735137939453, |
|
"eval_runtime": 239.5615, |
|
"eval_samples_per_second": 20.871, |
|
"eval_steps_per_second": 2.609, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 44.4, |
|
"learning_rate": 2.7800667216013188e-05, |
|
"loss": 2.4978, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 44.4, |
|
"eval_accuracy": 0.5264407968087224, |
|
"eval_loss": 2.7004919052124023, |
|
"eval_runtime": 238.9114, |
|
"eval_samples_per_second": 20.928, |
|
"eval_steps_per_second": 2.616, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 44.6, |
|
"learning_rate": 2.7700664815955585e-05, |
|
"loss": 2.5084, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 44.6, |
|
"eval_accuracy": 0.5251704545454545, |
|
"eval_loss": 2.7114479541778564, |
|
"eval_runtime": 239.0613, |
|
"eval_samples_per_second": 20.915, |
|
"eval_steps_per_second": 2.614, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 44.8, |
|
"learning_rate": 2.7600662415897982e-05, |
|
"loss": 2.5068, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 44.8, |
|
"eval_accuracy": 0.5272083200585458, |
|
"eval_loss": 2.6994175910949707, |
|
"eval_runtime": 239.9458, |
|
"eval_samples_per_second": 20.838, |
|
"eval_steps_per_second": 2.605, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 2.750066001584038e-05, |
|
"loss": 2.5117, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.5278801603168437, |
|
"eval_loss": 2.6876628398895264, |
|
"eval_runtime": 238.9585, |
|
"eval_samples_per_second": 20.924, |
|
"eval_steps_per_second": 2.616, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 45.2, |
|
"learning_rate": 2.740065761578278e-05, |
|
"loss": 2.4844, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 45.2, |
|
"eval_accuracy": 0.5281054209737237, |
|
"eval_loss": 2.6892876625061035, |
|
"eval_runtime": 239.8784, |
|
"eval_samples_per_second": 20.844, |
|
"eval_steps_per_second": 2.605, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 45.4, |
|
"learning_rate": 2.7300655215725178e-05, |
|
"loss": 2.4846, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 45.4, |
|
"eval_accuracy": 0.5291772541468727, |
|
"eval_loss": 2.6861720085144043, |
|
"eval_runtime": 239.6956, |
|
"eval_samples_per_second": 20.86, |
|
"eval_steps_per_second": 2.607, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 45.6, |
|
"learning_rate": 2.7200652815667575e-05, |
|
"loss": 2.4671, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 45.6, |
|
"eval_accuracy": 0.5287297495387571, |
|
"eval_loss": 2.6842663288116455, |
|
"eval_runtime": 238.9115, |
|
"eval_samples_per_second": 20.928, |
|
"eval_steps_per_second": 2.616, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 45.8, |
|
"learning_rate": 2.7100650415609973e-05, |
|
"loss": 2.4758, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 45.8, |
|
"eval_accuracy": 0.5284488319441458, |
|
"eval_loss": 2.6910390853881836, |
|
"eval_runtime": 239.5137, |
|
"eval_samples_per_second": 20.876, |
|
"eval_steps_per_second": 2.609, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"learning_rate": 2.7000648015552377e-05, |
|
"loss": 2.4827, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.5292716595750058, |
|
"eval_loss": 2.6797308921813965, |
|
"eval_runtime": 239.2914, |
|
"eval_samples_per_second": 20.895, |
|
"eval_steps_per_second": 2.612, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 46.2, |
|
"learning_rate": 2.6900645615494774e-05, |
|
"loss": 2.4586, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 46.2, |
|
"eval_accuracy": 0.5293859870362431, |
|
"eval_loss": 2.6884403228759766, |
|
"eval_runtime": 239.2735, |
|
"eval_samples_per_second": 20.897, |
|
"eval_steps_per_second": 2.612, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 46.4, |
|
"learning_rate": 2.6800643215437172e-05, |
|
"loss": 2.4539, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 46.4, |
|
"eval_accuracy": 0.5301808865428363, |
|
"eval_loss": 2.6732141971588135, |
|
"eval_runtime": 239.5067, |
|
"eval_samples_per_second": 20.876, |
|
"eval_steps_per_second": 2.61, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 46.6, |
|
"learning_rate": 2.670064081537957e-05, |
|
"loss": 2.4543, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 46.6, |
|
"eval_accuracy": 0.532382243521029, |
|
"eval_loss": 2.6684887409210205, |
|
"eval_runtime": 238.6531, |
|
"eval_samples_per_second": 20.951, |
|
"eval_steps_per_second": 2.619, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 46.8, |
|
"learning_rate": 2.660063841532197e-05, |
|
"loss": 2.4635, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 46.8, |
|
"eval_accuracy": 0.5319910740175746, |
|
"eval_loss": 2.6664233207702637, |
|
"eval_runtime": 239.6023, |
|
"eval_samples_per_second": 20.868, |
|
"eval_steps_per_second": 2.608, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"learning_rate": 2.6500636015264367e-05, |
|
"loss": 2.4616, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.5319308055436347, |
|
"eval_loss": 2.652108907699585, |
|
"eval_runtime": 239.838, |
|
"eval_samples_per_second": 20.847, |
|
"eval_steps_per_second": 2.606, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 47.2, |
|
"learning_rate": 2.6400633615206765e-05, |
|
"loss": 2.4322, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 47.2, |
|
"eval_accuracy": 0.5311958432900132, |
|
"eval_loss": 2.6661252975463867, |
|
"eval_runtime": 238.6884, |
|
"eval_samples_per_second": 20.948, |
|
"eval_steps_per_second": 2.618, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 47.4, |
|
"learning_rate": 2.6300631215149162e-05, |
|
"loss": 2.4427, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 47.4, |
|
"eval_accuracy": 0.5323433070866141, |
|
"eval_loss": 2.6600053310394287, |
|
"eval_runtime": 240.0825, |
|
"eval_samples_per_second": 20.826, |
|
"eval_steps_per_second": 2.603, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 47.6, |
|
"learning_rate": 2.6200628815091566e-05, |
|
"loss": 2.4388, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 47.6, |
|
"eval_accuracy": 0.5331623073986071, |
|
"eval_loss": 2.648709535598755, |
|
"eval_runtime": 240.2521, |
|
"eval_samples_per_second": 20.811, |
|
"eval_steps_per_second": 2.601, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 47.8, |
|
"learning_rate": 2.6100626415033964e-05, |
|
"loss": 2.4434, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 47.8, |
|
"eval_accuracy": 0.5311764779890039, |
|
"eval_loss": 2.6652913093566895, |
|
"eval_runtime": 239.5743, |
|
"eval_samples_per_second": 20.87, |
|
"eval_steps_per_second": 2.609, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 2.600062401497636e-05, |
|
"loss": 2.4288, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.5353168737222834, |
|
"eval_loss": 2.6441142559051514, |
|
"eval_runtime": 239.7847, |
|
"eval_samples_per_second": 20.852, |
|
"eval_steps_per_second": 2.607, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 48.2, |
|
"learning_rate": 2.590062161491876e-05, |
|
"loss": 2.4, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 48.2, |
|
"eval_accuracy": 0.5336576378767517, |
|
"eval_loss": 2.6524970531463623, |
|
"eval_runtime": 240.1766, |
|
"eval_samples_per_second": 20.818, |
|
"eval_steps_per_second": 2.602, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 48.4, |
|
"learning_rate": 2.580061921486116e-05, |
|
"loss": 2.4151, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 48.4, |
|
"eval_accuracy": 0.5335010622480515, |
|
"eval_loss": 2.6437137126922607, |
|
"eval_runtime": 240.2765, |
|
"eval_samples_per_second": 20.809, |
|
"eval_steps_per_second": 2.601, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 48.6, |
|
"learning_rate": 2.5700616814803557e-05, |
|
"loss": 2.4238, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 48.6, |
|
"eval_accuracy": 0.5363357667453298, |
|
"eval_loss": 2.6379194259643555, |
|
"eval_runtime": 239.3331, |
|
"eval_samples_per_second": 20.891, |
|
"eval_steps_per_second": 2.611, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 48.8, |
|
"learning_rate": 2.5600614414745954e-05, |
|
"loss": 2.4171, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 48.8, |
|
"eval_accuracy": 0.5340343564481496, |
|
"eval_loss": 2.648090124130249, |
|
"eval_runtime": 240.2867, |
|
"eval_samples_per_second": 20.808, |
|
"eval_steps_per_second": 2.601, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"learning_rate": 2.550061201468835e-05, |
|
"loss": 2.4183, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.5346044128042008, |
|
"eval_loss": 2.644219160079956, |
|
"eval_runtime": 241.3259, |
|
"eval_samples_per_second": 20.719, |
|
"eval_steps_per_second": 2.59, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 49.2, |
|
"learning_rate": 2.5400609614630756e-05, |
|
"loss": 2.3919, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 49.2, |
|
"eval_accuracy": 0.5362844450065609, |
|
"eval_loss": 2.629089832305908, |
|
"eval_runtime": 239.9635, |
|
"eval_samples_per_second": 20.836, |
|
"eval_steps_per_second": 2.605, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 49.4, |
|
"learning_rate": 2.5300607214573153e-05, |
|
"loss": 2.3972, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 49.4, |
|
"eval_accuracy": 0.5351801935447217, |
|
"eval_loss": 2.6336562633514404, |
|
"eval_runtime": 241.0833, |
|
"eval_samples_per_second": 20.74, |
|
"eval_steps_per_second": 2.592, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 49.6, |
|
"learning_rate": 2.520060481451555e-05, |
|
"loss": 2.3952, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 49.6, |
|
"eval_accuracy": 0.5374739972158475, |
|
"eval_loss": 2.619722366333008, |
|
"eval_runtime": 240.3225, |
|
"eval_samples_per_second": 20.805, |
|
"eval_steps_per_second": 2.601, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 49.8, |
|
"learning_rate": 2.5100602414457948e-05, |
|
"loss": 2.3916, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 49.8, |
|
"eval_accuracy": 0.5356588926900613, |
|
"eval_loss": 2.6252872943878174, |
|
"eval_runtime": 238.8695, |
|
"eval_samples_per_second": 20.932, |
|
"eval_steps_per_second": 2.616, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 2.500060001440035e-05, |
|
"loss": 2.3955, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.5384588625208724, |
|
"eval_loss": 2.623581647872925, |
|
"eval_runtime": 239.826, |
|
"eval_samples_per_second": 20.848, |
|
"eval_steps_per_second": 2.606, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 50.2, |
|
"learning_rate": 2.4900597614342746e-05, |
|
"loss": 2.3602, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 50.2, |
|
"eval_accuracy": 0.5353075818087105, |
|
"eval_loss": 2.6330502033233643, |
|
"eval_runtime": 238.9948, |
|
"eval_samples_per_second": 20.921, |
|
"eval_steps_per_second": 2.615, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 50.4, |
|
"learning_rate": 2.4800595214285143e-05, |
|
"loss": 2.3826, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 50.4, |
|
"eval_accuracy": 0.536037202954501, |
|
"eval_loss": 2.627243757247925, |
|
"eval_runtime": 239.9878, |
|
"eval_samples_per_second": 20.834, |
|
"eval_steps_per_second": 2.604, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 50.6, |
|
"learning_rate": 2.4700592814227544e-05, |
|
"loss": 2.3774, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 50.6, |
|
"eval_accuracy": 0.538067971866305, |
|
"eval_loss": 2.618502378463745, |
|
"eval_runtime": 239.3755, |
|
"eval_samples_per_second": 20.888, |
|
"eval_steps_per_second": 2.611, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 50.8, |
|
"learning_rate": 2.460059041416994e-05, |
|
"loss": 2.3674, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 50.8, |
|
"eval_accuracy": 0.5392022529377222, |
|
"eval_loss": 2.606363296508789, |
|
"eval_runtime": 239.0192, |
|
"eval_samples_per_second": 20.919, |
|
"eval_steps_per_second": 2.615, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"learning_rate": 2.4500588014112342e-05, |
|
"loss": 2.3612, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.5381538014980092, |
|
"eval_loss": 2.6098031997680664, |
|
"eval_runtime": 239.4732, |
|
"eval_samples_per_second": 20.879, |
|
"eval_steps_per_second": 2.61, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 51.2, |
|
"learning_rate": 2.440058561405474e-05, |
|
"loss": 2.3488, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 51.2, |
|
"eval_accuracy": 0.5395342527125473, |
|
"eval_loss": 2.607774496078491, |
|
"eval_runtime": 238.9635, |
|
"eval_samples_per_second": 20.924, |
|
"eval_steps_per_second": 2.615, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 51.4, |
|
"learning_rate": 2.4300583213997137e-05, |
|
"loss": 2.3546, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 51.4, |
|
"eval_accuracy": 0.5388677484544013, |
|
"eval_loss": 2.614074230194092, |
|
"eval_runtime": 238.9869, |
|
"eval_samples_per_second": 20.922, |
|
"eval_steps_per_second": 2.615, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 51.6, |
|
"learning_rate": 2.4200580813939535e-05, |
|
"loss": 2.3653, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 51.6, |
|
"eval_accuracy": 0.5410012023997633, |
|
"eval_loss": 2.6000912189483643, |
|
"eval_runtime": 239.3164, |
|
"eval_samples_per_second": 20.893, |
|
"eval_steps_per_second": 2.612, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 51.8, |
|
"learning_rate": 2.4100578413881932e-05, |
|
"loss": 2.3517, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 51.8, |
|
"eval_accuracy": 0.5399884734937597, |
|
"eval_loss": 2.603484869003296, |
|
"eval_runtime": 239.7089, |
|
"eval_samples_per_second": 20.859, |
|
"eval_steps_per_second": 2.607, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"learning_rate": 2.4000576013824333e-05, |
|
"loss": 2.3592, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.5414965001054849, |
|
"eval_loss": 2.5917770862579346, |
|
"eval_runtime": 239.3915, |
|
"eval_samples_per_second": 20.886, |
|
"eval_steps_per_second": 2.611, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 52.2, |
|
"learning_rate": 2.390057361376673e-05, |
|
"loss": 2.3439, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 52.2, |
|
"eval_accuracy": 0.5404831606707972, |
|
"eval_loss": 2.604349136352539, |
|
"eval_runtime": 238.9796, |
|
"eval_samples_per_second": 20.922, |
|
"eval_steps_per_second": 2.615, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 52.4, |
|
"learning_rate": 2.380057121370913e-05, |
|
"loss": 2.3341, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 52.4, |
|
"eval_accuracy": 0.5412468175701713, |
|
"eval_loss": 2.5933175086975098, |
|
"eval_runtime": 239.4613, |
|
"eval_samples_per_second": 20.88, |
|
"eval_steps_per_second": 2.61, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 52.6, |
|
"learning_rate": 2.370056881365153e-05, |
|
"loss": 2.3317, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 52.6, |
|
"eval_accuracy": 0.5414899639162706, |
|
"eval_loss": 2.5935215950012207, |
|
"eval_runtime": 239.2429, |
|
"eval_samples_per_second": 20.899, |
|
"eval_steps_per_second": 2.612, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 52.8, |
|
"learning_rate": 2.360056641359393e-05, |
|
"loss": 2.3503, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 52.8, |
|
"eval_accuracy": 0.541917771950174, |
|
"eval_loss": 2.5935444831848145, |
|
"eval_runtime": 239.3726, |
|
"eval_samples_per_second": 20.888, |
|
"eval_steps_per_second": 2.611, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"learning_rate": 2.3500564013536323e-05, |
|
"loss": 2.3295, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.5435171818164625, |
|
"eval_loss": 2.5758824348449707, |
|
"eval_runtime": 239.7503, |
|
"eval_samples_per_second": 20.855, |
|
"eval_steps_per_second": 2.607, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 53.2, |
|
"learning_rate": 2.3400561613478724e-05, |
|
"loss": 2.3229, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 53.2, |
|
"eval_accuracy": 0.5419268270298838, |
|
"eval_loss": 2.584718942642212, |
|
"eval_runtime": 239.549, |
|
"eval_samples_per_second": 20.873, |
|
"eval_steps_per_second": 2.609, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 53.4, |
|
"learning_rate": 2.330055921342112e-05, |
|
"loss": 2.3174, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 53.4, |
|
"eval_accuracy": 0.5433115780118934, |
|
"eval_loss": 2.579352378845215, |
|
"eval_runtime": 240.0227, |
|
"eval_samples_per_second": 20.831, |
|
"eval_steps_per_second": 2.604, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 53.6, |
|
"learning_rate": 2.3200556813363522e-05, |
|
"loss": 2.3121, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 53.6, |
|
"eval_accuracy": 0.5427292175460517, |
|
"eval_loss": 2.5821499824523926, |
|
"eval_runtime": 239.1079, |
|
"eval_samples_per_second": 20.911, |
|
"eval_steps_per_second": 2.614, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 53.8, |
|
"learning_rate": 2.310055441330592e-05, |
|
"loss": 2.3222, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 53.8, |
|
"eval_accuracy": 0.5440522389940277, |
|
"eval_loss": 2.5802907943725586, |
|
"eval_runtime": 238.9623, |
|
"eval_samples_per_second": 20.924, |
|
"eval_steps_per_second": 2.615, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"learning_rate": 2.300055201324832e-05, |
|
"loss": 2.3273, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.5418589113279378, |
|
"eval_loss": 2.58471941947937, |
|
"eval_runtime": 240.3564, |
|
"eval_samples_per_second": 20.802, |
|
"eval_steps_per_second": 2.6, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 54.2, |
|
"learning_rate": 2.2900549613190718e-05, |
|
"loss": 2.2979, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 54.2, |
|
"eval_accuracy": 0.5431181820766608, |
|
"eval_loss": 2.581387519836426, |
|
"eval_runtime": 239.5645, |
|
"eval_samples_per_second": 20.871, |
|
"eval_steps_per_second": 2.609, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 54.4, |
|
"learning_rate": 2.2800547213133115e-05, |
|
"loss": 2.3075, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 54.4, |
|
"eval_accuracy": 0.5456039674552573, |
|
"eval_loss": 2.5715229511260986, |
|
"eval_runtime": 239.5747, |
|
"eval_samples_per_second": 20.87, |
|
"eval_steps_per_second": 2.609, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 54.6, |
|
"learning_rate": 2.2700544813075513e-05, |
|
"loss": 2.2956, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 54.6, |
|
"eval_accuracy": 0.544241455455272, |
|
"eval_loss": 2.5736727714538574, |
|
"eval_runtime": 238.8327, |
|
"eval_samples_per_second": 20.935, |
|
"eval_steps_per_second": 2.617, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 54.8, |
|
"learning_rate": 2.2600542413017913e-05, |
|
"loss": 2.3068, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 54.8, |
|
"eval_accuracy": 0.543567351165669, |
|
"eval_loss": 2.5768492221832275, |
|
"eval_runtime": 239.6828, |
|
"eval_samples_per_second": 20.861, |
|
"eval_steps_per_second": 2.608, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"learning_rate": 2.250054001296031e-05, |
|
"loss": 2.2982, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.5449586143181261, |
|
"eval_loss": 2.5657105445861816, |
|
"eval_runtime": 239.5684, |
|
"eval_samples_per_second": 20.871, |
|
"eval_steps_per_second": 2.609, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 55.2, |
|
"learning_rate": 2.240053761290271e-05, |
|
"loss": 2.2886, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 55.2, |
|
"eval_accuracy": 0.5452792134123282, |
|
"eval_loss": 2.5683178901672363, |
|
"eval_runtime": 239.1443, |
|
"eval_samples_per_second": 20.908, |
|
"eval_steps_per_second": 2.613, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 55.4, |
|
"learning_rate": 2.230053521284511e-05, |
|
"loss": 2.2807, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 55.4, |
|
"eval_accuracy": 0.5443893773089225, |
|
"eval_loss": 2.5663530826568604, |
|
"eval_runtime": 239.5227, |
|
"eval_samples_per_second": 20.875, |
|
"eval_steps_per_second": 2.609, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 55.6, |
|
"learning_rate": 2.220053281278751e-05, |
|
"loss": 2.2892, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 55.6, |
|
"eval_accuracy": 0.5455853036917409, |
|
"eval_loss": 2.5552079677581787, |
|
"eval_runtime": 239.1742, |
|
"eval_samples_per_second": 20.905, |
|
"eval_steps_per_second": 2.613, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 55.8, |
|
"learning_rate": 2.2100530412729907e-05, |
|
"loss": 2.281, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 55.8, |
|
"eval_accuracy": 0.546019219633517, |
|
"eval_loss": 2.560382843017578, |
|
"eval_runtime": 239.5538, |
|
"eval_samples_per_second": 20.872, |
|
"eval_steps_per_second": 2.609, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"learning_rate": 2.2000528012672305e-05, |
|
"loss": 2.2996, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.545175555940259, |
|
"eval_loss": 2.569575071334839, |
|
"eval_runtime": 239.4402, |
|
"eval_samples_per_second": 20.882, |
|
"eval_steps_per_second": 2.61, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 56.2, |
|
"learning_rate": 2.1900525612614702e-05, |
|
"loss": 2.2637, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 56.2, |
|
"eval_accuracy": 0.5446701227486882, |
|
"eval_loss": 2.567713499069214, |
|
"eval_runtime": 239.3563, |
|
"eval_samples_per_second": 20.889, |
|
"eval_steps_per_second": 2.611, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 56.4, |
|
"learning_rate": 2.1800523212557103e-05, |
|
"loss": 2.2616, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 56.4, |
|
"eval_accuracy": 0.5458312504519459, |
|
"eval_loss": 2.560295820236206, |
|
"eval_runtime": 239.7976, |
|
"eval_samples_per_second": 20.851, |
|
"eval_steps_per_second": 2.606, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 56.6, |
|
"learning_rate": 2.17005208124995e-05, |
|
"loss": 2.28, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 56.6, |
|
"eval_accuracy": 0.5473221625039519, |
|
"eval_loss": 2.5473392009735107, |
|
"eval_runtime": 239.703, |
|
"eval_samples_per_second": 20.859, |
|
"eval_steps_per_second": 2.607, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 56.8, |
|
"learning_rate": 2.16005184124419e-05, |
|
"loss": 2.2851, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 56.8, |
|
"eval_accuracy": 0.5455403910931405, |
|
"eval_loss": 2.560549736022949, |
|
"eval_runtime": 239.2486, |
|
"eval_samples_per_second": 20.899, |
|
"eval_steps_per_second": 2.612, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"learning_rate": 2.1500516012384298e-05, |
|
"loss": 2.2779, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.5481797430708371, |
|
"eval_loss": 2.538954019546509, |
|
"eval_runtime": 239.3788, |
|
"eval_samples_per_second": 20.887, |
|
"eval_steps_per_second": 2.611, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 57.2, |
|
"learning_rate": 2.14005136123267e-05, |
|
"loss": 2.2553, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 57.2, |
|
"eval_accuracy": 0.5465248526358569, |
|
"eval_loss": 2.551297903060913, |
|
"eval_runtime": 239.321, |
|
"eval_samples_per_second": 20.892, |
|
"eval_steps_per_second": 2.612, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 57.4, |
|
"learning_rate": 2.1300511212269093e-05, |
|
"loss": 2.2577, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 57.4, |
|
"eval_accuracy": 0.5489012757952415, |
|
"eval_loss": 2.541961431503296, |
|
"eval_runtime": 240.0573, |
|
"eval_samples_per_second": 20.828, |
|
"eval_steps_per_second": 2.604, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 57.6, |
|
"learning_rate": 2.1200508812211494e-05, |
|
"loss": 2.2639, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 57.6, |
|
"eval_accuracy": 0.549243428946737, |
|
"eval_loss": 2.5330581665039062, |
|
"eval_runtime": 239.73, |
|
"eval_samples_per_second": 20.857, |
|
"eval_steps_per_second": 2.607, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 57.8, |
|
"learning_rate": 2.110050641215389e-05, |
|
"loss": 2.2646, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 57.8, |
|
"eval_accuracy": 0.5463414171375807, |
|
"eval_loss": 2.553412675857544, |
|
"eval_runtime": 240.2504, |
|
"eval_samples_per_second": 20.812, |
|
"eval_steps_per_second": 2.601, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"learning_rate": 2.1000504012096292e-05, |
|
"loss": 2.2502, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.5491033816851943, |
|
"eval_loss": 2.537835121154785, |
|
"eval_runtime": 239.3546, |
|
"eval_samples_per_second": 20.89, |
|
"eval_steps_per_second": 2.611, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 58.2, |
|
"learning_rate": 2.090050161203869e-05, |
|
"loss": 2.2341, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 58.2, |
|
"eval_accuracy": 0.5498957186264518, |
|
"eval_loss": 2.5327913761138916, |
|
"eval_runtime": 238.7537, |
|
"eval_samples_per_second": 20.942, |
|
"eval_steps_per_second": 2.618, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 58.4, |
|
"learning_rate": 2.080049921198109e-05, |
|
"loss": 2.2446, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 58.4, |
|
"eval_accuracy": 0.551388546467156, |
|
"eval_loss": 2.524890184402466, |
|
"eval_runtime": 238.6361, |
|
"eval_samples_per_second": 20.952, |
|
"eval_steps_per_second": 2.619, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 58.6, |
|
"learning_rate": 2.0700496811923488e-05, |
|
"loss": 2.2413, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 58.6, |
|
"eval_accuracy": 0.5494109926997167, |
|
"eval_loss": 2.5354578495025635, |
|
"eval_runtime": 239.8195, |
|
"eval_samples_per_second": 20.849, |
|
"eval_steps_per_second": 2.606, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 58.8, |
|
"learning_rate": 2.0600494411865885e-05, |
|
"loss": 2.2358, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 58.8, |
|
"eval_accuracy": 0.5500933200499388, |
|
"eval_loss": 2.5274887084960938, |
|
"eval_runtime": 239.5977, |
|
"eval_samples_per_second": 20.868, |
|
"eval_steps_per_second": 2.609, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"learning_rate": 2.0500492011808282e-05, |
|
"loss": 2.2515, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.5500455346141847, |
|
"eval_loss": 2.530198812484741, |
|
"eval_runtime": 240.0143, |
|
"eval_samples_per_second": 20.832, |
|
"eval_steps_per_second": 2.604, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 59.2, |
|
"learning_rate": 2.0400489611750683e-05, |
|
"loss": 2.2254, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 59.2, |
|
"eval_accuracy": 0.5491007766091739, |
|
"eval_loss": 2.5325417518615723, |
|
"eval_runtime": 239.8311, |
|
"eval_samples_per_second": 20.848, |
|
"eval_steps_per_second": 2.606, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 59.4, |
|
"learning_rate": 2.030048721169308e-05, |
|
"loss": 2.2288, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 59.4, |
|
"eval_accuracy": 0.5502322742638851, |
|
"eval_loss": 2.5338664054870605, |
|
"eval_runtime": 239.0805, |
|
"eval_samples_per_second": 20.913, |
|
"eval_steps_per_second": 2.614, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 59.6, |
|
"learning_rate": 2.020048481163548e-05, |
|
"loss": 2.2229, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 59.6, |
|
"eval_accuracy": 0.5492136878089197, |
|
"eval_loss": 2.52840518951416, |
|
"eval_runtime": 239.0571, |
|
"eval_samples_per_second": 20.916, |
|
"eval_steps_per_second": 2.614, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 59.8, |
|
"learning_rate": 2.010048241157788e-05, |
|
"loss": 2.226, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 59.8, |
|
"eval_accuracy": 0.5520625337131285, |
|
"eval_loss": 2.5211117267608643, |
|
"eval_runtime": 239.9761, |
|
"eval_samples_per_second": 20.835, |
|
"eval_steps_per_second": 2.604, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 2.000048001152028e-05, |
|
"loss": 2.2299, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.5512880798568485, |
|
"eval_loss": 2.5254392623901367, |
|
"eval_runtime": 238.9239, |
|
"eval_samples_per_second": 20.927, |
|
"eval_steps_per_second": 2.616, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 60.2, |
|
"learning_rate": 1.9900477611462677e-05, |
|
"loss": 2.2047, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 60.2, |
|
"eval_accuracy": 0.5509219445568596, |
|
"eval_loss": 2.5192694664001465, |
|
"eval_runtime": 239.8776, |
|
"eval_samples_per_second": 20.844, |
|
"eval_steps_per_second": 2.605, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 60.4, |
|
"learning_rate": 1.9800475211405074e-05, |
|
"loss": 2.2143, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 60.4, |
|
"eval_accuracy": 0.5507610195889735, |
|
"eval_loss": 2.5283658504486084, |
|
"eval_runtime": 238.7607, |
|
"eval_samples_per_second": 20.941, |
|
"eval_steps_per_second": 2.618, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 60.6, |
|
"learning_rate": 1.9700472811347472e-05, |
|
"loss": 2.2182, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 60.6, |
|
"eval_accuracy": 0.551352798513339, |
|
"eval_loss": 2.5226526260375977, |
|
"eval_runtime": 239.1735, |
|
"eval_samples_per_second": 20.905, |
|
"eval_steps_per_second": 2.613, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 60.8, |
|
"learning_rate": 1.9600470411289873e-05, |
|
"loss": 2.2085, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 60.8, |
|
"eval_accuracy": 0.5525627969593881, |
|
"eval_loss": 2.5139427185058594, |
|
"eval_runtime": 240.0214, |
|
"eval_samples_per_second": 20.831, |
|
"eval_steps_per_second": 2.604, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"learning_rate": 1.950046801123227e-05, |
|
"loss": 2.2209, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.5529032788336304, |
|
"eval_loss": 2.5072460174560547, |
|
"eval_runtime": 240.1385, |
|
"eval_samples_per_second": 20.821, |
|
"eval_steps_per_second": 2.603, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 61.2, |
|
"learning_rate": 1.940046561117467e-05, |
|
"loss": 2.197, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 61.2, |
|
"eval_accuracy": 0.5528882078639463, |
|
"eval_loss": 2.5094316005706787, |
|
"eval_runtime": 239.1412, |
|
"eval_samples_per_second": 20.908, |
|
"eval_steps_per_second": 2.614, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 61.4, |
|
"learning_rate": 1.9300463211117068e-05, |
|
"loss": 2.2029, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 61.4, |
|
"eval_accuracy": 0.552518564768301, |
|
"eval_loss": 2.512848138809204, |
|
"eval_runtime": 239.081, |
|
"eval_samples_per_second": 20.913, |
|
"eval_steps_per_second": 2.614, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 61.6, |
|
"learning_rate": 1.920046081105947e-05, |
|
"loss": 2.2026, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 61.6, |
|
"eval_accuracy": 0.5537632882691113, |
|
"eval_loss": 2.500983238220215, |
|
"eval_runtime": 239.906, |
|
"eval_samples_per_second": 20.841, |
|
"eval_steps_per_second": 2.605, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 61.8, |
|
"learning_rate": 1.9100458411001866e-05, |
|
"loss": 2.1836, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 61.8, |
|
"eval_accuracy": 0.5522234398340118, |
|
"eval_loss": 2.5177018642425537, |
|
"eval_runtime": 240.0817, |
|
"eval_samples_per_second": 20.826, |
|
"eval_steps_per_second": 2.603, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"learning_rate": 1.9000456010944264e-05, |
|
"loss": 2.2043, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.5532023668639053, |
|
"eval_loss": 2.504979372024536, |
|
"eval_runtime": 241.5827, |
|
"eval_samples_per_second": 20.697, |
|
"eval_steps_per_second": 2.587, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 62.2, |
|
"learning_rate": 1.890045361088666e-05, |
|
"loss": 2.1853, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 62.2, |
|
"eval_accuracy": 0.5533680683605509, |
|
"eval_loss": 2.5054237842559814, |
|
"eval_runtime": 238.9823, |
|
"eval_samples_per_second": 20.922, |
|
"eval_steps_per_second": 2.615, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 62.4, |
|
"learning_rate": 1.8800451210829062e-05, |
|
"loss": 2.1735, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 62.4, |
|
"eval_accuracy": 0.5529888819519332, |
|
"eval_loss": 2.501115560531616, |
|
"eval_runtime": 241.4967, |
|
"eval_samples_per_second": 20.704, |
|
"eval_steps_per_second": 2.588, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 62.6, |
|
"learning_rate": 1.870044881077146e-05, |
|
"loss": 2.2014, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 62.6, |
|
"eval_accuracy": 0.5530117516377819, |
|
"eval_loss": 2.513277530670166, |
|
"eval_runtime": 239.8115, |
|
"eval_samples_per_second": 20.85, |
|
"eval_steps_per_second": 2.606, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 62.8, |
|
"learning_rate": 1.860044641071386e-05, |
|
"loss": 2.1819, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 62.8, |
|
"eval_accuracy": 0.5528446296185154, |
|
"eval_loss": 2.509094476699829, |
|
"eval_runtime": 240.0096, |
|
"eval_samples_per_second": 20.832, |
|
"eval_steps_per_second": 2.604, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"learning_rate": 1.8500444010656258e-05, |
|
"loss": 2.1891, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.5552905997463995, |
|
"eval_loss": 2.494983196258545, |
|
"eval_runtime": 239.6425, |
|
"eval_samples_per_second": 20.864, |
|
"eval_steps_per_second": 2.608, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 63.2, |
|
"learning_rate": 1.8400441610598655e-05, |
|
"loss": 2.1725, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 63.2, |
|
"eval_accuracy": 0.5525888871738494, |
|
"eval_loss": 2.5135655403137207, |
|
"eval_runtime": 239.4698, |
|
"eval_samples_per_second": 20.879, |
|
"eval_steps_per_second": 2.61, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 63.4, |
|
"learning_rate": 1.8300439210541052e-05, |
|
"loss": 2.1627, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 63.4, |
|
"eval_accuracy": 0.5559112693932448, |
|
"eval_loss": 2.4985179901123047, |
|
"eval_runtime": 238.778, |
|
"eval_samples_per_second": 20.94, |
|
"eval_steps_per_second": 2.617, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 63.6, |
|
"learning_rate": 1.8200436810483453e-05, |
|
"loss": 2.1637, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 63.6, |
|
"eval_accuracy": 0.5545219866850771, |
|
"eval_loss": 2.4936916828155518, |
|
"eval_runtime": 239.9514, |
|
"eval_samples_per_second": 20.838, |
|
"eval_steps_per_second": 2.605, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 63.8, |
|
"learning_rate": 1.810043441042585e-05, |
|
"loss": 2.1812, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 63.8, |
|
"eval_accuracy": 0.5547172908406778, |
|
"eval_loss": 2.4982199668884277, |
|
"eval_runtime": 239.8545, |
|
"eval_samples_per_second": 20.846, |
|
"eval_steps_per_second": 2.606, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"learning_rate": 1.8000432010368248e-05, |
|
"loss": 2.1813, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.5542093602880088, |
|
"eval_loss": 2.496227741241455, |
|
"eval_runtime": 239.2798, |
|
"eval_samples_per_second": 20.896, |
|
"eval_steps_per_second": 2.612, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 64.2, |
|
"learning_rate": 1.790042961031065e-05, |
|
"loss": 2.1406, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 64.2, |
|
"eval_accuracy": 0.5536167372868017, |
|
"eval_loss": 2.4971325397491455, |
|
"eval_runtime": 238.8834, |
|
"eval_samples_per_second": 20.931, |
|
"eval_steps_per_second": 2.616, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 64.4, |
|
"learning_rate": 1.7800427210253046e-05, |
|
"loss": 2.1552, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 64.4, |
|
"eval_accuracy": 0.5572511133344485, |
|
"eval_loss": 2.4775383472442627, |
|
"eval_runtime": 240.2398, |
|
"eval_samples_per_second": 20.813, |
|
"eval_steps_per_second": 2.602, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 64.6, |
|
"learning_rate": 1.7700424810195447e-05, |
|
"loss": 2.1507, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 64.6, |
|
"eval_accuracy": 0.5555152177138546, |
|
"eval_loss": 2.497971773147583, |
|
"eval_runtime": 238.8788, |
|
"eval_samples_per_second": 20.931, |
|
"eval_steps_per_second": 2.616, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 64.8, |
|
"learning_rate": 1.7600422410137844e-05, |
|
"loss": 2.1644, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 64.8, |
|
"eval_accuracy": 0.5557170695407047, |
|
"eval_loss": 2.493004322052002, |
|
"eval_runtime": 239.6996, |
|
"eval_samples_per_second": 20.859, |
|
"eval_steps_per_second": 2.607, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"learning_rate": 1.750042001008024e-05, |
|
"loss": 2.1634, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.5560676366496264, |
|
"eval_loss": 2.497638702392578, |
|
"eval_runtime": 239.1488, |
|
"eval_samples_per_second": 20.907, |
|
"eval_steps_per_second": 2.613, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 65.2, |
|
"learning_rate": 1.740041761002264e-05, |
|
"loss": 2.1409, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 65.2, |
|
"eval_accuracy": 0.5558225740583671, |
|
"eval_loss": 2.4956326484680176, |
|
"eval_runtime": 238.3192, |
|
"eval_samples_per_second": 20.98, |
|
"eval_steps_per_second": 2.623, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 65.4, |
|
"learning_rate": 1.730041520996504e-05, |
|
"loss": 2.1453, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 65.4, |
|
"eval_accuracy": 0.5554011908137227, |
|
"eval_loss": 2.487835168838501, |
|
"eval_runtime": 238.4531, |
|
"eval_samples_per_second": 20.968, |
|
"eval_steps_per_second": 2.621, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 65.6, |
|
"learning_rate": 1.7200412809907437e-05, |
|
"loss": 2.1361, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 65.6, |
|
"eval_accuracy": 0.5566455517043989, |
|
"eval_loss": 2.4900457859039307, |
|
"eval_runtime": 239.9045, |
|
"eval_samples_per_second": 20.842, |
|
"eval_steps_per_second": 2.605, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 65.8, |
|
"learning_rate": 1.7100410409849838e-05, |
|
"loss": 2.1604, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 65.8, |
|
"eval_accuracy": 0.5581541652045535, |
|
"eval_loss": 2.4757144451141357, |
|
"eval_runtime": 238.296, |
|
"eval_samples_per_second": 20.982, |
|
"eval_steps_per_second": 2.623, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"learning_rate": 1.7000408009792235e-05, |
|
"loss": 2.138, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.5568600527817493, |
|
"eval_loss": 2.487783432006836, |
|
"eval_runtime": 241.5368, |
|
"eval_samples_per_second": 20.701, |
|
"eval_steps_per_second": 2.588, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 66.2, |
|
"learning_rate": 1.6900405609734636e-05, |
|
"loss": 2.1241, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 66.2, |
|
"eval_accuracy": 0.5563906690268614, |
|
"eval_loss": 2.4848339557647705, |
|
"eval_runtime": 239.626, |
|
"eval_samples_per_second": 20.866, |
|
"eval_steps_per_second": 2.608, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 66.4, |
|
"learning_rate": 1.6800403209677034e-05, |
|
"loss": 2.1262, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 66.4, |
|
"eval_accuracy": 0.557129026746901, |
|
"eval_loss": 2.477325439453125, |
|
"eval_runtime": 239.431, |
|
"eval_samples_per_second": 20.883, |
|
"eval_steps_per_second": 2.61, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 66.6, |
|
"learning_rate": 1.670040080961943e-05, |
|
"loss": 2.142, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 66.6, |
|
"eval_accuracy": 0.5568254288979674, |
|
"eval_loss": 2.4832918643951416, |
|
"eval_runtime": 239.6217, |
|
"eval_samples_per_second": 20.866, |
|
"eval_steps_per_second": 2.608, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 66.8, |
|
"learning_rate": 1.660039840956183e-05, |
|
"loss": 2.1514, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 66.8, |
|
"eval_accuracy": 0.5570819128758042, |
|
"eval_loss": 2.4777631759643555, |
|
"eval_runtime": 239.6322, |
|
"eval_samples_per_second": 20.865, |
|
"eval_steps_per_second": 2.608, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"learning_rate": 1.650039600950423e-05, |
|
"loss": 2.1382, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.5591870729198872, |
|
"eval_loss": 2.468890905380249, |
|
"eval_runtime": 238.9575, |
|
"eval_samples_per_second": 20.924, |
|
"eval_steps_per_second": 2.616, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 67.2, |
|
"learning_rate": 1.6400393609446627e-05, |
|
"loss": 2.1222, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 67.2, |
|
"eval_accuracy": 0.5574107995800479, |
|
"eval_loss": 2.4710476398468018, |
|
"eval_runtime": 240.0729, |
|
"eval_samples_per_second": 20.827, |
|
"eval_steps_per_second": 2.603, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 67.4, |
|
"learning_rate": 1.6300391209389027e-05, |
|
"loss": 2.1227, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 67.4, |
|
"eval_accuracy": 0.5587516806483989, |
|
"eval_loss": 2.469508647918701, |
|
"eval_runtime": 239.7738, |
|
"eval_samples_per_second": 20.853, |
|
"eval_steps_per_second": 2.607, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 67.6, |
|
"learning_rate": 1.6200388809331425e-05, |
|
"loss": 2.1292, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 67.6, |
|
"eval_accuracy": 0.5585561370499786, |
|
"eval_loss": 2.477346181869507, |
|
"eval_runtime": 239.8749, |
|
"eval_samples_per_second": 20.844, |
|
"eval_steps_per_second": 2.606, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 67.8, |
|
"learning_rate": 1.6100386409273822e-05, |
|
"loss": 2.1263, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 67.8, |
|
"eval_accuracy": 0.5578509387181249, |
|
"eval_loss": 2.474029302597046, |
|
"eval_runtime": 239.6966, |
|
"eval_samples_per_second": 20.86, |
|
"eval_steps_per_second": 2.607, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"learning_rate": 1.600038400921622e-05, |
|
"loss": 2.1137, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.5601281707259325, |
|
"eval_loss": 2.4593210220336914, |
|
"eval_runtime": 239.2768, |
|
"eval_samples_per_second": 20.896, |
|
"eval_steps_per_second": 2.612, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 68.2, |
|
"learning_rate": 1.590038160915862e-05, |
|
"loss": 2.1064, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 68.2, |
|
"eval_accuracy": 0.5578209157966231, |
|
"eval_loss": 2.4727413654327393, |
|
"eval_runtime": 239.7217, |
|
"eval_samples_per_second": 20.858, |
|
"eval_steps_per_second": 2.607, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 68.4, |
|
"learning_rate": 1.5800379209101018e-05, |
|
"loss": 2.1006, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 68.4, |
|
"eval_accuracy": 0.5591064999321577, |
|
"eval_loss": 2.4696574211120605, |
|
"eval_runtime": 239.6832, |
|
"eval_samples_per_second": 20.861, |
|
"eval_steps_per_second": 2.608, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 68.6, |
|
"learning_rate": 1.570037680904342e-05, |
|
"loss": 2.1024, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 68.6, |
|
"eval_accuracy": 0.5610141279251564, |
|
"eval_loss": 2.456801652908325, |
|
"eval_runtime": 239.2907, |
|
"eval_samples_per_second": 20.895, |
|
"eval_steps_per_second": 2.612, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 68.8, |
|
"learning_rate": 1.5600374408985816e-05, |
|
"loss": 2.1168, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 68.8, |
|
"eval_accuracy": 0.5585552637469694, |
|
"eval_loss": 2.465235948562622, |
|
"eval_runtime": 239.8915, |
|
"eval_samples_per_second": 20.843, |
|
"eval_steps_per_second": 2.605, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"learning_rate": 1.5500372008928217e-05, |
|
"loss": 2.1173, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.5599213275657079, |
|
"eval_loss": 2.4659504890441895, |
|
"eval_runtime": 239.7834, |
|
"eval_samples_per_second": 20.852, |
|
"eval_steps_per_second": 2.607, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 69.2, |
|
"learning_rate": 1.5400369608870614e-05, |
|
"loss": 2.1033, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 69.2, |
|
"eval_accuracy": 0.5608148024123105, |
|
"eval_loss": 2.4544801712036133, |
|
"eval_runtime": 239.7783, |
|
"eval_samples_per_second": 20.853, |
|
"eval_steps_per_second": 2.607, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 69.4, |
|
"learning_rate": 1.530036720881301e-05, |
|
"loss": 2.1007, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 69.4, |
|
"eval_accuracy": 0.560514818378259, |
|
"eval_loss": 2.454843044281006, |
|
"eval_runtime": 238.5559, |
|
"eval_samples_per_second": 20.959, |
|
"eval_steps_per_second": 2.62, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 69.6, |
|
"learning_rate": 1.520036480875541e-05, |
|
"loss": 2.1084, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 69.6, |
|
"eval_accuracy": 0.5603224959593116, |
|
"eval_loss": 2.450347661972046, |
|
"eval_runtime": 240.0033, |
|
"eval_samples_per_second": 20.833, |
|
"eval_steps_per_second": 2.604, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 69.8, |
|
"learning_rate": 1.510036240869781e-05, |
|
"loss": 2.1022, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 69.8, |
|
"eval_accuracy": 0.5603836619647419, |
|
"eval_loss": 2.459425449371338, |
|
"eval_runtime": 239.0288, |
|
"eval_samples_per_second": 20.918, |
|
"eval_steps_per_second": 2.615, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"learning_rate": 1.5000360008640207e-05, |
|
"loss": 2.0954, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.5593701413176825, |
|
"eval_loss": 2.4609487056732178, |
|
"eval_runtime": 239.8644, |
|
"eval_samples_per_second": 20.845, |
|
"eval_steps_per_second": 2.606, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 70.2, |
|
"learning_rate": 1.4900357608582608e-05, |
|
"loss": 2.0732, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 70.2, |
|
"eval_accuracy": 0.562392128702269, |
|
"eval_loss": 2.454357385635376, |
|
"eval_runtime": 238.6067, |
|
"eval_samples_per_second": 20.955, |
|
"eval_steps_per_second": 2.619, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 70.4, |
|
"learning_rate": 1.4800355208525005e-05, |
|
"loss": 2.0784, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 70.4, |
|
"eval_accuracy": 0.5609833085023106, |
|
"eval_loss": 2.4554500579833984, |
|
"eval_runtime": 240.1656, |
|
"eval_samples_per_second": 20.819, |
|
"eval_steps_per_second": 2.602, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 70.6, |
|
"learning_rate": 1.4700352808467404e-05, |
|
"loss": 2.0955, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 70.6, |
|
"eval_accuracy": 0.5613185277510245, |
|
"eval_loss": 2.450934410095215, |
|
"eval_runtime": 239.4126, |
|
"eval_samples_per_second": 20.884, |
|
"eval_steps_per_second": 2.611, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 70.8, |
|
"learning_rate": 1.4600350408409802e-05, |
|
"loss": 2.0918, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 70.8, |
|
"eval_accuracy": 0.5611795024033451, |
|
"eval_loss": 2.4546260833740234, |
|
"eval_runtime": 239.9371, |
|
"eval_samples_per_second": 20.839, |
|
"eval_steps_per_second": 2.605, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"learning_rate": 1.4500348008352203e-05, |
|
"loss": 2.0934, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.560534958630708, |
|
"eval_loss": 2.4539401531219482, |
|
"eval_runtime": 239.175, |
|
"eval_samples_per_second": 20.905, |
|
"eval_steps_per_second": 2.613, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 71.2, |
|
"learning_rate": 1.44003456082946e-05, |
|
"loss": 2.0698, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 71.2, |
|
"eval_accuracy": 0.5608924561414544, |
|
"eval_loss": 2.4519293308258057, |
|
"eval_runtime": 239.8395, |
|
"eval_samples_per_second": 20.847, |
|
"eval_steps_per_second": 2.606, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 71.4, |
|
"learning_rate": 1.4300343208236999e-05, |
|
"loss": 2.0712, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 71.4, |
|
"eval_accuracy": 0.5619750991633243, |
|
"eval_loss": 2.440948009490967, |
|
"eval_runtime": 239.0587, |
|
"eval_samples_per_second": 20.915, |
|
"eval_steps_per_second": 2.614, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 71.6, |
|
"learning_rate": 1.4200340808179397e-05, |
|
"loss": 2.0857, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 71.6, |
|
"eval_accuracy": 0.5620386045955454, |
|
"eval_loss": 2.4435293674468994, |
|
"eval_runtime": 239.8911, |
|
"eval_samples_per_second": 20.843, |
|
"eval_steps_per_second": 2.605, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 71.8, |
|
"learning_rate": 1.4100338408121797e-05, |
|
"loss": 2.0745, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 71.8, |
|
"eval_accuracy": 0.5632759493830151, |
|
"eval_loss": 2.439894676208496, |
|
"eval_runtime": 239.7702, |
|
"eval_samples_per_second": 20.853, |
|
"eval_steps_per_second": 2.607, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"learning_rate": 1.4000336008064193e-05, |
|
"loss": 2.0735, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.5619281706515695, |
|
"eval_loss": 2.4514319896698, |
|
"eval_runtime": 239.885, |
|
"eval_samples_per_second": 20.843, |
|
"eval_steps_per_second": 2.605, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 72.2, |
|
"learning_rate": 1.3900333608006594e-05, |
|
"loss": 2.0602, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 72.2, |
|
"eval_accuracy": 0.563163832523804, |
|
"eval_loss": 2.440223455429077, |
|
"eval_runtime": 239.5216, |
|
"eval_samples_per_second": 20.875, |
|
"eval_steps_per_second": 2.609, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 72.4, |
|
"learning_rate": 1.3800331207948991e-05, |
|
"loss": 2.0648, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 72.4, |
|
"eval_accuracy": 0.5628968925704558, |
|
"eval_loss": 2.4417946338653564, |
|
"eval_runtime": 239.1858, |
|
"eval_samples_per_second": 20.904, |
|
"eval_steps_per_second": 2.613, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 72.6, |
|
"learning_rate": 1.370032880789139e-05, |
|
"loss": 2.0672, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 72.6, |
|
"eval_accuracy": 0.561726001922683, |
|
"eval_loss": 2.443669080734253, |
|
"eval_runtime": 239.6521, |
|
"eval_samples_per_second": 20.864, |
|
"eval_steps_per_second": 2.608, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 72.8, |
|
"learning_rate": 1.3600326407833788e-05, |
|
"loss": 2.0801, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 72.8, |
|
"eval_accuracy": 0.5635193038184522, |
|
"eval_loss": 2.431992530822754, |
|
"eval_runtime": 239.4749, |
|
"eval_samples_per_second": 20.879, |
|
"eval_steps_per_second": 2.61, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"learning_rate": 1.3500324007776188e-05, |
|
"loss": 2.0695, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.5642300019285915, |
|
"eval_loss": 2.4284050464630127, |
|
"eval_runtime": 239.7223, |
|
"eval_samples_per_second": 20.857, |
|
"eval_steps_per_second": 2.607, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 73.2, |
|
"learning_rate": 1.3400321607718586e-05, |
|
"loss": 2.0566, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 73.2, |
|
"eval_accuracy": 0.5618769953465528, |
|
"eval_loss": 2.4493749141693115, |
|
"eval_runtime": 239.4603, |
|
"eval_samples_per_second": 20.88, |
|
"eval_steps_per_second": 2.61, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 73.4, |
|
"learning_rate": 1.3300319207660985e-05, |
|
"loss": 2.0452, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 73.4, |
|
"eval_accuracy": 0.5635672190600917, |
|
"eval_loss": 2.437802314758301, |
|
"eval_runtime": 239.4804, |
|
"eval_samples_per_second": 20.879, |
|
"eval_steps_per_second": 2.61, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 73.6, |
|
"learning_rate": 1.3200316807603382e-05, |
|
"loss": 2.0548, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 73.6, |
|
"eval_accuracy": 0.562618727556153, |
|
"eval_loss": 2.4448354244232178, |
|
"eval_runtime": 239.389, |
|
"eval_samples_per_second": 20.887, |
|
"eval_steps_per_second": 2.611, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 73.8, |
|
"learning_rate": 1.3100314407545783e-05, |
|
"loss": 2.0671, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 73.8, |
|
"eval_accuracy": 0.5631980095115114, |
|
"eval_loss": 2.429305076599121, |
|
"eval_runtime": 239.8305, |
|
"eval_samples_per_second": 20.848, |
|
"eval_steps_per_second": 2.606, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"learning_rate": 1.300031200748818e-05, |
|
"loss": 2.057, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.5637622237233351, |
|
"eval_loss": 2.4316015243530273, |
|
"eval_runtime": 238.715, |
|
"eval_samples_per_second": 20.945, |
|
"eval_steps_per_second": 2.618, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 74.2, |
|
"learning_rate": 1.290030960743058e-05, |
|
"loss": 2.0376, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 74.2, |
|
"eval_accuracy": 0.5624911002749817, |
|
"eval_loss": 2.4484307765960693, |
|
"eval_runtime": 239.8067, |
|
"eval_samples_per_second": 20.85, |
|
"eval_steps_per_second": 2.606, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 74.4, |
|
"learning_rate": 1.2800307207372977e-05, |
|
"loss": 2.0534, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 74.4, |
|
"eval_accuracy": 0.5637550768921247, |
|
"eval_loss": 2.4410548210144043, |
|
"eval_runtime": 239.5552, |
|
"eval_samples_per_second": 20.872, |
|
"eval_steps_per_second": 2.609, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 74.6, |
|
"learning_rate": 1.2700304807315378e-05, |
|
"loss": 2.0499, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 74.6, |
|
"eval_accuracy": 0.5629136359625349, |
|
"eval_loss": 2.4380035400390625, |
|
"eval_runtime": 238.4915, |
|
"eval_samples_per_second": 20.965, |
|
"eval_steps_per_second": 2.621, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 74.8, |
|
"learning_rate": 1.2600302407257775e-05, |
|
"loss": 2.0516, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 74.8, |
|
"eval_accuracy": 0.5635815880554783, |
|
"eval_loss": 2.4365627765655518, |
|
"eval_runtime": 240.1236, |
|
"eval_samples_per_second": 20.823, |
|
"eval_steps_per_second": 2.603, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 1.2500300007200174e-05, |
|
"loss": 2.0435, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.5636397510428476, |
|
"eval_loss": 2.434795618057251, |
|
"eval_runtime": 238.788, |
|
"eval_samples_per_second": 20.939, |
|
"eval_steps_per_second": 2.617, |
|
"step": 187500 |
|
} |
|
], |
|
"max_steps": 250000, |
|
"num_train_epochs": 100, |
|
"total_flos": 3.9409573713073645e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|