|
{ |
|
"best_metric": 0.2031262218952179, |
|
"best_model_checkpoint": "final_roberta_with_new_400k_plus_37k/checkpoint-5300", |
|
"epoch": 0.37867962274935696, |
|
"eval_steps": 100, |
|
"global_step": 5300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 259361.75, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 1.1264, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 272740.9375, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 1.1058, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 244078.703125, |
|
"learning_rate": 3e-06, |
|
"loss": 1.0828, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 234958.875, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 1.0388, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 270513.0625, |
|
"learning_rate": 5e-06, |
|
"loss": 0.985, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 186214.65625, |
|
"learning_rate": 6e-06, |
|
"loss": 0.8671, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 174922.8125, |
|
"learning_rate": 7.000000000000001e-06, |
|
"loss": 0.7386, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 191114.640625, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.6231, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 195687.9375, |
|
"learning_rate": 9e-06, |
|
"loss": 0.4322, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 333151.59375, |
|
"learning_rate": 1e-05, |
|
"loss": 0.3174, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_accuracy": 0.8927859705061778, |
|
"eval_f1": 0.8918917476204463, |
|
"eval_loss": 0.32540708780288696, |
|
"eval_precision": 0.8963585084239793, |
|
"eval_recall": 0.8927859705061778, |
|
"eval_runtime": 12.4623, |
|
"eval_samples_per_second": 201.328, |
|
"eval_steps_per_second": 3.21, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 581737.1875, |
|
"learning_rate": 1.1000000000000001e-05, |
|
"loss": 0.3052, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 789745.375, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.3366, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 282232.71875, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"loss": 0.3257, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 337977.96875, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"loss": 0.2982, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 622948.125, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.3382, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 438774.15625, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.2975, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 715256.4375, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"loss": 0.3695, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 294961.75, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.3001, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 526643.5, |
|
"learning_rate": 1.9e-05, |
|
"loss": 0.2853, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 432135.15625, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3285, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_accuracy": 0.8955759266640095, |
|
"eval_f1": 0.8950953630781538, |
|
"eval_loss": 0.2577860653400421, |
|
"eval_precision": 0.8967688785864537, |
|
"eval_recall": 0.8955759266640095, |
|
"eval_runtime": 12.4549, |
|
"eval_samples_per_second": 201.447, |
|
"eval_steps_per_second": 3.212, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 344853.5625, |
|
"learning_rate": 2.1e-05, |
|
"loss": 0.3057, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 325491.0, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 0.2563, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 788922.3125, |
|
"learning_rate": 2.3000000000000003e-05, |
|
"loss": 0.3054, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 589439.25, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.3409, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 213858.8125, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2863, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 457191.5, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"loss": 0.266, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 456034.78125, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"loss": 0.2825, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 460380.375, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"loss": 0.2809, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 318752.53125, |
|
"learning_rate": 2.9e-05, |
|
"loss": 0.2558, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 487526.53125, |
|
"learning_rate": 3e-05, |
|
"loss": 0.247, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.8620964527700279, |
|
"eval_f1": 0.8588044269388889, |
|
"eval_loss": 0.39129751920700073, |
|
"eval_precision": 0.8782950809046319, |
|
"eval_recall": 0.8620964527700279, |
|
"eval_runtime": 12.4793, |
|
"eval_samples_per_second": 201.052, |
|
"eval_steps_per_second": 3.205, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 810950.4375, |
|
"learning_rate": 3.1e-05, |
|
"loss": 0.3178, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 197358.0625, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 0.2416, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 660009.25, |
|
"learning_rate": 3.3e-05, |
|
"loss": 0.1957, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 782952.625, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 0.3032, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 910589.1875, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.2858, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 217997.765625, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.2892, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 353057.21875, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.2023, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 473318.84375, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.2521, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 176609.578125, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.2648, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 272719.65625, |
|
"learning_rate": 4e-05, |
|
"loss": 0.2853, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_accuracy": 0.8736548425667596, |
|
"eval_f1": 0.8710864907473246, |
|
"eval_loss": 0.3394368290901184, |
|
"eval_precision": 0.8871063648493269, |
|
"eval_recall": 0.8736548425667596, |
|
"eval_runtime": 12.4669, |
|
"eval_samples_per_second": 201.253, |
|
"eval_steps_per_second": 3.208, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 398616.40625, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.2679, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 185647.96875, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.2532, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 436418.59375, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.2724, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 299492.25, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.2548, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 482227.65625, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.2769, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 246368.28125, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.2869, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 391130.0625, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.3358, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 284843.15625, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.2601, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 512920.8125, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.3797, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 320267.75, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3031, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.8537265842965325, |
|
"eval_f1": 0.8491421277003748, |
|
"eval_loss": 0.3923502266407013, |
|
"eval_precision": 0.8770289219330052, |
|
"eval_recall": 0.8537265842965325, |
|
"eval_runtime": 12.4502, |
|
"eval_samples_per_second": 201.524, |
|
"eval_steps_per_second": 3.213, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 944106.25, |
|
"learning_rate": 4.9987948322406484e-05, |
|
"loss": 0.3445, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 650689.8125, |
|
"learning_rate": 4.997589664481296e-05, |
|
"loss": 0.2683, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 404230.5, |
|
"learning_rate": 4.996384496721944e-05, |
|
"loss": 0.2732, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 253872.78125, |
|
"learning_rate": 4.995179328962592e-05, |
|
"loss": 0.2637, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 173572.625, |
|
"learning_rate": 4.9939741612032395e-05, |
|
"loss": 0.2878, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 234455.234375, |
|
"learning_rate": 4.9927689934438876e-05, |
|
"loss": 0.2105, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 238566.0, |
|
"learning_rate": 4.991563825684535e-05, |
|
"loss": 0.3066, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 476733.5, |
|
"learning_rate": 4.990358657925183e-05, |
|
"loss": 0.2801, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 279763.1875, |
|
"learning_rate": 4.9891534901658313e-05, |
|
"loss": 0.241, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 332317.40625, |
|
"learning_rate": 4.9879483224064795e-05, |
|
"loss": 0.2747, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.9079314467915505, |
|
"eval_f1": 0.9079421645959964, |
|
"eval_loss": 0.2531713545322418, |
|
"eval_precision": 0.9079554973313584, |
|
"eval_recall": 0.9079314467915505, |
|
"eval_runtime": 12.4925, |
|
"eval_samples_per_second": 200.84, |
|
"eval_steps_per_second": 3.202, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 156262.09375, |
|
"learning_rate": 4.986743154647127e-05, |
|
"loss": 0.2498, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 731199.375, |
|
"learning_rate": 4.985537986887775e-05, |
|
"loss": 0.2715, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 360661.875, |
|
"learning_rate": 4.9843328191284225e-05, |
|
"loss": 0.2522, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 338785.125, |
|
"learning_rate": 4.9831276513690706e-05, |
|
"loss": 0.2912, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 376656.71875, |
|
"learning_rate": 4.981922483609719e-05, |
|
"loss": 0.2842, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 173638.25, |
|
"learning_rate": 4.980717315850367e-05, |
|
"loss": 0.2145, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 799034.5, |
|
"learning_rate": 4.979512148091014e-05, |
|
"loss": 0.2167, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 274675.84375, |
|
"learning_rate": 4.9783069803316624e-05, |
|
"loss": 0.2779, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 194338.96875, |
|
"learning_rate": 4.97710181257231e-05, |
|
"loss": 0.269, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 284438.125, |
|
"learning_rate": 4.975896644812959e-05, |
|
"loss": 0.2797, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.863690713431646, |
|
"eval_f1": 0.8606919701702621, |
|
"eval_loss": 0.36067071557044983, |
|
"eval_precision": 0.8781306500206725, |
|
"eval_recall": 0.863690713431646, |
|
"eval_runtime": 12.4463, |
|
"eval_samples_per_second": 201.586, |
|
"eval_steps_per_second": 3.214, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 737474.625, |
|
"learning_rate": 4.974691477053606e-05, |
|
"loss": 0.3834, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 414523.40625, |
|
"learning_rate": 4.973486309294254e-05, |
|
"loss": 0.3192, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 240870.953125, |
|
"learning_rate": 4.972281141534902e-05, |
|
"loss": 0.2479, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 280922.09375, |
|
"learning_rate": 4.97107597377555e-05, |
|
"loss": 0.2549, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 258415.796875, |
|
"learning_rate": 4.969870806016197e-05, |
|
"loss": 0.294, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 409388.15625, |
|
"learning_rate": 4.968665638256846e-05, |
|
"loss": 0.2806, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 399257.46875, |
|
"learning_rate": 4.9674604704974935e-05, |
|
"loss": 0.2841, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 363038.375, |
|
"learning_rate": 4.966255302738142e-05, |
|
"loss": 0.3085, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 351745.78125, |
|
"learning_rate": 4.965050134978789e-05, |
|
"loss": 0.2652, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 297461.6875, |
|
"learning_rate": 4.963844967219437e-05, |
|
"loss": 0.2211, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.8880031885213232, |
|
"eval_f1": 0.8871620549900273, |
|
"eval_loss": 0.2910105884075165, |
|
"eval_precision": 0.8908776073001764, |
|
"eval_recall": 0.8880031885213232, |
|
"eval_runtime": 12.4814, |
|
"eval_samples_per_second": 201.018, |
|
"eval_steps_per_second": 3.205, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 287442.3125, |
|
"learning_rate": 4.962639799460085e-05, |
|
"loss": 0.2431, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 277648.125, |
|
"learning_rate": 4.9614346317007335e-05, |
|
"loss": 0.3058, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 309109.34375, |
|
"learning_rate": 4.960229463941381e-05, |
|
"loss": 0.1934, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 487191.03125, |
|
"learning_rate": 4.959024296182029e-05, |
|
"loss": 0.276, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 326520.6875, |
|
"learning_rate": 4.9578191284226765e-05, |
|
"loss": 0.2385, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 396849.90625, |
|
"learning_rate": 4.9566139606633246e-05, |
|
"loss": 0.2689, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 405153.84375, |
|
"learning_rate": 4.955408792903972e-05, |
|
"loss": 0.2374, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 228344.0, |
|
"learning_rate": 4.95420362514462e-05, |
|
"loss": 0.2317, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 232430.0625, |
|
"learning_rate": 4.9529984573852683e-05, |
|
"loss": 0.2397, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 343631.5, |
|
"learning_rate": 4.9517932896259165e-05, |
|
"loss": 0.2769, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.8824232762056596, |
|
"eval_f1": 0.8810340691374341, |
|
"eval_loss": 0.2833768129348755, |
|
"eval_precision": 0.8884468905314342, |
|
"eval_recall": 0.8824232762056596, |
|
"eval_runtime": 12.4726, |
|
"eval_samples_per_second": 201.162, |
|
"eval_steps_per_second": 3.207, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 313915.84375, |
|
"learning_rate": 4.950588121866564e-05, |
|
"loss": 0.2364, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 216686.984375, |
|
"learning_rate": 4.949382954107212e-05, |
|
"loss": 0.1952, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 254563.671875, |
|
"learning_rate": 4.94817778634786e-05, |
|
"loss": 0.3364, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 446411.09375, |
|
"learning_rate": 4.9469726185885076e-05, |
|
"loss": 0.208, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 236561.890625, |
|
"learning_rate": 4.945767450829156e-05, |
|
"loss": 0.2634, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 255751.90625, |
|
"learning_rate": 4.944562283069803e-05, |
|
"loss": 0.2675, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 314748.65625, |
|
"learning_rate": 4.943357115310451e-05, |
|
"loss": 0.2196, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 439258.875, |
|
"learning_rate": 4.9421519475510994e-05, |
|
"loss": 0.1836, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 437849.875, |
|
"learning_rate": 4.9409467797917476e-05, |
|
"loss": 0.2723, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 232659.609375, |
|
"learning_rate": 4.939741612032395e-05, |
|
"loss": 0.2412, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.9063371861299322, |
|
"eval_f1": 0.9060552663380613, |
|
"eval_loss": 0.23936249315738678, |
|
"eval_precision": 0.9068644806871264, |
|
"eval_recall": 0.9063371861299322, |
|
"eval_runtime": 12.4655, |
|
"eval_samples_per_second": 201.276, |
|
"eval_steps_per_second": 3.209, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 230467.96875, |
|
"learning_rate": 4.938536444273043e-05, |
|
"loss": 0.2282, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 471223.4375, |
|
"learning_rate": 4.9373312765136906e-05, |
|
"loss": 0.2779, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 358035.625, |
|
"learning_rate": 4.936126108754339e-05, |
|
"loss": 0.3137, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 254541.125, |
|
"learning_rate": 4.934920940994986e-05, |
|
"loss": 0.2323, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 736008.625, |
|
"learning_rate": 4.933715773235635e-05, |
|
"loss": 0.281, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 191029.328125, |
|
"learning_rate": 4.9325106054762824e-05, |
|
"loss": 0.2438, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 392851.15625, |
|
"learning_rate": 4.9313054377169305e-05, |
|
"loss": 0.3204, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 252810.3125, |
|
"learning_rate": 4.930100269957578e-05, |
|
"loss": 0.2651, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 406698.71875, |
|
"learning_rate": 4.928895102198226e-05, |
|
"loss": 0.279, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 383913.09375, |
|
"learning_rate": 4.927689934438874e-05, |
|
"loss": 0.3386, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.9015544041450777, |
|
"eval_f1": 0.9012812455239371, |
|
"eval_loss": 0.2400408387184143, |
|
"eval_precision": 0.9019548153454997, |
|
"eval_recall": 0.9015544041450777, |
|
"eval_runtime": 12.4676, |
|
"eval_samples_per_second": 201.242, |
|
"eval_steps_per_second": 3.208, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 216896.21875, |
|
"learning_rate": 4.9264847666795224e-05, |
|
"loss": 0.2311, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 290117.3125, |
|
"learning_rate": 4.92527959892017e-05, |
|
"loss": 0.277, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 219654.265625, |
|
"learning_rate": 4.924074431160818e-05, |
|
"loss": 0.2339, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 342770.09375, |
|
"learning_rate": 4.9228692634014654e-05, |
|
"loss": 0.3051, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 246765.90625, |
|
"learning_rate": 4.9216640956421135e-05, |
|
"loss": 0.2695, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 208931.578125, |
|
"learning_rate": 4.9204589278827617e-05, |
|
"loss": 0.2747, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 342173.96875, |
|
"learning_rate": 4.91925376012341e-05, |
|
"loss": 0.2172, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 255617.609375, |
|
"learning_rate": 4.918048592364057e-05, |
|
"loss": 0.2835, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 149436.703125, |
|
"learning_rate": 4.9168434246047054e-05, |
|
"loss": 0.2432, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 225822.0625, |
|
"learning_rate": 4.915638256845353e-05, |
|
"loss": 0.2743, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_accuracy": 0.904742925468314, |
|
"eval_f1": 0.9047753527069451, |
|
"eval_loss": 0.24210092425346375, |
|
"eval_precision": 0.9048312118166199, |
|
"eval_recall": 0.904742925468314, |
|
"eval_runtime": 12.4909, |
|
"eval_samples_per_second": 200.866, |
|
"eval_steps_per_second": 3.202, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 156733.046875, |
|
"learning_rate": 4.914433089086001e-05, |
|
"loss": 0.2321, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 139717.796875, |
|
"learning_rate": 4.913227921326649e-05, |
|
"loss": 0.1887, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 534506.8125, |
|
"learning_rate": 4.912022753567297e-05, |
|
"loss": 0.2929, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 190213.25, |
|
"learning_rate": 4.9108175858079446e-05, |
|
"loss": 0.2494, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 462159.28125, |
|
"learning_rate": 4.909612418048593e-05, |
|
"loss": 0.3134, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 290829.84375, |
|
"learning_rate": 4.90840725028924e-05, |
|
"loss": 0.2327, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 115473.984375, |
|
"learning_rate": 4.907202082529889e-05, |
|
"loss": 0.2199, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 172480.3125, |
|
"learning_rate": 4.9059969147705365e-05, |
|
"loss": 0.2344, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 271795.9375, |
|
"learning_rate": 4.9047917470111846e-05, |
|
"loss": 0.2757, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 134259.4375, |
|
"learning_rate": 4.903586579251832e-05, |
|
"loss": 0.2682, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_accuracy": 0.8768433638899961, |
|
"eval_f1": 0.875214624309524, |
|
"eval_loss": 0.2833414375782013, |
|
"eval_precision": 0.8838506474460517, |
|
"eval_recall": 0.8768433638899961, |
|
"eval_runtime": 12.4785, |
|
"eval_samples_per_second": 201.066, |
|
"eval_steps_per_second": 3.206, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 162955.359375, |
|
"learning_rate": 4.90238141149248e-05, |
|
"loss": 0.2077, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 345381.34375, |
|
"learning_rate": 4.9011762437331276e-05, |
|
"loss": 0.2563, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 353178.6875, |
|
"learning_rate": 4.899971075973776e-05, |
|
"loss": 0.2536, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 341959.53125, |
|
"learning_rate": 4.898765908214424e-05, |
|
"loss": 0.2174, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 366022.53125, |
|
"learning_rate": 4.897560740455071e-05, |
|
"loss": 0.3057, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 393534.71875, |
|
"learning_rate": 4.8963555726957194e-05, |
|
"loss": 0.2376, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 274654.625, |
|
"learning_rate": 4.8951504049363676e-05, |
|
"loss": 0.2659, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 138208.84375, |
|
"learning_rate": 4.893945237177015e-05, |
|
"loss": 0.1862, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 467137.15625, |
|
"learning_rate": 4.892740069417663e-05, |
|
"loss": 0.2283, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 317242.65625, |
|
"learning_rate": 4.891534901658311e-05, |
|
"loss": 0.3219, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.9071343164607414, |
|
"eval_f1": 0.9070423350315097, |
|
"eval_loss": 0.23825575411319733, |
|
"eval_precision": 0.9071045116108353, |
|
"eval_recall": 0.9071343164607414, |
|
"eval_runtime": 12.4977, |
|
"eval_samples_per_second": 200.757, |
|
"eval_steps_per_second": 3.201, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 181615.84375, |
|
"learning_rate": 4.890329733898959e-05, |
|
"loss": 0.2165, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 161155.140625, |
|
"learning_rate": 4.889124566139607e-05, |
|
"loss": 0.2607, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 398813.90625, |
|
"learning_rate": 4.887919398380254e-05, |
|
"loss": 0.2696, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 315529.625, |
|
"learning_rate": 4.8867142306209024e-05, |
|
"loss": 0.2688, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 518022.09375, |
|
"learning_rate": 4.8855090628615505e-05, |
|
"loss": 0.3062, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 271555.0, |
|
"learning_rate": 4.8843038951021987e-05, |
|
"loss": 0.2141, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 287849.21875, |
|
"learning_rate": 4.883098727342846e-05, |
|
"loss": 0.2469, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 269480.84375, |
|
"learning_rate": 4.881893559583494e-05, |
|
"loss": 0.2037, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 225872.734375, |
|
"learning_rate": 4.880688391824142e-05, |
|
"loss": 0.2867, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 291168.03125, |
|
"learning_rate": 4.8794832240647905e-05, |
|
"loss": 0.2211, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.904742925468314, |
|
"eval_f1": 0.9047391668676202, |
|
"eval_loss": 0.24535924196243286, |
|
"eval_precision": 0.9047356979299059, |
|
"eval_recall": 0.904742925468314, |
|
"eval_runtime": 12.5209, |
|
"eval_samples_per_second": 200.385, |
|
"eval_steps_per_second": 3.195, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 305500.625, |
|
"learning_rate": 4.878278056305438e-05, |
|
"loss": 0.2604, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 322610.6875, |
|
"learning_rate": 4.877072888546086e-05, |
|
"loss": 0.2416, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 244146.640625, |
|
"learning_rate": 4.8758677207867335e-05, |
|
"loss": 0.2461, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 319704.53125, |
|
"learning_rate": 4.8746625530273816e-05, |
|
"loss": 0.2368, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 292252.0, |
|
"learning_rate": 4.873457385268029e-05, |
|
"loss": 0.2351, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 134507.875, |
|
"learning_rate": 4.872252217508678e-05, |
|
"loss": 0.2423, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 228724.5625, |
|
"learning_rate": 4.871047049749325e-05, |
|
"loss": 0.1909, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 315720.09375, |
|
"learning_rate": 4.8698418819899735e-05, |
|
"loss": 0.2611, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 232667.03125, |
|
"learning_rate": 4.868636714230621e-05, |
|
"loss": 0.1903, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 263891.90625, |
|
"learning_rate": 4.867431546471269e-05, |
|
"loss": 0.2606, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.9222797927461139, |
|
"eval_f1": 0.9220731260773486, |
|
"eval_loss": 0.20830760896205902, |
|
"eval_precision": 0.9227685265016082, |
|
"eval_recall": 0.9222797927461139, |
|
"eval_runtime": 16.2543, |
|
"eval_samples_per_second": 154.359, |
|
"eval_steps_per_second": 2.461, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 221386.890625, |
|
"learning_rate": 4.8662263787119165e-05, |
|
"loss": 0.2349, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 363135.0, |
|
"learning_rate": 4.865021210952565e-05, |
|
"loss": 0.25, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 264439.53125, |
|
"learning_rate": 4.863816043193213e-05, |
|
"loss": 0.1789, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 325613.53125, |
|
"learning_rate": 4.862610875433861e-05, |
|
"loss": 0.2143, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 220411.890625, |
|
"learning_rate": 4.861405707674508e-05, |
|
"loss": 0.2629, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 259412.40625, |
|
"learning_rate": 4.8602005399151564e-05, |
|
"loss": 0.2525, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 614391.375, |
|
"learning_rate": 4.8589953721558046e-05, |
|
"loss": 0.2439, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 275747.875, |
|
"learning_rate": 4.857790204396453e-05, |
|
"loss": 0.2651, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 212869.203125, |
|
"learning_rate": 4.8565850366371e-05, |
|
"loss": 0.1677, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 259202.96875, |
|
"learning_rate": 4.855379868877748e-05, |
|
"loss": 0.1966, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.9003587086488641, |
|
"eval_f1": 0.9000880085084791, |
|
"eval_loss": 0.2688085734844208, |
|
"eval_precision": 0.9007296682986318, |
|
"eval_recall": 0.9003587086488641, |
|
"eval_runtime": 12.4695, |
|
"eval_samples_per_second": 201.21, |
|
"eval_steps_per_second": 3.208, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 164794.625, |
|
"learning_rate": 4.854174701118396e-05, |
|
"loss": 0.3116, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1090182.375, |
|
"learning_rate": 4.852969533359044e-05, |
|
"loss": 0.2294, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 393550.84375, |
|
"learning_rate": 4.851764365599692e-05, |
|
"loss": 0.2659, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 227773.296875, |
|
"learning_rate": 4.8505591978403394e-05, |
|
"loss": 0.2151, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 259306.171875, |
|
"learning_rate": 4.8493540300809875e-05, |
|
"loss": 0.2661, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 237662.640625, |
|
"learning_rate": 4.8481488623216357e-05, |
|
"loss": 0.2268, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 385510.71875, |
|
"learning_rate": 4.846943694562283e-05, |
|
"loss": 0.2745, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 163655.578125, |
|
"learning_rate": 4.845738526802931e-05, |
|
"loss": 0.3146, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 243399.0, |
|
"learning_rate": 4.8445333590435794e-05, |
|
"loss": 0.2467, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 327880.625, |
|
"learning_rate": 4.843328191284227e-05, |
|
"loss": 0.2205, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_accuracy": 0.8776404942208051, |
|
"eval_f1": 0.875183931389359, |
|
"eval_loss": 0.30761781334877014, |
|
"eval_precision": 0.8910948763461308, |
|
"eval_recall": 0.8776404942208051, |
|
"eval_runtime": 12.4538, |
|
"eval_samples_per_second": 201.465, |
|
"eval_steps_per_second": 3.212, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 256714.03125, |
|
"learning_rate": 4.842123023524875e-05, |
|
"loss": 0.2844, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 262816.8125, |
|
"learning_rate": 4.8409178557655224e-05, |
|
"loss": 0.2276, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 316480.125, |
|
"learning_rate": 4.8397126880061705e-05, |
|
"loss": 0.2421, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 225589.65625, |
|
"learning_rate": 4.8385075202468186e-05, |
|
"loss": 0.3464, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 185817.125, |
|
"learning_rate": 4.837302352487467e-05, |
|
"loss": 0.2356, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 88735.1875, |
|
"learning_rate": 4.836097184728114e-05, |
|
"loss": 0.182, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 794250.3125, |
|
"learning_rate": 4.834892016968762e-05, |
|
"loss": 0.2339, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 560309.375, |
|
"learning_rate": 4.83368684920941e-05, |
|
"loss": 0.2894, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 272938.0625, |
|
"learning_rate": 4.832481681450058e-05, |
|
"loss": 0.329, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 211817.265625, |
|
"learning_rate": 4.831276513690706e-05, |
|
"loss": 0.2242, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.9151056197688322, |
|
"eval_f1": 0.9149543121007149, |
|
"eval_loss": 0.2171379029750824, |
|
"eval_precision": 0.9152515101201911, |
|
"eval_recall": 0.9151056197688322, |
|
"eval_runtime": 12.4488, |
|
"eval_samples_per_second": 201.545, |
|
"eval_steps_per_second": 3.213, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 143415.46875, |
|
"learning_rate": 4.830071345931354e-05, |
|
"loss": 0.1574, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 282922.125, |
|
"learning_rate": 4.8288661781720016e-05, |
|
"loss": 0.2423, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 146414.75, |
|
"learning_rate": 4.82766101041265e-05, |
|
"loss": 0.2286, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 376618.875, |
|
"learning_rate": 4.826455842653297e-05, |
|
"loss": 0.2082, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 347305.625, |
|
"learning_rate": 4.825250674893945e-05, |
|
"loss": 0.2707, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 120735.5, |
|
"learning_rate": 4.8240455071345934e-05, |
|
"loss": 0.27, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 327705.75, |
|
"learning_rate": 4.8228403393752416e-05, |
|
"loss": 0.2446, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 204558.703125, |
|
"learning_rate": 4.821635171615889e-05, |
|
"loss": 0.2253, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 379880.46875, |
|
"learning_rate": 4.820430003856537e-05, |
|
"loss": 0.2475, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 275538.9375, |
|
"learning_rate": 4.8192248360971846e-05, |
|
"loss": 0.257, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.8911917098445595, |
|
"eval_f1": 0.8905276298091929, |
|
"eval_loss": 0.26427793502807617, |
|
"eval_precision": 0.893198513619984, |
|
"eval_recall": 0.8911917098445595, |
|
"eval_runtime": 12.4635, |
|
"eval_samples_per_second": 201.308, |
|
"eval_steps_per_second": 3.209, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 353166.90625, |
|
"learning_rate": 4.818019668337833e-05, |
|
"loss": 0.2724, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 226420.90625, |
|
"learning_rate": 4.816814500578481e-05, |
|
"loss": 0.2908, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 342758.125, |
|
"learning_rate": 4.815609332819129e-05, |
|
"loss": 0.2236, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 255585.25, |
|
"learning_rate": 4.8144041650597764e-05, |
|
"loss": 0.2951, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 179796.921875, |
|
"learning_rate": 4.8131989973004245e-05, |
|
"loss": 0.1814, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 214087.140625, |
|
"learning_rate": 4.811993829541072e-05, |
|
"loss": 0.3827, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 250333.71875, |
|
"learning_rate": 4.810788661781721e-05, |
|
"loss": 0.2592, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 334693.625, |
|
"learning_rate": 4.809583494022368e-05, |
|
"loss": 0.3138, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 204259.46875, |
|
"learning_rate": 4.8083783262630164e-05, |
|
"loss": 0.2725, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 362242.4375, |
|
"learning_rate": 4.807173158503664e-05, |
|
"loss": 0.2238, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.9131127939418094, |
|
"eval_f1": 0.9127816210997458, |
|
"eval_loss": 0.21650490164756775, |
|
"eval_precision": 0.9140733290376809, |
|
"eval_recall": 0.9131127939418094, |
|
"eval_runtime": 12.4586, |
|
"eval_samples_per_second": 201.386, |
|
"eval_steps_per_second": 3.211, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 427524.21875, |
|
"learning_rate": 4.805967990744312e-05, |
|
"loss": 0.1926, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 376668.125, |
|
"learning_rate": 4.8047628229849594e-05, |
|
"loss": 0.1828, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 181697.09375, |
|
"learning_rate": 4.803557655225608e-05, |
|
"loss": 0.2484, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 405359.8125, |
|
"learning_rate": 4.8023524874662556e-05, |
|
"loss": 0.3022, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 227001.171875, |
|
"learning_rate": 4.801147319706904e-05, |
|
"loss": 0.3152, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 291323.65625, |
|
"learning_rate": 4.799942151947551e-05, |
|
"loss": 0.2439, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 174109.375, |
|
"learning_rate": 4.7987369841881993e-05, |
|
"loss": 0.2465, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 337487.75, |
|
"learning_rate": 4.797531816428847e-05, |
|
"loss": 0.2281, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 225118.296875, |
|
"learning_rate": 4.796326648669495e-05, |
|
"loss": 0.2357, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 159729.390625, |
|
"learning_rate": 4.795121480910143e-05, |
|
"loss": 0.2313, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.899561578318055, |
|
"eval_f1": 0.8995921014681665, |
|
"eval_loss": 0.2312317192554474, |
|
"eval_precision": 0.8996410329041024, |
|
"eval_recall": 0.899561578318055, |
|
"eval_runtime": 12.481, |
|
"eval_samples_per_second": 201.026, |
|
"eval_steps_per_second": 3.205, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 215213.28125, |
|
"learning_rate": 4.7939163131507905e-05, |
|
"loss": 0.2576, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 691060.25, |
|
"learning_rate": 4.7927111453914386e-05, |
|
"loss": 0.2706, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 306584.34375, |
|
"learning_rate": 4.791505977632087e-05, |
|
"loss": 0.1879, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 60201.84375, |
|
"learning_rate": 4.790300809872735e-05, |
|
"loss": 0.2517, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 318122.8125, |
|
"learning_rate": 4.789095642113382e-05, |
|
"loss": 0.2004, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 737994.875, |
|
"learning_rate": 4.7878904743540304e-05, |
|
"loss": 0.3025, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 617771.5, |
|
"learning_rate": 4.786685306594678e-05, |
|
"loss": 0.2708, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 271784.375, |
|
"learning_rate": 4.785480138835326e-05, |
|
"loss": 0.2333, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 348172.15625, |
|
"learning_rate": 4.7842749710759735e-05, |
|
"loss": 0.2371, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 501798.375, |
|
"learning_rate": 4.783069803316622e-05, |
|
"loss": 0.1856, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.9107214029493822, |
|
"eval_f1": 0.9107676605487075, |
|
"eval_loss": 0.22687236964702606, |
|
"eval_precision": 0.9108709640812914, |
|
"eval_recall": 0.9107214029493822, |
|
"eval_runtime": 12.4871, |
|
"eval_samples_per_second": 200.927, |
|
"eval_steps_per_second": 3.203, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 294219.71875, |
|
"learning_rate": 4.78186463555727e-05, |
|
"loss": 0.2343, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 546334.75, |
|
"learning_rate": 4.780659467797918e-05, |
|
"loss": 0.2301, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 173881.875, |
|
"learning_rate": 4.779454300038565e-05, |
|
"loss": 0.2491, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 237170.28125, |
|
"learning_rate": 4.7782491322792134e-05, |
|
"loss": 0.2194, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 319085.8125, |
|
"learning_rate": 4.777043964519861e-05, |
|
"loss": 0.2308, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 365797.4375, |
|
"learning_rate": 4.77583879676051e-05, |
|
"loss": 0.2445, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 255985.921875, |
|
"learning_rate": 4.774633629001157e-05, |
|
"loss": 0.2578, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 253771.796875, |
|
"learning_rate": 4.773428461241805e-05, |
|
"loss": 0.2471, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 140455.671875, |
|
"learning_rate": 4.772223293482453e-05, |
|
"loss": 0.2496, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 356603.71875, |
|
"learning_rate": 4.771018125723101e-05, |
|
"loss": 0.2201, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.9059386209645277, |
|
"eval_f1": 0.9056498912765502, |
|
"eval_loss": 0.24249590933322906, |
|
"eval_precision": 0.9064880886538065, |
|
"eval_recall": 0.9059386209645277, |
|
"eval_runtime": 12.497, |
|
"eval_samples_per_second": 200.768, |
|
"eval_steps_per_second": 3.201, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 284860.53125, |
|
"learning_rate": 4.769812957963749e-05, |
|
"loss": 0.1525, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 250776.8125, |
|
"learning_rate": 4.768607790204397e-05, |
|
"loss": 0.2262, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 211438.5, |
|
"learning_rate": 4.7674026224450445e-05, |
|
"loss": 0.2277, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 368441.25, |
|
"learning_rate": 4.7661974546856926e-05, |
|
"loss": 0.259, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 241326.5, |
|
"learning_rate": 4.76499228692634e-05, |
|
"loss": 0.2286, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 98535.6640625, |
|
"learning_rate": 4.763787119166988e-05, |
|
"loss": 0.2078, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 254980.625, |
|
"learning_rate": 4.7625819514076363e-05, |
|
"loss": 0.2449, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 167483.0625, |
|
"learning_rate": 4.7613767836482845e-05, |
|
"loss": 0.2702, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 222062.484375, |
|
"learning_rate": 4.760171615888932e-05, |
|
"loss": 0.1956, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 405875.75, |
|
"learning_rate": 4.75896644812958e-05, |
|
"loss": 0.3332, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.9043443603029095, |
|
"eval_f1": 0.9044486833245423, |
|
"eval_loss": 0.22543533146381378, |
|
"eval_precision": 0.9048483388492391, |
|
"eval_recall": 0.9043443603029095, |
|
"eval_runtime": 12.4439, |
|
"eval_samples_per_second": 201.624, |
|
"eval_steps_per_second": 3.214, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 152190.84375, |
|
"learning_rate": 4.7577612803702275e-05, |
|
"loss": 0.2661, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 174183.640625, |
|
"learning_rate": 4.7565561126108756e-05, |
|
"loss": 0.2293, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 413301.1875, |
|
"learning_rate": 4.755350944851524e-05, |
|
"loss": 0.2136, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 536887.125, |
|
"learning_rate": 4.754145777092172e-05, |
|
"loss": 0.2134, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 276406.9375, |
|
"learning_rate": 4.752940609332819e-05, |
|
"loss": 0.2286, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 186448.703125, |
|
"learning_rate": 4.7517354415734674e-05, |
|
"loss": 0.2546, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 185627.4375, |
|
"learning_rate": 4.750530273814115e-05, |
|
"loss": 0.2528, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 368845.34375, |
|
"learning_rate": 4.749325106054763e-05, |
|
"loss": 0.2174, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 362864.34375, |
|
"learning_rate": 4.748119938295411e-05, |
|
"loss": 0.2209, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 91713.015625, |
|
"learning_rate": 4.7469147705360586e-05, |
|
"loss": 0.1843, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.8979673176564368, |
|
"eval_f1": 0.8970726226158635, |
|
"eval_loss": 0.2523791491985321, |
|
"eval_precision": 0.9019878131456466, |
|
"eval_recall": 0.8979673176564368, |
|
"eval_runtime": 12.4843, |
|
"eval_samples_per_second": 200.972, |
|
"eval_steps_per_second": 3.204, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 283047.71875, |
|
"learning_rate": 4.745709602776707e-05, |
|
"loss": 0.2767, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 256224.40625, |
|
"learning_rate": 4.744504435017355e-05, |
|
"loss": 0.2588, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 156059.90625, |
|
"learning_rate": 4.743299267258002e-05, |
|
"loss": 0.2688, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 165222.90625, |
|
"learning_rate": 4.7420940994986504e-05, |
|
"loss": 0.2281, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 84012.734375, |
|
"learning_rate": 4.7408889317392985e-05, |
|
"loss": 0.1764, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 146292.03125, |
|
"learning_rate": 4.739683763979946e-05, |
|
"loss": 0.2546, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 197499.578125, |
|
"learning_rate": 4.738478596220594e-05, |
|
"loss": 0.258, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 139515.015625, |
|
"learning_rate": 4.7372734284612416e-05, |
|
"loss": 0.2412, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 206191.359375, |
|
"learning_rate": 4.73606826070189e-05, |
|
"loss": 0.1909, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 263304.375, |
|
"learning_rate": 4.734863092942538e-05, |
|
"loss": 0.2728, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.8967716221602232, |
|
"eval_f1": 0.8957472911028488, |
|
"eval_loss": 0.23479728400707245, |
|
"eval_precision": 0.9016807725080417, |
|
"eval_recall": 0.8967716221602232, |
|
"eval_runtime": 12.4638, |
|
"eval_samples_per_second": 201.303, |
|
"eval_steps_per_second": 3.209, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 232704.671875, |
|
"learning_rate": 4.733657925183186e-05, |
|
"loss": 0.2076, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 479017.40625, |
|
"learning_rate": 4.7324527574238334e-05, |
|
"loss": 0.2137, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 279608.53125, |
|
"learning_rate": 4.7312475896644815e-05, |
|
"loss": 0.1979, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 185551.0, |
|
"learning_rate": 4.730042421905129e-05, |
|
"loss": 0.1977, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 222985.421875, |
|
"learning_rate": 4.728837254145777e-05, |
|
"loss": 0.2625, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 205608.65625, |
|
"learning_rate": 4.727632086386425e-05, |
|
"loss": 0.2073, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 251234.265625, |
|
"learning_rate": 4.7264269186270734e-05, |
|
"loss": 0.2447, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 513352.03125, |
|
"learning_rate": 4.725221750867721e-05, |
|
"loss": 0.3065, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 231737.0625, |
|
"learning_rate": 4.724016583108369e-05, |
|
"loss": 0.2311, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 309214.3125, |
|
"learning_rate": 4.7228114153490164e-05, |
|
"loss": 0.2131, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.913511359107214, |
|
"eval_f1": 0.9135765070264794, |
|
"eval_loss": 0.2209855616092682, |
|
"eval_precision": 0.9137703030306349, |
|
"eval_recall": 0.913511359107214, |
|
"eval_runtime": 12.5042, |
|
"eval_samples_per_second": 200.652, |
|
"eval_steps_per_second": 3.199, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 501373.90625, |
|
"learning_rate": 4.721606247589665e-05, |
|
"loss": 0.3289, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 121567.8046875, |
|
"learning_rate": 4.7204010798303126e-05, |
|
"loss": 0.1986, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 300041.3125, |
|
"learning_rate": 4.719195912070961e-05, |
|
"loss": 0.2294, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 458868.6875, |
|
"learning_rate": 4.717990744311608e-05, |
|
"loss": 0.2133, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 147848.984375, |
|
"learning_rate": 4.716785576552256e-05, |
|
"loss": 0.2769, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 332015.1875, |
|
"learning_rate": 4.715580408792904e-05, |
|
"loss": 0.22, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 472249.4375, |
|
"learning_rate": 4.7143752410335526e-05, |
|
"loss": 0.2495, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 967262.6875, |
|
"learning_rate": 4.7131700732742e-05, |
|
"loss": 0.2312, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 341954.1875, |
|
"learning_rate": 4.711964905514848e-05, |
|
"loss": 0.1765, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 464634.5625, |
|
"learning_rate": 4.7107597377554956e-05, |
|
"loss": 0.19, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.9123156636110004, |
|
"eval_f1": 0.9120359608178479, |
|
"eval_loss": 0.22591687738895416, |
|
"eval_precision": 0.9129663507904072, |
|
"eval_recall": 0.9123156636110004, |
|
"eval_runtime": 12.4552, |
|
"eval_samples_per_second": 201.442, |
|
"eval_steps_per_second": 3.212, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 241272.703125, |
|
"learning_rate": 4.709554569996144e-05, |
|
"loss": 0.1981, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 204981.140625, |
|
"learning_rate": 4.708349402236791e-05, |
|
"loss": 0.1911, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 327311.375, |
|
"learning_rate": 4.70714423447744e-05, |
|
"loss": 0.249, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 375379.96875, |
|
"learning_rate": 4.7059390667180874e-05, |
|
"loss": 0.1629, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 277301.8125, |
|
"learning_rate": 4.7047338989587356e-05, |
|
"loss": 0.2026, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 327376.71875, |
|
"learning_rate": 4.703528731199383e-05, |
|
"loss": 0.271, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 136987.484375, |
|
"learning_rate": 4.702323563440031e-05, |
|
"loss": 0.1453, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 353965.59375, |
|
"learning_rate": 4.701118395680679e-05, |
|
"loss": 0.2407, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 247193.09375, |
|
"learning_rate": 4.6999132279213274e-05, |
|
"loss": 0.1739, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 219308.640625, |
|
"learning_rate": 4.698708060161975e-05, |
|
"loss": 0.2099, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.9023515344758868, |
|
"eval_f1": 0.9016339685231911, |
|
"eval_loss": 0.2813716530799866, |
|
"eval_precision": 0.9054013251007967, |
|
"eval_recall": 0.9023515344758868, |
|
"eval_runtime": 12.4697, |
|
"eval_samples_per_second": 201.208, |
|
"eval_steps_per_second": 3.208, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 480314.5625, |
|
"learning_rate": 4.697502892402623e-05, |
|
"loss": 0.2492, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 327667.5, |
|
"learning_rate": 4.6962977246432704e-05, |
|
"loss": 0.2386, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 361772.9375, |
|
"learning_rate": 4.6950925568839185e-05, |
|
"loss": 0.2363, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 281198.5625, |
|
"learning_rate": 4.6938873891245667e-05, |
|
"loss": 0.2199, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 435042.5, |
|
"learning_rate": 4.692682221365214e-05, |
|
"loss": 0.2985, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 175672.875, |
|
"learning_rate": 4.691477053605862e-05, |
|
"loss": 0.2279, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 342795.84375, |
|
"learning_rate": 4.69027188584651e-05, |
|
"loss": 0.2441, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 288277.53125, |
|
"learning_rate": 4.689066718087158e-05, |
|
"loss": 0.2576, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 121460.078125, |
|
"learning_rate": 4.687861550327806e-05, |
|
"loss": 0.247, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 401488.15625, |
|
"learning_rate": 4.686656382568454e-05, |
|
"loss": 0.2209, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.9051414906337186, |
|
"eval_f1": 0.9046116025347221, |
|
"eval_loss": 0.2472696155309677, |
|
"eval_precision": 0.9070370928171988, |
|
"eval_recall": 0.9051414906337186, |
|
"eval_runtime": 12.4813, |
|
"eval_samples_per_second": 201.021, |
|
"eval_steps_per_second": 3.205, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 377680.78125, |
|
"learning_rate": 4.6854512148091015e-05, |
|
"loss": 0.288, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 233974.625, |
|
"learning_rate": 4.6842460470497496e-05, |
|
"loss": 0.2491, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 334937.875, |
|
"learning_rate": 4.683040879290397e-05, |
|
"loss": 0.2163, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 440934.28125, |
|
"learning_rate": 4.681835711531045e-05, |
|
"loss": 0.2135, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 103967.03125, |
|
"learning_rate": 4.680630543771693e-05, |
|
"loss": 0.1934, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 213988.359375, |
|
"learning_rate": 4.6794253760123415e-05, |
|
"loss": 0.2356, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 231522.921875, |
|
"learning_rate": 4.678220208252989e-05, |
|
"loss": 0.2235, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 301749.78125, |
|
"learning_rate": 4.677015040493637e-05, |
|
"loss": 0.2005, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 163900.3125, |
|
"learning_rate": 4.6758098727342845e-05, |
|
"loss": 0.1628, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 220932.828125, |
|
"learning_rate": 4.6746047049749326e-05, |
|
"loss": 0.2366, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_accuracy": 0.8991630131526505, |
|
"eval_f1": 0.8983219782452526, |
|
"eval_loss": 0.256120890378952, |
|
"eval_precision": 0.9028826072820753, |
|
"eval_recall": 0.8991630131526505, |
|
"eval_runtime": 12.4914, |
|
"eval_samples_per_second": 200.858, |
|
"eval_steps_per_second": 3.202, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 140245.703125, |
|
"learning_rate": 4.673399537215581e-05, |
|
"loss": 0.188, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 56319.390625, |
|
"learning_rate": 4.672194369456229e-05, |
|
"loss": 0.2311, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 417635.46875, |
|
"learning_rate": 4.670989201696876e-05, |
|
"loss": 0.2028, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 257533.140625, |
|
"learning_rate": 4.6697840339375244e-05, |
|
"loss": 0.2448, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 134527.328125, |
|
"learning_rate": 4.668578866178172e-05, |
|
"loss": 0.255, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 241991.234375, |
|
"learning_rate": 4.66737369841882e-05, |
|
"loss": 0.2519, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 292576.84375, |
|
"learning_rate": 4.666168530659468e-05, |
|
"loss": 0.2186, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 172253.8125, |
|
"learning_rate": 4.664963362900116e-05, |
|
"loss": 0.2119, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 162594.703125, |
|
"learning_rate": 4.663758195140764e-05, |
|
"loss": 0.2266, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 291196.75, |
|
"learning_rate": 4.662553027381412e-05, |
|
"loss": 0.3156, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.9095257074531686, |
|
"eval_f1": 0.9094360946444322, |
|
"eval_loss": 0.21921035647392273, |
|
"eval_precision": 0.9095010570473924, |
|
"eval_recall": 0.9095257074531686, |
|
"eval_runtime": 12.4898, |
|
"eval_samples_per_second": 200.883, |
|
"eval_steps_per_second": 3.203, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 359450.15625, |
|
"learning_rate": 4.661347859622059e-05, |
|
"loss": 0.1919, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 356027.1875, |
|
"learning_rate": 4.6601426918627074e-05, |
|
"loss": 0.1946, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 228787.578125, |
|
"learning_rate": 4.6589375241033555e-05, |
|
"loss": 0.2353, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 213314.375, |
|
"learning_rate": 4.6577323563440037e-05, |
|
"loss": 0.21, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 229541.46875, |
|
"learning_rate": 4.656527188584651e-05, |
|
"loss": 0.2157, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 424003.65625, |
|
"learning_rate": 4.655322020825299e-05, |
|
"loss": 0.2202, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 274433.0, |
|
"learning_rate": 4.654116853065947e-05, |
|
"loss": 0.2101, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 585877.375, |
|
"learning_rate": 4.6529116853065955e-05, |
|
"loss": 0.227, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 168976.125, |
|
"learning_rate": 4.651706517547243e-05, |
|
"loss": 0.2004, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 247893.96875, |
|
"learning_rate": 4.650501349787891e-05, |
|
"loss": 0.197, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.9063371861299322, |
|
"eval_f1": 0.9056639403550047, |
|
"eval_loss": 0.2382478266954422, |
|
"eval_precision": 0.9093486961575057, |
|
"eval_recall": 0.9063371861299322, |
|
"eval_runtime": 12.4721, |
|
"eval_samples_per_second": 201.168, |
|
"eval_steps_per_second": 3.207, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 418813.09375, |
|
"learning_rate": 4.6492961820285385e-05, |
|
"loss": 0.2256, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 273254.46875, |
|
"learning_rate": 4.6480910142691866e-05, |
|
"loss": 0.3162, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 222690.640625, |
|
"learning_rate": 4.646885846509834e-05, |
|
"loss": 0.2528, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 332726.59375, |
|
"learning_rate": 4.645680678750482e-05, |
|
"loss": 0.222, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 122768.3515625, |
|
"learning_rate": 4.64447551099113e-05, |
|
"loss": 0.2721, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 177160.875, |
|
"learning_rate": 4.643270343231778e-05, |
|
"loss": 0.2322, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 492673.46875, |
|
"learning_rate": 4.642065175472426e-05, |
|
"loss": 0.2355, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 120090.96875, |
|
"learning_rate": 4.640860007713074e-05, |
|
"loss": 0.2207, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 199731.515625, |
|
"learning_rate": 4.6396548399537215e-05, |
|
"loss": 0.1873, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 337224.9375, |
|
"learning_rate": 4.6384496721943696e-05, |
|
"loss": 0.2371, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.9139099242726185, |
|
"eval_f1": 0.9141014321392996, |
|
"eval_loss": 0.22432319819927216, |
|
"eval_precision": 0.9166452044770947, |
|
"eval_recall": 0.9139099242726185, |
|
"eval_runtime": 12.4637, |
|
"eval_samples_per_second": 201.305, |
|
"eval_steps_per_second": 3.209, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 144981.5625, |
|
"learning_rate": 4.637244504435018e-05, |
|
"loss": 0.2471, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 535936.1875, |
|
"learning_rate": 4.636039336675665e-05, |
|
"loss": 0.2198, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 262519.0, |
|
"learning_rate": 4.634834168916313e-05, |
|
"loss": 0.1173, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 408279.0625, |
|
"learning_rate": 4.633629001156961e-05, |
|
"loss": 0.2529, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 334028.15625, |
|
"learning_rate": 4.6324238333976096e-05, |
|
"loss": 0.2309, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 180496.734375, |
|
"learning_rate": 4.631218665638257e-05, |
|
"loss": 0.1904, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 247550.046875, |
|
"learning_rate": 4.630013497878905e-05, |
|
"loss": 0.2689, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 203775.171875, |
|
"learning_rate": 4.6288083301195526e-05, |
|
"loss": 0.2308, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 114762.5703125, |
|
"learning_rate": 4.627603162360201e-05, |
|
"loss": 0.1804, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 333855.78125, |
|
"learning_rate": 4.626397994600848e-05, |
|
"loss": 0.2273, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.913511359107214, |
|
"eval_f1": 0.9130592850413116, |
|
"eval_loss": 0.23622463643550873, |
|
"eval_precision": 0.9152968112748071, |
|
"eval_recall": 0.913511359107214, |
|
"eval_runtime": 12.479, |
|
"eval_samples_per_second": 201.057, |
|
"eval_steps_per_second": 3.205, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 156201.640625, |
|
"learning_rate": 4.625192826841497e-05, |
|
"loss": 0.1892, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 208585.484375, |
|
"learning_rate": 4.6239876590821444e-05, |
|
"loss": 0.2036, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 457924.40625, |
|
"learning_rate": 4.6227824913227925e-05, |
|
"loss": 0.2847, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 255040.125, |
|
"learning_rate": 4.62157732356344e-05, |
|
"loss": 0.1938, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 183455.359375, |
|
"learning_rate": 4.620372155804088e-05, |
|
"loss": 0.1977, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 344806.9375, |
|
"learning_rate": 4.6191669880447356e-05, |
|
"loss": 0.255, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 445422.90625, |
|
"learning_rate": 4.6179618202853844e-05, |
|
"loss": 0.167, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 228799.640625, |
|
"learning_rate": 4.616756652526032e-05, |
|
"loss": 0.1926, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 211847.53125, |
|
"learning_rate": 4.61555148476668e-05, |
|
"loss": 0.1977, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 337014.65625, |
|
"learning_rate": 4.6143463170073274e-05, |
|
"loss": 0.2504, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.8888003188521323, |
|
"eval_f1": 0.8873066761801871, |
|
"eval_loss": 0.2670985162258148, |
|
"eval_precision": 0.8964616545609316, |
|
"eval_recall": 0.8888003188521323, |
|
"eval_runtime": 12.4722, |
|
"eval_samples_per_second": 201.168, |
|
"eval_steps_per_second": 3.207, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 168669.875, |
|
"learning_rate": 4.6131411492479755e-05, |
|
"loss": 0.2568, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 390192.53125, |
|
"learning_rate": 4.6119359814886236e-05, |
|
"loss": 0.292, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 381233.5, |
|
"learning_rate": 4.610730813729272e-05, |
|
"loss": 0.2383, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 201262.109375, |
|
"learning_rate": 4.609525645969919e-05, |
|
"loss": 0.2706, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 159978.65625, |
|
"learning_rate": 4.6083204782105673e-05, |
|
"loss": 0.2194, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 334120.59375, |
|
"learning_rate": 4.607115310451215e-05, |
|
"loss": 0.2306, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 204698.265625, |
|
"learning_rate": 4.605910142691863e-05, |
|
"loss": 0.1931, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 354762.46875, |
|
"learning_rate": 4.604704974932511e-05, |
|
"loss": 0.2498, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 279956.8125, |
|
"learning_rate": 4.603499807173159e-05, |
|
"loss": 0.238, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 204264.0625, |
|
"learning_rate": 4.6022946394138066e-05, |
|
"loss": 0.1978, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_accuracy": 0.917098445595855, |
|
"eval_f1": 0.9169802729066538, |
|
"eval_loss": 0.20485247671604156, |
|
"eval_precision": 0.9171643432118469, |
|
"eval_recall": 0.917098445595855, |
|
"eval_runtime": 12.4749, |
|
"eval_samples_per_second": 201.125, |
|
"eval_steps_per_second": 3.206, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 224777.34375, |
|
"learning_rate": 4.601089471654455e-05, |
|
"loss": 0.1953, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 233138.46875, |
|
"learning_rate": 4.599884303895102e-05, |
|
"loss": 0.2356, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 130433.8515625, |
|
"learning_rate": 4.59867913613575e-05, |
|
"loss": 0.2269, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 208775.78125, |
|
"learning_rate": 4.5974739683763984e-05, |
|
"loss": 0.2384, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 195953.21875, |
|
"learning_rate": 4.5962688006170466e-05, |
|
"loss": 0.1926, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 169765.890625, |
|
"learning_rate": 4.595063632857694e-05, |
|
"loss": 0.2259, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 216638.4375, |
|
"learning_rate": 4.593858465098342e-05, |
|
"loss": 0.2306, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 219986.3125, |
|
"learning_rate": 4.5926532973389896e-05, |
|
"loss": 0.2169, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 525753.625, |
|
"learning_rate": 4.591448129579638e-05, |
|
"loss": 0.1942, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 207980.359375, |
|
"learning_rate": 4.590242961820286e-05, |
|
"loss": 0.2189, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.9099242726185731, |
|
"eval_f1": 0.9098638649336764, |
|
"eval_loss": 0.22675587236881256, |
|
"eval_precision": 0.9098782103745078, |
|
"eval_recall": 0.9099242726185731, |
|
"eval_runtime": 12.4751, |
|
"eval_samples_per_second": 201.121, |
|
"eval_steps_per_second": 3.206, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 167927.484375, |
|
"learning_rate": 4.589037794060933e-05, |
|
"loss": 0.2138, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 549348.0, |
|
"learning_rate": 4.5878326263015814e-05, |
|
"loss": 0.2528, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 86471.5546875, |
|
"learning_rate": 4.586627458542229e-05, |
|
"loss": 0.188, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 349328.78125, |
|
"learning_rate": 4.585422290782877e-05, |
|
"loss": 0.215, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 222607.4375, |
|
"learning_rate": 4.584217123023525e-05, |
|
"loss": 0.2179, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 304011.5, |
|
"learning_rate": 4.583011955264173e-05, |
|
"loss": 0.2179, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 150350.171875, |
|
"learning_rate": 4.581806787504821e-05, |
|
"loss": 0.2505, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 212958.984375, |
|
"learning_rate": 4.580601619745469e-05, |
|
"loss": 0.2408, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 118683.5703125, |
|
"learning_rate": 4.579396451986116e-05, |
|
"loss": 0.2103, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 160386.796875, |
|
"learning_rate": 4.5781912842267644e-05, |
|
"loss": 0.2171, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.9163013152650459, |
|
"eval_f1": 0.9161651055562922, |
|
"eval_loss": 0.213547021150589, |
|
"eval_precision": 0.9164113566074957, |
|
"eval_recall": 0.9163013152650459, |
|
"eval_runtime": 12.4758, |
|
"eval_samples_per_second": 201.11, |
|
"eval_steps_per_second": 3.206, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 284341.65625, |
|
"learning_rate": 4.5769861164674125e-05, |
|
"loss": 0.2502, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 210621.484375, |
|
"learning_rate": 4.5757809487080606e-05, |
|
"loss": 0.2148, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 437063.46875, |
|
"learning_rate": 4.574575780948708e-05, |
|
"loss": 0.2505, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 212374.59375, |
|
"learning_rate": 4.573370613189356e-05, |
|
"loss": 0.213, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 158491.328125, |
|
"learning_rate": 4.572165445430004e-05, |
|
"loss": 0.2041, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 386332.40625, |
|
"learning_rate": 4.570960277670652e-05, |
|
"loss": 0.2476, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 100670.15625, |
|
"learning_rate": 4.5697551099113e-05, |
|
"loss": 0.1967, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 486919.75, |
|
"learning_rate": 4.568549942151948e-05, |
|
"loss": 0.1781, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 261833.109375, |
|
"learning_rate": 4.5673447743925955e-05, |
|
"loss": 0.2773, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 290765.3125, |
|
"learning_rate": 4.5661396066332436e-05, |
|
"loss": 0.2325, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.8915902750099641, |
|
"eval_f1": 0.8904847371544249, |
|
"eval_loss": 0.26240846514701843, |
|
"eval_precision": 0.8965748968593881, |
|
"eval_recall": 0.8915902750099641, |
|
"eval_runtime": 12.4947, |
|
"eval_samples_per_second": 200.805, |
|
"eval_steps_per_second": 3.201, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 645535.5, |
|
"learning_rate": 4.564934438873891e-05, |
|
"loss": 0.1845, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 709512.5, |
|
"learning_rate": 4.56372927111454e-05, |
|
"loss": 0.2641, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 194677.859375, |
|
"learning_rate": 4.562524103355187e-05, |
|
"loss": 0.275, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 153537.4375, |
|
"learning_rate": 4.5613189355958354e-05, |
|
"loss": 0.2616, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 168272.890625, |
|
"learning_rate": 4.560113767836483e-05, |
|
"loss": 0.2226, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 311794.625, |
|
"learning_rate": 4.558908600077131e-05, |
|
"loss": 0.2237, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 312384.03125, |
|
"learning_rate": 4.5577034323177785e-05, |
|
"loss": 0.2552, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 177459.375, |
|
"learning_rate": 4.556498264558427e-05, |
|
"loss": 0.1759, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 281611.59375, |
|
"learning_rate": 4.555293096799075e-05, |
|
"loss": 0.2031, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 201760.8125, |
|
"learning_rate": 4.554087929039723e-05, |
|
"loss": 0.1888, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.8923874053407732, |
|
"eval_f1": 0.8911264735256401, |
|
"eval_loss": 0.2877594530582428, |
|
"eval_precision": 0.8986690061249697, |
|
"eval_recall": 0.8923874053407732, |
|
"eval_runtime": 12.4645, |
|
"eval_samples_per_second": 201.292, |
|
"eval_steps_per_second": 3.209, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 120513.65625, |
|
"learning_rate": 4.55288276128037e-05, |
|
"loss": 0.1995, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 236276.1875, |
|
"learning_rate": 4.5516775935210184e-05, |
|
"loss": 0.1812, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 315676.4375, |
|
"learning_rate": 4.550472425761666e-05, |
|
"loss": 0.2993, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 184856.0, |
|
"learning_rate": 4.549267258002315e-05, |
|
"loss": 0.2441, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 636716.1875, |
|
"learning_rate": 4.548062090242962e-05, |
|
"loss": 0.2447, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 235927.671875, |
|
"learning_rate": 4.54685692248361e-05, |
|
"loss": 0.2051, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 243305.515625, |
|
"learning_rate": 4.545651754724258e-05, |
|
"loss": 0.2596, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 370063.59375, |
|
"learning_rate": 4.544446586964906e-05, |
|
"loss": 0.2223, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 585004.625, |
|
"learning_rate": 4.543241419205554e-05, |
|
"loss": 0.2075, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 262047.875, |
|
"learning_rate": 4.5420362514462014e-05, |
|
"loss": 0.2345, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.8963730569948186, |
|
"eval_f1": 0.8953353148714563, |
|
"eval_loss": 0.24442929029464722, |
|
"eval_precision": 0.9013455886413335, |
|
"eval_recall": 0.8963730569948186, |
|
"eval_runtime": 12.4824, |
|
"eval_samples_per_second": 201.004, |
|
"eval_steps_per_second": 3.205, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 127394.015625, |
|
"learning_rate": 4.5408310836868495e-05, |
|
"loss": 0.1995, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 405064.8125, |
|
"learning_rate": 4.539625915927497e-05, |
|
"loss": 0.2691, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 632551.125, |
|
"learning_rate": 4.538420748168145e-05, |
|
"loss": 0.2758, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 103708.234375, |
|
"learning_rate": 4.537215580408793e-05, |
|
"loss": 0.2373, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 231929.875, |
|
"learning_rate": 4.5360104126494414e-05, |
|
"loss": 0.2152, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 302502.03125, |
|
"learning_rate": 4.534805244890089e-05, |
|
"loss": 0.2068, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 278650.0625, |
|
"learning_rate": 4.533600077130737e-05, |
|
"loss": 0.2494, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 224905.484375, |
|
"learning_rate": 4.5323949093713844e-05, |
|
"loss": 0.2022, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 279063.75, |
|
"learning_rate": 4.5311897416120325e-05, |
|
"loss": 0.2295, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 163691.515625, |
|
"learning_rate": 4.52998457385268e-05, |
|
"loss": 0.1688, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.908330011956955, |
|
"eval_f1": 0.9077357266944889, |
|
"eval_loss": 0.2479422241449356, |
|
"eval_precision": 0.9108744090612344, |
|
"eval_recall": 0.908330011956955, |
|
"eval_runtime": 12.5049, |
|
"eval_samples_per_second": 200.641, |
|
"eval_steps_per_second": 3.199, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 327312.96875, |
|
"learning_rate": 4.528779406093329e-05, |
|
"loss": 0.1556, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 274013.09375, |
|
"learning_rate": 4.527574238333976e-05, |
|
"loss": 0.2903, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 227320.96875, |
|
"learning_rate": 4.526369070574624e-05, |
|
"loss": 0.2432, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 101987.640625, |
|
"learning_rate": 4.525163902815272e-05, |
|
"loss": 0.2176, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 225351.109375, |
|
"learning_rate": 4.52395873505592e-05, |
|
"loss": 0.1864, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 208051.765625, |
|
"learning_rate": 4.5227535672965674e-05, |
|
"loss": 0.2659, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 342590.03125, |
|
"learning_rate": 4.521548399537216e-05, |
|
"loss": 0.2199, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 216820.671875, |
|
"learning_rate": 4.5203432317778636e-05, |
|
"loss": 0.2149, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 138170.34375, |
|
"learning_rate": 4.519138064018512e-05, |
|
"loss": 0.2442, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 247797.890625, |
|
"learning_rate": 4.517932896259159e-05, |
|
"loss": 0.2083, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.913511359107214, |
|
"eval_f1": 0.9130955830993255, |
|
"eval_loss": 0.21996097266674042, |
|
"eval_precision": 0.9150368156806639, |
|
"eval_recall": 0.913511359107214, |
|
"eval_runtime": 12.4935, |
|
"eval_samples_per_second": 200.824, |
|
"eval_steps_per_second": 3.202, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 120234.546875, |
|
"learning_rate": 4.516727728499807e-05, |
|
"loss": 0.1768, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 195769.40625, |
|
"learning_rate": 4.5155225607404554e-05, |
|
"loss": 0.2422, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 142246.734375, |
|
"learning_rate": 4.5143173929811036e-05, |
|
"loss": 0.1863, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 208686.890625, |
|
"learning_rate": 4.513112225221751e-05, |
|
"loss": 0.1961, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 344615.53125, |
|
"learning_rate": 4.511907057462399e-05, |
|
"loss": 0.2392, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 542719.9375, |
|
"learning_rate": 4.5107018897030466e-05, |
|
"loss": 0.2497, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 308383.09375, |
|
"learning_rate": 4.509496721943695e-05, |
|
"loss": 0.1903, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 246131.765625, |
|
"learning_rate": 4.508291554184343e-05, |
|
"loss": 0.2518, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 441606.1875, |
|
"learning_rate": 4.507086386424991e-05, |
|
"loss": 0.2086, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 181527.40625, |
|
"learning_rate": 4.5058812186656384e-05, |
|
"loss": 0.2475, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.9035472299721005, |
|
"eval_f1": 0.9030499023997222, |
|
"eval_loss": 0.23530976474285126, |
|
"eval_precision": 0.9051551236736851, |
|
"eval_recall": 0.9035472299721005, |
|
"eval_runtime": 12.5089, |
|
"eval_samples_per_second": 200.577, |
|
"eval_steps_per_second": 3.198, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 202878.53125, |
|
"learning_rate": 4.5046760509062865e-05, |
|
"loss": 0.2055, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 233816.40625, |
|
"learning_rate": 4.503470883146934e-05, |
|
"loss": 0.1779, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 177604.296875, |
|
"learning_rate": 4.502265715387582e-05, |
|
"loss": 0.2305, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 364165.34375, |
|
"learning_rate": 4.50106054762823e-05, |
|
"loss": 0.2123, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 155471.65625, |
|
"learning_rate": 4.4998553798688784e-05, |
|
"loss": 0.1939, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 337500.03125, |
|
"learning_rate": 4.498650212109526e-05, |
|
"loss": 0.2682, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 128476.1171875, |
|
"learning_rate": 4.497445044350174e-05, |
|
"loss": 0.1988, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 275538.78125, |
|
"learning_rate": 4.4962398765908214e-05, |
|
"loss": 0.2333, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 249258.46875, |
|
"learning_rate": 4.4950347088314695e-05, |
|
"loss": 0.1944, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 266623.78125, |
|
"learning_rate": 4.4938295410721176e-05, |
|
"loss": 0.1928, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.894380231167796, |
|
"eval_f1": 0.8933321704334167, |
|
"eval_loss": 0.2986622750759125, |
|
"eval_precision": 0.8992220854769614, |
|
"eval_recall": 0.894380231167796, |
|
"eval_runtime": 12.4701, |
|
"eval_samples_per_second": 201.202, |
|
"eval_steps_per_second": 3.208, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 536207.0, |
|
"learning_rate": 4.492624373312766e-05, |
|
"loss": 0.2723, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 152929.5, |
|
"learning_rate": 4.491419205553413e-05, |
|
"loss": 0.204, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 249052.25, |
|
"learning_rate": 4.490214037794061e-05, |
|
"loss": 0.2062, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 420176.1875, |
|
"learning_rate": 4.489008870034709e-05, |
|
"loss": 0.243, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 225057.75, |
|
"learning_rate": 4.487803702275357e-05, |
|
"loss": 0.2047, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 322600.46875, |
|
"learning_rate": 4.486598534516005e-05, |
|
"loss": 0.2563, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 215948.890625, |
|
"learning_rate": 4.4853933667566525e-05, |
|
"loss": 0.2187, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 304164.59375, |
|
"learning_rate": 4.4841881989973006e-05, |
|
"loss": 0.1994, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 435468.0625, |
|
"learning_rate": 4.482983031237948e-05, |
|
"loss": 0.2137, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 185575.71875, |
|
"learning_rate": 4.481777863478596e-05, |
|
"loss": 0.2008, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.876046233559187, |
|
"eval_f1": 0.8735264940770283, |
|
"eval_loss": 0.299306720495224, |
|
"eval_precision": 0.8896518291860559, |
|
"eval_recall": 0.876046233559187, |
|
"eval_runtime": 12.4793, |
|
"eval_samples_per_second": 201.052, |
|
"eval_steps_per_second": 3.205, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 115670.9296875, |
|
"learning_rate": 4.480572695719244e-05, |
|
"loss": 0.1804, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 192260.875, |
|
"learning_rate": 4.4793675279598924e-05, |
|
"loss": 0.2982, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 145098.75, |
|
"learning_rate": 4.47816236020054e-05, |
|
"loss": 0.2516, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 107989.4609375, |
|
"learning_rate": 4.476957192441188e-05, |
|
"loss": 0.1921, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 378746.71875, |
|
"learning_rate": 4.4757520246818355e-05, |
|
"loss": 0.2296, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 236829.796875, |
|
"learning_rate": 4.474546856922484e-05, |
|
"loss": 0.2355, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 226571.203125, |
|
"learning_rate": 4.473341689163132e-05, |
|
"loss": 0.2038, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 206681.625, |
|
"learning_rate": 4.47213652140378e-05, |
|
"loss": 0.2086, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 347856.59375, |
|
"learning_rate": 4.470931353644427e-05, |
|
"loss": 0.2322, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 251549.421875, |
|
"learning_rate": 4.4697261858850754e-05, |
|
"loss": 0.22, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.9035472299721005, |
|
"eval_f1": 0.9032851922362077, |
|
"eval_loss": 0.24311725795269012, |
|
"eval_precision": 0.9039388604136059, |
|
"eval_recall": 0.9035472299721005, |
|
"eval_runtime": 12.509, |
|
"eval_samples_per_second": 200.576, |
|
"eval_steps_per_second": 3.198, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 365455.96875, |
|
"learning_rate": 4.468521018125723e-05, |
|
"loss": 0.1983, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 135693.59375, |
|
"learning_rate": 4.4673158503663717e-05, |
|
"loss": 0.1764, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 332106.21875, |
|
"learning_rate": 4.466110682607019e-05, |
|
"loss": 0.2139, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 162781.546875, |
|
"learning_rate": 4.464905514847667e-05, |
|
"loss": 0.2205, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 167004.28125, |
|
"learning_rate": 4.463700347088315e-05, |
|
"loss": 0.2255, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 217623.34375, |
|
"learning_rate": 4.462495179328963e-05, |
|
"loss": 0.2438, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 191213.296875, |
|
"learning_rate": 4.46129001156961e-05, |
|
"loss": 0.2283, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 244383.078125, |
|
"learning_rate": 4.460084843810259e-05, |
|
"loss": 0.2034, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 260674.09375, |
|
"learning_rate": 4.4588796760509065e-05, |
|
"loss": 0.1769, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 363888.0625, |
|
"learning_rate": 4.4576745082915546e-05, |
|
"loss": 0.1844, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_accuracy": 0.917098445595855, |
|
"eval_f1": 0.9170851985417201, |
|
"eval_loss": 0.2590126693248749, |
|
"eval_precision": 0.9170766527573011, |
|
"eval_recall": 0.917098445595855, |
|
"eval_runtime": 12.478, |
|
"eval_samples_per_second": 201.074, |
|
"eval_steps_per_second": 3.206, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 219406.3125, |
|
"learning_rate": 4.456469340532202e-05, |
|
"loss": 0.2647, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 289715.0, |
|
"learning_rate": 4.45526417277285e-05, |
|
"loss": 0.2315, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 300951.78125, |
|
"learning_rate": 4.4540590050134977e-05, |
|
"loss": 0.2044, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 228631.484375, |
|
"learning_rate": 4.4528538372541465e-05, |
|
"loss": 0.1395, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 258748.0625, |
|
"learning_rate": 4.451648669494794e-05, |
|
"loss": 0.2106, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 261900.28125, |
|
"learning_rate": 4.450443501735442e-05, |
|
"loss": 0.1891, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 567360.5, |
|
"learning_rate": 4.4492383339760895e-05, |
|
"loss": 0.1796, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 247367.140625, |
|
"learning_rate": 4.4480331662167376e-05, |
|
"loss": 0.2354, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 180764.234375, |
|
"learning_rate": 4.446827998457386e-05, |
|
"loss": 0.2308, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 143473.34375, |
|
"learning_rate": 4.445622830698034e-05, |
|
"loss": 0.2235, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_accuracy": 0.904742925468314, |
|
"eval_f1": 0.9041326842192928, |
|
"eval_loss": 0.2420862317085266, |
|
"eval_precision": 0.9071727983486801, |
|
"eval_recall": 0.904742925468314, |
|
"eval_runtime": 12.4566, |
|
"eval_samples_per_second": 201.419, |
|
"eval_steps_per_second": 3.211, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 142405.03125, |
|
"learning_rate": 4.444417662938681e-05, |
|
"loss": 0.2276, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 245076.640625, |
|
"learning_rate": 4.4432124951793294e-05, |
|
"loss": 0.2051, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 313735.53125, |
|
"learning_rate": 4.442007327419977e-05, |
|
"loss": 0.2259, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 423567.96875, |
|
"learning_rate": 4.440802159660625e-05, |
|
"loss": 0.2329, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 220593.828125, |
|
"learning_rate": 4.439596991901273e-05, |
|
"loss": 0.1839, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 300864.0, |
|
"learning_rate": 4.4383918241419206e-05, |
|
"loss": 0.2598, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 338582.8125, |
|
"learning_rate": 4.437186656382569e-05, |
|
"loss": 0.2564, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 225039.421875, |
|
"learning_rate": 4.435981488623216e-05, |
|
"loss": 0.2375, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 306696.875, |
|
"learning_rate": 4.434776320863864e-05, |
|
"loss": 0.1867, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 175457.875, |
|
"learning_rate": 4.4335711531045124e-05, |
|
"loss": 0.2222, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.8947787963332005, |
|
"eval_f1": 0.8940640494290631, |
|
"eval_loss": 0.2958182394504547, |
|
"eval_precision": 0.8972950995667646, |
|
"eval_recall": 0.8947787963332005, |
|
"eval_runtime": 12.4883, |
|
"eval_samples_per_second": 200.907, |
|
"eval_steps_per_second": 3.203, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 274502.125, |
|
"learning_rate": 4.4323659853451605e-05, |
|
"loss": 0.2213, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 143913.453125, |
|
"learning_rate": 4.431160817585808e-05, |
|
"loss": 0.2485, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 134518.640625, |
|
"learning_rate": 4.429955649826456e-05, |
|
"loss": 0.215, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 203820.625, |
|
"learning_rate": 4.4287504820671036e-05, |
|
"loss": 0.2146, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 322863.5625, |
|
"learning_rate": 4.427545314307752e-05, |
|
"loss": 0.2328, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 356002.96875, |
|
"learning_rate": 4.4263401465484e-05, |
|
"loss": 0.2083, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 703977.0625, |
|
"learning_rate": 4.425134978789048e-05, |
|
"loss": 0.1654, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 221094.734375, |
|
"learning_rate": 4.4239298110296954e-05, |
|
"loss": 0.2014, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 273416.5625, |
|
"learning_rate": 4.4227246432703435e-05, |
|
"loss": 0.2901, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 188345.46875, |
|
"learning_rate": 4.421519475510991e-05, |
|
"loss": 0.2241, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.9210840972499004, |
|
"eval_f1": 0.9208697027387154, |
|
"eval_loss": 0.2031262218952179, |
|
"eval_precision": 0.9215894170459646, |
|
"eval_recall": 0.9210840972499004, |
|
"eval_runtime": 16.2579, |
|
"eval_samples_per_second": 154.325, |
|
"eval_steps_per_second": 2.46, |
|
"step": 5300 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 41988, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 4.46240356466688e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|