{ "best_metric": 0.2031262218952179, "best_model_checkpoint": "final_roberta_with_new_400k_plus_37k/checkpoint-5300", "epoch": 0.37867962274935696, "eval_steps": 100, "global_step": 5300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 259361.75, "learning_rate": 1.0000000000000002e-06, "loss": 1.1264, "step": 10 }, { "epoch": 0.0, "grad_norm": 272740.9375, "learning_rate": 2.0000000000000003e-06, "loss": 1.1058, "step": 20 }, { "epoch": 0.0, "grad_norm": 244078.703125, "learning_rate": 3e-06, "loss": 1.0828, "step": 30 }, { "epoch": 0.0, "grad_norm": 234958.875, "learning_rate": 4.000000000000001e-06, "loss": 1.0388, "step": 40 }, { "epoch": 0.0, "grad_norm": 270513.0625, "learning_rate": 5e-06, "loss": 0.985, "step": 50 }, { "epoch": 0.0, "grad_norm": 186214.65625, "learning_rate": 6e-06, "loss": 0.8671, "step": 60 }, { "epoch": 0.01, "grad_norm": 174922.8125, "learning_rate": 7.000000000000001e-06, "loss": 0.7386, "step": 70 }, { "epoch": 0.01, "grad_norm": 191114.640625, "learning_rate": 8.000000000000001e-06, "loss": 0.6231, "step": 80 }, { "epoch": 0.01, "grad_norm": 195687.9375, "learning_rate": 9e-06, "loss": 0.4322, "step": 90 }, { "epoch": 0.01, "grad_norm": 333151.59375, "learning_rate": 1e-05, "loss": 0.3174, "step": 100 }, { "epoch": 0.01, "eval_accuracy": 0.8927859705061778, "eval_f1": 0.8918917476204463, "eval_loss": 0.32540708780288696, "eval_precision": 0.8963585084239793, "eval_recall": 0.8927859705061778, "eval_runtime": 12.4623, "eval_samples_per_second": 201.328, "eval_steps_per_second": 3.21, "step": 100 }, { "epoch": 0.01, "grad_norm": 581737.1875, "learning_rate": 1.1000000000000001e-05, "loss": 0.3052, "step": 110 }, { "epoch": 0.01, "grad_norm": 789745.375, "learning_rate": 1.2e-05, "loss": 0.3366, "step": 120 }, { "epoch": 0.01, "grad_norm": 282232.71875, "learning_rate": 1.3000000000000001e-05, "loss": 0.3257, "step": 130 }, { "epoch": 0.01, "grad_norm": 337977.96875, "learning_rate": 1.4000000000000001e-05, "loss": 0.2982, "step": 140 }, { "epoch": 0.01, "grad_norm": 622948.125, "learning_rate": 1.5e-05, "loss": 0.3382, "step": 150 }, { "epoch": 0.01, "grad_norm": 438774.15625, "learning_rate": 1.6000000000000003e-05, "loss": 0.2975, "step": 160 }, { "epoch": 0.01, "grad_norm": 715256.4375, "learning_rate": 1.7000000000000003e-05, "loss": 0.3695, "step": 170 }, { "epoch": 0.01, "grad_norm": 294961.75, "learning_rate": 1.8e-05, "loss": 0.3001, "step": 180 }, { "epoch": 0.01, "grad_norm": 526643.5, "learning_rate": 1.9e-05, "loss": 0.2853, "step": 190 }, { "epoch": 0.01, "grad_norm": 432135.15625, "learning_rate": 2e-05, "loss": 0.3285, "step": 200 }, { "epoch": 0.01, "eval_accuracy": 0.8955759266640095, "eval_f1": 0.8950953630781538, "eval_loss": 0.2577860653400421, "eval_precision": 0.8967688785864537, "eval_recall": 0.8955759266640095, "eval_runtime": 12.4549, "eval_samples_per_second": 201.447, "eval_steps_per_second": 3.212, "step": 200 }, { "epoch": 0.02, "grad_norm": 344853.5625, "learning_rate": 2.1e-05, "loss": 0.3057, "step": 210 }, { "epoch": 0.02, "grad_norm": 325491.0, "learning_rate": 2.2000000000000003e-05, "loss": 0.2563, "step": 220 }, { "epoch": 0.02, "grad_norm": 788922.3125, "learning_rate": 2.3000000000000003e-05, "loss": 0.3054, "step": 230 }, { "epoch": 0.02, "grad_norm": 589439.25, "learning_rate": 2.4e-05, "loss": 0.3409, "step": 240 }, { "epoch": 0.02, "grad_norm": 213858.8125, "learning_rate": 2.5e-05, "loss": 0.2863, "step": 250 }, { "epoch": 0.02, "grad_norm": 457191.5, "learning_rate": 2.6000000000000002e-05, "loss": 0.266, "step": 260 }, { "epoch": 0.02, "grad_norm": 456034.78125, "learning_rate": 2.7000000000000002e-05, "loss": 0.2825, "step": 270 }, { "epoch": 0.02, "grad_norm": 460380.375, "learning_rate": 2.8000000000000003e-05, "loss": 0.2809, "step": 280 }, { "epoch": 0.02, "grad_norm": 318752.53125, "learning_rate": 2.9e-05, "loss": 0.2558, "step": 290 }, { "epoch": 0.02, "grad_norm": 487526.53125, "learning_rate": 3e-05, "loss": 0.247, "step": 300 }, { "epoch": 0.02, "eval_accuracy": 0.8620964527700279, "eval_f1": 0.8588044269388889, "eval_loss": 0.39129751920700073, "eval_precision": 0.8782950809046319, "eval_recall": 0.8620964527700279, "eval_runtime": 12.4793, "eval_samples_per_second": 201.052, "eval_steps_per_second": 3.205, "step": 300 }, { "epoch": 0.02, "grad_norm": 810950.4375, "learning_rate": 3.1e-05, "loss": 0.3178, "step": 310 }, { "epoch": 0.02, "grad_norm": 197358.0625, "learning_rate": 3.2000000000000005e-05, "loss": 0.2416, "step": 320 }, { "epoch": 0.02, "grad_norm": 660009.25, "learning_rate": 3.3e-05, "loss": 0.1957, "step": 330 }, { "epoch": 0.02, "grad_norm": 782952.625, "learning_rate": 3.4000000000000007e-05, "loss": 0.3032, "step": 340 }, { "epoch": 0.03, "grad_norm": 910589.1875, "learning_rate": 3.5e-05, "loss": 0.2858, "step": 350 }, { "epoch": 0.03, "grad_norm": 217997.765625, "learning_rate": 3.6e-05, "loss": 0.2892, "step": 360 }, { "epoch": 0.03, "grad_norm": 353057.21875, "learning_rate": 3.7e-05, "loss": 0.2023, "step": 370 }, { "epoch": 0.03, "grad_norm": 473318.84375, "learning_rate": 3.8e-05, "loss": 0.2521, "step": 380 }, { "epoch": 0.03, "grad_norm": 176609.578125, "learning_rate": 3.9000000000000006e-05, "loss": 0.2648, "step": 390 }, { "epoch": 0.03, "grad_norm": 272719.65625, "learning_rate": 4e-05, "loss": 0.2853, "step": 400 }, { "epoch": 0.03, "eval_accuracy": 0.8736548425667596, "eval_f1": 0.8710864907473246, "eval_loss": 0.3394368290901184, "eval_precision": 0.8871063648493269, "eval_recall": 0.8736548425667596, "eval_runtime": 12.4669, "eval_samples_per_second": 201.253, "eval_steps_per_second": 3.208, "step": 400 }, { "epoch": 0.03, "grad_norm": 398616.40625, "learning_rate": 4.1e-05, "loss": 0.2679, "step": 410 }, { "epoch": 0.03, "grad_norm": 185647.96875, "learning_rate": 4.2e-05, "loss": 0.2532, "step": 420 }, { "epoch": 0.03, "grad_norm": 436418.59375, "learning_rate": 4.3e-05, "loss": 0.2724, "step": 430 }, { "epoch": 0.03, "grad_norm": 299492.25, "learning_rate": 4.4000000000000006e-05, "loss": 0.2548, "step": 440 }, { "epoch": 0.03, "grad_norm": 482227.65625, "learning_rate": 4.5e-05, "loss": 0.2769, "step": 450 }, { "epoch": 0.03, "grad_norm": 246368.28125, "learning_rate": 4.600000000000001e-05, "loss": 0.2869, "step": 460 }, { "epoch": 0.03, "grad_norm": 391130.0625, "learning_rate": 4.7e-05, "loss": 0.3358, "step": 470 }, { "epoch": 0.03, "grad_norm": 284843.15625, "learning_rate": 4.8e-05, "loss": 0.2601, "step": 480 }, { "epoch": 0.04, "grad_norm": 512920.8125, "learning_rate": 4.9e-05, "loss": 0.3797, "step": 490 }, { "epoch": 0.04, "grad_norm": 320267.75, "learning_rate": 5e-05, "loss": 0.3031, "step": 500 }, { "epoch": 0.04, "eval_accuracy": 0.8537265842965325, "eval_f1": 0.8491421277003748, "eval_loss": 0.3923502266407013, "eval_precision": 0.8770289219330052, "eval_recall": 0.8537265842965325, "eval_runtime": 12.4502, "eval_samples_per_second": 201.524, "eval_steps_per_second": 3.213, "step": 500 }, { "epoch": 0.04, "grad_norm": 944106.25, "learning_rate": 4.9987948322406484e-05, "loss": 0.3445, "step": 510 }, { "epoch": 0.04, "grad_norm": 650689.8125, "learning_rate": 4.997589664481296e-05, "loss": 0.2683, "step": 520 }, { "epoch": 0.04, "grad_norm": 404230.5, "learning_rate": 4.996384496721944e-05, "loss": 0.2732, "step": 530 }, { "epoch": 0.04, "grad_norm": 253872.78125, "learning_rate": 4.995179328962592e-05, "loss": 0.2637, "step": 540 }, { "epoch": 0.04, "grad_norm": 173572.625, "learning_rate": 4.9939741612032395e-05, "loss": 0.2878, "step": 550 }, { "epoch": 0.04, "grad_norm": 234455.234375, "learning_rate": 4.9927689934438876e-05, "loss": 0.2105, "step": 560 }, { "epoch": 0.04, "grad_norm": 238566.0, "learning_rate": 4.991563825684535e-05, "loss": 0.3066, "step": 570 }, { "epoch": 0.04, "grad_norm": 476733.5, "learning_rate": 4.990358657925183e-05, "loss": 0.2801, "step": 580 }, { "epoch": 0.04, "grad_norm": 279763.1875, "learning_rate": 4.9891534901658313e-05, "loss": 0.241, "step": 590 }, { "epoch": 0.04, "grad_norm": 332317.40625, "learning_rate": 4.9879483224064795e-05, "loss": 0.2747, "step": 600 }, { "epoch": 0.04, "eval_accuracy": 0.9079314467915505, "eval_f1": 0.9079421645959964, "eval_loss": 0.2531713545322418, "eval_precision": 0.9079554973313584, "eval_recall": 0.9079314467915505, "eval_runtime": 12.4925, "eval_samples_per_second": 200.84, "eval_steps_per_second": 3.202, "step": 600 }, { "epoch": 0.04, "grad_norm": 156262.09375, "learning_rate": 4.986743154647127e-05, "loss": 0.2498, "step": 610 }, { "epoch": 0.04, "grad_norm": 731199.375, "learning_rate": 4.985537986887775e-05, "loss": 0.2715, "step": 620 }, { "epoch": 0.05, "grad_norm": 360661.875, "learning_rate": 4.9843328191284225e-05, "loss": 0.2522, "step": 630 }, { "epoch": 0.05, "grad_norm": 338785.125, "learning_rate": 4.9831276513690706e-05, "loss": 0.2912, "step": 640 }, { "epoch": 0.05, "grad_norm": 376656.71875, "learning_rate": 4.981922483609719e-05, "loss": 0.2842, "step": 650 }, { "epoch": 0.05, "grad_norm": 173638.25, "learning_rate": 4.980717315850367e-05, "loss": 0.2145, "step": 660 }, { "epoch": 0.05, "grad_norm": 799034.5, "learning_rate": 4.979512148091014e-05, "loss": 0.2167, "step": 670 }, { "epoch": 0.05, "grad_norm": 274675.84375, "learning_rate": 4.9783069803316624e-05, "loss": 0.2779, "step": 680 }, { "epoch": 0.05, "grad_norm": 194338.96875, "learning_rate": 4.97710181257231e-05, "loss": 0.269, "step": 690 }, { "epoch": 0.05, "grad_norm": 284438.125, "learning_rate": 4.975896644812959e-05, "loss": 0.2797, "step": 700 }, { "epoch": 0.05, "eval_accuracy": 0.863690713431646, "eval_f1": 0.8606919701702621, "eval_loss": 0.36067071557044983, "eval_precision": 0.8781306500206725, "eval_recall": 0.863690713431646, "eval_runtime": 12.4463, "eval_samples_per_second": 201.586, "eval_steps_per_second": 3.214, "step": 700 }, { "epoch": 0.05, "grad_norm": 737474.625, "learning_rate": 4.974691477053606e-05, "loss": 0.3834, "step": 710 }, { "epoch": 0.05, "grad_norm": 414523.40625, "learning_rate": 4.973486309294254e-05, "loss": 0.3192, "step": 720 }, { "epoch": 0.05, "grad_norm": 240870.953125, "learning_rate": 4.972281141534902e-05, "loss": 0.2479, "step": 730 }, { "epoch": 0.05, "grad_norm": 280922.09375, "learning_rate": 4.97107597377555e-05, "loss": 0.2549, "step": 740 }, { "epoch": 0.05, "grad_norm": 258415.796875, "learning_rate": 4.969870806016197e-05, "loss": 0.294, "step": 750 }, { "epoch": 0.05, "grad_norm": 409388.15625, "learning_rate": 4.968665638256846e-05, "loss": 0.2806, "step": 760 }, { "epoch": 0.06, "grad_norm": 399257.46875, "learning_rate": 4.9674604704974935e-05, "loss": 0.2841, "step": 770 }, { "epoch": 0.06, "grad_norm": 363038.375, "learning_rate": 4.966255302738142e-05, "loss": 0.3085, "step": 780 }, { "epoch": 0.06, "grad_norm": 351745.78125, "learning_rate": 4.965050134978789e-05, "loss": 0.2652, "step": 790 }, { "epoch": 0.06, "grad_norm": 297461.6875, "learning_rate": 4.963844967219437e-05, "loss": 0.2211, "step": 800 }, { "epoch": 0.06, "eval_accuracy": 0.8880031885213232, "eval_f1": 0.8871620549900273, "eval_loss": 0.2910105884075165, "eval_precision": 0.8908776073001764, "eval_recall": 0.8880031885213232, "eval_runtime": 12.4814, "eval_samples_per_second": 201.018, "eval_steps_per_second": 3.205, "step": 800 }, { "epoch": 0.06, "grad_norm": 287442.3125, "learning_rate": 4.962639799460085e-05, "loss": 0.2431, "step": 810 }, { "epoch": 0.06, "grad_norm": 277648.125, "learning_rate": 4.9614346317007335e-05, "loss": 0.3058, "step": 820 }, { "epoch": 0.06, "grad_norm": 309109.34375, "learning_rate": 4.960229463941381e-05, "loss": 0.1934, "step": 830 }, { "epoch": 0.06, "grad_norm": 487191.03125, "learning_rate": 4.959024296182029e-05, "loss": 0.276, "step": 840 }, { "epoch": 0.06, "grad_norm": 326520.6875, "learning_rate": 4.9578191284226765e-05, "loss": 0.2385, "step": 850 }, { "epoch": 0.06, "grad_norm": 396849.90625, "learning_rate": 4.9566139606633246e-05, "loss": 0.2689, "step": 860 }, { "epoch": 0.06, "grad_norm": 405153.84375, "learning_rate": 4.955408792903972e-05, "loss": 0.2374, "step": 870 }, { "epoch": 0.06, "grad_norm": 228344.0, "learning_rate": 4.95420362514462e-05, "loss": 0.2317, "step": 880 }, { "epoch": 0.06, "grad_norm": 232430.0625, "learning_rate": 4.9529984573852683e-05, "loss": 0.2397, "step": 890 }, { "epoch": 0.06, "grad_norm": 343631.5, "learning_rate": 4.9517932896259165e-05, "loss": 0.2769, "step": 900 }, { "epoch": 0.06, "eval_accuracy": 0.8824232762056596, "eval_f1": 0.8810340691374341, "eval_loss": 0.2833768129348755, "eval_precision": 0.8884468905314342, "eval_recall": 0.8824232762056596, "eval_runtime": 12.4726, "eval_samples_per_second": 201.162, "eval_steps_per_second": 3.207, "step": 900 }, { "epoch": 0.07, "grad_norm": 313915.84375, "learning_rate": 4.950588121866564e-05, "loss": 0.2364, "step": 910 }, { "epoch": 0.07, "grad_norm": 216686.984375, "learning_rate": 4.949382954107212e-05, "loss": 0.1952, "step": 920 }, { "epoch": 0.07, "grad_norm": 254563.671875, "learning_rate": 4.94817778634786e-05, "loss": 0.3364, "step": 930 }, { "epoch": 0.07, "grad_norm": 446411.09375, "learning_rate": 4.9469726185885076e-05, "loss": 0.208, "step": 940 }, { "epoch": 0.07, "grad_norm": 236561.890625, "learning_rate": 4.945767450829156e-05, "loss": 0.2634, "step": 950 }, { "epoch": 0.07, "grad_norm": 255751.90625, "learning_rate": 4.944562283069803e-05, "loss": 0.2675, "step": 960 }, { "epoch": 0.07, "grad_norm": 314748.65625, "learning_rate": 4.943357115310451e-05, "loss": 0.2196, "step": 970 }, { "epoch": 0.07, "grad_norm": 439258.875, "learning_rate": 4.9421519475510994e-05, "loss": 0.1836, "step": 980 }, { "epoch": 0.07, "grad_norm": 437849.875, "learning_rate": 4.9409467797917476e-05, "loss": 0.2723, "step": 990 }, { "epoch": 0.07, "grad_norm": 232659.609375, "learning_rate": 4.939741612032395e-05, "loss": 0.2412, "step": 1000 }, { "epoch": 0.07, "eval_accuracy": 0.9063371861299322, "eval_f1": 0.9060552663380613, "eval_loss": 0.23936249315738678, "eval_precision": 0.9068644806871264, "eval_recall": 0.9063371861299322, "eval_runtime": 12.4655, "eval_samples_per_second": 201.276, "eval_steps_per_second": 3.209, "step": 1000 }, { "epoch": 0.07, "grad_norm": 230467.96875, "learning_rate": 4.938536444273043e-05, "loss": 0.2282, "step": 1010 }, { "epoch": 0.07, "grad_norm": 471223.4375, "learning_rate": 4.9373312765136906e-05, "loss": 0.2779, "step": 1020 }, { "epoch": 0.07, "grad_norm": 358035.625, "learning_rate": 4.936126108754339e-05, "loss": 0.3137, "step": 1030 }, { "epoch": 0.07, "grad_norm": 254541.125, "learning_rate": 4.934920940994986e-05, "loss": 0.2323, "step": 1040 }, { "epoch": 0.08, "grad_norm": 736008.625, "learning_rate": 4.933715773235635e-05, "loss": 0.281, "step": 1050 }, { "epoch": 0.08, "grad_norm": 191029.328125, "learning_rate": 4.9325106054762824e-05, "loss": 0.2438, "step": 1060 }, { "epoch": 0.08, "grad_norm": 392851.15625, "learning_rate": 4.9313054377169305e-05, "loss": 0.3204, "step": 1070 }, { "epoch": 0.08, "grad_norm": 252810.3125, "learning_rate": 4.930100269957578e-05, "loss": 0.2651, "step": 1080 }, { "epoch": 0.08, "grad_norm": 406698.71875, "learning_rate": 4.928895102198226e-05, "loss": 0.279, "step": 1090 }, { "epoch": 0.08, "grad_norm": 383913.09375, "learning_rate": 4.927689934438874e-05, "loss": 0.3386, "step": 1100 }, { "epoch": 0.08, "eval_accuracy": 0.9015544041450777, "eval_f1": 0.9012812455239371, "eval_loss": 0.2400408387184143, "eval_precision": 0.9019548153454997, "eval_recall": 0.9015544041450777, "eval_runtime": 12.4676, "eval_samples_per_second": 201.242, "eval_steps_per_second": 3.208, "step": 1100 }, { "epoch": 0.08, "grad_norm": 216896.21875, "learning_rate": 4.9264847666795224e-05, "loss": 0.2311, "step": 1110 }, { "epoch": 0.08, "grad_norm": 290117.3125, "learning_rate": 4.92527959892017e-05, "loss": 0.277, "step": 1120 }, { "epoch": 0.08, "grad_norm": 219654.265625, "learning_rate": 4.924074431160818e-05, "loss": 0.2339, "step": 1130 }, { "epoch": 0.08, "grad_norm": 342770.09375, "learning_rate": 4.9228692634014654e-05, "loss": 0.3051, "step": 1140 }, { "epoch": 0.08, "grad_norm": 246765.90625, "learning_rate": 4.9216640956421135e-05, "loss": 0.2695, "step": 1150 }, { "epoch": 0.08, "grad_norm": 208931.578125, "learning_rate": 4.9204589278827617e-05, "loss": 0.2747, "step": 1160 }, { "epoch": 0.08, "grad_norm": 342173.96875, "learning_rate": 4.91925376012341e-05, "loss": 0.2172, "step": 1170 }, { "epoch": 0.08, "grad_norm": 255617.609375, "learning_rate": 4.918048592364057e-05, "loss": 0.2835, "step": 1180 }, { "epoch": 0.09, "grad_norm": 149436.703125, "learning_rate": 4.9168434246047054e-05, "loss": 0.2432, "step": 1190 }, { "epoch": 0.09, "grad_norm": 225822.0625, "learning_rate": 4.915638256845353e-05, "loss": 0.2743, "step": 1200 }, { "epoch": 0.09, "eval_accuracy": 0.904742925468314, "eval_f1": 0.9047753527069451, "eval_loss": 0.24210092425346375, "eval_precision": 0.9048312118166199, "eval_recall": 0.904742925468314, "eval_runtime": 12.4909, "eval_samples_per_second": 200.866, "eval_steps_per_second": 3.202, "step": 1200 }, { "epoch": 0.09, "grad_norm": 156733.046875, "learning_rate": 4.914433089086001e-05, "loss": 0.2321, "step": 1210 }, { "epoch": 0.09, "grad_norm": 139717.796875, "learning_rate": 4.913227921326649e-05, "loss": 0.1887, "step": 1220 }, { "epoch": 0.09, "grad_norm": 534506.8125, "learning_rate": 4.912022753567297e-05, "loss": 0.2929, "step": 1230 }, { "epoch": 0.09, "grad_norm": 190213.25, "learning_rate": 4.9108175858079446e-05, "loss": 0.2494, "step": 1240 }, { "epoch": 0.09, "grad_norm": 462159.28125, "learning_rate": 4.909612418048593e-05, "loss": 0.3134, "step": 1250 }, { "epoch": 0.09, "grad_norm": 290829.84375, "learning_rate": 4.90840725028924e-05, "loss": 0.2327, "step": 1260 }, { "epoch": 0.09, "grad_norm": 115473.984375, "learning_rate": 4.907202082529889e-05, "loss": 0.2199, "step": 1270 }, { "epoch": 0.09, "grad_norm": 172480.3125, "learning_rate": 4.9059969147705365e-05, "loss": 0.2344, "step": 1280 }, { "epoch": 0.09, "grad_norm": 271795.9375, "learning_rate": 4.9047917470111846e-05, "loss": 0.2757, "step": 1290 }, { "epoch": 0.09, "grad_norm": 134259.4375, "learning_rate": 4.903586579251832e-05, "loss": 0.2682, "step": 1300 }, { "epoch": 0.09, "eval_accuracy": 0.8768433638899961, "eval_f1": 0.875214624309524, "eval_loss": 0.2833414375782013, "eval_precision": 0.8838506474460517, "eval_recall": 0.8768433638899961, "eval_runtime": 12.4785, "eval_samples_per_second": 201.066, "eval_steps_per_second": 3.206, "step": 1300 }, { "epoch": 0.09, "grad_norm": 162955.359375, "learning_rate": 4.90238141149248e-05, "loss": 0.2077, "step": 1310 }, { "epoch": 0.09, "grad_norm": 345381.34375, "learning_rate": 4.9011762437331276e-05, "loss": 0.2563, "step": 1320 }, { "epoch": 0.1, "grad_norm": 353178.6875, "learning_rate": 4.899971075973776e-05, "loss": 0.2536, "step": 1330 }, { "epoch": 0.1, "grad_norm": 341959.53125, "learning_rate": 4.898765908214424e-05, "loss": 0.2174, "step": 1340 }, { "epoch": 0.1, "grad_norm": 366022.53125, "learning_rate": 4.897560740455071e-05, "loss": 0.3057, "step": 1350 }, { "epoch": 0.1, "grad_norm": 393534.71875, "learning_rate": 4.8963555726957194e-05, "loss": 0.2376, "step": 1360 }, { "epoch": 0.1, "grad_norm": 274654.625, "learning_rate": 4.8951504049363676e-05, "loss": 0.2659, "step": 1370 }, { "epoch": 0.1, "grad_norm": 138208.84375, "learning_rate": 4.893945237177015e-05, "loss": 0.1862, "step": 1380 }, { "epoch": 0.1, "grad_norm": 467137.15625, "learning_rate": 4.892740069417663e-05, "loss": 0.2283, "step": 1390 }, { "epoch": 0.1, "grad_norm": 317242.65625, "learning_rate": 4.891534901658311e-05, "loss": 0.3219, "step": 1400 }, { "epoch": 0.1, "eval_accuracy": 0.9071343164607414, "eval_f1": 0.9070423350315097, "eval_loss": 0.23825575411319733, "eval_precision": 0.9071045116108353, "eval_recall": 0.9071343164607414, "eval_runtime": 12.4977, "eval_samples_per_second": 200.757, "eval_steps_per_second": 3.201, "step": 1400 }, { "epoch": 0.1, "grad_norm": 181615.84375, "learning_rate": 4.890329733898959e-05, "loss": 0.2165, "step": 1410 }, { "epoch": 0.1, "grad_norm": 161155.140625, "learning_rate": 4.889124566139607e-05, "loss": 0.2607, "step": 1420 }, { "epoch": 0.1, "grad_norm": 398813.90625, "learning_rate": 4.887919398380254e-05, "loss": 0.2696, "step": 1430 }, { "epoch": 0.1, "grad_norm": 315529.625, "learning_rate": 4.8867142306209024e-05, "loss": 0.2688, "step": 1440 }, { "epoch": 0.1, "grad_norm": 518022.09375, "learning_rate": 4.8855090628615505e-05, "loss": 0.3062, "step": 1450 }, { "epoch": 0.1, "grad_norm": 271555.0, "learning_rate": 4.8843038951021987e-05, "loss": 0.2141, "step": 1460 }, { "epoch": 0.11, "grad_norm": 287849.21875, "learning_rate": 4.883098727342846e-05, "loss": 0.2469, "step": 1470 }, { "epoch": 0.11, "grad_norm": 269480.84375, "learning_rate": 4.881893559583494e-05, "loss": 0.2037, "step": 1480 }, { "epoch": 0.11, "grad_norm": 225872.734375, "learning_rate": 4.880688391824142e-05, "loss": 0.2867, "step": 1490 }, { "epoch": 0.11, "grad_norm": 291168.03125, "learning_rate": 4.8794832240647905e-05, "loss": 0.2211, "step": 1500 }, { "epoch": 0.11, "eval_accuracy": 0.904742925468314, "eval_f1": 0.9047391668676202, "eval_loss": 0.24535924196243286, "eval_precision": 0.9047356979299059, "eval_recall": 0.904742925468314, "eval_runtime": 12.5209, "eval_samples_per_second": 200.385, "eval_steps_per_second": 3.195, "step": 1500 }, { "epoch": 0.11, "grad_norm": 305500.625, "learning_rate": 4.878278056305438e-05, "loss": 0.2604, "step": 1510 }, { "epoch": 0.11, "grad_norm": 322610.6875, "learning_rate": 4.877072888546086e-05, "loss": 0.2416, "step": 1520 }, { "epoch": 0.11, "grad_norm": 244146.640625, "learning_rate": 4.8758677207867335e-05, "loss": 0.2461, "step": 1530 }, { "epoch": 0.11, "grad_norm": 319704.53125, "learning_rate": 4.8746625530273816e-05, "loss": 0.2368, "step": 1540 }, { "epoch": 0.11, "grad_norm": 292252.0, "learning_rate": 4.873457385268029e-05, "loss": 0.2351, "step": 1550 }, { "epoch": 0.11, "grad_norm": 134507.875, "learning_rate": 4.872252217508678e-05, "loss": 0.2423, "step": 1560 }, { "epoch": 0.11, "grad_norm": 228724.5625, "learning_rate": 4.871047049749325e-05, "loss": 0.1909, "step": 1570 }, { "epoch": 0.11, "grad_norm": 315720.09375, "learning_rate": 4.8698418819899735e-05, "loss": 0.2611, "step": 1580 }, { "epoch": 0.11, "grad_norm": 232667.03125, "learning_rate": 4.868636714230621e-05, "loss": 0.1903, "step": 1590 }, { "epoch": 0.11, "grad_norm": 263891.90625, "learning_rate": 4.867431546471269e-05, "loss": 0.2606, "step": 1600 }, { "epoch": 0.11, "eval_accuracy": 0.9222797927461139, "eval_f1": 0.9220731260773486, "eval_loss": 0.20830760896205902, "eval_precision": 0.9227685265016082, "eval_recall": 0.9222797927461139, "eval_runtime": 16.2543, "eval_samples_per_second": 154.359, "eval_steps_per_second": 2.461, "step": 1600 }, { "epoch": 0.12, "grad_norm": 221386.890625, "learning_rate": 4.8662263787119165e-05, "loss": 0.2349, "step": 1610 }, { "epoch": 0.12, "grad_norm": 363135.0, "learning_rate": 4.865021210952565e-05, "loss": 0.25, "step": 1620 }, { "epoch": 0.12, "grad_norm": 264439.53125, "learning_rate": 4.863816043193213e-05, "loss": 0.1789, "step": 1630 }, { "epoch": 0.12, "grad_norm": 325613.53125, "learning_rate": 4.862610875433861e-05, "loss": 0.2143, "step": 1640 }, { "epoch": 0.12, "grad_norm": 220411.890625, "learning_rate": 4.861405707674508e-05, "loss": 0.2629, "step": 1650 }, { "epoch": 0.12, "grad_norm": 259412.40625, "learning_rate": 4.8602005399151564e-05, "loss": 0.2525, "step": 1660 }, { "epoch": 0.12, "grad_norm": 614391.375, "learning_rate": 4.8589953721558046e-05, "loss": 0.2439, "step": 1670 }, { "epoch": 0.12, "grad_norm": 275747.875, "learning_rate": 4.857790204396453e-05, "loss": 0.2651, "step": 1680 }, { "epoch": 0.12, "grad_norm": 212869.203125, "learning_rate": 4.8565850366371e-05, "loss": 0.1677, "step": 1690 }, { "epoch": 0.12, "grad_norm": 259202.96875, "learning_rate": 4.855379868877748e-05, "loss": 0.1966, "step": 1700 }, { "epoch": 0.12, "eval_accuracy": 0.9003587086488641, "eval_f1": 0.9000880085084791, "eval_loss": 0.2688085734844208, "eval_precision": 0.9007296682986318, "eval_recall": 0.9003587086488641, "eval_runtime": 12.4695, "eval_samples_per_second": 201.21, "eval_steps_per_second": 3.208, "step": 1700 }, { "epoch": 0.12, "grad_norm": 164794.625, "learning_rate": 4.854174701118396e-05, "loss": 0.3116, "step": 1710 }, { "epoch": 0.12, "grad_norm": 1090182.375, "learning_rate": 4.852969533359044e-05, "loss": 0.2294, "step": 1720 }, { "epoch": 0.12, "grad_norm": 393550.84375, "learning_rate": 4.851764365599692e-05, "loss": 0.2659, "step": 1730 }, { "epoch": 0.12, "grad_norm": 227773.296875, "learning_rate": 4.8505591978403394e-05, "loss": 0.2151, "step": 1740 }, { "epoch": 0.13, "grad_norm": 259306.171875, "learning_rate": 4.8493540300809875e-05, "loss": 0.2661, "step": 1750 }, { "epoch": 0.13, "grad_norm": 237662.640625, "learning_rate": 4.8481488623216357e-05, "loss": 0.2268, "step": 1760 }, { "epoch": 0.13, "grad_norm": 385510.71875, "learning_rate": 4.846943694562283e-05, "loss": 0.2745, "step": 1770 }, { "epoch": 0.13, "grad_norm": 163655.578125, "learning_rate": 4.845738526802931e-05, "loss": 0.3146, "step": 1780 }, { "epoch": 0.13, "grad_norm": 243399.0, "learning_rate": 4.8445333590435794e-05, "loss": 0.2467, "step": 1790 }, { "epoch": 0.13, "grad_norm": 327880.625, "learning_rate": 4.843328191284227e-05, "loss": 0.2205, "step": 1800 }, { "epoch": 0.13, "eval_accuracy": 0.8776404942208051, "eval_f1": 0.875183931389359, "eval_loss": 0.30761781334877014, "eval_precision": 0.8910948763461308, "eval_recall": 0.8776404942208051, "eval_runtime": 12.4538, "eval_samples_per_second": 201.465, "eval_steps_per_second": 3.212, "step": 1800 }, { "epoch": 0.13, "grad_norm": 256714.03125, "learning_rate": 4.842123023524875e-05, "loss": 0.2844, "step": 1810 }, { "epoch": 0.13, "grad_norm": 262816.8125, "learning_rate": 4.8409178557655224e-05, "loss": 0.2276, "step": 1820 }, { "epoch": 0.13, "grad_norm": 316480.125, "learning_rate": 4.8397126880061705e-05, "loss": 0.2421, "step": 1830 }, { "epoch": 0.13, "grad_norm": 225589.65625, "learning_rate": 4.8385075202468186e-05, "loss": 0.3464, "step": 1840 }, { "epoch": 0.13, "grad_norm": 185817.125, "learning_rate": 4.837302352487467e-05, "loss": 0.2356, "step": 1850 }, { "epoch": 0.13, "grad_norm": 88735.1875, "learning_rate": 4.836097184728114e-05, "loss": 0.182, "step": 1860 }, { "epoch": 0.13, "grad_norm": 794250.3125, "learning_rate": 4.834892016968762e-05, "loss": 0.2339, "step": 1870 }, { "epoch": 0.13, "grad_norm": 560309.375, "learning_rate": 4.83368684920941e-05, "loss": 0.2894, "step": 1880 }, { "epoch": 0.14, "grad_norm": 272938.0625, "learning_rate": 4.832481681450058e-05, "loss": 0.329, "step": 1890 }, { "epoch": 0.14, "grad_norm": 211817.265625, "learning_rate": 4.831276513690706e-05, "loss": 0.2242, "step": 1900 }, { "epoch": 0.14, "eval_accuracy": 0.9151056197688322, "eval_f1": 0.9149543121007149, "eval_loss": 0.2171379029750824, "eval_precision": 0.9152515101201911, "eval_recall": 0.9151056197688322, "eval_runtime": 12.4488, "eval_samples_per_second": 201.545, "eval_steps_per_second": 3.213, "step": 1900 }, { "epoch": 0.14, "grad_norm": 143415.46875, "learning_rate": 4.830071345931354e-05, "loss": 0.1574, "step": 1910 }, { "epoch": 0.14, "grad_norm": 282922.125, "learning_rate": 4.8288661781720016e-05, "loss": 0.2423, "step": 1920 }, { "epoch": 0.14, "grad_norm": 146414.75, "learning_rate": 4.82766101041265e-05, "loss": 0.2286, "step": 1930 }, { "epoch": 0.14, "grad_norm": 376618.875, "learning_rate": 4.826455842653297e-05, "loss": 0.2082, "step": 1940 }, { "epoch": 0.14, "grad_norm": 347305.625, "learning_rate": 4.825250674893945e-05, "loss": 0.2707, "step": 1950 }, { "epoch": 0.14, "grad_norm": 120735.5, "learning_rate": 4.8240455071345934e-05, "loss": 0.27, "step": 1960 }, { "epoch": 0.14, "grad_norm": 327705.75, "learning_rate": 4.8228403393752416e-05, "loss": 0.2446, "step": 1970 }, { "epoch": 0.14, "grad_norm": 204558.703125, "learning_rate": 4.821635171615889e-05, "loss": 0.2253, "step": 1980 }, { "epoch": 0.14, "grad_norm": 379880.46875, "learning_rate": 4.820430003856537e-05, "loss": 0.2475, "step": 1990 }, { "epoch": 0.14, "grad_norm": 275538.9375, "learning_rate": 4.8192248360971846e-05, "loss": 0.257, "step": 2000 }, { "epoch": 0.14, "eval_accuracy": 0.8911917098445595, "eval_f1": 0.8905276298091929, "eval_loss": 0.26427793502807617, "eval_precision": 0.893198513619984, "eval_recall": 0.8911917098445595, "eval_runtime": 12.4635, "eval_samples_per_second": 201.308, "eval_steps_per_second": 3.209, "step": 2000 }, { "epoch": 0.14, "grad_norm": 353166.90625, "learning_rate": 4.818019668337833e-05, "loss": 0.2724, "step": 2010 }, { "epoch": 0.14, "grad_norm": 226420.90625, "learning_rate": 4.816814500578481e-05, "loss": 0.2908, "step": 2020 }, { "epoch": 0.15, "grad_norm": 342758.125, "learning_rate": 4.815609332819129e-05, "loss": 0.2236, "step": 2030 }, { "epoch": 0.15, "grad_norm": 255585.25, "learning_rate": 4.8144041650597764e-05, "loss": 0.2951, "step": 2040 }, { "epoch": 0.15, "grad_norm": 179796.921875, "learning_rate": 4.8131989973004245e-05, "loss": 0.1814, "step": 2050 }, { "epoch": 0.15, "grad_norm": 214087.140625, "learning_rate": 4.811993829541072e-05, "loss": 0.3827, "step": 2060 }, { "epoch": 0.15, "grad_norm": 250333.71875, "learning_rate": 4.810788661781721e-05, "loss": 0.2592, "step": 2070 }, { "epoch": 0.15, "grad_norm": 334693.625, "learning_rate": 4.809583494022368e-05, "loss": 0.3138, "step": 2080 }, { "epoch": 0.15, "grad_norm": 204259.46875, "learning_rate": 4.8083783262630164e-05, "loss": 0.2725, "step": 2090 }, { "epoch": 0.15, "grad_norm": 362242.4375, "learning_rate": 4.807173158503664e-05, "loss": 0.2238, "step": 2100 }, { "epoch": 0.15, "eval_accuracy": 0.9131127939418094, "eval_f1": 0.9127816210997458, "eval_loss": 0.21650490164756775, "eval_precision": 0.9140733290376809, "eval_recall": 0.9131127939418094, "eval_runtime": 12.4586, "eval_samples_per_second": 201.386, "eval_steps_per_second": 3.211, "step": 2100 }, { "epoch": 0.15, "grad_norm": 427524.21875, "learning_rate": 4.805967990744312e-05, "loss": 0.1926, "step": 2110 }, { "epoch": 0.15, "grad_norm": 376668.125, "learning_rate": 4.8047628229849594e-05, "loss": 0.1828, "step": 2120 }, { "epoch": 0.15, "grad_norm": 181697.09375, "learning_rate": 4.803557655225608e-05, "loss": 0.2484, "step": 2130 }, { "epoch": 0.15, "grad_norm": 405359.8125, "learning_rate": 4.8023524874662556e-05, "loss": 0.3022, "step": 2140 }, { "epoch": 0.15, "grad_norm": 227001.171875, "learning_rate": 4.801147319706904e-05, "loss": 0.3152, "step": 2150 }, { "epoch": 0.15, "grad_norm": 291323.65625, "learning_rate": 4.799942151947551e-05, "loss": 0.2439, "step": 2160 }, { "epoch": 0.16, "grad_norm": 174109.375, "learning_rate": 4.7987369841881993e-05, "loss": 0.2465, "step": 2170 }, { "epoch": 0.16, "grad_norm": 337487.75, "learning_rate": 4.797531816428847e-05, "loss": 0.2281, "step": 2180 }, { "epoch": 0.16, "grad_norm": 225118.296875, "learning_rate": 4.796326648669495e-05, "loss": 0.2357, "step": 2190 }, { "epoch": 0.16, "grad_norm": 159729.390625, "learning_rate": 4.795121480910143e-05, "loss": 0.2313, "step": 2200 }, { "epoch": 0.16, "eval_accuracy": 0.899561578318055, "eval_f1": 0.8995921014681665, "eval_loss": 0.2312317192554474, "eval_precision": 0.8996410329041024, "eval_recall": 0.899561578318055, "eval_runtime": 12.481, "eval_samples_per_second": 201.026, "eval_steps_per_second": 3.205, "step": 2200 }, { "epoch": 0.16, "grad_norm": 215213.28125, "learning_rate": 4.7939163131507905e-05, "loss": 0.2576, "step": 2210 }, { "epoch": 0.16, "grad_norm": 691060.25, "learning_rate": 4.7927111453914386e-05, "loss": 0.2706, "step": 2220 }, { "epoch": 0.16, "grad_norm": 306584.34375, "learning_rate": 4.791505977632087e-05, "loss": 0.1879, "step": 2230 }, { "epoch": 0.16, "grad_norm": 60201.84375, "learning_rate": 4.790300809872735e-05, "loss": 0.2517, "step": 2240 }, { "epoch": 0.16, "grad_norm": 318122.8125, "learning_rate": 4.789095642113382e-05, "loss": 0.2004, "step": 2250 }, { "epoch": 0.16, "grad_norm": 737994.875, "learning_rate": 4.7878904743540304e-05, "loss": 0.3025, "step": 2260 }, { "epoch": 0.16, "grad_norm": 617771.5, "learning_rate": 4.786685306594678e-05, "loss": 0.2708, "step": 2270 }, { "epoch": 0.16, "grad_norm": 271784.375, "learning_rate": 4.785480138835326e-05, "loss": 0.2333, "step": 2280 }, { "epoch": 0.16, "grad_norm": 348172.15625, "learning_rate": 4.7842749710759735e-05, "loss": 0.2371, "step": 2290 }, { "epoch": 0.16, "grad_norm": 501798.375, "learning_rate": 4.783069803316622e-05, "loss": 0.1856, "step": 2300 }, { "epoch": 0.16, "eval_accuracy": 0.9107214029493822, "eval_f1": 0.9107676605487075, "eval_loss": 0.22687236964702606, "eval_precision": 0.9108709640812914, "eval_recall": 0.9107214029493822, "eval_runtime": 12.4871, "eval_samples_per_second": 200.927, "eval_steps_per_second": 3.203, "step": 2300 }, { "epoch": 0.17, "grad_norm": 294219.71875, "learning_rate": 4.78186463555727e-05, "loss": 0.2343, "step": 2310 }, { "epoch": 0.17, "grad_norm": 546334.75, "learning_rate": 4.780659467797918e-05, "loss": 0.2301, "step": 2320 }, { "epoch": 0.17, "grad_norm": 173881.875, "learning_rate": 4.779454300038565e-05, "loss": 0.2491, "step": 2330 }, { "epoch": 0.17, "grad_norm": 237170.28125, "learning_rate": 4.7782491322792134e-05, "loss": 0.2194, "step": 2340 }, { "epoch": 0.17, "grad_norm": 319085.8125, "learning_rate": 4.777043964519861e-05, "loss": 0.2308, "step": 2350 }, { "epoch": 0.17, "grad_norm": 365797.4375, "learning_rate": 4.77583879676051e-05, "loss": 0.2445, "step": 2360 }, { "epoch": 0.17, "grad_norm": 255985.921875, "learning_rate": 4.774633629001157e-05, "loss": 0.2578, "step": 2370 }, { "epoch": 0.17, "grad_norm": 253771.796875, "learning_rate": 4.773428461241805e-05, "loss": 0.2471, "step": 2380 }, { "epoch": 0.17, "grad_norm": 140455.671875, "learning_rate": 4.772223293482453e-05, "loss": 0.2496, "step": 2390 }, { "epoch": 0.17, "grad_norm": 356603.71875, "learning_rate": 4.771018125723101e-05, "loss": 0.2201, "step": 2400 }, { "epoch": 0.17, "eval_accuracy": 0.9059386209645277, "eval_f1": 0.9056498912765502, "eval_loss": 0.24249590933322906, "eval_precision": 0.9064880886538065, "eval_recall": 0.9059386209645277, "eval_runtime": 12.497, "eval_samples_per_second": 200.768, "eval_steps_per_second": 3.201, "step": 2400 }, { "epoch": 0.17, "grad_norm": 284860.53125, "learning_rate": 4.769812957963749e-05, "loss": 0.1525, "step": 2410 }, { "epoch": 0.17, "grad_norm": 250776.8125, "learning_rate": 4.768607790204397e-05, "loss": 0.2262, "step": 2420 }, { "epoch": 0.17, "grad_norm": 211438.5, "learning_rate": 4.7674026224450445e-05, "loss": 0.2277, "step": 2430 }, { "epoch": 0.17, "grad_norm": 368441.25, "learning_rate": 4.7661974546856926e-05, "loss": 0.259, "step": 2440 }, { "epoch": 0.18, "grad_norm": 241326.5, "learning_rate": 4.76499228692634e-05, "loss": 0.2286, "step": 2450 }, { "epoch": 0.18, "grad_norm": 98535.6640625, "learning_rate": 4.763787119166988e-05, "loss": 0.2078, "step": 2460 }, { "epoch": 0.18, "grad_norm": 254980.625, "learning_rate": 4.7625819514076363e-05, "loss": 0.2449, "step": 2470 }, { "epoch": 0.18, "grad_norm": 167483.0625, "learning_rate": 4.7613767836482845e-05, "loss": 0.2702, "step": 2480 }, { "epoch": 0.18, "grad_norm": 222062.484375, "learning_rate": 4.760171615888932e-05, "loss": 0.1956, "step": 2490 }, { "epoch": 0.18, "grad_norm": 405875.75, "learning_rate": 4.75896644812958e-05, "loss": 0.3332, "step": 2500 }, { "epoch": 0.18, "eval_accuracy": 0.9043443603029095, "eval_f1": 0.9044486833245423, "eval_loss": 0.22543533146381378, "eval_precision": 0.9048483388492391, "eval_recall": 0.9043443603029095, "eval_runtime": 12.4439, "eval_samples_per_second": 201.624, "eval_steps_per_second": 3.214, "step": 2500 }, { "epoch": 0.18, "grad_norm": 152190.84375, "learning_rate": 4.7577612803702275e-05, "loss": 0.2661, "step": 2510 }, { "epoch": 0.18, "grad_norm": 174183.640625, "learning_rate": 4.7565561126108756e-05, "loss": 0.2293, "step": 2520 }, { "epoch": 0.18, "grad_norm": 413301.1875, "learning_rate": 4.755350944851524e-05, "loss": 0.2136, "step": 2530 }, { "epoch": 0.18, "grad_norm": 536887.125, "learning_rate": 4.754145777092172e-05, "loss": 0.2134, "step": 2540 }, { "epoch": 0.18, "grad_norm": 276406.9375, "learning_rate": 4.752940609332819e-05, "loss": 0.2286, "step": 2550 }, { "epoch": 0.18, "grad_norm": 186448.703125, "learning_rate": 4.7517354415734674e-05, "loss": 0.2546, "step": 2560 }, { "epoch": 0.18, "grad_norm": 185627.4375, "learning_rate": 4.750530273814115e-05, "loss": 0.2528, "step": 2570 }, { "epoch": 0.18, "grad_norm": 368845.34375, "learning_rate": 4.749325106054763e-05, "loss": 0.2174, "step": 2580 }, { "epoch": 0.19, "grad_norm": 362864.34375, "learning_rate": 4.748119938295411e-05, "loss": 0.2209, "step": 2590 }, { "epoch": 0.19, "grad_norm": 91713.015625, "learning_rate": 4.7469147705360586e-05, "loss": 0.1843, "step": 2600 }, { "epoch": 0.19, "eval_accuracy": 0.8979673176564368, "eval_f1": 0.8970726226158635, "eval_loss": 0.2523791491985321, "eval_precision": 0.9019878131456466, "eval_recall": 0.8979673176564368, "eval_runtime": 12.4843, "eval_samples_per_second": 200.972, "eval_steps_per_second": 3.204, "step": 2600 }, { "epoch": 0.19, "grad_norm": 283047.71875, "learning_rate": 4.745709602776707e-05, "loss": 0.2767, "step": 2610 }, { "epoch": 0.19, "grad_norm": 256224.40625, "learning_rate": 4.744504435017355e-05, "loss": 0.2588, "step": 2620 }, { "epoch": 0.19, "grad_norm": 156059.90625, "learning_rate": 4.743299267258002e-05, "loss": 0.2688, "step": 2630 }, { "epoch": 0.19, "grad_norm": 165222.90625, "learning_rate": 4.7420940994986504e-05, "loss": 0.2281, "step": 2640 }, { "epoch": 0.19, "grad_norm": 84012.734375, "learning_rate": 4.7408889317392985e-05, "loss": 0.1764, "step": 2650 }, { "epoch": 0.19, "grad_norm": 146292.03125, "learning_rate": 4.739683763979946e-05, "loss": 0.2546, "step": 2660 }, { "epoch": 0.19, "grad_norm": 197499.578125, "learning_rate": 4.738478596220594e-05, "loss": 0.258, "step": 2670 }, { "epoch": 0.19, "grad_norm": 139515.015625, "learning_rate": 4.7372734284612416e-05, "loss": 0.2412, "step": 2680 }, { "epoch": 0.19, "grad_norm": 206191.359375, "learning_rate": 4.73606826070189e-05, "loss": 0.1909, "step": 2690 }, { "epoch": 0.19, "grad_norm": 263304.375, "learning_rate": 4.734863092942538e-05, "loss": 0.2728, "step": 2700 }, { "epoch": 0.19, "eval_accuracy": 0.8967716221602232, "eval_f1": 0.8957472911028488, "eval_loss": 0.23479728400707245, "eval_precision": 0.9016807725080417, "eval_recall": 0.8967716221602232, "eval_runtime": 12.4638, "eval_samples_per_second": 201.303, "eval_steps_per_second": 3.209, "step": 2700 }, { "epoch": 0.19, "grad_norm": 232704.671875, "learning_rate": 4.733657925183186e-05, "loss": 0.2076, "step": 2710 }, { "epoch": 0.19, "grad_norm": 479017.40625, "learning_rate": 4.7324527574238334e-05, "loss": 0.2137, "step": 2720 }, { "epoch": 0.2, "grad_norm": 279608.53125, "learning_rate": 4.7312475896644815e-05, "loss": 0.1979, "step": 2730 }, { "epoch": 0.2, "grad_norm": 185551.0, "learning_rate": 4.730042421905129e-05, "loss": 0.1977, "step": 2740 }, { "epoch": 0.2, "grad_norm": 222985.421875, "learning_rate": 4.728837254145777e-05, "loss": 0.2625, "step": 2750 }, { "epoch": 0.2, "grad_norm": 205608.65625, "learning_rate": 4.727632086386425e-05, "loss": 0.2073, "step": 2760 }, { "epoch": 0.2, "grad_norm": 251234.265625, "learning_rate": 4.7264269186270734e-05, "loss": 0.2447, "step": 2770 }, { "epoch": 0.2, "grad_norm": 513352.03125, "learning_rate": 4.725221750867721e-05, "loss": 0.3065, "step": 2780 }, { "epoch": 0.2, "grad_norm": 231737.0625, "learning_rate": 4.724016583108369e-05, "loss": 0.2311, "step": 2790 }, { "epoch": 0.2, "grad_norm": 309214.3125, "learning_rate": 4.7228114153490164e-05, "loss": 0.2131, "step": 2800 }, { "epoch": 0.2, "eval_accuracy": 0.913511359107214, "eval_f1": 0.9135765070264794, "eval_loss": 0.2209855616092682, "eval_precision": 0.9137703030306349, "eval_recall": 0.913511359107214, "eval_runtime": 12.5042, "eval_samples_per_second": 200.652, "eval_steps_per_second": 3.199, "step": 2800 }, { "epoch": 0.2, "grad_norm": 501373.90625, "learning_rate": 4.721606247589665e-05, "loss": 0.3289, "step": 2810 }, { "epoch": 0.2, "grad_norm": 121567.8046875, "learning_rate": 4.7204010798303126e-05, "loss": 0.1986, "step": 2820 }, { "epoch": 0.2, "grad_norm": 300041.3125, "learning_rate": 4.719195912070961e-05, "loss": 0.2294, "step": 2830 }, { "epoch": 0.2, "grad_norm": 458868.6875, "learning_rate": 4.717990744311608e-05, "loss": 0.2133, "step": 2840 }, { "epoch": 0.2, "grad_norm": 147848.984375, "learning_rate": 4.716785576552256e-05, "loss": 0.2769, "step": 2850 }, { "epoch": 0.2, "grad_norm": 332015.1875, "learning_rate": 4.715580408792904e-05, "loss": 0.22, "step": 2860 }, { "epoch": 0.21, "grad_norm": 472249.4375, "learning_rate": 4.7143752410335526e-05, "loss": 0.2495, "step": 2870 }, { "epoch": 0.21, "grad_norm": 967262.6875, "learning_rate": 4.7131700732742e-05, "loss": 0.2312, "step": 2880 }, { "epoch": 0.21, "grad_norm": 341954.1875, "learning_rate": 4.711964905514848e-05, "loss": 0.1765, "step": 2890 }, { "epoch": 0.21, "grad_norm": 464634.5625, "learning_rate": 4.7107597377554956e-05, "loss": 0.19, "step": 2900 }, { "epoch": 0.21, "eval_accuracy": 0.9123156636110004, "eval_f1": 0.9120359608178479, "eval_loss": 0.22591687738895416, "eval_precision": 0.9129663507904072, "eval_recall": 0.9123156636110004, "eval_runtime": 12.4552, "eval_samples_per_second": 201.442, "eval_steps_per_second": 3.212, "step": 2900 }, { "epoch": 0.21, "grad_norm": 241272.703125, "learning_rate": 4.709554569996144e-05, "loss": 0.1981, "step": 2910 }, { "epoch": 0.21, "grad_norm": 204981.140625, "learning_rate": 4.708349402236791e-05, "loss": 0.1911, "step": 2920 }, { "epoch": 0.21, "grad_norm": 327311.375, "learning_rate": 4.70714423447744e-05, "loss": 0.249, "step": 2930 }, { "epoch": 0.21, "grad_norm": 375379.96875, "learning_rate": 4.7059390667180874e-05, "loss": 0.1629, "step": 2940 }, { "epoch": 0.21, "grad_norm": 277301.8125, "learning_rate": 4.7047338989587356e-05, "loss": 0.2026, "step": 2950 }, { "epoch": 0.21, "grad_norm": 327376.71875, "learning_rate": 4.703528731199383e-05, "loss": 0.271, "step": 2960 }, { "epoch": 0.21, "grad_norm": 136987.484375, "learning_rate": 4.702323563440031e-05, "loss": 0.1453, "step": 2970 }, { "epoch": 0.21, "grad_norm": 353965.59375, "learning_rate": 4.701118395680679e-05, "loss": 0.2407, "step": 2980 }, { "epoch": 0.21, "grad_norm": 247193.09375, "learning_rate": 4.6999132279213274e-05, "loss": 0.1739, "step": 2990 }, { "epoch": 0.21, "grad_norm": 219308.640625, "learning_rate": 4.698708060161975e-05, "loss": 0.2099, "step": 3000 }, { "epoch": 0.21, "eval_accuracy": 0.9023515344758868, "eval_f1": 0.9016339685231911, "eval_loss": 0.2813716530799866, "eval_precision": 0.9054013251007967, "eval_recall": 0.9023515344758868, "eval_runtime": 12.4697, "eval_samples_per_second": 201.208, "eval_steps_per_second": 3.208, "step": 3000 }, { "epoch": 0.22, "grad_norm": 480314.5625, "learning_rate": 4.697502892402623e-05, "loss": 0.2492, "step": 3010 }, { "epoch": 0.22, "grad_norm": 327667.5, "learning_rate": 4.6962977246432704e-05, "loss": 0.2386, "step": 3020 }, { "epoch": 0.22, "grad_norm": 361772.9375, "learning_rate": 4.6950925568839185e-05, "loss": 0.2363, "step": 3030 }, { "epoch": 0.22, "grad_norm": 281198.5625, "learning_rate": 4.6938873891245667e-05, "loss": 0.2199, "step": 3040 }, { "epoch": 0.22, "grad_norm": 435042.5, "learning_rate": 4.692682221365214e-05, "loss": 0.2985, "step": 3050 }, { "epoch": 0.22, "grad_norm": 175672.875, "learning_rate": 4.691477053605862e-05, "loss": 0.2279, "step": 3060 }, { "epoch": 0.22, "grad_norm": 342795.84375, "learning_rate": 4.69027188584651e-05, "loss": 0.2441, "step": 3070 }, { "epoch": 0.22, "grad_norm": 288277.53125, "learning_rate": 4.689066718087158e-05, "loss": 0.2576, "step": 3080 }, { "epoch": 0.22, "grad_norm": 121460.078125, "learning_rate": 4.687861550327806e-05, "loss": 0.247, "step": 3090 }, { "epoch": 0.22, "grad_norm": 401488.15625, "learning_rate": 4.686656382568454e-05, "loss": 0.2209, "step": 3100 }, { "epoch": 0.22, "eval_accuracy": 0.9051414906337186, "eval_f1": 0.9046116025347221, "eval_loss": 0.2472696155309677, "eval_precision": 0.9070370928171988, "eval_recall": 0.9051414906337186, "eval_runtime": 12.4813, "eval_samples_per_second": 201.021, "eval_steps_per_second": 3.205, "step": 3100 }, { "epoch": 0.22, "grad_norm": 377680.78125, "learning_rate": 4.6854512148091015e-05, "loss": 0.288, "step": 3110 }, { "epoch": 0.22, "grad_norm": 233974.625, "learning_rate": 4.6842460470497496e-05, "loss": 0.2491, "step": 3120 }, { "epoch": 0.22, "grad_norm": 334937.875, "learning_rate": 4.683040879290397e-05, "loss": 0.2163, "step": 3130 }, { "epoch": 0.22, "grad_norm": 440934.28125, "learning_rate": 4.681835711531045e-05, "loss": 0.2135, "step": 3140 }, { "epoch": 0.23, "grad_norm": 103967.03125, "learning_rate": 4.680630543771693e-05, "loss": 0.1934, "step": 3150 }, { "epoch": 0.23, "grad_norm": 213988.359375, "learning_rate": 4.6794253760123415e-05, "loss": 0.2356, "step": 3160 }, { "epoch": 0.23, "grad_norm": 231522.921875, "learning_rate": 4.678220208252989e-05, "loss": 0.2235, "step": 3170 }, { "epoch": 0.23, "grad_norm": 301749.78125, "learning_rate": 4.677015040493637e-05, "loss": 0.2005, "step": 3180 }, { "epoch": 0.23, "grad_norm": 163900.3125, "learning_rate": 4.6758098727342845e-05, "loss": 0.1628, "step": 3190 }, { "epoch": 0.23, "grad_norm": 220932.828125, "learning_rate": 4.6746047049749326e-05, "loss": 0.2366, "step": 3200 }, { "epoch": 0.23, "eval_accuracy": 0.8991630131526505, "eval_f1": 0.8983219782452526, "eval_loss": 0.256120890378952, "eval_precision": 0.9028826072820753, "eval_recall": 0.8991630131526505, "eval_runtime": 12.4914, "eval_samples_per_second": 200.858, "eval_steps_per_second": 3.202, "step": 3200 }, { "epoch": 0.23, "grad_norm": 140245.703125, "learning_rate": 4.673399537215581e-05, "loss": 0.188, "step": 3210 }, { "epoch": 0.23, "grad_norm": 56319.390625, "learning_rate": 4.672194369456229e-05, "loss": 0.2311, "step": 3220 }, { "epoch": 0.23, "grad_norm": 417635.46875, "learning_rate": 4.670989201696876e-05, "loss": 0.2028, "step": 3230 }, { "epoch": 0.23, "grad_norm": 257533.140625, "learning_rate": 4.6697840339375244e-05, "loss": 0.2448, "step": 3240 }, { "epoch": 0.23, "grad_norm": 134527.328125, "learning_rate": 4.668578866178172e-05, "loss": 0.255, "step": 3250 }, { "epoch": 0.23, "grad_norm": 241991.234375, "learning_rate": 4.66737369841882e-05, "loss": 0.2519, "step": 3260 }, { "epoch": 0.23, "grad_norm": 292576.84375, "learning_rate": 4.666168530659468e-05, "loss": 0.2186, "step": 3270 }, { "epoch": 0.23, "grad_norm": 172253.8125, "learning_rate": 4.664963362900116e-05, "loss": 0.2119, "step": 3280 }, { "epoch": 0.24, "grad_norm": 162594.703125, "learning_rate": 4.663758195140764e-05, "loss": 0.2266, "step": 3290 }, { "epoch": 0.24, "grad_norm": 291196.75, "learning_rate": 4.662553027381412e-05, "loss": 0.3156, "step": 3300 }, { "epoch": 0.24, "eval_accuracy": 0.9095257074531686, "eval_f1": 0.9094360946444322, "eval_loss": 0.21921035647392273, "eval_precision": 0.9095010570473924, "eval_recall": 0.9095257074531686, "eval_runtime": 12.4898, "eval_samples_per_second": 200.883, "eval_steps_per_second": 3.203, "step": 3300 }, { "epoch": 0.24, "grad_norm": 359450.15625, "learning_rate": 4.661347859622059e-05, "loss": 0.1919, "step": 3310 }, { "epoch": 0.24, "grad_norm": 356027.1875, "learning_rate": 4.6601426918627074e-05, "loss": 0.1946, "step": 3320 }, { "epoch": 0.24, "grad_norm": 228787.578125, "learning_rate": 4.6589375241033555e-05, "loss": 0.2353, "step": 3330 }, { "epoch": 0.24, "grad_norm": 213314.375, "learning_rate": 4.6577323563440037e-05, "loss": 0.21, "step": 3340 }, { "epoch": 0.24, "grad_norm": 229541.46875, "learning_rate": 4.656527188584651e-05, "loss": 0.2157, "step": 3350 }, { "epoch": 0.24, "grad_norm": 424003.65625, "learning_rate": 4.655322020825299e-05, "loss": 0.2202, "step": 3360 }, { "epoch": 0.24, "grad_norm": 274433.0, "learning_rate": 4.654116853065947e-05, "loss": 0.2101, "step": 3370 }, { "epoch": 0.24, "grad_norm": 585877.375, "learning_rate": 4.6529116853065955e-05, "loss": 0.227, "step": 3380 }, { "epoch": 0.24, "grad_norm": 168976.125, "learning_rate": 4.651706517547243e-05, "loss": 0.2004, "step": 3390 }, { "epoch": 0.24, "grad_norm": 247893.96875, "learning_rate": 4.650501349787891e-05, "loss": 0.197, "step": 3400 }, { "epoch": 0.24, "eval_accuracy": 0.9063371861299322, "eval_f1": 0.9056639403550047, "eval_loss": 0.2382478266954422, "eval_precision": 0.9093486961575057, "eval_recall": 0.9063371861299322, "eval_runtime": 12.4721, "eval_samples_per_second": 201.168, "eval_steps_per_second": 3.207, "step": 3400 }, { "epoch": 0.24, "grad_norm": 418813.09375, "learning_rate": 4.6492961820285385e-05, "loss": 0.2256, "step": 3410 }, { "epoch": 0.24, "grad_norm": 273254.46875, "learning_rate": 4.6480910142691866e-05, "loss": 0.3162, "step": 3420 }, { "epoch": 0.25, "grad_norm": 222690.640625, "learning_rate": 4.646885846509834e-05, "loss": 0.2528, "step": 3430 }, { "epoch": 0.25, "grad_norm": 332726.59375, "learning_rate": 4.645680678750482e-05, "loss": 0.222, "step": 3440 }, { "epoch": 0.25, "grad_norm": 122768.3515625, "learning_rate": 4.64447551099113e-05, "loss": 0.2721, "step": 3450 }, { "epoch": 0.25, "grad_norm": 177160.875, "learning_rate": 4.643270343231778e-05, "loss": 0.2322, "step": 3460 }, { "epoch": 0.25, "grad_norm": 492673.46875, "learning_rate": 4.642065175472426e-05, "loss": 0.2355, "step": 3470 }, { "epoch": 0.25, "grad_norm": 120090.96875, "learning_rate": 4.640860007713074e-05, "loss": 0.2207, "step": 3480 }, { "epoch": 0.25, "grad_norm": 199731.515625, "learning_rate": 4.6396548399537215e-05, "loss": 0.1873, "step": 3490 }, { "epoch": 0.25, "grad_norm": 337224.9375, "learning_rate": 4.6384496721943696e-05, "loss": 0.2371, "step": 3500 }, { "epoch": 0.25, "eval_accuracy": 0.9139099242726185, "eval_f1": 0.9141014321392996, "eval_loss": 0.22432319819927216, "eval_precision": 0.9166452044770947, "eval_recall": 0.9139099242726185, "eval_runtime": 12.4637, "eval_samples_per_second": 201.305, "eval_steps_per_second": 3.209, "step": 3500 }, { "epoch": 0.25, "grad_norm": 144981.5625, "learning_rate": 4.637244504435018e-05, "loss": 0.2471, "step": 3510 }, { "epoch": 0.25, "grad_norm": 535936.1875, "learning_rate": 4.636039336675665e-05, "loss": 0.2198, "step": 3520 }, { "epoch": 0.25, "grad_norm": 262519.0, "learning_rate": 4.634834168916313e-05, "loss": 0.1173, "step": 3530 }, { "epoch": 0.25, "grad_norm": 408279.0625, "learning_rate": 4.633629001156961e-05, "loss": 0.2529, "step": 3540 }, { "epoch": 0.25, "grad_norm": 334028.15625, "learning_rate": 4.6324238333976096e-05, "loss": 0.2309, "step": 3550 }, { "epoch": 0.25, "grad_norm": 180496.734375, "learning_rate": 4.631218665638257e-05, "loss": 0.1904, "step": 3560 }, { "epoch": 0.26, "grad_norm": 247550.046875, "learning_rate": 4.630013497878905e-05, "loss": 0.2689, "step": 3570 }, { "epoch": 0.26, "grad_norm": 203775.171875, "learning_rate": 4.6288083301195526e-05, "loss": 0.2308, "step": 3580 }, { "epoch": 0.26, "grad_norm": 114762.5703125, "learning_rate": 4.627603162360201e-05, "loss": 0.1804, "step": 3590 }, { "epoch": 0.26, "grad_norm": 333855.78125, "learning_rate": 4.626397994600848e-05, "loss": 0.2273, "step": 3600 }, { "epoch": 0.26, "eval_accuracy": 0.913511359107214, "eval_f1": 0.9130592850413116, "eval_loss": 0.23622463643550873, "eval_precision": 0.9152968112748071, "eval_recall": 0.913511359107214, "eval_runtime": 12.479, "eval_samples_per_second": 201.057, "eval_steps_per_second": 3.205, "step": 3600 }, { "epoch": 0.26, "grad_norm": 156201.640625, "learning_rate": 4.625192826841497e-05, "loss": 0.1892, "step": 3610 }, { "epoch": 0.26, "grad_norm": 208585.484375, "learning_rate": 4.6239876590821444e-05, "loss": 0.2036, "step": 3620 }, { "epoch": 0.26, "grad_norm": 457924.40625, "learning_rate": 4.6227824913227925e-05, "loss": 0.2847, "step": 3630 }, { "epoch": 0.26, "grad_norm": 255040.125, "learning_rate": 4.62157732356344e-05, "loss": 0.1938, "step": 3640 }, { "epoch": 0.26, "grad_norm": 183455.359375, "learning_rate": 4.620372155804088e-05, "loss": 0.1977, "step": 3650 }, { "epoch": 0.26, "grad_norm": 344806.9375, "learning_rate": 4.6191669880447356e-05, "loss": 0.255, "step": 3660 }, { "epoch": 0.26, "grad_norm": 445422.90625, "learning_rate": 4.6179618202853844e-05, "loss": 0.167, "step": 3670 }, { "epoch": 0.26, "grad_norm": 228799.640625, "learning_rate": 4.616756652526032e-05, "loss": 0.1926, "step": 3680 }, { "epoch": 0.26, "grad_norm": 211847.53125, "learning_rate": 4.61555148476668e-05, "loss": 0.1977, "step": 3690 }, { "epoch": 0.26, "grad_norm": 337014.65625, "learning_rate": 4.6143463170073274e-05, "loss": 0.2504, "step": 3700 }, { "epoch": 0.26, "eval_accuracy": 0.8888003188521323, "eval_f1": 0.8873066761801871, "eval_loss": 0.2670985162258148, "eval_precision": 0.8964616545609316, "eval_recall": 0.8888003188521323, "eval_runtime": 12.4722, "eval_samples_per_second": 201.168, "eval_steps_per_second": 3.207, "step": 3700 }, { "epoch": 0.27, "grad_norm": 168669.875, "learning_rate": 4.6131411492479755e-05, "loss": 0.2568, "step": 3710 }, { "epoch": 0.27, "grad_norm": 390192.53125, "learning_rate": 4.6119359814886236e-05, "loss": 0.292, "step": 3720 }, { "epoch": 0.27, "grad_norm": 381233.5, "learning_rate": 4.610730813729272e-05, "loss": 0.2383, "step": 3730 }, { "epoch": 0.27, "grad_norm": 201262.109375, "learning_rate": 4.609525645969919e-05, "loss": 0.2706, "step": 3740 }, { "epoch": 0.27, "grad_norm": 159978.65625, "learning_rate": 4.6083204782105673e-05, "loss": 0.2194, "step": 3750 }, { "epoch": 0.27, "grad_norm": 334120.59375, "learning_rate": 4.607115310451215e-05, "loss": 0.2306, "step": 3760 }, { "epoch": 0.27, "grad_norm": 204698.265625, "learning_rate": 4.605910142691863e-05, "loss": 0.1931, "step": 3770 }, { "epoch": 0.27, "grad_norm": 354762.46875, "learning_rate": 4.604704974932511e-05, "loss": 0.2498, "step": 3780 }, { "epoch": 0.27, "grad_norm": 279956.8125, "learning_rate": 4.603499807173159e-05, "loss": 0.238, "step": 3790 }, { "epoch": 0.27, "grad_norm": 204264.0625, "learning_rate": 4.6022946394138066e-05, "loss": 0.1978, "step": 3800 }, { "epoch": 0.27, "eval_accuracy": 0.917098445595855, "eval_f1": 0.9169802729066538, "eval_loss": 0.20485247671604156, "eval_precision": 0.9171643432118469, "eval_recall": 0.917098445595855, "eval_runtime": 12.4749, "eval_samples_per_second": 201.125, "eval_steps_per_second": 3.206, "step": 3800 }, { "epoch": 0.27, "grad_norm": 224777.34375, "learning_rate": 4.601089471654455e-05, "loss": 0.1953, "step": 3810 }, { "epoch": 0.27, "grad_norm": 233138.46875, "learning_rate": 4.599884303895102e-05, "loss": 0.2356, "step": 3820 }, { "epoch": 0.27, "grad_norm": 130433.8515625, "learning_rate": 4.59867913613575e-05, "loss": 0.2269, "step": 3830 }, { "epoch": 0.27, "grad_norm": 208775.78125, "learning_rate": 4.5974739683763984e-05, "loss": 0.2384, "step": 3840 }, { "epoch": 0.28, "grad_norm": 195953.21875, "learning_rate": 4.5962688006170466e-05, "loss": 0.1926, "step": 3850 }, { "epoch": 0.28, "grad_norm": 169765.890625, "learning_rate": 4.595063632857694e-05, "loss": 0.2259, "step": 3860 }, { "epoch": 0.28, "grad_norm": 216638.4375, "learning_rate": 4.593858465098342e-05, "loss": 0.2306, "step": 3870 }, { "epoch": 0.28, "grad_norm": 219986.3125, "learning_rate": 4.5926532973389896e-05, "loss": 0.2169, "step": 3880 }, { "epoch": 0.28, "grad_norm": 525753.625, "learning_rate": 4.591448129579638e-05, "loss": 0.1942, "step": 3890 }, { "epoch": 0.28, "grad_norm": 207980.359375, "learning_rate": 4.590242961820286e-05, "loss": 0.2189, "step": 3900 }, { "epoch": 0.28, "eval_accuracy": 0.9099242726185731, "eval_f1": 0.9098638649336764, "eval_loss": 0.22675587236881256, "eval_precision": 0.9098782103745078, "eval_recall": 0.9099242726185731, "eval_runtime": 12.4751, "eval_samples_per_second": 201.121, "eval_steps_per_second": 3.206, "step": 3900 }, { "epoch": 0.28, "grad_norm": 167927.484375, "learning_rate": 4.589037794060933e-05, "loss": 0.2138, "step": 3910 }, { "epoch": 0.28, "grad_norm": 549348.0, "learning_rate": 4.5878326263015814e-05, "loss": 0.2528, "step": 3920 }, { "epoch": 0.28, "grad_norm": 86471.5546875, "learning_rate": 4.586627458542229e-05, "loss": 0.188, "step": 3930 }, { "epoch": 0.28, "grad_norm": 349328.78125, "learning_rate": 4.585422290782877e-05, "loss": 0.215, "step": 3940 }, { "epoch": 0.28, "grad_norm": 222607.4375, "learning_rate": 4.584217123023525e-05, "loss": 0.2179, "step": 3950 }, { "epoch": 0.28, "grad_norm": 304011.5, "learning_rate": 4.583011955264173e-05, "loss": 0.2179, "step": 3960 }, { "epoch": 0.28, "grad_norm": 150350.171875, "learning_rate": 4.581806787504821e-05, "loss": 0.2505, "step": 3970 }, { "epoch": 0.28, "grad_norm": 212958.984375, "learning_rate": 4.580601619745469e-05, "loss": 0.2408, "step": 3980 }, { "epoch": 0.29, "grad_norm": 118683.5703125, "learning_rate": 4.579396451986116e-05, "loss": 0.2103, "step": 3990 }, { "epoch": 0.29, "grad_norm": 160386.796875, "learning_rate": 4.5781912842267644e-05, "loss": 0.2171, "step": 4000 }, { "epoch": 0.29, "eval_accuracy": 0.9163013152650459, "eval_f1": 0.9161651055562922, "eval_loss": 0.213547021150589, "eval_precision": 0.9164113566074957, "eval_recall": 0.9163013152650459, "eval_runtime": 12.4758, "eval_samples_per_second": 201.11, "eval_steps_per_second": 3.206, "step": 4000 }, { "epoch": 0.29, "grad_norm": 284341.65625, "learning_rate": 4.5769861164674125e-05, "loss": 0.2502, "step": 4010 }, { "epoch": 0.29, "grad_norm": 210621.484375, "learning_rate": 4.5757809487080606e-05, "loss": 0.2148, "step": 4020 }, { "epoch": 0.29, "grad_norm": 437063.46875, "learning_rate": 4.574575780948708e-05, "loss": 0.2505, "step": 4030 }, { "epoch": 0.29, "grad_norm": 212374.59375, "learning_rate": 4.573370613189356e-05, "loss": 0.213, "step": 4040 }, { "epoch": 0.29, "grad_norm": 158491.328125, "learning_rate": 4.572165445430004e-05, "loss": 0.2041, "step": 4050 }, { "epoch": 0.29, "grad_norm": 386332.40625, "learning_rate": 4.570960277670652e-05, "loss": 0.2476, "step": 4060 }, { "epoch": 0.29, "grad_norm": 100670.15625, "learning_rate": 4.5697551099113e-05, "loss": 0.1967, "step": 4070 }, { "epoch": 0.29, "grad_norm": 486919.75, "learning_rate": 4.568549942151948e-05, "loss": 0.1781, "step": 4080 }, { "epoch": 0.29, "grad_norm": 261833.109375, "learning_rate": 4.5673447743925955e-05, "loss": 0.2773, "step": 4090 }, { "epoch": 0.29, "grad_norm": 290765.3125, "learning_rate": 4.5661396066332436e-05, "loss": 0.2325, "step": 4100 }, { "epoch": 0.29, "eval_accuracy": 0.8915902750099641, "eval_f1": 0.8904847371544249, "eval_loss": 0.26240846514701843, "eval_precision": 0.8965748968593881, "eval_recall": 0.8915902750099641, "eval_runtime": 12.4947, "eval_samples_per_second": 200.805, "eval_steps_per_second": 3.201, "step": 4100 }, { "epoch": 0.29, "grad_norm": 645535.5, "learning_rate": 4.564934438873891e-05, "loss": 0.1845, "step": 4110 }, { "epoch": 0.29, "grad_norm": 709512.5, "learning_rate": 4.56372927111454e-05, "loss": 0.2641, "step": 4120 }, { "epoch": 0.3, "grad_norm": 194677.859375, "learning_rate": 4.562524103355187e-05, "loss": 0.275, "step": 4130 }, { "epoch": 0.3, "grad_norm": 153537.4375, "learning_rate": 4.5613189355958354e-05, "loss": 0.2616, "step": 4140 }, { "epoch": 0.3, "grad_norm": 168272.890625, "learning_rate": 4.560113767836483e-05, "loss": 0.2226, "step": 4150 }, { "epoch": 0.3, "grad_norm": 311794.625, "learning_rate": 4.558908600077131e-05, "loss": 0.2237, "step": 4160 }, { "epoch": 0.3, "grad_norm": 312384.03125, "learning_rate": 4.5577034323177785e-05, "loss": 0.2552, "step": 4170 }, { "epoch": 0.3, "grad_norm": 177459.375, "learning_rate": 4.556498264558427e-05, "loss": 0.1759, "step": 4180 }, { "epoch": 0.3, "grad_norm": 281611.59375, "learning_rate": 4.555293096799075e-05, "loss": 0.2031, "step": 4190 }, { "epoch": 0.3, "grad_norm": 201760.8125, "learning_rate": 4.554087929039723e-05, "loss": 0.1888, "step": 4200 }, { "epoch": 0.3, "eval_accuracy": 0.8923874053407732, "eval_f1": 0.8911264735256401, "eval_loss": 0.2877594530582428, "eval_precision": 0.8986690061249697, "eval_recall": 0.8923874053407732, "eval_runtime": 12.4645, "eval_samples_per_second": 201.292, "eval_steps_per_second": 3.209, "step": 4200 }, { "epoch": 0.3, "grad_norm": 120513.65625, "learning_rate": 4.55288276128037e-05, "loss": 0.1995, "step": 4210 }, { "epoch": 0.3, "grad_norm": 236276.1875, "learning_rate": 4.5516775935210184e-05, "loss": 0.1812, "step": 4220 }, { "epoch": 0.3, "grad_norm": 315676.4375, "learning_rate": 4.550472425761666e-05, "loss": 0.2993, "step": 4230 }, { "epoch": 0.3, "grad_norm": 184856.0, "learning_rate": 4.549267258002315e-05, "loss": 0.2441, "step": 4240 }, { "epoch": 0.3, "grad_norm": 636716.1875, "learning_rate": 4.548062090242962e-05, "loss": 0.2447, "step": 4250 }, { "epoch": 0.3, "grad_norm": 235927.671875, "learning_rate": 4.54685692248361e-05, "loss": 0.2051, "step": 4260 }, { "epoch": 0.31, "grad_norm": 243305.515625, "learning_rate": 4.545651754724258e-05, "loss": 0.2596, "step": 4270 }, { "epoch": 0.31, "grad_norm": 370063.59375, "learning_rate": 4.544446586964906e-05, "loss": 0.2223, "step": 4280 }, { "epoch": 0.31, "grad_norm": 585004.625, "learning_rate": 4.543241419205554e-05, "loss": 0.2075, "step": 4290 }, { "epoch": 0.31, "grad_norm": 262047.875, "learning_rate": 4.5420362514462014e-05, "loss": 0.2345, "step": 4300 }, { "epoch": 0.31, "eval_accuracy": 0.8963730569948186, "eval_f1": 0.8953353148714563, "eval_loss": 0.24442929029464722, "eval_precision": 0.9013455886413335, "eval_recall": 0.8963730569948186, "eval_runtime": 12.4824, "eval_samples_per_second": 201.004, "eval_steps_per_second": 3.205, "step": 4300 }, { "epoch": 0.31, "grad_norm": 127394.015625, "learning_rate": 4.5408310836868495e-05, "loss": 0.1995, "step": 4310 }, { "epoch": 0.31, "grad_norm": 405064.8125, "learning_rate": 4.539625915927497e-05, "loss": 0.2691, "step": 4320 }, { "epoch": 0.31, "grad_norm": 632551.125, "learning_rate": 4.538420748168145e-05, "loss": 0.2758, "step": 4330 }, { "epoch": 0.31, "grad_norm": 103708.234375, "learning_rate": 4.537215580408793e-05, "loss": 0.2373, "step": 4340 }, { "epoch": 0.31, "grad_norm": 231929.875, "learning_rate": 4.5360104126494414e-05, "loss": 0.2152, "step": 4350 }, { "epoch": 0.31, "grad_norm": 302502.03125, "learning_rate": 4.534805244890089e-05, "loss": 0.2068, "step": 4360 }, { "epoch": 0.31, "grad_norm": 278650.0625, "learning_rate": 4.533600077130737e-05, "loss": 0.2494, "step": 4370 }, { "epoch": 0.31, "grad_norm": 224905.484375, "learning_rate": 4.5323949093713844e-05, "loss": 0.2022, "step": 4380 }, { "epoch": 0.31, "grad_norm": 279063.75, "learning_rate": 4.5311897416120325e-05, "loss": 0.2295, "step": 4390 }, { "epoch": 0.31, "grad_norm": 163691.515625, "learning_rate": 4.52998457385268e-05, "loss": 0.1688, "step": 4400 }, { "epoch": 0.31, "eval_accuracy": 0.908330011956955, "eval_f1": 0.9077357266944889, "eval_loss": 0.2479422241449356, "eval_precision": 0.9108744090612344, "eval_recall": 0.908330011956955, "eval_runtime": 12.5049, "eval_samples_per_second": 200.641, "eval_steps_per_second": 3.199, "step": 4400 }, { "epoch": 0.32, "grad_norm": 327312.96875, "learning_rate": 4.528779406093329e-05, "loss": 0.1556, "step": 4410 }, { "epoch": 0.32, "grad_norm": 274013.09375, "learning_rate": 4.527574238333976e-05, "loss": 0.2903, "step": 4420 }, { "epoch": 0.32, "grad_norm": 227320.96875, "learning_rate": 4.526369070574624e-05, "loss": 0.2432, "step": 4430 }, { "epoch": 0.32, "grad_norm": 101987.640625, "learning_rate": 4.525163902815272e-05, "loss": 0.2176, "step": 4440 }, { "epoch": 0.32, "grad_norm": 225351.109375, "learning_rate": 4.52395873505592e-05, "loss": 0.1864, "step": 4450 }, { "epoch": 0.32, "grad_norm": 208051.765625, "learning_rate": 4.5227535672965674e-05, "loss": 0.2659, "step": 4460 }, { "epoch": 0.32, "grad_norm": 342590.03125, "learning_rate": 4.521548399537216e-05, "loss": 0.2199, "step": 4470 }, { "epoch": 0.32, "grad_norm": 216820.671875, "learning_rate": 4.5203432317778636e-05, "loss": 0.2149, "step": 4480 }, { "epoch": 0.32, "grad_norm": 138170.34375, "learning_rate": 4.519138064018512e-05, "loss": 0.2442, "step": 4490 }, { "epoch": 0.32, "grad_norm": 247797.890625, "learning_rate": 4.517932896259159e-05, "loss": 0.2083, "step": 4500 }, { "epoch": 0.32, "eval_accuracy": 0.913511359107214, "eval_f1": 0.9130955830993255, "eval_loss": 0.21996097266674042, "eval_precision": 0.9150368156806639, "eval_recall": 0.913511359107214, "eval_runtime": 12.4935, "eval_samples_per_second": 200.824, "eval_steps_per_second": 3.202, "step": 4500 }, { "epoch": 0.32, "grad_norm": 120234.546875, "learning_rate": 4.516727728499807e-05, "loss": 0.1768, "step": 4510 }, { "epoch": 0.32, "grad_norm": 195769.40625, "learning_rate": 4.5155225607404554e-05, "loss": 0.2422, "step": 4520 }, { "epoch": 0.32, "grad_norm": 142246.734375, "learning_rate": 4.5143173929811036e-05, "loss": 0.1863, "step": 4530 }, { "epoch": 0.32, "grad_norm": 208686.890625, "learning_rate": 4.513112225221751e-05, "loss": 0.1961, "step": 4540 }, { "epoch": 0.33, "grad_norm": 344615.53125, "learning_rate": 4.511907057462399e-05, "loss": 0.2392, "step": 4550 }, { "epoch": 0.33, "grad_norm": 542719.9375, "learning_rate": 4.5107018897030466e-05, "loss": 0.2497, "step": 4560 }, { "epoch": 0.33, "grad_norm": 308383.09375, "learning_rate": 4.509496721943695e-05, "loss": 0.1903, "step": 4570 }, { "epoch": 0.33, "grad_norm": 246131.765625, "learning_rate": 4.508291554184343e-05, "loss": 0.2518, "step": 4580 }, { "epoch": 0.33, "grad_norm": 441606.1875, "learning_rate": 4.507086386424991e-05, "loss": 0.2086, "step": 4590 }, { "epoch": 0.33, "grad_norm": 181527.40625, "learning_rate": 4.5058812186656384e-05, "loss": 0.2475, "step": 4600 }, { "epoch": 0.33, "eval_accuracy": 0.9035472299721005, "eval_f1": 0.9030499023997222, "eval_loss": 0.23530976474285126, "eval_precision": 0.9051551236736851, "eval_recall": 0.9035472299721005, "eval_runtime": 12.5089, "eval_samples_per_second": 200.577, "eval_steps_per_second": 3.198, "step": 4600 }, { "epoch": 0.33, "grad_norm": 202878.53125, "learning_rate": 4.5046760509062865e-05, "loss": 0.2055, "step": 4610 }, { "epoch": 0.33, "grad_norm": 233816.40625, "learning_rate": 4.503470883146934e-05, "loss": 0.1779, "step": 4620 }, { "epoch": 0.33, "grad_norm": 177604.296875, "learning_rate": 4.502265715387582e-05, "loss": 0.2305, "step": 4630 }, { "epoch": 0.33, "grad_norm": 364165.34375, "learning_rate": 4.50106054762823e-05, "loss": 0.2123, "step": 4640 }, { "epoch": 0.33, "grad_norm": 155471.65625, "learning_rate": 4.4998553798688784e-05, "loss": 0.1939, "step": 4650 }, { "epoch": 0.33, "grad_norm": 337500.03125, "learning_rate": 4.498650212109526e-05, "loss": 0.2682, "step": 4660 }, { "epoch": 0.33, "grad_norm": 128476.1171875, "learning_rate": 4.497445044350174e-05, "loss": 0.1988, "step": 4670 }, { "epoch": 0.33, "grad_norm": 275538.78125, "learning_rate": 4.4962398765908214e-05, "loss": 0.2333, "step": 4680 }, { "epoch": 0.34, "grad_norm": 249258.46875, "learning_rate": 4.4950347088314695e-05, "loss": 0.1944, "step": 4690 }, { "epoch": 0.34, "grad_norm": 266623.78125, "learning_rate": 4.4938295410721176e-05, "loss": 0.1928, "step": 4700 }, { "epoch": 0.34, "eval_accuracy": 0.894380231167796, "eval_f1": 0.8933321704334167, "eval_loss": 0.2986622750759125, "eval_precision": 0.8992220854769614, "eval_recall": 0.894380231167796, "eval_runtime": 12.4701, "eval_samples_per_second": 201.202, "eval_steps_per_second": 3.208, "step": 4700 }, { "epoch": 0.34, "grad_norm": 536207.0, "learning_rate": 4.492624373312766e-05, "loss": 0.2723, "step": 4710 }, { "epoch": 0.34, "grad_norm": 152929.5, "learning_rate": 4.491419205553413e-05, "loss": 0.204, "step": 4720 }, { "epoch": 0.34, "grad_norm": 249052.25, "learning_rate": 4.490214037794061e-05, "loss": 0.2062, "step": 4730 }, { "epoch": 0.34, "grad_norm": 420176.1875, "learning_rate": 4.489008870034709e-05, "loss": 0.243, "step": 4740 }, { "epoch": 0.34, "grad_norm": 225057.75, "learning_rate": 4.487803702275357e-05, "loss": 0.2047, "step": 4750 }, { "epoch": 0.34, "grad_norm": 322600.46875, "learning_rate": 4.486598534516005e-05, "loss": 0.2563, "step": 4760 }, { "epoch": 0.34, "grad_norm": 215948.890625, "learning_rate": 4.4853933667566525e-05, "loss": 0.2187, "step": 4770 }, { "epoch": 0.34, "grad_norm": 304164.59375, "learning_rate": 4.4841881989973006e-05, "loss": 0.1994, "step": 4780 }, { "epoch": 0.34, "grad_norm": 435468.0625, "learning_rate": 4.482983031237948e-05, "loss": 0.2137, "step": 4790 }, { "epoch": 0.34, "grad_norm": 185575.71875, "learning_rate": 4.481777863478596e-05, "loss": 0.2008, "step": 4800 }, { "epoch": 0.34, "eval_accuracy": 0.876046233559187, "eval_f1": 0.8735264940770283, "eval_loss": 0.299306720495224, "eval_precision": 0.8896518291860559, "eval_recall": 0.876046233559187, "eval_runtime": 12.4793, "eval_samples_per_second": 201.052, "eval_steps_per_second": 3.205, "step": 4800 }, { "epoch": 0.34, "grad_norm": 115670.9296875, "learning_rate": 4.480572695719244e-05, "loss": 0.1804, "step": 4810 }, { "epoch": 0.34, "grad_norm": 192260.875, "learning_rate": 4.4793675279598924e-05, "loss": 0.2982, "step": 4820 }, { "epoch": 0.35, "grad_norm": 145098.75, "learning_rate": 4.47816236020054e-05, "loss": 0.2516, "step": 4830 }, { "epoch": 0.35, "grad_norm": 107989.4609375, "learning_rate": 4.476957192441188e-05, "loss": 0.1921, "step": 4840 }, { "epoch": 0.35, "grad_norm": 378746.71875, "learning_rate": 4.4757520246818355e-05, "loss": 0.2296, "step": 4850 }, { "epoch": 0.35, "grad_norm": 236829.796875, "learning_rate": 4.474546856922484e-05, "loss": 0.2355, "step": 4860 }, { "epoch": 0.35, "grad_norm": 226571.203125, "learning_rate": 4.473341689163132e-05, "loss": 0.2038, "step": 4870 }, { "epoch": 0.35, "grad_norm": 206681.625, "learning_rate": 4.47213652140378e-05, "loss": 0.2086, "step": 4880 }, { "epoch": 0.35, "grad_norm": 347856.59375, "learning_rate": 4.470931353644427e-05, "loss": 0.2322, "step": 4890 }, { "epoch": 0.35, "grad_norm": 251549.421875, "learning_rate": 4.4697261858850754e-05, "loss": 0.22, "step": 4900 }, { "epoch": 0.35, "eval_accuracy": 0.9035472299721005, "eval_f1": 0.9032851922362077, "eval_loss": 0.24311725795269012, "eval_precision": 0.9039388604136059, "eval_recall": 0.9035472299721005, "eval_runtime": 12.509, "eval_samples_per_second": 200.576, "eval_steps_per_second": 3.198, "step": 4900 }, { "epoch": 0.35, "grad_norm": 365455.96875, "learning_rate": 4.468521018125723e-05, "loss": 0.1983, "step": 4910 }, { "epoch": 0.35, "grad_norm": 135693.59375, "learning_rate": 4.4673158503663717e-05, "loss": 0.1764, "step": 4920 }, { "epoch": 0.35, "grad_norm": 332106.21875, "learning_rate": 4.466110682607019e-05, "loss": 0.2139, "step": 4930 }, { "epoch": 0.35, "grad_norm": 162781.546875, "learning_rate": 4.464905514847667e-05, "loss": 0.2205, "step": 4940 }, { "epoch": 0.35, "grad_norm": 167004.28125, "learning_rate": 4.463700347088315e-05, "loss": 0.2255, "step": 4950 }, { "epoch": 0.35, "grad_norm": 217623.34375, "learning_rate": 4.462495179328963e-05, "loss": 0.2438, "step": 4960 }, { "epoch": 0.36, "grad_norm": 191213.296875, "learning_rate": 4.46129001156961e-05, "loss": 0.2283, "step": 4970 }, { "epoch": 0.36, "grad_norm": 244383.078125, "learning_rate": 4.460084843810259e-05, "loss": 0.2034, "step": 4980 }, { "epoch": 0.36, "grad_norm": 260674.09375, "learning_rate": 4.4588796760509065e-05, "loss": 0.1769, "step": 4990 }, { "epoch": 0.36, "grad_norm": 363888.0625, "learning_rate": 4.4576745082915546e-05, "loss": 0.1844, "step": 5000 }, { "epoch": 0.36, "eval_accuracy": 0.917098445595855, "eval_f1": 0.9170851985417201, "eval_loss": 0.2590126693248749, "eval_precision": 0.9170766527573011, "eval_recall": 0.917098445595855, "eval_runtime": 12.478, "eval_samples_per_second": 201.074, "eval_steps_per_second": 3.206, "step": 5000 }, { "epoch": 0.36, "grad_norm": 219406.3125, "learning_rate": 4.456469340532202e-05, "loss": 0.2647, "step": 5010 }, { "epoch": 0.36, "grad_norm": 289715.0, "learning_rate": 4.45526417277285e-05, "loss": 0.2315, "step": 5020 }, { "epoch": 0.36, "grad_norm": 300951.78125, "learning_rate": 4.4540590050134977e-05, "loss": 0.2044, "step": 5030 }, { "epoch": 0.36, "grad_norm": 228631.484375, "learning_rate": 4.4528538372541465e-05, "loss": 0.1395, "step": 5040 }, { "epoch": 0.36, "grad_norm": 258748.0625, "learning_rate": 4.451648669494794e-05, "loss": 0.2106, "step": 5050 }, { "epoch": 0.36, "grad_norm": 261900.28125, "learning_rate": 4.450443501735442e-05, "loss": 0.1891, "step": 5060 }, { "epoch": 0.36, "grad_norm": 567360.5, "learning_rate": 4.4492383339760895e-05, "loss": 0.1796, "step": 5070 }, { "epoch": 0.36, "grad_norm": 247367.140625, "learning_rate": 4.4480331662167376e-05, "loss": 0.2354, "step": 5080 }, { "epoch": 0.36, "grad_norm": 180764.234375, "learning_rate": 4.446827998457386e-05, "loss": 0.2308, "step": 5090 }, { "epoch": 0.36, "grad_norm": 143473.34375, "learning_rate": 4.445622830698034e-05, "loss": 0.2235, "step": 5100 }, { "epoch": 0.36, "eval_accuracy": 0.904742925468314, "eval_f1": 0.9041326842192928, "eval_loss": 0.2420862317085266, "eval_precision": 0.9071727983486801, "eval_recall": 0.904742925468314, "eval_runtime": 12.4566, "eval_samples_per_second": 201.419, "eval_steps_per_second": 3.211, "step": 5100 }, { "epoch": 0.37, "grad_norm": 142405.03125, "learning_rate": 4.444417662938681e-05, "loss": 0.2276, "step": 5110 }, { "epoch": 0.37, "grad_norm": 245076.640625, "learning_rate": 4.4432124951793294e-05, "loss": 0.2051, "step": 5120 }, { "epoch": 0.37, "grad_norm": 313735.53125, "learning_rate": 4.442007327419977e-05, "loss": 0.2259, "step": 5130 }, { "epoch": 0.37, "grad_norm": 423567.96875, "learning_rate": 4.440802159660625e-05, "loss": 0.2329, "step": 5140 }, { "epoch": 0.37, "grad_norm": 220593.828125, "learning_rate": 4.439596991901273e-05, "loss": 0.1839, "step": 5150 }, { "epoch": 0.37, "grad_norm": 300864.0, "learning_rate": 4.4383918241419206e-05, "loss": 0.2598, "step": 5160 }, { "epoch": 0.37, "grad_norm": 338582.8125, "learning_rate": 4.437186656382569e-05, "loss": 0.2564, "step": 5170 }, { "epoch": 0.37, "grad_norm": 225039.421875, "learning_rate": 4.435981488623216e-05, "loss": 0.2375, "step": 5180 }, { "epoch": 0.37, "grad_norm": 306696.875, "learning_rate": 4.434776320863864e-05, "loss": 0.1867, "step": 5190 }, { "epoch": 0.37, "grad_norm": 175457.875, "learning_rate": 4.4335711531045124e-05, "loss": 0.2222, "step": 5200 }, { "epoch": 0.37, "eval_accuracy": 0.8947787963332005, "eval_f1": 0.8940640494290631, "eval_loss": 0.2958182394504547, "eval_precision": 0.8972950995667646, "eval_recall": 0.8947787963332005, "eval_runtime": 12.4883, "eval_samples_per_second": 200.907, "eval_steps_per_second": 3.203, "step": 5200 }, { "epoch": 0.37, "grad_norm": 274502.125, "learning_rate": 4.4323659853451605e-05, "loss": 0.2213, "step": 5210 }, { "epoch": 0.37, "grad_norm": 143913.453125, "learning_rate": 4.431160817585808e-05, "loss": 0.2485, "step": 5220 }, { "epoch": 0.37, "grad_norm": 134518.640625, "learning_rate": 4.429955649826456e-05, "loss": 0.215, "step": 5230 }, { "epoch": 0.37, "grad_norm": 203820.625, "learning_rate": 4.4287504820671036e-05, "loss": 0.2146, "step": 5240 }, { "epoch": 0.38, "grad_norm": 322863.5625, "learning_rate": 4.427545314307752e-05, "loss": 0.2328, "step": 5250 }, { "epoch": 0.38, "grad_norm": 356002.96875, "learning_rate": 4.4263401465484e-05, "loss": 0.2083, "step": 5260 }, { "epoch": 0.38, "grad_norm": 703977.0625, "learning_rate": 4.425134978789048e-05, "loss": 0.1654, "step": 5270 }, { "epoch": 0.38, "grad_norm": 221094.734375, "learning_rate": 4.4239298110296954e-05, "loss": 0.2014, "step": 5280 }, { "epoch": 0.38, "grad_norm": 273416.5625, "learning_rate": 4.4227246432703435e-05, "loss": 0.2901, "step": 5290 }, { "epoch": 0.38, "grad_norm": 188345.46875, "learning_rate": 4.421519475510991e-05, "loss": 0.2241, "step": 5300 }, { "epoch": 0.38, "eval_accuracy": 0.9210840972499004, "eval_f1": 0.9208697027387154, "eval_loss": 0.2031262218952179, "eval_precision": 0.9215894170459646, "eval_recall": 0.9210840972499004, "eval_runtime": 16.2579, "eval_samples_per_second": 154.325, "eval_steps_per_second": 2.46, "step": 5300 } ], "logging_steps": 10, "max_steps": 41988, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 4.46240356466688e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }